This commit is contained in:
Archer
2023-12-27 11:07:39 +08:00
committed by GitHub
parent 86286efb54
commit 759a2330e6
182 changed files with 3099 additions and 81685 deletions

View File

@@ -34,3 +34,41 @@ export const simpleMarkdownText = (rawText: string) => {
return rawText.trim();
};
/**
* format markdown
* 1. upload base64
* 2. replace \
*/
export const uploadMarkdownBase64 = async ({
rawText,
uploadImgController
}: {
rawText: string;
uploadImgController: (base64: string) => Promise<string>;
}) => {
// match base64, upload and replace it
const base64Regex = /data:image\/.*;base64,([^\)]+)/g;
const base64Arr = rawText.match(base64Regex) || [];
// upload base64 and replace it
await Promise.all(
base64Arr.map(async (base64Img) => {
try {
const str = await uploadImgController(base64Img);
rawText = rawText.replace(base64Img, str);
} catch (error) {
rawText = rawText.replace(base64Img, '');
rawText = rawText.replace(/!\[.*\]\(\)/g, '');
}
})
);
// Remove white space on both sides of the picture
const trimReg = /(!\[.*\]\(.*\))\s*/g;
if (trimReg.test(rawText)) {
rawText = rawText.replace(trimReg, '$1');
}
return simpleMarkdownText(rawText);
};

View File

@@ -31,7 +31,7 @@ export const splitText2Chunks = (props: {
// The larger maxLen is, the next sentence is less likely to trigger splitting
const stepReges: { reg: RegExp; maxLen: number }[] = [
...customReg.map((text) => ({ reg: new RegExp(`([${text}])`, 'g'), maxLen: chunkLen * 1.4 })),
...customReg.map((text) => ({ reg: new RegExp(`(${text})`, 'g'), maxLen: chunkLen * 1.4 })),
{ reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /^(##\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
{ reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.2 },
@@ -64,13 +64,22 @@ export const splitText2Chunks = (props: {
}
];
}
const isCustomSteep = checkIsCustomStep(step);
const isMarkdownSplit = checkIsMarkdownSplit(step);
const independentChunk = checkIndependentChunk(step);
const { reg } = stepReges[step];
const splitTexts = text
.replace(reg, independentChunk ? `${splitMarker}$1` : `$1${splitMarker}`)
.replace(
reg,
(() => {
if (isCustomSteep) return splitMarker;
if (independentChunk) return `${splitMarker}$1`;
return `$1${splitMarker}`;
})()
)
.split(`${splitMarker}`)
.filter((part) => part.trim());
@@ -128,11 +137,6 @@ export const splitText2Chunks = (props: {
const independentChunk = checkIndependentChunk(step);
const isCustomStep = checkIsCustomStep(step);
// mini text
if (text.length <= chunkLen) {
return [text];
}
// oversize
if (step >= stepReges.length) {
if (text.length < chunkLen * 3) {
@@ -221,6 +225,8 @@ export const splitText2Chunks = (props: {
} else {
chunks.push(`${mdTitle}${lastText}`);
}
} else if (lastText && chunks.length === 0) {
chunks.push(lastText);
}
return chunks;