perf: handle token-encoding errors when reading files

This commit is contained in:
archer
2023-08-26 17:17:19 +08:00
parent 93030afe3e
commit 13439c5183

View File

@@ -173,9 +173,16 @@ export const splitText2Chunks = ({ text, maxLen }: { text: string; maxLen: numbe
chunks.push(chunk);
}
const enc = getOpenAiEncMap();
const encodeText = enc.encode(chunks.join(''));
const tokens = encodeText.length;
const tokens = (() => {
try {
const enc = getOpenAiEncMap();
const encodeText = enc.encode(chunks.join(''));
const tokens = encodeText.length;
return tokens;
} catch (error) {
return chunks.join('').length;
}
})();
return {
chunks,
@@ -274,5 +281,6 @@ export const simpleText = (text: string) => {
text = text.replace(/([\u4e00-\u9fa5])\s+([\u4e00-\u9fa5])/g, '$1$2');
text = text.replace(/\n{2,}/g, '\n');
text = text.replace(/\s{2,}/g, ' ');
text = text.replace(/[^\x00-\x7F]/g, ' ');
return text;
};