This commit is contained in:
Archer
2023-12-18 16:24:50 +08:00
committed by GitHub
parent d33c99f564
commit 703583fff7
130 changed files with 3418 additions and 2579 deletions

View File

@@ -13,12 +13,13 @@ export const splitText2Chunks = (props: {
chunkLen: number;
overlapRatio?: number;
customReg?: string[];
countTokens?: boolean;
}): {
chunks: string[];
tokens: number;
overlapRatio?: number;
} => {
let { text = '', chunkLen, overlapRatio = 0.2, customReg = [] } = props;
let { text = '', chunkLen, overlapRatio = 0.2, customReg = [], countTokens = true } = props;
const splitMarker = 'SPLIT_HERE_SPLIT_HERE';
const codeBlockMarker = 'CODE_BLOCK_LINE_MARKER';
const overlapLen = Math.round(chunkLen * overlapRatio);
@@ -233,7 +234,9 @@ export const splitText2Chunks = (props: {
mdTitle: ''
}).map((chunk) => chunk.replaceAll(codeBlockMarker, '\n')); // restore code block
const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);
const tokens = countTokens
? chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0)
: 0;
return {
chunks,