4.6.7-alpha commit (#743)

Co-authored-by: Archer <545436317@qq.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
Archer
2024-01-19 11:17:28 +08:00
committed by GitHub
parent 8ee7407c4c
commit c031e6dcc9
324 changed files with 8509 additions and 4757 deletions

View File

@@ -13,13 +13,12 @@ export const splitText2Chunks = (props: {
 chunkLen: number;
 overlapRatio?: number;
 customReg?: string[];
-countTokens?: boolean;
 }): {
 chunks: string[];
-tokens: number;
+chars: number;
 overlapRatio?: number;
 } => {
-let { text = '', chunkLen, overlapRatio = 0.2, customReg = [], countTokens = true } = props;
+let { text = '', chunkLen, overlapRatio = 0.2, customReg = [] } = props;
 const splitMarker = 'SPLIT_HERE_SPLIT_HERE';
 const codeBlockMarker = 'CODE_BLOCK_LINE_MARKER';
 const overlapLen = Math.round(chunkLen * overlapRatio);
@@ -240,13 +239,11 @@ export const splitText2Chunks = (props: {
 mdTitle: ''
 }).map((chunk) => chunk?.replaceAll(codeBlockMarker, '\n') || ''); // restore code block
-const tokens = countTokens
-? chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0)
-: 0;
+const chars = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
 return {
 chunks,
-tokens
+chars
 };
 } catch (err) {
 throw new Error(getErrText(err));