perf: password special chars; feat: llm paragraph; perf: chunk setting params; perf: text splitter worker (#4984)

* perf: password special chars

* feat: llm paragraph;perf: chunk setting params

* perf: text splitter worker

* perf: get rawtext buffer

* fix: test

* fix: test

* doc

* min chunk size
This commit is contained in:
Archer
2025-06-10 00:05:54 +08:00
committed by GitHub
parent 068918a9ee
commit 01ff56b42b
41 changed files with 546 additions and 448 deletions

View File

@@ -3,9 +3,9 @@ export const checkPasswordRule = (password: string) => {
/\d/, // Contains digits
/[a-z]/, // Contains lowercase letters
/[A-Z]/, // Contains uppercase letters
/[!@#$%^&*()_+=-]/ // Contains special characters
/[!@#$%^&*()_+=.,:;?\/\\|`~"'<>{}\[\]-]/ // Contains special characters
];
const validChars = /^[\dA-Za-z!@#$%^&*()_+=-]{8,100}$/;
const validChars = /^[\dA-Za-z!@#$%^&*()_+=.,:;?\/\\|`~"'<>{}\[\]-]{8,100}$/;
// Check length and valid characters
if (!validChars.test(password)) return false;

View File

@@ -1,10 +1,11 @@
import { defaultMaxChunkSize } from '../../core/dataset/training/utils';
import { getErrText } from '../error/utils';
import { simpleText } from './tools';
import { getTextValidLength } from './utils';
// NOTE(review): presumably a sentinel string placed in text to mark a manual split point —
// its consumer is not visible in this excerpt; confirm against the splitter implementation.
export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----';
type SplitProps = {
export type SplitProps = {
text: string;
chunkSize: number;
@@ -19,7 +20,7 @@ export type TextSplitProps = Omit<SplitProps, 'text' | 'chunkSize'> & {
chunkSize?: number;
};
type SplitResponse = {
/**
 * Return shape of `splitText2Chunks`.
 */
export type SplitResponse = {
// Flattened list of chunk strings gathered from every split result.
chunks: string[];
// Sum of the `chars` values reported by the individual split results.
chars: number;
};
@@ -474,7 +475,10 @@ export const splitText2Chunks = (props: SplitProps): SplitResponse => {
});
return {
chunks: splitResult.map((item) => item.chunks).flat(),
chunks: splitResult
.map((item) => item.chunks)
.flat()
.map((chunk) => simpleText(chunk)),
chars: splitResult.reduce((sum, item) => sum + item.chars, 0)
};
};

View File

@@ -7,3 +7,4 @@ export const DEFAULT_ORG_AVATAR = '/imgs/avatar/defaultOrgAvatar.svg';
// Path of the default user avatar image.
export const DEFAULT_USER_AVATAR = '/imgs/avatar/BlueAvatar.svg';
// True when NODE_ENV is exactly 'production'.
export const isProduction = process.env.NODE_ENV === 'production';
// True when NODE_ENV is exactly 'test'.
export const isTestEnv = process.env.NODE_ENV === 'test';