perf: password special chars;feat: llm paragraph;perf: chunk setting params;perf: text splitter worker (#4984)

* perf: password special chars

* feat: llm paragraph;perf: chunk setting params

* perf: text splitter worker

* perf: get rawtext buffer

* fix: test

* fix: test

* doc

* min chunk size
This commit is contained in:
Archer
2025-06-10 00:05:54 +08:00
committed by GitHub
parent 068918a9ee
commit 01ff56b42b
41 changed files with 546 additions and 448 deletions

View File

@@ -8,6 +8,8 @@ import {
type CreateUsageProps
} from '@fastgpt/global/support/wallet/usage/api';
import { i18nT } from '../../../../web/i18n/utils';
import { formatModelChars2Points } from './utils';
import { ModelTypeEnum } from '@fastgpt/global/core/ai/model';
export async function createUsage(data: CreateUsageProps) {
try {
@@ -67,6 +69,14 @@ export const createChatUsage = ({
return { totalPoints };
};
/** Names of the dataset training modes; these are the keys of `datasetTrainingUsageIndexMap`. */
export type DatasetTrainingMode = 'paragraph' | 'qa' | 'autoIndex' | 'imageIndex' | 'imageParse';
/**
 * Maps every training mode to the numeric index that `pushLLMTrainingUsage`
 * passes to `concatUsage` as `listIndex`.
 *
 * NOTE(review): the values presumably are positions in the usage list assembled by
 * `createTrainingUsage` (index 0 being taken by another entry) — confirm against that
 * function and keep both in sync when a mode is added or reordered.
 */
export const datasetTrainingUsageIndexMap: Record<DatasetTrainingMode, number> = {
paragraph: 1,
qa: 2,
autoIndex: 3,
imageIndex: 4,
imageParse: 5
};
export const createTrainingUsage = async ({
teamId,
tmbId,
@@ -108,6 +118,13 @@ export const createTrainingUsage = async ({
: []),
...(agentModel
? [
{
moduleName: i18nT('account_usage:llm_paragraph'),
model: agentModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: i18nT('account_usage:qa'),
model: agentModel,
@@ -126,6 +143,13 @@ export const createTrainingUsage = async ({
: []),
...(vllmModel
? [
{
moduleName: i18nT('account_usage:image_index'),
model: vllmModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: i18nT('account_usage:image_parse'),
model: vllmModel,
@@ -171,3 +195,43 @@ export const createPdfParseUsage = async ({
]
});
};
/**
 * Records the token consumption of one LLM call made during dataset training.
 *
 * The points are computed with `formatModelChars2Points` using the LLM model type,
 * then handed to `concatUsage` together with the list index that
 * `datasetTrainingUsageIndexMap` assigns to `mode`.
 *
 * @param teamId - Team the usage is recorded for.
 * @param tmbId - Team member the usage is recorded for.
 * @param model - Model identifier used for pricing.
 * @param inputTokens - Input tokens consumed by the call.
 * @param outputTokens - Output tokens produced by the call.
 * @param billId - Identifier of the usage record forwarded to `concatUsage`.
 * @param mode - Training mode; selects the `listIndex` sent to `concatUsage`.
 * @returns The points computed for this call as `{ totalPoints }`.
 */
export const pushLLMTrainingUsage = async ({
  teamId,
  tmbId,
  model,
  inputTokens,
  outputTokens,
  billId,
  mode
}: {
  teamId: string;
  tmbId: string;
  model: string;
  inputTokens: number;
  outputTokens: number;
  billId: string;
  mode: DatasetTrainingMode;
}) => {
  // Price the consumed tokens as an LLM call.
  const points = formatModelChars2Points({
    model,
    modelType: ModelTypeEnum.llm,
    inputTokens,
    outputTokens
  }).totalPoints;

  // The call is intentionally not awaited here: the points are returned
  // without waiting for the usage record to be updated.
  concatUsage({
    billId,
    teamId,
    tmbId,
    totalPoints: points,
    inputTokens,
    outputTokens,
    listIndex: datasetTrainingUsageIndexMap[mode]
  });

  return { totalPoints: points };
};