Add image index and PDF parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -117,14 +117,16 @@ export const createTrainingUsage = async ({
billSource,
vectorModel,
agentModel,
vllmModel,
session
}: {
teamId: string;
tmbId: string;
appName: string;
billSource: UsageSourceEnum;
vectorModel: string;
agentModel: string;
vectorModel?: string;
agentModel?: string;
vllmModel?: string;
session?: ClientSession;
}) => {
const [{ _id }] = await MongoUsage.create(
@@ -136,27 +138,46 @@ export const createTrainingUsage = async ({
source: billSource,
totalPoints: 0,
list: [
{
moduleName: i18nT('common:support.wallet.moduleName.index'),
model: vectorModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: i18nT('common:support.wallet.moduleName.qa'),
model: agentModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: i18nT('common:core.dataset.training.Auto mode'),
model: agentModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
}
...(vectorModel
? [
{
moduleName: i18nT('account_usage:embedding_index'),
model: vectorModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
}
]
: []),
...(agentModel
? [
{
moduleName: i18nT('account_usage:qa'),
model: agentModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
},
{
moduleName: i18nT('account_usage:auto_index'),
model: agentModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
}
]
: []),
...(vllmModel
? [
{
moduleName: i18nT('account_usage:image_parse'),
model: vllmModel,
amount: 0,
inputTokens: 0,
outputTokens: 0
}
]
: [])
]
}
],
@@ -165,3 +186,31 @@ export const createTrainingUsage = async ({
return { billId: String(_id) };
};
/**
 * Record the usage produced by an enhanced PDF parse.
 *
 * The charged points are `pages` multiplied by the per-page price read from
 * `global.systemEnv?.customPdfParse?.price`; a missing or otherwise falsy
 * price is treated as 0. The resulting record is handed to `createUsage`,
 * whose result is neither awaited nor returned here.
 *
 * @param props.teamId - Team id forwarded to the usage record.
 * @param props.tmbId - Team-member id forwarded to the usage record.
 * @param props.pages - Page count; used as the price multiplier and stored on the list item.
 */
export const createPdfParseUsage = async (props: {
  teamId: string;
  tmbId: string;
  pages: number;
}): Promise<void> => {
  const { teamId, tmbId, pages } = props;

  // `||` (not `??`) is intentional here: any falsy price collapses to 0.
  const pricePerPage = global.systemEnv?.customPdfParse?.price || 0;
  const points = pages * pricePerPage;

  // The same i18n key labels both the usage entry and its single line item.
  createUsage({
    teamId,
    tmbId,
    appName: i18nT('account_usage:pdf_enhanced_parse'),
    totalPoints: points,
    source: UsageSourceEnum.pdfParse,
    list: [
      {
        moduleName: i18nT('account_usage:pdf_enhanced_parse'),
        amount: points,
        pages
      }
    ]
  });
};