diff --git a/docSite/assets/imgs/datasetSetting1.png b/docSite/assets/imgs/datasetSetting1.png
new file mode 100644
index 000000000..81c2d8b49
Binary files /dev/null and b/docSite/assets/imgs/datasetSetting1.png differ
diff --git a/docSite/content/docs/installation/upgrading/46.md b/docSite/content/docs/installation/upgrading/46.md
index 4ebc6fcee..406c5e5c8 100644
--- a/docSite/content/docs/installation/upgrading/46.md
+++ b/docSite/content/docs/installation/upgrading/46.md
@@ -50,5 +50,6 @@ curl --location --request POST 'https://{{host}}/api/admin/initv46-2' \
 1. 新增 - 团队空间
 2. 新增 - 多路向量(多个向量映射一组数据)
 3. 新增 - tts语音
-4. 线上环境新增 - ReRank向量召回，提高召回精度
-5. 优化 - 知识库导出，可直接触发流下载，无需等待转圈圈
+4. 新增 - 支持知识库配置文本预处理模型
+5. 线上环境新增 - ReRank向量召回，提高召回精度
+6. 优化 - 知识库导出，可直接触发流下载，无需等待转圈圈
diff --git a/docSite/content/docs/pricing.md b/docSite/content/docs/pricing.md
index 444db949e..75fe5eb29 100644
--- a/docSite/content/docs/pricing.md
+++ b/docSite/content/docs/pricing.md
@@ -1,10 +1,10 @@
 ---
-title: '定价'
-description: 'FastGPT 的定价'
+title: '线上版定价'
+description: 'FastGPT 线上版定价'
 icon: 'currency_yen'
 draft: false
 toc: true
-weight: 10
+weight: 11
 ---
 
 ## Tokens 说明
@@ -15,7 +15,7 @@ weight: 10
 
 ## FastGPT 线上计费
 
-目前，FastGPT 线上计费也仅按 Tokens 使用数量为准。以下是详细的计费表（最新定价以线上表格为准，可在点击充值后实时获取）：
+使用 [https://fastgpt.run](https://fastgpt.run) 或 [https://ai.fastgpt.in](https://ai.fastgpt.in) 时，仅按 Tokens 使用数量扣费。可在 账号-使用记录 中查看具体使用情况。以下是详细的计费表（最新定价以线上表格为准，可在点击充值后实时获取）：
 
 {{< table "table-hover table-striped-columns" >}}
 | 计费项 | 价格: 元/ 1K tokens（包含上下文）|
diff --git a/docSite/content/docs/use-cases/datasetEngine.md b/docSite/content/docs/use-cases/datasetEngine.md
index 5e643a42b..48e9d6c5e 100644
--- a/docSite/content/docs/use-cases/datasetEngine.md
+++ b/docSite/content/docs/use-cases/datasetEngine.md
@@ -1,6 +1,6 @@
 ---
 title: "知识库结构讲解"
-description: "本节会介绍 FastGPT 知识库结构设计，理解其 QA 的存储格式和检索格式，以便更好的构建知识库。这篇介绍主要以使用为主，详细原理不多介绍。"
+description: "本节会详细介绍 FastGPT 知识库结构设计，理解其 QA 的存储格式和多向量映射，以便更好地构建知识库。这篇介绍主要以使用为主，详细原理不多介绍。"
 icon: "dataset"
 draft: false
 toc: true
@@ -25,13 +25,21 @@ FastGPT 采用了 RAG 中的 Embedding 方案构建知识库，要使用好 Fast
 
 FastGPT 采用了 `PostgresSQL` 的 `PG Vector` 插件作为向量检索器，索引为`HNSW`。且`PostgresSQL`仅用于向量检索，`MongoDB`用于其他数据的存取。
 
-在`PostgresSQL`的表中，设置一个 `index` 字段用于存储向量、一个 `q` 字段用于存储向量对应的内容，以及一个 `a` 字段用于检索映射。之所以取字段为 `qa` 是由于一些历史缘故，无需完全解为 “问答对” 的格式。在实际使用过程中，可以利用`q`和`a`的组合，对检索后的内容做进一步的声明，提高大模型的理解力（注意，这里不直接提高搜索精度）。
+在`PostgresSQL`的表中，设置一个 `index` 字段用于存储向量，以及一个`data_id`用于在`MongoDB`中寻找对应的映射值。多个`index`可以对应一组`data_id`，也就是说，一组数据可以对应多组向量。在进行检索时，相同数据会进行合并。
 
-目前，提高向量搜索的精度，主要可以通过几种途径：
+![](/imgs/datasetSetting1.png)
 
-1. 精简`q`的内容，减少向量内容的长度：当`q`的内容更少，更准确时，检索精度自然会提高。但与此同时，会牺牲一定的检索范围，适合答案较为严格的场景。
-2. 更好分词分段：当一段话的结构和语义是完整的，并且是单一的，精度也会提高。因此，许多系统都会优化分词器，尽可能的保障每组数据的完整性。
-3. 多样性文本：为一段内容增加关键词、摘要、相似问题等描述性信息，可以使得该内容的向量具有更大的检索覆盖范围。
+## 多向量的目的和使用方式
+
+对于一组数据，如果我们既希望它尽可能长，又希望其语义在向量中尽可能体现，就无法只用一组向量来表示。因此，我们采用了多向量映射的方式，将一组数据映射到多组向量中，从而兼顾数据的完整性和语义的体现。
+
+你可以为一组较长的文本添加多组向量，在检索时，只要其中一组向量被检索到，该数据就会被召回。
+
+## 提高向量搜索精度的方法
+
+1. 更好分词分段：当一段话的结构和语义是完整的，并且是单一的，精度也会提高。因此，许多系统都会优化分词器，尽可能地保障每组数据的完整性。
+2. 精简`index`的内容，减少向量内容的长度：当`index`的内容更少，更准确时，检索精度自然会提高。但与此同时，会牺牲一定的检索范围，适合答案较为严格的场景。
+3. 丰富`index`的数量，可以为同一个`chunk`内容增加多组`index`。
 4. 优化检索词：在实际使用过程中，用户的问题通常是模糊的或是缺失的，并不一定是完整清晰的问题。因此优化用户的问题（检索词）很大程度上也可以提高精度。
 5. 
微调向量模型:由于市面上直接使用的向量模型都是通用型模型,在特定领域的检索精度并不高,因此微调向量模型可以很大程度上提高专业领域的检索效果。 diff --git a/packages/global/common/string/textSplitter.ts b/packages/global/common/string/textSplitter.ts index 5737347f2..ea9e6de51 100644 --- a/packages/global/common/string/textSplitter.ts +++ b/packages/global/common/string/textSplitter.ts @@ -63,8 +63,8 @@ export const splitText2Chunks = (props: { text: string; maxLen: number; overlapL let chunks: string[] = []; for (let i = 0; i < splitTexts.length; i++) { let text = splitTexts[i]; - let chunkToken = countPromptTokens(lastChunk, ''); - const textToken = countPromptTokens(text, ''); + let chunkToken = lastChunk.length; + const textToken = text.length; // next chunk is too large / new chunk is too large(The current chunk must be smaller than maxLen) if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) { diff --git a/packages/global/core/dataset/type.d.ts b/packages/global/core/dataset/type.d.ts index a1a7700da..851e1e326 100644 --- a/packages/global/core/dataset/type.d.ts +++ b/packages/global/core/dataset/type.d.ts @@ -1,4 +1,4 @@ -import type { VectorModelItemType } from '../../core/ai/model.d'; +import type { LLMModelItemType, VectorModelItemType } from '../../core/ai/model.d'; import { PermissionTypeEnum } from '../../support/permission/constant'; import { PushDatasetDataChunkProps } from './api'; import { @@ -19,6 +19,7 @@ export type DatasetSchemaType = { avatar: string; name: string; vectorModel: string; + agentModel: string; tags: string[]; type: `${DatasetTypeEnum}`; permission: `${PermissionTypeEnum}`; @@ -84,8 +85,9 @@ export type CollectionWithDatasetType = Omit & { +export type DatasetItemType = Omit & { vectorModel: VectorModelItemType; + agentModel: LLMModelItemType; isOwner: boolean; canWrite: boolean; }; diff --git a/packages/global/support/wallet/bill/api.d.ts b/packages/global/support/wallet/bill/api.d.ts index f24c6d9c6..fbd20025d 100644 --- a/packages/global/support/wallet/bill/api.d.ts +++ b/packages/global/support/wallet/bill/api.d.ts @@ -3,6 +3,8 @@ import { BillListItemType } from './type'; export type CreateTrainingBillProps = { name: string; + vectorModel?: string; + agentModel?: string; }; export type ConcatBillProps = { diff --git a/packages/service/core/app/schema.ts b/packages/service/core/app/schema.ts index dd32113fc..356f1412c 100644 --- a/packages/service/core/app/schema.ts +++ b/packages/service/core/app/schema.ts @@ -61,7 +61,6 @@ const AppSchema = new Schema({ try { AppSchema.index({ updateTime: -1 }); - AppSchema.index({ 'share.collection': -1 }); } catch (error) { console.log(error); } diff --git a/packages/service/core/dataset/collection/schema.ts b/packages/service/core/dataset/collection/schema.ts index 575cf851e..352276f24 100644 --- a/packages/service/core/dataset/collection/schema.ts +++ b/packages/service/core/dataset/collection/schema.ts @@ -69,7 +69,6 @@ const DatasetCollectionSchema = new Schema({ try { DatasetCollectionSchema.index({ datasetId: 1 }); - DatasetCollectionSchema.index({ userId: 1 }); DatasetCollectionSchema.index({ updateTime: -1 }); } catch (error) { console.log(error); diff --git a/packages/service/core/dataset/schema.ts b/packages/service/core/dataset/schema.ts index 937896829..6f61eb90a 100644 --- a/packages/service/core/dataset/schema.ts +++ b/packages/service/core/dataset/schema.ts @@ -48,6 +48,11 @@ const DatasetSchema = new Schema({ required: true, default: 'text-embedding-ada-002' }, + agentModel: { + type: String, + required: true, + default: 'gpt-3.5-turbo-16k' + }, type: { 
type: String, enum: Object.keys(DatasetTypeMap), diff --git a/packages/service/core/dataset/training/schema.ts b/packages/service/core/dataset/training/schema.ts index d521f1dc8..b7ce90969 100644 --- a/packages/service/core/dataset/training/schema.ts +++ b/packages/service/core/dataset/training/schema.ts @@ -95,7 +95,7 @@ const TrainingDataSchema = new Schema({ try { TrainingDataSchema.index({ lockTime: 1 }); - TrainingDataSchema.index({ userId: 1 }); + TrainingDataSchema.index({ datasetId: 1 }); TrainingDataSchema.index({ collectionId: 1 }); TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 }); } catch (error) { diff --git a/projects/app/public/locales/en/common.json b/projects/app/public/locales/en/common.json index 4808b7cbb..bdb957cf0 100644 --- a/projects/app/public/locales/en/common.json +++ b/projects/app/public/locales/en/common.json @@ -250,6 +250,7 @@ } }, "dataset": { + "Agent Model": "Learning Model", "Chunk Length": "Chunk Length", "Confirm move the folder": "Confirm Move", "Confirm to delete the data": "Confirm to delete the data?", @@ -259,6 +260,7 @@ "Delete Dataset Error": "Delete dataset failed", "Edit Folder": "Edit Folder", "Export": "Export", + "Export Dataset Limit Error": "Export Data Error", "File Input": "Import File", "File Size": "File Size", "Filename": "Filename", diff --git a/projects/app/public/locales/zh/common.json b/projects/app/public/locales/zh/common.json index 8053b1cc8..cd6ceaa42 100644 --- a/projects/app/public/locales/zh/common.json +++ b/projects/app/public/locales/zh/common.json @@ -250,6 +250,7 @@ } }, "dataset": { + "Agent Model": "文件处理模型", "Chunk Length": "数据总量", "Confirm move the folder": "确认移动到该目录", "Confirm to delete the data": "确认删除该数据?", @@ -259,6 +260,7 @@ "Delete Dataset Error": "删除知识库异常", "Edit Folder": "编辑文件夹", "Export": "导出", + "Export Dataset Limit Error": "导出数据失败", "File Input": "文件导入", "File Size": "文件大小", "Filename": "文件名", diff --git a/projects/app/src/constants/dataset.ts b/projects/app/src/constants/dataset.ts index aff029935..1303e4c42 100644 --- a/projects/app/src/constants/dataset.ts +++ b/projects/app/src/constants/dataset.ts @@ -1,3 +1,4 @@ +import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model'; import type { DatasetCollectionItemType, DatasetItemType @@ -17,13 +18,8 @@ export const defaultDatasetDetail: DatasetItemType = { permission: 'private', isOwner: false, canWrite: false, - vectorModel: { - model: 'text-embedding-ada-002', - name: 'Embedding-2', - price: 0.2, - defaultToken: 500, - maxToken: 3000 - } + vectorModel: defaultVectorModels[0], + agentModel: defaultQAModels[0] }; export const defaultCollectionDetail: DatasetCollectionItemType = { @@ -43,7 +39,8 @@ export const defaultCollectionDetail: DatasetCollectionItemType = { name: '', tags: [], permission: 'private', - vectorModel: 'text-embedding-ada-002' + vectorModel: defaultVectorModels[0].model, + agentModel: defaultQAModels[0].model }, parentId: '', name: '', diff --git a/projects/app/src/global/core/api/datasetReq.d.ts b/projects/app/src/global/core/api/datasetReq.d.ts index 7a03d5f61..9a7d07653 100644 --- a/projects/app/src/global/core/api/datasetReq.d.ts +++ b/projects/app/src/global/core/api/datasetReq.d.ts @@ -5,6 +5,7 @@ import type { SearchTestItemType } from '@/types/core/dataset'; import { UploadChunkItemType } from '@fastgpt/global/core/dataset/type'; import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type'; import { PermissionTypeEnum } from 
'@fastgpt/global/support/permission/constant';
+import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
 
 /* ===== dataset ===== */
 export type DatasetUpdateParams = {
@@ -14,6 +15,7 @@
   name?: string;
   avatar?: string;
   permission?: `${PermissionTypeEnum}`;
+  agentModel?: LLMModelItemType;
 };
 
 export type SearchTestProps = {
diff --git a/projects/app/src/global/core/dataset/api.d.ts b/projects/app/src/global/core/dataset/api.d.ts
index dae6f72f9..a888c500a 100644
--- a/projects/app/src/global/core/dataset/api.d.ts
+++ b/projects/app/src/global/core/dataset/api.d.ts
@@ -9,6 +9,7 @@ export type CreateDatasetParams = {
   tags: string;
   avatar: string;
   vectorModel?: string;
+  agentModel?: string;
   type: `${DatasetTypeEnum}`;
 };
 
diff --git a/projects/app/src/global/core/prompt/agent.ts b/projects/app/src/global/core/prompt/agent.ts
index e6edde2f0..d21793ca4 100644
--- a/projects/app/src/global/core/prompt/agent.ts
+++ b/projects/app/src/global/core/prompt/agent.ts
@@ -1,8 +1,8 @@
 export const Prompt_AgentQA = {
   prompt: `我会给你一段文本，{{theme}}，学习它们，并整理学习成果，要求为：
-1. 提出最多 25 个问题。
-2. 给出每个问题的答案。
-3. 答案要详细完整，答案可以包含普通文字、链接、代码、表格、公示、媒体链接等 markdown 元素。
+1. 提出问题并给出每个问题的答案。
+2. 每个答案都要详细完整，给出相关原文描述，答案可以包含普通文字、链接、代码、表格、公式、媒体链接等 markdown 元素。
+3. 最多提出 30 个问题。
 4. 按格式返回多个问题和答案:
 
 Q1: 问题。
diff --git a/projects/app/src/pages/api/admin/initv46-2.ts b/projects/app/src/pages/api/admin/initv46-2.ts
index c7c7a7264..ff1ad799a 100644
--- a/projects/app/src/pages/api/admin/initv46-2.ts
+++ b/projects/app/src/pages/api/admin/initv46-2.ts
@@ -11,6 +11,8 @@ import {
 import { authCert } from '@fastgpt/service/support/permission/auth/common';
 import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
 import { getUserDefaultTeam } from '@fastgpt/service/support/user/team/controller';
+import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
+import { defaultQAModels } from '@fastgpt/global/core/ai/model';
 
 let success = 0;
 /* pg 中的数据搬到 mongo dataset.datas 中，并做映射 */
@@ -41,6 +43,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
 
   await initPgData();
 
+  await MongoDataset.updateMany(
+    {},
+    {
+      agentModel: defaultQAModels[0].model
+    }
+  );
+
   jsonRes(res, {
     data: await init(limit),
     message:
@@ -76,14 +85,19 @@ async function initPgData() {
   for (let i = 0; i < limit; i++) {
     init(i);
   }
+
   async function init(index: number): Promise {
     const userId = rows[index]?.user_id;
     if (!userId) return;
     try {
       const tmb = await getUserDefaultTeam({ userId });
+      console.log(tmb);
+
       // update pg
       await PgClient.query(
-        `Update ${PgDatasetTableName} set team_id = '${tmb.teamId}', tmb_id = '${tmb.tmbId}' where user_id = '${userId}' AND team_id='null';`
+        `Update ${PgDatasetTableName} set team_id = '${String(tmb.teamId)}', tmb_id = '${String(
+          tmb.tmbId
+        )}' where user_id = '${userId}' AND team_id='null';`
       );
       console.log(++success);
       init(index + limit);
diff --git a/projects/app/src/pages/api/admin/initv46-3.ts b/projects/app/src/pages/api/admin/initv46-3.ts
new file mode 100644
index 000000000..a564e069b
--- /dev/null
+++ b/projects/app/src/pages/api/admin/initv46-3.ts
@@ -0,0 +1,101 @@
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@fastgpt/service/common/response';
+import { connectToDatabase } from '@/service/mongo';
+import { delay } from '@/utils/tools';
+import { PgClient } from '@fastgpt/service/common/pg';
+import {
+  DatasetDataIndexTypeEnum,
+  PgDatasetTableName
+} from
'@fastgpt/global/core/dataset/constant'; + +import { authCert } from '@fastgpt/service/support/permission/auth/common'; +import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; + +let success = 0; +/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */ +export default async function handler(req: NextApiRequest, res: NextApiResponse) { + try { + const { limit = 50 } = req.body as { limit: number }; + await authCert({ req, authRoot: true }); + await connectToDatabase(); + success = 0; + + jsonRes(res, { + data: await init(limit) + }); + } catch (error) { + console.log(error); + + jsonRes(res, { + code: 500, + error + }); + } +} + +type PgItemType = { + id: string; + q: string; + a: string; + dataset_id: string; + collection_id: string; + data_id: string; +}; + +async function init(limit: number): Promise { + const { rows: idList } = await PgClient.query<{ id: string }>( + `SELECT id FROM ${PgDatasetTableName} WHERE inited=1` + ); + + console.log('totalCount', idList.length); + + await delay(2000); + + if (idList.length === 0) return; + + for (let i = 0; i < limit; i++) { + initData(i); + } + + async function initData(index: number): Promise { + const dataId = idList[index]?.id; + if (!dataId) { + console.log('done'); + return; + } + // get limit data where data_id is null + const { rows } = await PgClient.query( + `SELECT id,q,a,dataset_id,collection_id,data_id FROM ${PgDatasetTableName} WHERE id=${dataId};` + ); + const data = rows[0]; + if (!data) { + console.log('done'); + return; + } + + try { + // update mongo data and update inited + await MongoDatasetData.findByIdAndUpdate(data.data_id, { + q: data.q, + a: data.a, + indexes: [ + { + defaultIndex: !data.a, + type: data.a ? DatasetDataIndexTypeEnum.qa : DatasetDataIndexTypeEnum.chunk, + dataId: data.id, + text: data.q + } + ] + }); + // update pg data_id + await PgClient.query(`UPDATE ${PgDatasetTableName} SET inited=0 WHERE id=${dataId};`); + + return initData(index + limit); + } catch (error) { + console.log(error); + console.log(data); + await delay(500); + return initData(index); + } + } +} diff --git a/projects/app/src/pages/api/core/dataset/allDataset.ts b/projects/app/src/pages/api/core/dataset/allDataset.ts index ab3b5ed2f..30e1861d1 100644 --- a/projects/app/src/pages/api/core/dataset/allDataset.ts +++ b/projects/app/src/pages/api/core/dataset/allDataset.ts @@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; -import { getVectorModel } from '@/service/core/ai/model'; +import { getQAModel, getVectorModel } from '@/service/core/ai/model'; import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d'; import { mongoRPermission } from '@fastgpt/global/support/permission/utils'; import { authUserRole } from '@fastgpt/service/support/permission/auth/user'; @@ -22,6 +22,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const data = datasets.map((item) => ({ ...item.toJSON(), vectorModel: getVectorModel(item.vectorModel), + agentModel: getQAModel(item.agentModel), canWrite: String(item.tmbId) === tmbId, isOwner: teamOwner || String(item.tmbId) === tmbId })); diff --git a/projects/app/src/pages/api/core/dataset/checkExportLimit.ts b/projects/app/src/pages/api/core/dataset/checkExportLimit.ts new file mode 100644 index 000000000..0ec2e4d1f --- /dev/null +++ 
b/projects/app/src/pages/api/core/dataset/checkExportLimit.ts @@ -0,0 +1,73 @@ +import type { NextApiRequest, NextApiResponse } from 'next'; +import { jsonRes } from '@fastgpt/service/common/response'; +import { connectToDatabase } from '@/service/mongo'; +import { MongoUser } from '@fastgpt/service/support/user/schema'; +import { addLog } from '@fastgpt/service/common/mongo/controller'; +import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; +import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; +import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller'; + +export default async function handler(req: NextApiRequest, res: NextApiResponse) { + try { + await connectToDatabase(); + let { datasetId } = req.query as { + datasetId: string; + }; + + if (!datasetId) { + throw new Error('缺少参数'); + } + + // 凭证校验 + const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' }); + + await limitCheck({ + datasetId, + userId + }); + + jsonRes(res); + } catch (err) { + res.status(500); + jsonRes(res, { + code: 500, + error: err + }); + } +} + +export async function limitCheck({ datasetId, userId }: { datasetId: string; userId: string }) { + const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId); + + const limitMinutesAgo = new Date( + Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000 + ); + + // auth export times + const authTimes = await MongoUser.findOne( + { + _id: userId, + $or: [ + { 'limit.exportKbTime': { $exists: false } }, + { 'limit.exportKbTime': { $lte: limitMinutesAgo } } + ] + }, + '_id limit' + ); + + if (!authTimes) { + const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`; + return Promise.reject(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`); + } + + // auth max data + const total = await MongoDatasetData.countDocuments({ + datasetId: { $in: exportIds } + }); + + addLog.info(`export datasets: ${datasetId}`, { total }); + + if (total > 100000) { + return Promise.reject('数据量超出 10 万,无法导出'); + } +} diff --git a/projects/app/src/pages/api/core/dataset/create.ts b/projects/app/src/pages/api/core/dataset/create.ts index 86ad28d06..30547e1ce 100644 --- a/projects/app/src/pages/api/core/dataset/create.ts +++ b/projects/app/src/pages/api/core/dataset/create.ts @@ -9,7 +9,8 @@ import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/use export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { await connectToDatabase(); - const { name, tags, avatar, vectorModel, parentId, type } = req.body as CreateDatasetParams; + const { name, tags, avatar, vectorModel, agentModel, parentId, type } = + req.body as CreateDatasetParams; // 凭证校验 const { teamId, tmbId } = await authUserNotVisitor({ req, authToken: true }); @@ -20,6 +21,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< tmbId, tags, vectorModel, + agentModel, avatar, parentId: parentId || null, type diff --git a/projects/app/src/pages/api/core/dataset/data/pushData.ts b/projects/app/src/pages/api/core/dataset/data/pushData.ts index 996d0c2ac..7ed069bff 100644 --- a/projects/app/src/pages/api/core/dataset/data/pushData.ts +++ b/projects/app/src/pages/api/core/dataset/data/pushData.ts @@ -10,7 +10,7 @@ import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken'; import type { PushDataResponse } from '@/global/core/api/datasetRes.d'; import type { PushDatasetDataProps } from 
'@/global/core/dataset/api.d';
 import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
-import { getVectorModel } from '@/service/core/ai/model';
+import { getQAModel, getVectorModel } from '@/service/core/ai/model';
 import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
 import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
 
@@ -63,24 +63,14 @@ export async function pushDataToDatasetCollection({
   mode,
   prompt,
   billId
-}: { teamId: string; tmbId: string } & PushDatasetDataProps): Promise<PushDataResponse> {
-  // get dataset vector model
-  const {
-    datasetId: { _id: datasetId, vectorModel }
-  } = await getCollectionWithDataset(collectionId);
-
-  const vectorModelData = getVectorModel(vectorModel);
-
-  const modeMap = {
-    [TrainingModeEnum.chunk]: {
-      maxToken: vectorModelData.maxToken * 1.5,
-      model: vectorModelData.model
-    },
-    [TrainingModeEnum.qa]: {
-      maxToken: global.qaModels[0].maxContext * 0.8,
-      model: global.qaModels[0].model
-    }
-  };
+}: {
+  teamId: string;
+  tmbId: string;
+} & PushDatasetDataProps): Promise<PushDataResponse> {
+  const { datasetId, model, maxToken } = await checkModelValid({
+    mode,
+    collectionId
+  });
 
   // filter repeat or equal content
   const set = new Set();
@@ -102,12 +92,13 @@
     // count q token
     const token = countPromptTokens(item.q);
 
-    if (token > modeMap[mode].maxToken) {
+    if (token > maxToken) {
       filterResult.overToken.push(item);
       return;
     }
 
     if (set.has(text)) {
+      console.log('repeat', item);
       filterResult.repeat.push(item);
     } else {
       filterResult.success.push(item);
@@ -126,7 +117,7 @@
         billId,
         mode,
         prompt,
-        model: modeMap[mode].model,
+        model,
         q: item.q,
         a: item.a,
         indexes: item.indexes
@@ -142,6 +133,44 @@
   };
 }
 
+export async function checkModelValid({
+  mode,
+  collectionId
+}: {
+  mode: `${TrainingModeEnum}`;
+  collectionId: string;
+}) {
+  const {
+    datasetId: { _id: datasetId, vectorModel, agentModel }
+  } = await getCollectionWithDataset(collectionId);
+
+  if (mode === TrainingModeEnum.chunk) {
+    if (!collectionId) return Promise.reject(`CollectionId is empty`);
+    const vectorModelData = getVectorModel(vectorModel);
+    if (!vectorModelData) {
+      return Promise.reject(`Model ${vectorModel} is invalid`);
+    }
+    return {
+      datasetId,
+      maxToken: vectorModelData.maxToken * 1.5,
+      model: vectorModelData.model
+    };
+  }
+
+  if (mode === TrainingModeEnum.qa) {
+    const qaModelData = getQAModel(agentModel);
+    if (!qaModelData) {
+      return Promise.reject(`Model ${agentModel} is invalid`);
+    }
+    return {
+      datasetId,
+      maxToken: qaModelData.maxContext * 0.8,
+      model: qaModelData.model
+    };
+  }
+  return Promise.reject(`Mode ${mode} is invalid`);
+}
+
 export const config = {
   api: {
     bodyParser: {
diff --git a/projects/app/src/pages/api/core/dataset/detail.ts b/projects/app/src/pages/api/core/dataset/detail.ts
index a9a65b143..399a887c0 100644
--- a/projects/app/src/pages/api/core/dataset/detail.ts
+++ b/projects/app/src/pages/api/core/dataset/detail.ts
@@ -1,7 +1,7 @@
 import type { NextApiRequest, NextApiResponse } from 'next';
 import { jsonRes } from '@fastgpt/service/common/response';
 import { connectToDatabase } from '@/service/mongo';
-import { getVectorModel } from '@/service/core/ai/model';
+import { getQAModel, getVectorModel } from '@/service/core/ai/model';
 import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
 import { authDataset } from
'@fastgpt/service/support/permission/auth/dataset'; @@ -28,6 +28,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< data: { ...dataset, vectorModel: getVectorModel(dataset.vectorModel), + agentModel: getQAModel(dataset.agentModel), canWrite, isOwner } diff --git a/projects/app/src/pages/api/core/dataset/exportAll.ts b/projects/app/src/pages/api/core/dataset/exportAll.ts index ebdd2ae2c..e05ddc8a4 100644 --- a/projects/app/src/pages/api/core/dataset/exportAll.ts +++ b/projects/app/src/pages/api/core/dataset/exportAll.ts @@ -1,5 +1,5 @@ import type { NextApiRequest, NextApiResponse } from 'next'; -import { jsonRes } from '@fastgpt/service/common/response'; +import { jsonRes, responseWriteController } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; import { MongoUser } from '@fastgpt/service/support/user/schema'; import { addLog } from '@fastgpt/service/common/mongo/controller'; @@ -8,6 +8,7 @@ import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller'; import { Readable } from 'stream'; import type { Cursor } from '@fastgpt/service/common/mongo'; +import { limitCheck } from './checkExportLimit'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -23,39 +24,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< // 凭证校验 const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' }); - const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId); - - const limitMinutesAgo = new Date( - Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000 - ); - - // auth export times - const authTimes = await MongoUser.findOne( - { - _id: userId, - $or: [ - { 'limit.exportKbTime': { $exists: false } }, - { 'limit.exportKbTime': { $lte: limitMinutesAgo } } - ] - }, - '_id limit' - ); - - if (!authTimes) { - const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`; - throw new Error(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`); - } - - // auth max data - const total = await MongoDatasetData.countDocuments({ - datasetId: { $in: exportIds } + await limitCheck({ + userId, + datasetId }); - addLog.info(`export datasets: ${datasetId}`, { total }); - - if (total > 100000) { - throw new Error('数据量超出 10 万,无法导出'); - } + const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId); res.setHeader('Content-Type', 'text/csv; charset=utf-8;'); res.setHeader('Content-Disposition', 'attachment; filename=dataset.csv; '); @@ -72,35 +46,27 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< 'q a' ).cursor(); - function cursorToReadableStream(cursor: Cursor) { - const readable = new Readable({ - objectMode: true, - read() {} + const write = responseWriteController({ + res, + readStream: cursor + }); + + write(`\uFEFFindex,content`); + + cursor.on('data', (doc) => { + const q = doc.q.replace(/"/g, '""') || ''; + const a = doc.a.replace(/"/g, '""') || ''; + + write(`\n"${q}","${a}"`); + }); + + cursor.on('end', async () => { + cursor.close(); + res.end(); + await MongoUser.findByIdAndUpdate(userId, { + 'limit.exportKbTime': new Date() }); - - readable.push(`\uFEFFindex,content`); - - cursor.on('data', (doc) => { - const q = doc.q.replace(/"/g, '""') || ''; - const a = doc.a.replace(/"/g, '""') || ''; - - readable.push(`\n"${q}","${a}"`); - }); - - cursor.on('end', async () => { 
- readable.push(null); - cursor.close(); - await MongoUser.findByIdAndUpdate(userId, { - 'limit.exportKbTime': new Date() - }); - }); - - return readable; - } - - // @ts-ignore - const stream = cursorToReadableStream(cursor); - stream.pipe(res); + }); } catch (err) { res.status(500); jsonRes(res, { diff --git a/projects/app/src/pages/api/core/dataset/list.ts b/projects/app/src/pages/api/core/dataset/list.ts index f8dd47dfe..8d0c4319f 100644 --- a/projects/app/src/pages/api/core/dataset/list.ts +++ b/projects/app/src/pages/api/core/dataset/list.ts @@ -1,7 +1,7 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; -import { getVectorModel } from '@/service/core/ai/model'; +import { getQAModel, getVectorModel } from '@/service/core/ai/model'; import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d'; import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; @@ -28,6 +28,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< datasets.map(async (item) => ({ ...item.toJSON(), vectorModel: getVectorModel(item.vectorModel), + agentModel: getQAModel(item.agentModel), canWrite, isOwner: teamOwner || String(item.tmbId) === tmbId })) diff --git a/projects/app/src/pages/api/core/dataset/update.ts b/projects/app/src/pages/api/core/dataset/update.ts index 670f4092f..42bbefe7a 100644 --- a/projects/app/src/pages/api/core/dataset/update.ts +++ b/projects/app/src/pages/api/core/dataset/update.ts @@ -8,7 +8,8 @@ import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { await connectToDatabase(); - const { id, parentId, name, avatar, tags, permission } = req.body as DatasetUpdateParams; + const { id, parentId, name, avatar, tags, permission, agentModel } = + req.body as DatasetUpdateParams; if (!id) { throw new Error('缺少参数'); @@ -26,7 +27,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< ...(name && { name }), ...(avatar && { avatar }), ...(tags && { tags }), - ...(permission && { permission }) + ...(permission && { permission }), + ...(agentModel && { agentModel: agentModel.model }) } ); diff --git a/projects/app/src/pages/api/support/wallet/bill/createTrainingBill.ts b/projects/app/src/pages/api/support/wallet/bill/createTrainingBill.ts index ffe270e40..9660a50d9 100644 --- a/projects/app/src/pages/api/support/wallet/bill/createTrainingBill.ts +++ b/projects/app/src/pages/api/support/wallet/bill/createTrainingBill.ts @@ -5,15 +5,17 @@ import { MongoBill } from '@fastgpt/service/support/wallet/bill/schema'; import { authCert } from '@fastgpt/service/support/permission/auth/common'; import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants'; import { CreateTrainingBillProps } from '@fastgpt/global/support/wallet/bill/api.d'; +import { getQAModel, getVectorModel } from '@/service/core/ai/model'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { await connectToDatabase(); - const { name } = req.body as CreateTrainingBillProps; + const { name, vectorModel, agentModel } = req.body as CreateTrainingBillProps; const { teamId, tmbId } = await authCert({ req, authToken: true, authApiKey: true }); - const qaModel = global.qaModels[0]; + const vectorModelData = 
getVectorModel(vectorModel); + const agentModelData = getQAModel(agentModel); const { _id } = await MongoBill.create({ teamId, @@ -23,13 +25,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) list: [ { moduleName: '索引生成', - model: 'embedding', + model: vectorModelData.name, amount: 0, tokenLen: 0 }, { moduleName: 'QA 拆分', - model: qaModel?.name, + model: agentModelData.name, amount: 0, tokenLen: 0 } diff --git a/projects/app/src/pages/dataset/detail/components/DataCard.tsx b/projects/app/src/pages/dataset/detail/components/DataCard.tsx index 578d5a3ca..ec81738fb 100644 --- a/projects/app/src/pages/dataset/detail/components/DataCard.tsx +++ b/projects/app/src/pages/dataset/detail/components/DataCard.tsx @@ -170,7 +170,7 @@ const DataCard = () => { {datasetDataList.map((item) => ( diff --git a/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx b/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx index 2c7b33fb2..2da84e009 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/ImportModal.tsx @@ -34,10 +34,10 @@ const ImportData = ({ const theme = useTheme(); const { datasetDetail } = useDatasetStore(); const [importType, setImportType] = useState<`${ImportTypeEnum}`>(ImportTypeEnum.chunk); + const vectorModel = datasetDetail.vectorModel; + const agentModel = datasetDetail.agentModel; const typeMap = useMemo(() => { - const vectorModel = datasetDetail.vectorModel; - const qaModel = qaModelList[0]; const map = { [ImportTypeEnum.chunk]: { defaultChunkLen: vectorModel?.defaultToken || 500, @@ -45,8 +45,8 @@ const ImportData = ({ mode: TrainingModeEnum.chunk }, [ImportTypeEnum.qa]: { - defaultChunkLen: qaModel?.maxContext * 0.5 || 8000, - unitPrice: qaModel?.price || 3, + defaultChunkLen: agentModel?.maxContext * 0.6 || 9000, + unitPrice: agentModel?.price || 3, mode: TrainingModeEnum.qa }, [ImportTypeEnum.csv]: { @@ -56,7 +56,13 @@ const ImportData = ({ } }; return map[importType]; - }, [datasetDetail.vectorModel, importType]); + }, [ + agentModel?.maxContext, + agentModel?.price, + importType, + vectorModel?.defaultToken, + vectorModel?.price + ]); const TitleStyle: BoxProps = { fontWeight: 'bold', @@ -104,8 +110,10 @@ const ImportData = ({ diff --git a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx index 0046a25ee..99f0719bf 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx @@ -90,6 +90,8 @@ const Provider = ({ parentId, unitPrice, mode, + vectorModel, + agentModel, defaultChunkLen = 500, importType, onUploadSuccess, @@ -99,6 +101,8 @@ const Provider = ({ parentId: string; unitPrice: number; mode: `${TrainingModeEnum}`; + vectorModel: string; + agentModel: string; defaultChunkLen: number; importType: `${ImportTypeEnum}`; onUploadSuccess: () => void; @@ -132,7 +136,9 @@ const Provider = ({ const chunks = file.chunks; // create training bill const billId = await postCreateTrainingBill({ - name: t('dataset.collections.Create Training Data', { filename: file.filename }) + name: t('dataset.collections.Create Training Data', { filename: file.filename }), + vectorModel, + agentModel }); // create a file collection and training bill const collectionId = await postDatasetCollection({ diff --git 
a/projects/app/src/pages/dataset/detail/components/Import/QA.tsx b/projects/app/src/pages/dataset/detail/components/Import/QA.tsx index 2d0f2b06c..d6bc531ad 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/QA.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/QA.tsx @@ -13,8 +13,8 @@ const fileExtension = '.txt, .doc, .docx, .pdf, .md'; const QAImport = () => { const { datasetDetail } = useDatasetStore(); - const vectorModel = datasetDetail.vectorModel; - const unitPrice = vectorModel?.price || 0.2; + const agentModel = datasetDetail.agentModel; + const unitPrice = agentModel?.price || 3; const { successChunks, diff --git a/projects/app/src/pages/dataset/detail/components/Info.tsx b/projects/app/src/pages/dataset/detail/components/Info.tsx index de798df78..9fadd3230 100644 --- a/projects/app/src/pages/dataset/detail/components/Info.tsx +++ b/projects/app/src/pages/dataset/detail/components/Info.tsx @@ -9,7 +9,7 @@ import React, { import { useRouter } from 'next/router'; import { Box, Flex, Button, FormControl, IconButton, Input } from '@chakra-ui/react'; import { QuestionOutlineIcon, DeleteIcon } from '@chakra-ui/icons'; -import { delDatasetById, putDatasetById } from '@/web/core/dataset/api'; +import { delDatasetById } from '@/web/core/dataset/api'; import { useSelectFile } from '@/web/common/file/hooks/useSelectFile'; import { useToast } from '@/web/common/hooks/useToast'; import { useDatasetStore } from '@/web/core/dataset/store/dataset'; @@ -22,6 +22,8 @@ import Tag from '@/components/Tag'; import MyTooltip from '@/components/MyTooltip'; import { useTranslation } from 'next-i18next'; import PermissionRadio from '@/components/support/permission/Radio'; +import MySelect from '@/components/Select'; +import { qaModelList } from '@/web/common/system/staticData'; export interface ComponentRef { initInput: (tags: string) => void; @@ -50,7 +52,7 @@ const Info = ( multiple: false }); - const { datasetDetail, loadDatasetDetail, loadDatasets } = useDatasetStore(); + const { datasetDetail, loadDatasetDetail, loadDatasets, updateDataset } = useDatasetStore(); /* 点击删除 */ const onclickDelKb = useCallback(async () => { @@ -76,11 +78,10 @@ const Info = ( async (data: DatasetItemType) => { setBtnLoading(true); try { - await putDatasetById({ + await updateDataset({ id: datasetId, ...data }); - await loadDatasetDetail(datasetId, true); toast({ title: '更新成功', status: 'success' @@ -94,7 +95,7 @@ const Info = ( } setBtnLoading(false); }, - [loadDatasetDetail, datasetId, loadDatasets, toast] + [updateDataset, datasetId, loadDatasetDetail, toast, loadDatasets] ); const saveSubmitError = useCallback(() => { // deep search message @@ -194,6 +195,27 @@ const Info = ( })} /> + + + {t('dataset.Agent Model')} + + + ({ + label: item.name, + value: item.model + }))} + onchange={(e) => { + const agentModel = qaModelList.find((item) => item.model === e); + if (!agentModel) return; + setValue('agentModel', agentModel); + setRefresh((state) => !state); + }} + /> + + 标签 diff --git a/projects/app/src/pages/dataset/detail/components/InputDataModal.tsx b/projects/app/src/pages/dataset/detail/components/InputDataModal.tsx index 5dbb6bed1..7bb9f8c53 100644 --- a/projects/app/src/pages/dataset/detail/components/InputDataModal.tsx +++ b/projects/app/src/pages/dataset/detail/components/InputDataModal.tsx @@ -196,7 +196,7 @@ const InputDataModal = ({ const loading = useMemo(() => isImporting || isUpdating, [isImporting, isUpdating]); return ( - + diff --git 
a/projects/app/src/pages/dataset/list/component/CreateModal.tsx b/projects/app/src/pages/dataset/list/component/CreateModal.tsx index 8ddcab58b..12da57e55 100644 --- a/projects/app/src/pages/dataset/list/component/CreateModal.tsx +++ b/projects/app/src/pages/dataset/list/component/CreateModal.tsx @@ -15,10 +15,12 @@ import { postCreateDataset } from '@/web/core/dataset/api'; import type { CreateDatasetParams } from '@/global/core/dataset/api.d'; import MySelect from '@/components/Select'; import { QuestionOutlineIcon } from '@chakra-ui/icons'; -import { vectorModelList } from '@/web/common/system/staticData'; +import { vectorModelList, qaModelList } from '@/web/common/system/staticData'; import Tag from '@/components/Tag'; +import { useTranslation } from 'next-i18next'; const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: string }) => { + const { t } = useTranslation(); const [refresh, setRefresh] = useState(false); const { toast } = useToast(); const router = useRouter(); @@ -29,6 +31,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st name: '', tags: '', vectorModel: vectorModelList[0].model, + agentModel: qaModelList[0].model, type: 'dataset', parentId } @@ -76,7 +79,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st }); return ( - + 创建一个知识库 @@ -106,7 +109,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st /> - 索引模型 + 索引模型 void; parentId?: st /> + + {t('dataset.Agent Model')} + + ({ + label: item.name, + value: item.model + }))} + onchange={(e) => { + setValue('agentModel', e); + setRefresh((state) => !state); + }} + /> + + - + 标签 diff --git a/projects/app/src/pages/dataset/list/index.tsx b/projects/app/src/pages/dataset/list/index.tsx index 5366c18aa..a8c27b16f 100644 --- a/projects/app/src/pages/dataset/list/index.tsx +++ b/projects/app/src/pages/dataset/list/index.tsx @@ -20,7 +20,8 @@ import { delDatasetById, getDatasetPaths, putDatasetById, - postCreateDataset + postCreateDataset, + getCheckExportLimit } from '@/web/core/dataset/api'; import { useTranslation } from 'next-i18next'; import Avatar from '@/components/Avatar'; @@ -38,6 +39,7 @@ import { useDrag } from '@/web/common/hooks/useDrag'; import { useUserStore } from '@/web/support/user/useUserStore'; import PermissionIconText from '@/components/support/permission/IconText'; import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant'; +import { DatasetItemType } from '@fastgpt/global/core/dataset/type'; const CreateModal = dynamic(() => import('./component/CreateModal'), { ssr: false }); const MoveModal = dynamic(() => import('./component/MoveModal'), { ssr: false }); @@ -89,6 +91,23 @@ const Kb = () => { successToast: t('common.Delete Success'), errorToast: t('dataset.Delete Dataset Error') }); + // check export limit + const { mutate: exportDataset } = useRequest({ + mutationFn: async (dataset: DatasetItemType) => { + setLoading(true); + await getCheckExportLimit(dataset._id); + const a = document.createElement('a'); + a.href = `/api/core/dataset/exportAll?datasetId=${dataset._id}`; + a.download = `${dataset.name}.csv`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + }, + onSettled() { + setLoading(false); + }, + errorToast: t('dataset.Export Dataset Limit Error') + }); const { data, refetch } = useQuery(['loadDataset', parentId], () => { return Promise.all([loadDatasets(parentId), getDatasetPaths(parentId)]); @@ -371,12 +390,7 @@ const Kb = () => 
{ ), onClick: () => { - const a = document.createElement('a'); - a.href = `/api/core/dataset/exportAll?datasetId=${dataset._id}`; - a.download = `${dataset.name}.csv`; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); + exportDataset(dataset); } }, { diff --git a/projects/app/src/service/events/generateQA.ts b/projects/app/src/service/events/generateQA.ts index b03b7bc80..fc28cf298 100644 --- a/projects/app/src/service/events/generateQA.ts +++ b/projects/app/src/service/events/generateQA.ts @@ -109,6 +109,7 @@ export async function generateQA(): Promise { try { const startTime = Date.now(); + const model = data.model ?? global.qaModels[0].model; // request LLM to get QA const messages: ChatMessageItemType[] = [ @@ -122,9 +123,10 @@ export async function generateQA(): Promise { }) } ]; - const ai = getAIApi(undefined, 480000); + + const ai = getAIApi(undefined, 600000); const chatResponse = await ai.chat.completions.create({ - model: global.qaModels[0].model, + model, temperature: 0.01, messages, stream: false @@ -147,8 +149,11 @@ export async function generateQA(): Promise { // delete data from training await MongoDatasetTraining.findByIdAndDelete(data._id); - console.log(`split result length: `, qaArr.length); - console.log('生成QA成功,time:', `${(Date.now() - startTime) / 1000}s`); + addLog.info(`QA Training Finish`, { + time: `${(Date.now() - startTime) / 1000}s`, + splitLength: qaArr.length, + usage: chatResponse.usage + }); // add bill if (qaArr.length > 0) { @@ -156,7 +161,8 @@ export async function generateQA(): Promise { teamId: data.teamId, tmbId: data.tmbId, totalTokens, - billId: data.billId + billId: data.billId, + model }); } else { addLog.info(`QA result 0:`, { answer }); diff --git a/projects/app/src/service/support/wallet/bill/push.ts b/projects/app/src/service/support/wallet/bill/push.ts index 28a77e410..4efa32f95 100644 --- a/projects/app/src/service/support/wallet/bill/push.ts +++ b/projects/app/src/service/support/wallet/bill/push.ts @@ -1,5 +1,5 @@ import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants'; -import { getAudioSpeechModel } from '@/service/core/ai/model'; +import { getAudioSpeechModel, getQAModel } from '@/service/core/ai/model'; import type { ChatHistoryItemResType } from '@fastgpt/global/core/chat/api.d'; import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools'; import { addLog } from '@fastgpt/service/common/mongo/controller'; @@ -9,10 +9,16 @@ import { POST } from '@fastgpt/service/common/api/plusRequest'; export function createBill(data: CreateBillProps) { if (!global.systemEnv.pluginBaseUrl) return; + if (data.total === 0) { + addLog.info('0 Bill', data); + } POST('/support/wallet/bill/createBill', data); } export function concatBill(data: ConcatBillProps) { if (!global.systemEnv.pluginBaseUrl) return; + if (data.total === 0) { + addLog.info('0 Bill', data); + } POST('/support/wallet/bill/concatBill', data); } @@ -59,18 +65,18 @@ export const pushChatBill = ({ export const pushQABill = async ({ teamId, tmbId, + model, totalTokens, billId }: { teamId: string; tmbId: string; + model: string; totalTokens: number; billId: string; }) => { - addLog.info('splitData generate success', { totalTokens }); - // 获取模型单价格 - const unitPrice = global.qaModels?.[0]?.price || 3; + const unitPrice = getQAModel(model).price; // 计算价格 const total = unitPrice * totalTokens; diff --git a/projects/app/src/web/core/dataset/api.ts b/projects/app/src/web/core/dataset/api.ts index 5ee65ebaa..d5a1635b8 100644 --- 
a/projects/app/src/web/core/dataset/api.ts +++ b/projects/app/src/web/core/dataset/api.ts @@ -48,6 +48,9 @@ export const putDatasetById = (data: DatasetUpdateParams) => PUT(`/core/dataset/ export const delDatasetById = (id: string) => DELETE(`/core/dataset/delete?id=${id}`); +export const getCheckExportLimit = (datasetId: string) => + GET(`/core/dataset/checkExportLimit`, { datasetId }); + /* =========== search test ============ */ export const postSearchText = (data: SearchTestProps) => POST(`/core/dataset/searchTest`, data);
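
Taken together, the `datasetEngine.md` passage and the `initv46-3` migration above imply a two-store recall path: PG keeps one row per index vector (several rows may share a `data_id`), the canonical `q`/`a` content lives in MongoDB (`dataset.datas`), and rows pointing at the same data item are merged at search time. Below is a minimal TypeScript sketch of that multi-vector recall. The `<=>` cosine-distance operator is standard pgvector syntax, but the table and column names (`modeldata`, `vector`) and the helper itself are illustrative assumptions rather than the project's actual query code.

```typescript
import { Pool } from 'pg';

// One PG row per index vector; several rows may share a data_id (multi-vector mapping).
type IndexRow = { id: string; data_id: string; score: number };

const pool = new Pool(); // connection settings taken from the PG* environment variables

async function multiVectorSearch(datasetId: string, queryVector: number[], limit = 20) {
  // pgvector cosine-distance search over the HNSW index.
  // Table and column names are assumptions for illustration.
  const { rows } = await pool.query<IndexRow>(
    `SELECT id, data_id, vector <=> $1 AS score
       FROM modeldata
      WHERE dataset_id = $2
      ORDER BY score
      LIMIT $3;`,
    [`[${queryVector.join(',')}]`, datasetId, limit]
  );

  // "相同数据会进行合并": indexes that resolve to the same data item are
  // deduplicated, keeping the best (smallest) distance per data_id.
  const merged = new Map<string, IndexRow>();
  for (const row of rows) {
    const prev = merged.get(row.data_id);
    if (!prev || row.score < prev.score) merged.set(row.data_id, row);
  }

  // The full q/a pairs are then fetched from MongoDB by data_id, e.g.
  // MongoDatasetData.find({ _id: { $in: [...merged.keys()] } }).
  return [...merged.values()];
}
```

This is also why `initv46-3` rewrites each PG row to carry only a `data_id` pointer and moves the text into the Mongo `indexes` array: recall needs just the vector and the pointer, so adding extra indexes per chunk (the "丰富`index`的数量" advice) widens recall without duplicating stored content.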