Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-23 21:13:50 +00:00)

Commit: v4.6-4 (#473)
docSite/assets/imgs/datasetSetting1.png (new binary file, 54 KiB, not shown)
@@ -50,5 +50,6 @@ curl --location --request POST 'https://{{host}}/api/admin/initv46-2' \
 1. 新增 - 团队空间
 2. 新增 - 多路向量(多个向量映射一组数据)
 3. 新增 - tts语音
-4. 线上环境新增 - ReRank向量召回,提高召回精度
-5. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈
+4. 新增 - 支持知识库配置文本预处理模型
+5. 线上环境新增 - ReRank向量召回,提高召回精度
+6. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈
@@ -1,10 +1,10 @@
 ---
-title: '定价'
-description: 'FastGPT 的定价'
+title: '线上版定价'
+description: 'FastGPT 线上版定价'
 icon: 'currency_yen'
 draft: false
 toc: true
-weight: 10
+weight: 11
 ---
 
 ## Tokens 说明
@@ -15,7 +15,7 @@ weight: 10
 
 ## FastGPT 线上计费
 
-目前,FastGPT 线上计费也仅按 Tokens 使用数量为准。以下是详细的计费表(最新定价以线上表格为准,可在点击充值后实时获取):
+使用: [https://fastgpt.run](https://fastgpt.run) 或 [https://ai.fastgpt.in](https://ai.fastgpt.in) 只需仅按 Tokens 使用数量扣费即可。可在 账号-使用记录 中查看具体使用情况,以下是详细的计费表(最新定价以线上表格为准,可在点击充值后实时获取):
 
 {{< table "table-hover table-striped-columns" >}}
 | 计费项 | 价格: 元/ 1K tokens(包含上下文) |
@@ -1,6 +1,6 @@
 ---
 title: "知识库结构讲解"
-description: "本节会介绍 FastGPT 知识库结构设计,理解其 QA 的存储格式和检索格式,以便更好的构建知识库。这篇介绍主要以使用为主,详细原理不多介绍。"
+description: "本节会详细介绍 FastGPT 知识库结构设计,理解其 QA 的存储格式和多向量映射,以便更好的构建知识库。这篇介绍主要以使用为主,详细原理不多介绍。"
 icon: "dataset"
 draft: false
 toc: true
@@ -25,13 +25,21 @@ FastGPT 采用了 RAG 中的 Embedding 方案构建知识库,要使用好 Fast
 
 FastGPT 采用了 `PostgresSQL` 的 `PG Vector` 插件作为向量检索器,索引为`HNSW`。且`PostgresSQL`仅用于向量检索,`MongoDB`用于其他数据的存取。
 
-在`PostgresSQL`的表中,设置一个 `index` 字段用于存储向量、一个 `q` 字段用于存储向量对应的内容,以及一个 `a` 字段用于检索映射。之所以取字段为 `qa` 是由于一些历史缘故,无需完全解为 “问答对” 的格式。在实际使用过程中,可以利用`q`和`a`的组合,对检索后的内容做进一步的声明,提高大模型的理解力(注意,这里不直接提高搜索精度)。
+在`PostgresSQL`的表中,设置一个 `index` 字段用于存储向量,以及一个`data_id`用于在`MongoDB`中寻找对应的映射值。多个`index`可以对应一组`data_id`,也就是说,一组向量可以对应多组数据。在进行检索时,相同数据会进行合并。
 
-目前,提高向量搜索的精度,主要可以通过几种途径:
+
 
-1. 精简`q`的内容,减少向量内容的长度:当`q`的内容更少,更准确时,检索精度自然会提高。但与此同时,会牺牲一定的检索范围,适合答案较为严格的场景。
-2. 更好分词分段:当一段话的结构和语义是完整的,并且是单一的,精度也会提高。因此,许多系统都会优化分词器,尽可能的保障每组数据的完整性。
-3. 多样性文本:为一段内容增加关键词、摘要、相似问题等描述性信息,可以使得该内容的向量具有更大的检索覆盖范围。
+## 多向量的目的和使用方式
+
+在一组数据中,如果我们希望它尽可能长,但语义又要在向量中尽可能提现,则没有办法通过一组向量来表示。因此,我们采用了多向量映射的方式,将一组数据映射到多组向量中,从而保障数据的完整性和语义的提现。
+
+你可以为一组较长的文本,添加多组向量,从而在检索时,只要其中一组向量被检索到,该数据也将被召回。
+
+## 提高向量搜索精度的方法
+
+1. 更好分词分段:当一段话的结构和语义是完整的,并且是单一的,精度也会提高。因此,许多系统都会优化分词器,尽可能的保障每组数据的完整性。
+2. 精简`index`的内容,减少向量内容的长度:当`index`的内容更少,更准确时,检索精度自然会提高。但与此同时,会牺牲一定的检索范围,适合答案较为严格的场景。
+3. 丰富`index`的数量,可以为同一个`chunk`内容增加多组`index`。
 4. 优化检索词:在实际使用过程中,用户的问题通常是模糊的或是缺失的,并不一定是完整清晰的问题。因此优化用户的问题(检索词)很大程度上也可以提高精度。
 5. 微调向量模型:由于市面上直接使用的向量模型都是通用型模型,在特定领域的检索精度并不高,因此微调向量模型可以很大程度上提高专业领域的检索效果。
 
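To make the index/data_id mapping described above concrete, here is a minimal TypeScript sketch of the relationship (illustrative only; every name apart from `index`, `data_id`, `q` and `a` is an assumption, not the project's actual schema):

// One MongoDB record holds the full content; several PG rows each hold one
// embedding (the "index") that points back to the record via data_id.
type MongoDatasetDataSketch = {
  _id: string; // referenced from PG rows as data_id
  q: string;   // main content
  a: string;   // optional auxiliary content
};

type PgVectorRowSketch = {
  id: string;
  vector: number[]; // the stored embedding (the "index" field)
  data_id: string;  // -> MongoDatasetDataSketch._id
};

// Retrieval sketch: similarity search returns PG rows; deduping by data_id
// merges hits, so matching any one vector recalls the whole record.
function dedupeByDataId(hits: PgVectorRowSketch[]): string[] {
  return [...new Set(hits.map((hit) => hit.data_id))];
}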
@@ -63,8 +63,8 @@ export const splitText2Chunks = (props: { text: string; maxLen: number; overlapL
   let chunks: string[] = [];
   for (let i = 0; i < splitTexts.length; i++) {
     let text = splitTexts[i];
-    let chunkToken = countPromptTokens(lastChunk, '');
-    const textToken = countPromptTokens(text, '');
+    let chunkToken = lastChunk.length;
+    const textToken = text.length;
 
     // next chunk is too large / new chunk is too large(The current chunk must be smaller than maxLen)
     if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) {
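The change above swaps tiktoken-based token counting for plain string length when sizing chunks, presumably trading token-level accuracy for speed on large imports. A small sketch of the size rule implied by the condition (assumed semantics, derived only from the lines shown):

// With maxLen = 500: a piece of length >= maxLen always starts a new chunk,
// and an existing chunk stops growing once it would exceed maxLen * 1.4.
const maxLen = 500;
const fitsCurrentChunk = (chunkLen: number, textLen: number) =>
  textLen < maxLen && chunkLen + textLen <= maxLen * 1.4;

console.log(fitsCurrentChunk(300, 250)); // true: 550 <= 700, merge into current chunk
console.log(fitsCurrentChunk(500, 250)); // false: 750 > 700, close the current chunk first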
packages/global/core/dataset/type.d.ts (vendored, 6 lines changed)
@@ -1,4 +1,4 @@
-import type { VectorModelItemType } from '../../core/ai/model.d';
+import type { LLMModelItemType, VectorModelItemType } from '../../core/ai/model.d';
 import { PermissionTypeEnum } from '../../support/permission/constant';
 import { PushDatasetDataChunkProps } from './api';
 import {
@@ -19,6 +19,7 @@ export type DatasetSchemaType = {
   avatar: string;
   name: string;
   vectorModel: string;
+  agentModel: string;
   tags: string[];
   type: `${DatasetTypeEnum}`;
   permission: `${PermissionTypeEnum}`;
@@ -84,8 +85,9 @@ export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datas
 };
 
 /* ================= dataset ===================== */
-export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel'> & {
+export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel'> & {
   vectorModel: VectorModelItemType;
+  agentModel: LLMModelItemType;
   isOwner: boolean;
   canWrite: boolean;
 };
packages/global/support/wallet/bill/api.d.ts (vendored, 2 lines changed)
@@ -3,6 +3,8 @@ import { BillListItemType } from './type';
 
 export type CreateTrainingBillProps = {
   name: string;
+  vectorModel?: string;
+  agentModel?: string;
 };
 
 export type ConcatBillProps = {
@@ -61,7 +61,6 @@ const AppSchema = new Schema({
 
 try {
   AppSchema.index({ updateTime: -1 });
-  AppSchema.index({ 'share.collection': -1 });
 } catch (error) {
   console.log(error);
 }
@@ -69,7 +69,6 @@ const DatasetCollectionSchema = new Schema({
 
 try {
   DatasetCollectionSchema.index({ datasetId: 1 });
-  DatasetCollectionSchema.index({ userId: 1 });
   DatasetCollectionSchema.index({ updateTime: -1 });
 } catch (error) {
   console.log(error);
@@ -48,6 +48,11 @@ const DatasetSchema = new Schema({
     required: true,
     default: 'text-embedding-ada-002'
   },
+  agentModel: {
+    type: String,
+    required: true,
+    default: 'gpt-3.5-turbo-16k'
+  },
   type: {
     type: String,
     enum: Object.keys(DatasetTypeMap),
@@ -95,7 +95,7 @@ const TrainingDataSchema = new Schema({
 
 try {
   TrainingDataSchema.index({ lockTime: 1 });
-  TrainingDataSchema.index({ userId: 1 });
+  TrainingDataSchema.index({ datasetId: 1 });
   TrainingDataSchema.index({ collectionId: 1 });
   TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 });
 } catch (error) {
@@ -250,6 +250,7 @@
     }
   },
   "dataset": {
+    "Agent Model": "Learning Model",
     "Chunk Length": "Chunk Length",
     "Confirm move the folder": "Confirm Move",
     "Confirm to delete the data": "Confirm to delete the data?",
@@ -259,6 +260,7 @@
     "Delete Dataset Error": "Delete dataset failed",
     "Edit Folder": "Edit Folder",
     "Export": "Export",
+    "Export Dataset Limit Error": "Export Data Error",
     "File Input": "Import File",
     "File Size": "File Size",
     "Filename": "Filename",
@@ -250,6 +250,7 @@
     }
   },
   "dataset": {
+    "Agent Model": "文件处理模型",
     "Chunk Length": "数据总量",
     "Confirm move the folder": "确认移动到该目录",
     "Confirm to delete the data": "确认删除该数据?",
@@ -259,6 +260,7 @@
     "Delete Dataset Error": "删除知识库异常",
     "Edit Folder": "编辑文件夹",
     "Export": "导出",
+    "Export Dataset Limit Error": "导出数据失败",
     "File Input": "文件导入",
     "File Size": "文件大小",
     "Filename": "文件名",
@@ -1,3 +1,4 @@
+import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
 import type {
   DatasetCollectionItemType,
   DatasetItemType
@@ -17,13 +18,8 @@ export const defaultDatasetDetail: DatasetItemType = {
   permission: 'private',
   isOwner: false,
   canWrite: false,
-  vectorModel: {
-    model: 'text-embedding-ada-002',
-    name: 'Embedding-2',
-    price: 0.2,
-    defaultToken: 500,
-    maxToken: 3000
-  }
+  vectorModel: defaultVectorModels[0],
+  agentModel: defaultQAModels[0]
 };
 
 export const defaultCollectionDetail: DatasetCollectionItemType = {
@@ -43,7 +39,8 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
     name: '',
     tags: [],
     permission: 'private',
-    vectorModel: 'text-embedding-ada-002'
+    vectorModel: defaultVectorModels[0].model,
+    agentModel: defaultQAModels[0].model
   },
   parentId: '',
   name: '',
@@ -5,6 +5,7 @@ import type { SearchTestItemType } from '@/types/core/dataset';
 import { UploadChunkItemType } from '@fastgpt/global/core/dataset/type';
 import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
 import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';
+import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
 
 /* ===== dataset ===== */
 export type DatasetUpdateParams = {
@@ -14,6 +15,7 @@ export type DatasetUpdateParams = {
   name?: string;
   avatar?: string;
   permission?: `${PermissionTypeEnum}`;
+  agentModel?: LLMModelItemType;
 };
 
 export type SearchTestProps = {
@@ -9,6 +9,7 @@ export type CreateDatasetParams = {
   tags: string;
   avatar: string;
   vectorModel?: string;
+  agentModel?: string;
   type: `${DatasetTypeEnum}`;
 };
 
@@ -1,8 +1,8 @@
 export const Prompt_AgentQA = {
   prompt: `我会给你一段文本,{{theme}},学习它们,并整理学习成果,要求为:
-1. 提出最多 25 个问题。
-2. 给出每个问题的答案。
-3. 答案要详细完整,答案可以包含普通文字、链接、代码、表格、公示、媒体链接等 markdown 元素。
+1. 提出问题并给出每个问题的答案。
+2. 每个答案都要详细完整,给出相关原文描述,答案可以包含普通文字、链接、代码、表格、公示、媒体链接等 markdown 元素。
+3. 最多提出 30 个问题。
 4. 按格式返回多个问题和答案:
 
 Q1: 问题。
@@ -11,6 +11,8 @@ import {
 import { authCert } from '@fastgpt/service/support/permission/auth/common';
 import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
 import { getUserDefaultTeam } from '@fastgpt/service/support/user/team/controller';
+import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
+import { defaultQAModels } from '@fastgpt/global/core/ai/model';
 
 let success = 0;
 /* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
@@ -41,6 +43,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
 
   await initPgData();
 
+  await MongoDataset.updateMany(
+    {},
+    {
+      agentModel: defaultQAModels[0].model
+    }
+  );
+
   jsonRes(res, {
     data: await init(limit),
     message:
@@ -76,14 +85,19 @@ async function initPgData() {
   for (let i = 0; i < limit; i++) {
     init(i);
   }
 
   async function init(index: number): Promise<any> {
     const userId = rows[index]?.user_id;
     if (!userId) return;
     try {
       const tmb = await getUserDefaultTeam({ userId });
+      console.log(tmb);
 
       // update pg
       await PgClient.query(
-        `Update ${PgDatasetTableName} set team_id = '${tmb.teamId}', tmb_id = '${tmb.tmbId}' where user_id = '${userId}' AND team_id='null';`
+        `Update ${PgDatasetTableName} set team_id = '${String(tmb.teamId)}', tmb_id = '${String(
+          tmb.tmbId
+        )}' where user_id = '${userId}' AND team_id='null';`
       );
       console.log(++success);
       init(index + limit);

projects/app/src/pages/api/admin/initv46-3.ts (new file, 101 lines)
@@ -0,0 +1,101 @@
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@fastgpt/service/common/response';
+import { connectToDatabase } from '@/service/mongo';
+import { delay } from '@/utils/tools';
+import { PgClient } from '@fastgpt/service/common/pg';
+import {
+  DatasetDataIndexTypeEnum,
+  PgDatasetTableName
+} from '@fastgpt/global/core/dataset/constant';
+import { authCert } from '@fastgpt/service/support/permission/auth/common';
+import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
+
+let success = 0;
+/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
+export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+  try {
+    const { limit = 50 } = req.body as { limit: number };
+    await authCert({ req, authRoot: true });
+    await connectToDatabase();
+    success = 0;
+
+    jsonRes(res, {
+      data: await init(limit)
+    });
+  } catch (error) {
+    console.log(error);
+
+    jsonRes(res, {
+      code: 500,
+      error
+    });
+  }
+}
+
+type PgItemType = {
+  id: string;
+  q: string;
+  a: string;
+  dataset_id: string;
+  collection_id: string;
+  data_id: string;
+};
+
+async function init(limit: number): Promise<any> {
+  const { rows: idList } = await PgClient.query<{ id: string }>(
+    `SELECT id FROM ${PgDatasetTableName} WHERE inited=1`
+  );
+
+  console.log('totalCount', idList.length);
+
+  await delay(2000);
+
+  if (idList.length === 0) return;
+
+  for (let i = 0; i < limit; i++) {
+    initData(i);
+  }
+
+  async function initData(index: number): Promise<any> {
+    const dataId = idList[index]?.id;
+    if (!dataId) {
+      console.log('done');
+      return;
+    }
+    // get limit data where data_id is null
+    const { rows } = await PgClient.query<PgItemType>(
+      `SELECT id,q,a,dataset_id,collection_id,data_id FROM ${PgDatasetTableName} WHERE id=${dataId};`
+    );
+    const data = rows[0];
+    if (!data) {
+      console.log('done');
+      return;
+    }
+
+    try {
+      // update mongo data and update inited
+      await MongoDatasetData.findByIdAndUpdate(data.data_id, {
+        q: data.q,
+        a: data.a,
+        indexes: [
+          {
+            defaultIndex: !data.a,
+            type: data.a ? DatasetDataIndexTypeEnum.qa : DatasetDataIndexTypeEnum.chunk,
+            dataId: data.id,
+            text: data.q
+          }
+        ]
+      });
+      // update pg data_id
+      await PgClient.query(`UPDATE ${PgDatasetTableName} SET inited=0 WHERE id=${dataId};`);
+
+      return initData(index + limit);
+    } catch (error) {
+      console.log(error);
+      console.log(data);
+      await delay(500);
+      return initData(index);
+    }
+  }
+}
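As with the initv46-2 endpoint documented earlier, this migration is presumably triggered manually by an administrator. A plausible invocation (the `rootkey` header and JSON body are assumptions inferred from the `authRoot` check and the `limit` parameter above, not taken from this commit):

curl --location --request POST 'https://{{host}}/api/admin/initv46-3' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json' \
--data-raw '{ "limit": 50 }'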
@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
 import { jsonRes } from '@fastgpt/service/common/response';
 import { connectToDatabase } from '@/service/mongo';
 import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
-import { getVectorModel } from '@/service/core/ai/model';
+import { getQAModel, getVectorModel } from '@/service/core/ai/model';
 import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
 import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
 import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
|
|||||||
const data = datasets.map((item) => ({
|
const data = datasets.map((item) => ({
|
||||||
...item.toJSON(),
|
...item.toJSON(),
|
||||||
vectorModel: getVectorModel(item.vectorModel),
|
vectorModel: getVectorModel(item.vectorModel),
|
||||||
|
agentModel: getQAModel(item.agentModel),
|
||||||
canWrite: String(item.tmbId) === tmbId,
|
canWrite: String(item.tmbId) === tmbId,
|
||||||
isOwner: teamOwner || String(item.tmbId) === tmbId
|
isOwner: teamOwner || String(item.tmbId) === tmbId
|
||||||
}));
|
}));
|
||||||
|
73
projects/app/src/pages/api/core/dataset/checkExportLimit.ts
Normal file
73
projects/app/src/pages/api/core/dataset/checkExportLimit.ts
Normal file
@@ -0,0 +1,73 @@
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@fastgpt/service/common/response';
+import { connectToDatabase } from '@/service/mongo';
+import { MongoUser } from '@fastgpt/service/support/user/schema';
+import { addLog } from '@fastgpt/service/common/mongo/controller';
+import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
+import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
+import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
+
+export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
+  try {
+    await connectToDatabase();
+    let { datasetId } = req.query as {
+      datasetId: string;
+    };
+
+    if (!datasetId) {
+      throw new Error('缺少参数');
+    }
+
+    // 凭证校验
+    const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
+
+    await limitCheck({
+      datasetId,
+      userId
+    });
+
+    jsonRes(res);
+  } catch (err) {
+    res.status(500);
+    jsonRes(res, {
+      code: 500,
+      error: err
+    });
+  }
+}
+
+export async function limitCheck({ datasetId, userId }: { datasetId: string; userId: string }) {
+  const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);
+
+  const limitMinutesAgo = new Date(
+    Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000
+  );
+
+  // auth export times
+  const authTimes = await MongoUser.findOne(
+    {
+      _id: userId,
+      $or: [
+        { 'limit.exportKbTime': { $exists: false } },
+        { 'limit.exportKbTime': { $lte: limitMinutesAgo } }
+      ]
+    },
+    '_id limit'
+  );
+
+  if (!authTimes) {
+    const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`;
+    return Promise.reject(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
+  }
+
+  // auth max data
+  const total = await MongoDatasetData.countDocuments({
+    datasetId: { $in: exportIds }
+  });
+
+  addLog.info(`export datasets: ${datasetId}`, { total });
+
+  if (total > 100000) {
+    return Promise.reject('数据量超出 10 万,无法导出');
+  }
+}
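`limitCheck` is exported so that this route and `exportAll` share one gate: a per-user time window plus a 100,000-row cap. The client-side helper `getCheckExportLimit`, imported later in this commit from `@/web/core/dataset/api`, presumably wraps a GET to this route; a minimal sketch (assumed implementation, not shown in the diff):

// Hypothetical web-side helper; the real project likely routes this through
// its shared request wrapper rather than raw fetch.
export const getCheckExportLimit = (datasetId: string) =>
  fetch(`/api/core/dataset/checkExportLimit?datasetId=${datasetId}`).then((res) => {
    if (!res.ok) throw new Error('Export limit check failed');
  });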
@@ -9,7 +9,8 @@ import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/use
 export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   try {
     await connectToDatabase();
-    const { name, tags, avatar, vectorModel, parentId, type } = req.body as CreateDatasetParams;
+    const { name, tags, avatar, vectorModel, agentModel, parentId, type } =
+      req.body as CreateDatasetParams;
 
     // 凭证校验
     const { teamId, tmbId } = await authUserNotVisitor({ req, authToken: true });
@@ -20,6 +21,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
       tmbId,
       tags,
       vectorModel,
+      agentModel,
       avatar,
       parentId: parentId || null,
       type
@@ -10,7 +10,7 @@ import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
 import type { PushDataResponse } from '@/global/core/api/datasetRes.d';
 import type { PushDatasetDataProps } from '@/global/core/dataset/api.d';
 import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
-import { getVectorModel } from '@/service/core/ai/model';
+import { getQAModel, getVectorModel } from '@/service/core/ai/model';
 import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
 import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
 
@@ -63,24 +63,14 @@ export async function pushDataToDatasetCollection({
   mode,
   prompt,
   billId
-}: { teamId: string; tmbId: string } & PushDatasetDataProps): Promise<PushDataResponse> {
-  // get dataset vector model
-  const {
-    datasetId: { _id: datasetId, vectorModel }
-  } = await getCollectionWithDataset(collectionId);
-
-  const vectorModelData = getVectorModel(vectorModel);
-
-  const modeMap = {
-    [TrainingModeEnum.chunk]: {
-      maxToken: vectorModelData.maxToken * 1.5,
-      model: vectorModelData.model
-    },
-    [TrainingModeEnum.qa]: {
-      maxToken: global.qaModels[0].maxContext * 0.8,
-      model: global.qaModels[0].model
-    }
-  };
+}: {
+  teamId: string;
+  tmbId: string;
+} & PushDatasetDataProps): Promise<PushDataResponse> {
+  const { datasetId, model, maxToken } = await checkModelValid({
+    mode,
+    collectionId
+  });
 
   // filter repeat or equal content
   const set = new Set();
@@ -102,12 +92,13 @@ export async function pushDataToDatasetCollection({
     // count q token
     const token = countPromptTokens(item.q);
 
-    if (token > modeMap[mode].maxToken) {
+    if (token > maxToken) {
       filterResult.overToken.push(item);
       return;
     }
 
     if (set.has(text)) {
+      console.log('repeat', item);
       filterResult.repeat.push(item);
     } else {
       filterResult.success.push(item);
@@ -126,7 +117,7 @@ export async function pushDataToDatasetCollection({
       billId,
       mode,
       prompt,
-      model: modeMap[mode].model,
+      model,
       q: item.q,
       a: item.a,
       indexes: item.indexes
@@ -142,6 +133,44 @@ export async function pushDataToDatasetCollection({
   };
 }
 
+export async function checkModelValid({
+  mode,
+  collectionId
+}: {
+  mode: `${TrainingModeEnum}`;
+  collectionId: string;
+}) {
+  const {
+    datasetId: { _id: datasetId, vectorModel, agentModel }
+  } = await getCollectionWithDataset(collectionId);
+
+  if (mode === TrainingModeEnum.chunk) {
+    if (!collectionId) return Promise.reject(`CollectionId is empty`);
+    const vectorModelData = getVectorModel(vectorModel);
+    if (!vectorModelData) {
+      return Promise.reject(`Model ${vectorModel} is inValid`);
+    }
+    return {
+      datasetId,
+      maxToken: vectorModelData.maxToken * 1.5,
+      model: vectorModelData.model
+    };
+  }
+
+  if (mode === TrainingModeEnum.qa) {
+    const qaModelData = getQAModel(agentModel);
+    if (!qaModelData) {
+      return Promise.reject(`Model ${agentModel} is inValid`);
+    }
+    return {
+      datasetId,
+      maxToken: qaModelData.maxContext * 0.8,
+      model: qaModelData.model
+    };
+  }
+  return Promise.reject(`Mode ${mode} is inValid`);
+}
+
 export const config = {
   api: {
     bodyParser: {
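`checkModelValid` replaces the inline `modeMap` lookup and returns the same shape for both training modes, so `pushDataToDatasetCollection` no longer needs to know which model family applies. A usage sketch (the id value is hypothetical):

// chunk mode sizes against the dataset's vector model; qa mode against its
// agent model. Either way the caller receives { datasetId, model, maxToken }.
const { datasetId, model, maxToken } = await checkModelValid({
  mode: TrainingModeEnum.qa,
  collectionId: 'hypothetical-collection-id'
});
// e.g. model = the dataset's agentModel, maxToken = that model's maxContext * 0.8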
@@ -1,7 +1,7 @@
 import type { NextApiRequest, NextApiResponse } from 'next';
 import { jsonRes } from '@fastgpt/service/common/response';
 import { connectToDatabase } from '@/service/mongo';
-import { getVectorModel } from '@/service/core/ai/model';
+import { getQAModel, getVectorModel } from '@/service/core/ai/model';
 import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
 import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
 
@@ -28,6 +28,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
     data: {
       ...dataset,
       vectorModel: getVectorModel(dataset.vectorModel),
+      agentModel: getQAModel(dataset.agentModel),
       canWrite,
       isOwner
     }
@@ -1,5 +1,5 @@
 import type { NextApiRequest, NextApiResponse } from 'next';
-import { jsonRes } from '@fastgpt/service/common/response';
+import { jsonRes, responseWriteController } from '@fastgpt/service/common/response';
 import { connectToDatabase } from '@/service/mongo';
 import { MongoUser } from '@fastgpt/service/support/user/schema';
 import { addLog } from '@fastgpt/service/common/mongo/controller';
@@ -8,6 +8,7 @@ import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
 import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
 import { Readable } from 'stream';
 import type { Cursor } from '@fastgpt/service/common/mongo';
+import { limitCheck } from './checkExportLimit';
 
 export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   try {
@@ -23,39 +24,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
     // 凭证校验
     const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });
-
-    const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);
-
-    const limitMinutesAgo = new Date(
-      Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000
-    );
-
-    // auth export times
-    const authTimes = await MongoUser.findOne(
-      {
-        _id: userId,
-        $or: [
-          { 'limit.exportKbTime': { $exists: false } },
-          { 'limit.exportKbTime': { $lte: limitMinutesAgo } }
-        ]
-      },
-      '_id limit'
-    );
-
-    if (!authTimes) {
-      const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`;
-      throw new Error(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
-    }
-
-    // auth max data
-    const total = await MongoDatasetData.countDocuments({
-      datasetId: { $in: exportIds }
-    });
-
-    addLog.info(`export datasets: ${datasetId}`, { total });
-
-    if (total > 100000) {
-      throw new Error('数据量超出 10 万,无法导出');
-    }
+    await limitCheck({
+      userId,
+      datasetId
+    });
+
+    const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);
 
     res.setHeader('Content-Type', 'text/csv; charset=utf-8;');
     res.setHeader('Content-Disposition', 'attachment; filename=dataset.csv; ');
@@ -72,35 +46,27 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
       'q a'
     ).cursor();
 
-    function cursorToReadableStream(cursor: Cursor) {
-      const readable = new Readable({
-        objectMode: true,
-        read() {}
-      });
-      readable.push(`\uFEFFindex,content`);
-
-      cursor.on('data', (doc) => {
-        const q = doc.q.replace(/"/g, '""') || '';
-        const a = doc.a.replace(/"/g, '""') || '';
-
-        readable.push(`\n"${q}","${a}"`);
-      });
-
-      cursor.on('end', async () => {
-        readable.push(null);
-        cursor.close();
-        await MongoUser.findByIdAndUpdate(userId, {
-          'limit.exportKbTime': new Date()
-        });
-      });
-
-      return readable;
-    }
-
-    // @ts-ignore
-    const stream = cursorToReadableStream(cursor);
-    stream.pipe(res);
+    const write = responseWriteController({
+      res,
+      readStream: cursor
+    });
+
+    write(`\uFEFFindex,content`);
+
+    cursor.on('data', (doc) => {
+      const q = doc.q.replace(/"/g, '""') || '';
+      const a = doc.a.replace(/"/g, '""') || '';
+
+      write(`\n"${q}","${a}"`);
+    });
+
+    cursor.on('end', async () => {
+      cursor.close();
+      res.end();
+      await MongoUser.findByIdAndUpdate(userId, {
+        'limit.exportKbTime': new Date()
+      });
+    });
   } catch (err) {
     res.status(500);
     jsonRes(res, {
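The rewrite drops the hand-rolled `Readable` (which pushed every row with no backpressure and needed a `@ts-ignore`) in favor of `responseWriteController`. Its implementation is not part of this diff; the intent is presumably flow control, pausing the Mongo cursor while the HTTP response buffer is full. A sketch of that idea (an assumption, not the actual FastGPT implementation):

import type { NextApiResponse } from 'next';
import type { Readable } from 'stream';

// Pause the source stream when res.write() reports a full buffer, and resume
// once the socket drains: standard Node backpressure handling.
export function responseWriteControllerSketch({
  res,
  readStream
}: {
  res: NextApiResponse;
  readStream: Readable;
}) {
  return (text: string) => {
    if (!res.write(text)) {
      readStream.pause();
      res.once('drain', () => readStream.resume());
    }
  };
}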
@@ -1,7 +1,7 @@
 import type { NextApiRequest, NextApiResponse } from 'next';
 import { jsonRes } from '@fastgpt/service/common/response';
 import { connectToDatabase } from '@/service/mongo';
-import { getVectorModel } from '@/service/core/ai/model';
+import { getQAModel, getVectorModel } from '@/service/core/ai/model';
 import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
 import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
 import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
@@ -28,6 +28,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
   datasets.map(async (item) => ({
     ...item.toJSON(),
     vectorModel: getVectorModel(item.vectorModel),
+    agentModel: getQAModel(item.agentModel),
     canWrite,
     isOwner: teamOwner || String(item.tmbId) === tmbId
   }))
@@ -8,7 +8,8 @@ import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
 export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   try {
     await connectToDatabase();
-    const { id, parentId, name, avatar, tags, permission } = req.body as DatasetUpdateParams;
+    const { id, parentId, name, avatar, tags, permission, agentModel } =
+      req.body as DatasetUpdateParams;
 
     if (!id) {
       throw new Error('缺少参数');
@@ -26,7 +27,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
       ...(name && { name }),
       ...(avatar && { avatar }),
       ...(tags && { tags }),
-      ...(permission && { permission })
+      ...(permission && { permission }),
+      ...(agentModel && { agentModel: agentModel.model })
     }
   );
 
@@ -5,15 +5,17 @@ import { MongoBill } from '@fastgpt/service/support/wallet/bill/schema';
 import { authCert } from '@fastgpt/service/support/permission/auth/common';
 import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
 import { CreateTrainingBillProps } from '@fastgpt/global/support/wallet/bill/api.d';
+import { getQAModel, getVectorModel } from '@/service/core/ai/model';
 
 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
   try {
     await connectToDatabase();
-    const { name } = req.body as CreateTrainingBillProps;
+    const { name, vectorModel, agentModel } = req.body as CreateTrainingBillProps;
 
     const { teamId, tmbId } = await authCert({ req, authToken: true, authApiKey: true });
 
-    const qaModel = global.qaModels[0];
+    const vectorModelData = getVectorModel(vectorModel);
+    const agentModelData = getQAModel(agentModel);
 
     const { _id } = await MongoBill.create({
       teamId,
@@ -23,13 +25,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
       list: [
         {
           moduleName: '索引生成',
-          model: 'embedding',
+          model: vectorModelData.name,
           amount: 0,
           tokenLen: 0
         },
         {
           moduleName: 'QA 拆分',
-          model: qaModel?.name,
+          model: agentModelData.name,
           amount: 0,
           tokenLen: 0
         }
@@ -170,7 +170,7 @@ const DataCard = () => {
       </Flex>
       <Grid
         minH={'100px'}
-        gridTemplateColumns={['1fr', 'repeat(2,1fr)', 'repeat(3,1fr)']}
+        gridTemplateColumns={['1fr', 'repeat(2,1fr)', 'repeat(3,1fr)', 'repeat(4,1fr)']}
         gridGap={4}
       >
         {datasetDataList.map((item) => (
@@ -34,10 +34,10 @@ const ImportData = ({
   const theme = useTheme();
   const { datasetDetail } = useDatasetStore();
   const [importType, setImportType] = useState<`${ImportTypeEnum}`>(ImportTypeEnum.chunk);
+  const vectorModel = datasetDetail.vectorModel;
+  const agentModel = datasetDetail.agentModel;
 
   const typeMap = useMemo(() => {
-    const vectorModel = datasetDetail.vectorModel;
-    const qaModel = qaModelList[0];
     const map = {
       [ImportTypeEnum.chunk]: {
         defaultChunkLen: vectorModel?.defaultToken || 500,
@@ -45,8 +45,8 @@ const ImportData = ({
         mode: TrainingModeEnum.chunk
       },
       [ImportTypeEnum.qa]: {
-        defaultChunkLen: qaModel?.maxContext * 0.5 || 8000,
-        unitPrice: qaModel?.price || 3,
+        defaultChunkLen: agentModel?.maxContext * 0.6 || 9000,
+        unitPrice: agentModel?.price || 3,
         mode: TrainingModeEnum.qa
       },
       [ImportTypeEnum.csv]: {
@@ -56,7 +56,13 @@ const ImportData = ({
       }
     };
     return map[importType];
-  }, [datasetDetail.vectorModel, importType]);
+  }, [
+    agentModel?.maxContext,
+    agentModel?.price,
+    importType,
+    vectorModel?.defaultToken,
+    vectorModel?.price
+  ]);
 
   const TitleStyle: BoxProps = {
     fontWeight: 'bold',
@@ -104,8 +110,10 @@ const ImportData = ({
 
       <Provider
         {...typeMap}
+        vectorModel={vectorModel.model}
+        agentModel={agentModel.model}
+        datasetId={datasetDetail._id}
         importType={importType}
-        datasetId={datasetId}
         parentId={parentId}
         onUploadSuccess={uploadSuccess}
       >
@@ -90,6 +90,8 @@ const Provider = ({
   parentId,
   unitPrice,
   mode,
+  vectorModel,
+  agentModel,
   defaultChunkLen = 500,
   importType,
   onUploadSuccess,
@@ -99,6 +101,8 @@ const Provider = ({
   parentId: string;
   unitPrice: number;
   mode: `${TrainingModeEnum}`;
+  vectorModel: string;
+  agentModel: string;
   defaultChunkLen: number;
   importType: `${ImportTypeEnum}`;
   onUploadSuccess: () => void;
@@ -132,7 +136,9 @@ const Provider = ({
     const chunks = file.chunks;
     // create training bill
     const billId = await postCreateTrainingBill({
-      name: t('dataset.collections.Create Training Data', { filename: file.filename })
+      name: t('dataset.collections.Create Training Data', { filename: file.filename }),
+      vectorModel,
+      agentModel
     });
     // create a file collection and training bill
     const collectionId = await postDatasetCollection({
@@ -13,8 +13,8 @@ const fileExtension = '.txt, .doc, .docx, .pdf, .md';
 
 const QAImport = () => {
   const { datasetDetail } = useDatasetStore();
-  const vectorModel = datasetDetail.vectorModel;
-  const unitPrice = vectorModel?.price || 0.2;
+  const agentModel = datasetDetail.agentModel;
+  const unitPrice = agentModel?.price || 3;
 
   const {
     successChunks,
@@ -9,7 +9,7 @@ import React, {
 import { useRouter } from 'next/router';
 import { Box, Flex, Button, FormControl, IconButton, Input } from '@chakra-ui/react';
 import { QuestionOutlineIcon, DeleteIcon } from '@chakra-ui/icons';
-import { delDatasetById, putDatasetById } from '@/web/core/dataset/api';
+import { delDatasetById } from '@/web/core/dataset/api';
 import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
 import { useToast } from '@/web/common/hooks/useToast';
 import { useDatasetStore } from '@/web/core/dataset/store/dataset';
@@ -22,6 +22,8 @@ import Tag from '@/components/Tag';
 import MyTooltip from '@/components/MyTooltip';
 import { useTranslation } from 'next-i18next';
 import PermissionRadio from '@/components/support/permission/Radio';
+import MySelect from '@/components/Select';
+import { qaModelList } from '@/web/common/system/staticData';
 
 export interface ComponentRef {
   initInput: (tags: string) => void;
@@ -50,7 +52,7 @@ const Info = (
     multiple: false
   });
 
-  const { datasetDetail, loadDatasetDetail, loadDatasets } = useDatasetStore();
+  const { datasetDetail, loadDatasetDetail, loadDatasets, updateDataset } = useDatasetStore();
 
   /* 点击删除 */
   const onclickDelKb = useCallback(async () => {
@@ -76,11 +78,10 @@ const Info = (
     async (data: DatasetItemType) => {
       setBtnLoading(true);
       try {
-        await putDatasetById({
+        await updateDataset({
           id: datasetId,
           ...data
         });
-        await loadDatasetDetail(datasetId, true);
         toast({
           title: '更新成功',
           status: 'success'
@@ -94,7 +95,7 @@ const Info = (
       }
       setBtnLoading(false);
     },
-    [loadDatasetDetail, datasetId, loadDatasets, toast]
+    [updateDataset, datasetId, loadDatasetDetail, toast, loadDatasets]
   );
   const saveSubmitError = useCallback(() => {
     // deep search message
@@ -194,6 +195,27 @@ const Info = (
           })}
         />
       </FormControl>
+      <Flex mt={6} alignItems={'center'}>
+        <Box flex={['0 0 90px', '0 0 160px']} w={0}>
+          {t('dataset.Agent Model')}
+        </Box>
+        <Box flex={[1, '0 0 300px']}>
+          <MySelect
+            w={'100%'}
+            value={getValues('agentModel').model}
+            list={qaModelList.map((item) => ({
+              label: item.name,
+              value: item.model
+            }))}
+            onchange={(e) => {
+              const agentModel = qaModelList.find((item) => item.model === e);
+              if (!agentModel) return;
+              setValue('agentModel', agentModel);
+              setRefresh((state) => !state);
+            }}
+          />
+        </Box>
+      </Flex>
       <Flex mt={8} alignItems={'center'} w={'100%'} flexWrap={'wrap'}>
         <Box flex={['0 0 90px', '0 0 160px']} w={0}>
           标签
@@ -196,7 +196,7 @@ const InputDataModal = ({
   const loading = useMemo(() => isImporting || isUpdating, [isImporting, isUpdating]);
 
   return (
-    <MyModal isOpen={true} isCentered w={'90vw'} maxW={'90vw'} h={'90vh'}>
+    <MyModal isOpen={true} isCentered w={'90vw'} maxW={'1440px'} h={'90vh'}>
       <Flex h={'100%'}>
         <Box p={5} borderRight={theme.borders.base}>
           <RawSourceText
@@ -250,7 +250,7 @@ const InputDataModal = ({
             mt={1}
             placeholder={`该输入框是必填项,该内容通常是对于知识点的描述,也可以是用户的问题,最多 ${maxToken} 字。`}
             maxLength={maxToken}
-            rows={10}
+            rows={12}
             bg={'myWhite.400'}
             {...register(`q`, {
               required: true
@@ -274,7 +274,7 @@ const InputDataModal = ({
               maxToken * 1.5
             } 字。`}
             bg={'myWhite.400'}
-            rows={10}
+            rows={12}
             maxLength={maxToken * 1.5}
             {...register('a')}
           />
@@ -15,10 +15,12 @@ import { postCreateDataset } from '@/web/core/dataset/api';
 import type { CreateDatasetParams } from '@/global/core/dataset/api.d';
 import MySelect from '@/components/Select';
 import { QuestionOutlineIcon } from '@chakra-ui/icons';
-import { vectorModelList } from '@/web/common/system/staticData';
+import { vectorModelList, qaModelList } from '@/web/common/system/staticData';
 import Tag from '@/components/Tag';
+import { useTranslation } from 'next-i18next';
 
 const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: string }) => {
+  const { t } = useTranslation();
   const [refresh, setRefresh] = useState(false);
   const { toast } = useToast();
   const router = useRouter();
@@ -29,6 +31,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
       name: '',
       tags: '',
       vectorModel: vectorModelList[0].model,
+      agentModel: qaModelList[0].model,
       type: 'dataset',
       parentId
     }
@@ -76,7 +79,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
   });
 
   return (
-    <MyModal isOpen onClose={onClose} isCentered={!isPc} w={'400px'}>
+    <MyModal isOpen onClose={onClose} isCentered={!isPc} w={'450px'}>
       <ModalHeader fontSize={'2xl'}>创建一个知识库</ModalHeader>
       <ModalBody>
         <Box color={'myGray.800'} fontWeight={'bold'}>
@@ -106,7 +109,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
           />
         </Flex>
         <Flex mt={6} alignItems={'center'}>
-          <Box flex={'0 0 80px'}>索引模型</Box>
+          <Box flex={'0 0 100px'}>索引模型</Box>
           <Box flex={1}>
             <MySelect
               w={'100%'}
@@ -122,8 +125,25 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
             />
           </Box>
         </Flex>
+        <Flex mt={6} alignItems={'center'}>
+          <Box flex={'0 0 100px'}>{t('dataset.Agent Model')}</Box>
+          <Box flex={1}>
+            <MySelect
+              w={'100%'}
+              value={getValues('agentModel')}
+              list={qaModelList.map((item) => ({
+                label: item.name,
+                value: item.model
+              }))}
+              onchange={(e) => {
+                setValue('agentModel', e);
+                setRefresh((state) => !state);
+              }}
+            />
+          </Box>
+        </Flex>
         <Flex mt={6} alignItems={'center'} w={'100%'}>
-          <Box flex={'0 0 80px'}>
+          <Box flex={'0 0 100px'}>
             标签
             <MyTooltip label={'用空格隔开多个标签,便于搜索'} forceShow>
               <QuestionOutlineIcon ml={1} />
|
@@ -20,7 +20,8 @@ import {
|
|||||||
delDatasetById,
|
delDatasetById,
|
||||||
getDatasetPaths,
|
getDatasetPaths,
|
||||||
putDatasetById,
|
putDatasetById,
|
||||||
postCreateDataset
|
postCreateDataset,
|
||||||
|
getCheckExportLimit
|
||||||
} from '@/web/core/dataset/api';
|
} from '@/web/core/dataset/api';
|
||||||
import { useTranslation } from 'next-i18next';
|
import { useTranslation } from 'next-i18next';
|
||||||
import Avatar from '@/components/Avatar';
|
import Avatar from '@/components/Avatar';
|
||||||
@@ -38,6 +39,7 @@ import { useDrag } from '@/web/common/hooks/useDrag';
|
|||||||
import { useUserStore } from '@/web/support/user/useUserStore';
|
import { useUserStore } from '@/web/support/user/useUserStore';
|
||||||
import PermissionIconText from '@/components/support/permission/IconText';
|
import PermissionIconText from '@/components/support/permission/IconText';
|
||||||
import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';
|
import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';
|
||||||
|
import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
|
||||||
|
|
||||||
const CreateModal = dynamic(() => import('./component/CreateModal'), { ssr: false });
|
const CreateModal = dynamic(() => import('./component/CreateModal'), { ssr: false });
|
||||||
const MoveModal = dynamic(() => import('./component/MoveModal'), { ssr: false });
|
const MoveModal = dynamic(() => import('./component/MoveModal'), { ssr: false });
|
||||||
@@ -89,6 +91,23 @@ const Kb = () => {
|
|||||||
successToast: t('common.Delete Success'),
|
successToast: t('common.Delete Success'),
|
||||||
errorToast: t('dataset.Delete Dataset Error')
|
errorToast: t('dataset.Delete Dataset Error')
|
||||||
});
|
});
|
||||||
|
// check export limit
|
||||||
|
const { mutate: exportDataset } = useRequest({
|
||||||
|
mutationFn: async (dataset: DatasetItemType) => {
|
||||||
|
setLoading(true);
|
||||||
|
await getCheckExportLimit(dataset._id);
|
||||||
|
const a = document.createElement('a');
|
||||||
|
a.href = `/api/core/dataset/exportAll?datasetId=${dataset._id}`;
|
||||||
|
a.download = `${dataset.name}.csv`;
|
||||||
|
document.body.appendChild(a);
|
||||||
|
a.click();
|
||||||
|
document.body.removeChild(a);
|
||||||
|
},
|
||||||
|
onSettled() {
|
||||||
|
setLoading(false);
|
||||||
|
},
|
||||||
|
errorToast: t('dataset.Export Dataset Limit Error')
|
||||||
|
});
|
||||||
|
|
||||||
const { data, refetch } = useQuery(['loadDataset', parentId], () => {
|
const { data, refetch } = useQuery(['loadDataset', parentId], () => {
|
||||||
return Promise.all([loadDatasets(parentId), getDatasetPaths(parentId)]);
|
return Promise.all([loadDatasets(parentId), getDatasetPaths(parentId)]);
|
||||||
@@ -371,12 +390,7 @@ const Kb = () => {
|
|||||||
</Flex>
|
</Flex>
|
||||||
),
|
),
|
||||||
onClick: () => {
|
onClick: () => {
|
||||||
const a = document.createElement('a');
|
exportDataset(dataset);
|
||||||
a.href = `/api/core/dataset/exportAll?datasetId=${dataset._id}`;
|
|
||||||
a.download = `${dataset.name}.csv`;
|
|
||||||
document.body.appendChild(a);
|
|
||||||
a.click();
|
|
||||||
document.body.removeChild(a);
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
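The inline download code moves into an `exportDataset` mutation that first calls `getCheckExportLimit`, so the server can reject over-frequent exports up front instead of failing mid-download. The route's implementation is not part of this diff; a hypothetical sketch of what such a check could look like (the in-memory store and 30-minute window are assumptions, not the real logic):

```ts
import type { NextApiRequest, NextApiResponse } from 'next';

// Hypothetical in-memory store; a real implementation would persist this.
const lastExportAt = new Map<string, number>();
const LIMIT_MS = 30 * 60 * 1000; // assumed 30-minute window

export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  const { datasetId } = req.query as { datasetId: string };
  const last = lastExportAt.get(datasetId);

  if (last && Date.now() - last < LIMIT_MS) {
    // 429 tells the client to retry later; the UI surfaces its errorToast
    return res.status(429).json({ message: 'Export limit reached, try again later' });
  }
  lastExportAt.set(datasetId, Date.now());
  res.json({ ok: true });
}
```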
@@ -109,6 +109,7 @@ export async function generateQA(): Promise<any> {

   try {
     const startTime = Date.now();
+    const model = data.model ?? global.qaModels[0].model;

     // request LLM to get QA
     const messages: ChatMessageItemType[] = [
@@ -122,9 +123,10 @@ export async function generateQA(): Promise<any> {
         })
       }
     ];
-    const ai = getAIApi(undefined, 480000);
+
+    const ai = getAIApi(undefined, 600000);
     const chatResponse = await ai.chat.completions.create({
-      model: global.qaModels[0].model,
+      model,
       temperature: 0.01,
       messages,
       stream: false
@@ -147,8 +149,11 @@ export async function generateQA(): Promise<any> {
     // delete data from training
     await MongoDatasetTraining.findByIdAndDelete(data._id);

-    console.log(`split result length: `, qaArr.length);
-    console.log('生成QA成功,time:', `${(Date.now() - startTime) / 1000}s`);
+    addLog.info(`QA Training Finish`, {
+      time: `${(Date.now() - startTime) / 1000}s`,
+      splitLength: qaArr.length,
+      usage: chatResponse.usage
+    });

     // add bill
     if (qaArr.length > 0) {
@@ -156,7 +161,8 @@ export async function generateQA(): Promise<any> {
         teamId: data.teamId,
         tmbId: data.tmbId,
         totalTokens,
-        billId: data.billId
+        billId: data.billId,
+        model
       });
     } else {
       addLog.info(`QA result 0:`, { answer });
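Two behavioral changes above are easy to miss: the LLM request timeout grows from 480s to 600s, and the QA model is now resolved per training item with `data.model ?? global.qaModels[0].model`. Note that `??` only falls back on `null`/`undefined`, so an empty-string model would be passed through as-is. A small sketch of the fallback (the config shape is assumed from the diff's usage; the model name is a placeholder):

```ts
type QAModelConfig = { model: string; name: string; price: number };

// Assumed stand-in for global.qaModels, which the diff indexes with [0].
const qaModels: QAModelConfig[] = [
  { model: 'gpt-3.5-turbo-16k', name: 'Default QA model', price: 0 }
];

function resolveQAModel(requested?: string | null): string {
  // nullish coalescing: '' would NOT trigger the fallback, only null/undefined
  return requested ?? qaModels[0].model;
}
```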
@@ -1,5 +1,5 @@
 import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
-import { getAudioSpeechModel } from '@/service/core/ai/model';
+import { getAudioSpeechModel, getQAModel } from '@/service/core/ai/model';
 import type { ChatHistoryItemResType } from '@fastgpt/global/core/chat/api.d';
 import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
 import { addLog } from '@fastgpt/service/common/mongo/controller';
@@ -9,10 +9,16 @@ import { POST } from '@fastgpt/service/common/api/plusRequest';

 export function createBill(data: CreateBillProps) {
   if (!global.systemEnv.pluginBaseUrl) return;
+  if (data.total === 0) {
+    addLog.info('0 Bill', data);
+  }
   POST('/support/wallet/bill/createBill', data);
 }
 export function concatBill(data: ConcatBillProps) {
   if (!global.systemEnv.pluginBaseUrl) return;
+  if (data.total === 0) {
+    addLog.info('0 Bill', data);
+  }
   POST('/support/wallet/bill/concatBill', data);
 }

@@ -59,18 +65,18 @@ export const pushChatBill = ({
 export const pushQABill = async ({
   teamId,
   tmbId,
+  model,
   totalTokens,
   billId
 }: {
   teamId: string;
   tmbId: string;
+  model: string;
   totalTokens: number;
   billId: string;
 }) => {
-  addLog.info('splitData generate success', { totalTokens });
-
   // 获取模型单价格
-  const unitPrice = global.qaModels?.[0]?.price || 3;
+  const unitPrice = getQAModel(model).price;
   // 计算价格
   const total = unitPrice * totalTokens;

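`pushQABill` now prices tokens against the per-item model via `getQAModel`, instead of always charging the first configured QA model's price (previously hard-coded with a `|| 3` fallback). `getQAModel`'s body is not shown in this diff; a plausible sketch, assuming it falls back to the first configured model when the requested one is unknown (the list is passed explicitly here to keep the sketch self-contained, whereas the real helper presumably reads the global config):

```ts
type QAModelConfig = { model: string; name: string; price: number };

// Hypothetical implementation of getQAModel from @/service/core/ai/model:
// look the requested model up and fall back to the first entry so pricing
// never dereferences undefined.
function getQAModel(qaModels: QAModelConfig[], model?: string): QAModelConfig {
  return qaModels.find((item) => item.model === model) ?? qaModels[0];
}
```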
@@ -48,6 +48,9 @@ export const putDatasetById = (data: DatasetUpdateParams) => PUT(`/core/dataset/

 export const delDatasetById = (id: string) => DELETE(`/core/dataset/delete?id=${id}`);

+export const getCheckExportLimit = (datasetId: string) =>
+  GET(`/core/dataset/checkExportLimit`, { datasetId });
+
 /* =========== search test ============ */
 export const postSearchText = (data: SearchTestProps) =>
   POST<SearchDataResponseItemType[]>(`/core/dataset/searchTest`, data);