v4.6-4 (#473)
BIN  docSite/assets/imgs/datasetSetting1.png  (new file)
Binary file not shown. Size: 54 KiB.
@@ -50,5 +50,6 @@ curl --location --request POST 'https://{{host}}/api/admin/initv46-2' \
1. New - team workspaces
2. New - multi-route vectors (multiple vectors map to one data record)
3. New - TTS speech
- 4. New on the cloud version - ReRank vector recall, improving retrieval precision
- 5. Improved - dataset export now triggers a streaming download directly, no more waiting on a spinner
+ 4. New - a dataset can be configured with a text preprocessing model
+ 5. New on the cloud version - ReRank vector recall, improving retrieval precision
+ 6. Improved - dataset export now triggers a streaming download directly, no more waiting on a spinner
@@ -1,10 +1,10 @@
---
- title: 'Pricing'
- description: 'FastGPT pricing'
+ title: 'Cloud Version Pricing'
+ description: 'FastGPT cloud version pricing'
icon: 'currency_yen'
draft: false
toc: true
- weight: 10
+ weight: 11
---

## About Tokens
@@ -15,7 +15,7 @@ weight: 10

## FastGPT cloud billing

- Currently, FastGPT cloud billing is likewise based solely on the number of tokens used. The detailed billing table is below (the latest pricing on the live table is authoritative; it is fetched in real time when you click Recharge):
+ When using [https://fastgpt.run](https://fastgpt.run) or [https://ai.fastgpt.in](https://ai.fastgpt.in), you are simply charged by the number of tokens used. Usage details can be reviewed under Account - Usage Records. The detailed billing table is below (the latest pricing on the live table is authoritative; it is fetched in real time when you click Recharge):

{{< table "table-hover table-striped-columns" >}}
| Billing item | Price: CNY / 1K tokens (context included) |
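As a worked example (with a hypothetical price, since only the live table is authoritative): a request that consumes 1,500 prompt tokens and 500 completion tokens uses 2,000 tokens in total, so at an assumed 0.003 CNY / 1K tokens it would bill 2 x 0.003 = 0.006 CNY.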
@@ -1,6 +1,6 @@
---
title: "Dataset structure explained"
- description: "This section introduces the FastGPT dataset structure design, covering its QA storage format and retrieval format, so you can build datasets more effectively. It focuses on usage; the underlying principles are not discussed in depth."
+ description: "This section details the FastGPT dataset structure design, covering its QA storage format and multi-vector mapping, so you can build datasets more effectively. It focuses on usage; the underlying principles are not discussed in depth."
icon: "dataset"
draft: false
toc: true
@@ -25,13 +25,21 @@ FastGPT 采用了 RAG 中的 Embedding 方案构建知识库,要使用好 Fast

FastGPT uses the `PG Vector` extension of `PostgreSQL` as its vector retriever, with an `HNSW` index. `PostgreSQL` is used only for vector retrieval; `MongoDB` stores and serves all other data.

- In the `PostgreSQL` table, an `index` field stores the vector, a `q` field stores the content the vector corresponds to, and an `a` field stores a retrieval mapping. The field names `q`/`a` are historical and need not be read strictly as question-answer pairs. In practice, the combination of `q` and `a` can further annotate retrieved content and improve the LLM's comprehension (note: this does not directly improve search precision).
+ In the `PostgreSQL` table, an `index` field stores the vector and a `data_id` field locates the corresponding record in `MongoDB`. Several `index` rows can share one `data_id`; in other words, one data record can be covered by multiple vectors, and records hit more than once are merged at retrieval time.

- Currently, the main ways to improve vector search precision are:
- 1. Trim the content of `q` to shorten the vectorized text: when `q` is shorter and more precise, retrieval precision naturally rises, at the cost of some retrieval coverage, which suits scenarios with strictly defined answers.
- 2. Better tokenization and segmentation: when a passage is structurally and semantically complete and single-topic, precision also rises; many systems tune their splitters to keep each data record as intact as possible.
- 3. Text diversity: adding keywords, summaries, similar questions and other descriptive information to a piece of content gives its vector broader retrieval coverage.
+ 

+ ## Purpose and usage of multiple vectors
+
+ If we want a data record to be as long as possible while still having its semantics well represented, a single vector cannot satisfy both goals. We therefore use multi-vector mapping: one data record is mapped to several vectors, preserving both the completeness of the data and the expression of its semantics.
+
+ You can attach multiple vectors to one longer text; at retrieval time, a hit on any one of them recalls the whole record. A minimal sketch of this mapping follows.
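To make the mapping concrete, here is a minimal sketch of one data record. The `indexes` field shape (`defaultIndex`, `type`, `dataId`, `text`) follows the `initv46-3.ts` migration added later in this commit; the surrounding types and values are illustrative, not FastGPT's actual schema.

```ts
// One MongoDB data record carrying several vector indexes. The index shape
// mirrors what initv46-3.ts writes; the concrete values are made up.
type DatasetDataIndex = {
  defaultIndex: boolean; // marks the record's primary index
  type: 'chunk' | 'qa';  // the DatasetDataIndexTypeEnum values used by the migration
  dataId: string;        // id of the vector row in PostgreSQL
  text: string;          // the text that was embedded
};

const record: { q: string; a: string; indexes: DatasetDataIndex[] } = {
  q: 'A long passage explaining the FastGPT dataset structure ...',
  a: '',
  indexes: [
    { defaultIndex: true, type: 'chunk', dataId: 'pg-row-1', text: 'A long passage explaining ...' },
    { defaultIndex: false, type: 'chunk', dataId: 'pg-row-2', text: 'Summary: how vectors map to data records' }
  ]
};
// A hit on the vector of either pg-row-1 or pg-row-2 recalls this whole
// record; duplicate hits on the same record are merged at retrieval time.
```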
+ ## Ways to improve vector search precision
+
+ 1. Better tokenization and segmentation: when a passage is structurally and semantically complete and single-topic, precision rises; many systems tune their splitters to keep each data record as intact as possible.
+ 2. Trim the content of each `index` to shorten the vectorized text: shorter, more precise `index` content raises retrieval precision, at the cost of some coverage, which suits scenarios with strictly defined answers.
+ 3. Enrich the number of `index` entries: the same `chunk` of content can be given several `index` entries.
+ 4. Optimize the query: in practice, user questions are often vague or incomplete rather than clear, well-formed questions, so rewriting the user's question (the query) also improves precision substantially.
+ 5. Fine-tune the embedding model: off-the-shelf embedding models are general-purpose and not very precise in specialized domains, so fine-tuning one can substantially improve retrieval in a professional field.

@@ -63,8 +63,8 @@ export const splitText2Chunks = (props: { text: string; maxLen: number; overlapL
  let chunks: string[] = [];
  for (let i = 0; i < splitTexts.length; i++) {
    let text = splitTexts[i];
-   let chunkToken = countPromptTokens(lastChunk, '');
-   const textToken = countPromptTokens(text, '');
+   let chunkToken = lastChunk.length;
+   const textToken = text.length;

    // next chunk is too large / new chunk is too large (the current chunk must be smaller than maxLen)
    if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) {
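For orientation, a usage sketch of the splitter changed above. The parameter names come from the signature in the hunk header; the import path and the exact return shape (beyond the `chunks: string[]` visible in the body) are assumptions.

```ts
// Import path assumed; the signature is taken from the hunk header above.
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';

const { chunks } = splitText2Chunks({
  text: 'A long document to be split into overlapping pieces ...',
  maxLen: 500,   // after this commit, compared against string length, not token count
  overlapLen: 50 // assumed meaning: characters shared between adjacent chunks
});
console.log(chunks.length);
```

The switch from `countPromptTokens` to `.length` trades token-exact budgeting for a much cheaper per-iteration check; `maxLen` effectively becomes a character budget.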
6  packages/global/core/dataset/type.d.ts  (vendored)
@@ -1,4 +1,4 @@
- import type { VectorModelItemType } from '../../core/ai/model.d';
+ import type { LLMModelItemType, VectorModelItemType } from '../../core/ai/model.d';
import { PermissionTypeEnum } from '../../support/permission/constant';
import { PushDatasetDataChunkProps } from './api';
import {
@@ -19,6 +19,7 @@ export type DatasetSchemaType = {
  avatar: string;
  name: string;
  vectorModel: string;
  agentModel: string;
  tags: string[];
  type: `${DatasetTypeEnum}`;
  permission: `${PermissionTypeEnum}`;
@@ -84,8 +85,9 @@ export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datas
};

/* ================= dataset ===================== */
- export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel'> & {
+ export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel'> & {
  vectorModel: VectorModelItemType;
  agentModel: LLMModelItemType;
  isOwner: boolean;
  canWrite: boolean;
};
2  packages/global/support/wallet/bill/api.d.ts  (vendored)
@@ -3,6 +3,8 @@ import { BillListItemType } from './type';

export type CreateTrainingBillProps = {
  name: string;
  vectorModel?: string;
  agentModel?: string;
};

export type ConcatBillProps = {
@@ -61,7 +61,6 @@ const AppSchema = new Schema({
try {
  AppSchema.index({ updateTime: -1 });
  AppSchema.index({ 'share.collection': -1 });
} catch (error) {
  console.log(error);
}
@@ -69,7 +69,6 @@ const DatasetCollectionSchema = new Schema({
try {
  DatasetCollectionSchema.index({ datasetId: 1 });
  DatasetCollectionSchema.index({ userId: 1 });
  DatasetCollectionSchema.index({ updateTime: -1 });
} catch (error) {
  console.log(error);
@@ -48,6 +48,11 @@ const DatasetSchema = new Schema({
    required: true,
    default: 'text-embedding-ada-002'
  },
  agentModel: {
    type: String,
    required: true,
    default: 'gpt-3.5-turbo-16k'
  },
  type: {
    type: String,
    enum: Object.keys(DatasetTypeMap),
@@ -95,7 +95,7 @@ const TrainingDataSchema = new Schema({
try {
  TrainingDataSchema.index({ lockTime: 1 });
  TrainingDataSchema.index({ userId: 1 });
  TrainingDataSchema.index({ datasetId: 1 });
  TrainingDataSchema.index({ collectionId: 1 });
  TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 });
} catch (error) {
@@ -250,6 +250,7 @@
  }
},
"dataset": {
+ "Agent Model": "Learning Model",
  "Chunk Length": "Chunk Length",
  "Confirm move the folder": "Confirm Move",
  "Confirm to delete the data": "Confirm to delete the data?",
@@ -259,6 +260,7 @@
  "Delete Dataset Error": "Delete dataset failed",
  "Edit Folder": "Edit Folder",
  "Export": "Export",
+ "Export Dataset Limit Error": "Export Data Error",
  "File Input": "Import File",
  "File Size": "File Size",
  "Filename": "Filename",
@@ -250,6 +250,7 @@
  }
},
"dataset": {
+ "Agent Model": "文件处理模型",
  "Chunk Length": "数据总量",
  "Confirm move the folder": "确认移动到该目录",
  "Confirm to delete the data": "确认删除该数据?",
@@ -259,6 +260,7 @@
  "Delete Dataset Error": "删除知识库异常",
  "Edit Folder": "编辑文件夹",
  "Export": "导出",
+ "Export Dataset Limit Error": "导出数据失败",
  "File Input": "文件导入",
  "File Size": "文件大小",
  "Filename": "文件名",
@@ -1,3 +1,4 @@
+ import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
import type {
  DatasetCollectionItemType,
  DatasetItemType
@@ -17,13 +18,8 @@ export const defaultDatasetDetail: DatasetItemType = {
  permission: 'private',
  isOwner: false,
  canWrite: false,
- vectorModel: {
-   model: 'text-embedding-ada-002',
-   name: 'Embedding-2',
-   price: 0.2,
-   defaultToken: 500,
-   maxToken: 3000
- }
+ vectorModel: defaultVectorModels[0],
+ agentModel: defaultQAModels[0]
};

export const defaultCollectionDetail: DatasetCollectionItemType = {
@@ -43,7 +39,8 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
    name: '',
    tags: [],
    permission: 'private',
-   vectorModel: 'text-embedding-ada-002'
+   vectorModel: defaultVectorModels[0].model,
+   agentModel: defaultQAModels[0].model
  },
  parentId: '',
  name: '',
@@ -5,6 +5,7 @@ import type { SearchTestItemType } from '@/types/core/dataset';
import { UploadChunkItemType } from '@fastgpt/global/core/dataset/type';
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';
+ import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';

/* ===== dataset ===== */
export type DatasetUpdateParams = {
@@ -14,6 +15,7 @@ export type DatasetUpdateParams = {
  name?: string;
  avatar?: string;
  permission?: `${PermissionTypeEnum}`;
  agentModel?: LLMModelItemType;
};

export type SearchTestProps = {
@@ -9,6 +9,7 @@ export type CreateDatasetParams = {
  tags: string;
  avatar: string;
  vectorModel?: string;
  agentModel?: string;
  type: `${DatasetTypeEnum}`;
};
@@ -1,8 +1,8 @@
export const Prompt_AgentQA = {
  prompt: `我会给你一段文本,{{theme}},学习它们,并整理学习成果,要求为:
- 1. 提出最多 25 个问题。
- 2. 给出每个问题的答案。
- 3. 答案要详细完整,答案可以包含普通文字、链接、代码、表格、公示、媒体链接等 markdown 元素。
+ 1. 提出问题并给出每个问题的答案。
+ 2. 每个答案都要详细完整,给出相关原文描述,答案可以包含普通文字、链接、代码、表格、公示、媒体链接等 markdown 元素。
+ 3. 最多提出 30 个问题。
+ 4. 按格式返回多个问题和答案:

Q1: 问题。
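As a hedged illustration of how such a template is consumed before the chat call (only the `{{theme}}` placeholder is confirmed by the prompt above; the helper below is hypothetical, not FastGPT's actual API):

```ts
// Hypothetical helper: fill {{placeholders}} in a prompt template.
const renderTemplate = (template: string, vars: Record<string, string>) =>
  template.replace(/\{\{(\w+)\}\}/g, (match, key) => vars[key] ?? match);

const filled = renderTemplate(Prompt_AgentQA.prompt, {
  theme: '它们是 FastGPT 的使用文档'
});
```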
@@ -11,6 +11,8 @@ import {
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { getUserDefaultTeam } from '@fastgpt/service/support/user/team/controller';
+ import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
+ import { defaultQAModels } from '@fastgpt/global/core/ai/model';

let success = 0;
/* Move the data in pg over to mongo dataset.datas and build the mapping */
@@ -41,6 +43,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
    await initPgData();

+   await MongoDataset.updateMany(
+     {},
+     {
+       agentModel: defaultQAModels[0].model
+     }
+   );

    jsonRes(res, {
      data: await init(limit),
      message:
@@ -76,14 +85,19 @@ async function initPgData() {
  for (let i = 0; i < limit; i++) {
    init(i);
  }

  async function init(index: number): Promise<any> {
    const userId = rows[index]?.user_id;
    if (!userId) return;
    try {
      const tmb = await getUserDefaultTeam({ userId });
      console.log(tmb);

      // update pg
      await PgClient.query(
-       `Update ${PgDatasetTableName} set team_id = '${tmb.teamId}', tmb_id = '${tmb.tmbId}' where user_id = '${userId}' AND team_id='null';`
+       `Update ${PgDatasetTableName} set team_id = '${String(tmb.teamId)}', tmb_id = '${String(
+         tmb.tmbId
+       )}' where user_id = '${userId}' AND team_id='null';`
      );
      console.log(++success);
      init(index + limit);
101  projects/app/src/pages/api/admin/initv46-3.ts  (new file)
@@ -0,0 +1,101 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { delay } from '@/utils/tools';
import { PgClient } from '@fastgpt/service/common/pg';
import {
  DatasetDataIndexTypeEnum,
  PgDatasetTableName
} from '@fastgpt/global/core/dataset/constant';

import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';

let success = 0;
/* Move the data in pg over to mongo dataset.datas and build the mapping */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    const { limit = 50 } = req.body as { limit: number };
    await authCert({ req, authRoot: true });
    await connectToDatabase();
    success = 0;

    jsonRes(res, {
      data: await init(limit)
    });
  } catch (error) {
    console.log(error);

    jsonRes(res, {
      code: 500,
      error
    });
  }
}

type PgItemType = {
  id: string;
  q: string;
  a: string;
  dataset_id: string;
  collection_id: string;
  data_id: string;
};

async function init(limit: number): Promise<any> {
  const { rows: idList } = await PgClient.query<{ id: string }>(
    `SELECT id FROM ${PgDatasetTableName} WHERE inited=1`
  );

  console.log('totalCount', idList.length);

  await delay(2000);

  if (idList.length === 0) return;

  for (let i = 0; i < limit; i++) {
    initData(i);
  }

  async function initData(index: number): Promise<any> {
    const dataId = idList[index]?.id;
    if (!dataId) {
      console.log('done');
      return;
    }
    // get limit data where data_id is null
    const { rows } = await PgClient.query<PgItemType>(
      `SELECT id,q,a,dataset_id,collection_id,data_id FROM ${PgDatasetTableName} WHERE id=${dataId};`
    );
    const data = rows[0];
    if (!data) {
      console.log('done');
      return;
    }

    try {
      // update mongo data and update inited
      await MongoDatasetData.findByIdAndUpdate(data.data_id, {
        q: data.q,
        a: data.a,
        indexes: [
          {
            defaultIndex: !data.a,
            type: data.a ? DatasetDataIndexTypeEnum.qa : DatasetDataIndexTypeEnum.chunk,
            dataId: data.id,
            text: data.q
          }
        ]
      });
      // update pg data_id
      await PgClient.query(`UPDATE ${PgDatasetTableName} SET inited=0 WHERE id=${dataId};`);

      return initData(index + limit);
    } catch (error) {
      console.log(error);
      console.log(data);
      await delay(500);
      return initData(index);
    }
  }
}
@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
- import { getVectorModel } from '@/service/core/ai/model';
+ import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
@@ -22,6 +22,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
  const data = datasets.map((item) => ({
    ...item.toJSON(),
    vectorModel: getVectorModel(item.vectorModel),
    agentModel: getQAModel(item.agentModel),
    canWrite: String(item.tmbId) === tmbId,
    isOwner: teamOwner || String(item.tmbId) === tmbId
  }));
73  projects/app/src/pages/api/core/dataset/checkExportLimit.ts  (new file)
@@ -0,0 +1,73 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { addLog } from '@fastgpt/service/common/mongo/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    let { datasetId } = req.query as {
      datasetId: string;
    };

    if (!datasetId) {
      throw new Error('缺少参数');
    }

    // credential check
    const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });

    await limitCheck({
      datasetId,
      userId
    });

    jsonRes(res);
  } catch (err) {
    res.status(500);
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

export async function limitCheck({ datasetId, userId }: { datasetId: string; userId: string }) {
  const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);

  const limitMinutesAgo = new Date(
    Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000
  );

  // auth export times
  const authTimes = await MongoUser.findOne(
    {
      _id: userId,
      $or: [
        { 'limit.exportKbTime': { $exists: false } },
        { 'limit.exportKbTime': { $lte: limitMinutesAgo } }
      ]
    },
    '_id limit'
  );

  if (!authTimes) {
    const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`;
    return Promise.reject(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
  }

  // auth max data
  const total = await MongoDatasetData.countDocuments({
    datasetId: { $in: exportIds }
  });

  addLog.info(`export datasets: ${datasetId}`, { total });

  if (total > 100000) {
    return Promise.reject('数据量超出 10 万,无法导出');
  }
}
@@ -9,7 +9,8 @@ import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/use
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
-   const { name, tags, avatar, vectorModel, parentId, type } = req.body as CreateDatasetParams;
+   const { name, tags, avatar, vectorModel, agentModel, parentId, type } =
+     req.body as CreateDatasetParams;

    // credential check
    const { teamId, tmbId } = await authUserNotVisitor({ req, authToken: true });
@@ -20,6 +21,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
      tmbId,
      tags,
      vectorModel,
      agentModel,
      avatar,
      parentId: parentId || null,
      type
@@ -10,7 +10,7 @@ import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import type { PushDataResponse } from '@/global/core/api/datasetRes.d';
import type { PushDatasetDataProps } from '@/global/core/dataset/api.d';
import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
- import { getVectorModel } from '@/service/core/ai/model';
+ import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
@@ -63,24 +63,14 @@ export async function pushDataToDatasetCollection({
  mode,
  prompt,
  billId
- }: { teamId: string; tmbId: string } & PushDatasetDataProps): Promise<PushDataResponse> {
-   // get dataset vector model
-   const {
-     datasetId: { _id: datasetId, vectorModel }
-   } = await getCollectionWithDataset(collectionId);
-
-   const vectorModelData = getVectorModel(vectorModel);
-
-   const modeMap = {
-     [TrainingModeEnum.chunk]: {
-       maxToken: vectorModelData.maxToken * 1.5,
-       model: vectorModelData.model
-     },
-     [TrainingModeEnum.qa]: {
-       maxToken: global.qaModels[0].maxContext * 0.8,
-       model: global.qaModels[0].model
-     }
-   };
+ }: {
+   teamId: string;
+   tmbId: string;
+ } & PushDatasetDataProps): Promise<PushDataResponse> {
+   const { datasetId, model, maxToken } = await checkModelValid({
+     mode,
+     collectionId
+   });

  // filter repeat or equal content
  const set = new Set();
@@ -102,12 +92,13 @@ export async function pushDataToDatasetCollection({
    // count q token
    const token = countPromptTokens(item.q);

-   if (token > modeMap[mode].maxToken) {
+   if (token > maxToken) {
      filterResult.overToken.push(item);
      return;
    }

    if (set.has(text)) {
      console.log('repeat', item);
      filterResult.repeat.push(item);
    } else {
      filterResult.success.push(item);
@@ -126,7 +117,7 @@ export async function pushDataToDatasetCollection({
      billId,
      mode,
      prompt,
-     model: modeMap[mode].model,
+     model,
      q: item.q,
      a: item.a,
      indexes: item.indexes
@@ -142,6 +133,44 @@ export async function pushDataToDatasetCollection({
  };
}

export async function checkModelValid({
  mode,
  collectionId
}: {
  mode: `${TrainingModeEnum}`;
  collectionId: string;
}) {
  const {
    datasetId: { _id: datasetId, vectorModel, agentModel }
  } = await getCollectionWithDataset(collectionId);

  if (mode === TrainingModeEnum.chunk) {
    if (!collectionId) return Promise.reject(`CollectionId is empty`);
    const vectorModelData = getVectorModel(vectorModel);
    if (!vectorModelData) {
      return Promise.reject(`Model ${vectorModel} is inValid`);
    }
    return {
      datasetId,
      maxToken: vectorModelData.maxToken * 1.5,
      model: vectorModelData.model
    };
  }

  if (mode === TrainingModeEnum.qa) {
    const qaModelData = getQAModel(agentModel);
    if (!qaModelData) {
      return Promise.reject(`Model ${agentModel} is inValid`);
    }
    return {
      datasetId,
      maxToken: qaModelData.maxContext * 0.8,
      model: qaModelData.model
    };
  }
  return Promise.reject(`Mode ${mode} is inValid`);
}

export const config = {
  api: {
    bodyParser: {
@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
- import { getVectorModel } from '@/service/core/ai/model';
+ import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
@@ -28,6 +28,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
      data: {
        ...dataset,
        vectorModel: getVectorModel(dataset.vectorModel),
        agentModel: getQAModel(dataset.agentModel),
        canWrite,
        isOwner
      }
@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
- import { jsonRes } from '@fastgpt/service/common/response';
+ import { jsonRes, responseWriteController } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { addLog } from '@fastgpt/service/common/mongo/controller';
@@ -8,6 +8,7 @@ import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';
import { Readable } from 'stream';
import type { Cursor } from '@fastgpt/service/common/mongo';
+ import { limitCheck } from './checkExportLimit';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
@@ -23,39 +24,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
    // credential check
    const { userId } = await authDataset({ req, authToken: true, datasetId, per: 'w' });

-   const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);
-
-   const limitMinutesAgo = new Date(
-     Date.now() - (global.feConfigs?.limit?.exportLimitMinutes || 0) * 60 * 1000
-   );
-
-   // auth export times
-   const authTimes = await MongoUser.findOne(
-     {
-       _id: userId,
-       $or: [
-         { 'limit.exportKbTime': { $exists: false } },
-         { 'limit.exportKbTime': { $lte: limitMinutesAgo } }
-       ]
-     },
-     '_id limit'
-   );
-
-   if (!authTimes) {
-     const minutes = `${global.feConfigs?.limit?.exportLimitMinutes || 0} 分钟`;
-     throw new Error(`上次导出未到 ${minutes},每 ${minutes}仅可导出一次。`);
-   }
-
-   // auth max data
-   const total = await MongoDatasetData.countDocuments({
-     datasetId: { $in: exportIds }
-   });
-
-   addLog.info(`export datasets: ${datasetId}`, { total });
-
-   if (total > 100000) {
-     throw new Error('数据量超出 10 万,无法导出');
-   }
+   await limitCheck({
+     userId,
+     datasetId
+   });
+   const exportIds = await findDatasetIdTreeByTopDatasetId(datasetId);

    res.setHeader('Content-Type', 'text/csv; charset=utf-8;');
    res.setHeader('Content-Disposition', 'attachment; filename=dataset.csv; ');
@@ -72,35 +46,27 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
      'q a'
    ).cursor();

-   function cursorToReadableStream(cursor: Cursor) {
-     const readable = new Readable({
-       objectMode: true,
-       read() {}
-     });
-
-     readable.push(`\uFEFFindex,content`);
-
-     cursor.on('data', (doc) => {
-       const q = doc.q.replace(/"/g, '""') || '';
-       const a = doc.a.replace(/"/g, '""') || '';
-
-       readable.push(`\n"${q}","${a}"`);
-     });
-
-     cursor.on('end', async () => {
-       readable.push(null);
-       cursor.close();
-       await MongoUser.findByIdAndUpdate(userId, {
-         'limit.exportKbTime': new Date()
-       });
-     });
-
-     return readable;
-   }
-
-   // @ts-ignore
-   const stream = cursorToReadableStream(cursor);
-   stream.pipe(res);
+   const write = responseWriteController({
+     res,
+     readStream: cursor
+   });
+
+   write(`\uFEFFindex,content`);
+
+   cursor.on('data', (doc) => {
+     const q = doc.q.replace(/"/g, '""') || '';
+     const a = doc.a.replace(/"/g, '""') || '';
+
+     write(`\n"${q}","${a}"`);
+   });
+
+   cursor.on('end', async () => {
+     cursor.close();
+     res.end();
+     await MongoUser.findByIdAndUpdate(userId, {
+       'limit.exportKbTime': new Date()
+     });
+   });
  } catch (err) {
    res.status(500);
    jsonRes(res, {
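The handler above now streams CSV rows through `responseWriteController` instead of buffering them in a `Readable`, which is what makes the "streaming download" changelog item work. Its implementation is not part of this diff; a plausible minimal sketch (an assumption, not the real code in `@fastgpt/service/common/response`) applies backpressure like this:

```ts
import type { ServerResponse } from 'http';

// Sketch only: pause the source cursor while the HTTP response buffer is
// full, and resume it once the buffer drains.
function responseWriteControllerSketch({
  res,
  readStream
}: {
  res: ServerResponse;
  readStream: { pause: () => void; resume: () => void };
}) {
  res.on('drain', () => readStream.resume());
  return (text: string) => {
    if (!res.write(text)) readStream.pause();
  };
}
```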
@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
- import { getVectorModel } from '@/service/core/ai/model';
+ import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
@@ -28,6 +28,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
    datasets.map(async (item) => ({
      ...item.toJSON(),
      vectorModel: getVectorModel(item.vectorModel),
      agentModel: getQAModel(item.agentModel),
      canWrite,
      isOwner: teamOwner || String(item.tmbId) === tmbId
    }))
@@ -8,7 +8,8 @@ import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
-   const { id, parentId, name, avatar, tags, permission } = req.body as DatasetUpdateParams;
+   const { id, parentId, name, avatar, tags, permission, agentModel } =
+     req.body as DatasetUpdateParams;

    if (!id) {
      throw new Error('缺少参数');
@@ -26,7 +27,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
      ...(name && { name }),
      ...(avatar && { avatar }),
      ...(tags && { tags }),
-     ...(permission && { permission })
+     ...(permission && { permission }),
+     ...(agentModel && { agentModel: agentModel.model })
    }
  );
@@ -5,15 +5,17 @@ import { MongoBill } from '@fastgpt/service/support/wallet/bill/schema';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { CreateTrainingBillProps } from '@fastgpt/global/support/wallet/bill/api.d';
+ import { getQAModel, getVectorModel } from '@/service/core/ai/model';

export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    await connectToDatabase();
-   const { name } = req.body as CreateTrainingBillProps;
+   const { name, vectorModel, agentModel } = req.body as CreateTrainingBillProps;

    const { teamId, tmbId } = await authCert({ req, authToken: true, authApiKey: true });

-   const qaModel = global.qaModels[0];
+   const vectorModelData = getVectorModel(vectorModel);
+   const agentModelData = getQAModel(agentModel);

    const { _id } = await MongoBill.create({
      teamId,
@@ -23,13 +25,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
      list: [
        {
          moduleName: '索引生成',
-         model: 'embedding',
+         model: vectorModelData.name,
          amount: 0,
          tokenLen: 0
        },
        {
          moduleName: 'QA 拆分',
-         model: qaModel?.name,
+         model: agentModelData.name,
          amount: 0,
          tokenLen: 0
        }
@@ -170,7 +170,7 @@ const DataCard = () => {
      </Flex>
      <Grid
        minH={'100px'}
-       gridTemplateColumns={['1fr', 'repeat(2,1fr)', 'repeat(3,1fr)']}
+       gridTemplateColumns={['1fr', 'repeat(2,1fr)', 'repeat(3,1fr)', 'repeat(4,1fr)']}
        gridGap={4}
      >
        {datasetDataList.map((item) => (
@@ -34,10 +34,10 @@ const ImportData = ({
  const theme = useTheme();
  const { datasetDetail } = useDatasetStore();
  const [importType, setImportType] = useState<`${ImportTypeEnum}`>(ImportTypeEnum.chunk);
+ const vectorModel = datasetDetail.vectorModel;
+ const agentModel = datasetDetail.agentModel;

  const typeMap = useMemo(() => {
-   const vectorModel = datasetDetail.vectorModel;
-   const qaModel = qaModelList[0];
    const map = {
      [ImportTypeEnum.chunk]: {
        defaultChunkLen: vectorModel?.defaultToken || 500,
@@ -45,8 +45,8 @@ const ImportData = ({
        mode: TrainingModeEnum.chunk
      },
      [ImportTypeEnum.qa]: {
-       defaultChunkLen: qaModel?.maxContext * 0.5 || 8000,
-       unitPrice: qaModel?.price || 3,
+       defaultChunkLen: agentModel?.maxContext * 0.6 || 9000,
+       unitPrice: agentModel?.price || 3,
        mode: TrainingModeEnum.qa
      },
      [ImportTypeEnum.csv]: {
@@ -56,7 +56,13 @@ const ImportData = ({
      }
    };
    return map[importType];
- }, [datasetDetail.vectorModel, importType]);
+ }, [
+   agentModel?.maxContext,
+   agentModel?.price,
+   importType,
+   vectorModel?.defaultToken,
+   vectorModel?.price
+ ]);

  const TitleStyle: BoxProps = {
    fontWeight: 'bold',
@@ -104,8 +110,10 @@ const ImportData = ({

      <Provider
        {...typeMap}
+       vectorModel={vectorModel.model}
+       agentModel={agentModel.model}
-       datasetId={datasetDetail._id}
+       datasetId={datasetId}
        importType={importType}
        parentId={parentId}
        onUploadSuccess={uploadSuccess}
      >
@@ -90,6 +90,8 @@ const Provider = ({
  parentId,
  unitPrice,
  mode,
  vectorModel,
  agentModel,
  defaultChunkLen = 500,
  importType,
  onUploadSuccess,
@@ -99,6 +101,8 @@ const Provider = ({
  parentId: string;
  unitPrice: number;
  mode: `${TrainingModeEnum}`;
  vectorModel: string;
  agentModel: string;
  defaultChunkLen: number;
  importType: `${ImportTypeEnum}`;
  onUploadSuccess: () => void;
@@ -132,7 +136,9 @@ const Provider = ({
      const chunks = file.chunks;
      // create training bill
      const billId = await postCreateTrainingBill({
-       name: t('dataset.collections.Create Training Data', { filename: file.filename })
+       name: t('dataset.collections.Create Training Data', { filename: file.filename }),
+       vectorModel,
+       agentModel
      });
      // create a file collection and training bill
      const collectionId = await postDatasetCollection({
@@ -13,8 +13,8 @@ const fileExtension = '.txt, .doc, .docx, .pdf, .md';

const QAImport = () => {
  const { datasetDetail } = useDatasetStore();
- const vectorModel = datasetDetail.vectorModel;
- const unitPrice = vectorModel?.price || 0.2;
+ const agentModel = datasetDetail.agentModel;
+ const unitPrice = agentModel?.price || 3;

  const {
    successChunks,
@@ -9,7 +9,7 @@ import React, {
import { useRouter } from 'next/router';
import { Box, Flex, Button, FormControl, IconButton, Input } from '@chakra-ui/react';
import { QuestionOutlineIcon, DeleteIcon } from '@chakra-ui/icons';
- import { delDatasetById, putDatasetById } from '@/web/core/dataset/api';
+ import { delDatasetById } from '@/web/core/dataset/api';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useToast } from '@/web/common/hooks/useToast';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
@@ -22,6 +22,8 @@ import Tag from '@/components/Tag';
import MyTooltip from '@/components/MyTooltip';
import { useTranslation } from 'next-i18next';
import PermissionRadio from '@/components/support/permission/Radio';
+ import MySelect from '@/components/Select';
+ import { qaModelList } from '@/web/common/system/staticData';

export interface ComponentRef {
  initInput: (tags: string) => void;
@@ -50,7 +52,7 @@ const Info = (
    multiple: false
  });

- const { datasetDetail, loadDatasetDetail, loadDatasets } = useDatasetStore();
+ const { datasetDetail, loadDatasetDetail, loadDatasets, updateDataset } = useDatasetStore();

  /* delete button handler */
  const onclickDelKb = useCallback(async () => {
@@ -76,11 +78,10 @@ const Info = (
    async (data: DatasetItemType) => {
      setBtnLoading(true);
      try {
-       await putDatasetById({
+       await updateDataset({
          id: datasetId,
          ...data
        });
-       await loadDatasetDetail(datasetId, true);
        toast({
          title: '更新成功',
          status: 'success'
|
      }
      setBtnLoading(false);
    },
-   [loadDatasetDetail, datasetId, loadDatasets, toast]
+   [updateDataset, datasetId, loadDatasetDetail, toast, loadDatasets]
  );
  const saveSubmitError = useCallback(() => {
    // deep search message
@@ -194,6 +195,27 @@ const Info = (
          })}
        />
      </FormControl>
      <Flex mt={6} alignItems={'center'}>
        <Box flex={['0 0 90px', '0 0 160px']} w={0}>
          {t('dataset.Agent Model')}
        </Box>
        <Box flex={[1, '0 0 300px']}>
          <MySelect
            w={'100%'}
            value={getValues('agentModel').model}
            list={qaModelList.map((item) => ({
              label: item.name,
              value: item.model
            }))}
            onchange={(e) => {
              const agentModel = qaModelList.find((item) => item.model === e);
              if (!agentModel) return;
              setValue('agentModel', agentModel);
              setRefresh((state) => !state);
            }}
          />
        </Box>
      </Flex>
      <Flex mt={8} alignItems={'center'} w={'100%'} flexWrap={'wrap'}>
        <Box flex={['0 0 90px', '0 0 160px']} w={0}>
          标签
@@ -196,7 +196,7 @@ const InputDataModal = ({
  const loading = useMemo(() => isImporting || isUpdating, [isImporting, isUpdating]);

  return (
-   <MyModal isOpen={true} isCentered w={'90vw'} maxW={'90vw'} h={'90vh'}>
+   <MyModal isOpen={true} isCentered w={'90vw'} maxW={'1440px'} h={'90vh'}>
      <Flex h={'100%'}>
        <Box p={5} borderRight={theme.borders.base}>
          <RawSourceText
@@ -250,7 +250,7 @@ const InputDataModal = ({
  mt={1}
  placeholder={`该输入框是必填项,该内容通常是对于知识点的描述,也可以是用户的问题,最多 ${maxToken} 字。`}
  maxLength={maxToken}
- rows={10}
+ rows={12}
  bg={'myWhite.400'}
  {...register(`q`, {
    required: true
@@ -274,7 +274,7 @@ const InputDataModal = ({
    maxToken * 1.5
  } 字。`}
  bg={'myWhite.400'}
- rows={10}
+ rows={12}
  maxLength={maxToken * 1.5}
  {...register('a')}
/>
@@ -15,10 +15,12 @@ import { postCreateDataset } from '@/web/core/dataset/api';
import type { CreateDatasetParams } from '@/global/core/dataset/api.d';
import MySelect from '@/components/Select';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
- import { vectorModelList } from '@/web/common/system/staticData';
+ import { vectorModelList, qaModelList } from '@/web/common/system/staticData';
import Tag from '@/components/Tag';
+ import { useTranslation } from 'next-i18next';

const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: string }) => {
+ const { t } = useTranslation();
  const [refresh, setRefresh] = useState(false);
  const { toast } = useToast();
  const router = useRouter();
@@ -29,6 +31,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
      name: '',
      tags: '',
      vectorModel: vectorModelList[0].model,
      agentModel: qaModelList[0].model,
      type: 'dataset',
      parentId
    }
@@ -76,7 +79,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
  });

  return (
-   <MyModal isOpen onClose={onClose} isCentered={!isPc} w={'400px'}>
+   <MyModal isOpen onClose={onClose} isCentered={!isPc} w={'450px'}>
      <ModalHeader fontSize={'2xl'}>创建一个知识库</ModalHeader>
      <ModalBody>
        <Box color={'myGray.800'} fontWeight={'bold'}>
@@ -106,7 +109,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
        />
      </Flex>
      <Flex mt={6} alignItems={'center'}>
-       <Box flex={'0 0 80px'}>索引模型</Box>
+       <Box flex={'0 0 100px'}>索引模型</Box>
        <Box flex={1}>
          <MySelect
            w={'100%'}
@@ -122,8 +125,25 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
          />
        </Box>
      </Flex>
+     <Flex mt={6} alignItems={'center'}>
+       <Box flex={'0 0 100px'}>{t('dataset.Agent Model')}</Box>
+       <Box flex={1}>
+         <MySelect
+           w={'100%'}
+           value={getValues('agentModel')}
+           list={qaModelList.map((item) => ({
+             label: item.name,
+             value: item.model
+           }))}
+           onchange={(e) => {
+             setValue('agentModel', e);
+             setRefresh((state) => !state);
+           }}
+         />
+       </Box>
+     </Flex>
      <Flex mt={6} alignItems={'center'} w={'100%'}>
-       <Box flex={'0 0 80px'}>
+       <Box flex={'0 0 100px'}>
          标签
          <MyTooltip label={'用空格隔开多个标签,便于搜索'} forceShow>
            <QuestionOutlineIcon ml={1} />
@@ -20,7 +20,8 @@ import {
  delDatasetById,
  getDatasetPaths,
  putDatasetById,
- postCreateDataset
+ postCreateDataset,
+ getCheckExportLimit
} from '@/web/core/dataset/api';
import { useTranslation } from 'next-i18next';
import Avatar from '@/components/Avatar';
@@ -38,6 +39,7 @@ import { useDrag } from '@/web/common/hooks/useDrag';
import { useUserStore } from '@/web/support/user/useUserStore';
import PermissionIconText from '@/components/support/permission/IconText';
import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';
+ import { DatasetItemType } from '@fastgpt/global/core/dataset/type';

const CreateModal = dynamic(() => import('./component/CreateModal'), { ssr: false });
const MoveModal = dynamic(() => import('./component/MoveModal'), { ssr: false });
@@ -89,6 +91,23 @@ const Kb = () => {
    successToast: t('common.Delete Success'),
    errorToast: t('dataset.Delete Dataset Error')
  });
  // check export limit
  const { mutate: exportDataset } = useRequest({
    mutationFn: async (dataset: DatasetItemType) => {
      setLoading(true);
      await getCheckExportLimit(dataset._id);
      const a = document.createElement('a');
      a.href = `/api/core/dataset/exportAll?datasetId=${dataset._id}`;
      a.download = `${dataset.name}.csv`;
      document.body.appendChild(a);
      a.click();
      document.body.removeChild(a);
    },
    onSettled() {
      setLoading(false);
    },
    errorToast: t('dataset.Export Dataset Limit Error')
  });

  const { data, refetch } = useQuery(['loadDataset', parentId], () => {
    return Promise.all([loadDatasets(parentId), getDatasetPaths(parentId)]);
@@ -371,12 +390,7 @@ const Kb = () => {
      </Flex>
    ),
    onClick: () => {
-     const a = document.createElement('a');
-     a.href = `/api/core/dataset/exportAll?datasetId=${dataset._id}`;
-     a.download = `${dataset.name}.csv`;
-     document.body.appendChild(a);
-     a.click();
-     document.body.removeChild(a);
+     exportDataset(dataset);
    }
  },
  {
@@ -109,6 +109,7 @@ export async function generateQA(): Promise<any> {
  try {
    const startTime = Date.now();
+   const model = data.model ?? global.qaModels[0].model;

    // request LLM to get QA
    const messages: ChatMessageItemType[] = [
@@ -122,9 +123,10 @@ export async function generateQA(): Promise<any> {
        })
      }
    ];
-   const ai = getAIApi(undefined, 480000);
+   const ai = getAIApi(undefined, 600000);
    const chatResponse = await ai.chat.completions.create({
-     model: global.qaModels[0].model,
+     model,
      temperature: 0.01,
      messages,
      stream: false
@@ -147,8 +149,11 @@ export async function generateQA(): Promise<any> {
      // delete data from training
      await MongoDatasetTraining.findByIdAndDelete(data._id);

-     console.log(`split result length: `, qaArr.length);
-     console.log('生成QA成功,time:', `${(Date.now() - startTime) / 1000}s`);
+     addLog.info(`QA Training Finish`, {
+       time: `${(Date.now() - startTime) / 1000}s`,
+       splitLength: qaArr.length,
+       usage: chatResponse.usage
+     });

      // add bill
      if (qaArr.length > 0) {
@@ -156,7 +161,8 @@ export async function generateQA(): Promise<any> {
        teamId: data.teamId,
        tmbId: data.tmbId,
        totalTokens,
-       billId: data.billId
+       billId: data.billId,
+       model
      });
    } else {
      addLog.info(`QA result 0:`, { answer });
@@ -1,5 +1,5 @@
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
- import { getAudioSpeechModel } from '@/service/core/ai/model';
+ import { getAudioSpeechModel, getQAModel } from '@/service/core/ai/model';
import type { ChatHistoryItemResType } from '@fastgpt/global/core/chat/api.d';
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import { addLog } from '@fastgpt/service/common/mongo/controller';
@@ -9,10 +9,16 @@ import { POST } from '@fastgpt/service/common/api/plusRequest';

export function createBill(data: CreateBillProps) {
  if (!global.systemEnv.pluginBaseUrl) return;
  if (data.total === 0) {
    addLog.info('0 Bill', data);
  }
  POST('/support/wallet/bill/createBill', data);
}
export function concatBill(data: ConcatBillProps) {
  if (!global.systemEnv.pluginBaseUrl) return;
  if (data.total === 0) {
    addLog.info('0 Bill', data);
  }
  POST('/support/wallet/bill/concatBill', data);
}
@@ -59,18 +65,18 @@ export const pushChatBill = ({
export const pushQABill = async ({
  teamId,
  tmbId,
  model,
  totalTokens,
  billId
}: {
  teamId: string;
  tmbId: string;
  model: string;
  totalTokens: number;
  billId: string;
}) => {
  addLog.info('splitData generate success', { totalTokens });

  // get the model's unit price
- const unitPrice = global.qaModels?.[0]?.price || 3;
+ const unitPrice = getQAModel(model).price;
  // compute the total price
  const total = unitPrice * totalTokens;
@@ -48,6 +48,9 @@ export const putDatasetById = (data: DatasetUpdateParams) => PUT(`/core/dataset/

export const delDatasetById = (id: string) => DELETE(`/core/dataset/delete?id=${id}`);

+ export const getCheckExportLimit = (datasetId: string) =>
+   GET(`/core/dataset/checkExportLimit`, { datasetId });

/* =========== search test ============ */
export const postSearchText = (data: SearchTestProps) =>
  POST<SearchDataResponseItemType[]>(`/core/dataset/searchTest`, data);