Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-22 04:06:18 +00:00)

External dataset (#1519)

* perf: local file create collection
* rename middleware
* perf: remove code
* feat: next14
* feat: external file dataset
* collection tags field
* external file dataset doc
* fix: ts

2  .vscode/nextapi.code-snippets (vendored)
@@ -11,7 +11,7 @@
    "prefix": "nextapi",
    "body": [
      "import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';",
-     "import { NextAPI } from '@/service/middle/entry';",
+     "import { NextAPI } from '@/service/middleware/entry';",
      "",
      "export type ${TM_FILENAME_BASE}Query = {};",
      "",
BIN  docSite/assets/imgs/external_file0.png (new file, binary not shown, 163 KiB)
BIN  docSite/assets/imgs/external_file1.png (new file, binary not shown, 122 KiB)
BIN  docSite/assets/imgs/external_file2.png (new file, binary not shown, 75 KiB)
26  docSite/content/docs/course/externalFile.md (new file)

@@ -0,0 +1,26 @@
---
title: 'External File Dataset'
description: 'Introduction to the FastGPT external file dataset feature and how to use it'
icon: 'language'
draft: false
toc: true
weight: 107
---

The external file dataset is a feature of the FastGPT commercial edition. It connects to your existing file system, so files do not need to be imported into FastGPT a second time.

Read permissions remain under the control of your own file system.

| | | |
| --------------------- | --------------------- | --------------------- |
|  |  |  |

## Import parameters

- External preview address: the address used to jump to your file-reading page; the "file read ID" is appended when the link is opened.
- File access URL: the address from which the file can be fetched.
- File read ID: file access URLs are usually temporary. If you want permanent access, use this ID together with the "external preview address" to jump to a fresh reading address for the original file.
- Filename: by default, the filename is parsed from the file access URL. If you fill it in manually, the manual value takes precedence.

[See the API import documentation](/docs/development/openapi/dataset/#创建一个外部文件库集合商业版)
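A minimal sketch of how the "external preview address" and the {{fileId}} variable can combine on the file-system side. The template URL and helper name below are illustrative assumptions, not part of FastGPT:

```ts
// Assumed example: the external preview address is configured as
// 'https://files.example.com/view?fileId={{fileId}}'. FastGPT substitutes the
// collection's file read ID into the {{fileId}} placeholder before opening it.
const previewTemplate = 'https://files.example.com/view?fileId={{fileId}}';

export const buildPreviewUrl = (externalFileId: string): string =>
  previewTemplate.replace('{{fileId}}', encodeURIComponent(externalFileId));

// buildPreviewUrl('1111') -> 'https://files.example.com/view?fileId=1111'
```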
@@ -295,6 +295,24 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/delete?

## Collections

### Common creation parameters

**Request parameters**

| Parameter | Description | Required |
| --- | --- | --- |
| datasetId | Dataset ID | ✅ |
| parentId | Parent ID; defaults to the root directory if omitted | |
| trainingType | Training mode. chunk: split by text length; qa: QA split; auto: enhanced training | ✅ |
| chunkSize | Estimated chunk size | |
| chunkSplitter | Custom top-priority split character | |
| qaPrompt | QA split prompt | |

**Response**

- collectionId: ID of the new collection
- insertLen: number of chunks inserted
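A sketch of these common fields assembled into one request body; the values are placeholders, and the same fields appear in the concrete endpoints below:

```ts
// Illustrative payload only; field names follow the table above.
const commonCollectionParams = {
  datasetId: '6593e137231a2be9c5603ba7', // dataset ID (example value)
  parentId: null,                        // omit or null for the root directory
  trainingType: 'chunk',                 // 'chunk' | 'qa' | 'auto'
  chunkSize: 512,
  chunkSplitter: '',
  qaPrompt: ''
};
```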
### Create an empty collection

{{< tabs tabTotal="3" >}}

@@ -500,7 +518,7 @@ data is the collection ID.
{{< /tab >}}
{{< /tabs >}}

-### Create a file collection (commercial edition)
+### Create a file collection

Pass in a file to create a collection; the file content is read and split. Currently supported: pdf, docx, md, txt, html, csv.

@@ -509,7 +527,7 @@ data is the collection ID.
{{< markdownify >}}

```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/file' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/localFile' \
--header 'Authorization: Bearer {{authorization}}' \
--form 'file=@"C:\\Users\\user\\Desktop\\fastgpt测试文件\\index.html"' \
--form 'data="{\"datasetId\":\"6593e137231a2be9c5603ba7\",\"parentId\":null,\"trainingType\":\"chunk\",\"chunkSize\":512,\"chunkSplitter\":\"\",\"qaPrompt\":\"\",\"metadata\":{}}"'
@@ -565,6 +583,68 @@ data is the collection ID.
{{< /tab >}}
{{< /tabs >}}

### Create an external file collection (commercial edition)

{{< tabs tabTotal="3" >}}
{{< tab tabName="Request example" >}}
{{< markdownify >}}

```bash
curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/externalFileUrl' \
--header 'Authorization: Bearer {{authorization}}' \
--header 'User-Agent: Apifox/1.0.0 (https://apifox.com)' \
--header 'Content-Type: application/json' \
--data-raw '{
    "externalFileUrl":"https://image.xxxxx.com/fastgpt-dev/%E6%91%82.pdf",
    "externalFileId":"1111",
    "filename":"自定义文件名",
    "datasetId":"6642d105a5e9d2b00255b27b",
    "parentId": null,

    "trainingType": "chunk",
    "chunkSize":512,
    "chunkSplitter":"",
    "qaPrompt":""
}'
```

{{< /markdownify >}}
{{< /tab >}}

{{< tab tabName="Parameters" >}}
{{< markdownify >}}

| Parameter | Description | Required |
| --- | --- | --- |
| externalFileUrl | File access link (may be a temporary link) | ✅ |
| externalFileId | External file ID | |
| filename | Custom filename | |

{{< /markdownify >}}
{{< /tab >}}

{{< tab tabName="Response example" >}}
{{< markdownify >}}

data is the collection ID.

```json
{
    "code": 200,
    "statusText": "",
    "message": "",
    "data": {
        "collectionId": "6646fcedfabd823cdc6de746",
        "insertLen": 3
    }
}
```

{{< /markdownify >}}
{{< /tab >}}
{{< /tabs >}}
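The same request issued from TypeScript. This is a sketch: the endpoint, fields, and bearer token come from the curl example above, while the helper and type names are illustrative:

```ts
// Create an external file collection via the commercial-edition API (sketch).
type ExternalFileCollectionBody = {
  externalFileUrl: string;
  externalFileId?: string;
  filename?: string;
  datasetId: string;
  parentId?: string | null;
  trainingType: 'chunk' | 'qa' | 'auto';
  chunkSize?: number;
  chunkSplitter?: string;
  qaPrompt?: string;
};

export async function createExternalFileCollection(
  baseUrl: string,
  apiKey: string,
  body: ExternalFileCollectionBody
): Promise<{ collectionId: string; insertLen: number }> {
  const res = await fetch(`${baseUrl}/api/proApi/core/dataset/collection/create/externalFileUrl`, {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify(body)
  });
  const json = await res.json();
  if (json.code !== 200) throw new Error(json.message || 'create collection failed');
  return json.data; // { collectionId, insertLen }
}
```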
### Get the collection list

{{< tabs tabTotal="3" >}}
@@ -35,8 +35,11 @@ curl --location --request POST 'https://{{host}}/api/admin/clearInvalidData' \

## V4.8.1 Release Notes

1. New - Rebuild a dataset's index with a newly selected vector model
-2. New - Workflow node version-change prompt, with the option to sync to the latest version.
-3. Improved - Plugin input debug mode now renders all parameters for input.
-4. Fixed - Plugin input default values being cleared.
-5. Fixed - Deleting a workflow node's dynamic inputs and outputs did not correctly remove the connecting edges, which could cause logic errors.
-6. Fixed - Scheduled dirty-data cleanup task
+2. New - The chat box supports fuzzy question-search suggestions, with a customizable library of preset questions.
+3. New - Workflow node version-change prompt, with the option to sync to the latest version's configuration and avoid hidden dirty data.
+4. New - The file-import dataset API is now open in the open-source edition, [see the API documentation](/docs/development/openapi/dataset/#创建一个文件集合)
+5. New - External file source dataset, [see the documentation](/docs/course/externalfile/)
+6. Improved - Plugin input debug mode now renders all parameters for input.
+7. Fixed - Plugin input default values being cleared.
+8. Fixed - Deleting a workflow node's dynamic inputs and outputs did not correctly remove the connecting edges, which could cause logic errors.
+9. Fixed - Scheduled dirty-data cleanup task
16  packages/global/core/dataset/api.d.ts (vendored)

@@ -26,18 +26,27 @@ export type DatasetCollectionChunkMetadataType = {
  qaPrompt?: string;
  metadata?: Record<string, any>;
};

// create collection params
export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
  datasetId: string;
  name: string;
-  type: `${DatasetCollectionTypeEnum}`;
+  type: DatasetCollectionTypeEnum;
+
+  tags?: string[];

  fileId?: string;
  rawLink?: string;
+  externalFileId?: string;

+  externalFileUrl?: string;
  rawTextLength?: number;
  hashRawText?: string;
};

export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
  datasetId: string;
+  tags?: string[];
};
export type TextCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
  name: string;
@@ -58,6 +67,11 @@ export type CsvTableCreateDatasetCollectionParams = {
  parentId?: string;
  fileId: string;
};
+export type ExternalFileCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
+  externalFileId?: string;
+  externalFileUrl: string;
+  filename?: string;
+};

/* ================= data ===================== */
export type PgSearchRawType = {
@@ -1,4 +1,4 @@
-/* sourceId = prefix-id; id=fileId;link url;externalId */
+/* sourceId = prefix-id; id=fileId;link url;externalFileId */
export enum CollectionSourcePrefixEnum {
  local = 'local',
  link = 'link',
14  packages/global/core/dataset/collection/utils.ts (new file)

@@ -0,0 +1,14 @@
import { CollectionWithDatasetType, DatasetCollectionSchemaType } from '../type';

export const getCollectionSourceData = (
  collection?: CollectionWithDatasetType | DatasetCollectionSchemaType
) => {
  return {
    sourceId:
      collection?.fileId ||
      collection?.rawLink ||
      collection?.externalFileId ||
      collection?.externalFileUrl,
    sourceName: collection?.name || ''
  };
};
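A quick illustration of the fallback order; the collection object below is a hypothetical partial document, not a real schema instance:

```ts
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';

// External collections have no fileId or rawLink, so sourceId falls back to the
// external fields; the first defined value in the chain wins.
const externalCollection = {
  name: 'quarterly-report.pdf',
  externalFileId: '1111',
  externalFileUrl: 'https://image.xxxxx.com/fastgpt-dev/report.pdf'
} as any; // partial document for illustration only

const { sourceId, sourceName } = getCollectionSourceData(externalCollection);
// sourceId === '1111' (externalFileId wins over externalFileUrl)
// sourceName === 'quarterly-report.pdf'
```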
@@ -22,7 +22,7 @@ export const DatasetTypeMap = {
    collectionLabel: 'common.Website'
  },
  [DatasetTypeEnum.externalFile]: {
-    icon: 'core/dataset/commonDataset',
+    icon: 'core/dataset/externalDataset',
    label: 'External File',
    collectionLabel: 'common.File'
  }
@@ -44,9 +44,11 @@ export const DatasetStatusMap = {
/* ------------ collection -------------- */
export enum DatasetCollectionTypeEnum {
  folder = 'folder',
+  virtual = 'virtual',
+
  file = 'file',
  link = 'link', // one link
-  virtual = 'virtual'
+  externalFile = 'externalFile'
}
export const DatasetCollectionTypeMap = {
  [DatasetCollectionTypeEnum.folder]: {
@@ -55,6 +57,9 @@ export const DatasetCollectionTypeMap = {
  [DatasetCollectionTypeEnum.file]: {
    name: 'core.dataset.file'
  },
+  [DatasetCollectionTypeEnum.externalFile]: {
+    name: 'core.dataset.externalFile'
+  },
  [DatasetCollectionTypeEnum.link]: {
    name: 'core.dataset.link'
  },
@@ -1,7 +1,5 @@
import { DatasetSourceReadTypeEnum, ImportDataSourceEnum } from './constants';

export const rawTextBackupPrefix = 'index,content';

export const importType2ReadType = (type: ImportDataSourceEnum) => {
  if (type === ImportDataSourceEnum.csvTable || type === ImportDataSourceEnum.fileLocal) {
    return DatasetSourceReadTypeEnum.fileLocal;
8  packages/global/core/dataset/type.d.ts (vendored)

@@ -41,7 +41,7 @@ export type DatasetCollectionSchemaType = {
  datasetId: string;
  parentId?: string;
  name: string;
-  type: `${DatasetCollectionTypeEnum}`;
+  type: DatasetCollectionTypeEnum;
  createTime: Date;
  updateTime: Date;

@@ -50,13 +50,15 @@ export type DatasetCollectionSchemaType = {
  chunkSplitter?: string;
  qaPrompt?: string;

  sourceId?: string; // relate CollectionSourcePrefixEnum
+  tags?: string[];

  fileId?: string; // local file id
  rawLink?: string; // link url
+  externalFileId?: string; // external file id

  rawTextLength?: number;
  hashRawText?: string;
-  externalSourceUrl?: string; // external import url
+  externalFileUrl?: string; // external import url
  metadata?: {
    webPageSelector?: string;
    relatedImgId?: string; // The id of the associated image collections
@@ -3,7 +3,7 @@ import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';

export function getCollectionIcon(
-  type: `${DatasetCollectionTypeEnum}` = DatasetCollectionTypeEnum.file,
+  type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file,
  name = ''
) {
  if (type === DatasetCollectionTypeEnum.folder) {
@@ -24,13 +24,13 @@ export function getSourceNameIcon({
  sourceName: string;
  sourceId?: string;
}) {
-  if (strIsLink(sourceId)) {
-    return 'common/linkBlue';
-  }
-  const fileIcon = getFileIcon(sourceName, '');
+  const fileIcon = getFileIcon(decodeURIComponent(sourceName), '');
  if (fileIcon) {
    return fileIcon;
  }
+  if (strIsLink(sourceId)) {
+    return 'common/linkBlue';
+  }

  return 'file/fill/manual';
}
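The reordered check means a URL-encoded source name now resolves to a file-type icon before the generic link icon is considered. A sketch of the effect, using the example URL from the API doc above:

```ts
// Before: any link-like sourceId returned 'common/linkBlue' immediately.
// After: the (decoded) filename is tried first, so an external PDF served from a
// URL still gets its file icon.
getSourceNameIcon({
  sourceName: '%E6%91%82.pdf',
  sourceId: 'https://image.xxxxx.com/fastgpt-dev/%E6%91%82.pdf'
});
// -> pdf file icon (previously 'common/linkBlue')
```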
@@ -10,7 +10,7 @@
    "js-yaml": "^4.1.0",
    "jschardet": "3.1.1",
    "nanoid": "^4.0.1",
-    "next": "13.5.2",
+    "next": "14.2.3",
    "openai": "4.28.0",
    "openapi-types": "^12.1.3",
    "timezones-list": "^3.0.2"
@@ -7,7 +7,7 @@ import { MongoFileSchema } from './schema';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
-import { readFileRawContent } from '../read/utils';
+import { readRawContentByFileBuffer } from '../read/utils';
import { PassThrough } from 'stream';

export function getGFSCollection(bucket: `${BucketNameEnum}`) {
@@ -196,7 +196,7 @@ export const readFileContentFromMongo = async ({
    });
  })();

-  const { rawText } = await readFileRawContent({
+  const { rawText } = await readRawContentByFileBuffer({
    extension,
    isQAImport,
    teamId,
@@ -1,11 +1,12 @@
-import { markdownProcess, simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
+import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { uploadMongoImg } from '../image/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import { addHours } from 'date-fns';

import { WorkerNameEnum, runWorker } from '../../../worker/utils';
import fs from 'fs';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { ReadFileResponse } from '../../../worker/file/type';
import { rawTextBackupPrefix } from '@fastgpt/global/core/dataset/read';

export const initMarkdownText = ({
  teamId,
@@ -28,7 +29,34 @@ export const initMarkdownText = ({
    })
  });

-export const readFileRawContent = async ({
+export type readRawTextByLocalFileParams = {
+  teamId: string;
+  path: string;
+  metadata?: Record<string, any>;
+};
+export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
+  const { path } = params;
+
+  const extension = path?.split('.')?.pop()?.toLowerCase() || '';
+
+  const buffer = fs.readFileSync(path);
+  const encoding = detectFileEncoding(buffer);
+
+  const { rawText } = await readRawContentByFileBuffer({
+    extension,
+    isQAImport: false,
+    teamId: params.teamId,
+    encoding,
+    buffer,
+    metadata: params.metadata
+  });
+
+  return {
+    rawText
+  };
+};
+
+export const readRawContentByFileBuffer = async ({
  extension,
  isQAImport,
  teamId,
@@ -69,9 +97,3 @@ export const readFileRawContent = async ({

  return { rawText };
};
-
-export const htmlToMarkdown = async (html?: string | null) => {
-  const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
-
-  return simpleMarkdownText(md);
-};
38  packages/service/common/middle/entry.ts (new file)

@@ -0,0 +1,38 @@
import { jsonRes } from '../response';
import type { NextApiResponse } from 'next';
import { withNextCors } from './cors';
import { ApiRequestProps } from '../../type/next';

export type NextApiHandler<T = any> = (
  req: ApiRequestProps,
  res: NextApiResponse<T>
) => unknown | Promise<unknown>;

export const NextEntry = ({ beforeCallback = [] }: { beforeCallback?: Promise<any>[] }) => {
  return (...args: NextApiHandler[]): NextApiHandler => {
    return async function api(req: ApiRequestProps, res: NextApiResponse) {
      try {
        await Promise.all([withNextCors(req, res), ...beforeCallback]);

        let response = null;
        for (const handler of args) {
          response = await handler(req, res);
        }

        const contentType = res.getHeader('Content-Type');
        if ((!contentType || contentType === 'application/json') && !res.writableFinished) {
          return jsonRes(res, {
            code: 200,
            data: response
          });
        }
      } catch (error) {
        return jsonRes(res, {
          code: 500,
          error,
          url: req.url
        });
      }
    };
  };
};
@@ -1,7 +1,7 @@
import { UrlFetchParams, UrlFetchResponse } from '@fastgpt/global/common/file/api';
import * as cheerio from 'cheerio';
import axios from 'axios';
-import { htmlToMarkdown } from '../file/read/utils';
+import { htmlToMarkdown } from './utils';

export const cheerioToHtml = ({
  fetchUrl,
8  packages/service/common/string/utils.ts (new file)

@@ -0,0 +1,8 @@
import { simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
import { WorkerNameEnum, runWorker } from '../../worker/utils';

export const htmlToMarkdown = async (html?: string | null) => {
  const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html || '' });

  return simpleMarkdownText(md);
};
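This helper relocates the worker-backed HTML-to-Markdown conversion that was removed from the file-read utils above, so the cheerio fetcher can import it from the string utils instead. A usage sketch with an illustrative HTML snippet:

```ts
import { htmlToMarkdown } from '@fastgpt/service/common/string/utils';

const markdown = await htmlToMarkdown('<h1>Docs</h1><p>External file <b>dataset</b></p>');
// roughly '# Docs\n\nExternal file **dataset**' after simpleMarkdownText cleanup
```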
@@ -32,6 +32,9 @@ export async function createOneCollection({
|
||||
fileId,
|
||||
rawLink,
|
||||
|
||||
externalFileId,
|
||||
externalFileUrl,
|
||||
|
||||
hashRawText,
|
||||
rawTextLength,
|
||||
metadata = {},
|
||||
@@ -61,6 +64,8 @@ export async function createOneCollection({
|
||||
|
||||
fileId,
|
||||
rawLink,
|
||||
externalFileId,
|
||||
externalFileUrl,
|
||||
|
||||
rawTextLength,
|
||||
hashRawText,
|
||||
|
@@ -66,7 +66,11 @@ const DatasetCollectionSchema = new Schema({
|
||||
type: String
|
||||
},
|
||||
|
||||
sourceId: String,
|
||||
tags: {
|
||||
type: [String],
|
||||
default: []
|
||||
},
|
||||
|
||||
// local file collection
|
||||
fileId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
@@ -74,13 +78,13 @@ const DatasetCollectionSchema = new Schema({
|
||||
},
|
||||
// web link collection
|
||||
rawLink: String,
|
||||
|
||||
// external collection
|
||||
externalFileId: String,
|
||||
|
||||
// metadata
|
||||
rawTextLength: Number,
|
||||
hashRawText: String,
|
||||
externalSourceUrl: String, // external import url
|
||||
externalFileUrl: String, // external import url
|
||||
metadata: {
|
||||
type: Object,
|
||||
default: {}
|
||||
|
@@ -2,13 +2,20 @@ import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { readFileContentFromMongo } from '../../common/file/gridfs/controller';
|
||||
import { urlsFetch } from '../../common/string/cheerio';
|
||||
import { rawTextBackupPrefix } from '@fastgpt/global/core/dataset/read';
|
||||
import { parseCsvTable2Chunks } from './training/utils';
|
||||
import { TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import axios from 'axios';
|
||||
import { readFileRawContent } from '../../common/file/read/utils';
|
||||
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
|
||||
|
||||
export const readFileRawTextByUrl = async ({ teamId, url }: { teamId: string; url: string }) => {
|
||||
export const readFileRawTextByUrl = async ({
|
||||
teamId,
|
||||
url,
|
||||
relatedId
|
||||
}: {
|
||||
teamId: string;
|
||||
url: string;
|
||||
relatedId?: string;
|
||||
}) => {
|
||||
const response = await axios({
|
||||
method: 'get',
|
||||
url: url,
|
||||
@@ -18,11 +25,14 @@ export const readFileRawTextByUrl = async ({ teamId, url }: { teamId: string; ur
|
||||
|
||||
const buffer = Buffer.from(response.data, 'binary');
|
||||
|
||||
const { rawText } = await readFileRawContent({
|
||||
const { rawText } = await readRawContentByFileBuffer({
|
||||
extension,
|
||||
teamId,
|
||||
buffer,
|
||||
encoding: 'utf-8'
|
||||
encoding: 'utf-8',
|
||||
metadata: {
|
||||
relatedId
|
||||
}
|
||||
});
|
||||
|
||||
return rawText;
|
||||
@@ -38,13 +48,15 @@ export const readDatasetSourceRawText = async ({
|
||||
type,
|
||||
sourceId,
|
||||
isQAImport,
|
||||
selector
|
||||
selector,
|
||||
relatedId
|
||||
}: {
|
||||
teamId: string;
|
||||
type: DatasetSourceReadTypeEnum;
|
||||
sourceId: string;
|
||||
isQAImport?: boolean;
|
||||
selector?: string;
|
||||
relatedId?: string;
|
||||
}): Promise<string> => {
|
||||
if (type === DatasetSourceReadTypeEnum.fileLocal) {
|
||||
const { rawText } = await readFileContentFromMongo({
|
||||
@@ -64,7 +76,8 @@ export const readDatasetSourceRawText = async ({
|
||||
} else if (type === DatasetSourceReadTypeEnum.externalFile) {
|
||||
const rawText = await readFileRawTextByUrl({
|
||||
teamId,
|
||||
url: sourceId
|
||||
url: sourceId,
|
||||
relatedId
|
||||
});
|
||||
return rawText;
|
||||
}
|
||||
|
@@ -18,6 +18,7 @@ import { countPromptTokens } from '../../../common/string/tiktoken/index';
|
||||
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { jiebaSplit } from '../../../common/string/jieba';
|
||||
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
|
||||
|
||||
type SearchDatasetDataProps = {
|
||||
teamId: string;
|
||||
@@ -98,7 +99,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
},
|
||||
'datasetId collectionId q a chunkIndex indexes'
|
||||
)
|
||||
.populate('collectionId', 'name fileId rawLink')
|
||||
.populate('collectionId', 'name fileId rawLink externalFileId externalFileUrl')
|
||||
.lean()) as DatasetDataWithCollectionType[];
|
||||
|
||||
// add score to data(It's already sorted. The first one is the one with the most points)
|
||||
@@ -130,8 +131,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
chunkIndex: data.chunkIndex,
|
||||
datasetId: String(data.datasetId),
|
||||
collectionId: String(data.collectionId?._id),
|
||||
sourceName: data.collectionId?.name || '',
|
||||
sourceId: data.collectionId?.fileId || data.collectionId?.rawLink,
|
||||
...getCollectionSourceData(data.collectionId),
|
||||
score: [{ type: SearchScoreTypeEnum.embedding, value: data.score, index }]
|
||||
};
|
||||
|
||||
@@ -205,8 +205,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
id: String(item._id),
|
||||
datasetId: String(item.datasetId),
|
||||
collectionId: String(item.collectionId),
|
||||
sourceName: collection?.name || '',
|
||||
sourceId: collection?.fileId || collection?.rawLink,
|
||||
...getCollectionSourceData(collection),
|
||||
q: item.q,
|
||||
a: item.a,
|
||||
chunkIndex: item.chunkIndex,
|
||||
|
@@ -174,7 +174,7 @@ export async function pushDataListToTrainingQueue({
|
||||
} catch (error: any) {
|
||||
addLog.error(`Insert error`, error);
|
||||
// 如果有错误,将失败的文档添加到失败列表中
|
||||
error.writeErrors.forEach((writeError: any) => {
|
||||
error.writeErrors?.forEach((writeError: any) => {
|
||||
failedDocuments.push(data[writeError.index]);
|
||||
});
|
||||
console.log('failed', failedDocuments);
|
||||
|
@@ -35,7 +35,7 @@ const TrainingDataSchema = new Schema({
|
||||
},
|
||||
billId: {
|
||||
// concat bill
|
||||
type: Schema.Types.ObjectId
|
||||
type: String
|
||||
},
|
||||
mode: {
|
||||
type: String,
|
||||
|
@@ -53,7 +53,7 @@ export const dispatchLafRequest = async (props: LafRequestProps): Promise<LafRes
|
||||
appId,
|
||||
chatId,
|
||||
responseChatItemId,
|
||||
histories: histories.slice(0, 10)
|
||||
histories: histories?.slice(0, 10)
|
||||
},
|
||||
variables,
|
||||
...dynamicInput,
|
||||
|
@@ -21,7 +21,7 @@
|
||||
"mammoth": "^1.6.0",
|
||||
"mongoose": "^7.0.2",
|
||||
"multer": "1.4.5-lts.1",
|
||||
"next": "13.5.2",
|
||||
"next": "14.2.3",
|
||||
"nextjs-cors": "^2.1.2",
|
||||
"node-cron": "^3.0.3",
|
||||
"node-xlsx": "^0.23.0",
|
||||
|
@@ -19,7 +19,9 @@ export const checkDatasetLimit = async ({
|
||||
if (!standardConstants) return;
|
||||
|
||||
if (usedSize + insertLen >= datasetMaxSize) {
|
||||
return Promise.reject(TeamErrEnum.datasetSizeNotEnough);
|
||||
return Promise.reject(
|
||||
`您的知识库容量为: ${datasetMaxSize}组,已使用: ${usedSize}组,导入当前文件需要: ${insertLen}组,请增加知识库容量后导入。`
|
||||
);
|
||||
}
|
||||
|
||||
if (usedPoints >= totalPoints) {
|
||||
|
@@ -9,7 +9,7 @@ import { readXlsxRawText } from './extension/xlsx';
|
||||
import { readCsvRawText } from './extension/csv';
|
||||
|
||||
parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
|
||||
const readFileRawContent = async (params: ReadRawTextByBuffer) => {
|
||||
const readRawContentByFileBuffer = async (params: ReadRawTextByBuffer) => {
|
||||
switch (params.extension) {
|
||||
case 'txt':
|
||||
case 'md':
|
||||
@@ -41,7 +41,7 @@ parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
|
||||
try {
|
||||
parentPort?.postMessage({
|
||||
type: 'success',
|
||||
data: await readFileRawContent(newProps)
|
||||
data: await readRawContentByFileBuffer(newProps)
|
||||
});
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
|
@@ -101,6 +101,7 @@ export const iconPaths = {
|
||||
'core/dataset/commonDataset': () => import('./icons/core/dataset/commonDataset.svg'),
|
||||
'core/dataset/datasetFill': () => import('./icons/core/dataset/datasetFill.svg'),
|
||||
'core/dataset/datasetLight': () => import('./icons/core/dataset/datasetLight.svg'),
|
||||
'core/dataset/externalDataset': () => import('./icons/core/dataset/externalDataset.svg'),
|
||||
'core/dataset/fileCollection': () => import('./icons/core/dataset/fileCollection.svg'),
|
||||
'core/dataset/fullTextRecall': () => import('./icons/core/dataset/fullTextRecall.svg'),
|
||||
'core/dataset/manualCollection': () => import('./icons/core/dataset/manualCollection.svg'),
|
||||
|
@@ -0,0 +1,5 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 23 20">
|
||||
<path fill-rule="evenodd" clip-rule="evenodd"
|
||||
d="M15.7233 1.21707C14.9554 1.3079 14.4221 2.00197 14.532 2.76731L16.6975 17.8447C16.8074 18.6101 17.519 19.1569 18.2868 19.066L21.5433 18.6808C22.3111 18.59 22.8445 17.8959 22.7345 17.1306L20.5691 2.05317C20.4592 1.28782 19.7476 0.741021 18.9797 0.831852L15.7233 1.21707ZM0.830017 2.32412C0.830017 1.55092 1.45682 0.924121 2.23002 0.924121H5.51815C6.29135 0.924121 6.91815 1.55092 6.91815 2.32412V17.675C6.91815 18.4482 6.29135 19.075 5.51815 19.075H2.23002C1.45682 19.075 0.830017 18.4482 0.830017 17.675V2.32412ZM7.9198 2.32412C7.9198 1.55092 8.5466 0.924121 9.3198 0.924121H12.6079C13.3811 0.924121 14.0079 1.55092 14.0079 2.32412V17.675C14.0079 18.4482 13.3811 19.075 12.6079 19.075H9.3198C8.5466 19.075 7.9198 18.4482 7.9198 17.675V2.32412Z"
|
||||
fill="#8A95A7" />
|
||||
</svg>
|
After Width: | Height: | Size: 899 B |
@@ -10,6 +10,7 @@ import React from 'react';
|
||||
|
||||
type Props = Omit<NumberInputProps, 'onChange'> & {
|
||||
onChange: (e: number | '') => any;
|
||||
placeholder?: string;
|
||||
};
|
||||
|
||||
const MyNumberInput = (props: Props) => {
|
||||
@@ -24,7 +25,7 @@ const MyNumberInput = (props: Props) => {
|
||||
}
|
||||
}}
|
||||
>
|
||||
<NumberInputField />
|
||||
<NumberInputField placeholder={props?.placeholder} />
|
||||
<NumberInputStepper>
|
||||
<NumberIncrementStepper />
|
||||
<NumberDecrementStepper />
|
||||
|
@@ -22,6 +22,7 @@ type Props = Omit<BoxProps, 'resize' | 'onChange'> & {
|
||||
onChange?: (e: string) => void;
|
||||
variables?: EditorVariablePickerType[];
|
||||
defaultHeight?: number;
|
||||
placeholder?: string;
|
||||
};
|
||||
|
||||
const options = {
|
||||
|
@@ -27,18 +27,18 @@
|
||||
"next-i18next": "15.2.0",
|
||||
"papaparse": "^5.4.1",
|
||||
"pdfjs-dist": "4.0.269",
|
||||
"react": "18.2.0",
|
||||
"react": "18.3.1",
|
||||
"use-context-selector": "^1.4.4",
|
||||
"react-day-picker": "^8.7.1",
|
||||
"react-dom": "18.2.0",
|
||||
"react-dom": "18.3.1",
|
||||
"react-i18next": "13.5.0",
|
||||
"react-beautiful-dnd": "^13.1.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/lodash": "^4.14.191",
|
||||
"@types/papaparse": "^5.3.7",
|
||||
"@types/react": "18.2.0",
|
||||
"@types/react-dom": "18.2.0",
|
||||
"@types/react": "18.3.0",
|
||||
"@types/react-dom": "18.3.0",
|
||||
"@types/react-beautiful-dnd": "^13.1.8"
|
||||
}
|
||||
}
|
||||
|
1778
pnpm-lock.yaml
generated
1778
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,5 @@
|
||||
{
|
||||
"Collection tags": "Tags",
|
||||
"Common Dataset": "Common dataset",
|
||||
"Common Dataset Desc": "Can be built by importing files, web links, or manual entry",
|
||||
"Confirm to rebuild embedding tip": "Are you sure to switch the knowledge base index?\nSwitching index is a very heavy operation that requires re-indexing all the data in your knowledge base, which may take a long time. Please ensure that the remaining points in your account are sufficient.\n\nIn addition, you need to be careful to modify the applications that select this knowledge base to avoid mixing them with other index model knowledge bases.",
|
||||
@@ -6,6 +7,7 @@
|
||||
"External file Dataset Desc": "You can import files from an external file library to build a knowledge base. Files are not stored twice",
|
||||
"External id": "File id",
|
||||
"External read url": "External read url",
|
||||
"External read url tip": "You can configure the reading address of your file library. This allows users to read and authenticate. You can currently use the {{fileId}} variable to refer to the external file ID.",
|
||||
"External url": "File read url",
|
||||
"Folder Dataset": "Folder",
|
||||
"Rebuild embedding start tip": "The task of switching index models has begun",
|
||||
|
@@ -1,4 +1,5 @@
|
||||
{
|
||||
"Click to view raw source": "View source",
|
||||
"Click to view file": "Click to view the original file",
|
||||
"Release the mouse to upload the file": "Release the mouse to upload the file",
|
||||
"upload error description": "Only supports uploading multiple files or one folder at a time",
|
||||
|
@@ -1,4 +1,5 @@
|
||||
{
|
||||
"Collection tags": "集合标签",
|
||||
"Common Dataset": "通用知识库",
|
||||
"Common Dataset Desc": "可通过导入文件、网页链接或手动录入形式构建知识库",
|
||||
"Confirm to rebuild embedding tip": "确认为知识库切换索引?\n切换索引是一个非常重量的操作,需要对您知识库内所有数据进行重新索引,时间可能较长,请确保账号内剩余积分充足。\n\n此外,你还需要注意修改选择该知识库的应用,避免它们与其他索引模型知识库混用。",
|
||||
@@ -6,6 +7,7 @@
|
||||
"External file Dataset Desc": "可以从外部文件库导入文件构建知识库,文件不会进行二次存储",
|
||||
"External id": "文件阅读ID",
|
||||
"External read url": "外部预览地址",
|
||||
"External read url tip": "可以配置你文件库的阅读地址。便于对用户进行阅读鉴权操作。目前可以使用 {{fileId}} 变量来指代外部文件ID。",
|
||||
"External url": "文件访问URL",
|
||||
"Folder Dataset": "文件夹",
|
||||
"Rebuild embedding start tip": "切换索引模型任务已开始",
|
||||
|
@@ -1,4 +1,5 @@
|
||||
{
|
||||
"Click to view raw source": "点击查看来源",
|
||||
"Click to view file": "点击查看原始文件",
|
||||
"Release the mouse to upload the file": "松开鼠标上传文件",
|
||||
"upload error description": "单次只支持上传多个文件或者一个文件夹",
|
||||
|
@@ -88,8 +88,6 @@ const nextConfig = {
|
||||
},
|
||||
transpilePackages: ['@fastgpt/*', 'ahooks', '@chakra-ui/*', 'react'],
|
||||
experimental: {
|
||||
// 外部包独立打包
|
||||
serverComponentsExternalPackages: ['mongoose', 'pg'],
|
||||
// 指定导出包优化,按需引入包模块
|
||||
optimizePackageImports: ['mongoose', 'pg'],
|
||||
outputFileTracingRoot: path.join(__dirname, '../../')
|
||||
|
@@ -42,13 +42,13 @@
|
||||
"lodash": "^4.17.21",
|
||||
"mermaid": "^10.2.3",
|
||||
"nanoid": "^4.0.1",
|
||||
"next": "13.5.2",
|
||||
"next": "14.2.3",
|
||||
"next-i18next": "15.2.0",
|
||||
"nextjs-node-loader": "^1.1.5",
|
||||
"nprogress": "^0.2.0",
|
||||
"react": "18.2.0",
|
||||
"react": "18.3.1",
|
||||
"react-day-picker": "^8.7.1",
|
||||
"react-dom": "18.2.0",
|
||||
"react-dom": "18.3.1",
|
||||
"react-hook-form": "7.43.1",
|
||||
"react-i18next": "13.5.0",
|
||||
"react-markdown": "^8.0.7",
|
||||
@@ -71,12 +71,12 @@
|
||||
"@types/jsonwebtoken": "^9.0.3",
|
||||
"@types/lodash": "^4.14.191",
|
||||
"@types/node": "^20.8.5",
|
||||
"@types/react": "18.2.0",
|
||||
"@types/react-dom": "18.2.0",
|
||||
"@types/react": "18.3.0",
|
||||
"@types/react-dom": "18.3.0",
|
||||
"@types/react-syntax-highlighter": "^15.5.6",
|
||||
"@types/request-ip": "^0.0.37",
|
||||
"eslint": "8.34.0",
|
||||
"eslint-config-next": "13.1.6",
|
||||
"eslint-config-next": "14.2.3",
|
||||
"nextjs-node-loader": "^1.1.5",
|
||||
"typescript": "4.9.5"
|
||||
}
|
||||
|
@@ -46,7 +46,7 @@ const QuoteModal = ({
|
||||
title={
|
||||
<Box>
|
||||
{metadata ? (
|
||||
<RawSourceBox {...metadata} canView={false} />
|
||||
<RawSourceBox {...metadata} canView={showDetail} />
|
||||
) : (
|
||||
<>{t('core.chat.Quote Amount', { amount: rawSearch.length })}</>
|
||||
)}
|
||||
|
@@ -14,7 +14,6 @@ import MyTooltip from '../MyTooltip';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { EventNameEnum, eventBus } from '@/web/common/utils/eventbus';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { getFileAndOpen } from '@/web/core/dataset/utils';
|
||||
import { MARKDOWN_QUOTE_SIGN } from '@fastgpt/global/core/chat/constants';
|
||||
|
||||
const CodeLight = dynamic(() => import('./CodeLight'), { ssr: false });
|
||||
@@ -132,7 +131,7 @@ const A = React.memo(function A({ children, ...props }: any) {
|
||||
);
|
||||
}
|
||||
|
||||
// quote link
|
||||
// quote link(未使用)
|
||||
if (children?.length === 1 && typeof children?.[0] === 'string') {
|
||||
const text = String(children);
|
||||
if (text === MARKDOWN_QUOTE_SIGN && props.href) {
|
||||
@@ -147,7 +146,7 @@ const A = React.memo(function A({ children, ...props }: any) {
|
||||
_hover={{
|
||||
color: 'primary.700'
|
||||
}}
|
||||
onClick={() => getFileAndOpen(props.href)}
|
||||
// onClick={() => getCollectionSourceAndOpen(props.href)}
|
||||
/>
|
||||
</MyTooltip>
|
||||
);
|
||||
|
@@ -218,6 +218,7 @@ const QuoteItem = ({
|
||||
<RawSourceBox
|
||||
fontWeight={'bold'}
|
||||
color={'black'}
|
||||
collectionId={quoteItem.collectionId}
|
||||
sourceName={quoteItem.sourceName}
|
||||
sourceId={quoteItem.sourceId}
|
||||
canView={canViewSource}
|
||||
|
@@ -1,33 +1,39 @@
|
||||
import React, { useMemo } from 'react';
|
||||
import { Box, BoxProps } from '@chakra-ui/react';
|
||||
import { useToast } from '@fastgpt/web/hooks/useToast';
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import MyTooltip from '@/components/MyTooltip';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { getFileAndOpen } from '@/web/core/dataset/utils';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
|
||||
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useI18n } from '@/web/context/I18n';
|
||||
|
||||
type Props = BoxProps & {
|
||||
sourceName?: string;
|
||||
collectionId: string;
|
||||
sourceId?: string;
|
||||
canView?: boolean;
|
||||
};
|
||||
|
||||
const RawSourceBox = ({ sourceId, sourceName = '', canView = true, ...props }: Props) => {
|
||||
const RawSourceBox = ({
|
||||
sourceId,
|
||||
collectionId,
|
||||
sourceName = '',
|
||||
canView = true,
|
||||
...props
|
||||
}: Props) => {
|
||||
const { t } = useTranslation();
|
||||
const { fileT } = useI18n();
|
||||
const { toast } = useToast();
|
||||
const { setLoading } = useSystemStore();
|
||||
|
||||
const canPreview = useMemo(() => !!sourceId && canView, [canView, sourceId]);
|
||||
const canPreview = !!sourceId && canView;
|
||||
|
||||
const icon = useMemo(() => getSourceNameIcon({ sourceId, sourceName }), [sourceId, sourceName]);
|
||||
const read = getCollectionSourceAndOpen(collectionId);
|
||||
|
||||
return (
|
||||
<MyTooltip label={canPreview ? fileT('Click to view file') : ''} shouldWrapChildren={false}>
|
||||
<MyTooltip
|
||||
label={canPreview ? fileT('Click to view raw source') : ''}
|
||||
shouldWrapChildren={false}
|
||||
>
|
||||
<Box
|
||||
color={'myGray.900'}
|
||||
fontWeight={'medium'}
|
||||
@@ -37,18 +43,7 @@ const RawSourceBox = ({ sourceId, sourceName = '', canView = true, ...props }: P
|
||||
? {
|
||||
cursor: 'pointer',
|
||||
textDecoration: 'underline',
|
||||
onClick: async () => {
|
||||
setLoading(true);
|
||||
try {
|
||||
await getFileAndOpen(sourceId as string);
|
||||
} catch (error) {
|
||||
toast({
|
||||
title: getErrText(error, t('error.fileNotFound')),
|
||||
status: 'error'
|
||||
});
|
||||
}
|
||||
setLoading(false);
|
||||
}
|
||||
onClick: read
|
||||
}
|
||||
: {})}
|
||||
{...props}
|
||||
|
@@ -4,11 +4,11 @@ import {
|
||||
DraggableStateSnapshot
|
||||
} from '@fastgpt/web/components/common/DndDrag/index';
|
||||
import Container from '../../components/Container';
|
||||
import { DragHandleIcon, MinusIcon, SmallAddIcon } from '@chakra-ui/icons';
|
||||
import { MinusIcon, SmallAddIcon } from '@chakra-ui/icons';
|
||||
import { IfElseListItemType } from '@fastgpt/global/core/workflow/template/system/ifElse/type';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { ReferenceValueProps } from '@fastgpt/global/core/workflow/type/io';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import { ReferSelector, useReference } from '../render/RenderInput/templates/Reference';
|
||||
import { WorkflowIOValueTypeEnum } from '@fastgpt/global/core/workflow/constants';
|
||||
import {
|
||||
|
@@ -2,7 +2,7 @@ import React, { useCallback, useMemo } from 'react';
|
||||
import NodeCard from './render/NodeCard';
|
||||
import { NodeProps } from 'reactflow';
|
||||
import { FlowNodeItemType } from '@fastgpt/global/core/workflow/type';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import {
|
||||
Box,
|
||||
Button,
|
||||
|
@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { PgDatasetTableName } from '@fastgpt/global/common/vectorStore/constants';
|
||||
import { connectionMongo } from '@fastgpt/service/common/mongo';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
|
@@ -4,7 +4,7 @@
|
||||
import type { NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { authFile } from '@fastgpt/service/support/permission/auth/file';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
|
||||
import { ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
|
@@ -1,5 +1,5 @@
|
||||
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
|
||||
import { countGptMessagesTokens } from '@fastgpt/service/common/string/tiktoken';
|
||||
|
@@ -7,7 +7,7 @@ import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/use
|
||||
import { checkTeamAppLimit } from '@fastgpt/service/support/permission/teamLimit';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
|
||||
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
const {
|
||||
|
@@ -6,7 +6,7 @@ import { authApp } from '@fastgpt/service/support/permission/auth/app';
|
||||
import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
|
||||
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
const { appId } = req.query as { appId: string };
|
||||
|
@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { authApp } from '@fastgpt/service/support/permission/auth/app';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
|
||||
/* 获取我的模型 */
|
||||
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
|
@@ -7,7 +7,7 @@ import { addDays } from 'date-fns';
|
||||
import type { GetAppChatLogsParams } from '@/global/core/api/appReq.d';
|
||||
import { authApp } from '@fastgpt/service/support/permission/auth/app';
|
||||
import { ChatItemCollectionName } from '@fastgpt/service/core/chat/chatItemSchema';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
|
||||
async function handler(
|
||||
req: NextApiRequest,
|
||||
|
@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
|
||||
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
|
||||
import { AppListItemType } from '@fastgpt/global/core/app/type';
|
||||
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
|
||||
async function handler(req: NextApiRequest, res: NextApiResponse<any>): Promise<AppListItemType[]> {
|
||||
// 凭证校验
|
||||
|
@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
|
||||
import type { AppUpdateParams } from '@/global/core/app/api';
|
||||
import { authApp } from '@fastgpt/service/support/permission/auth/app';
|
||||
import { beforeUpdateAppFormat } from '@fastgpt/service/core/app/controller';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
|
||||
/* 获取我的模型 */
|
||||
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
|
@@ -1,5 +1,5 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
|
||||
import { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
|
||||
import { AppVersionSchemaType } from '@fastgpt/global/core/app/version';
|
||||
|
@@ -1,5 +1,5 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { authApp } from '@fastgpt/service/support/permission/auth/app';
|
||||
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
|
@@ -1,5 +1,5 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { authApp } from '@fastgpt/service/support/permission/auth/app';
|
||||
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
|
@@ -9,7 +9,7 @@ import { getChatItems } from '@fastgpt/service/core/chat/controller';
|
||||
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
|
||||
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
|
||||
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
|
||||
async function handler(
|
||||
req: NextApiRequest,
|
||||
|
@@ -5,7 +5,7 @@ import type { DatasetSimpleItemType } from '@fastgpt/global/core/dataset/type.d'
|
||||
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
|
||||
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
|
||||
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
|
||||
import { NextAPI } from '@/service/middle/entry';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
|
||||
/* get all dataset by teamId or tmbId */
|
||||
async function handler(
|
||||
|
@@ -17,8 +17,6 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
|
||||
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
|
||||
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
@@ -106,8 +104,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
||||
return collectionId;
|
||||
});
|
||||
|
||||
startTrainingQueue(true);
|
||||
|
||||
jsonRes(res);
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
|
@@ -1,153 +0,0 @@
|
||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
|
||||
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
|
||||
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
|
||||
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
|
||||
|
||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||
const {
|
||||
fileId,
|
||||
trainingType = TrainingModeEnum.chunk,
|
||||
chunkSize = 512,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
...body
|
||||
} = req.body as FileIdCreateDatasetCollectionParams;
|
||||
|
||||
try {
|
||||
await connectToDatabase();
|
||||
|
||||
const { teamId, tmbId, dataset } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
per: 'w',
|
||||
datasetId: body.datasetId
|
||||
});
|
||||
|
||||
// 1. read file
|
||||
const { rawText, filename } = await readFileContentFromMongo({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId
|
||||
});
|
||||
// 2. split chunks
|
||||
const chunks = rawText2Chunks({
|
||||
rawText,
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
|
||||
customReg: chunkSplitter ? [chunkSplitter] : []
|
||||
});
|
||||
|
||||
// 3. auth limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
insertLen: predictDataLimitLength(trainingType, chunks)
|
||||
});
|
||||
|
||||
await mongoSessionRun(async (session) => {
|
||||
// 4. create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...body,
|
||||
teamId,
|
||||
tmbId,
|
||||
type: DatasetCollectionTypeEnum.file,
|
||||
name: filename,
|
||||
fileId,
|
||||
metadata: {
|
||||
relatedImgId: fileId
|
||||
},
|
||||
|
||||
// special metadata
|
||||
trainingType,
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
|
||||
hashRawText: hashStr(rawText),
|
||||
rawTextLength: rawText.length,
|
||||
session
|
||||
});
|
||||
|
||||
// 5. create training bill
|
||||
const { billId } = await createTrainingUsage({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: filename,
|
||||
billSource: UsageSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel)?.name,
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name,
|
||||
session
|
||||
});
|
||||
|
||||
// 6. insert to training queue
|
||||
await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId: dataset._id,
|
||||
collectionId,
|
||||
agentModel: dataset.agentModel,
|
||||
vectorModel: dataset.vectorModel,
|
||||
trainingMode: trainingType,
|
||||
prompt: qaPrompt,
|
||||
billId,
|
||||
data: chunks.map((item, index) => ({
|
||||
...item,
|
||||
chunkIndex: index
|
||||
})),
|
||||
session
|
||||
});
|
||||
|
||||
// 7. remove related image ttl
|
||||
await MongoImage.updateMany(
|
||||
{
|
||||
teamId,
|
||||
'metadata.relatedId': fileId
|
||||
},
|
||||
{
|
||||
// Remove expiredTime to avoid ttl expiration
|
||||
$unset: {
|
||||
expiredTime: 1
|
||||
}
|
||||
},
|
||||
{
|
||||
session
|
||||
}
|
||||
);
|
||||
|
||||
return collectionId;
|
||||
});
|
||||
|
||||
// remove buffer
|
||||
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
|
||||
|
||||
startTrainingQueue(true);
|
||||
|
||||
jsonRes(res);
|
||||
} catch (error) {
|
||||
jsonRes(res, {
|
||||
code: 500,
|
||||
error
|
||||
});
|
||||
}
|
||||
}
|
@@ -0,0 +1,149 @@
|
||||
import type { NextApiResponse } from 'next';
|
||||
import { jsonRes } from '@fastgpt/service/common/response';
|
||||
import { connectToDatabase } from '@/service/mongo';
|
||||
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
|
||||
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
|
||||
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
|
||||
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
|
||||
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
|
||||
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
|
||||
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { ApiRequestProps } from '@fastgpt/service/type/next';
|
||||
|
||||
async function handler(
|
||||
req: ApiRequestProps<FileIdCreateDatasetCollectionParams>,
|
||||
res: NextApiResponse<any>
|
||||
) {
|
||||
const {
|
||||
fileId,
|
||||
trainingType = TrainingModeEnum.chunk,
|
||||
chunkSize = 512,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
...body
|
||||
} = req.body;
|
||||
|
||||
await connectToDatabase();
|
||||
|
||||
const { teamId, tmbId, dataset } = await authDataset({
|
||||
req,
|
||||
authToken: true,
|
||||
authApiKey: true,
|
||||
per: 'w',
|
||||
datasetId: body.datasetId
|
||||
});
|
||||
|
||||
// 1. read file
|
||||
const { rawText, filename } = await readFileContentFromMongo({
|
||||
teamId,
|
||||
bucketName: BucketNameEnum.dataset,
|
||||
fileId
|
||||
});
|
||||
// 2. split chunks
|
||||
const chunks = rawText2Chunks({
|
||||
rawText,
|
||||
chunkLen: chunkSize,
|
||||
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
|
||||
customReg: chunkSplitter ? [chunkSplitter] : []
|
||||
});
|
||||
|
||||
// 3. auth limit
|
||||
await checkDatasetLimit({
|
||||
teamId,
|
||||
insertLen: predictDataLimitLength(trainingType, chunks)
|
||||
});
|
||||
|
||||
await mongoSessionRun(async (session) => {
|
||||
// 4. create collection
|
||||
const { _id: collectionId } = await createOneCollection({
|
||||
...body,
|
||||
teamId,
|
||||
tmbId,
|
||||
type: DatasetCollectionTypeEnum.file,
|
||||
name: filename,
|
||||
fileId,
|
||||
metadata: {
|
||||
relatedImgId: fileId
|
||||
},
|
||||
|
||||
// special metadata
|
||||
trainingType,
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
|
||||
hashRawText: hashStr(rawText),
|
||||
rawTextLength: rawText.length,
|
||||
session
|
||||
});
|
||||
|
||||
// 5. create training bill
|
||||
const { billId } = await createTrainingUsage({
|
||||
teamId,
|
||||
tmbId,
|
||||
appName: filename,
|
||||
billSource: UsageSourceEnum.training,
|
||||
vectorModel: getVectorModel(dataset.vectorModel)?.name,
|
||||
agentModel: getLLMModel(dataset.agentModel)?.name,
|
||||
session
|
||||
});
|
||||
|
||||
// 6. insert to training queue
|
||||
await pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
datasetId: dataset._id,
|
||||
collectionId,
|
||||
agentModel: dataset.agentModel,
|
||||
vectorModel: dataset.vectorModel,
|
||||
trainingMode: trainingType,
|
||||
prompt: qaPrompt,
|
||||
billId,
|
||||
data: chunks.map((item, index) => ({
|
||||
...item,
|
||||
chunkIndex: index
|
||||
})),
|
||||
session
|
||||
});
|
||||
|
||||
// 7. remove related image ttl
|
||||
await MongoImage.updateMany(
|
||||
{
|
||||
teamId,
|
||||
'metadata.relatedId': fileId
|
||||
},
|
||||
{
|
||||
// Remove expiredTime to avoid ttl expiration
|
||||
$unset: {
|
||||
expiredTime: 1
|
||||
}
|
||||
},
|
||||
{
|
||||
session
|
||||
}
|
||||
);
|
||||
|
||||
return collectionId;
|
||||
});
|
||||
|
||||
// remove buffer
|
||||
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
|
||||
|
||||
jsonRes(res);
|
||||
}
|
||||
|
||||
export default NextAPI(handler);
|
@@ -0,0 +1,186 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
  DatasetCollectionTypeEnum,
  TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { getNanoid, hashStr } from '@fastgpt/global/common/string/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getDatasetModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  /**
   * Creates the multer uploader
   */
  const upload = getUploadModel({
    maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
  });
  let filePaths: string[] = [];

  try {
    const { file, data, bucketName } = await upload.doUpload<FileCreateDatasetCollectionParams>(
      req,
      res,
      BucketNameEnum.dataset
    );
    filePaths = [file.path];

    if (!file || !bucketName) {
      throw new Error('file is empty');
    }

    const { teamId, tmbId, dataset } = await authDataset({
      req,
      authApiKey: true,
      per: 'w',
      datasetId: data.datasetId
    });

    const {
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt
    } = data;
    const { fileMetadata, collectionMetadata, ...collectionData } = data;
    const collectionName = file.originalname;

    const relatedImgId = getNanoid();

    // 1. read file
    const { rawText } = await readRawTextByLocalFile({
      teamId,
      path: file.path,
      metadata: {
        ...fileMetadata,
        relatedId: relatedImgId
      }
    });

    // 2. upload file
    const fileId = await uploadFile({
      teamId,
      tmbId,
      bucketName,
      path: file.path,
      filename: file.originalname,
      contentType: file.mimetype,
      metadata: fileMetadata
    });

    // 3. delete tmp file
    removeFilesByPaths(filePaths);

    // 4. split raw text to chunks
    const { chunks } = splitText2Chunks({
      text: rawText,
      chunkLen: chunkSize,
      overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
      customReg: chunkSplitter ? [chunkSplitter] : []
    });

    // 5. check dataset limit
    await checkDatasetLimit({
      teamId,
      insertLen: predictDataLimitLength(trainingType, chunks)
    });

    // 6. create collection and training bill
    const { collectionId, insertResults } = await mongoSessionRun(async (session) => {
      const { _id: collectionId } = await createOneCollection({
        ...collectionData,
        name: collectionName,
        teamId,
        tmbId,
        type: DatasetCollectionTypeEnum.file,
        fileId,
        rawTextLength: rawText.length,
        hashRawText: hashStr(rawText),
        metadata: {
          ...collectionMetadata,
          relatedImgId
        },
        session
      });
      const { billId } = await createTrainingUsage({
        teamId,
        tmbId,
        appName: collectionName,
        billSource: UsageSourceEnum.training,
        vectorModel: getVectorModel(dataset.vectorModel)?.name,
        agentModel: getDatasetModel(dataset.agentModel)?.name
      });

      // 7. push chunks to training queue
      const insertResults = await pushDataListToTrainingQueue({
        teamId,
        tmbId,
        datasetId: dataset._id,
        collectionId,
        agentModel: dataset.agentModel,
        vectorModel: dataset.vectorModel,
        trainingMode: trainingType,
        prompt: qaPrompt,
        billId,
        data: chunks.map((text, index) => ({
          q: text,
          chunkIndex: index
        }))
      });

      // 8. remove image expired time
      await MongoImage.updateMany(
        {
          teamId,
          'metadata.relatedId': relatedImgId
        },
        {
          // Remove expiredTime to avoid ttl expiration
          $unset: {
            expiredTime: 1
          }
        },
        {
          session
        }
      );

      return {
        collectionId,
        insertResults
      };
    });

    jsonRes(res, {
      data: { collectionId, results: insertResults }
    });
  } catch (error) {
    removeFilesByPaths(filePaths);

    return Promise.reject(error);
  }
}

export const config = {
  api: {
    bodyParser: false
  }
};

export default NextAPI(handler);
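Because this route disables Next.js body parsing and reads a multipart form through multer, the client has to send the binary file together with a JSON-encoded `data` field. A minimal browser-side sketch (illustrative only; it assumes a bearer token and reuses the `localFile` path and the `file`/`data` field names that `upload.doUpload` consumes above):

```ts
// Hypothetical sketch: create a collection from a locally selected file.
export async function createCollectionFromLocalFile(datasetId: string, file: File, token: string) {
  const form = new FormData();
  form.append('file', file);
  // trainingType/chunkSize/chunkSplitter/qaPrompt are the fields the handler destructures from `data`.
  form.append('data', JSON.stringify({ datasetId, trainingType: 'chunk', chunkSize: 512 }));

  const res = await fetch('/api/core/dataset/collection/create/localFile', {
    method: 'POST',
    headers: { Authorization: `Bearer ${token}` }, // let the browser set the multipart boundary
    body: form
  });
  return res.json(); // the JSON body carries the new collectionId and the queue insert results
}
```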
@@ -8,6 +8,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { getFileById } from '@fastgpt/service/common/file/gridfs/controller';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
@@ -36,8 +37,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
      data: {
        ...collection,
        canWrite,
        sourceName: collection?.name,
        sourceId: collection?.fileId || collection?.rawLink,
        ...getCollectionSourceData(collection),
        file
      }
    });
66
projects/app/src/pages/api/core/dataset/collection/read.ts
Normal file
@@ -0,0 +1,66 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';

export type readCollectionSourceQuery = {
  collectionId: string;
};

export type readCollectionSourceBody = {};

export type readCollectionSourceResponse = {
  type: 'url';
  value: string;
};

async function handler(
  req: ApiRequestProps<readCollectionSourceBody, readCollectionSourceQuery>,
  res: ApiResponseType<any>
): Promise<readCollectionSourceResponse> {
  const { collection, teamId, tmbId } = await authDatasetCollection({
    req,
    authToken: true,
    authApiKey: true,
    collectionId: req.query.collectionId,
    per: 'r'
  });

  const sourceUrl = await (async () => {
    if (collection.type === DatasetCollectionTypeEnum.file && collection.fileId) {
      const token = await createFileToken({
        bucketName: BucketNameEnum.dataset,
        teamId,
        tmbId,
        fileId: collection.fileId
      });

      return `${ReadFileBaseUrl}?token=${token}`;
    }
    if (collection.type === DatasetCollectionTypeEnum.link && collection.rawLink) {
      return collection.rawLink;
    }
    if (collection.type === DatasetCollectionTypeEnum.externalFile) {
      if (collection.externalFileId && collection.datasetId.externalReadUrl) {
        return collection.datasetId.externalReadUrl.replace(
          '{{fileId}}',
          collection.externalFileId
        );
      }
      if (collection.externalFileUrl) {
        return collection.externalFileUrl;
      }
    }

    return '';
  })();

  return {
    type: 'url',
    value: sourceUrl
  };
}

export default NextAPI(handler);
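In the external-file branch above, the preview URL comes from substituting the collection's `externalFileId` into the dataset-level `externalReadUrl` template. A tiny self-contained illustration of that substitution (values invented for the example):

```ts
// Illustration of the {{fileId}} placeholder resolution used by the handler above.
const externalReadUrl = 'https://example.com/read?fileId={{fileId}}'; // hypothetical template on the dataset
const externalFileId = 'doc-123'; // hypothetical id stored on the collection

const previewUrl = externalReadUrl.replace('{{fileId}}', externalFileId);
// -> 'https://example.com/read?fileId=doc-123'
```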
@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { deleteDatasetData } from '@/service/core/dataset/data/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { id: dataId } = req.query as {

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

export type Response = {
  id: string;

@@ -15,7 +15,7 @@ import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { collectionId, q, a, indexes } = req.body as InsertOneDatasetDataProps;

@@ -7,7 +7,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { PagingData } from '@/types';
import { replaceRegChars } from '@fastgpt/global/common/string/tools';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  let {

@@ -1,7 +1,6 @@
/* push data to training queue */
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type {
  PushDatasetDataProps,
  PushDatasetDataResponse
@@ -10,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const body = req.body as PushDatasetDataProps;

@@ -6,7 +6,7 @@ import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { UpdateDatasetDataProps } from '@/global/core/dataset/api';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { id, q = '', a, indexes = [] } = req.body as UpdateDatasetDataProps;

@@ -8,7 +8,7 @@ import {
  checkExportDatasetLimit,
  updateExportDatasetLimit
} from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  let { datasetId } = req.query as {

@@ -3,7 +3,7 @@ import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';

export type PostPreviewFilesChunksProps = {

@@ -1,36 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetFile } from '@fastgpt/service/support/permission/auth/dataset';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();

    const { fileId } = req.query as { fileId: string };

    if (!fileId) {
      throw new Error('fileId is empty');
    }

    const { teamId, tmbId } = await authDatasetFile({ req, authToken: true, fileId, per: 'r' });

    const token = await createFileToken({
      bucketName: BucketNameEnum.dataset,
      teamId,
      tmbId,
      fileId
    });

    jsonRes(res, {
      data: `${ReadFileBaseUrl}?token=${token}`
    });
  } catch (error) {
    jsonRes(res, {
      code: 500,
      error
    });
  }
}
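All of the route files above only swap the import path from `@/service/middle/entry` to `@/service/middleware/entry`; the `NextAPI` wrapper keeps composing handlers in sequence. A schematic sketch of how such a route is assembled (simplified and illustrative; the real wrapper, shown later in this diff, also runs CORS and database-connection callbacks before the handlers):

```ts
// Simplified sketch of the NextAPI composition pattern used by these routes.
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse) {
  // Whatever this returns is serialized as the `data` field of the JSON response.
  return { ok: true };
}

// Several handlers can be chained; the last handler's return value is sent back.
export default NextAPI(handler);
```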
@@ -6,7 +6,7 @@ import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { parentId, type } = req.query as { parentId?: string; type?: DatasetTypeEnum };

@@ -12,7 +12,7 @@ import {
  checkTeamAIPoints,
  checkTeamReRankPermission
} from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const {

@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';

@@ -1,4 +1,4 @@
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';

@@ -7,7 +7,7 @@ import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { getUserChatInfoAndAuthTeamPoints } from '@/service/support/permission/auth/team';
import { PostWorkflowDebugProps, PostWorkflowDebugResponse } from '@/global/core/workflow/api';
import { authPluginCrud } from '@fastgpt/service/support/permission/auth/plugin';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(
  req: NextApiRequest,

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { checkExportDatasetLimit } from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  const { datasetId } = req.query as {

@@ -9,7 +9,7 @@ import { authChatCert } from '@/service/support/permission/auth/chat';
import { MongoApp } from '@fastgpt/service/core/app/schema';
import { getGuideModule, splitGuideModule } from '@fastgpt/global/core/workflow/utils';
import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';

const upload = getUploadModel({
  maxSize: 2

@@ -44,7 +44,7 @@ import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runti

import { dispatchWorkFlowV1 } from '@fastgpt/service/core/workflow/dispatchV1';
import { setEntryEntries } from '@fastgpt/service/core/workflow/dispatchV1/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';

type FastGptWebChatProps = {

@@ -84,7 +84,7 @@ const Header = ({}: {}) => {
  ...props
}: {
  name: string;
  type: `${DatasetCollectionTypeEnum}`;
  type: DatasetCollectionTypeEnum;
  callback?: (id: string) => void;
  trainingType?: TrainingModeEnum;
  rawLink?: string;
@@ -38,16 +38,14 @@ import { TabEnum } from '..';
import { useUserStore } from '@/web/support/user/useUserStore';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import {
  DatasetCollectionTypeMap,
  TrainingModeEnum,
  TrainingTypeMap
} from '@fastgpt/global/core/dataset/constants';
import { DatasetCollectionTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import MyTooltip from '@/components/MyTooltip';
import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
import { useI18n } from '@/web/context/I18n';

const DataCard = () => {
  const BoxRef = useRef<HTMLDivElement>(null);
@@ -62,6 +60,7 @@ const DataCard = () => {
  };
  const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
  const { t } = useTranslation();
  const { datasetT } = useI18n();
  const [searchText, setSearchText] = useState('');
  const { toast } = useToast();
  const { openConfirm, ConfirmModal } = useConfirm({
@@ -69,6 +68,7 @@ const DataCard = () => {
    type: 'delete'
  });
  const { isOpen, onOpen, onClose } = useDisclosure();
  const readSource = getCollectionSourceAndOpen(collectionId);

  const {
    data: datasetDataList,
@@ -169,7 +169,17 @@ const DataCard = () => {
            value: webSelector
          }
        ]
      : [])
      : []),
    {
    ...(collection.tags
      ? [
          {
            label: datasetT('Collection tags'),
            value: collection.tags?.join(', ') || '-'
          }
        ]
      : [])
    }
  ];
}, [collection, t]);

@@ -196,13 +206,15 @@ const DataCard = () => {
        />
        <Flex className="textEllipsis" flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
          <Box lineHeight={1.2}>
            <RawSourceBox
              sourceName={collection?.name}
              sourceId={collection?.fileId || collection?.rawLink}
              fontSize={['md', 'lg']}
              color={'black'}
              textDecoration={'none'}
            />
            {collection?._id && (
              <RawSourceBox
                collectionId={collection._id}
                {...getCollectionSourceData(collection)}
                fontSize={['md', 'lg']}
                color={'black'}
                textDecoration={'none'}
              />
            )}
            <Box fontSize={'sm'} color={'myGray.500'}>
              {t('core.dataset.collection.id')}:{' '}
              <Box as={'span'} userSelect={'all'}>
@@ -412,10 +424,7 @@ const DataCard = () => {
        </Flex>
      ))}
      {collection?.sourceId && (
        <Button
          variant={'whitePrimary'}
          onClick={() => collection.sourceId && getFileAndOpen(collection.sourceId)}
        >
        <Button variant={'whitePrimary'} onClick={readSource}>
          {t('core.dataset.collection.metadata.read source')}
        </Button>
      )}
@@ -15,12 +15,12 @@ import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router';
import { TabEnum } from '../../../index';
import {
  postCreateDatasetCsvTableCollection,
  postCreateDatasetExternalFileCollection,
  postCreateDatasetFileCollection,
  postCreateDatasetLinkCollection,
  postCreateDatasetTextCollection
@@ -95,6 +95,13 @@ const Upload = () => {
          ...commonParams,
          fileId: item.dbFileId
        });
      } else if (importSource === ImportDataSourceEnum.externalFile && item.externalFileUrl) {
        await postCreateDatasetExternalFileCollection({
          ...commonParams,
          externalFileUrl: item.externalFileUrl,
          externalFileId: item.externalFileId,
          filename: item.sourceName
        });
      }

      setSources((state) =>
@@ -44,7 +44,8 @@ const PreviewChunks = ({
    if (importSource === ImportDataSourceEnum.csvTable) {
      return getPreviewChunks({
        type: importType2ReadType(importSource),
        sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
        sourceId:
          previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
        chunkSize,
        overlapRatio: chunkOverlapRatio,
        customSplitChar: processParamsForm.getValues('customSplitChar'),
@@ -55,7 +56,8 @@ const PreviewChunks = ({

    return getPreviewChunks({
      type: importType2ReadType(importSource),
      sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
      sourceId:
        previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
      chunkSize,
      overlapRatio: chunkOverlapRatio,
      customSplitChar: processParamsForm.getValues('customSplitChar'),

@@ -22,7 +22,7 @@ const PreviewRawText = ({
  const { importSource, processParamsForm } = useContextSelector(DatasetImportContext, (v) => v);

  const { data, isLoading } = useQuery(
    ['previewSource', previewSource.dbFileId, previewSource.link, previewSource.sourceUrl],
    ['previewSource', previewSource.dbFileId, previewSource.link, previewSource.externalFileUrl],
    () => {
      if (importSource === ImportDataSourceEnum.fileCustom && previewSource.rawText) {
        return {
@@ -39,7 +39,8 @@ const PreviewRawText = ({

      return getPreviewFileContent({
        type: importType2ReadType(importSource),
        sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
        sourceId:
          previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
        isQAImport: false,
        selector: processParamsForm.getValues('webSelector')
      });
@@ -50,16 +50,16 @@ const CustomLinkInput = () => {
  const { register, reset, handleSubmit, control } = useForm<{
    list: {
      sourceName: string;
      sourceUrl: string;
      externalId: string;
      externalFileUrl: string;
      externalFileId: string;
    }[];
  }>({
    defaultValues: {
      list: [
        {
          sourceName: '',
          sourceUrl: '',
          externalId: ''
          externalFileUrl: '',
          externalFileId: ''
        }
      ]
    }
@@ -80,8 +80,8 @@ const CustomLinkInput = () => {
      reset({
        list: sources.map((item) => ({
          sourceName: item.sourceName,
          sourceUrl: item.sourceUrl || '',
          externalId: item.externalId || ''
          externalFileUrl: item.externalFileUrl || '',
          externalFileId: item.externalFileId || ''
        }))
      });
    }
@@ -104,7 +104,7 @@ const CustomLinkInput = () => {
              <Tr key={item.id}>
                <Td>
                  <Input
                    {...register(`list.${index}.sourceUrl`, {
                    {...register(`list.${index}.externalFileUrl`, {
                      required: index !== list.length - 1,
                      onBlur(e) {
                        const val = (e.target.value || '') as string;
@@ -112,15 +112,15 @@ const CustomLinkInput = () => {
                        const sourceName = val.split('/').pop() || '';
                        update(index, {
                          ...list[index],
                          sourceUrl: val,
                          externalFileUrl: val,
                          sourceName: decodeURIComponent(sourceName)
                        });
                      }
                      if (val && index === list.length - 1) {
                        append({
                          sourceName: '',
                          sourceUrl: '',
                          externalId: ''
                          externalFileUrl: '',
                          externalFileId: ''
                        });
                      }
                    }
@@ -128,7 +128,7 @@ const CustomLinkInput = () => {
                  />
                </Td>
                <Td>
                  <Input {...register(`list.${index}.externalId`)} />
                  <Input {...register(`list.${index}.externalFileId`)} />
                </Td>
                <Td>
                  <Input {...register(`list.${index}.sourceName`)} />
@@ -154,26 +154,26 @@ const CustomLinkInput = () => {
            onClick={() => {
              append({
                sourceName: '',
                sourceUrl: '',
                externalId: ''
                externalFileUrl: '',
                externalFileId: ''
              });
            }}
          >
            {commonT('Add new')}
          </Button>
          <Button
            isDisabled={list.filter((item) => !!item.sourceUrl).length === 0}
            isDisabled={list.filter((item) => !!item.externalFileUrl).length === 0}
            onClick={handleSubmit((data) => {
              setSources(
                data.list
                  .filter((item) => !!item.sourceUrl)
                  .filter((item) => !!item.externalFileUrl)
                  .map((item) => ({
                    id: getNanoid(32),
                    createStatus: 'waiting',
                    sourceName: item.sourceName || item.sourceUrl,
                    icon: getFileIcon(item.sourceUrl),
                    externalId: item.externalId,
                    sourceUrl: item.sourceUrl
                    sourceName: item.sourceName || item.externalFileUrl,
                    icon: getFileIcon(item.externalFileUrl),
                    externalFileId: item.externalFileId,
                    externalFileUrl: item.externalFileUrl
                  }))
              );

@@ -1,10 +1,9 @@
import React, { useState, useMemo } from 'react';
import { useRouter } from 'next/router';
import { Box, Flex, Button, IconButton, Input, Textarea } from '@chakra-ui/react';
import { Box, Flex, Button, IconButton, Input, Textarea, HStack } from '@chakra-ui/react';
import { DeleteIcon } from '@chakra-ui/icons';
import { delDatasetById } from '@/web/core/dataset/api';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { useForm } from 'react-hook-form';
import { compressImgFileAndUpload } from '@/web/common/file/controller';
@@ -24,6 +23,7 @@ import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MyDivider from '@fastgpt/web/components/common/MyDivider/index';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';

const Info = ({ datasetId }: { datasetId: string }) => {
  const { t } = useTranslation();
@@ -191,9 +191,10 @@ const Info = ({ datasetId }: { datasetId: string }) => {
        {datasetDetail.type === DatasetTypeEnum.externalFile && (
          <>
            <Flex w={'100%'} alignItems={'center'}>
              <Box fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
                {datasetT('External read url')}
              </Box>
              <HStack fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
                <Box>{datasetT('External read url')}</Box>
                <QuestionTip label={datasetT('External read url tip')} />
              </HStack>
              <Input
                flex={[1, '0 0 320px']}
                placeholder="https://test.com/read?fileId={{fileId}}"

@@ -237,6 +237,7 @@ const InputDataModal = ({
            w={'210px'}
            className="textEllipsis3"
            whiteSpace={'pre-wrap'}
            collectionId={collection._id}
            sourceName={collection.sourceName}
            sourceId={collection.sourceId}
            mb={6}

@@ -116,13 +116,13 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
              value: DatasetTypeEnum.websiteDataset,
              icon: 'core/dataset/websiteDataset',
              desc: datasetT('Website Dataset Desc')
            },
            {
              title: datasetT('External File'),
              value: DatasetTypeEnum.externalFile,
              icon: 'core/dataset/externalDataset',
              desc: datasetT('External file Dataset Desc')
            }
            // {
            //   title: datasetT('External File'),
            //   value: DatasetTypeEnum.externalFile,
            //   icon: 'core/dataset/websiteDataset',
            //   desc: datasetT('External file Dataset Desc')
            // }
          ]
        : [])
      ]}

@@ -1,37 +0,0 @@
import { jsonRes } from '@fastgpt/service/common/response';
import type { NextApiResponse } from 'next';
import { connectToDatabase } from '../mongo';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { ApiRequestProps } from '@fastgpt/service/type/next';

export type NextApiHandler<T = any> = (
  req: ApiRequestProps,
  res: NextApiResponse<T>
) => unknown | Promise<unknown>;

export const NextAPI = (...args: NextApiHandler[]): NextApiHandler => {
  return async function api(req: ApiRequestProps, res: NextApiResponse) {
    try {
      await Promise.all([withNextCors(req, res), connectToDatabase()]);

      let response = null;
      for (const handler of args) {
        response = await handler(req, res);
      }

      const contentType = res.getHeader('Content-Type');
      if ((!contentType || contentType === 'application/json') && !res.writableFinished) {
        return jsonRes(res, {
          code: 200,
          data: response
        });
      }
    } catch (error) {
      return jsonRes(res, {
        code: 500,
        error,
        url: req.url
      });
    }
  };
};

6
projects/app/src/service/middleware/entry.ts
Normal file
@@ -0,0 +1,6 @@
import { connectToDatabase } from '../mongo';
import { NextEntry } from '@fastgpt/service/common/middle/entry';

export const NextAPI = NextEntry({
  beforeCallback: [connectToDatabase()]
});

@@ -14,6 +14,7 @@ import type {
  CreateDatasetCollectionParams,
  CsvTableCreateDatasetCollectionParams,
  DatasetUpdateBody,
  ExternalFileCreateDatasetCollectionParams,
  FileIdCreateDatasetCollectionParams,
  LinkCreateDatasetCollectionParams,
  PostWebsiteSyncParams,
@@ -44,6 +45,7 @@ import type {
  PostPreviewFilesChunksProps,
  PreviewChunksResponse
} from '@/pages/api/core/dataset/file/getPreviewChunks';
import type { readCollectionSourceResponse } from '@/pages/api/core/dataset/collection/read';

/* ======================== dataset ======================= */
export const getDatasets = (data: { parentId?: string; type?: DatasetTypeEnum }) =>
@@ -85,7 +87,9 @@ export const getDatasetCollectionById = (id: string) =>
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
  POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
  POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data, { timeout: 120000 });
  POST<{ collectionId: string }>(`/core/dataset/collection/create/fileId`, data, {
    timeout: 120000
  });
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
  POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
@@ -94,6 +98,12 @@ export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetC
  POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data, {
    timeout: 120000
  });
export const postCreateDatasetExternalFileCollection = (
  data: ExternalFileCreateDatasetCollectionParams
) =>
  POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/externalFileUrl`, data, {
    timeout: 120000
  });

export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
  POST(`/core/dataset/collection/update`, data);
@@ -144,6 +154,6 @@ export const getDatasetTrainingQueue = (datasetId: string) =>
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
  POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data);

/* ================== file ======================== */
export const getFileViewUrl = (fileId: string) =>
  GET<string>('/core/dataset/file/getPreviewUrl', { fileId });
/* ================== read source ======================== */
export const getCollectionSource = (collectionId: string) =>
  GET<readCollectionSourceResponse>('/core/dataset/collection/read', { collectionId });

@@ -1,5 +1,9 @@
import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
import { DatasetTypeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import {
  DatasetCollectionTypeEnum,
  DatasetTypeEnum,
  TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import type {
  DatasetCollectionItemType,
  DatasetItemType
@@ -46,7 +50,7 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
  },
  parentId: '',
  name: '',
  type: 'file',
  type: DatasetCollectionTypeEnum.file,
  updateTime: new Date(),
  canWrite: false,
  sourceName: '',

@@ -0,0 +1,34 @@
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { getCollectionSource } from '@/web/core/dataset/api';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useTranslation } from 'next-i18next';

export function getCollectionSourceAndOpen(collectionId: string) {
  const { toast } = useToast();
  const { t } = useTranslation();
  const { setLoading } = useSystemStore();

  return async () => {
    try {
      setLoading(true);
      const { value: url } = await getCollectionSource(collectionId);

      if (!url) {
        throw new Error('No file found');
      }

      if (url.startsWith('/')) {
        window.open(`${location.origin}${url}`, '_blank');
      } else {
        window.open(url, '_blank');
      }
    } catch (error) {
      toast({
        title: getErrText(error, t('error.fileNotFound')),
        status: 'error'
      });
    }
    setLoading(false);
  };
}
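The `getCollectionSourceAndOpen` helper above returns an async click handler, which is how `DataCard` wires it to its "read source" button earlier in this diff. A stripped-down usage sketch (hypothetical component, not part of the commit):

```tsx
// Hypothetical consumer of the helper above.
import { Button } from '@chakra-ui/react';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';

export function ReadSourceButton({ collectionId }: { collectionId: string }) {
  const readSource = getCollectionSourceAndOpen(collectionId);
  return <Button onClick={readSource}>Read source</Button>;
}
```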
Some files were not shown because too many files have changed in this diff.