External dataset (#1519)

* perf: local file create collection

* rename middleware

* perf: remove code

* feat: next14

* feat: external file dataset

* collection tags field

* external file dataset doc

* fix: ts
Author: Archer
Date: 2024-05-17 16:44:15 +08:00
Committed by: GitHub
Parent: 2d1ec9b3ad
Commit: 67c52992d7
102 changed files with 1839 additions and 1282 deletions

.npmrc
View File

@@ -1,2 +1 @@
public-hoist-pattern[]=*tiktoken*
public-hoist-pattern[]=*react*

View File

@@ -11,7 +11,7 @@
"prefix": "nextapi",
"body": [
"import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';",
"import { NextAPI } from '@/service/middle/entry';",
"import { NextAPI } from '@/service/middleware/entry';",
"",
"export type ${TM_FILENAME_BASE}Query = {};",
"",

Binary file not shown (image added, 163 KiB).

Binary file not shown (image added, 122 KiB).

Binary file not shown (image added, 75 KiB).

View File

@@ -0,0 +1,26 @@
---
title: '外部文件知识库'
description: 'FastGPT 外部文件知识库功能介绍和使用方式'
icon: 'language'
draft: false
toc: true
weight: 107
---
外部文件库是 FastGPT 商业版特有功能。它允许接入你现在的文件系统,无需将文件再导入一份到 FastGPT 中。
并且,阅读权限可以通过你的文件系统进行控制。
| | | |
| --------------------- | --------------------- | --------------------- |
| ![](/imgs/external_file0.png) | ![](/imgs/external_file1.png) | ![](/imgs/external_file2.png) |
## 导入参数说明
- 外部预览地址:用于跳转你的文件阅读地址,会携带“文件阅读ID”进行访问。
- 文件访问URL:文件可访问的地址。
- 文件阅读ID:通常情况下,文件访问URL是临时的。如果希望永久可以访问,你需要使用该文件阅读ID,并配合上“外部预览地址”,跳转至新的阅读地址进行原文件访问。
- 文件名:默认会自动解析文件访问URL上的文件名。如果你手动填写,将会以手动填写的值为准。
[点击查看API导入文档](/docs/development/openapi/dataset/#创建一个外部文件库集合商业版)
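
As a hedged illustration of the read-ID mechanism described above (the helper name and example URL are hypothetical; the `{{fileId}}` template variable comes from the doc):

```ts
// Sketch only: combine the configured external preview address with a file read ID.
// The real resolution happens server-side when a collection source is opened.
function resolveExternalReadUrl(externalReadUrl: string, externalFileId: string): string {
  return externalReadUrl.replace('{{fileId}}', externalFileId);
}

// resolveExternalReadUrl('https://files.example.com/view?id={{fileId}}', '1111')
// -> 'https://files.example.com/view?id=1111'
```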

View File

@@ -295,6 +295,24 @@ curl --location --request DELETE 'http://localhost:3000/api/core/dataset/delete?
## 集合
### 通用创建参数说明
**入参**
| 参数 | 说明 | 必填 |
| --- | --- | --- |
| datasetId | 知识库ID | ✅ |
| parentId | 父级ID,不填则默认为根目录 | |
| trainingType | 训练模式。chunk: 按文本长度进行分割;qa: QA拆分;auto: 增强训练 | ✅ |
| chunkSize | 预估块大小 | |
| chunkSplitter | 自定义最高优先分割符号 | |
| qaPrompt | qa拆分提示词 | |
**出参**
- collectionId - 新建的集合ID
- insertLen:插入的块数量
### 创建一个空的集合
{{< tabs tabTotal="3" >}}
@@ -500,7 +518,7 @@ data 为集合的 ID。
{{< /tab >}}
{{< /tabs >}}
### 创建一个文件集合(商业版)
### 创建一个文件集合
传入一个文件,创建一个集合,会读取文件内容进行分割。目前支持:pdf, docx, md, txt, html, csv。
@@ -509,7 +527,7 @@ data 为集合的 ID。
{{< markdownify >}}
```bash
curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/file' \
curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/localFile' \
--header 'Authorization: Bearer {{authorization}}' \
--form 'file=@"C:\\Users\\user\\Desktop\\fastgpt测试文件\\index.html"' \
--form 'data="{\"datasetId\":\"6593e137231a2be9c5603ba7\",\"parentId\":null,\"trainingType\":\"chunk\",\"chunkSize\":512,\"chunkSplitter\":\"\",\"qaPrompt\":\"\",\"metadata\":{}}"'
@@ -565,6 +583,68 @@ data 为集合的 ID。
{{< /tab >}}
{{< /tabs >}}
### 创建一个外部文件库集合(商业版)
{{< tabs tabTotal="3" >}}
{{< tab tabName="请求示例" >}}
{{< markdownify >}}
```bash
curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/externalFileUrl' \
--header 'Authorization: Bearer {{authorization}}' \
--header 'User-Agent: Apifox/1.0.0 (https://apifox.com)' \
--header 'Content-Type: application/json' \
--data-raw '{
"externalFileUrl":"https://image.xxxxx.com/fastgpt-dev/%E6%91%82.pdf",
"externalFileId":"1111",
"filename":"自定义文件名",
"datasetId":"6642d105a5e9d2b00255b27b",
"parentId": null,
"trainingType": "chunk",
"chunkSize":512,
"chunkSplitter":"",
"qaPrompt":""
}'
```
{{< /markdownify >}}
{{< /tab >}}
{{< tab tabName="参数说明" >}}
{{< markdownify >}}
| 参数 | 说明 | 必填 |
| --- | --- | --- |
| externalFileUrl | 文件访问链接(可以是临时链接) | ✅ |
| externalFileId | 外部文件ID | |
| filename | 自定义文件名 | |
{{< /markdownify >}}
{{< /tab >}}
{{< tab tabName="响应示例" >}}
{{< markdownify >}}
data 为集合的 ID。
```json
{
"code": 200,
"statusText": "",
"message": "",
"data": {
"collectionId": "6646fcedfabd823cdc6de746",
"insertLen": 3
}
}
```
{{< /markdownify >}}
{{< /tab >}}
{{< /tabs >}}
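
A hedged TypeScript equivalent of the request above (host, API key and IDs are placeholders; the body and response shape mirror the documented example):

```ts
// Sketch: create an external-file collection via the documented endpoint.
async function createExternalFileCollection(apiKey: string) {
  const res = await fetch(
    'http://localhost:3000/api/proApi/core/dataset/collection/create/externalFileUrl',
    {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiKey}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        externalFileUrl: 'https://files.example.com/demo.pdf', // required: file access URL
        externalFileId: '1111',                                // optional: stable read ID
        filename: 'demo.pdf',                                  // optional: overrides parsed name
        datasetId: '6642d105a5e9d2b00255b27b',
        parentId: null,
        trainingType: 'chunk',
        chunkSize: 512
      })
    }
  );
  const { data } = await res.json(); // expected: { collectionId, insertLen }
  return data;
}
```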
### 获取集合列表
{{< tabs tabTotal="3" >}}

View File

@@ -35,8 +35,11 @@ curl --location --request POST 'https://{{host}}/api/admin/clearInvalidData' \
## V4.8.1 更新说明
1. 新增 - 知识库重新选择向量模型重建
2. 新增 - 工作流节点版本变更提示,并可以同步最新版本
3. 优化 - 插件输入的 debug 模式,支持全量参数输入渲染
4. 修复 - 插件输入默认值被清空问题。
5. 修复 - 工作流删除节点的动态输入和输出时候,没有正确的删除连接线,导致可能出现逻辑异常。
6. 修复 - 定时器清理脏数据任务
2. 新增 - 对话框支持问题模糊检索提示,可自定义预设问题词库
3. 新增 - 工作流节点版本变更提示,并可以同步最新版本配置,避免存在隐藏脏数据
4. 新增 - 开放文件导入知识库接口到开源版, [点击插件文档](/docs/development/openapi/dataset/#创建一个文件集合)
5. 新增 - 外部文件源知识库, [点击查看文档](/docs/course/externalfile/)
6. 优化 - 插件输入的 debug 模式,支持全量参数输入渲染。
7. 修复 - 插件输入默认值被清空问题。
8. 修复 - 工作流删除节点的动态输入和输出时候,没有正确的删除连接线,导致可能出现逻辑异常。
9. 修复 - 定时器清理脏数据任务

View File

@@ -26,18 +26,27 @@ export type DatasetCollectionChunkMetadataType = {
qaPrompt?: string;
metadata?: Record<string, any>;
};
// create collection params
export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
datasetId: string;
name: string;
type: `${DatasetCollectionTypeEnum}`;
type: DatasetCollectionTypeEnum;
tags?: string[];
fileId?: string;
rawLink?: string;
externalFileId?: string;
externalFileUrl?: string;
rawTextLength?: number;
hashRawText?: string;
};
export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
datasetId: string;
tags?: string[];
};
export type TextCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
name: string;
@@ -58,6 +67,11 @@ export type CsvTableCreateDatasetCollectionParams = {
parentId?: string;
fileId: string;
};
export type ExternalFileCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
externalFileId?: string;
externalFileUrl: string;
filename?: string;
};
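
A hedged sketch of the new params type in use (values are placeholders; the import path is assumed from the sibling types referenced elsewhere in this commit):

```ts
import type { ExternalFileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';

// Only externalFileUrl (and the inherited datasetId) are required; the rest is optional.
const params: ExternalFileCreateDatasetCollectionParams = {
  datasetId: '6642d105a5e9d2b00255b27b',
  externalFileUrl: 'https://files.example.com/demo.pdf',
  externalFileId: '1111',   // stable read ID in the external system
  filename: 'demo.pdf',     // overrides the name parsed from the URL
  tags: ['demo']            // from ApiCreateDatasetCollectionParams
};
```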
/* ================= data ===================== */
export type PgSearchRawType = {

View File

@@ -1,4 +1,4 @@
/* sourceId = prefix-id; id=fileId;link url;externalId */
/* sourceId = prefix-id; id=fileId;link url;externalFileId */
export enum CollectionSourcePrefixEnum {
local = 'local',
link = 'link',

View File

@@ -0,0 +1,14 @@
import { CollectionWithDatasetType, DatasetCollectionSchemaType } from '../type';
export const getCollectionSourceData = (
collection?: CollectionWithDatasetType | DatasetCollectionSchemaType
) => {
return {
sourceId:
collection?.fileId ||
collection?.rawLink ||
collection?.externalFileId ||
collection?.externalFileUrl,
sourceName: collection?.name || ''
};
};
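
A hedged usage sketch (the collection literal below is illustrative and cast loosely; the helper simply prefers fileId, then rawLink, then the external identifiers):

```ts
// For an external-file collection without a stable externalFileId,
// the URL itself becomes the sourceId.
const collection = {
  name: 'demo.pdf',
  externalFileUrl: 'https://files.example.com/demo.pdf'
} as unknown as DatasetCollectionSchemaType; // cast for illustration only

const { sourceId, sourceName } = getCollectionSourceData(collection);
// sourceId === 'https://files.example.com/demo.pdf'; sourceName === 'demo.pdf'
```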

View File

@@ -22,7 +22,7 @@ export const DatasetTypeMap = {
collectionLabel: 'common.Website'
},
[DatasetTypeEnum.externalFile]: {
icon: 'core/dataset/commonDataset',
icon: 'core/dataset/externalDataset',
label: 'External File',
collectionLabel: 'common.File'
}
@@ -44,9 +44,11 @@ export const DatasetStatusMap = {
/* ------------ collection -------------- */
export enum DatasetCollectionTypeEnum {
folder = 'folder',
virtual = 'virtual',
file = 'file',
link = 'link', // one link
virtual = 'virtual'
externalFile = 'externalFile'
}
export const DatasetCollectionTypeMap = {
[DatasetCollectionTypeEnum.folder]: {
@@ -55,6 +57,9 @@ export const DatasetCollectionTypeMap = {
[DatasetCollectionTypeEnum.file]: {
name: 'core.dataset.file'
},
[DatasetCollectionTypeEnum.externalFile]: {
name: 'core.dataset.externalFile'
},
[DatasetCollectionTypeEnum.link]: {
name: 'core.dataset.link'
},

View File

@@ -1,7 +1,5 @@
import { DatasetSourceReadTypeEnum, ImportDataSourceEnum } from './constants';
export const rawTextBackupPrefix = 'index,content';
export const importType2ReadType = (type: ImportDataSourceEnum) => {
if (type === ImportDataSourceEnum.csvTable || type === ImportDataSourceEnum.fileLocal) {
return DatasetSourceReadTypeEnum.fileLocal;

View File

@@ -41,7 +41,7 @@ export type DatasetCollectionSchemaType = {
datasetId: string;
parentId?: string;
name: string;
type: `${DatasetCollectionTypeEnum}`;
type: DatasetCollectionTypeEnum;
createTime: Date;
updateTime: Date;
@@ -50,13 +50,15 @@ export type DatasetCollectionSchemaType = {
chunkSplitter?: string;
qaPrompt?: string;
sourceId?: string; // relate CollectionSourcePrefixEnum
tags?: string[];
fileId?: string; // local file id
rawLink?: string; // link url
externalFileId?: string; //external file id
rawTextLength?: number;
hashRawText?: string;
externalSourceUrl?: string; // external import url
externalFileUrl?: string; // external import url
metadata?: {
webPageSelector?: string;
relatedImgId?: string; // The id of the associated image collections

View File

@@ -3,7 +3,7 @@ import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';
export function getCollectionIcon(
type: `${DatasetCollectionTypeEnum}` = DatasetCollectionTypeEnum.file,
type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file,
name = ''
) {
if (type === DatasetCollectionTypeEnum.folder) {
@@ -24,13 +24,13 @@ export function getSourceNameIcon({
sourceName: string;
sourceId?: string;
}) {
if (strIsLink(sourceId)) {
return 'common/linkBlue';
}
const fileIcon = getFileIcon(sourceName, '');
const fileIcon = getFileIcon(decodeURIComponent(sourceName), '');
if (fileIcon) {
return fileIcon;
}
if (strIsLink(sourceId)) {
return 'common/linkBlue';
}
return 'file/fill/manual';
}

View File

@@ -10,7 +10,7 @@
"js-yaml": "^4.1.0",
"jschardet": "3.1.1",
"nanoid": "^4.0.1",
"next": "13.5.2",
"next": "14.2.3",
"openai": "4.28.0",
"openapi-types": "^12.1.3",
"timezones-list": "^3.0.2"

View File

@@ -7,7 +7,7 @@ import { MongoFileSchema } from './schema';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
import { readFileRawContent } from '../read/utils';
import { readRawContentByFileBuffer } from '../read/utils';
import { PassThrough } from 'stream';
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
@@ -196,7 +196,7 @@ export const readFileContentFromMongo = async ({
});
})();
const { rawText } = await readFileRawContent({
const { rawText } = await readRawContentByFileBuffer({
extension,
isQAImport,
teamId,

View File

@@ -1,11 +1,12 @@
import { markdownProcess, simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
import { uploadMongoImg } from '../image/controller';
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
import { addHours } from 'date-fns';
import { WorkerNameEnum, runWorker } from '../../../worker/utils';
import fs from 'fs';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { ReadFileResponse } from '../../../worker/file/type';
import { rawTextBackupPrefix } from '@fastgpt/global/core/dataset/read';
export const initMarkdownText = ({
teamId,
@@ -28,7 +29,34 @@ export const initMarkdownText = ({
})
});
export const readFileRawContent = async ({
export type readRawTextByLocalFileParams = {
teamId: string;
path: string;
metadata?: Record<string, any>;
};
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
const { path } = params;
const extension = path?.split('.')?.pop()?.toLowerCase() || '';
const buffer = fs.readFileSync(path);
const encoding = detectFileEncoding(buffer);
const { rawText } = await readRawContentByFileBuffer({
extension,
isQAImport: false,
teamId: params.teamId,
encoding,
buffer,
metadata: params.metadata
});
return {
rawText
};
};
export const readRawContentByFileBuffer = async ({
extension,
isQAImport,
teamId,
@@ -69,9 +97,3 @@ export const readFileRawContent = async ({
return { rawText };
};
export const htmlToMarkdown = async (html?: string | null) => {
const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
return simpleMarkdownText(md);
};
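
A hedged call sketch for the new local-file reader (path, teamId and metadata are placeholders; extension and encoding are derived from the file itself):

```ts
const { rawText } = await readRawTextByLocalFile({
  teamId: '6593e137231a2be9c5603ba7',
  path: '/tmp/uploads/demo.md',
  metadata: { relatedId: 'img-group-1' } // optional: ties extracted images to the collection
});
console.log(rawText.length);
```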

View File

@@ -0,0 +1,38 @@
import { jsonRes } from '../response';
import type { NextApiResponse } from 'next';
import { withNextCors } from './cors';
import { ApiRequestProps } from '../../type/next';
export type NextApiHandler<T = any> = (
req: ApiRequestProps,
res: NextApiResponse<T>
) => unknown | Promise<unknown>;
export const NextEntry = ({ beforeCallback = [] }: { beforeCallback?: Promise<any>[] }) => {
return (...args: NextApiHandler[]): NextApiHandler => {
return async function api(req: ApiRequestProps, res: NextApiResponse) {
try {
await Promise.all([withNextCors(req, res), ...beforeCallback]);
let response = null;
for (const handler of args) {
response = await handler(req, res);
}
const contentType = res.getHeader('Content-Type');
if ((!contentType || contentType === 'application/json') && !res.writableFinished) {
return jsonRes(res, {
code: 200,
data: response
});
}
} catch (error) {
return jsonRes(res, {
code: 500,
error,
url: req.url
});
}
};
};
};
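
A hedged sketch of how this factory is consumed (the NextEntry import path is assumed; the NextAPI usage matches the route files updated in this commit):

```ts
// '@/service/middleware/entry' (app side), built once from the shared factory:
import { NextEntry } from '@fastgpt/service/common/middle/entry'; // path assumed
export const NextAPI = NextEntry({ beforeCallback: [] });

// In an API route: handlers run left to right, and the last return value is
// wrapped into { code: 200, data } unless the response was already sent.
// async function handler(req: ApiRequestProps, res: NextApiResponse) {
//   return { ok: true };
// }
// export default NextAPI(handler);
```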

View File

@@ -1,7 +1,7 @@
import { UrlFetchParams, UrlFetchResponse } from '@fastgpt/global/common/file/api';
import * as cheerio from 'cheerio';
import axios from 'axios';
import { htmlToMarkdown } from '../file/read/utils';
import { htmlToMarkdown } from './utils';
export const cheerioToHtml = ({
fetchUrl,

View File

@@ -0,0 +1,8 @@
import { simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
import { WorkerNameEnum, runWorker } from '../../worker/utils';
export const htmlToMarkdown = async (html?: string | null) => {
const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html || '' });
return simpleMarkdownText(md);
};

View File

@@ -32,6 +32,9 @@ export async function createOneCollection({
fileId,
rawLink,
externalFileId,
externalFileUrl,
hashRawText,
rawTextLength,
metadata = {},
@@ -61,6 +64,8 @@ export async function createOneCollection({
fileId,
rawLink,
externalFileId,
externalFileUrl,
rawTextLength,
hashRawText,

View File

@@ -66,7 +66,11 @@ const DatasetCollectionSchema = new Schema({
type: String
},
sourceId: String,
tags: {
type: [String],
default: []
},
// local file collection
fileId: {
type: Schema.Types.ObjectId,
@@ -74,13 +78,13 @@ const DatasetCollectionSchema = new Schema({
},
// web link collection
rawLink: String,
// external collection
externalFileId: String,
// metadata
rawTextLength: Number,
hashRawText: String,
externalSourceUrl: String, // external import url
externalFileUrl: String, // external import url
metadata: {
type: Object,
default: {}

View File

@@ -2,13 +2,20 @@ import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readFileContentFromMongo } from '../../common/file/gridfs/controller';
import { urlsFetch } from '../../common/string/cheerio';
import { rawTextBackupPrefix } from '@fastgpt/global/core/dataset/read';
import { parseCsvTable2Chunks } from './training/utils';
import { TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import axios from 'axios';
import { readFileRawContent } from '../../common/file/read/utils';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
export const readFileRawTextByUrl = async ({ teamId, url }: { teamId: string; url: string }) => {
export const readFileRawTextByUrl = async ({
teamId,
url,
relatedId
}: {
teamId: string;
url: string;
relatedId?: string;
}) => {
const response = await axios({
method: 'get',
url: url,
@@ -18,11 +25,14 @@ export const readFileRawTextByUrl = async ({ teamId, url }: { teamId: string; ur
const buffer = Buffer.from(response.data, 'binary');
const { rawText } = await readFileRawContent({
const { rawText } = await readRawContentByFileBuffer({
extension,
teamId,
buffer,
encoding: 'utf-8'
encoding: 'utf-8',
metadata: {
relatedId
}
});
return rawText;
@@ -38,13 +48,15 @@ export const readDatasetSourceRawText = async ({
type,
sourceId,
isQAImport,
selector
selector,
relatedId
}: {
teamId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;
isQAImport?: boolean;
selector?: string;
relatedId?: string;
}): Promise<string> => {
if (type === DatasetSourceReadTypeEnum.fileLocal) {
const { rawText } = await readFileContentFromMongo({
@@ -64,7 +76,8 @@ export const readDatasetSourceRawText = async ({
} else if (type === DatasetSourceReadTypeEnum.externalFile) {
const rawText = await readFileRawTextByUrl({
teamId,
url: sourceId
url: sourceId,
relatedId
});
return rawText;
}
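
A hedged call sketch for reading an external-file source with the updated signature (IDs are placeholders; for externalFile sources the sourceId is the file URL and relatedId links any extracted images back to the collection):

```ts
const rawText = await readDatasetSourceRawText({
  teamId: '6593e137231a2be9c5603ba7',
  type: DatasetSourceReadTypeEnum.externalFile,
  sourceId: 'https://files.example.com/demo.pdf',
  relatedId: 'img-group-1'
});
```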

View File

@@ -18,6 +18,7 @@ import { countPromptTokens } from '../../../common/string/tiktoken/index';
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { jiebaSplit } from '../../../common/string/jieba';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
type SearchDatasetDataProps = {
teamId: string;
@@ -98,7 +99,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
},
'datasetId collectionId q a chunkIndex indexes'
)
.populate('collectionId', 'name fileId rawLink')
.populate('collectionId', 'name fileId rawLink externalFileId externalFileUrl')
.lean()) as DatasetDataWithCollectionType[];
// add score to data(It's already sorted. The first one is the one with the most points)
@@ -130,8 +131,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
chunkIndex: data.chunkIndex,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId?._id),
sourceName: data.collectionId?.name || '',
sourceId: data.collectionId?.fileId || data.collectionId?.rawLink,
...getCollectionSourceData(data.collectionId),
score: [{ type: SearchScoreTypeEnum.embedding, value: data.score, index }]
};
@@ -205,8 +205,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
id: String(item._id),
datasetId: String(item.datasetId),
collectionId: String(item.collectionId),
sourceName: collection?.name || '',
sourceId: collection?.fileId || collection?.rawLink,
...getCollectionSourceData(collection),
q: item.q,
a: item.a,
chunkIndex: item.chunkIndex,

View File

@@ -174,7 +174,7 @@ export async function pushDataListToTrainingQueue({
} catch (error: any) {
addLog.error(`Insert error`, error);
// 如果有错误,将失败的文档添加到失败列表中
error.writeErrors.forEach((writeError: any) => {
error.writeErrors?.forEach((writeError: any) => {
failedDocuments.push(data[writeError.index]);
});
console.log('failed', failedDocuments);

View File

@@ -35,7 +35,7 @@ const TrainingDataSchema = new Schema({
},
billId: {
// concat bill
type: Schema.Types.ObjectId
type: String
},
mode: {
type: String,

View File

@@ -53,7 +53,7 @@ export const dispatchLafRequest = async (props: LafRequestProps): Promise<LafRes
appId,
chatId,
responseChatItemId,
histories: histories.slice(0, 10)
histories: histories?.slice(0, 10)
},
variables,
...dynamicInput,

View File

@@ -21,7 +21,7 @@
"mammoth": "^1.6.0",
"mongoose": "^7.0.2",
"multer": "1.4.5-lts.1",
"next": "13.5.2",
"next": "14.2.3",
"nextjs-cors": "^2.1.2",
"node-cron": "^3.0.3",
"node-xlsx": "^0.23.0",

View File

@@ -19,7 +19,9 @@ export const checkDatasetLimit = async ({
if (!standardConstants) return;
if (usedSize + insertLen >= datasetMaxSize) {
return Promise.reject(TeamErrEnum.datasetSizeNotEnough);
return Promise.reject(
`您的知识库容量为: ${datasetMaxSize}组,已使用: ${usedSize}组,导入当前文件需要: ${insertLen}组,请增加知识库容量后导入。`
);
}
if (usedPoints >= totalPoints) {

View File

@@ -9,7 +9,7 @@ import { readXlsxRawText } from './extension/xlsx';
import { readCsvRawText } from './extension/csv';
parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
const readFileRawContent = async (params: ReadRawTextByBuffer) => {
const readRawContentByFileBuffer = async (params: ReadRawTextByBuffer) => {
switch (params.extension) {
case 'txt':
case 'md':
@@ -41,7 +41,7 @@ parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
try {
parentPort?.postMessage({
type: 'success',
data: await readFileRawContent(newProps)
data: await readRawContentByFileBuffer(newProps)
});
} catch (error) {
console.log(error);

View File

@@ -101,6 +101,7 @@ export const iconPaths = {
'core/dataset/commonDataset': () => import('./icons/core/dataset/commonDataset.svg'),
'core/dataset/datasetFill': () => import('./icons/core/dataset/datasetFill.svg'),
'core/dataset/datasetLight': () => import('./icons/core/dataset/datasetLight.svg'),
'core/dataset/externalDataset': () => import('./icons/core/dataset/externalDataset.svg'),
'core/dataset/fileCollection': () => import('./icons/core/dataset/fileCollection.svg'),
'core/dataset/fullTextRecall': () => import('./icons/core/dataset/fullTextRecall.svg'),
'core/dataset/manualCollection': () => import('./icons/core/dataset/manualCollection.svg'),

View File

@@ -0,0 +1,5 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 23 20">
<path fill-rule="evenodd" clip-rule="evenodd"
d="M15.7233 1.21707C14.9554 1.3079 14.4221 2.00197 14.532 2.76731L16.6975 17.8447C16.8074 18.6101 17.519 19.1569 18.2868 19.066L21.5433 18.6808C22.3111 18.59 22.8445 17.8959 22.7345 17.1306L20.5691 2.05317C20.4592 1.28782 19.7476 0.741021 18.9797 0.831852L15.7233 1.21707ZM0.830017 2.32412C0.830017 1.55092 1.45682 0.924121 2.23002 0.924121H5.51815C6.29135 0.924121 6.91815 1.55092 6.91815 2.32412V17.675C6.91815 18.4482 6.29135 19.075 5.51815 19.075H2.23002C1.45682 19.075 0.830017 18.4482 0.830017 17.675V2.32412ZM7.9198 2.32412C7.9198 1.55092 8.5466 0.924121 9.3198 0.924121H12.6079C13.3811 0.924121 14.0079 1.55092 14.0079 2.32412V17.675C14.0079 18.4482 13.3811 19.075 12.6079 19.075H9.3198C8.5466 19.075 7.9198 18.4482 7.9198 17.675V2.32412Z"
fill="#8A95A7" />
</svg>


View File

@@ -10,6 +10,7 @@ import React from 'react';
type Props = Omit<NumberInputProps, 'onChange'> & {
onChange: (e: number | '') => any;
placeholder?: string;
};
const MyNumberInput = (props: Props) => {
@@ -24,7 +25,7 @@ const MyNumberInput = (props: Props) => {
}
}}
>
<NumberInputField />
<NumberInputField placeholder={props?.placeholder} />
<NumberInputStepper>
<NumberIncrementStepper />
<NumberDecrementStepper />

View File

@@ -22,6 +22,7 @@ type Props = Omit<BoxProps, 'resize' | 'onChange'> & {
onChange?: (e: string) => void;
variables?: EditorVariablePickerType[];
defaultHeight?: number;
placeholder?: string;
};
const options = {

View File

@@ -27,18 +27,18 @@
"next-i18next": "15.2.0",
"papaparse": "^5.4.1",
"pdfjs-dist": "4.0.269",
"react": "18.2.0",
"react": "18.3.1",
"use-context-selector": "^1.4.4",
"react-day-picker": "^8.7.1",
"react-dom": "18.2.0",
"react-dom": "18.3.1",
"react-i18next": "13.5.0",
"react-beautiful-dnd": "^13.1.1"
},
"devDependencies": {
"@types/lodash": "^4.14.191",
"@types/papaparse": "^5.3.7",
"@types/react": "18.2.0",
"@types/react-dom": "18.2.0",
"@types/react": "18.3.0",
"@types/react-dom": "18.3.0",
"@types/react-beautiful-dnd": "^13.1.8"
}
}

pnpm-lock.yaml (generated, 1778 lines changed)

File diff suppressed because it is too large.

View File

@@ -1,4 +1,5 @@
{
"Collection tags": "Tags",
"Common Dataset": "Common dataset",
"Common Dataset Desc": "Can be built by importing files, web links, or manual entry",
"Confirm to rebuild embedding tip": "Are you sure to switch the knowledge base index?\nSwitching index is a very heavy operation that requires re-indexing all the data in your knowledge base, which may take a long time. Please ensure that the remaining points in your account are sufficient.\n\nIn addition, you need to be careful to modify the applications that select this knowledge base to avoid mixing them with other index model knowledge bases.",
@@ -6,6 +7,7 @@
"External file Dataset Desc": "You can import files from an external file library to build a knowledge base. Files are not stored twice",
"External id": "File id",
"External read url": "External read url",
"External read url tip": "You can configure the reading address of your file library. This allows users to read and authenticate. You can currently use the {{fileId}} variable to refer to the external file ID.",
"External url": "File read url",
"Folder Dataset": "Folder",
"Rebuild embedding start tip": "The task of switching index models has begun",

View File

@@ -1,4 +1,5 @@
{
"Click to view raw source": "View source",
"Click to view file": "Click to view the original file",
"Release the mouse to upload the file": "Release the mouse to upload the file",
"upload error description": "Only supports uploading multiple files or one folder at a time",

View File

@@ -1,4 +1,5 @@
{
"Collection tags": "集合标签",
"Common Dataset": "通用知识库",
"Common Dataset Desc": "可通过导入文件、网页链接或手动录入形式构建知识库",
"Confirm to rebuild embedding tip": "确认为知识库切换索引?\n切换索引是一个非常重量的操作需要对您知识库内所有数据进行重新索引时间可能较长请确保账号内剩余积分充足。\n\n此外你还需要注意修改选择该知识库的应用避免它们与其他索引模型知识库混用。",
@@ -6,6 +7,7 @@
"External file Dataset Desc": "可以从外部文件库导入文件构建知识库,文件不会进行二次存储",
"External id": "文件阅读ID",
"External read url": "外部预览地址",
"External read url tip": "可以配置你文件库的阅读地址。便于对用户进行阅读鉴权操作。目前可以使用 {{fileId}} 变量来指代外部文件ID。",
"External url": "文件访问URL",
"Folder Dataset": "文件夹",
"Rebuild embedding start tip": "切换索引模型任务已开始",

View File

@@ -1,4 +1,5 @@
{
"Click to view raw source": "点击查看来源",
"Click to view file": "点击查看原始文件",
"Release the mouse to upload the file": "松开鼠标上传文件",
"upload error description": "单次只支持上传多个文件或者一个文件夹",

View File

@@ -88,8 +88,6 @@ const nextConfig = {
},
transpilePackages: ['@fastgpt/*', 'ahooks', '@chakra-ui/*', 'react'],
experimental: {
// 外部包独立打包
serverComponentsExternalPackages: ['mongoose', 'pg'],
// 指定导出包优化,按需引入包模块
optimizePackageImports: ['mongoose', 'pg'],
outputFileTracingRoot: path.join(__dirname, '../../')

View File

@@ -42,13 +42,13 @@
"lodash": "^4.17.21",
"mermaid": "^10.2.3",
"nanoid": "^4.0.1",
"next": "13.5.2",
"next": "14.2.3",
"next-i18next": "15.2.0",
"nextjs-node-loader": "^1.1.5",
"nprogress": "^0.2.0",
"react": "18.2.0",
"react": "18.3.1",
"react-day-picker": "^8.7.1",
"react-dom": "18.2.0",
"react-dom": "18.3.1",
"react-hook-form": "7.43.1",
"react-i18next": "13.5.0",
"react-markdown": "^8.0.7",
@@ -71,12 +71,12 @@
"@types/jsonwebtoken": "^9.0.3",
"@types/lodash": "^4.14.191",
"@types/node": "^20.8.5",
"@types/react": "18.2.0",
"@types/react-dom": "18.2.0",
"@types/react": "18.3.0",
"@types/react-dom": "18.3.0",
"@types/react-syntax-highlighter": "^15.5.6",
"@types/request-ip": "^0.0.37",
"eslint": "8.34.0",
"eslint-config-next": "13.1.6",
"eslint-config-next": "14.2.3",
"nextjs-node-loader": "^1.1.5",
"typescript": "4.9.5"
}

View File

@@ -46,7 +46,7 @@ const QuoteModal = ({
title={
<Box>
{metadata ? (
<RawSourceBox {...metadata} canView={false} />
<RawSourceBox {...metadata} canView={showDetail} />
) : (
<>{t('core.chat.Quote Amount', { amount: rawSearch.length })}</>
)}

View File

@@ -14,7 +14,6 @@ import MyTooltip from '../MyTooltip';
import { useTranslation } from 'next-i18next';
import { EventNameEnum, eventBus } from '@/web/common/utils/eventbus';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { MARKDOWN_QUOTE_SIGN } from '@fastgpt/global/core/chat/constants';
const CodeLight = dynamic(() => import('./CodeLight'), { ssr: false });
@@ -132,7 +131,7 @@ const A = React.memo(function A({ children, ...props }: any) {
);
}
// quote link
// quote link(未使用)
if (children?.length === 1 && typeof children?.[0] === 'string') {
const text = String(children);
if (text === MARKDOWN_QUOTE_SIGN && props.href) {
@@ -147,7 +146,7 @@ const A = React.memo(function A({ children, ...props }: any) {
_hover={{
color: 'primary.700'
}}
onClick={() => getFileAndOpen(props.href)}
// onClick={() => getCollectionSourceAndOpen(props.href)}
/>
</MyTooltip>
);

View File

@@ -218,6 +218,7 @@ const QuoteItem = ({
<RawSourceBox
fontWeight={'bold'}
color={'black'}
collectionId={quoteItem.collectionId}
sourceName={quoteItem.sourceName}
sourceId={quoteItem.sourceId}
canView={canViewSource}

View File

@@ -1,33 +1,39 @@
import React, { useMemo } from 'react';
import { Box, BoxProps } from '@chakra-ui/react';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import MyTooltip from '@/components/MyTooltip';
import { useTranslation } from 'next-i18next';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useI18n } from '@/web/context/I18n';
type Props = BoxProps & {
sourceName?: string;
collectionId: string;
sourceId?: string;
canView?: boolean;
};
const RawSourceBox = ({ sourceId, sourceName = '', canView = true, ...props }: Props) => {
const RawSourceBox = ({
sourceId,
collectionId,
sourceName = '',
canView = true,
...props
}: Props) => {
const { t } = useTranslation();
const { fileT } = useI18n();
const { toast } = useToast();
const { setLoading } = useSystemStore();
const canPreview = useMemo(() => !!sourceId && canView, [canView, sourceId]);
const canPreview = !!sourceId && canView;
const icon = useMemo(() => getSourceNameIcon({ sourceId, sourceName }), [sourceId, sourceName]);
const read = getCollectionSourceAndOpen(collectionId);
return (
<MyTooltip label={canPreview ? fileT('Click to view file') : ''} shouldWrapChildren={false}>
<MyTooltip
label={canPreview ? fileT('Click to view raw source') : ''}
shouldWrapChildren={false}
>
<Box
color={'myGray.900'}
fontWeight={'medium'}
@@ -37,18 +43,7 @@ const RawSourceBox = ({ sourceId, sourceName = '', canView = true, ...props }: P
? {
cursor: 'pointer',
textDecoration: 'underline',
onClick: async () => {
setLoading(true);
try {
await getFileAndOpen(sourceId as string);
} catch (error) {
toast({
title: getErrText(error, t('error.fileNotFound')),
status: 'error'
});
}
setLoading(false);
}
onClick: read
}
: {})}
{...props}

View File

@@ -4,11 +4,11 @@ import {
DraggableStateSnapshot
} from '@fastgpt/web/components/common/DndDrag/index';
import Container from '../../components/Container';
import { DragHandleIcon, MinusIcon, SmallAddIcon } from '@chakra-ui/icons';
import { MinusIcon, SmallAddIcon } from '@chakra-ui/icons';
import { IfElseListItemType } from '@fastgpt/global/core/workflow/template/system/ifElse/type';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { ReferenceValueProps } from '@fastgpt/global/core/workflow/type/io';
import { useTranslation } from 'react-i18next';
import { useTranslation } from 'next-i18next';
import { ReferSelector, useReference } from '../render/RenderInput/templates/Reference';
import { WorkflowIOValueTypeEnum } from '@fastgpt/global/core/workflow/constants';
import {

View File

@@ -2,7 +2,7 @@ import React, { useCallback, useMemo } from 'react';
import NodeCard from './render/NodeCard';
import { NodeProps } from 'reactflow';
import { FlowNodeItemType } from '@fastgpt/global/core/workflow/type';
import { useTranslation } from 'react-i18next';
import { useTranslation } from 'next-i18next';
import {
Box,
Button,

View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { PgDatasetTableName } from '@fastgpt/global/common/vectorStore/constants';
import { connectionMongo } from '@fastgpt/service/common/mongo';
import { addLog } from '@fastgpt/service/common/system/log';

View File

@@ -4,7 +4,7 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { ApiRequestProps } from '@fastgpt/service/type/next';

View File

@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { ChatCompletionMessageParam } from '@fastgpt/global/core/ai/type';
import { countGptMessagesTokens } from '@fastgpt/service/common/string/tiktoken';

View File

@@ -7,7 +7,7 @@ import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/use
import { checkTeamAppLimit } from '@fastgpt/service/support/permission/teamLimit';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {

View File

@@ -6,7 +6,7 @@ import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { appId } = req.query as { appId: string };

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* 获取我的模型 */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {

View File

@@ -7,7 +7,7 @@ import { addDays } from 'date-fns';
import type { GetAppChatLogsParams } from '@/global/core/api/appReq.d';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { ChatItemCollectionName } from '@fastgpt/service/core/chat/chatItemSchema';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { AppListItemType } from '@fastgpt/global/core/app/type';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>): Promise<AppListItemType[]> {
// 凭证校验

View File

@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import type { AppUpdateParams } from '@/global/core/app/api';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { beforeUpdateAppFormat } from '@fastgpt/service/core/app/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* 获取我的模型 */
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { AppVersionSchemaType } from '@fastgpt/global/core/app/version';

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';

View File

@@ -1,5 +1,5 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authApp } from '@fastgpt/service/support/permission/auth/app';
import { MongoAppVersion } from '@fastgpt/service/core/app/versionSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';

View File

@@ -9,7 +9,7 @@ import { getChatItems } from '@fastgpt/service/core/chat/controller';
import { ChatErrEnum } from '@fastgpt/global/common/error/code/chat';
import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -5,7 +5,7 @@ import type { DatasetSimpleItemType } from '@fastgpt/global/core/dataset/type.d'
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
/* get all dataset by teamId or tmbId */
async function handler(

View File

@@ -17,8 +17,6 @@ import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/train
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { parseCsvTable2Chunks } from '@fastgpt/service/core/dataset/training/utils';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -106,8 +104,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
return collectionId;
});
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {

View File

@@ -1,153 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as FileIdCreateDatasetCollectionParams;
try {
await connectToDatabase();
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: body.datasetId
});
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
const chunks = rawText2Chunks({
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
startTrainingQueue(true);
jsonRes(res);
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}

View File

@@ -0,0 +1,149 @@
import type { NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { readFileContentFromMongo } from '@fastgpt/service/common/file/gridfs/controller';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileIdCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { MongoRawTextBuffer } from '@fastgpt/service/common/buffer/rawText/schema';
import { rawText2Chunks } from '@fastgpt/service/core/dataset/read';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
async function handler(
req: ApiRequestProps<FileIdCreateDatasetCollectionParams>,
res: NextApiResponse<any>
) {
const {
fileId,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body;
await connectToDatabase();
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
per: 'w',
datasetId: body.datasetId
});
// 1. read file
const { rawText, filename } = await readFileContentFromMongo({
teamId,
bucketName: BucketNameEnum.dataset,
fileId
});
// 2. split chunks
const chunks = rawText2Chunks({
rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 3. auth limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
await mongoSessionRun(async (session) => {
// 4. create collection
const { _id: collectionId } = await createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
name: filename,
fileId,
metadata: {
relatedImgId: fileId
},
// special metadata
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(rawText),
rawTextLength: rawText.length,
session
});
// 5. create training bill
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: filename,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getLLMModel(dataset.agentModel)?.name,
session
});
// 6. insert to training queue
await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((item, index) => ({
...item,
chunkIndex: index
})),
session
});
// 7. remove related image ttl
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': fileId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return collectionId;
});
// remove buffer
await MongoRawTextBuffer.deleteOne({ sourceId: fileId });
jsonRes(res);
}
export default NextAPI(handler);

View File

@@ -0,0 +1,186 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { uploadFile } from '@fastgpt/service/common/file/gridfs/controller';
import { getUploadModel } from '@fastgpt/service/common/file/multer';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { FileCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api';
import { removeFilesByPaths } from '@fastgpt/service/common/file/utils';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { getNanoid, hashStr } from '@fastgpt/global/common/string/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { createTrainingUsage } from '@fastgpt/service/support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getDatasetModel, getVectorModel } from '@fastgpt/service/core/ai/model';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoImage } from '@fastgpt/service/common/file/image/schema';
import { readRawTextByLocalFile } from '@fastgpt/service/common/file/read/utils';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
/**
* Creates the multer uploader
*/
const upload = getUploadModel({
maxSize: (global.feConfigs?.uploadFileMaxSize || 500) * 1024 * 1024
});
let filePaths: string[] = [];
try {
const { file, data, bucketName } = await upload.doUpload<FileCreateDatasetCollectionParams>(
req,
res,
BucketNameEnum.dataset
);
filePaths = [file.path];
if (!file || !bucketName) {
throw new Error('file is empty');
}
const { teamId, tmbId, dataset } = await authDataset({
req,
authApiKey: true,
per: 'w',
datasetId: data.datasetId
});
const {
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt
} = data;
const { fileMetadata, collectionMetadata, ...collectionData } = data;
const collectionName = file.originalname;
const relatedImgId = getNanoid();
// 1. read file
const { rawText } = await readRawTextByLocalFile({
teamId,
path: file.path,
metadata: {
...fileMetadata,
relatedId: relatedImgId
}
});
// 2. upload file
const fileId = await uploadFile({
teamId,
tmbId,
bucketName,
path: file.path,
filename: file.originalname,
contentType: file.mimetype,
metadata: fileMetadata
});
// 3. delete tmp file
removeFilesByPaths(filePaths);
// 4. split raw text to chunks
const { chunks } = splitText2Chunks({
text: rawText,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 5. check dataset limit
await checkDatasetLimit({
teamId,
insertLen: predictDataLimitLength(trainingType, chunks)
});
// 6. create collection and training bill
const { collectionId, insertResults } = await mongoSessionRun(async (session) => {
const { _id: collectionId } = await createOneCollection({
...collectionData,
name: collectionName,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.file,
fileId,
rawTextLength: rawText.length,
hashRawText: hashStr(rawText),
metadata: {
...collectionMetadata,
relatedImgId
},
session
});
const { billId } = await createTrainingUsage({
teamId,
tmbId,
appName: collectionName,
billSource: UsageSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getDatasetModel(dataset.agentModel)?.name
});
// 7. push chunks to training queue
const insertResults = await pushDataListToTrainingQueue({
teamId,
tmbId,
datasetId: dataset._id,
collectionId,
agentModel: dataset.agentModel,
vectorModel: dataset.vectorModel,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
}))
});
// 8. remove image expired time
await MongoImage.updateMany(
{
teamId,
'metadata.relatedId': relatedImgId
},
{
// Remove expiredTime to avoid ttl expiration
$unset: {
expiredTime: 1
}
},
{
session
}
);
return {
collectionId,
insertResults
};
});
jsonRes(res, {
data: { collectionId, results: insertResults }
});
} catch (error) {
removeFilesByPaths(filePaths);
return Promise.reject(error);
}
}
export const config = {
api: {
bodyParser: false
}
};
export default NextAPI(handler);

View File

@@ -8,6 +8,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { getFileById } from '@fastgpt/service/common/file/gridfs/controller';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -36,8 +37,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
data: {
...collection,
canWrite,
sourceName: collection?.name,
sourceId: collection?.fileId || collection?.rawLink,
...getCollectionSourceData(collection),
file
}
});

View File

@@ -0,0 +1,66 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middleware/entry';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
export type readCollectionSourceQuery = {
collectionId: string;
};
export type readCollectionSourceBody = {};
export type readCollectionSourceResponse = {
type: 'url';
value: string;
};
async function handler(
req: ApiRequestProps<readCollectionSourceBody, readCollectionSourceQuery>,
res: ApiResponseType<any>
): Promise<readCollectionSourceResponse> {
const { collection, teamId, tmbId } = await authDatasetCollection({
req,
authToken: true,
authApiKey: true,
collectionId: req.query.collectionId,
per: 'r'
});
const sourceUrl = await (async () => {
if (collection.type === DatasetCollectionTypeEnum.file && collection.fileId) {
const token = await createFileToken({
bucketName: BucketNameEnum.dataset,
teamId,
tmbId,
fileId: collection.fileId
});
return `${ReadFileBaseUrl}?token=${token}`;
}
if (collection.type === DatasetCollectionTypeEnum.link && collection.rawLink) {
return collection.rawLink;
}
if (collection.type === DatasetCollectionTypeEnum.externalFile) {
if (collection.externalFileId && collection.datasetId.externalReadUrl) {
return collection.datasetId.externalReadUrl.replace(
'{{fileId}}',
collection.externalFileId
);
}
if (collection.externalFileUrl) {
return collection.externalFileUrl;
}
}
return '';
})();
return {
type: 'url',
value: sourceUrl
};
}
export default NextAPI(handler);
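
A hedged client-side sketch of consuming this endpoint (the route path is assumed from the file's location and from the getCollectionSourceAndOpen hook referenced elsewhere in this commit; the response shape follows readCollectionSourceResponse above):

```ts
async function openCollectionSource(collectionId: string) {
  const res = await fetch(
    `/api/core/dataset/collection/read?collectionId=${encodeURIComponent(collectionId)}`
  );
  const { data } = await res.json(); // { type: 'url', value: string }
  if (data?.value) {
    window.open(data.value, '_blank'); // external files open at their (possibly templated) read URL
  }
}
```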

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { deleteDatasetData } from '@/service/core/dataset/data/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { id: dataId } = req.query as {

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
export type Response = {
id: string;

View File

@@ -15,7 +15,7 @@ import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import { simpleText } from '@fastgpt/global/common/string/tools';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { collectionId, q, a, indexes } = req.body as InsertOneDatasetDataProps;

View File

@@ -7,7 +7,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { PagingData } from '@/types';
import { replaceRegChars } from '@fastgpt/global/common/string/tools';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let {

View File

@@ -1,7 +1,6 @@
/* push data to training queue */
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type {
PushDatasetDataProps,
PushDatasetDataResponse
@@ -10,7 +9,7 @@ import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const body = req.body as PushDatasetDataProps;

View File

@@ -6,7 +6,7 @@ import { authDatasetData } from '@/service/support/permission/auth/dataset';
import { pushGenerateVectorUsage } from '@/service/support/wallet/usage/push';
import { UpdateDatasetDataProps } from '@/global/core/dataset/api';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { id, q = '', a, indexes = [] } = req.body as UpdateDatasetDataProps;

View File

@@ -8,7 +8,7 @@ import {
checkExportDatasetLimit,
updateExportDatasetLimit
} from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
let { datasetId } = req.query as {

View File

@@ -3,7 +3,7 @@ import { authFile } from '@fastgpt/service/support/permission/auth/file';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { rawText2Chunks, readDatasetSourceRawText } from '@fastgpt/service/core/dataset/read';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type PostPreviewFilesChunksProps = {

View File

@@ -1,36 +0,0 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import { authDatasetFile } from '@fastgpt/service/support/permission/auth/dataset';
import { createFileToken } from '@fastgpt/service/support/permission/controller';
import { BucketNameEnum, ReadFileBaseUrl } from '@fastgpt/global/common/file/constants';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { fileId } = req.query as { fileId: string };
if (!fileId) {
throw new Error('fileId is empty');
}
const { teamId, tmbId } = await authDatasetFile({ req, authToken: true, fileId, per: 'r' });
const token = await createFileToken({
bucketName: BucketNameEnum.dataset,
teamId,
tmbId,
fileId
});
jsonRes(res, {
data: `${ReadFileBaseUrl}?token=${token}`
});
} catch (error) {
jsonRes(res, {
code: 500,
error
});
}
}

View File

@@ -6,7 +6,7 @@ import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { mongoRPermission } from '@fastgpt/global/support/permission/utils';
import { authUserRole } from '@fastgpt/service/support/permission/auth/user';
import { getVectorModel } from '@fastgpt/service/core/ai/model';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { parentId, type } = req.query as { parentId?: string; type?: DatasetTypeEnum };

View File

@@ -12,7 +12,7 @@ import {
checkTeamAIPoints,
checkTeamReRankPermission
} from '@fastgpt/service/support/permission/teamLimit';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const {

View File

@@ -1,5 +1,5 @@
import type { ApiRequestProps, ApiResponseType } from '@fastgpt/service/type/next';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';

View File

@@ -1,4 +1,4 @@
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';

View File

@@ -7,7 +7,7 @@ import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { getUserChatInfoAndAuthTeamPoints } from '@/service/support/permission/auth/team';
import { PostWorkflowDebugProps, PostWorkflowDebugResponse } from '@/global/core/workflow/api';
import { authPluginCrud } from '@fastgpt/service/support/permission/auth/plugin';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(
req: NextApiRequest,

View File

@@ -1,7 +1,7 @@
import type { NextApiRequest, NextApiResponse } from 'next';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { checkExportDatasetLimit } from '@fastgpt/service/support/user/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
const { datasetId } = req.query as {

View File

@@ -9,7 +9,7 @@ import { authChatCert } from '@/service/support/permission/auth/chat';
import { MongoApp } from '@fastgpt/service/core/app/schema';
import { getGuideModule, splitGuideModule } from '@fastgpt/global/core/workflow/utils';
import { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
const upload = getUploadModel({
maxSize: 2

View File

@@ -44,7 +44,7 @@ import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runti
import { dispatchWorkFlowV1 } from '@fastgpt/service/core/workflow/dispatchV1';
import { setEntryEntries } from '@fastgpt/service/core/workflow/dispatchV1/utils';
import { NextAPI } from '@/service/middle/entry';
import { NextAPI } from '@/service/middleware/entry';
import { getAppLatestVersion } from '@fastgpt/service/core/app/controller';
type FastGptWebChatProps = {

View File

@@ -84,7 +84,7 @@ const Header = ({}: {}) => {
...props
}: {
name: string;
type: `${DatasetCollectionTypeEnum}`;
type: DatasetCollectionTypeEnum;
callback?: (id: string) => void;
trainingType?: TrainingModeEnum;
rawLink?: string;

View File

@@ -38,16 +38,14 @@ import { TabEnum } from '..';
import { useUserStore } from '@/web/support/user/useUserStore';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import {
DatasetCollectionTypeMap,
TrainingModeEnum,
TrainingTypeMap
} from '@fastgpt/global/core/dataset/constants';
import { DatasetCollectionTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { formatTime2YMDHM } from '@fastgpt/global/common/string/time';
import { formatFileSize } from '@fastgpt/global/common/file/tools';
import { getFileAndOpen } from '@/web/core/dataset/utils';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';
import MyTooltip from '@/components/MyTooltip';
import { usePagination } from '@fastgpt/web/hooks/usePagination';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
import { useI18n } from '@/web/context/I18n';
const DataCard = () => {
const BoxRef = useRef<HTMLDivElement>(null);
@@ -62,6 +60,7 @@ const DataCard = () => {
};
const { Loading, setIsLoading } = useLoading({ defaultLoading: true });
const { t } = useTranslation();
const { datasetT } = useI18n();
const [searchText, setSearchText] = useState('');
const { toast } = useToast();
const { openConfirm, ConfirmModal } = useConfirm({
@@ -69,6 +68,7 @@ const DataCard = () => {
type: 'delete'
});
const { isOpen, onOpen, onClose } = useDisclosure();
const readSource = getCollectionSourceAndOpen(collectionId);
const {
data: datasetDataList,
@@ -169,7 +169,17 @@ const DataCard = () => {
value: webSelector
}
]
: []),
        ...(collection.tags
          ? [
              {
                label: datasetT('Collection tags'),
                value: collection.tags?.join(', ') || '-'
              }
            ]
          : [])
];
}, [collection, t]);
@@ -196,13 +206,15 @@ const DataCard = () => {
/>
<Flex className="textEllipsis" flex={'1 0 0'} mr={[3, 5]} alignItems={'center'}>
<Box lineHeight={1.2}>
{collection?._id && (
<RawSourceBox
sourceName={collection?.name}
sourceId={collection?.fileId || collection?.rawLink}
collectionId={collection._id}
{...getCollectionSourceData(collection)}
fontSize={['md', 'lg']}
color={'black'}
textDecoration={'none'}
/>
)}
<Box fontSize={'sm'} color={'myGray.500'}>
{t('core.dataset.collection.id')}:{' '}
<Box as={'span'} userSelect={'all'}>
@@ -412,10 +424,7 @@ const DataCard = () => {
</Flex>
))}
{collection?.sourceId && (
<Button
variant={'whitePrimary'}
onClick={() => collection.sourceId && getFileAndOpen(collection.sourceId)}
>
<Button variant={'whitePrimary'} onClick={readSource}>
{t('core.dataset.collection.metadata.read source')}
</Button>
)}

View File

@@ -15,12 +15,12 @@ import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest } from '@fastgpt/web/hooks/useRequest';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useRouter } from 'next/router';
import { TabEnum } from '../../../index';
import {
postCreateDatasetCsvTableCollection,
postCreateDatasetExternalFileCollection,
postCreateDatasetFileCollection,
postCreateDatasetLinkCollection,
postCreateDatasetTextCollection
@@ -95,6 +95,13 @@ const Upload = () => {
...commonParams,
fileId: item.dbFileId
});
} else if (importSource === ImportDataSourceEnum.externalFile && item.externalFileUrl) {
await postCreateDatasetExternalFileCollection({
...commonParams,
externalFileUrl: item.externalFileUrl,
externalFileId: item.externalFileId,
filename: item.sourceName
});
}
setSources((state) =>

View File

@@ -44,7 +44,8 @@ const PreviewChunks = ({
if (importSource === ImportDataSourceEnum.csvTable) {
return getPreviewChunks({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),
@@ -55,7 +56,8 @@ const PreviewChunks = ({
return getPreviewChunks({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
chunkSize,
overlapRatio: chunkOverlapRatio,
customSplitChar: processParamsForm.getValues('customSplitChar'),

View File

@@ -22,7 +22,7 @@ const PreviewRawText = ({
const { importSource, processParamsForm } = useContextSelector(DatasetImportContext, (v) => v);
const { data, isLoading } = useQuery(
['previewSource', previewSource.dbFileId, previewSource.link, previewSource.sourceUrl],
['previewSource', previewSource.dbFileId, previewSource.link, previewSource.externalFileUrl],
() => {
if (importSource === ImportDataSourceEnum.fileCustom && previewSource.rawText) {
return {
@@ -39,7 +39,8 @@ const PreviewRawText = ({
return getPreviewFileContent({
type: importType2ReadType(importSource),
sourceId: previewSource.dbFileId || previewSource.link || previewSource.sourceUrl || '',
sourceId:
previewSource.dbFileId || previewSource.link || previewSource.externalFileUrl || '',
isQAImport: false,
selector: processParamsForm.getValues('webSelector')
});

View File

@@ -50,16 +50,16 @@ const CustomLinkInput = () => {
const { register, reset, handleSubmit, control } = useForm<{
list: {
sourceName: string;
sourceUrl: string;
externalId: string;
externalFileUrl: string;
externalFileId: string;
}[];
}>({
defaultValues: {
list: [
{
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
}
]
}
@@ -80,8 +80,8 @@ const CustomLinkInput = () => {
reset({
list: sources.map((item) => ({
sourceName: item.sourceName,
sourceUrl: item.sourceUrl || '',
externalId: item.externalId || ''
externalFileUrl: item.externalFileUrl || '',
externalFileId: item.externalFileId || ''
}))
});
}
@@ -104,7 +104,7 @@ const CustomLinkInput = () => {
<Tr key={item.id}>
<Td>
<Input
{...register(`list.${index}.sourceUrl`, {
{...register(`list.${index}.externalFileUrl`, {
required: index !== list.length - 1,
onBlur(e) {
const val = (e.target.value || '') as string;
@@ -112,15 +112,15 @@ const CustomLinkInput = () => {
const sourceName = val.split('/').pop() || '';
update(index, {
...list[index],
sourceUrl: val,
externalFileUrl: val,
sourceName: decodeURIComponent(sourceName)
});
}
if (val && index === list.length - 1) {
append({
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
});
}
}
@@ -128,7 +128,7 @@ const CustomLinkInput = () => {
/>
</Td>
<Td>
<Input {...register(`list.${index}.externalId`)} />
<Input {...register(`list.${index}.externalFileId`)} />
</Td>
<Td>
<Input {...register(`list.${index}.sourceName`)} />
@@ -154,26 +154,26 @@ const CustomLinkInput = () => {
onClick={() => {
append({
sourceName: '',
sourceUrl: '',
externalId: ''
externalFileUrl: '',
externalFileId: ''
});
}}
>
{commonT('Add new')}
</Button>
<Button
isDisabled={list.filter((item) => !!item.sourceUrl).length === 0}
isDisabled={list.filter((item) => !!item.externalFileUrl).length === 0}
onClick={handleSubmit((data) => {
setSources(
data.list
.filter((item) => !!item.sourceUrl)
.filter((item) => !!item.externalFileUrl)
.map((item) => ({
id: getNanoid(32),
createStatus: 'waiting',
sourceName: item.sourceName || item.sourceUrl,
icon: getFileIcon(item.sourceUrl),
externalId: item.externalId,
sourceUrl: item.sourceUrl
sourceName: item.sourceName || item.externalFileUrl,
icon: getFileIcon(item.externalFileUrl),
externalFileId: item.externalFileId,
externalFileUrl: item.externalFileUrl
}))
);

View File

@@ -1,10 +1,9 @@
import React, { useState, useMemo } from 'react';
import { useRouter } from 'next/router';
import { Box, Flex, Button, IconButton, Input, Textarea } from '@chakra-ui/react';
import { Box, Flex, Button, IconButton, Input, Textarea, HStack } from '@chakra-ui/react';
import { DeleteIcon } from '@chakra-ui/icons';
import { delDatasetById } from '@/web/core/dataset/api';
import { useSelectFile } from '@/web/common/file/hooks/useSelectFile';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useConfirm } from '@fastgpt/web/hooks/useConfirm';
import { useForm } from 'react-hook-form';
import { compressImgFileAndUpload } from '@/web/common/file/controller';
@@ -24,6 +23,7 @@ import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import MyDivider from '@fastgpt/web/components/common/MyDivider/index';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
const Info = ({ datasetId }: { datasetId: string }) => {
const { t } = useTranslation();
@@ -191,9 +191,10 @@ const Info = ({ datasetId }: { datasetId: string }) => {
{datasetDetail.type === DatasetTypeEnum.externalFile && (
<>
<Flex w={'100%'} alignItems={'center'}>
<Box fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
{datasetT('External read url')}
</Box>
<HStack fontSize={['sm', 'md']} flex={['0 0 90px', '0 0 160px']} w={0}>
<Box>{datasetT('External read url')}</Box>
<QuestionTip label={datasetT('External read url tip')} />
</HStack>
<Input
flex={[1, '0 0 320px']}
placeholder="https://test.com/read?fileId={{fileId}}"

View File

@@ -237,6 +237,7 @@ const InputDataModal = ({
w={'210px'}
className="textEllipsis3"
whiteSpace={'pre-wrap'}
collectionId={collection._id}
sourceName={collection.sourceName}
sourceId={collection.sourceId}
mb={6}

View File

@@ -116,13 +116,13 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
value: DatasetTypeEnum.websiteDataset,
icon: 'core/dataset/websiteDataset',
desc: datasetT('Website Dataset Desc')
},
{
title: datasetT('External File'),
value: DatasetTypeEnum.externalFile,
icon: 'core/dataset/externalDataset',
desc: datasetT('External file Dataset Desc')
}
// {
// title: datasetT('External File'),
// value: DatasetTypeEnum.externalFile,
// icon: 'core/dataset/websiteDataset',
// desc: datasetT('External file Dataset Desc')
// }
]
: [])
]}

View File

@@ -1,37 +0,0 @@
import { jsonRes } from '@fastgpt/service/common/response';
import type { NextApiResponse } from 'next';
import { connectToDatabase } from '../mongo';
import { withNextCors } from '@fastgpt/service/common/middle/cors';
import { ApiRequestProps } from '@fastgpt/service/type/next';
export type NextApiHandler<T = any> = (
req: ApiRequestProps,
res: NextApiResponse<T>
) => unknown | Promise<unknown>;
export const NextAPI = (...args: NextApiHandler[]): NextApiHandler => {
return async function api(req: ApiRequestProps, res: NextApiResponse) {
try {
await Promise.all([withNextCors(req, res), connectToDatabase()]);
let response = null;
for (const handler of args) {
response = await handler(req, res);
}
const contentType = res.getHeader('Content-Type');
if ((!contentType || contentType === 'application/json') && !res.writableFinished) {
return jsonRes(res, {
code: 200,
data: response
});
}
} catch (error) {
return jsonRes(res, {
code: 500,
error,
url: req.url
});
}
};
};

View File

@@ -0,0 +1,6 @@
import { connectToDatabase } from '../mongo';
import { NextEntry } from '@fastgpt/service/common/middle/entry';
export const NextAPI = NextEntry({
beforeCallback: [connectToDatabase()]
});
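
The per-app wrapper removed above is replaced by a factory from `@fastgpt/service/common/middle/entry`. The real `NextEntry` implementation is not shown in this diff; the following is only a rough sketch of such a factory, assuming it mirrors the removed wrapper's behaviour (shared before-callbacks, sequential handlers, JSON envelope). `makeEntry` is a hypothetical name:

```typescript
import type { NextApiRequest, NextApiResponse } from 'next';

type NextApiHandler<T = any> = (
  req: NextApiRequest,
  res: NextApiResponse<T>
) => unknown | Promise<unknown>;

// Hypothetical factory: await shared before-callbacks, run the handlers in order,
// and JSON-wrap the last return value unless the response has already been sent.
export function makeEntry({ beforeCallback = [] }: { beforeCallback?: Promise<unknown>[] } = {}) {
  return (...handlers: NextApiHandler[]): NextApiHandler =>
    async (req, res) => {
      try {
        await Promise.all(beforeCallback);
        let response: unknown = null;
        for (const handler of handlers) {
          response = await handler(req, res);
        }
        if (!res.writableEnded) {
          res.status(200).json({ code: 200, data: response });
        }
      } catch (error) {
        res.status(500).json({ code: 500, error: String(error), url: req.url });
      }
    };
}

// Usage mirrors the new entry file: const NextAPI = makeEntry({ beforeCallback: [connectToDatabase()] });
```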

View File

@@ -14,6 +14,7 @@ import type {
CreateDatasetCollectionParams,
CsvTableCreateDatasetCollectionParams,
DatasetUpdateBody,
ExternalFileCreateDatasetCollectionParams,
FileIdCreateDatasetCollectionParams,
LinkCreateDatasetCollectionParams,
PostWebsiteSyncParams,
@@ -44,6 +45,7 @@ import type {
PostPreviewFilesChunksProps,
PreviewChunksResponse
} from '@/pages/api/core/dataset/file/getPreviewChunks';
import type { readCollectionSourceResponse } from '@/pages/api/core/dataset/collection/read';
/* ======================== dataset ======================= */
export const getDatasets = (data: { parentId?: string; type?: DatasetTypeEnum }) =>
@@ -85,7 +87,9 @@ export const getDatasetCollectionById = (id: string) =>
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetFileCollection = (data: FileIdCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/file`, data, { timeout: 120000 });
POST<{ collectionId: string }>(`/core/dataset/collection/create/fileId`, data, {
timeout: 120000
});
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
@@ -94,6 +98,12 @@ export const postCreateDatasetCsvTableCollection = (data: CsvTableCreateDatasetC
POST<{ collectionId: string }>(`/core/dataset/collection/create/csvTable`, data, {
timeout: 120000
});
export const postCreateDatasetExternalFileCollection = (
data: ExternalFileCreateDatasetCollectionParams
) =>
POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/externalFileUrl`, data, {
timeout: 120000
});
export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
POST(`/core/dataset/collection/update`, data);
@@ -144,6 +154,6 @@ export const getDatasetTrainingQueue = (datasetId: string) =>
export const getPreviewChunks = (data: PostPreviewFilesChunksProps) =>
POST<PreviewChunksResponse>('/core/dataset/file/getPreviewChunks', data);
/* ================== file ======================== */
export const getFileViewUrl = (fileId: string) =>
GET<string>('/core/dataset/file/getPreviewUrl', { fileId });
/* ================== read source ======================== */
export const getCollectionSource = (collectionId: string) =>
GET<readCollectionSourceResponse>('/core/dataset/collection/read', { collectionId });
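
A hedged usage sketch of the new client helper, mirroring the external-file branch added to the Upload step above. All values are placeholders, and the accepted fields are assumed to follow `ExternalFileCreateDatasetCollectionParams` (`externalFileUrl`, `externalFileId`, `filename` plus the common collection params):

```typescript
import { postCreateDatasetExternalFileCollection } from '@/web/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';

// Placeholder values throughout; returns the id of the newly created collection.
async function importExternalFile() {
  const { collectionId } = await postCreateDatasetExternalFileCollection({
    datasetId: 'your-dataset-id',
    trainingType: TrainingModeEnum.chunk,
    chunkSize: 512,
    chunkSplitter: '',
    qaPrompt: '',
    externalFileUrl: 'https://example.com/files/report.pdf',
    externalFileId: 'file-123',
    filename: 'report.pdf'
  });
  return collectionId;
}
```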

View File

@@ -1,5 +1,9 @@
import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
import { DatasetTypeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import {
DatasetCollectionTypeEnum,
DatasetTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import type {
DatasetCollectionItemType,
DatasetItemType
@@ -46,7 +50,7 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
},
parentId: '',
name: '',
type: 'file',
type: DatasetCollectionTypeEnum.file,
updateTime: new Date(),
canWrite: false,
sourceName: '',

View File

@@ -0,0 +1,34 @@
import { useSystemStore } from '@/web/common/system/useSystemStore';
import { getCollectionSource } from '@/web/core/dataset/api';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { useTranslation } from 'next-i18next';
export function getCollectionSourceAndOpen(collectionId: string) {
const { toast } = useToast();
const { t } = useTranslation();
const { setLoading } = useSystemStore();
return async () => {
try {
setLoading(true);
const { value: url } = await getCollectionSource(collectionId);
if (!url) {
throw new Error('No file found');
}
if (url.startsWith('/')) {
window.open(`${location.origin}${url}`, '_blank');
} else {
window.open(url, '_blank');
}
} catch (error) {
toast({
title: getErrText(error, t('error.fileNotFound')),
status: 'error'
});
}
setLoading(false);
};
}
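
A minimal usage sketch for the factory above; the component name and button label are placeholders. Because it calls React hooks internally, it must be invoked during a component's render, as the `DataCard` change earlier in this diff does:

```tsx
import React from 'react';
import { Button } from '@chakra-ui/react';
import { getCollectionSourceAndOpen } from '@/web/core/dataset/hooks/readCollectionSource';

// Illustrative component: opens the collection's source (signed file URL, raw link,
// or external read URL) in a new tab when clicked.
const ReadSourceButton = ({ collectionId }: { collectionId: string }) => {
  // Must run during render: the factory calls React hooks (useToast, useTranslation, useSystemStore).
  const readSource = getCollectionSourceAndOpen(collectionId);

  return (
    <Button variant={'whitePrimary'} onClick={readSource}>
      Read source
    </Button>
  );
};

export default ReadSourceButton;
```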

Some files were not shown because too many files have changed in this diff.