Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-22 12:20:34 +00:00)

* update: Add type
* fix: update import statement for NextApiRequest type
* fix: update imports to use type for LexicalEditor and EditorState
* Refactor imports to use 'import type' for type-only imports across multiple files
  - Updated imports in various components and API files to use 'import type' for better clarity and to optimize TypeScript's type checking.
  - Ensured consistent usage of type imports in files related to chat, dataset, workflow, and user management.
  - Improved code readability and maintainability by distinguishing between value and type imports.
* refactor: remove old ESLint configuration and add new rules
  - Deleted the old ESLint configuration file from the app project.
  - Added a new ESLint configuration file with updated rules and settings.
  - Changed imports to use type-only imports in various files for better clarity and performance.
  - Updated TypeScript configuration to remove unnecessary options.
  - Added an ESLint ignore file to exclude build and dependency directories from linting.
* fix: update imports to use 'import type' for type-only imports in schema files
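
The commit message above centers on TypeScript type-only imports. A minimal sketch of the pattern, using the NextApiRequest and Lexical types the message itself mentions (the exact changed files are not shown here):

import type { NextApiRequest } from 'next';
import { type LexicalEditor, type EditorState } from 'lexical';

Because type-only imports are erased at compile time, they add no runtime dependency and make it explicit that a symbol is used only in type positions.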
209 lines · 4.6 KiB · TypeScript

import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { DatasetSourceReadTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { readFileContentFromMongo } from '../../common/file/gridfs/controller';
import { urlsFetch } from '../../common/string/cheerio';
import { parseCsvTable2Chunks } from './training/utils';
import { type TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import axios from 'axios';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import {
  type APIFileServer,
  type FeishuServer,
  type YuqueServer
} from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './apiDataset/api';

export const readFileRawTextByUrl = async ({
  teamId,
  tmbId,
  url,
  customPdfParse,
  relatedId
}: {
  teamId: string;
  tmbId: string;
  url: string;
  customPdfParse?: boolean;
  relatedId: string; // externalFileId / apiFileId
}) => {
  // Download the remote file as a binary buffer
  const response = await axios({
    method: 'get',
    url: url,
    responseType: 'arraybuffer'
  });
  const extension = parseFileExtensionFromUrl(url);

  const buffer = Buffer.from(response.data, 'binary');

  // Parse the buffer into raw text according to the file extension
  const { rawText } = await readRawContentByFileBuffer({
    customPdfParse,
    isQAImport: false,
    extension,
    teamId,
    tmbId,
    buffer,
    encoding: 'utf-8',
    metadata: {
      relatedId
    }
  });

  return rawText;
};

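// Usage sketch for readFileRawTextByUrl (illustrative only; the IDs and URL
// below are hypothetical):
//
//   const rawText = await readFileRawTextByUrl({
//     teamId: 'team_123',
//     tmbId: 'tmb_456',
//     url: 'https://example.com/files/guide.pdf',
//     customPdfParse: true,
//     relatedId: 'externalFile_789'
//   });
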
/*
  fileLocal - local file, read from MongoDB (GridFS); sourceId is the fileId
  link - fetch the URL over HTTP
  externalFile / apiFile - fetch the file content via HTTP request
*/
export const readDatasetSourceRawText = async ({
  teamId,
  tmbId,
  type,
  sourceId,
  isQAImport,
  selector,
  externalFileId,
  apiServer,
  feishuServer,
  yuqueServer,
  customPdfParse
}: {
  teamId: string;
  tmbId: string;
  type: DatasetSourceReadTypeEnum;
  sourceId: string;
  customPdfParse?: boolean;

  isQAImport?: boolean; // csv data
  selector?: string; // link selector
  externalFileId?: string; // external file dataset
  apiServer?: APIFileServer; // api dataset
  feishuServer?: FeishuServer; // feishu dataset
  yuqueServer?: YuqueServer; // yuque dataset
}): Promise<{
  title?: string;
  rawText: string;
}> => {
  if (type === DatasetSourceReadTypeEnum.fileLocal) {
    // Local upload: sourceId is the GridFS file id
    const { filename, rawText } = await readFileContentFromMongo({
      teamId,
      tmbId,
      bucketName: BucketNameEnum.dataset,
      fileId: sourceId,
      isQAImport,
      customPdfParse
    });
    return {
      title: filename,
      rawText
    };
  } else if (type === DatasetSourceReadTypeEnum.link) {
    // Web link: sourceId is the URL; selector narrows the scraped content
    const result = await urlsFetch({
      urlList: [sourceId],
      selector
    });

    return {
      title: result[0]?.title,
      rawText: result[0]?.content || ''
    };
  } else if (type === DatasetSourceReadTypeEnum.externalFile) {
    // External file: sourceId is a download URL
    if (!externalFileId) return Promise.reject('FileId not found');
    const rawText = await readFileRawTextByUrl({
      teamId,
      tmbId,
      url: sourceId,
      relatedId: externalFileId,
      customPdfParse
    });
    return {
      rawText
    };
  } else if (type === DatasetSourceReadTypeEnum.apiFile) {
    // API dataset: sourceId is the provider's file id
    const { title, rawText } = await readApiServerFileContent({
      apiServer,
      feishuServer,
      yuqueServer,
      apiFileId: sourceId,
      teamId,
      tmbId,
      customPdfParse
    });
    return {
      title,
      rawText
    };
  }
  return {
    title: '',
    rawText: ''
  };
};

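// Usage sketch for the link branch of readDatasetSourceRawText (illustrative
// only; the IDs and URL are hypothetical):
//
//   const { title, rawText } = await readDatasetSourceRawText({
//     teamId: 'team_123',
//     tmbId: 'tmb_456',
//     type: DatasetSourceReadTypeEnum.link,
//     sourceId: 'https://example.com/article',
//     selector: 'article'
//   });
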
export const readApiServerFileContent = async ({
  apiServer,
  feishuServer,
  yuqueServer,
  apiFileId,
  teamId,
  tmbId,
  customPdfParse
}: {
  apiServer?: APIFileServer;
  feishuServer?: FeishuServer;
  yuqueServer?: YuqueServer;
  apiFileId: string;
  teamId: string;
  tmbId: string;
  customPdfParse?: boolean;
}): Promise<{
  title?: string;
  rawText: string;
}> => {
  // A self-hosted API dataset server takes precedence
  if (apiServer) {
    return useApiDatasetRequest({ apiServer }).getFileContent({
      teamId,
      tmbId,
      apiFileId,
      customPdfParse
    });
  }

  // Feishu and Yuque datasets are handled by the pro plugin
  if (feishuServer || yuqueServer) {
    return global.getProApiDatasetFileContent({
      feishuServer,
      yuqueServer,
      apiFileId
    });
  }

  return Promise.reject('No apiServer or feishuServer or yuqueServer');
};

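// Usage sketch for readApiServerFileContent (illustrative only; the shape of
// the apiServer config is not shown in this file, so it is left as a variable):
//
//   const { title, rawText } = await readApiServerFileContent({
//     apiServer: myApiServerConfig, // APIFileServer
//     apiFileId: 'file_abc',
//     teamId: 'team_123',
//     tmbId: 'tmb_456'
//   });
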
export const rawText2Chunks = ({
  rawText,
  isQAImport,
  chunkSize = 512,
  ...splitProps
}: {
  rawText: string;
  isQAImport?: boolean;
} & TextSplitProps) => {
  // QA import: the raw text is a CSV table of question/answer pairs
  if (isQAImport) {
    const { chunks } = parseCsvTable2Chunks(rawText);
    return chunks;
  }

  // Otherwise split the plain text into size-bounded chunks
  const { chunks } = splitText2Chunks({
    text: rawText,
    chunkSize,
    ...splitProps
  });

  return chunks.map((item) => ({
    q: item,
    a: ''
  }));
};

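// Usage sketch: plain text comes back as { q, a } pairs with empty answers
// (the chunkSize value here is illustrative):
//
//   const chunks = rawText2Chunks({ rawText: someLongText, chunkSize: 1024 });
//   // => [{ q: '...first chunk...', a: '' }, { q: '...second chunk...', a: '' }]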