mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-18 09:24:03 +00:00
External dataset (#1519)
* perf: local file create collection * rename middleware * perf: remove code * feat: next14 * feat: external file dataset * collection tags field * external file dataset doc * fix: ts
This commit is contained in:
@@ -7,7 +7,7 @@ import { MongoFileSchema } from './schema';
|
||||
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
|
||||
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
|
||||
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
|
||||
import { readFileRawContent } from '../read/utils';
|
||||
import { readRawContentByFileBuffer } from '../read/utils';
|
||||
import { PassThrough } from 'stream';
|
||||
|
||||
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
|
||||
@@ -196,7 +196,7 @@ export const readFileContentFromMongo = async ({
|
||||
});
|
||||
})();
|
||||
|
||||
const { rawText } = await readFileRawContent({
|
||||
const { rawText } = await readRawContentByFileBuffer({
|
||||
extension,
|
||||
isQAImport,
|
||||
teamId,
|
||||
|
@@ -1,11 +1,12 @@
|
||||
import { markdownProcess, simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
|
||||
import { markdownProcess } from '@fastgpt/global/common/string/markdown';
|
||||
import { uploadMongoImg } from '../image/controller';
|
||||
import { MongoImageTypeEnum } from '@fastgpt/global/common/file/image/constants';
|
||||
import { addHours } from 'date-fns';
|
||||
|
||||
import { WorkerNameEnum, runWorker } from '../../../worker/utils';
|
||||
import fs from 'fs';
|
||||
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
|
||||
import { ReadFileResponse } from '../../../worker/file/type';
|
||||
import { rawTextBackupPrefix } from '@fastgpt/global/core/dataset/read';
|
||||
|
||||
export const initMarkdownText = ({
|
||||
teamId,
|
||||
@@ -28,7 +29,34 @@ export const initMarkdownText = ({
|
||||
})
|
||||
});
|
||||
|
||||
export const readFileRawContent = async ({
|
||||
/**
 * Arguments accepted by `readRawTextByLocalFile`.
 */
export type readRawTextByLocalFileParams = {
  /** Team identifier, forwarded unchanged to the buffer reader. */
  teamId: string;
  /** Location of the file on the local filesystem; its suffix determines the extension. */
  path: string;
  /** Optional free-form metadata, forwarded unchanged to the buffer reader. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- kept as `any` so existing callers stay compatible
  metadata?: Record<string, any>;
};
|
||||
/**
 * Read a file from the local filesystem and extract its raw text.
 *
 * The extension is taken from the text after the last `.` in `path`
 * (lower-cased, empty string when there is none). The file encoding is
 * detected from the file's bytes, and the buffer is then handed to
 * `readRawContentByFileBuffer` with `isQAImport` fixed to `false`.
 *
 * @param params - team id, file path and optional metadata.
 * @returns an object holding the extracted `rawText`.
 * @throws rejects when the file cannot be read or the buffer reader fails.
 */
export const readRawTextByLocalFile = async (params: readRawTextByLocalFileParams) => {
  const { path, teamId, metadata } = params;

  const extension = path.split('.').pop()?.toLowerCase() ?? '';

  // Non-blocking read: the synchronous variant would stall the event loop
  // for the whole duration of the disk access inside this async function.
  const buffer = await fs.promises.readFile(path);
  const encoding = detectFileEncoding(buffer);

  const { rawText } = await readRawContentByFileBuffer({
    extension,
    isQAImport: false,
    teamId,
    encoding,
    buffer,
    metadata
  });

  return {
    rawText
  };
};
|
||||
|
||||
export const readRawContentByFileBuffer = async ({
|
||||
extension,
|
||||
isQAImport,
|
||||
teamId,
|
||||
@@ -69,9 +97,3 @@ export const readFileRawContent = async ({
|
||||
|
||||
return { rawText };
|
||||
};
|
||||
|
||||
/**
 * Convert an HTML string to markdown.
 *
 * The conversion is delegated to the `htmlStr2Md` worker, and the worker's
 * output is passed through `simpleMarkdownText` before being returned.
 *
 * @param html - HTML source; `null`, `undefined` and `''` are all sent to the worker as an empty string.
 * @returns the result of `simpleMarkdownText` applied to the worker output.
 */
export const htmlToMarkdown = async (html?: string | null) => {
  const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html ?? '' });

  return simpleMarkdownText(md);
};
|
||||
|
38
packages/service/common/middle/entry.ts
Normal file
38
packages/service/common/middle/entry.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { jsonRes } from '../response';
|
||||
import type { NextApiResponse } from 'next';
|
||||
import { withNextCors } from './cors';
|
||||
import { ApiRequestProps } from '../../type/next';
|
||||
|
||||
/**
 * Signature of a single API route handler.
 *
 * A handler receives the request and response objects and may return a
 * value either synchronously or as a promise.
 *
 * @typeParam T - payload type of the response object; defaults to `any`.
 */
export type NextApiHandler<T = any> = (
  req: ApiRequestProps,
  res: NextApiResponse<T>
) => unknown | Promise<unknown>;
|
||||
|
||||
/**
 * Build an API entry-point factory that wraps route handlers with shared
 * pre-processing, response serialisation and error handling.
 *
 * For every request the generated function:
 * 1. awaits `withNextCors(req, res)` together with every promise in
 *    `beforeCallback`;
 * 2. runs the given handlers one after another, keeping the value returned
 *    by the last one;
 * 3. when the response has not finished and its `Content-Type` header is
 *    either unset or exactly `application/json`, replies through `jsonRes`
 *    with code 200 and that value as `data`;
 * 4. on any thrown error, replies through `jsonRes` with code 500, the error
 *    and the request URL.
 *
 * @param beforeCallback - promises awaited before the handlers run. They are
 *   promise objects rather than functions, so the same instances are awaited
 *   again on every request.
 * @returns a function taking one or more handlers and producing a single
 *   combined handler.
 */
export const NextEntry = ({ beforeCallback = [] }: { beforeCallback?: Promise<any>[] }) => {
  return (...args: NextApiHandler[]): NextApiHandler => {
    return async function api(req: ApiRequestProps, res: NextApiResponse) {
      try {
        await Promise.all([withNextCors(req, res), ...beforeCallback]);

        // Handlers run strictly in order; only the last return value is sent.
        let response: unknown = null;
        for (const handler of args) {
          response = await handler(req, res);
        }

        // Skip the JSON envelope when a handler already finished the response
        // or declared a different content type.
        const contentType = res.getHeader('Content-Type');
        if ((!contentType || contentType === 'application/json') && !res.writableFinished) {
          return jsonRes(res, {
            code: 200,
            data: response
          });
        }
      } catch (error) {
        return jsonRes(res, {
          code: 500,
          error,
          url: req.url
        });
      }
    };
  };
};
|
@@ -1,7 +1,7 @@
|
||||
import { UrlFetchParams, UrlFetchResponse } from '@fastgpt/global/common/file/api';
|
||||
import * as cheerio from 'cheerio';
|
||||
import axios from 'axios';
|
||||
import { htmlToMarkdown } from '../file/read/utils';
|
||||
import { htmlToMarkdown } from './utils';
|
||||
|
||||
export const cheerioToHtml = ({
|
||||
fetchUrl,
|
||||
|
8
packages/service/common/string/utils.ts
Normal file
8
packages/service/common/string/utils.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import { simpleMarkdownText } from '@fastgpt/global/common/string/markdown';
|
||||
import { WorkerNameEnum, runWorker } from '../../worker/utils';
|
||||
|
||||
/**
 * Convert an HTML string to markdown.
 *
 * The conversion is delegated to the `htmlStr2Md` worker, and the worker's
 * output is passed through `simpleMarkdownText` before being returned.
 *
 * @param html - HTML source; `null`, `undefined` and `''` are all sent to the worker as an empty string.
 * @returns the result of `simpleMarkdownText` applied to the worker output.
 */
export const htmlToMarkdown = async (html?: string | null) => {
  const md = await runWorker<string>(WorkerNameEnum.htmlStr2Md, { html: html ?? '' });

  return simpleMarkdownText(md);
};
|
Reference in New Issue
Block a user