Mirror of https://github.com/labring/FastGPT.git, synced 2025-10-19 18:14:38 +00:00

* feat: concat usage code (#5657)
* feat: dataset parse queue (#5661)
* feat: chat usage concat (#5669)
* perf: search test usage
* feat: chat usage concat
* fix: ts
* fix: ts
* feat: chat node response store (#5675)
* feat: chat node response store
* limit export
* test
* add ai generate node (#5506)
* add node copilot
* apply code
* update dynamic input & output
* add code test
* usage
* dynamic input border render
* optimize input & output
* optimize code
* update style
* change card to popover
* prompt editor basic
* prompt editor
* handle key down
* update prompt
* merge
* fix
* fix
* fix
* perf: workflow performance (#5677)
* feat: chat node response store
* limit export
* perf: workflow performance
* remove log
* fix: app template get duplicate (#5682)
* fix: dynamic input lock & code param (#5680)
* fix: dynamic input lock & code param
* fix
* fix
* feat: multi node data sync & system tool hot-swapping (#5575)
* Enhance file upload functionality and system tool integration (#5257)
* Enhance file upload functionality and system tool integration
* Add supplementary documents and optimize the upload interface
* Refactor file plugin types and update upload configurations
* Refactor MinIO configuration variables and clean up API plugin handlers for improved readability and consistency
* File name change
* Refactor SystemTools component layout
* fix i18n
* fix
* fix
* fix
* optimize app logs sort (#5310)
* log keys config modal
* multiple select
* api
* fontsize
* code
* chatid
* fix build
* fix
* fix component
* change name
* log keys config
* fix
* delete unused
* fix
* chore: minio service class rewrite
* chore: s3 plugin upload
* feat: system global cache with multi node sync feature
* feat: cache
* chore: move images
* docs: update & remove useless code
* chore: resolve merge conflicts
* chore: adjust the code
* chore: adjust
* deps: upgrade @fastgpt-sdk/plugin to 0.1.17
* perf(s3): s3 config
* fix: cache syncKey refresh
* fix: update @fastgpt-sdk/plugin to v0.1.18 removing mongo definition for fixing vitest
* chore: adjust

---------

Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Archer <545436317@qq.com>

* perf: s3 api code
* fix: toolbox empty when second open modal
* feat: http tool set (#5599)
* feat: http toolSet manual create front end
* feat: http toolSet manual create i18n
* feat: http toolSet manual create back end
* feat: auth, as tool param, adapt mcp
* fix: delete unused httpPlugin
* fix: delete FlowNodeTypeEnum.httpPlugin
* fix: AppTypeEnum include httpToolSet and httpPlugin
* fix
* delete console
* fix
* output schema
* fix
* fix bg
* fix base url
* fix

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* feat: app count
* perf: type check
* feat: catch error
* perf: plugin hot-swapping (#5688)
* perf: plugin hot-swapping
* chore: adjust code
* perf: cite data auth
* fix http toolset (#5689)
* temp
* fix http tool set
* fix
* template author hide
* dynamic IO ui
* fix: auth test
* fix dynamic input & output (#5690)

Co-authored-by: Archer <545436317@qq.com>

* fix: dynamic output id
* doc
* feat: model permission (#5666)
* feat(permission): model permission definition & api
* chore: support update model's collaborators
* feat: remove unauthedmodel when paste and import
* fix: type error
* fix: test setup global model list
* fix: http tool api
* chore: update fastgpt-sdk version
* chore: remove useless code
* chore: myModelList cache
* perf: user who is not manager can not configure model permission (FE)
* perf: model => Set
* feat: getMyModels moved to opensource code; cache the myModelList
* fix: type error
* fix dynamic input reference select type (#5694)
* remove unique index
* read file usage
* perf: connection error
* fix: abort token count
* fix: debug usage concat
* fix: immer clone object
* fix: immer clone object
* perf: throw error when error chat
* update audit i18n
* fix: content order was scrambled after parsing pptx files (#5696)
* fix: pptx sort error
* fix prompt editor (#5695)
* fix prompt editor
* fix
* fix: redis cache prefix (#5697)
* fix: redis cache prefix
* fix: cache
* fix: get model collaborator by model.model
* feat: hint for model per
* rename bucket name
* model ui
* doc
* doc

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: Zeng Qingwen <143274079+fishwww-ww@users.noreply.github.com>
Co-authored-by: heheer <zhiyu44@qq.com>
Co-authored-by: Deepturn <33342819+Deepturn@users.noreply.github.com>
267 lines · 6.6 KiB · TypeScript
import { Types, connectionMongo, ReadPreference } from '../../mongo';
import type { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import fsp from 'fs/promises';
import fs from 'fs';
import { type DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
import { detectFileEncoding, detectFileEncodingByPath } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { readRawContentByFileBuffer } from '../read/utils';
import { computeGridFsChunSize, gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { Readable } from 'stream';
import { addRawTextBuffer, getRawTextBuffer } from '../../buffer/rawText/controller';
import { addMinutes } from 'date-fns';
import { retryFn } from '@fastgpt/global/common/system/utils';

export function getGFSCollection(bucket: `${BucketNameEnum}`) {
  // Reference the schemas so their models are registered before touching the raw collection
  MongoDatasetFileSchema;
  MongoChatFileSchema;

  return connectionMongo.connection.db!.collection(`${bucket}.files`);
}
export function getGridBucket(bucket: `${BucketNameEnum}`) {
  return new connectionMongo.mongo.GridFSBucket(connectionMongo.connection.db!, {
    bucketName: bucket,
    // @ts-ignore
    readPreference: ReadPreference.SECONDARY_PREFERRED // Read from secondary node
  });
}
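
/**
 * Note: GridFS stores a bucket's data in two collections, `<bucket>.files`
 * (metadata) and `<bucket>.chunks` (binary chunks), which is why
 * getGFSCollection targets `${bucket}.files`.
 *
 * Minimal lookup sketch, assuming 'dataset' is a valid BucketNameEnum value
 * and `teamId` is a hypothetical id from the caller's context:
 *
 * @example
 * const files = getGFSCollection('dataset');
 * const doc = await files.findOne({ 'metadata.teamId': teamId });
 */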

/* crud file */
export async function uploadFile({
  bucketName,
  teamId,
  uid,
  path,
  filename,
  contentType,
  metadata = {}
}: {
  bucketName: `${BucketNameEnum}`;
  teamId: string;
  uid: string; // tmbId / outLinkUId
  path: string;
  filename: string;
  contentType?: string;
  metadata?: Record<string, any>;
}) {
  if (!path) return Promise.reject(`filePath is empty`);
  if (!filename) return Promise.reject(`filename is empty`);

  const stats = await fsp.stat(path);
  if (!stats.isFile()) return Promise.reject(`${path} is not a file`);

  const readStream = fs.createReadStream(path, {
    highWaterMark: 256 * 1024
  });

  // Add default metadata
  metadata.teamId = teamId;
  metadata.uid = uid;
  metadata.encoding = await detectFileEncodingByPath(path);

  // create a gridfs bucket
  const bucket = getGridBucket(bucketName);

  const chunkSizeBytes = computeGridFsChunSize(stats.size);

  const stream = bucket.openUploadStream(filename, {
    metadata,
    contentType,
    chunkSizeBytes
  });

  // save to gridfs
  await new Promise((resolve, reject) => {
    readStream
      .pipe(stream as any)
      .on('finish', resolve)
      .on('error', reject);
  }).finally(() => {
    readStream.destroy();
  });

  return String(stream.id);
}
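
/**
 * Minimal usage sketch for uploadFile; the bucket name, ids and path below
 * are hypothetical placeholders, not values from this codebase:
 *
 * @example
 * const fileId = await uploadFile({
 *   bucketName: 'dataset', // assuming 'dataset' is a valid BucketNameEnum value
 *   teamId,
 *   uid: tmbId,
 *   path: '/tmp/report.pdf',
 *   filename: 'report.pdf',
 *   contentType: 'application/pdf'
 * });
 */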

export async function uploadFileFromBase64Img({
  bucketName,
  teamId,
  tmbId,
  base64,
  filename,
  metadata = {}
}: {
  bucketName: `${BucketNameEnum}`;
  teamId: string;
  tmbId: string;
  base64: string;
  filename: string;
  metadata?: Record<string, any>;
}) {
  if (!base64) return Promise.reject(`base64 is empty`);
  if (!filename) return Promise.reject(`filename is empty`);

  // Expect a data URL: data:<mime>;base64,<payload>
  const base64Data = base64.split(',')[1];
  if (!base64Data) return Promise.reject(`base64 is not a valid data URL`);
  // 'data:image/png;base64,...' -> 'image/png'
  const contentType = base64.split(',')[0]?.split(':')[1]?.split(';')[0];
  const buffer = Buffer.from(base64Data, 'base64');
  const readableStream = new Readable({
    read() {
      this.push(buffer);
      this.push(null);
    }
  });

  const { stream: readStream, encoding } = await stream2Encoding(readableStream);

  // Add default metadata
  metadata.teamId = teamId;
  metadata.tmbId = tmbId;
  metadata.encoding = encoding;

  // create a gridfs bucket
  const bucket = getGridBucket(bucketName);

  const stream = bucket.openUploadStream(filename, {
    metadata,
    contentType
  });

  // save to gridfs
  await new Promise((resolve, reject) => {
    readStream
      .pipe(stream as any)
      .on('finish', resolve)
      .on('error', reject);
  });

  return String(stream.id);
}
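
/**
 * Sketch: uploading an in-memory image as a base64 data URL (values
 * hypothetical). The `data:<mime>;base64,` prefix must be kept, since the
 * content type is parsed from it above:
 *
 * @example
 * const base64 = `data:image/png;base64,${pngBuffer.toString('base64')}`;
 * const fileId = await uploadFileFromBase64Img({
 *   bucketName: 'chat', // assuming 'chat' is a valid BucketNameEnum value
 *   teamId,
 *   tmbId,
 *   base64,
 *   filename: 'avatar.png'
 * });
 */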

export async function getFileById({
  bucketName,
  fileId
}: {
  bucketName: `${BucketNameEnum}`;
  fileId: string;
}) {
  const db = getGFSCollection(bucketName);
  const file = await db.findOne<DatasetFileSchema>({
    _id: new Types.ObjectId(fileId)
  });

  return file || undefined;
}

export async function delFileByFileIdList({
  bucketName,
  fileIdList
}: {
  bucketName: `${BucketNameEnum}`;
  fileIdList: string[];
}): Promise<any> {
  return retryFn(async () => {
    const bucket = getGridBucket(bucketName);

    for (const fileId of fileIdList) {
      try {
        await bucket.delete(new Types.ObjectId(String(fileId)));
      } catch (error: any) {
        if (typeof error?.message === 'string' && error.message.includes('File not found')) {
          // A missing file is not fatal: log it and keep deleting the rest
          addLog.warn('File not found', { fileId });
          continue;
        }
        return Promise.reject(error);
      }
    }
  });
}
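
/**
 * Sketch: batch deletion wrapped in retryFn; missing files are logged and
 * skipped rather than failing the whole batch (ids hypothetical):
 *
 * @example
 * await delFileByFileIdList({ bucketName: 'dataset', fileIdList: [fileId] });
 */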

export async function getDownloadStream({
  bucketName,
  fileId
}: {
  bucketName: `${BucketNameEnum}`;
  fileId: string;
}) {
  const bucket = getGridBucket(bucketName);

  return bucket.openDownloadStream(new Types.ObjectId(fileId));
}
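
/**
 * Sketch: streaming a stored file to disk without buffering it all in memory
 * (path hypothetical). `pipeline` is Node's stream/promises helper:
 *
 * @example
 * import { pipeline } from 'stream/promises';
 * const download = await getDownloadStream({ bucketName: 'dataset', fileId });
 * await pipeline(download, fs.createWriteStream('/tmp/copy.pdf'));
 */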

export const readFileContentFromMongo = async ({
  teamId,
  tmbId,
  bucketName,
  fileId,
  customPdfParse = false,
  getFormatText,
  usageId
}: {
  teamId: string;
  tmbId: string;
  bucketName: `${BucketNameEnum}`;
  fileId: string;
  customPdfParse?: boolean;
  getFormatText?: boolean; // convert every data type to markdown format where possible
  usageId?: string;
}): Promise<{
  rawText: string;
  filename: string;
}> => {
  const bufferId = `${String(fileId)}-${customPdfParse}`;
  // Serve from the raw-text buffer cache when possible
  const fileBuffer = await getRawTextBuffer(bufferId);
  if (fileBuffer) {
    return {
      rawText: fileBuffer.text,
      filename: fileBuffer.sourceName
    };
  }

  const [file, fileStream] = await Promise.all([
    getFileById({ bucketName, fileId }),
    getDownloadStream({ bucketName, fileId })
  ]);
  if (!file) {
    return Promise.reject(CommonErrEnum.fileNotFound);
  }

  const extension = parseFileExtensionFromUrl(file?.filename);

  const start = Date.now();
  const fileBuffers = await gridFsStream2Buffer(fileStream);
  addLog.debug('get file buffer', { time: Date.now() - start });

  const encoding = file?.metadata?.encoding || detectFileEncoding(fileBuffers);

  // Get raw text
  const { rawText } = await readRawContentByFileBuffer({
    customPdfParse,
    usageId,
    getFormatText,
    extension,
    teamId,
    tmbId,
    buffer: fileBuffers,
    encoding,
    metadata: {
      relatedId: fileId
    }
  });

  // Cache the parsed text for 20 minutes
  addRawTextBuffer({
    sourceId: bufferId,
    sourceName: file.filename,
    text: rawText,
    expiredTime: addMinutes(new Date(), 20)
  });

  return {
    rawText,
    filename: file.filename
  };
};
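
/**
 * Sketch: reading a stored file as text. Results are cached per
 * (fileId, customPdfParse) pair for 20 minutes via the raw-text buffer
 * (ids hypothetical):
 *
 * @example
 * const { rawText, filename } = await readFileContentFromMongo({
 *   teamId,
 *   tmbId,
 *   bucketName: 'dataset',
 *   fileId,
 *   customPdfParse: true
 * });
 */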