File input (#2270)

* doc

* feat: file upload config

* perf: chat box file params

* feat: markdown show file

* feat: chat file store and clear

* perf: read file contentType

* feat: llm vision config

* feat: file url output

* perf: plugin error text

* perf: image load

* feat: ai chat document

* perf: file block ui

* feat: read file node

* feat: file read response field

* feat: simple mode support read files

* feat: tool call

* feat: read file histories

* perf: select file

* perf: select file config

* i18n

* i18n

* fix: ts; feat: tool response preview result
This commit is contained in:
Archer
2024-08-06 10:00:22 +08:00
committed by GitHub
parent 10dcdb5491
commit e36d9d794f
121 changed files with 2600 additions and 1142 deletions

View File

@@ -1,5 +1,5 @@
import { connectionMongo, getMongoModel, type Model } from '../../mongo';
const { Schema, model, models } = connectionMongo;
import { connectionMongo, getMongoModel } from '../../mongo';
const { Schema } = connectionMongo;
import { RawTextBufferSchemaType } from './type';
export const collectionName = 'buffer_rawtexts';

View File

@@ -3,16 +3,19 @@ import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import fsp from 'fs/promises';
import fs from 'fs';
import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoFileSchema } from './schema';
import { MongoChatFileSchema, MongoDatasetFileSchema } from './schema';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
import { readRawContentByFileBuffer } from '../read/utils';
import { gridFsStream2Buffer, stream2Encoding } from './utils';
import { addLog } from '../../system/log';
import { readFromSecondary } from '../../mongo/utils';
// Returns the raw GridFS `<bucket>.files` metadata collection for a bucket.
export function getGFSCollection(bucket: `${BucketNameEnum}`) {
// NOTE(review): these bare expression statements appear intended to force the
// mongoose models (and their index definitions) to be registered before the
// underlying collection is accessed directly — confirm against the schema
// module; they have no other runtime effect.
MongoFileSchema;
MongoDatasetFileSchema;
MongoChatFileSchema;
return connectionMongo.connection.db.collection(`${bucket}.files`);
}
export function getGridBucket(bucket: `${BucketNameEnum}`) {
@@ -49,6 +52,7 @@ export async function uploadFile({
const { stream: readStream, encoding } = await stream2Encoding(fs.createReadStream(path));
// Add default metadata
metadata.teamId = teamId;
metadata.tmbId = tmbId;
metadata.encoding = encoding;
@@ -103,7 +107,9 @@ export async function delFileByFileIdList({
try {
const bucket = getGridBucket(bucketName);
await Promise.all(fileIdList.map((id) => bucket.delete(new Types.ObjectId(id))));
for await (const fileId of fileIdList) {
await bucket.delete(new Types.ObjectId(fileId));
}
} catch (error) {
if (retry > 0) {
return delFileByFileIdList({ bucketName, fileIdList, retry: retry - 1 });
@@ -138,7 +144,9 @@ export const readFileContentFromMongo = async ({
filename: string;
}> => {
// read buffer
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: fileId }).lean();
const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: fileId }, undefined, {
...readFromSecondary
}).lean();
if (fileBuffer) {
return {
rawText: fileBuffer.rawText,

View File

@@ -1,13 +1,17 @@
import { connectionMongo, getMongoModel, type Model } from '../../mongo';
const { Schema, model, models } = connectionMongo;
const { Schema } = connectionMongo;
const FileSchema = new Schema({});
const DatasetFileSchema = new Schema({});
const ChatFileSchema = new Schema({});
try {
FileSchema.index({ 'metadata.teamId': 1 });
FileSchema.index({ 'metadata.uploadDate': -1 });
DatasetFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ uploadDate: -1 });
ChatFileSchema.index({ 'metadata.chatId': 1 });
} catch (error) {
console.log(error);
}
export const MongoFileSchema = getMongoModel('dataset.files', FileSchema);
export const MongoDatasetFileSchema = getMongoModel('dataset.files', DatasetFileSchema);
export const MongoChatFileSchema = getMongoModel('chat.files', ChatFileSchema);

View File

@@ -8,28 +8,6 @@ import fs from 'fs';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import type { ReadFileResponse } from '../../../worker/readFile/type';
// match md img text and upload to db
// Replace inline base64 images in markdown text with hosted image URLs.
// Each image is stored via uploadMongoImg as a temporary collection image
// (expires 2 hours after upload); returns the processed markdown.
export const matchMdImgTextAndUpload = ({
teamId,
md,
metadata
}: {
md: string;
teamId: string;
metadata?: Record<string, any>;
}) => {
const uploadImgController = (base64Img: string) =>
uploadMongoImg({
type: MongoImageTypeEnum.collectionImage,
base64Img,
teamId,
metadata,
expiredTime: addHours(new Date(), 2)
});
return markdownProcess({ rawText: md, uploadImgController });
};
export type readRawTextByLocalFileParams = {
teamId: string;
path: string;
@@ -72,6 +50,28 @@ export const readRawContentByFileBuffer = async ({
encoding: string;
metadata?: Record<string, any>;
}) => {
// Upload image in markdown: replaces inline base64 images in the markdown
// text with hosted URLs via uploadMongoImg. Uploaded images expire after
// 1 hour (temporary collection images) — presumably long enough for the
// caller to persist them elsewhere; TODO confirm against the cleanup job.
const matchMdImgTextAndUpload = ({
teamId,
md,
metadata
}: {
md: string;
teamId: string;
metadata?: Record<string, any>;
}) =>
markdownProcess({
rawText: md,
uploadImgController: (base64Img) =>
uploadMongoImg({
type: MongoImageTypeEnum.collectionImage,
base64Img,
teamId,
metadata,
expiredTime: addHours(new Date(), 1)
})
});
let { rawText, formatText } = await runWorker<ReadFileResponse>(WorkerNameEnum.readFile, {
extension,
encoding,

View File

@@ -18,7 +18,17 @@ export const guessBase64ImageType = (str: string) => {
i: 'image/png',
R: 'image/gif',
U: 'image/webp',
Q: 'image/bmp'
Q: 'image/bmp',
P: 'image/svg+xml',
T: 'image/tiff',
J: 'image/jp2',
S: 'image/x-tga',
I: 'image/ief',
V: 'image/vnd.microsoft.icon',
W: 'image/vnd.wap.wbmp',
X: 'image/x-xbitmap',
Z: 'image/x-xpixmap',
Y: 'image/x-xwindowdump'
};
const defaultType = 'image/jpeg';
@@ -30,6 +40,11 @@ export const guessBase64ImageType = (str: string) => {
return imageTypeMap[firstChar] || defaultType;
};
/**
 * Extract the media type from a Content-Type header value.
 *
 * E.g. 'text/html; charset=utf-8' -> 'text/html'.
 *
 * @param header - Raw Content-Type header value.
 * @returns The media type with parameters stripped and surrounding whitespace
 *   trimmed, or `undefined` when the header carries no media type (matching
 *   the declared return type, which the previous body never produced).
 */
export const getFileContentTypeFromHeader = (header: string): string | undefined => {
  // Everything after ';' (charset, boundary, ...) is a parameter, not the type.
  const contentType = header.split(';')[0].trim();
  return contentType || undefined;
};
export const clearDirFiles = (dirPath: string) => {
if (!fs.existsSync(dirPath)) {
return;