Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-27 00:17:31 +00:00)

4.6.7 first pr (#726)

packages/service/common/file/constants.ts (new file, 6 lines)
@@ -0,0 +1,6 @@
+import path from 'path';
+
+export const tmpFileDirPath =
+  process.env.NODE_ENV === 'production' ? '/app/tmp' : path.join(process.cwd(), 'tmp');
+
+export const previewMaxCharCount = 3000;
@@ -1 +0,0 @@
-export const imageBaseUrl = '/api/system/img/';
@@ -1,5 +1,5 @@
import { UploadImgProps } from '@fastgpt/global/common/file/api';
-import { imageBaseUrl } from './constant';
+import { imageBaseUrl } from '@fastgpt/global/common/file/image/constants';
import { MongoImage } from './schema';

export function getMongoImgUrl(id: string) {
@@ -8,10 +8,13 @@ export function getMongoImgUrl(id: string) {

export const maxImgSize = 1024 * 1024 * 12;
export async function uploadMongoImg({
  type,
  base64Img,
  teamId,
  expiredTime,
-  metadata
+  metadata,
+  shareId
}: UploadImgProps & {
  teamId: string;
}) {
@@ -20,12 +23,16 @@ export async function uploadMongoImg({
  }

  const base64Data = base64Img.split(',')[1];
+  const binary = Buffer.from(base64Data, 'base64');

  const { _id } = await MongoImage.create({
    type,
    teamId,
-    binary: Buffer.from(base64Data, 'base64'),
+    binary,
    expiredTime: expiredTime,
-    metadata
+    metadata,
+    shareId
  });

  return getMongoImgUrl(String(_id));
@@ -1,5 +1,7 @@
+import { TeamCollectionName } from '@fastgpt/global/support/user/team/constant';
import { connectionMongo, type Model } from '../../mongo';
+import { MongoImageSchemaType } from '@fastgpt/global/common/file/image/type.d';
+import { mongoImageTypeMap } from '@fastgpt/global/common/file/image/constants';
const { Schema, model, models } = connectionMongo;

const ImageSchema = new Schema({
@@ -12,12 +14,18 @@ const ImageSchema = new Schema({
    type: Date,
    default: () => new Date()
  },
-  binary: {
-    type: Buffer
-  },
  expiredTime: {
    type: Date
  },
+  binary: {
+    type: Buffer
+  },
+  type: {
+    type: String,
+    enum: Object.keys(mongoImageTypeMap),
+    required: true
+  },
+
  metadata: {
    type: Object
  }
@@ -25,14 +33,13 @@ const ImageSchema = new Schema({

try {
  ImageSchema.index({ expiredTime: 1 }, { expireAfterSeconds: 60 });
+  ImageSchema.index({ type: 1 });
+  ImageSchema.index({ teamId: 1 });
} catch (error) {
  console.log(error);
}

-export const MongoImage: Model<{
-  teamId: string;
-  binary: Buffer;
-  metadata?: { fileId?: string };
-}> = models['image'] || model('image', ImageSchema);
+export const MongoImage: Model<MongoImageSchemaType> =
+  models['image'] || model('image', ImageSchema);
+
+MongoImage.syncIndexes();

packages/service/common/file/load/pdf.ts (new file, 68 lines)
@@ -0,0 +1,68 @@
+import * as pdfjs from 'pdfjs-dist/legacy/build/pdf.mjs';
+// @ts-ignore
+import('pdfjs-dist/legacy/build/pdf.worker.min.mjs');
+import { ReadFileParams } from './type';
+
+type TokenType = {
+  str: string;
+  dir: string;
+  width: number;
+  height: number;
+  transform: number[];
+  fontName: string;
+  hasEOL: boolean;
+};
+
+export const readPdfFile = async ({ path }: ReadFileParams) => {
+  const readPDFPage = async (doc: any, pageNo: number) => {
+    const page = await doc.getPage(pageNo);
+    const tokenizedText = await page.getTextContent();
+
+    const viewport = page.getViewport({ scale: 1 });
+    const pageHeight = viewport.height;
+    const headerThreshold = pageHeight * 0.95;
+    const footerThreshold = pageHeight * 0.05;
+
+    const pageTexts: TokenType[] = tokenizedText.items.filter((token: TokenType) => {
+      return (
+        !token.transform ||
+        (token.transform[5] < headerThreshold && token.transform[5] > footerThreshold)
+      );
+    });
+
+    // concat empty string 'hasEOL'
+    for (let i = 0; i < pageTexts.length; i++) {
+      const item = pageTexts[i];
+      if (item.str === '' && pageTexts[i - 1]) {
+        pageTexts[i - 1].hasEOL = item.hasEOL;
+        pageTexts.splice(i, 1);
+        i--;
+      }
+    }
+
+    page.cleanup();
+
+    return pageTexts
+      .map((token) => {
+        const paragraphEnd = token.hasEOL && /([。?!.?!\n\r]|(\r\n))$/.test(token.str);
+
+        return paragraphEnd ? `${token.str}\n` : token.str;
+      })
+      .join('');
+  };
+
+  const loadingTask = pdfjs.getDocument(path);
+  const doc = await loadingTask.promise;
+
+  const pageTextPromises = [];
+  for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
+    pageTextPromises.push(readPDFPage(doc, pageNo));
+  }
+  const pageTexts = await Promise.all(pageTextPromises);
+
+  loadingTask.destroy();
+
+  return {
+    rawText: pageTexts.join('')
+  };
+};
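
Note: a worked example (not part of this diff) of the header/footer filter in readPdfFile above, assuming a 792 pt tall page; in pdfjs text items, transform[5] is the token's vertical position, so tokens in the top 5% or bottom 5% of the page are dropped.

// Illustration only: assumed page height of 792 pt.
const pageHeight = 792;
const headerThreshold = pageHeight * 0.95; // 752.4
const footerThreshold = pageHeight * 0.05; // 39.6
const keep = (y: number) => y < headerThreshold && y > footerThreshold;
console.log(keep(770), keep(400), keep(20)); // false true false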

packages/service/common/file/load/type.d.ts (new file, vendored, 18 lines)
@@ -0,0 +1,18 @@
+export type ReadFileParams = {
+  preview: boolean;
+  teamId: string;
+  path: string;
+  metadata?: Record<string, any>;
+};
+
+export type ReadFileResponse = {
+  rawText: string;
+};
+
+export type ReadFileBufferItemType = ReadFileParams & {
+  rawText: string;
+};
+
+declare global {
+  var readFileBuffers: ReadFileBufferItemType[];
+}

packages/service/common/file/load/utils.ts (new file, 50 lines)
@@ -0,0 +1,50 @@
+import { readPdfFile } from './pdf';
+import { readDocFle } from './word';
+import { ReadFileBufferItemType, ReadFileParams } from './type';
+
+global.readFileBuffers = global.readFileBuffers || [];
+
+const bufferMaxSize = 200;
+
+export const pushFileReadBuffer = (params: ReadFileBufferItemType) => {
+  global.readFileBuffers.push(params);
+
+  if (global.readFileBuffers.length > bufferMaxSize) {
+    global.readFileBuffers.shift();
+  }
+};
+export const getReadFileBuffer = ({ path, teamId }: ReadFileParams) =>
+  global.readFileBuffers.find((item) => item.path === path && item.teamId === teamId);
+
+export const readFileContent = async (params: ReadFileParams) => {
+  const { path } = params;
+
+  const buffer = getReadFileBuffer(params);
+
+  if (buffer) {
+    return buffer;
+  }
+
+  const extension = path?.split('.')?.pop()?.toLowerCase() || '';
+
+  const { rawText } = await (async () => {
+    switch (extension) {
+      case 'pdf':
+        return readPdfFile(params);
+      case 'docx':
+        return readDocFle(params);
+      default:
+        return Promise.reject('Only support .pdf, .docx');
+    }
+  })();
+
+  pushFileReadBuffer({
+    ...params,
+    rawText
+  });
+
+  return {
+    ...params,
+    rawText
+  };
+};
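
Note: a minimal usage sketch (not part of this diff) of the new readFileContent helper; the import path, teamId and temp-file path below are assumptions.

import { readFileContent } from '@fastgpt/service/common/file/load/utils'; // assumed workspace path

// Hypothetical call site: read a previously uploaded temp file and get its raw text.
// Results are cached in global.readFileBuffers (at most 200 entries), keyed by path + teamId,
// and unsupported extensions reject with 'Only support .pdf, .docx'.
async function example() {
  const { rawText } = await readFileContent({
    preview: false,
    teamId: 'team_id_example', // made-up id
    path: '/app/tmp/1700001800000-abcdef.pdf' // made-up temp file path
  });
  console.log(rawText.slice(0, 200));
}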

packages/service/common/file/load/word.ts (new file, 22 lines)
@@ -0,0 +1,22 @@
+import mammoth from 'mammoth';
+import { htmlToMarkdown } from '../../string/markdown';
+import { ReadFileParams } from './type';
+/**
+ * read docx to markdown
+ */
+export const readDocFle = async ({ path, metadata = {} }: ReadFileParams) => {
+  try {
+    const { value: html } = await mammoth.convertToHtml({
+      path
+    });
+
+    const md = await htmlToMarkdown(html);
+
+    return {
+      rawText: md
+    };
+  } catch (error) {
+    console.log('error doc read:', error);
+    return Promise.reject('Can not read doc file, please convert to PDF');
+  }
+};
@@ -1,11 +1,9 @@
import type { NextApiRequest, NextApiResponse } from 'next';
-import { customAlphabet } from 'nanoid';
import multer from 'multer';
import path from 'path';
import { BucketNameEnum, bucketNameMap } from '@fastgpt/global/common/file/constants';
import fs from 'fs';

-const nanoid = customAlphabet('1234567890abcdef', 12);
+import { getNanoid } from '@fastgpt/global/common/string/tools';
+import { tmpFileDirPath } from './constants';

type FileType = {
  fieldname: string;
@@ -17,7 +15,9 @@ type FileType = {
  size: number;
};

-export function getUploadModel({ maxSize = 500 }: { maxSize?: number }) {
+const expiredTime = 30 * 60 * 1000;
+
+export const getUploadModel = ({ maxSize = 500 }: { maxSize?: number }) => {
  maxSize *= 1024 * 1024;
  class UploadModel {
    uploader = multer({
@@ -26,9 +26,12 @@ export function getUploadModel({ maxSize = 500 }: { maxSize?: number }) {
      },
      preservePath: true,
      storage: multer.diskStorage({
-        filename: (_req, file, cb) => {
+        // destination: (_req, _file, cb) => {
+        //   cb(null, tmpFileDirPath);
+        // },
+        filename: async (req, file, cb) => {
          const { ext } = path.parse(decodeURIComponent(file.originalname));
-          cb(null, nanoid() + ext);
+          cb(null, `${Date.now() + expiredTime}-${getNanoid(32)}${ext}`);
        }
      })
    }).any();
@@ -75,14 +78,4 @@ export function getUploadModel({ maxSize = 500 }: { maxSize?: number }) {
  }

  return new UploadModel();
}
-
-export const removeFilesByPaths = (paths: string[]) => {
-  paths.forEach((path) => {
-    fs.unlink(path, (err) => {
-      if (err) {
-        console.error(err);
-      }
-    });
-  });
-};

packages/service/common/file/utils.ts (new file, 33 lines)
@@ -0,0 +1,33 @@
+import fs from 'fs';
+import { tmpFileDirPath } from './constants';
+
+export const removeFilesByPaths = (paths: string[]) => {
+  paths.forEach((path) => {
+    fs.unlink(path, (err) => {
+      if (err) {
+        console.error(err);
+      }
+    });
+  });
+};
+
+/* cron job. check expired tmp files */
+export const checkExpiredTmpFiles = () => {
+  // get all file name
+  const files = fs.readdirSync(tmpFileDirPath).map((name) => {
+    const timestampStr = name.split('-')[0];
+    const expiredTimestamp = timestampStr ? Number(timestampStr) : 0;
+
+    return {
+      filename: name,
+      expiredTimestamp,
+      path: `${tmpFileDirPath}/${name}`
+    };
+  });
+
+  // count expiredFiles
+  const expiredFiles = files.filter((item) => item.expiredTimestamp < Date.now());
+
+  // remove expiredFiles
+  removeFilesByPaths(expiredFiles.map((item) => item.path));
+};
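
Note: an illustrative example (not part of this diff) tying the two pieces above together: the multer storage writes filenames as `${Date.now() + expiredTime}-${getNanoid(32)}${ext}`, and checkExpiredTmpFiles reads the prefix before the first '-' back as the expiry timestamp. The numbers and the short id are made up.

// Writer side (upload): with expiredTime = 30 * 60 * 1000 and Date.now() = 1700000000000,
// the stored name is '1700001800000-<32-char id>.pdf'.
const expiredTime = 30 * 60 * 1000;
const filename = `${1700000000000 + expiredTime}-a1b2c3d4e5.pdf`; // '1700001800000-a1b2c3d4e5.pdf'

// Reader side (cron sweep): checkExpiredTmpFiles parses the prefix and removes the file
// once that timestamp is in the past.
const expiredTimestamp = Number(filename.split('-')[0]); // 1700001800000
const isExpired = expiredTimestamp < Date.now();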
@@ -50,8 +50,11 @@ export const cheerioToHtml = ({
    .get()
    .join('\n');

+  const title = $('head title').text() || $('h1:first').text() || fetchUrl;
+
  return {
    html,
+    title,
    usedSelector
  };
};
@@ -70,7 +73,7 @@ export const urlsFetch = async ({
      });

      const $ = cheerio.load(fetchRes.data);
-      const { html, usedSelector } = cheerioToHtml({
+      const { title, html, usedSelector } = cheerioToHtml({
        fetchUrl: url,
        $,
        selector
@@ -79,6 +82,7 @@ export const urlsFetch = async ({

      return {
        url,
+        title,
        content: md,
        selector: usedSelector
      };
@@ -87,6 +91,7 @@ export const urlsFetch = async ({

      return {
        url,
+        title: '',
        content: '',
        selector: ''
      };
@@ -15,7 +15,9 @@ export const htmlToMarkdown = (html?: string | null) =>
    worker.on('message', (md: string) => {
      worker.terminate();

-      resolve(simpleMarkdownText(md));
+      let rawText = simpleMarkdownText(md);
+
+      resolve(rawText);
    });
    worker.on('error', (err) => {
      worker.terminate();

packages/service/common/system/cron.ts (new file, 6 lines)
@@ -0,0 +1,6 @@
+import nodeCron from 'node-cron';
+
+export const setCron = (time: string, cb: () => void) => {
+  // second minute hour day month week
+  return nodeCron.schedule(time, cb);
+};
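
Note: a minimal sketch (not part of this diff) of how setCron and checkExpiredTmpFiles might be wired together at service start-up; the schedule and import paths are assumptions, not taken from this PR.

import { setCron } from '@fastgpt/service/common/system/cron'; // assumed workspace path
import { checkExpiredTmpFiles } from '@fastgpt/service/common/file/utils'; // assumed workspace path

// Hypothetical start-up hook: sweep expired temp files every 10 minutes.
export const startTmpFileSweep = () => {
  setCron('*/10 * * * *', () => {
    checkExpiredTmpFiles();
  });
};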
@@ -49,6 +49,7 @@ export const addLog = {
  },
  error(msg: string, error?: any) {
    this.log('error', msg, {
      message: error?.message,
      stack: error?.stack,
      ...(error?.config && {
        config: {
@@ -2,6 +2,8 @@ export type DeleteDatasetVectorProps = {
  id?: string;
  datasetIds?: string[];
  collectionIds?: string[];
+
+  collectionId?: string;
  dataIds?: string[];
};
@@ -101,14 +101,19 @@ export const deleteDatasetDataVector = async (
    retry?: number;
  }
): Promise<any> => {
-  const { id, datasetIds, collectionIds, dataIds, retry = 2 } = props;
+  const { id, datasetIds, collectionIds, collectionId, dataIds, retry = 2 } = props;

  const where = await (() => {
    if (id) return `id=${id}`;
    if (datasetIds) return `dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})`;
-    if (collectionIds)
+    if (collectionIds) {
      return `collection_id IN (${collectionIds.map((id) => `'${String(id)}'`).join(',')})`;
-    if (dataIds) return `data_id IN (${dataIds.map((id) => `'${String(id)}'`).join(',')})`;
+    }
+    if (collectionId && dataIds) {
+      return `collection_id='${String(collectionId)}' and data_id IN (${dataIds
+        .map((id) => `'${String(id)}'`)
+        .join(',')})`;
+    }
    return Promise.reject('deleteDatasetData: no where');
  })();
@@ -32,7 +32,7 @@ export async function getVectorsByText({
      return Promise.reject('Embedding API 404');
    }
    if (!res?.data?.[0]?.embedding) {
-      console.log(res?.data);
+      console.log(res);
      // @ts-ignore
      return Promise.reject(res.data?.err?.message || 'Embedding API Error');
    }
@@ -2,8 +2,7 @@ import { connectionMongo, type Model } from '../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { ChatItemSchema as ChatItemType } from '@fastgpt/global/core/chat/type';
import { ChatRoleMap } from '@fastgpt/global/core/chat/constants';
-import { customAlphabet } from 'nanoid';
-const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 24);
+import { getNanoid } from '@fastgpt/global/common/string/tools';
import {
  TeamCollectionName,
  TeamMemberCollectionName
@@ -13,24 +12,6 @@ import { userCollectionName } from '../../support/user/schema';
import { ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';

const ChatItemSchema = new Schema({
-  dataId: {
-    type: String,
-    require: true,
-    default: () => nanoid()
-  },
-  appId: {
-    type: Schema.Types.ObjectId,
-    ref: appCollectionName,
-    required: true
-  },
-  chatId: {
-    type: String,
-    require: true
-  },
-  userId: {
-    type: Schema.Types.ObjectId,
-    ref: userCollectionName
-  },
  teamId: {
    type: Schema.Types.ObjectId,
    ref: TeamCollectionName,
@@ -41,6 +22,24 @@ const ChatItemSchema = new Schema({
    ref: TeamMemberCollectionName,
    required: true
  },
+  userId: {
+    type: Schema.Types.ObjectId,
+    ref: userCollectionName
+  },
+  chatId: {
+    type: String,
+    require: true
+  },
+  dataId: {
+    type: String,
+    require: true,
+    default: () => getNanoid(22)
+  },
+  appId: {
+    type: Schema.Types.ObjectId,
+    ref: appCollectionName,
+    required: true
+  },
  time: {
    type: Date,
    default: () => new Date()
@@ -80,10 +79,11 @@ const ChatItemSchema = new Schema({
});

try {
-  ChatItemSchema.index({ dataId: -1 });
+  ChatItemSchema.index({ teamId: 1 });
  ChatItemSchema.index({ time: -1 });
  ChatItemSchema.index({ appId: 1 });
  ChatItemSchema.index({ chatId: 1 });
  ChatItemSchema.index({ obj: 1 });
  ChatItemSchema.index({ userGoodFeedback: 1 });
  ChatItemSchema.index({ userBadFeedback: 1 });
  ChatItemSchema.index({ customFeedbacks: 1 });
@@ -1,7 +1,4 @@
-import {
-  DatasetCollectionTrainingModeEnum,
-  DatasetCollectionTypeEnum
-} from '@fastgpt/global/core/dataset/constant';
+import { TrainingModeEnum, DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { MongoDatasetCollection } from './schema';

@@ -12,11 +9,15 @@ export async function createOneCollection({
  parentId,
  datasetId,
  type,
-  trainingType = DatasetCollectionTrainingModeEnum.manual,
+
+  trainingType = TrainingModeEnum.chunk,
  chunkSize = 0,
+  chunkSplitter,
+  qaPrompt,
+
  fileId,
  rawLink,
-  qaPrompt,
+
  hashRawText,
  rawTextLength,
  metadata = {},
@@ -30,11 +31,15 @@ export async function createOneCollection({
    datasetId,
    name,
    type,
+
    trainingType,
    chunkSize,
+    chunkSplitter,
+    qaPrompt,
+
    fileId,
    rawLink,
-    qaPrompt,
+
    rawTextLength,
    hashRawText,
    metadata
@@ -74,7 +79,7 @@ export function createDefaultCollection({
    datasetId,
    parentId,
    type: DatasetCollectionTypeEnum.virtual,
-    trainingType: DatasetCollectionTrainingModeEnum.manual,
+    trainingType: TrainingModeEnum.chunk,
    chunkSize: 0,
    updateTime: new Date('2099')
  });
@@ -1,10 +1,7 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
-import {
-  DatasetCollectionTrainingTypeMap,
-  DatasetCollectionTypeMap
-} from '@fastgpt/global/core/dataset/constant';
+import { TrainingTypeMap, DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constant';
import { DatasetCollectionName } from '../schema';
import {
  TeamCollectionName,
@@ -56,15 +53,23 @@ const DatasetCollectionSchema = new Schema({
    type: Date,
    default: () => new Date()
  },
+
  trainingType: {
    type: String,
-    enum: Object.keys(DatasetCollectionTrainingTypeMap),
+    enum: Object.keys(TrainingTypeMap),
    required: true
  },
  chunkSize: {
    type: Number,
    required: true
  },
+  chunkSplitter: {
+    type: String
+  },
+  qaPrompt: {
+    type: String
+  },
+
  fileId: {
    type: Schema.Types.ObjectId,
    ref: 'dataset.files'
@@ -72,9 +77,6 @@ const DatasetCollectionSchema = new Schema({
  rawLink: {
    type: String
  },
-  qaPrompt: {
-    type: String
-  },

  rawTextLength: {
    type: Number
@@ -89,8 +91,9 @@ const DatasetCollectionSchema = new Schema({
});

try {
-  DatasetCollectionSchema.index({ datasetId: 1 });
+  DatasetCollectionSchema.index({ teamId: 1 });
  DatasetCollectionSchema.index({ datasetId: 1, parentId: 1 });
+  DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, parentId: 1 });
  DatasetCollectionSchema.index({ updateTime: -1 });
  DatasetCollectionSchema.index({ hashRawText: -1 });
} catch (error) {
@@ -4,7 +4,7 @@ import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { MongoDatasetTraining } from '../training/schema';
import { urlsFetch } from '../../../common/string/cheerio';
-import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
+import { DatasetCollectionTypeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { hashStr } from '@fastgpt/global/common/string/tools';

/**
@@ -92,8 +92,12 @@ export const getCollectionAndRawText = async ({
    return Promise.reject('Collection not found');
  }

-  const rawText = await (async () => {
-    if (newRawText) return newRawText;
+  const { title, rawText } = await (async () => {
+    if (newRawText)
+      return {
+        title: '',
+        rawText: newRawText
+      };
    // link
    if (col.type === DatasetCollectionTypeEnum.link && col.rawLink) {
      // crawl new data
@@ -102,12 +106,18 @@ export const getCollectionAndRawText = async ({
        selector: col.datasetId?.websiteConfig?.selector || col?.metadata?.webPageSelector
      });

-      return result[0].content;
+      return {
+        title: result[0].title,
+        rawText: result[0].content
+      };
    }

    // file

-    return '';
+    return {
+      title: '',
+      rawText: ''
+    };
  })();

  const hashRawText = hashStr(rawText);
@@ -115,6 +125,7 @@ export const getCollectionAndRawText = async ({

  return {
    collection: col,
+    title,
    rawText,
    isSameRawText
  };
@@ -135,6 +146,7 @@ export const reloadCollectionChunks = async ({
  rawText?: string;
}) => {
  const {
+    title,
    rawText: newRawText,
    collection: col,
    isSameRawText
@@ -154,6 +166,11 @@ export const reloadCollectionChunks = async ({
  });

  // insert to training queue
+  const model = await (() => {
+    if (col.trainingType === TrainingModeEnum.chunk) return col.datasetId.vectorModel;
+    if (col.trainingType === TrainingModeEnum.qa) return col.datasetId.agentModel;
+    return Promise.reject('Training model error');
+  })();
  await MongoDatasetTraining.insertMany(
    chunks.map((item, i) => ({
      teamId: col.teamId,
@@ -163,7 +180,7 @@ export const reloadCollectionChunks = async ({
      billId,
      mode: col.trainingType,
      prompt: '',
-      model: col.datasetId.vectorModel,
+      model,
      q: item,
      a: '',
      chunkIndex: i
@@ -172,6 +189,7 @@ export const reloadCollectionChunks = async ({

  // update raw text
  await MongoDatasetCollection.findByIdAndUpdate(col._id, {
+    ...(title && { name: title }),
    rawTextLength: newRawText.length,
    hashRawText: hashStr(newRawText)
  });
@@ -75,7 +75,13 @@ export async function delCollectionRelevantData({
/**
 * delete one data by mongoDataId
 */
-export async function delDatasetDataByDataId(mongoDataId: string) {
-  await deleteDatasetDataVector({ dataIds: [mongoDataId] });
+export async function delDatasetDataByDataId({
+  collectionId,
+  mongoDataId
+}: {
+  collectionId: string;
+  mongoDataId: string;
+}) {
+  await deleteDatasetDataVector({ collectionId, dataIds: [mongoDataId] });
  await MongoDatasetData.findByIdAndDelete(mongoDataId);
}
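
Note: a before/after call-site sketch (not part of this diff) for the new delDatasetDataByDataId signature; the ids are made up and the import is omitted because this file's path is not captured in the extract above.

async function exampleDelete() {
  // Before (4.6.6): delDatasetDataByDataId('65a0000000000000000000aa')
  // After (this PR): the owning collection id is passed too, so the vector rows can be
  // deleted with the collection_id + data_id scoped clause shown earlier.
  await delDatasetDataByDataId({
    collectionId: '65a0000000000000000000bb', // made-up id
    mongoDataId: '65a0000000000000000000aa' // made-up id
  });
}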
@@ -85,12 +85,13 @@ const DatasetDataSchema = new Schema({
});

try {
+  DatasetDataSchema.index({ teamId: 1 });
  DatasetDataSchema.index({ datasetId: 1 });
  DatasetDataSchema.index({ collectionId: 1 });
  DatasetDataSchema.index({ updateTime: -1 });
  DatasetDataSchema.index({ collectionId: 1, q: 1, a: 1 });
  // full text index
  DatasetDataSchema.index({ datasetId: 1, fullTextToken: 'text' });
  DatasetDataSchema.index({ inited: 1 });
} catch (error) {
  console.log(error);
}
@@ -92,7 +92,7 @@ const DatasetSchema = new Schema({
});

try {
-  DatasetSchema.index({ userId: 1 });
+  DatasetSchema.index({ teamId: 1 });
} catch (error) {
  console.log(error);
}
@@ -102,6 +102,7 @@ const TrainingDataSchema = new Schema({
});

try {
+  TrainingDataSchema.index({ teamId: 1 });
  TrainingDataSchema.index({ weight: -1 });
  TrainingDataSchema.index({ lockTime: 1 });
  TrainingDataSchema.index({ datasetId: 1 });
@@ -3,17 +3,19 @@
  "version": "1.0.0",
  "dependencies": {
    "@fastgpt/global": "workspace:*",
-    "cookie": "^0.5.0",
-    "encoding": "^0.1.13",
-    "jsonwebtoken": "^9.0.2",
-    "mongoose": "^7.0.2",
-    "nanoid": "^4.0.1",
-    "dayjs": "^1.11.7",
-    "next": "13.5.2",
-    "multer": "1.4.5-lts.1",
    "axios": "^1.5.1",
    "cheerio": "1.0.0-rc.12",
+    "cookie": "^0.5.0",
+    "dayjs": "^1.11.7",
+    "encoding": "^0.1.13",
+    "jsonwebtoken": "^9.0.2",
+    "mammoth": "^1.6.0",
+    "mongoose": "^7.0.2",
+    "multer": "1.4.5-lts.1",
+    "next": "13.5.2",
    "nextjs-cors": "^2.1.2",
+    "node-cron": "^3.0.3",
+    "pdfjs-dist": "^4.0.269",
    "pg": "^8.10.0",
    "tunnel": "^0.0.6"
  },
@@ -21,6 +23,7 @@
    "@types/cookie": "^0.5.2",
    "@types/jsonwebtoken": "^9.0.3",
    "@types/multer": "^1.4.10",
+    "@types/node-cron": "^3.0.11",
    "@types/pg": "^8.6.6",
    "@types/tunnel": "^0.0.4"
  }
@@ -1,18 +1,22 @@
import { MongoOpenApi } from './schema';

-export async function updateApiKeyUsedTime(id: string) {
-  await MongoOpenApi.findByIdAndUpdate(id, {
+export function updateApiKeyUsedTime(id: string) {
+  MongoOpenApi.findByIdAndUpdate(id, {
    lastUsedTime: new Date()
+  }).catch((err) => {
+    console.log('update apiKey used time error', err);
  });
}

-export async function updateApiKeyUsage({ apikey, usage }: { apikey: string; usage: number }) {
-  await MongoOpenApi.findOneAndUpdate(
+export function updateApiKeyUsage({ apikey, usage }: { apikey: string; usage: number }) {
+  MongoOpenApi.findOneAndUpdate(
    { apiKey: apikey },
    {
      $inc: {
        usage
      }
    }
-  );
+  ).catch((err) => {
+    console.log('update apiKey usage error', err);
+  });
}
@@ -9,17 +9,15 @@ export const updateOutLinkUsage = async ({
  shareId: string;
  total: number;
}) => {
-  try {
-    await MongoOutLink.findOneAndUpdate(
-      { shareId },
-      {
-        $inc: { total },
-        lastTime: new Date()
-      }
-    );
-  } catch (err) {
+  MongoOutLink.findOneAndUpdate(
+    { shareId },
+    {
+      $inc: { total },
+      lastTime: new Date()
+    }
+  ).catch((err) => {
    console.log('update shareChat error', err);
-  }
+  });
};

export const pushResult2Remote = async ({

packages/service/support/permission/limit/dataset.ts (new file, 20 lines)
@@ -0,0 +1,20 @@
+import { getVectorCountByTeamId } from '../../../common/vectorStore/controller';
+import { getTeamDatasetValidSub } from '../../wallet/sub/utils';
+
+export const checkDatasetLimit = async ({
+  teamId,
+  freeSize = Infinity,
+  insertLen = 0
+}: {
+  teamId: string;
+  freeSize?: number;
+  insertLen?: number;
+}) => {
+  const { maxSize } = await getTeamDatasetValidSub({ teamId, freeSize });
+  const usedSize = await getVectorCountByTeamId(teamId);
+
+  if (usedSize + insertLen >= maxSize) {
+    return Promise.reject(`数据库容量已满,无法继续添加。可以在账号页面进行扩容。`);
+  }
+  return;
+};
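
Note: a minimal usage sketch (not part of this diff) of the new checkDatasetLimit guard; the import path, freeSize value and surrounding function are assumptions. The rejected Chinese message reads roughly: "The dataset storage is full and nothing more can be added; capacity can be expanded on the account page."

import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset'; // assumed workspace path

// Hypothetical guard before inserting vectors for a team:
// rejects once usedSize + insertLen >= maxSize (free quota + active subscription amount).
export async function assertTeamCanInsert(teamId: string, insertLen: number) {
  await checkDatasetLimit({
    teamId,
    freeSize: 500000, // assumed free quota; the real default is not shown in this diff
    insertLen
  });
}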
@@ -30,9 +30,6 @@ const TeamSchema = new Schema({
    type: Number,
    default: 5
  },
-  lastDatasetBillTime: {
-    type: Date
-  },
  limit: {
    lastExportDatasetTime: {
      type: Date
@@ -54,6 +54,7 @@ const BillSchema = new Schema({
try {
+  BillSchema.index({ teamId: 1 });
  BillSchema.index({ tmbId: 1 });
  BillSchema.index({ tmbId: 1, time: 1 });
  BillSchema.index({ time: 1 }, { expireAfterSeconds: 90 * 24 * 60 * 60 });
} catch (error) {
  console.log(error);

packages/service/support/wallet/sub/schema.ts (new file, 55 lines)
@@ -0,0 +1,55 @@
+import { connectionMongo, type Model } from '../../../common/mongo';
+const { Schema, model, models } = connectionMongo;
+import { TeamCollectionName } from '@fastgpt/global/support/user/team/constant';
+import { subModeMap, subStatusMap, subTypeMap } from '@fastgpt/global/support/wallet/sub/constants';
+import type { TeamSubSchema } from '@fastgpt/global/support/wallet/sub/type';
+
+export const subCollectionName = 'team.subscription';
+
+const SubSchema = new Schema({
+  teamId: {
+    type: Schema.Types.ObjectId,
+    ref: TeamCollectionName,
+    required: true
+  },
+  type: {
+    type: String,
+    enum: Object.keys(subTypeMap),
+    required: true
+  },
+  mode: {
+    type: String,
+    enum: Object.keys(subModeMap),
+    required: true
+  },
+  status: {
+    type: String,
+    enum: Object.keys(subStatusMap),
+    required: true
+  },
+  renew: {
+    type: Boolean,
+    default: true
+  },
+  startTime: {
+    type: Date
+  },
+  expiredTime: {
+    type: Date
+  },
+  datasetStoreAmount: {
+    type: Number
+  }
+});
+
+try {
+  SubSchema.index({ teamId: 1 });
+  SubSchema.index({ status: 1 });
+  SubSchema.index({ type: 1 });
+  SubSchema.index({ expiredTime: -1 });
+} catch (error) {
+  console.log(error);
+}
+
+export const MongoTeamSub: Model<TeamSubSchema> =
+  models[subCollectionName] || model(subCollectionName, SubSchema);

packages/service/support/wallet/sub/utils.ts (new file, 31 lines)
@@ -0,0 +1,31 @@
+import { SubStatusEnum } from '@fastgpt/global/support/wallet/sub/constants';
+import { MongoTeamSub } from './schema';
+
+/* get team dataset size */
+export const getTeamDatasetValidSub = async ({
+  teamId,
+  freeSize = Infinity
+}: {
+  teamId: string;
+  freeSize?: number;
+}) => {
+  const sub = await MongoTeamSub.findOne({
+    teamId,
+    status: SubStatusEnum.active
+  })
+    .sort({
+      expiredTime: -1
+    })
+    .lean();
+
+  const maxSize = (() => {
+    if (!sub || !sub.datasetStoreAmount) return freeSize;
+
+    return sub.datasetStoreAmount + freeSize;
+  })();
+
+  return {
+    maxSize,
+    sub
+  };
+};
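
Note: a worked example (not part of this diff) of the maxSize calculation above; the figures are made up.

// With freeSize = 500 and an active subscription whose datasetStoreAmount = 10000:
//   maxSize = 10000 + 500 = 10500
// With no active subscription (or datasetStoreAmount unset):
//   maxSize = freeSize = 500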

packages/service/type.d.ts (new file, vendored, 3 lines)
@@ -0,0 +1,3 @@
+declare global {
+  var defaultTeamDatasetLimit: number;
+}