mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
4.6.7 first pr (#726)
This commit is contained in:
11
packages/global/common/file/api.d.ts
vendored
11
packages/global/common/file/api.d.ts
vendored
@@ -1,9 +1,15 @@
|
||||
export type UploadImgProps = {
|
||||
base64Img: string;
|
||||
import { MongoImageTypeEnum } from './image/constants';
|
||||
|
||||
export type preUploadImgProps = {
|
||||
type: `${MongoImageTypeEnum}`;
|
||||
|
||||
expiredTime?: Date;
|
||||
metadata?: Record<string, any>;
|
||||
shareId?: string;
|
||||
};
|
||||
export type UploadImgProps = preUploadImgProps & {
|
||||
base64Img: string;
|
||||
};
|
||||
|
||||
export type UrlFetchParams = {
|
||||
urlList: string[];
|
||||
@@ -11,6 +17,7 @@ export type UrlFetchParams = {
|
||||
};
|
||||
export type UrlFetchResponse = {
|
||||
url: string;
|
||||
title: string;
|
||||
content: string;
|
||||
selector?: string;
|
||||
}[];
|
||||
|
52
packages/global/common/file/image/constants.ts
Normal file
52
packages/global/common/file/image/constants.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
export const imageBaseUrl = '/api/system/img/';
|
||||
|
||||
/** Image categories. The string value of every member equals its member name. */
export enum MongoImageTypeEnum {
  systemAvatar = 'systemAvatar',
  appAvatar = 'appAvatar',
  pluginAvatar = 'pluginAvatar',
  datasetAvatar = 'datasetAvatar',
  userAvatar = 'userAvatar',
  teamAvatar = 'teamAvatar',

  chatImage = 'chatImage',
  docImage = 'docImage'
}

/**
 * Per-type settings for every image category.
 *
 * - `label`: display label string (presumably an i18n key - confirm against the locale files).
 * - `unique`: flag used to build `uniqueImageTypeList`.
 *
 * The explicit `Record` type makes the compiler reject a new enum member that has
 * no entry here, and rejects entries with a misspelled or missing field.
 */
export const mongoImageTypeMap: Record<
  `${MongoImageTypeEnum}`,
  { label: string; unique: boolean }
> = {
  [MongoImageTypeEnum.systemAvatar]: {
    // NOTE(review): this reuses the appAvatar label; looks like a copy-paste.
    // Left unchanged because a dedicated systemAvatar key cannot be confirmed from here.
    label: 'common.file.type.appAvatar',
    unique: true
  },
  [MongoImageTypeEnum.appAvatar]: {
    label: 'common.file.type.appAvatar',
    unique: true
  },
  [MongoImageTypeEnum.pluginAvatar]: {
    label: 'common.file.type.pluginAvatar',
    unique: true
  },
  [MongoImageTypeEnum.datasetAvatar]: {
    label: 'common.file.type.datasetAvatar',
    unique: true
  },
  [MongoImageTypeEnum.userAvatar]: {
    label: 'common.file.type.userAvatar',
    unique: true
  },
  [MongoImageTypeEnum.teamAvatar]: {
    label: 'common.file.type.teamAvatar',
    unique: true
  },

  [MongoImageTypeEnum.chatImage]: {
    label: 'common.file.type.chatImage',
    unique: false
  },
  [MongoImageTypeEnum.docImage]: {
    label: 'common.file.type.docImage',
    unique: false
  }
};

/** Image types whose `unique` flag is true, in the declaration order of `mongoImageTypeMap`. */
export const uniqueImageTypeList = (
  Object.keys(mongoImageTypeMap) as `${MongoImageTypeEnum}`[]
).filter((type) => mongoImageTypeMap[type].unique);
|
11
packages/global/common/file/image/type.d.ts
vendored
Normal file
11
packages/global/common/file/image/type.d.ts
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
import { MongoImageTypeEnum } from './constants';
|
||||
|
||||
export type MongoImageSchemaType = {
|
||||
teamId: string;
|
||||
binary: Buffer;
|
||||
createTime: Date;
|
||||
expiredTime?: Date;
|
||||
type: `${MongoImageTypeEnum}`;
|
||||
|
||||
metadata?: { fileId?: string };
|
||||
};
|
@@ -1,62 +0,0 @@
|
||||
/* read file to txt */
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
|
||||
/**
 * Extract the text of a PDF and return it as one string.
 *
 * Every page is read concurrently via `Promise.all`, and the per-page strings are
 * concatenated in page order without a separator.
 *
 * @param pdf source passed straight to `pdfjsLib.getDocument`
 * @returns the concatenated text of all pages
 */
export const readPdfFile = async ({ pdf }: { pdf: string | URL | ArrayBuffer }) => {
  pdfjsLib.GlobalWorkerOptions.workerSrc = '/js/pdf.worker.js';

  type TokenType = {
    str: string;
    dir: string;
    width: number;
    height: number;
    transform: number[];
    fontName: string;
    hasEOL: boolean;
  };

  /** Read one page (1-based `pageNo`) and return its text. */
  const readPDFPage = async (doc: any, pageNo: number) => {
    const page = await doc.getPage(pageNo);
    const tokenizedText = await page.getTextContent();

    const viewport = page.getViewport({ scale: 1 });
    const pageHeight = viewport.height;
    const headerThreshold = pageHeight * 0.95;
    const footerThreshold = pageHeight * 0.05;

    // Keep tokens without a transform, and tokens whose transform[5] lies strictly
    // between 5% and 95% of the page height (presumably the vertical position, so this
    // drops header/footer text - TODO confirm against the pdf.js text-item docs).
    const bodyTokens: TokenType[] = tokenizedText.items.filter((token: TokenType) => {
      return (
        !token.transform ||
        (token.transform[5] < headerThreshold && token.transform[5] > footerThreshold)
      );
    });

    // Fold each empty-string token into the token kept before it, carrying over its
    // hasEOL flag. A leading empty token has nothing before it and is kept as is.
    // Single pass; gives the same result as removing elements in place with splice.
    const pageTexts: TokenType[] = [];
    for (const token of bodyTokens) {
      const previous = pageTexts[pageTexts.length - 1];
      if (token.str === '' && previous) {
        previous.hasEOL = token.hasEOL;
      } else {
        pageTexts.push(token);
      }
    }

    page.cleanup();

    return pageTexts
      .map((token) => {
        // A line break is emitted only when the token ends a line AND ends with
        // sentence punctuation or a newline character.
        const paragraphEnd = token.hasEOL && /([。?!.?!\n\r]|(\r\n))$/.test(token.str);

        return paragraphEnd ? `${token.str}\n` : token.str;
      })
      .join('');
  };

  const doc = await pdfjsLib.getDocument(pdf).promise;
  const pageTextPromises = [];
  for (let pageNo = 1; pageNo <= doc.numPages; pageNo++) {
    pageTextPromises.push(readPDFPage(doc, pageNo));
  }
  const pageTexts = await Promise.all(pageTextPromises);

  return pageTexts.join('');
};
|
10
packages/global/common/math/date.ts
Normal file
10
packages/global/common/math/date.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
/**
 * Number of days left in the calendar month of `now`, counting the current day as a
 * full day (so the result is always at least 1).
 *
 * The real length of the month is used (28-31 days), not a fixed 30 days.
 *
 * @param now reference date, read in local time; defaults to the current time
 * @returns remaining days including today, e.g. 1 on the last day of the month
 */
export const getMonthRemainingDays = (now: Date = new Date()) => {
  // Day 0 of the following month resolves to the last day of this month.
  const daysInMonth = new Date(now.getFullYear(), now.getMonth() + 1, 0).getDate();

  return daysInMonth - now.getDate() + 1;
};
|
@@ -15,10 +15,10 @@ export const simpleMarkdownText = (rawText: string) => {
|
||||
return `[${cleanedLinkText}](${url})`;
|
||||
});
|
||||
|
||||
// replace special \.* ……
|
||||
const reg1 = /\\([-.!`_(){}\[\]])/g;
|
||||
// replace special #\.* ……
|
||||
const reg1 = /\\([#`!*()+-_\[\]{}\\.])/g;
|
||||
if (reg1.test(rawText)) {
|
||||
rawText = rawText.replace(/\\([`!*()+-_\[\]{}\\.])/g, '$1');
|
||||
rawText = rawText.replace(reg1, '$1');
|
||||
}
|
||||
|
||||
// replace \\n
|
||||
@@ -45,24 +45,26 @@ export const uploadMarkdownBase64 = async ({
|
||||
uploadImgController
|
||||
}: {
|
||||
rawText: string;
|
||||
uploadImgController: (base64: string) => Promise<string>;
|
||||
uploadImgController?: (base64: string) => Promise<string>;
|
||||
}) => {
|
||||
// match base64, upload and replace it
|
||||
const base64Regex = /data:image\/.*;base64,([^\)]+)/g;
|
||||
const base64Arr = rawText.match(base64Regex) || [];
|
||||
// upload base64 and replace it
|
||||
await Promise.all(
|
||||
base64Arr.map(async (base64Img) => {
|
||||
try {
|
||||
const str = await uploadImgController(base64Img);
|
||||
if (uploadImgController) {
|
||||
// match base64, upload and replace it
|
||||
const base64Regex = /data:image\/.*;base64,([^\)]+)/g;
|
||||
const base64Arr = rawText.match(base64Regex) || [];
|
||||
// upload base64 and replace it
|
||||
await Promise.all(
|
||||
base64Arr.map(async (base64Img) => {
|
||||
try {
|
||||
const str = await uploadImgController(base64Img);
|
||||
|
||||
rawText = rawText.replace(base64Img, str);
|
||||
} catch (error) {
|
||||
rawText = rawText.replace(base64Img, '');
|
||||
rawText = rawText.replace(/!\[.*\]\(\)/g, '');
|
||||
}
|
||||
})
|
||||
);
|
||||
rawText = rawText.replace(base64Img, str);
|
||||
} catch (error) {
|
||||
rawText = rawText.replace(base64Img, '');
|
||||
rawText = rawText.replace(/!\[.*\]\(\)/g, '');
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
// Remove white space on both sides of the picture
|
||||
const trimReg = /(!\[.*\]\(.*\))\s*/g;
|
||||
@@ -70,5 +72,20 @@ export const uploadMarkdownBase64 = async ({
|
||||
rawText = rawText.replace(trimReg, '$1');
|
||||
}
|
||||
|
||||
return simpleMarkdownText(rawText);
|
||||
return rawText;
|
||||
};
|
||||
|
||||
/**
 * Run the two markdown passes in order: first `uploadMarkdownBase64` (with the
 * optional upload callback), then `simpleMarkdownText` on its result.
 *
 * @param rawText markdown text to process
 * @param uploadImgController optional callback forwarded to `uploadMarkdownBase64`
 * @returns the output of `simpleMarkdownText`
 */
export const markdownProcess = async ({
  rawText,
  uploadImgController
}: {
  rawText: string;
  uploadImgController?: (base64: string) => Promise<string>;
}) => {
  const withImagesHandled = await uploadMarkdownBase64({ rawText, uploadImgController });

  return simpleMarkdownText(withImagesHandled);
};
|
||||
|
@@ -33,6 +33,12 @@ export function countPromptTokens(
|
||||
) {
|
||||
const enc = getTikTokenEnc();
|
||||
const text = `${role}\n${prompt}`;
|
||||
|
||||
// too large a text will block the thread
|
||||
if (text.length > 15000) {
|
||||
return text.length * 1.7;
|
||||
}
|
||||
|
||||
try {
|
||||
const encodeText = enc.encode(text);
|
||||
return encodeText.length + role.length; // 补充 role 估算值
|
||||
|
@@ -1,3 +1,4 @@
|
||||
import dayjs from 'dayjs';
|
||||
|
||||
export const formatTime2YMDHM = (time: Date) => dayjs(time).format('YYYY-MM-DD HH:mm');
|
||||
/**
 * Format a date as `YYYY-MM-DD HH:mm`.
 * Returns an empty string when `time` is missing.
 */
export const formatTime2YMDHM = (time?: Date) => {
  if (!time) return '';

  return dayjs(time).format('YYYY-MM-DD HH:mm');
};
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import crypto from 'crypto';
|
||||
import { customAlphabet } from 'nanoid';
|
||||
|
||||
/* check string is a web link */
|
||||
export function strIsLink(str?: string) {
|
||||
@@ -36,3 +37,7 @@ export function replaceVariable(text: string, obj: Record<string, string | numbe
|
||||
}
|
||||
return text || '';
|
||||
}
|
||||
|
||||
/**
 * Generate an id of `size` characters (default 12) drawn from ASCII letters and digits.
 */
export const getNanoid = (size = 12) => {
  const alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890';
  const generate = customAlphabet(alphabet, size);

  return generate();
};
|
||||
|
@@ -51,6 +51,10 @@ export type FastGPTFeConfigsType = {
|
||||
favicon?: string;
|
||||
customApiDomain?: string;
|
||||
customSharePageDomain?: string;
|
||||
subscription?: {
|
||||
datasetStoreFreeSize?: number;
|
||||
datasetStorePrice?: number;
|
||||
};
|
||||
};
|
||||
|
||||
export type SystemEnvType = {
|
||||
@@ -63,4 +67,5 @@ export type SystemEnvType = {
|
||||
declare global {
|
||||
var feConfigs: FastGPTFeConfigsType;
|
||||
var systemEnv: SystemEnvType;
|
||||
var systemInitd: boolean;
|
||||
}
|
||||
|
@@ -31,16 +31,16 @@ export enum ChatSourceEnum {
|
||||
}
|
||||
export const ChatSourceMap = {
|
||||
[ChatSourceEnum.test]: {
|
||||
name: 'chat.logs.test'
|
||||
name: 'core.chat.logs.test'
|
||||
},
|
||||
[ChatSourceEnum.online]: {
|
||||
name: 'chat.logs.online'
|
||||
name: 'core.chat.logs.online'
|
||||
},
|
||||
[ChatSourceEnum.share]: {
|
||||
name: 'chat.logs.share'
|
||||
name: 'core.chat.logs.share'
|
||||
},
|
||||
[ChatSourceEnum.api]: {
|
||||
name: 'chat.logs.api'
|
||||
name: 'core.chat.logs.api'
|
||||
}
|
||||
};
|
||||
|
||||
|
27
packages/global/core/dataset/api.d.ts
vendored
27
packages/global/core/dataset/api.d.ts
vendored
@@ -1,5 +1,5 @@
|
||||
import { DatasetDataIndexItemType, DatasetSchemaType } from './type';
|
||||
import { DatasetCollectionTrainingModeEnum, DatasetCollectionTypeEnum } from './constant';
|
||||
import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constant';
|
||||
import type { LLMModelItemType } from '../ai/model.d';
|
||||
|
||||
/* ================= dataset ===================== */
|
||||
@@ -16,21 +16,38 @@ export type DatasetUpdateBody = {
|
||||
};
|
||||
|
||||
/* ================= collection ===================== */
|
||||
export type CreateDatasetCollectionParams = {
|
||||
export type DatasetCollectionChunkMetadataType = {
|
||||
trainingType?: `${TrainingModeEnum}`;
|
||||
chunkSize?: number;
|
||||
chunkSplitter?: string;
|
||||
qaPrompt?: string;
|
||||
};
|
||||
export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
|
||||
datasetId: string;
|
||||
parentId?: string;
|
||||
name: string;
|
||||
type: `${DatasetCollectionTypeEnum}`;
|
||||
trainingType?: `${DatasetCollectionTrainingModeEnum}`;
|
||||
chunkSize?: number;
|
||||
fileId?: string;
|
||||
rawLink?: string;
|
||||
qaPrompt?: string;
|
||||
rawTextLength?: number;
|
||||
hashRawText?: string;
|
||||
metadata?: Record<string, any>;
|
||||
};
|
||||
|
||||
export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
|
||||
datasetId: string;
|
||||
parentId?: string;
|
||||
metadata?: Record<string, any>;
|
||||
};
|
||||
export type TextCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
|
||||
name: string;
|
||||
text: string;
|
||||
};
|
||||
export type LinkCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
|
||||
link: string;
|
||||
chunkSplitter?: string;
|
||||
};
|
||||
|
||||
/* ================= data ===================== */
|
||||
export type PgSearchRawType = {
|
||||
id: string;
|
||||
|
@@ -53,23 +53,7 @@ export const DatasetCollectionTypeMap = {
|
||||
name: 'core.dataset.link'
|
||||
},
|
||||
[DatasetCollectionTypeEnum.virtual]: {
|
||||
name: 'core.dataset.Virtual File'
|
||||
}
|
||||
};
|
||||
export enum DatasetCollectionTrainingModeEnum {
|
||||
manual = 'manual',
|
||||
chunk = 'chunk',
|
||||
qa = 'qa'
|
||||
}
|
||||
export const DatasetCollectionTrainingTypeMap = {
|
||||
[DatasetCollectionTrainingModeEnum.manual]: {
|
||||
label: 'core.dataset.collection.training.type manual'
|
||||
},
|
||||
[DatasetCollectionTrainingModeEnum.chunk]: {
|
||||
label: 'core.dataset.collection.training.type chunk'
|
||||
},
|
||||
[DatasetCollectionTrainingModeEnum.qa]: {
|
||||
label: 'core.dataset.collection.training.type qa'
|
||||
name: 'core.dataset.Manual collection'
|
||||
}
|
||||
};
|
||||
|
||||
|
6
packages/global/core/dataset/type.d.ts
vendored
6
packages/global/core/dataset/type.d.ts
vendored
@@ -42,11 +42,15 @@ export type DatasetCollectionSchemaType = {
|
||||
type: `${DatasetCollectionTypeEnum}`;
|
||||
createTime: Date;
|
||||
updateTime: Date;
|
||||
|
||||
trainingType: `${TrainingModeEnum}`;
|
||||
chunkSize: number;
|
||||
chunkSplitter?: string;
|
||||
qaPrompt?: string;
|
||||
|
||||
fileId?: string;
|
||||
rawLink?: string;
|
||||
qaPrompt?: string;
|
||||
|
||||
rawTextLength?: number;
|
||||
hashRawText?: string;
|
||||
metadata?: {
|
||||
|
@@ -1,4 +1,4 @@
|
||||
import { DatasetCollectionTypeEnum, DatasetDataIndexTypeEnum } from './constant';
|
||||
import { TrainingModeEnum, DatasetCollectionTypeEnum, DatasetDataIndexTypeEnum } from './constant';
|
||||
import { getFileIcon } from '../../common/file/icon';
|
||||
import { strIsLink } from '../../common/string/tools';
|
||||
|
||||
@@ -55,3 +55,8 @@ export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: strin
|
||||
dataId
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Predict how many units a batch of data counts for against a limit.
 *
 * In `qa` mode every item counts 20 times; in every other mode each item counts once.
 *
 * @param mode training mode of the batch
 * @param data items of the batch; only its length is read
 * @returns the weighted item count
 */
export const predictDataLimitLength = (mode: `${TrainingModeEnum}`, data: readonly unknown[]) => {
  const qaWeight = 20;

  return mode === TrainingModeEnum.qa ? data.length * qaWeight : data.length;
};
|
||||
|
@@ -7,7 +7,7 @@
|
||||
"encoding": "^0.1.13",
|
||||
"js-tiktoken": "^1.0.7",
|
||||
"openai": "4.23.0",
|
||||
"pdfjs-dist": "^4.0.269",
|
||||
"nanoid": "^4.0.1",
|
||||
"timezones-list": "^3.0.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
1
packages/global/support/user/team/type.d.ts
vendored
1
packages/global/support/user/team/type.d.ts
vendored
@@ -9,7 +9,6 @@ export type TeamSchema = {
|
||||
createTime: Date;
|
||||
balance: number;
|
||||
maxSize: number;
|
||||
lastDatasetBillTime: Date;
|
||||
limit: {
|
||||
lastExportDatasetTime: Date;
|
||||
lastWebsiteSyncTime: Date;
|
||||
|
@@ -7,7 +7,7 @@ export enum BillSourceEnum {
|
||||
api = 'api',
|
||||
shareLink = 'shareLink',
|
||||
training = 'training',
|
||||
datasetStore = 'datasetStore'
|
||||
datasetExpand = 'datasetExpand'
|
||||
}
|
||||
|
||||
export const BillSourceMap: Record<`${BillSourceEnum}`, string> = {
|
||||
@@ -15,5 +15,5 @@ export const BillSourceMap: Record<`${BillSourceEnum}`, string> = {
|
||||
[BillSourceEnum.api]: 'Api',
|
||||
[BillSourceEnum.shareLink]: '免登录链接',
|
||||
[BillSourceEnum.training]: '数据训练',
|
||||
[BillSourceEnum.datasetStore]: '知识库存储'
|
||||
[BillSourceEnum.datasetExpand]: '知识库扩容'
|
||||
};
|
||||
|
4
packages/global/support/wallet/sub/api.d.ts
vendored
Normal file
4
packages/global/support/wallet/sub/api.d.ts
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
export type SubDatasetSizeParams = {
|
||||
size: number;
|
||||
renew: boolean;
|
||||
};
|
37
packages/global/support/wallet/sub/constants.ts
Normal file
37
packages/global/support/wallet/sub/constants.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
/** Subscription types. */
export enum SubTypeEnum {
  datasetStore = 'datasetStore'
}

/**
 * Display label for every subscription type.
 * The `Record` type forces one entry per enum value.
 */
export const subTypeMap: Record<`${SubTypeEnum}`, { label: string }> = {
  [SubTypeEnum.datasetStore]: {
    label: 'support.user.team.subscription.type.datasetStore'
  }
};

/** Subscription modes. */
export enum SubModeEnum {
  month = 'month',
  year = 'year'
}

/**
 * Display label for every subscription mode.
 * The `Record` type forces one entry per enum value.
 */
export const subModeMap: Record<`${SubModeEnum}`, { label: string }> = {
  [SubModeEnum.month]: {
    label: 'support.user.team.subscription.mode.month'
  },
  [SubModeEnum.year]: {
    label: 'support.user.team.subscription.mode.year'
  }
};

/** Subscription statuses. */
export enum SubStatusEnum {
  active = 'active',
  expired = 'expired'
}

/**
 * Display label for every subscription status.
 * The `Record` type forces one entry per enum value.
 */
export const subStatusMap: Record<`${SubStatusEnum}`, { label: string }> = {
  [SubStatusEnum.active]: {
    label: 'support.user.team.subscription.status.active'
  },
  [SubStatusEnum.expired]: {
    label: 'support.user.team.subscription.status.expired'
  }
};
|
12
packages/global/support/wallet/sub/type.d.ts
vendored
Normal file
12
packages/global/support/wallet/sub/type.d.ts
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
import { SubModeEnum, SubStatusEnum, SubTypeEnum } from './constants';
|
||||
|
||||
export type TeamSubSchema = {
|
||||
teamId: string;
|
||||
type: `${SubTypeEnum}`;
|
||||
mode: `${SubModeEnum}`;
|
||||
status: `${SubStatusEnum}`;
|
||||
renew: boolean;
|
||||
startTime: Date;
|
||||
expiredTime: Date;
|
||||
datasetStoreAmount?: number;
|
||||
};
|
Reference in New Issue
Block a user