4.6.7 first pr (#726)

This commit is contained in:
Archer
2024-01-10 23:35:04 +08:00
committed by GitHub
parent 414b693303
commit 006ad17c6a
186 changed files with 2996 additions and 1838 deletions

View File

@@ -1,9 +1,15 @@
export type UploadImgProps = {
base64Img: string;
import { MongoImageTypeEnum } from './image/constants';
/**
 * Options that accompany an image upload, excluding the image payload itself.
 */
export type preUploadImgProps = {
// Image category; must be one of the string values of `MongoImageTypeEnum`.
type: `${MongoImageTypeEnum}`;
// Optional expiry date. NOTE(review): presumably the stored image may be removed after this time — confirm against the storage layer.
expiredTime?: Date;
// Optional free-form metadata stored alongside the image.
metadata?: Record<string, any>;
// Optional share identifier. NOTE(review): presumably set for uploads made from a share page — confirm with callers.
shareId?: string;
};
/**
 * Full image upload request: every `preUploadImgProps` option plus the image data.
 */
export type UploadImgProps = preUploadImgProps & {
// The image content as a string. NOTE(review): the name suggests base64 (possibly a data URL) — confirm the exact format expected.
base64Img: string;
};
export type UrlFetchParams = {
urlList: string[];
@@ -11,6 +17,7 @@ export type UrlFetchParams = {
};
/**
 * Result of a URL fetch request: an array with one entry per returned page.
 */
export type UrlFetchResponse = {
// Address of the page this entry belongs to.
url: string;
// Title associated with the page.
title: string;
// Text content obtained for the page.
content: string;
// Optional selector. NOTE(review): presumably the CSS selector used to extract `content` — confirm.
selector?: string;
}[];

View File

@@ -0,0 +1,52 @@
// Base path for image URLs. NOTE(review): presumably an image id is appended to form the final URL — confirm with callers.
export const imageBaseUrl = '/api/system/img/';
/**
 * Categories of stored images. Every member's string value is identical to its name,
 * so the template-literal type `${MongoImageTypeEnum}` is the union of these names.
 */
export enum MongoImageTypeEnum {
// Avatar categories
systemAvatar = 'systemAvatar',
appAvatar = 'appAvatar',
pluginAvatar = 'pluginAvatar',
datasetAvatar = 'datasetAvatar',
userAvatar = 'userAvatar',
teamAvatar = 'teamAvatar',
// Content image categories
chatImage = 'chatImage',
docImage = 'docImage'
}
/**
 * Per-category settings, keyed by `MongoImageTypeEnum` member.
 *  - `label`: a dotted key (looks like an i18n translation key — confirm).
 *  - `unique`: boolean flag; true for all avatar categories, false for chat/doc images.
 *    NOTE(review): presumably "unique" means only one image of that category is kept
 *    per owner — confirm against the code that consumes this flag.
 */
export const mongoImageTypeMap = {
[MongoImageTypeEnum.systemAvatar]: {
// NOTE(review): this reuses the appAvatar label key; looks like a copy-paste slip.
// Confirm whether a dedicated 'common.file.type.systemAvatar' key should be used.
label: 'common.file.type.appAvatar',
unique: true
},
[MongoImageTypeEnum.appAvatar]: {
label: 'common.file.type.appAvatar',
unique: true
},
[MongoImageTypeEnum.pluginAvatar]: {
label: 'common.file.type.pluginAvatar',
unique: true
},
[MongoImageTypeEnum.datasetAvatar]: {
label: 'common.file.type.datasetAvatar',
unique: true
},
[MongoImageTypeEnum.userAvatar]: {
label: 'common.file.type.userAvatar',
unique: true
},
[MongoImageTypeEnum.teamAvatar]: {
label: 'common.file.type.teamAvatar',
unique: true
},
[MongoImageTypeEnum.chatImage]: {
label: 'common.file.type.chatImage',
unique: false
},
[MongoImageTypeEnum.docImage]: {
label: 'common.file.type.docImage',
unique: false
}
};
/**
 * Names of every image category whose `unique` flag is true in `mongoImageTypeMap`,
 * listed in the map's own key order.
 */
export const uniqueImageTypeList = Object.entries(mongoImageTypeMap)
  .filter(([, config]) => config.unique)
  .map(([imageType]) => imageType as `${MongoImageTypeEnum}`);

View File

@@ -0,0 +1,11 @@
import { MongoImageTypeEnum } from './constants';
/**
 * Shape of a stored image record.
 */
export type MongoImageSchemaType = {
// Identifier of the team the image belongs to.
teamId: string;
// Raw image bytes.
binary: Buffer;
// Creation timestamp of the record.
createTime: Date;
// Optional expiry timestamp. NOTE(review): presumably used to clean up temporary images — confirm.
expiredTime?: Date;
// Image category; one of the string values of `MongoImageTypeEnum`.
type: `${MongoImageTypeEnum}`;
// Optional metadata; may carry a `fileId`. NOTE(review): presumably links the image to its source file — confirm.
metadata?: { fileId?: string };
};

View File

@@ -1,62 +0,0 @@
/* read file to txt */
import * as pdfjsLib from 'pdfjs-dist';
/**
 * Read a PDF through pdf.js and return the text of all pages concatenated into one string.
 *
 * Per page:
 *  - text tokens whose `transform[5]` lies at or above 95% or at or below 5% of the page
 *    height are dropped (the original names these limits header/footer thresholds);
 *    tokens without a `transform` are always kept;
 *  - an empty-string token is folded into the token before it by copying its `hasEOL` flag;
 *  - a newline is appended after a token that has `hasEOL` set and ends with
 *    sentence-ending punctuation or a line break.
 *
 * @param pdf - the document source handed straight to `pdfjsLib.getDocument`.
 * @returns the extracted text of every page, joined without a separator.
 */
export const readPdfFile = async ({ pdf }: { pdf: string | URL | ArrayBuffer }) => {
  pdfjsLib.GlobalWorkerOptions.workerSrc = '/js/pdf.worker.js';

  type TokenType = {
    str: string;
    dir: string;
    width: number;
    height: number;
    transform: number[];
    fontName: string;
    hasEOL: boolean;
  };

  // Matches a token that ends a sentence or already ends with a line break.
  const paragraphEndReg = /([。?!.?!\n\r]|(\r\n))$/;

  const readPDFPage = async (doc: any, pageNo: number) => {
    const page = await doc.getPage(pageNo);
    const textContent = await page.getTextContent();

    const { height: pageHeight } = page.getViewport({ scale: 1 });
    const upperLimit = pageHeight * 0.95;
    const lowerLimit = pageHeight * 0.05;

    // Keep only tokens strictly between the footer and header limits.
    const bodyTokens: TokenType[] = textContent.items.filter((token: TokenType) => {
      if (!token.transform) return true;
      const offset = token.transform[5];
      return offset < upperLimit && offset > lowerLimit;
    });

    // Fold each empty-string token into the previously kept token, carrying over its 'hasEOL'.
    // An empty token with nothing before it is kept as is.
    const mergedTokens: TokenType[] = [];
    for (const token of bodyTokens) {
      const previous = mergedTokens[mergedTokens.length - 1];
      if (token.str === '' && previous) {
        previous.hasEOL = token.hasEOL;
        continue;
      }
      mergedTokens.push(token);
    }

    page.cleanup();

    const pieces = mergedTokens.map((token) =>
      token.hasEOL && paragraphEndReg.test(token.str) ? `${token.str}\n` : token.str
    );
    return pieces.join('');
  };

  const doc = await pdfjsLib.getDocument(pdf).promise;

  // pdf.js page numbers start at 1; all pages are read concurrently.
  const pageNumbers = Array.from({ length: doc.numPages }, (_, index) => index + 1);
  const pageTexts = await Promise.all(pageNumbers.map((pageNo) => readPDFPage(doc, pageNo)));

  return pageTexts.join('');
};

View File

@@ -0,0 +1,10 @@
/**
 * Number of days left in the calendar month of `now`, counting the day of `now`
 * itself as one full day (so a partially elapsed day still counts as 1).
 *
 * The real length of the month is used (28-31 days), not a fixed 30 days, so the
 * result ranges from 1 (last day of the month) to the month length (first day).
 *
 * @param now - reference date, read in local time; defaults to the current time.
 * @returns remaining days in the month, including the day of `now`.
 */
export const getMonthRemainingDays = (now: Date = new Date()) => {
  const year = now.getFullYear();
  const month = now.getMonth();
  const today = now.getDate();

  // Day 0 of the following month resolves to the last day of this month,
  // whose day-of-month number is the month length.
  const daysInMonth = new Date(year, month + 1, 0).getDate();

  return daysInMonth - today + 1;
};

View File

@@ -15,10 +15,10 @@ export const simpleMarkdownText = (rawText: string) => {
return `[${cleanedLinkText}](${url})`;
});
// replace special \.* ……
const reg1 = /\\([-.!`_(){}\[\]])/g;
// replace special #\.* ……
const reg1 = /\\([#`!*()+-_\[\]{}\\.])/g;
if (reg1.test(rawText)) {
rawText = rawText.replace(/\\([`!*()+-_\[\]{}\\.])/g, '$1');
rawText = rawText.replace(reg1, '$1');
}
// replace \\n
@@ -45,24 +45,26 @@ export const uploadMarkdownBase64 = async ({
uploadImgController
}: {
rawText: string;
uploadImgController: (base64: string) => Promise<string>;
uploadImgController?: (base64: string) => Promise<string>;
}) => {
// match base64, upload and replace it
const base64Regex = /data:image\/.*;base64,([^\)]+)/g;
const base64Arr = rawText.match(base64Regex) || [];
// upload base64 and replace it
await Promise.all(
base64Arr.map(async (base64Img) => {
try {
const str = await uploadImgController(base64Img);
if (uploadImgController) {
// match base64, upload and replace it
const base64Regex = /data:image\/.*;base64,([^\)]+)/g;
const base64Arr = rawText.match(base64Regex) || [];
// upload base64 and replace it
await Promise.all(
base64Arr.map(async (base64Img) => {
try {
const str = await uploadImgController(base64Img);
rawText = rawText.replace(base64Img, str);
} catch (error) {
rawText = rawText.replace(base64Img, '');
rawText = rawText.replace(/!\[.*\]\(\)/g, '');
}
})
);
rawText = rawText.replace(base64Img, str);
} catch (error) {
rawText = rawText.replace(base64Img, '');
rawText = rawText.replace(/!\[.*\]\(\)/g, '');
}
})
);
}
// Remove white space on both sides of the picture
const trimReg = /(!\[.*\]\(.*\))\s*/g;
@@ -70,5 +72,20 @@ export const uploadMarkdownBase64 = async ({
rawText = rawText.replace(trimReg, '$1');
}
return simpleMarkdownText(rawText);
return rawText;
};
/**
 * Run the full markdown pipeline on `rawText`: first the image step
 * (`uploadMarkdownBase64`), then the text cleanup (`simpleMarkdownText`).
 *
 * @param rawText - markdown source to process.
 * @param uploadImgController - optional uploader forwarded unchanged to `uploadMarkdownBase64`.
 * @returns the result of `simpleMarkdownText` applied to the image-processed text.
 */
export const markdownProcess = async ({
  rawText,
  uploadImgController
}: {
  rawText: string;
  uploadImgController?: (base64: string) => Promise<string>;
}) => {
  const textAfterImageStep = await uploadMarkdownBase64({ rawText, uploadImgController });

  return simpleMarkdownText(textAfterImageStep);
};

View File

@@ -33,6 +33,12 @@ export function countPromptTokens(
) {
const enc = getTikTokenEnc();
const text = `${role}\n${prompt}`;
// too large a text will block the thread
if (text.length > 15000) {
return text.length * 1.7;
}
try {
const encodeText = enc.encode(text);
return encodeText.length + role.length; // 补充 role 估算值

View File

@@ -1,3 +1,4 @@
import dayjs from 'dayjs';
export const formatTime2YMDHM = (time: Date) => dayjs(time).format('YYYY-MM-DD HH:mm');
/**
 * Format a date as `YYYY-MM-DD HH:mm`.
 *
 * @param time - date to format; may be omitted.
 * @returns the formatted string, or an empty string when `time` is missing.
 */
export const formatTime2YMDHM = (time?: Date) => {
  if (!time) {
    return '';
  }
  return dayjs(time).format('YYYY-MM-DD HH:mm');
};

View File

@@ -1,4 +1,5 @@
import crypto from 'crypto';
import { customAlphabet } from 'nanoid';
/* check string is a web link */
export function strIsLink(str?: string) {
@@ -36,3 +37,7 @@ export function replaceVariable(text: string, obj: Record<string, string | numbe
}
return text || '';
}
/**
 * Generate a random id drawn from the letters a-z, A-Z and the digits 0-9.
 *
 * @param size - number of characters in the id; 12 by default.
 * @returns the generated id.
 */
export const getNanoid = (size = 12) => {
  const generate = customAlphabet(
    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890',
    size
  );
  return generate();
};

View File

@@ -51,6 +51,10 @@ export type FastGPTFeConfigsType = {
favicon?: string;
customApiDomain?: string;
customSharePageDomain?: string;
subscription?: {
datasetStoreFreeSize?: number;
datasetStorePrice?: number;
};
};
export type SystemEnvType = {
@@ -63,4 +67,5 @@ export type SystemEnvType = {
declare global {
var feConfigs: FastGPTFeConfigsType;
var systemEnv: SystemEnvType;
var systemInitd: boolean;
}