mirror of
https://github.com/labring/FastGPT.git
synced 2025-08-01 20:27:45 +00:00
v4.5.1 (#417)
This commit is contained in:
@@ -1,3 +0,0 @@
|
||||
export type CreateTrainingBillType = {
|
||||
name: string;
|
||||
};
|
@@ -1,4 +0,0 @@
|
||||
export type FetchResultItem = {
|
||||
url: string;
|
||||
content: string;
|
||||
};
|
@@ -4,7 +4,7 @@ import type {
|
||||
LLMModelItemType,
|
||||
VectorModelItemType
|
||||
} from '@/types/model';
|
||||
import type { FeConfigsType } from '@fastgpt/common/type/index.d';
|
||||
import type { FeConfigsType } from '@fastgpt/global/common/system/types/index.d';
|
||||
|
||||
export type InitDateResponse = {
|
||||
chatModels: ChatModelItemType[];
|
||||
|
108
projects/app/src/global/common/string/tools.ts
Normal file
108
projects/app/src/global/common/string/tools.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
import { getErrText } from '@fastgpt/global/common/error/utils';
|
||||
import { countPromptTokens } from '@/global/common/tiktoken';
|
||||
|
||||
/**
 * Replace every `{{key}}` placeholder in `text` with the matching value from `obj`.
 *
 * - String and number values are substituted (numbers are converted with `String`).
 *   Values of any other runtime type are skipped and their placeholder is left as is.
 * - Keys are matched literally: regular-expression metacharacters inside a key are escaped.
 * - Values are inserted verbatim: `$&`, `$1`, ... inside a value are not expanded.
 *
 * @param text - template text containing `{{key}}` placeholders
 * @param obj - map from placeholder name to replacement value
 * @returns the text with all known placeholders replaced; `''` when `text` is empty
 */
export function replaceVariable(text: string, obj: Record<string, string | number>) {
  let result = text || '';

  for (const [key, val] of Object.entries(obj ?? {})) {
    if (typeof val !== 'string' && typeof val !== 'number') continue;

    // Escape the key so characters such as "." or "|" cannot alter the pattern.
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const placeholder = new RegExp(`\\{\\{(${escapedKey})\\}\\}`, 'g');
    const replacement = String(val);

    // A replacer function keeps "$" sequences in the value from being interpreted.
    result = result.replace(placeholder, () => replacement);
  }

  return result;
}
|
||||
|
||||
/**
 * Split text into chunks of roughly `maxLen` characters and count their tokens.
 *
 * Splitting is attempted on progressively finer delimiters (see `stepReg`). A piece
 * that is still longer than `maxLen` after the last delimiter level is cut into
 * fixed-size slices. The overlap carried between chunks is 20% of `maxLen`, rounded down.
 * A chunk is emitted once its accumulated length reaches `maxLen`, so a chunk can be
 * longer than `maxLen`.
 *
 * @param text - text to split; defaults to an empty string
 * @param maxLen - target chunk length in characters; must be greater than 0
 * @returns `chunks` and `tokens`, the sum of `countPromptTokens(chunk, 'system')` over all chunks
 * @throws Error when `maxLen` is not a positive number, or when splitting / token counting fails
 */
export const splitText2Chunks = ({ text = '', maxLen }: { text: string; maxLen: number }) => {
  // A maxLen <= 0 makes the slice fallback advance by a non-positive stride and never
  // terminate; NaN defeats every length comparison. Reject both up front.
  if (!(maxLen > 0)) {
    throw new Error(`splitText2Chunks: maxLen must be a positive number, received ${maxLen}`);
  }

  const overlapLen = Math.floor(maxLen * 0.2); // Overlap length
  const tempMarker = 'SPLIT_HERE_SPLIT_HERE';

  // Delimiters ordered from coarsest to finest.
  // NOTE(review): the character classes in steps 3-5 make their `\s` alternatives
  // redundant as written; they may originally have held full-width punctuation — confirm.
  const stepReg: Record<number, RegExp> = {
    0: /(\n\n)/g,
    1: /([\n])/g,
    2: /([。]|\.\s)/g,
    3: /([!?]|!\s|\?\s)/g,
    4: /([;]|;\s)/g,
    5: /([,]|,\s)/g
  };

  const splitTextRecursively = ({ text = '', step }: { text: string; step: number }): string[] => {
    if (text.length <= maxLen) {
      return [text];
    }
    const reg = stepReg[step];

    if (!reg) {
      // No delimiter level left: cut windows of maxLen that advance by maxLen - overlapLen,
      // so consecutive windows share overlapLen characters.
      const stride = maxLen - overlapLen;
      const slices: string[] = [];
      for (let i = 0; i < text.length; i += stride) {
        slices.push(text.slice(i, i + maxLen));
      }
      return slices;
    }

    // Split after each delimiter (the delimiter stays attached to the preceding part).
    const splitTexts = text
      .replace(reg, `$1${tempMarker}`)
      .split(`${tempMarker}`)
      .filter((part) => part);

    let chunks: string[] = [];
    let preChunk = '';
    let chunk = '';
    for (let i = 0; i < splitTexts.length; i++) {
      let part = splitTexts[i];
      // The part alone is over size: split it with the next, finer delimiter.
      if (part.length > maxLen) {
        const innerChunks = splitTextRecursively({ text: part, step: step + 1 });
        if (innerChunks.length === 0) continue;
        // If the last chunk is too small, it is merged into the next chunk
        if (innerChunks[innerChunks.length - 1].length <= maxLen * 0.5) {
          part = innerChunks.pop() || '';
          chunks = chunks.concat(innerChunks);
        } else {
          chunks = chunks.concat(innerChunks);
          continue;
        }
      }

      chunk += part;
      // Once the chunk passes maxLen - overlapLen, also collect the part as the
      // seed (overlap) of the next chunk.
      if (chunk.length > maxLen - overlapLen) {
        preChunk += part;
      }
      if (chunk.length >= maxLen) {
        chunks.push(chunk);
        chunk = preChunk;
        preChunk = '';
      }
    }

    // Flush the remainder unless it is only the overlap already at the end of the last chunk.
    const lastChunk = chunks[chunks.length - 1];
    if (chunk && !(lastChunk !== undefined && lastChunk.endsWith(chunk))) {
      chunks.push(chunk);
    }
    return chunks;
  };

  try {
    const chunks = splitTextRecursively({ text, step: 0 });

    const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);

    return {
      chunks,
      tokens
    };
  } catch (err) {
    throw new Error(getErrText(err));
  }
};
|
11
projects/app/src/global/common/tiktoken/cl100k_base.json
Normal file
11
projects/app/src/global/common/tiktoken/cl100k_base.json
Normal file
File diff suppressed because one or more lines are too long
95
projects/app/src/global/common/tiktoken/index.ts
Normal file
95
projects/app/src/global/common/tiktoken/index.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
/* Only the token of gpt-3.5-turbo is used */
|
||||
import { ChatItemType } from '@/types/chat';
|
||||
import { Tiktoken } from 'js-tiktoken/lite';
|
||||
import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
|
||||
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constant';
|
||||
import encodingJson from './cl100k_base.json';
|
||||
|
||||
/**
 * Return the Tiktoken encoder, building it from `encodingJson` on first use.
 *
 * The instance is cached on `window.TikToken` and/or `global.TikToken`, whichever
 * of those objects exist in the current runtime, and reused by later calls.
 */
export function getTikTokenEnc() {
  const hasWindow = typeof window !== 'undefined';
  const hasGlobal = typeof global !== 'undefined';

  // Reuse a cached encoder when one is present.
  if (hasWindow && window.TikToken) return window.TikToken;
  if (hasGlobal && global.TikToken) return global.TikToken;

  const encoder = new Tiktoken(encodingJson);

  // Cache on every global object that exists.
  if (hasWindow) window.TikToken = encoder;
  if (hasGlobal) global.TikToken = encoder;

  return encoder;
}
|
||||
|
||||
/**
 * Count the tokens of a single prompt.
 *
 * The encoded text is `role`, a newline, then `prompt`; 3 is added to the encoded
 * length as an estimate for the role. If encoding throws, the character length of
 * that text is returned instead.
 *
 * @param prompt - prompt content; defaults to `''`
 * @param role - message role, or `''` for none
 * @returns the token count (or the character-length fallback)
 */
export function countPromptTokens(
  prompt = '',
  role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
) {
  const encoder = getTikTokenEnc();
  const content = `${role}\n${prompt}`;

  try {
    // +3: estimated extra tokens for the role
    return encoder.encode(content).length + 3;
  } catch (error) {
    return content.length;
  }
}
|
||||
|
||||
/**
 * Sum the token counts of all messages.
 *
 * Messages are first converted with `adaptChat2GptMessages` (`reserveId: true`);
 * each converted item is counted with `countPromptTokens(content, role)`.
 *
 * @param messages - chat items to count
 * @returns the total token count
 */
export function countMessagesTokens({ messages }: { messages: ChatItemType[] }) {
  const adapted = adaptChat2GptMessages({ messages, reserveId: true });

  let total = 0;
  for (const item of adapted) {
    total += countPromptTokens(item.content, item.role);
  }

  return total;
}
|
||||
|
||||
/**
 * Keep the first `length` tokens of `text`.
 *
 * The text is encoded, the token list is truncated to `length` entries and decoded
 * again. If encoding or decoding throws, the first `length` characters are returned.
 *
 * @param text - text to truncate
 * @param length - number of tokens (or characters, in the fallback) to keep
 * @returns the truncated text
 */
export function sliceTextByTokens({ text, length }: { text: string; length: number }) {
  const encoder = getTikTokenEnc();

  try {
    const head = encoder.encode(text).slice(0, length);
    return encoder.decode(head);
  } catch (error) {
    return text.slice(0, length);
  }
}
|
||||
|
||||
/**
 * Keep messages from the top of the list while they fit into `maxTokens`.
 *
 * Each message's token count is subtracted from the budget in order; a message is
 * kept only while the remaining budget stays above zero, and iteration stops at
 * the first message that does not fit. If nothing fits and the list is non-empty,
 * the first message is returned on its own.
 *
 * @param messages - chat items, first to last
 * @param maxTokens - token budget
 * @returns the leading messages that fit the budget
 */
export function sliceMessagesTB({
  messages,
  maxTokens
}: {
  messages: ChatItemType[];
  maxTokens: number;
}) {
  const adapted = adaptChat2GptMessages({ messages, reserveId: true });
  const kept: ChatItemType[] = [];
  let remaining = maxTokens;

  for (let idx = 0; idx < adapted.length; idx++) {
    const { content, role } = adapted[idx];
    remaining -= countPromptTokens(content, role);

    // Stop at the first message that exhausts the budget.
    if (!(remaining > 0)) break;

    kept.push(messages[idx]);
  }

  if (kept.length === 0 && messages[0]) {
    return [messages[0]];
  }
  return kept;
}
|
2
projects/app/src/global/core/api/aiReq.d.ts
vendored
2
projects/app/src/global/core/api/aiReq.d.ts
vendored
@@ -1,4 +1,4 @@
|
||||
import { ChatCompletionRequestMessage } from '@fastgpt/core/ai/type';
|
||||
import { ChatCompletionRequestMessage } from '@fastgpt/global/core/ai/type.d';
|
||||
|
||||
export type CreateQuestionGuideParams = {
|
||||
messages: ChatCompletionRequestMessage[];
|
||||
|
@@ -1,6 +1,5 @@
|
||||
export type AdminUpdateFeedbackParams = {
|
||||
import { MarkDataType } from '../dataset/type';
|
||||
|
||||
export type AdminUpdateFeedbackParams = MarkDataType & {
|
||||
chatItemId: string;
|
||||
kbId: string;
|
||||
dataId: string;
|
||||
content: string;
|
||||
};
|
||||
|
61
projects/app/src/global/core/api/datasetReq.d.ts
vendored
61
projects/app/src/global/core/api/datasetReq.d.ts
vendored
@@ -1,8 +1,9 @@
|
||||
import { DatasetTypeEnum } from '@fastgpt/core/dataset/constant';
|
||||
import { DatasetCollectionTypeEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
|
||||
import type { RequestPaging } from '@/types';
|
||||
import { TrainingModeEnum } from '@/constants/plugin';
|
||||
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
|
||||
import type { SearchTestItemType } from '@/types/core/dataset';
|
||||
import { DatasetDataItemType } from '@/types/core/dataset/data';
|
||||
import { DatasetChunkItemType, UploadChunkItemType } from '@fastgpt/global/core/dataset/type';
|
||||
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
|
||||
/* ===== dataset ===== */
|
||||
export type DatasetUpdateParams = {
|
||||
@@ -22,38 +23,50 @@ export type CreateDatasetParams = {
|
||||
};
|
||||
|
||||
export type SearchTestProps = {
|
||||
kbId: string;
|
||||
datasetId: string;
|
||||
text: string;
|
||||
};
|
||||
|
||||
/* ======= file =========== */
|
||||
export type GetFileListProps = RequestPaging & {
|
||||
kbId: string;
|
||||
searchText: string;
|
||||
/* ======= collections =========== */
|
||||
export type GetDatasetCollectionsProps = RequestPaging & {
|
||||
datasetId: string;
|
||||
parentId?: string;
|
||||
searchText?: string;
|
||||
simple?: boolean;
|
||||
selectFolder?: boolean;
|
||||
};
|
||||
export type CreateDatasetCollectionParams = {
|
||||
datasetId: string;
|
||||
parentId?: string;
|
||||
name: string;
|
||||
type: `${DatasetCollectionTypeEnum}`;
|
||||
metadata?: DatasetCollectionSchemaType['metadata'];
|
||||
updateTime?: string;
|
||||
};
|
||||
export type UpdateDatasetCollectionParams = {
|
||||
id: string;
|
||||
parentId?: string;
|
||||
name?: string;
|
||||
metadata?: DatasetCollectionSchemaType['metadata'];
|
||||
};
|
||||
|
||||
export type UpdateFileProps = { id: string; name?: string; datasetUsed?: boolean };
|
||||
|
||||
export type MarkFileUsedProps = { fileIds: string[] };
|
||||
|
||||
/* ==== data ===== */
|
||||
export type SetOneDatasetDataProps = {
|
||||
id?: string;
|
||||
datasetId: string;
|
||||
collectionId: string;
|
||||
q?: string; // embedding content
|
||||
a?: string; // bonus content
|
||||
};
|
||||
export type PushDataProps = {
|
||||
kbId: string;
|
||||
data: DatasetDataItemType[];
|
||||
collectionId: string;
|
||||
data: DatasetChunkItemType[];
|
||||
mode: `${TrainingModeEnum}`;
|
||||
prompt?: string;
|
||||
billId?: string;
|
||||
};
|
||||
|
||||
export type UpdateDatasetDataPrams = {
|
||||
dataId: string;
|
||||
kbId: string;
|
||||
a?: string;
|
||||
q?: string;
|
||||
};
|
||||
|
||||
export type GetDatasetDataListProps = RequestPaging & {
|
||||
kbId: string;
|
||||
searchText: string;
|
||||
fileId: string;
|
||||
searchText?: string;
|
||||
collectionId: string;
|
||||
};
|
||||
|
@@ -2,11 +2,11 @@ import type { RequestPaging } from '@/types';
|
||||
import { TrainingModeEnum } from '@/constants/plugin';
|
||||
import type { SearchTestItemType } from '@/types/core/dataset';
|
||||
import { DatasetDataItemType } from '@/types/core/dataset/data';
|
||||
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
|
||||
/* ===== dataset ===== */
|
||||
export type SearchTestResponseType = SearchTestItemType['results'];
|
||||
|
||||
/* ======= file =========== */
|
||||
/* ======= collection =========== */
|
||||
|
||||
/* ==== data ===== */
|
||||
export type PushDataResponse = {
|
||||
|
24
projects/app/src/global/core/app/modules/utils.ts
Normal file
24
projects/app/src/global/core/app/modules/utils.ts
Normal file
@@ -0,0 +1,24 @@
|
||||
import { SystemInputEnum } from '@/constants/app';
|
||||
import { FlowModuleTypeEnum } from '@/constants/flow';
|
||||
import { AppModuleItemType, VariableItemType } from '@/types/app';
|
||||
|
||||
/**
 * Find the first module whose `flowType` is `FlowModuleTypeEnum.userGuide`.
 *
 * @param modules - app modules to search
 * @returns the matching module, or `undefined` when there is none
 */
export const getGuideModule = (modules: AppModuleItemType[]) => {
  const isUserGuide = (module: AppModuleItemType) =>
    module.flowType === FlowModuleTypeEnum.userGuide;

  return modules.find(isUserGuide);
};
|
||||
|
||||
/**
 * Read the guide settings from a user-guide module's inputs.
 *
 * Every lookup tolerates a missing module and a missing `inputs` list, and falls
 * back to a default when the input is absent or its value is falsy.
 *
 * @param guideModules - the user-guide module, if any
 * @returns `welcomeText` (default `''`), `variableModules` (default `[]`) and
 *          `questionGuide` (default `false`)
 */
export const splitGuideModule = (guideModules?: AppModuleItemType) => {
  const inputs = guideModules?.inputs;

  // Shared lookup: all three settings are read with the same null-safe access.
  const readInputValue = (key: string) => inputs?.find((item) => item.key === key)?.value;

  const welcomeText: string = readInputValue(SystemInputEnum.welcomeText) || '';

  const variableModules: VariableItemType[] = readInputValue(SystemInputEnum.variables) || [];

  const questionGuide: boolean = readInputValue(SystemInputEnum.questionGuide) || false;

  return {
    welcomeText,
    variableModules,
    questionGuide
  };
};
|
5
projects/app/src/global/core/dataset/request.d.ts
vendored
Normal file
5
projects/app/src/global/core/dataset/request.d.ts
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
/* ================= dataset ===================== */
|
||||
|
||||
/* ================= collection ===================== */
|
||||
|
||||
/* ================= data ===================== */
|
23
projects/app/src/global/core/dataset/response.d.ts
vendored
Normal file
23
projects/app/src/global/core/dataset/response.d.ts
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
import { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type';
|
||||
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
|
||||
|
||||
/* ================= dataset ===================== */
|
||||
|
||||
/* ================= collection ===================== */
|
||||
/**
 * One entry of a dataset collection list.
 *
 * `type` and `metadata` reuse the corresponding field types of
 * `DatasetCollectionSchemaType`.
 */
export type DatasetCollectionsListItemType = {
  _id: string;
  parentId?: string;
  name: string;
  type: DatasetCollectionSchemaType['type'];
  updateTime: Date;
  dataAmount?: number;
  trainingAmount: number;
  metadata: DatasetCollectionSchemaType['metadata'];
};
|
||||
|
||||
/* ================= data ===================== */
|
||||
/** One entry of a dataset data list. */
export type DatasetDataListItemType = {
  id: string;
  /** embedding content */
  q: string;
  /** bonus content */
  a: string;
};
|
7
projects/app/src/global/core/dataset/type.d.ts
vendored
Normal file
7
projects/app/src/global/core/dataset/type.d.ts
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
/**
 * Identifies a marked dataset data item (data, dataset and collection ids)
 * together with its `q` text and optional `a` text.
 */
export type MarkDataType = {
  dataId: string;
  datasetId: string;
  collectionId: string;
  q: string;
  a?: string;
};
|
@@ -1,4 +1,4 @@
|
||||
import { PromptTemplateItem } from '@fastgpt/core/ai/type.d';
|
||||
import { PromptTemplateItem } from '@fastgpt/global/core/ai/type.d';
|
||||
|
||||
export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
|
||||
{
|
||||
@@ -9,7 +9,7 @@ export const Prompt_QuoteTemplateList: PromptTemplateItem[] = [
|
||||
{
|
||||
title: '全部变量',
|
||||
desc: '包含 q 和 a 两个变量的标准模板',
|
||||
value: `{instruction:"{{q}}",output:"{{a}}",source:"{{source}}",file_id:"{{file_id}}",index:"{{index}}"}`
|
||||
value: `{instruction:"{{q}}",output:"{{a}}",source:"{{source}}",sourceId:"{{sourceId}}",index:"{{index}}"}`
|
||||
}
|
||||
];
|
||||
|
||||
@@ -24,7 +24,7 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [
|
||||
对话要求:
|
||||
1. 背景知识是最新的,其中 instruction 是相关介绍,output 是预期回答或补充。
|
||||
2. 使用背景知识回答问题。
|
||||
3. 背景知识无法满足问题时,你需严谨的回答问题。
|
||||
3. 使用对话的风格回答我的问题,答案要和背景知识表述一致。
|
||||
我的问题是:"{{question}}"`
|
||||
},
|
||||
{
|
||||
|
@@ -1,4 +1,4 @@
|
||||
import type { OpenApiSchema } from '@fastgpt/support/openapi/type.d';
|
||||
import type { OpenApiSchema } from '@fastgpt/global/support/openapi/type';
|
||||
|
||||
export type GetApiKeyProps = {
|
||||
appId?: string;
|
||||
|
Reference in New Issue
Block a user