mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
V4.6.9-first commit (#899)
* perf: insert mongo dataset data session * perf: dataset data index * remove delay * rename bill schema * rename bill record * perf: bill table * perf: prompt * perf: sub plan * change the usage count * feat: usage bill * publish usages * doc * 新增团队聊天功能 (#20) * perf: doc * feat 添加标签部分 feat 信息团队标签配置 feat 新增团队同步管理 feat team分享页面 feat 完成team分享页面 feat 实现模糊搜索 style 格式化 fix 修复迷糊匹配 style 样式修改 fix 团队标签功能修复 * fix 修复鉴权功能 * merge 合并代码 * fix 修复引用错误 * fix 修复pr问题 * fix 修复ts格式问题 --------- Co-authored-by: archer <545436317@qq.com> Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> * update extra plan * fix: ts * format * perf: bill field * feat: standard plan * fix: ts * feat 个人账号页面修改 (#22) * feat 添加标签部分 feat 信息团队标签配置 feat 新增团队同步管理 feat team分享页面 feat 完成team分享页面 feat 实现模糊搜索 style 格式化 fix 修复迷糊匹配 style 样式修改 fix 团队标签功能修复 * fix 修复鉴权功能 * merge 合并代码 * fix 修复引用错误 * fix 修复pr问题 * fix 修复ts格式问题 * feat 修改个人账号页 --------- Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> * sub plan page (#23) * fix chunk index; error page text * feat: dataset process Integral prediction * feat: stand plan field * feat: sub plan limit * perf: index * query extension * perf: share link push app name * perf: plan point unit * perf: get sub plan * perf: account page * feat 新增套餐详情弹窗代码 (#24) * merge 合并代码 * fix 新增套餐详情弹框 * fix 修复pr问题 * feat: change http node input to prompt editor (#21) * feat: change http node input to prompt editor * fix * split PromptEditor to HttpInput * Team plans (#25) * perf: pay check * perf: team plan test * plan limit check * replace sensitive text * perf: fix some null * collection null check * perf: plans modal * perf: http module * pacakge (#26) * individuation page and pay modal amount (#27) * feat: individuation page * team chat config * pay modal * plan count and replace invalid chars (#29) * fix: user oneapi * fix: training queue * fix: qa queue * perf: remove space chars * replace invalid chars * change httpinput dropdown menu (#28) * perf: http * reseet free plan * perf: plan code to packages * remove llm config to package * perf: code * perf: faq * fix: get team plan --------- Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com> Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com> Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
@@ -11,6 +11,7 @@ export const getAIApi = (props?: {
|
||||
timeout?: number;
|
||||
}) => {
|
||||
const { userKey, timeout } = props || {};
|
||||
|
||||
return new OpenAI({
|
||||
apiKey: userKey?.key || systemAIChatKey,
|
||||
baseURL: userKey?.baseUrl || baseUrl,
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import { VectorModelItemType } from '@fastgpt/global/core/ai/model.d';
|
||||
import { getAIApi } from '../config';
|
||||
import { replaceValidChars } from '../../chat/utils';
|
||||
|
||||
type GetVectorProps = {
|
||||
model: VectorModelItemType;
|
||||
@@ -36,7 +37,7 @@ export async function getVectorsByText({ model, input }: GetVectorProps) {
|
||||
}
|
||||
|
||||
return {
|
||||
charsLength: input.length,
|
||||
charsLength: replaceValidChars(input).length,
|
||||
vectors: await Promise.all(res.data.map((item) => unityDimensional(item.embedding)))
|
||||
};
|
||||
});
|
||||
|
@@ -1,159 +0,0 @@
|
||||
import { replaceVariable } from '@fastgpt/global/common/string/tools';
|
||||
import { getAIApi } from '../config';
|
||||
import { ChatItemType } from '@fastgpt/global/core/chat/type';
|
||||
|
||||
/*
|
||||
cfr: coreference resolution - 指代消除
|
||||
可以根据上下文,完事当前问题指代内容,利于检索。
|
||||
*/
|
||||
|
||||
const defaultPrompt = `请不要回答任何问题。
|
||||
你的任务是结合历史记录,为当前问题,实现代词替换,确保问题描述的对象清晰明确。例如:
|
||||
历史记录:
|
||||
"""
|
||||
Q: 对话背景。
|
||||
A: 关于 FatGPT 的介绍和使用等问题。
|
||||
"""
|
||||
当前问题: 怎么下载
|
||||
输出: FastGPT 怎么下载?
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
Q: 报错 "no connection"
|
||||
A: FastGPT 报错"no connection"可能是因为……
|
||||
"""
|
||||
当前问题: 怎么解决
|
||||
输出: FastGPT 报错"no connection"如何解决?
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
Q: 作者是谁?
|
||||
A: FastGPT 的作者是 labring。
|
||||
"""
|
||||
当前问题: 介绍下他
|
||||
输出: 介绍下 FastGPT 的作者 labring。
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
Q: 作者是谁?
|
||||
A: FastGPT 的作者是 labring。
|
||||
"""
|
||||
当前问题: 我想购买商业版。
|
||||
输出: FastGPT 商业版如何购买?
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
Q: 对话背景。
|
||||
A: 关于 FatGPT 的介绍和使用等问题。
|
||||
"""
|
||||
当前问题: nh
|
||||
输出: nh
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
Q: FastGPT 如何收费?
|
||||
A: FastGPT 收费可以参考……
|
||||
"""
|
||||
当前问题: 你知道 laf 么?
|
||||
输出: 你知道 laf 么?
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
Q: FastGPT 的优势
|
||||
A: 1. 开源
|
||||
2. 简便
|
||||
3. 扩展性强
|
||||
"""
|
||||
当前问题: 介绍下第2点。
|
||||
输出: 介绍下 FastGPT 简便的优势。
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
Q: 什么是 FastGPT?
|
||||
A: FastGPT 是一个 RAG 平台。
|
||||
Q: 什么是 Sealos?
|
||||
A: Sealos 是一个云操作系统。
|
||||
"""
|
||||
当前问题: 它们有什么关系?
|
||||
输出: FastGPT 和 Sealos 有什么关系?
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
{{histories}}
|
||||
"""
|
||||
当前问题: {{query}}
|
||||
输出: `;
|
||||
|
||||
export const queryCfr = async ({
|
||||
chatBg,
|
||||
query,
|
||||
histories = [],
|
||||
model
|
||||
}: {
|
||||
chatBg?: string;
|
||||
query: string;
|
||||
histories: ChatItemType[];
|
||||
model: string;
|
||||
}) => {
|
||||
if (histories.length === 0 && !chatBg) {
|
||||
return {
|
||||
rawQuery: query,
|
||||
cfrQuery: query,
|
||||
model,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0
|
||||
};
|
||||
}
|
||||
|
||||
const systemFewShot = chatBg
|
||||
? `Q: 对话背景。
|
||||
A: ${chatBg}
|
||||
`
|
||||
: '';
|
||||
const historyFewShot = histories
|
||||
.map((item) => {
|
||||
const role = item.obj === 'Human' ? 'Q' : 'A';
|
||||
return `${role}: ${item.value}`;
|
||||
})
|
||||
.join('\n');
|
||||
const concatFewShot = `${systemFewShot}${historyFewShot}`.trim();
|
||||
|
||||
const ai = getAIApi({
|
||||
timeout: 480000
|
||||
});
|
||||
|
||||
const result = await ai.chat.completions.create({
|
||||
model: model,
|
||||
temperature: 0.01,
|
||||
max_tokens: 150,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: replaceVariable(defaultPrompt, {
|
||||
query: `${query}`,
|
||||
histories: concatFewShot
|
||||
})
|
||||
}
|
||||
],
|
||||
stream: false
|
||||
});
|
||||
|
||||
const answer = result.choices?.[0]?.message?.content || '';
|
||||
if (!answer) {
|
||||
return {
|
||||
rawQuery: query,
|
||||
cfrQuery: query,
|
||||
model,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
rawQuery: query,
|
||||
cfrQuery: answer,
|
||||
model,
|
||||
inputTokens: result.usage?.prompt_tokens || 0,
|
||||
outputTokens: result.usage?.completion_tokens || 0
|
||||
};
|
||||
};
|
@@ -1,5 +1,6 @@
|
||||
import type { ChatMessageItemType } from '@fastgpt/global/core/ai/type.d';
|
||||
import { getAIApi } from '../config';
|
||||
import { countGptMessagesChars } from '../../chat/utils';
|
||||
|
||||
export const Prompt_QuestionGuide = `我不太清楚问你什么问题,请帮我生成 3 个问题,引导我继续提问。问题的长度应小于20个字符,按 JSON 格式返回: ["问题1", "问题2", "问题3"]`;
|
||||
|
||||
@@ -10,6 +11,13 @@ export async function createQuestionGuide({
|
||||
messages: ChatMessageItemType[];
|
||||
model: string;
|
||||
}) {
|
||||
const concatMessages: ChatMessageItemType[] = [
|
||||
...messages,
|
||||
{
|
||||
role: 'user',
|
||||
content: Prompt_QuestionGuide
|
||||
}
|
||||
];
|
||||
const ai = getAIApi({
|
||||
timeout: 480000
|
||||
});
|
||||
@@ -17,28 +25,21 @@ export async function createQuestionGuide({
|
||||
model: model,
|
||||
temperature: 0.1,
|
||||
max_tokens: 200,
|
||||
messages: [
|
||||
...messages,
|
||||
{
|
||||
role: 'user',
|
||||
content: Prompt_QuestionGuide
|
||||
}
|
||||
],
|
||||
messages: concatMessages,
|
||||
stream: false
|
||||
});
|
||||
|
||||
const answer = data.choices?.[0]?.message?.content || '';
|
||||
const inputTokens = data.usage?.prompt_tokens || 0;
|
||||
const outputTokens = data.usage?.completion_tokens || 0;
|
||||
|
||||
const start = answer.indexOf('[');
|
||||
const end = answer.lastIndexOf(']');
|
||||
|
||||
const charsLength = countGptMessagesChars(concatMessages);
|
||||
|
||||
if (start === -1 || end === -1) {
|
||||
return {
|
||||
result: [],
|
||||
inputTokens,
|
||||
outputTokens
|
||||
charsLength: 0
|
||||
};
|
||||
}
|
||||
|
||||
@@ -50,14 +51,12 @@ export async function createQuestionGuide({
|
||||
try {
|
||||
return {
|
||||
result: JSON.parse(jsonStr),
|
||||
inputTokens,
|
||||
outputTokens
|
||||
charsLength
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
result: [],
|
||||
inputTokens,
|
||||
outputTokens
|
||||
charsLength: 0
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@@ -1,18 +1,19 @@
|
||||
import { replaceVariable } from '@fastgpt/global/common/string/tools';
|
||||
import { getAIApi } from '../config';
|
||||
import { ChatItemType } from '@fastgpt/global/core/chat/type';
|
||||
import { countGptMessagesChars } from '../../chat/utils';
|
||||
|
||||
/*
|
||||
query extension - 问题扩展
|
||||
可以根据上下文,消除指代性问题以及扩展问题,利于检索。
|
||||
*/
|
||||
|
||||
const defaultPrompt = `作为一个向量检索助手,你的任务是结合历史记录,从不同角度,为“原问题”生成个不同版本的“检索词”,从而提高向量检索的语义丰富度,提高向量检索的精度。生成的问题要求指向对象清晰明确。例如:
|
||||
const defaultPrompt = `作为一个向量检索助手,你的任务是结合历史记录,从不同角度,为“原问题”生成个不同版本的“检索词”,从而提高向量检索的语义丰富度,提高向量检索的精度。生成的问题要求指向对象清晰明确,并与原问题语言相同。例如:
|
||||
历史记录:
|
||||
"""
|
||||
"""
|
||||
原问题: 介绍下剧情。
|
||||
检索词: ["发生了什么故事?","故事梗概是什么?","讲述了什么故事?"]
|
||||
检索词: ["介绍下故事的背景和主要人物。","故事的主题是什么?","剧情是是如何发展的?"]
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
@@ -20,7 +21,7 @@ Q: 对话背景。
|
||||
A: 当前对话是关于 FatGPT 的介绍和使用等。
|
||||
"""
|
||||
原问题: 怎么下载
|
||||
检索词: ["FastGPT 怎么下载?","下载 FastGPT 需要什么条件?","有哪些渠道可以下载 FastGPT?"]
|
||||
检索词: ["FastGPT 如何下载?","下载 FastGPT 需要什么条件?","有哪些渠道可以下载 FastGPT?"]
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
@@ -30,15 +31,15 @@ Q: 报错 "no connection"
|
||||
A: 报错"no connection"可能是因为……
|
||||
"""
|
||||
原问题: 怎么解决
|
||||
检索词: ["FastGPT 报错"no connection"如何解决?", "报错 'no connection' 是什么原因?", "FastGPT提示'no connection',要怎么办?"]
|
||||
检索词: ["FastGPT 报错"no connection"如何解决?", "造成 'no connection' 报错的原因。", "FastGPT提示'no connection',要怎么办?"]
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
Q: 作者是谁?
|
||||
A: FastGPT 的作者是 labring。
|
||||
"""
|
||||
原问题: 介绍下他
|
||||
检索词: ["介绍下 FastGPT 的作者 labring。","作者 labring 的背景信息。","labring 为什么要做 FastGPT?"]
|
||||
原问题: Tell me about him
|
||||
检索词: ["Introduce labring, the author of FastGPT." ," Background information on author labring." "," Why does labring do FastGPT?"]
|
||||
----------------
|
||||
历史记录:
|
||||
"""
|
||||
@@ -105,8 +106,7 @@ export const queryExtension = async ({
|
||||
rawQuery: string;
|
||||
extensionQueries: string[];
|
||||
model: string;
|
||||
inputTokens: number;
|
||||
outputTokens: number;
|
||||
charsLength: number;
|
||||
}> => {
|
||||
const systemFewShot = chatBg
|
||||
? `Q: 对话背景。
|
||||
@@ -125,18 +125,20 @@ A: ${chatBg}
|
||||
timeout: 480000
|
||||
});
|
||||
|
||||
const messages = [
|
||||
{
|
||||
role: 'user',
|
||||
content: replaceVariable(defaultPrompt, {
|
||||
query: `${query}`,
|
||||
histories: concatFewShot
|
||||
})
|
||||
}
|
||||
];
|
||||
const result = await ai.chat.completions.create({
|
||||
model: model,
|
||||
temperature: 0.01,
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
content: replaceVariable(defaultPrompt, {
|
||||
query: `${query}`,
|
||||
histories: concatFewShot
|
||||
})
|
||||
}
|
||||
],
|
||||
// @ts-ignore
|
||||
messages,
|
||||
stream: false
|
||||
});
|
||||
|
||||
@@ -146,8 +148,7 @@ A: ${chatBg}
|
||||
rawQuery: query,
|
||||
extensionQueries: [],
|
||||
model,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0
|
||||
charsLength: 0
|
||||
};
|
||||
}
|
||||
|
||||
@@ -160,8 +161,7 @@ A: ${chatBg}
|
||||
rawQuery: query,
|
||||
extensionQueries: queries,
|
||||
model,
|
||||
inputTokens: result.usage?.prompt_tokens || 0,
|
||||
outputTokens: result.usage?.completion_tokens || 0
|
||||
charsLength: countGptMessagesChars(messages)
|
||||
};
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
@@ -169,8 +169,7 @@ A: ${chatBg}
|
||||
rawQuery: query,
|
||||
extensionQueries: [],
|
||||
model,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0
|
||||
charsLength: 0
|
||||
};
|
||||
}
|
||||
};
|
||||
|
42
packages/service/core/ai/model.ts
Normal file
42
packages/service/core/ai/model.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
export const getLLMModel = (model?: string) => {
|
||||
return global.llmModels.find((item) => item.model === model) ?? global.llmModels[0];
|
||||
};
|
||||
export const getDatasetModel = (model?: string) => {
|
||||
return (
|
||||
global.llmModels?.filter((item) => item.datasetProcess)?.find((item) => item.model === model) ??
|
||||
global.llmModels[0]
|
||||
);
|
||||
};
|
||||
|
||||
export const getVectorModel = (model?: string) => {
|
||||
return global.vectorModels.find((item) => item.model === model) || global.vectorModels[0];
|
||||
};
|
||||
|
||||
export function getAudioSpeechModel(model?: string) {
|
||||
return (
|
||||
global.audioSpeechModels.find((item) => item.model === model) || global.audioSpeechModels[0]
|
||||
);
|
||||
}
|
||||
|
||||
export function getWhisperModel(model?: string) {
|
||||
return global.whisperModel;
|
||||
}
|
||||
|
||||
export function getReRankModel(model?: string) {
|
||||
return global.reRankModels.find((item) => item.model === model);
|
||||
}
|
||||
|
||||
export enum ModelTypeEnum {
|
||||
llm = 'llm',
|
||||
vector = 'vector',
|
||||
audioSpeech = 'audioSpeech',
|
||||
whisper = 'whisper',
|
||||
rerank = 'rerank'
|
||||
}
|
||||
export const getModelMap = {
|
||||
[ModelTypeEnum.llm]: getLLMModel,
|
||||
[ModelTypeEnum.vector]: getVectorModel,
|
||||
[ModelTypeEnum.audioSpeech]: getAudioSpeechModel,
|
||||
[ModelTypeEnum.whisper]: getWhisperModel,
|
||||
[ModelTypeEnum.rerank]: getReRankModel
|
||||
};
|
@@ -61,6 +61,9 @@ const AppSchema = new Schema({
|
||||
type: String,
|
||||
enum: Object.keys(PermissionTypeMap),
|
||||
default: PermissionTypeEnum.private
|
||||
},
|
||||
teamTags: {
|
||||
type: [String]
|
||||
}
|
||||
});
|
||||
|
||||
|
@@ -92,6 +92,8 @@ try {
|
||||
ChatItemSchema.index({ appId: 1, chatId: 1, dataId: 1 }, { background: true });
|
||||
// admin charts
|
||||
ChatItemSchema.index({ time: -1, obj: 1 }, { background: true });
|
||||
// timer, clear history
|
||||
ChatItemSchema.index({ teamId: 1, time: -1 }, { background: true });
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
|
@@ -83,6 +83,9 @@ try {
|
||||
ChatSchema.index({ teamId: 1, appId: 1, updateTime: -1 }, { background: true });
|
||||
// get share chat history
|
||||
ChatSchema.index({ shareId: 1, outLinkUid: 1, updateTime: -1, source: 1 }, { background: true });
|
||||
|
||||
// timer, clear history
|
||||
ChatSchema.index({ teamId: 1, updateTime: -1 }, { background: true });
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
|
@@ -2,7 +2,10 @@ import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
|
||||
import { ChatRoleEnum, IMG_BLOCK_KEY } from '@fastgpt/global/core/chat/constants';
|
||||
import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
|
||||
import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt';
|
||||
import type { ChatCompletionContentPart } from '@fastgpt/global/core/ai/type.d';
|
||||
import type {
|
||||
ChatCompletionContentPart,
|
||||
ChatMessageItemType
|
||||
} from '@fastgpt/global/core/ai/type.d';
|
||||
import axios from 'axios';
|
||||
|
||||
/* slice chat context by tokens */
|
||||
@@ -56,6 +59,16 @@ export function ChatContextFilter({
|
||||
return [...systemPrompts, ...chats];
|
||||
}
|
||||
|
||||
export const replaceValidChars = (str: string) => {
|
||||
const reg = /[\s\r\n]+/g;
|
||||
return str.replace(reg, '');
|
||||
};
|
||||
export const countMessagesChars = (messages: ChatItemType[]) => {
|
||||
return messages.reduce((sum, item) => sum + replaceValidChars(item.value).length, 0);
|
||||
};
|
||||
export const countGptMessagesChars = (messages: ChatMessageItemType[]) =>
|
||||
messages.reduce((sum, item) => sum + replaceValidChars(item.content).length, 0);
|
||||
|
||||
/**
|
||||
string to vision model. Follow the markdown code block rule for interception:
|
||||
|
||||
|
@@ -147,8 +147,6 @@ export async function delCollectionAndRelatedSources({
|
||||
collectionId: { $in: collectionIds }
|
||||
});
|
||||
|
||||
await delay(2000);
|
||||
|
||||
// delete dataset.datas
|
||||
await MongoDatasetData.deleteMany({ teamId, collectionId: { $in: collectionIds } }, { session });
|
||||
// delete imgs
|
||||
|
@@ -66,6 +66,11 @@ export async function delDatasetRelevantData({
|
||||
if (!datasets.length) return;
|
||||
|
||||
const teamId = datasets[0].teamId;
|
||||
|
||||
if (!teamId) {
|
||||
return Promise.reject('teamId is required');
|
||||
}
|
||||
|
||||
const datasetIds = datasets.map((item) => String(item._id));
|
||||
|
||||
// Get _id, teamId, fileId, metadata.relatedImgId for all collections
|
||||
|
@@ -7,10 +7,6 @@ import {
|
||||
} from '@fastgpt/global/support/user/team/constant';
|
||||
import { DatasetCollectionName } from '../schema';
|
||||
import { DatasetColCollectionName } from '../collection/schema';
|
||||
import {
|
||||
DatasetDataIndexTypeEnum,
|
||||
DatasetDataIndexTypeMap
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
export const DatasetDataCollectionName = 'dataset.datas';
|
||||
|
||||
@@ -54,11 +50,6 @@ const DatasetDataSchema = new Schema({
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetDataIndexTypeMap),
|
||||
default: DatasetDataIndexTypeEnum.custom
|
||||
},
|
||||
dataId: {
|
||||
type: String,
|
||||
required: true
|
||||
|
@@ -14,22 +14,54 @@ export const datasetSearchQueryExtension = async ({
|
||||
extensionBg?: string;
|
||||
histories?: ChatItemType[];
|
||||
}) => {
|
||||
// concat query
|
||||
let queries = [query];
|
||||
let rewriteQuery =
|
||||
histories.length > 0
|
||||
? `${histories
|
||||
.map((item) => {
|
||||
return `${item.obj}: ${item.value}`;
|
||||
})
|
||||
.join('\n')}
|
||||
Human: ${query}
|
||||
`
|
||||
: query;
|
||||
const filterSamQuery = (queries: string[]) => {
|
||||
const set = new Set<string>();
|
||||
const filterSameQueries = queries.filter((item) => {
|
||||
// 删除所有的标点符号与空格等,只对文本进行比较
|
||||
const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
|
||||
if (set.has(str)) return false;
|
||||
set.add(str);
|
||||
return true;
|
||||
});
|
||||
|
||||
return filterSameQueries;
|
||||
};
|
||||
|
||||
let { queries, rewriteQuery, alreadyExtension } = (() => {
|
||||
// concat query
|
||||
let rewriteQuery =
|
||||
histories.length > 0
|
||||
? `${histories
|
||||
.map((item) => {
|
||||
return `${item.obj}: ${item.value}`;
|
||||
})
|
||||
.join('\n')}
|
||||
Human: ${query}
|
||||
`
|
||||
: query;
|
||||
|
||||
/* if query already extension, direct parse */
|
||||
try {
|
||||
const jsonParse = JSON.parse(query);
|
||||
const queries: string[] = Array.isArray(jsonParse) ? filterSamQuery(jsonParse) : [query];
|
||||
const alreadyExtension = Array.isArray(jsonParse);
|
||||
return {
|
||||
queries,
|
||||
rewriteQuery: alreadyExtension ? queries.join('\n') : rewriteQuery,
|
||||
alreadyExtension: alreadyExtension
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
queries: [query],
|
||||
rewriteQuery,
|
||||
alreadyExtension: false
|
||||
};
|
||||
}
|
||||
})();
|
||||
|
||||
// ai extension
|
||||
const aiExtensionResult = await (async () => {
|
||||
if (!extensionModel) return;
|
||||
if (!extensionModel || alreadyExtension) return;
|
||||
const result = await queryExtension({
|
||||
chatBg: extensionBg,
|
||||
query,
|
||||
@@ -39,23 +71,13 @@ export const datasetSearchQueryExtension = async ({
|
||||
if (result.extensionQueries?.length === 0) return;
|
||||
return result;
|
||||
})();
|
||||
|
||||
if (aiExtensionResult) {
|
||||
queries = queries.concat(aiExtensionResult.extensionQueries);
|
||||
queries = filterSamQuery(queries.concat(aiExtensionResult.extensionQueries));
|
||||
rewriteQuery = queries.join('\n');
|
||||
}
|
||||
|
||||
const set = new Set<string>();
|
||||
const filterSameQueries = queries.filter((item) => {
|
||||
// 删除所有的标点符号与空格等,只对文本进行比较
|
||||
const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
|
||||
if (set.has(str)) return false;
|
||||
set.add(str);
|
||||
return true;
|
||||
});
|
||||
|
||||
return {
|
||||
concatQueries: filterSameQueries,
|
||||
concatQueries: queries,
|
||||
rewriteQuery,
|
||||
aiExtensionResult
|
||||
};
|
||||
|
@@ -11,7 +11,7 @@ import { simpleText } from '@fastgpt/global/common/string/tools';
|
||||
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
|
||||
import type { VectorModelItemType, LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
|
||||
|
||||
export const lockTrainingDataByTeamId = async (teamId: string, retry = 3): Promise<any> => {
|
||||
export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> => {
|
||||
try {
|
||||
await MongoDatasetTraining.updateMany(
|
||||
{
|
||||
@@ -21,13 +21,7 @@ export const lockTrainingDataByTeamId = async (teamId: string, retry = 3): Promi
|
||||
lockTime: new Date('2999/5/5')
|
||||
}
|
||||
);
|
||||
} catch (error) {
|
||||
if (retry > 0) {
|
||||
await delay(1000);
|
||||
return lockTrainingDataByTeamId(teamId, retry - 1);
|
||||
}
|
||||
return Promise.reject(error);
|
||||
}
|
||||
} catch (error) {}
|
||||
};
|
||||
|
||||
export async function pushDataListToTrainingQueue({
|
||||
@@ -51,17 +45,15 @@ export async function pushDataListToTrainingQueue({
|
||||
datasetId: { _id: datasetId, vectorModel, agentModel }
|
||||
} = await getCollectionWithDataset(collectionId);
|
||||
|
||||
const checkModelValid = async ({ collectionId }: { collectionId: string }) => {
|
||||
if (!collectionId) return Promise.reject(`CollectionId is empty`);
|
||||
|
||||
const checkModelValid = async () => {
|
||||
if (trainingMode === TrainingModeEnum.chunk) {
|
||||
const vectorModelData = vectorModelList?.find((item) => item.model === vectorModel);
|
||||
if (!vectorModelData) {
|
||||
return Promise.reject(`Model ${vectorModel} is inValid`);
|
||||
return Promise.reject(`File model ${vectorModel} is inValid`);
|
||||
}
|
||||
|
||||
return {
|
||||
maxToken: vectorModelData.maxToken * 1.5,
|
||||
maxToken: vectorModelData.maxToken * 1.3,
|
||||
model: vectorModelData.model,
|
||||
weight: vectorModelData.weight
|
||||
};
|
||||
@@ -70,7 +62,7 @@ export async function pushDataListToTrainingQueue({
|
||||
if (trainingMode === TrainingModeEnum.qa) {
|
||||
const qaModelData = datasetModelList?.find((item) => item.model === agentModel);
|
||||
if (!qaModelData) {
|
||||
return Promise.reject(`Model ${agentModel} is inValid`);
|
||||
return Promise.reject(`Vector model ${agentModel} is inValid`);
|
||||
}
|
||||
return {
|
||||
maxToken: qaModelData.maxContext * 0.8,
|
||||
@@ -81,9 +73,7 @@ export async function pushDataListToTrainingQueue({
|
||||
return Promise.reject(`Training mode "${trainingMode}" is inValid`);
|
||||
};
|
||||
|
||||
const { model, maxToken, weight } = await checkModelValid({
|
||||
collectionId
|
||||
});
|
||||
const { model, maxToken, weight } = await checkModelValid();
|
||||
|
||||
// format q and a, remove empty char
|
||||
data.forEach((item) => {
|
||||
|
@@ -2,7 +2,7 @@
|
||||
import { connectionMongo, type Model } from '../../../common/mongo';
|
||||
const { Schema, model, models } = connectionMongo;
|
||||
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
|
||||
import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
|
||||
import { DatasetColCollectionName } from '../collection/schema';
|
||||
import { DatasetCollectionName } from '../schema';
|
||||
import {
|
||||
@@ -86,11 +86,6 @@ const TrainingDataSchema = new Schema({
|
||||
indexes: {
|
||||
type: [
|
||||
{
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetDataIndexTypeMap),
|
||||
required: true
|
||||
},
|
||||
text: {
|
||||
type: String,
|
||||
required: true
|
||||
|
Reference in New Issue
Block a user