Revert "sub plan page (#885)" (#886)

This reverts commit 443ad37b6a.
This commit is contained in:
Archer
2024-02-23 17:48:15 +08:00
committed by GitHub
parent 443ad37b6a
commit fd9b6291af
246 changed files with 4281 additions and 6286 deletions

View File

@@ -11,7 +11,6 @@ export const getAIApi = (props?: {
timeout?: number;
}) => {
const { userKey, timeout } = props || {};
return new OpenAI({
apiKey: userKey?.key || systemAIChatKey,
baseURL: userKey?.baseUrl || baseUrl,

View File

@@ -0,0 +1,159 @@
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { getAIApi } from '../config';
import { ChatItemType } from '@fastgpt/global/core/chat/type';
/*
  cfr: coreference resolution.
  Uses the conversation context to complete whatever the current question refers to,
  which makes the question self-contained and easier to retrieve against.

  `defaultPrompt` is a few-shot prompt template. The `{{histories}}` and `{{query}}`
  placeholders are substituted before the prompt is sent to the model. The prompt ends
  with "输出: " so that the model's completion is the rewritten question itself.
  The product name in the example histories is spelled "FastGPT" so that it matches the
  expected outputs of the same examples.
*/
const defaultPrompt = `请不要回答任何问题。
你的任务是结合历史记录,为当前问题,实现代词替换,确保问题描述的对象清晰明确。例如:
历史记录:
"""
Q: 对话背景。
A: 关于 FastGPT 的介绍和使用等问题。
"""
当前问题: 怎么下载
输出: FastGPT 怎么下载?
----------------
历史记录:
"""
Q: 报错 "no connection"
A: FastGPT 报错"no connection"可能是因为……
"""
当前问题: 怎么解决
输出: FastGPT 报错"no connection"如何解决?
----------------
历史记录:
"""
Q: 作者是谁?
A: FastGPT 的作者是 labring。
"""
当前问题: 介绍下他
输出: 介绍下 FastGPT 的作者 labring。
----------------
历史记录:
"""
Q: 作者是谁?
A: FastGPT 的作者是 labring。
"""
当前问题: 我想购买商业版。
输出: FastGPT 商业版如何购买?
----------------
历史记录:
"""
Q: 对话背景。
A: 关于 FastGPT 的介绍和使用等问题。
"""
当前问题: nh
输出: nh
----------------
历史记录:
"""
Q: FastGPT 如何收费?
A: FastGPT 收费可以参考……
"""
当前问题: 你知道 laf 么?
输出: 你知道 laf 么?
----------------
历史记录:
"""
Q: FastGPT 的优势
A: 1. 开源
2. 简便
3. 扩展性强
"""
当前问题: 介绍下第2点。
输出: 介绍下 FastGPT 简便的优势。
----------------
历史记录:
"""
Q: 什么是 FastGPT
A: FastGPT 是一个 RAG 平台。
Q: 什么是 Sealos
A: Sealos 是一个云操作系统。
"""
当前问题: 它们有什么关系?
输出: FastGPT 和 Sealos 有什么关系?
----------------
历史记录:
"""
{{histories}}
"""
当前问题: {{query}}
输出: `;
/**
 * Coreference resolution for a user query.
 *
 * Builds a "Q:/A:" transcript from the optional chat background and the chat
 * histories, fills it into `defaultPrompt` together with the query, and asks the
 * chat model to rewrite the query.
 *
 * @param chatBg - Optional conversation background; rendered as a leading
 *   "Q: 对话背景。 / A: <chatBg>" pair in the transcript.
 * @param query - The current user question.
 * @param histories - Previous chat items; items whose `obj` is 'Human' become
 *   "Q" lines, all others become "A" lines.
 * @param model - Model name passed to the completion request and echoed back.
 * @returns The raw query, the rewritten query (`cfrQuery`), the model, and the
 *   token usage reported by the completion. When there is neither history nor
 *   background, or the model returns no content, the query is returned
 *   unchanged with both token counts set to 0.
 */
export const queryCfr = async ({
  chatBg,
  query,
  histories = [],
  model
}: {
  chatBg?: string;
  query: string;
  histories: ChatItemType[];
  model: string;
}) => {
  // Result used whenever no rewrite is available: the query passes through untouched.
  const passthrough = () => ({
    rawQuery: query,
    cfrQuery: query,
    model,
    inputTokens: 0,
    outputTokens: 0
  });

  // Without any context there is nothing to resolve against, so skip the model call.
  const hasContext = histories.length > 0 || Boolean(chatBg);
  if (!hasContext) {
    return passthrough();
  }

  const backgroundBlock = chatBg ? `Q: 对话背景。\nA: ${chatBg}\n` : '';

  const turns: string[] = [];
  for (const { obj, value } of histories) {
    const speaker = obj === 'Human' ? 'Q' : 'A';
    turns.push(`${speaker}: ${value}`);
  }

  // trim() drops the trailing newline left by the background block when there are no turns.
  const transcript = (backgroundBlock + turns.join('\n')).trim();

  const client = getAIApi({
    timeout: 480000
  });

  const prompt = replaceVariable(defaultPrompt, {
    query: `${query}`,
    histories: transcript
  });

  const completion = await client.chat.completions.create({
    model,
    temperature: 0.01,
    max_tokens: 150,
    messages: [
      {
        role: 'user',
        content: prompt
      }
    ],
    stream: false
  });

  const rewritten = completion.choices?.[0]?.message?.content || '';
  if (!rewritten) {
    return passthrough();
  }

  return {
    rawQuery: query,
    cfrQuery: rewritten,
    model,
    inputTokens: completion.usage?.prompt_tokens || 0,
    outputTokens: completion.usage?.completion_tokens || 0
  };
};

View File

@@ -1,6 +1,5 @@
import type { ChatMessageItemType } from '@fastgpt/global/core/ai/type.d';
import { getAIApi } from '../config';
import { countGptMessagesChars } from '../../chat/utils';
/**
 * Prompt asking the model to suggest three short follow-up questions, returned
 * as a JSON array of strings. It is appended as the final user message by
 * createQuestionGuide.
 */
export const Prompt_QuestionGuide = `我不太清楚问你什么问题,请帮我生成 3 个问题引导我继续提问。问题的长度应小于20个字符按 JSON 格式返回: ["问题1", "问题2", "问题3"]`;
@@ -11,13 +10,6 @@ export async function createQuestionGuide({
messages: ChatMessageItemType[];
model: string;
}) {
const concatMessages: ChatMessageItemType[] = [
...messages,
{
role: 'user',
content: Prompt_QuestionGuide
}
];
const ai = getAIApi({
timeout: 480000
});
@@ -25,21 +17,28 @@ export async function createQuestionGuide({
model: model,
temperature: 0.1,
max_tokens: 200,
messages: concatMessages,
messages: [
...messages,
{
role: 'user',
content: Prompt_QuestionGuide
}
],
stream: false
});
const answer = data.choices?.[0]?.message?.content || '';
const inputTokens = data.usage?.prompt_tokens || 0;
const outputTokens = data.usage?.completion_tokens || 0;
const start = answer.indexOf('[');
const end = answer.lastIndexOf(']');
const charsLength = countGptMessagesChars(concatMessages);
if (start === -1 || end === -1) {
return {
result: [],
charsLength: 0
inputTokens,
outputTokens
};
}
@@ -51,12 +50,14 @@ export async function createQuestionGuide({
try {
return {
result: JSON.parse(jsonStr),
charsLength
inputTokens,
outputTokens
};
} catch (error) {
return {
result: [],
charsLength: 0
inputTokens,
outputTokens
};
}
}

View File

@@ -1,19 +1,18 @@
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { getAIApi } from '../config';
import { ChatItemType } from '@fastgpt/global/core/chat/type';
import { countGptMessagesChars } from '../../chat/utils';
/*
query extension - 问题扩展
可以根据上下文,消除指代性问题以及扩展问题,利于检索。
*/
const defaultPrompt = `作为一个向量检索助手,你的任务是结合历史记录,从不同角度,为“原问题”生成个不同版本的“检索词”,从而提高向量检索的语义丰富度,提高向量检索的精度。生成的问题要求指向对象清晰明确,并与原问题语言相同。例如:
const defaultPrompt = `作为一个向量检索助手,你的任务是结合历史记录,从不同角度,为“原问题”生成个不同版本的“检索词”,从而提高向量检索的语义丰富度,提高向量检索的精度。生成的问题要求指向对象清晰明确。例如:
历史记录:
"""
"""
原问题: 介绍下剧情。
检索词: ["介绍下故事的背景和主要人物。","故事的主题是什么?","剧情是是如何发展的"]
检索词: ["发生了什么故事?","故事梗概是什么?","讲述了什么故事"]
----------------
历史记录:
"""
@@ -21,7 +20,7 @@ Q: 对话背景。
A: 当前对话是关于 FatGPT 的介绍和使用等。
"""
原问题: 怎么下载
检索词: ["FastGPT 如何下载?","下载 FastGPT 需要什么条件?","有哪些渠道可以下载 FastGPT"]
检索词: ["FastGPT 怎么下载?","下载 FastGPT 需要什么条件?","有哪些渠道可以下载 FastGPT"]
----------------
历史记录:
"""
@@ -31,15 +30,15 @@ Q: 报错 "no connection"
A: 报错"no connection"可能是因为……
"""
原问题: 怎么解决
检索词: ["FastGPT 报错"no connection"如何解决?", "造成 'no connection' 报错的原因", "FastGPT提示'no connection',要怎么办?"]
检索词: ["FastGPT 报错"no connection"如何解决?", "报错 'no connection' 是什么原因", "FastGPT提示'no connection',要怎么办?"]
----------------
历史记录:
"""
Q: 作者是谁?
A: FastGPT 的作者是 labring。
"""
原问题: Tell me about him
检索词: ["Introduce labring, the author of FastGPT." ," Background information on author labring." "," Why does labring do FastGPT?"]
原问题: 介绍下他
检索词: ["介绍下 FastGPT 的作者 labring。","作者 labring 的背景信息。","labring 为什么要做 FastGPT?"]
----------------
历史记录:
"""
@@ -106,7 +105,8 @@ export const queryExtension = async ({
rawQuery: string;
extensionQueries: string[];
model: string;
charsLength: number;
inputTokens: number;
outputTokens: number;
}> => {
const systemFewShot = chatBg
? `Q: 对话背景。
@@ -125,20 +125,18 @@ A: ${chatBg}
timeout: 480000
});
const messages = [
{
role: 'user',
content: replaceVariable(defaultPrompt, {
query: `${query}`,
histories: concatFewShot
})
}
];
const result = await ai.chat.completions.create({
model: model,
temperature: 0.01,
// @ts-ignore
messages,
messages: [
{
role: 'user',
content: replaceVariable(defaultPrompt, {
query: `${query}`,
histories: concatFewShot
})
}
],
stream: false
});
@@ -148,7 +146,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: [],
model,
charsLength: 0
inputTokens: 0,
outputTokens: 0
};
}
@@ -161,7 +160,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: queries,
model,
charsLength: countGptMessagesChars(messages)
inputTokens: result.usage?.prompt_tokens || 0,
outputTokens: result.usage?.completion_tokens || 0
};
} catch (error) {
console.log(error);
@@ -169,7 +169,8 @@ A: ${chatBg}
rawQuery: query,
extensionQueries: [],
model,
charsLength: 0
inputTokens: 0,
outputTokens: 0
};
}
};

View File

@@ -61,9 +61,6 @@ const AppSchema = new Schema({
type: String,
enum: Object.keys(PermissionTypeMap),
default: PermissionTypeEnum.private
},
teamTags: {
type: [String]
}
});

View File

@@ -92,8 +92,6 @@ try {
ChatItemSchema.index({ appId: 1, chatId: 1, dataId: 1 }, { background: true });
// admin charts
ChatItemSchema.index({ time: -1, obj: 1 }, { background: true });
// timer, clear history
ChatItemSchema.index({ teamId: 1, time: -1 }, { background: true });
} catch (error) {
console.log(error);
}

View File

@@ -83,9 +83,6 @@ try {
ChatSchema.index({ teamId: 1, appId: 1, updateTime: -1 }, { background: true });
// get share chat history
ChatSchema.index({ shareId: 1, outLinkUid: 1, updateTime: -1, source: 1 }, { background: true });
// timer, clear history
ChatSchema.index({ teamId: 1, updateTime: -1 }, { background: true });
} catch (error) {
console.log(error);
}

View File

@@ -2,10 +2,7 @@ import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum, IMG_BLOCK_KEY } from '@fastgpt/global/core/chat/constants';
import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt';
import type {
ChatCompletionContentPart,
ChatMessageItemType
} from '@fastgpt/global/core/ai/type.d';
import type { ChatCompletionContentPart } from '@fastgpt/global/core/ai/type.d';
import axios from 'axios';
/* slice chat context by tokens */
@@ -59,12 +56,6 @@ export function ChatContextFilter({
return [...systemPrompts, ...chats];
}
export const countMessagesChars = (messages: ChatItemType[]) => {
return messages.reduce((sum, item) => sum + item.value.length, 0);
};
export const countGptMessagesChars = (messages: ChatMessageItemType[]) =>
messages.reduce((sum, item) => sum + item.content.length, 0);
/**
string to vision model. Follow the markdown code block rule for interception:

View File

@@ -147,6 +147,8 @@ export async function delCollectionAndRelatedSources({
collectionId: { $in: collectionIds }
});
await delay(2000);
// delete dataset.datas
await MongoDatasetData.deleteMany({ teamId, collectionId: { $in: collectionIds } }, { session });
// delete imgs

View File

@@ -66,11 +66,6 @@ export async function delDatasetRelevantData({
if (!datasets.length) return;
const teamId = datasets[0].teamId;
if (!teamId) {
return Promise.reject('teamId is required');
}
const datasetIds = datasets.map((item) => String(item._id));
// Get _id, teamId, fileId, metadata.relatedImgId for all collections

View File

@@ -7,6 +7,10 @@ import {
} from '@fastgpt/global/support/user/team/constant';
import { DatasetCollectionName } from '../schema';
import { DatasetColCollectionName } from '../collection/schema';
import {
DatasetDataIndexTypeEnum,
DatasetDataIndexTypeMap
} from '@fastgpt/global/core/dataset/constants';
export const DatasetDataCollectionName = 'dataset.datas';
@@ -50,6 +54,11 @@ const DatasetDataSchema = new Schema({
type: Boolean,
default: false
},
type: {
type: String,
enum: Object.keys(DatasetDataIndexTypeMap),
default: DatasetDataIndexTypeEnum.custom
},
dataId: {
type: String,
required: true

View File

@@ -14,54 +14,22 @@ export const datasetSearchQueryExtension = async ({
extensionBg?: string;
histories?: ChatItemType[];
}) => {
const filterSamQuery = (queries: string[]) => {
const set = new Set<string>();
const filterSameQueries = queries.filter((item) => {
// 删除所有的标点符号与空格等,只对文本进行比较
const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
if (set.has(str)) return false;
set.add(str);
return true;
});
return filterSameQueries;
};
let { queries, rewriteQuery, alreadyExtension } = (() => {
// concat query
let rewriteQuery =
histories.length > 0
? `${histories
.map((item) => {
return `${item.obj}: ${item.value}`;
})
.join('\n')}
Human: ${query}
`
: query;
/* if query already extension, direct parse */
try {
const jsonParse = JSON.parse(query);
const queries: string[] = Array.isArray(jsonParse) ? filterSamQuery(jsonParse) : [query];
const alreadyExtension = Array.isArray(jsonParse);
return {
queries,
rewriteQuery: alreadyExtension ? queries.join('\n') : rewriteQuery,
alreadyExtension: alreadyExtension
};
} catch (error) {
return {
queries: [query],
rewriteQuery,
alreadyExtension: false
};
}
})();
// concat query
let queries = [query];
let rewriteQuery =
histories.length > 0
? `${histories
.map((item) => {
return `${item.obj}: ${item.value}`;
})
.join('\n')}
Human: ${query}
`
: query;
// ai extension
const aiExtensionResult = await (async () => {
if (!extensionModel || alreadyExtension) return;
if (!extensionModel) return;
const result = await queryExtension({
chatBg: extensionBg,
query,
@@ -71,13 +39,23 @@ export const datasetSearchQueryExtension = async ({
if (result.extensionQueries?.length === 0) return;
return result;
})();
if (aiExtensionResult) {
queries = filterSamQuery(queries.concat(aiExtensionResult.extensionQueries));
queries = queries.concat(aiExtensionResult.extensionQueries);
rewriteQuery = queries.join('\n');
}
const set = new Set<string>();
const filterSameQueries = queries.filter((item) => {
// 删除所有的标点符号与空格等,只对文本进行比较
const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
if (set.has(str)) return false;
set.add(str);
return true;
});
return {
concatQueries: queries,
concatQueries: filterSameQueries,
rewriteQuery,
aiExtensionResult
};

View File

@@ -57,7 +57,7 @@ export async function pushDataListToTrainingQueue({
if (trainingMode === TrainingModeEnum.chunk) {
const vectorModelData = vectorModelList?.find((item) => item.model === vectorModel);
if (!vectorModelData) {
return Promise.reject(`File model ${vectorModel} is inValid`);
return Promise.reject(`Model ${vectorModel} is inValid`);
}
return {
@@ -70,7 +70,7 @@ export async function pushDataListToTrainingQueue({
if (trainingMode === TrainingModeEnum.qa) {
const qaModelData = datasetModelList?.find((item) => item.model === agentModel);
if (!qaModelData) {
return Promise.reject(`Vector model ${agentModel} is inValid`);
return Promise.reject(`Model ${agentModel} is inValid`);
}
return {
maxToken: qaModelData.maxContext * 0.8,

View File

@@ -2,7 +2,7 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetCollectionName } from '../schema';
import {
@@ -86,6 +86,11 @@ const TrainingDataSchema = new Schema({
indexes: {
type: [
{
type: {
type: String,
enum: Object.keys(DatasetDataIndexTypeMap),
required: true
},
text: {
type: String,
required: true