4.6.8-production (#822)

* Json completion (#16)

* json-completion

* fix duplicate

* fix

* fix: config json

* feat: query extension

* perf: i18n

* 468 doc

* json editor

* perf: doc

* perf: default extension model

* docker file

* doc

* perf: token count

* perf: search extension

* format

* perf: some constants data

---------

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
Author: Archer
Date: 2024-02-05 00:51:46 +08:00
Committed by: GitHub
parent ec8e2512bc
commit 51bbdf26a3
68 changed files with 4118 additions and 3787 deletions

View File

@@ -35,6 +35,7 @@ import type {
 } from '@fastgpt/global/core/dataset/api.d';
 import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
 import { getVectorModel } from '../../ai/model';
+import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';

 export async function pushDataToTrainingQueue(
   props: {
@@ -272,7 +273,7 @@ export async function updateData2Dataset({
   };
 }

-export async function searchDatasetData(props: {
+type SearchDatasetDataProps = {
   teamId: string;
   model: string;
   similarity?: number; // min distance
@@ -280,12 +281,14 @@ export async function searchDatasetData(props: {
   datasetIds: string[];
   searchMode?: `${DatasetSearchModeEnum}`;
   usingReRank?: boolean;
-  rawQuery: string;
+  reRankQuery: string;
   queries: string[];
-}) {
+};
+
+export async function searchDatasetData(props: SearchDatasetDataProps) {
   let {
     teamId,
-    rawQuery,
+    reRankQuery,
     queries,
     model,
     similarity = 0,
@@ -307,27 +310,6 @@ export async function searchDatasetData(props: {
   let usingSimilarityFilter = false;

   /* function */
-  const countRecallLimit = () => {
-    const oneChunkToken = 50;
-    const estimatedLen = Math.max(20, Math.ceil(maxTokens / oneChunkToken));
-
-    if (searchMode === DatasetSearchModeEnum.embedding) {
-      return {
-        embeddingLimit: Math.min(estimatedLen, 80),
-        fullTextLimit: 0
-      };
-    }
-    if (searchMode === DatasetSearchModeEnum.fullTextRecall) {
-      return {
-        embeddingLimit: 0,
-        fullTextLimit: Math.min(estimatedLen, 50)
-      };
-    }
-    return {
-      embeddingLimit: Math.min(estimatedLen, 60),
-      fullTextLimit: Math.min(estimatedLen, 40)
-    };
-  };
   const embeddingRecall = async ({ query, limit }: { query: string; limit: number }) => {
     const { vectors, charsLength } = await getVectorsByText({
       model: getVectorModel(model),
@@ -531,69 +513,50 @@ export async function searchDatasetData(props: {
     embeddingLimit: number;
     fullTextLimit: number;
   }) => {
-    // In a group n recall, as long as one of the data appears minAmount of times, it is retained
-    const getIntersection = (resultList: SearchDataResponseItemType[][], minAmount = 1) => {
-      minAmount = Math.min(resultList.length, minAmount);
-      const map: Record<
-        string,
-        {
-          amount: number;
-          data: SearchDataResponseItemType;
-        }
-      > = {};
-      for (const list of resultList) {
-        for (const item of list) {
-          map[item.id] = map[item.id]
-            ? {
-                amount: map[item.id].amount + 1,
-                data: item
-              }
-            : {
-                amount: 1,
-                data: item
-              };
-        }
-      }
-      return Object.values(map)
-        .filter((item) => item.amount >= minAmount)
-        .map((item) => item.data);
-    };
-
     // multi query recall
     const embeddingRecallResList: SearchDataResponseItemType[][] = [];
     const fullTextRecallResList: SearchDataResponseItemType[][] = [];
     let totalCharsLength = 0;
-    for await (const query of queries) {
-      const [{ charsLength, embeddingRecallResults }, { fullTextRecallResults }] =
-        await Promise.all([
-          embeddingRecall({
-            query,
-            limit: embeddingLimit
-          }),
-          fullTextRecall({
-            query,
-            limit: fullTextLimit
-          })
-        ]);
-      totalCharsLength += charsLength;
-      embeddingRecallResList.push(embeddingRecallResults);
-      fullTextRecallResList.push(fullTextRecallResults);
-    }
+    await Promise.all(
+      queries.map(async (query) => {
+        const [{ charsLength, embeddingRecallResults }, { fullTextRecallResults }] =
+          await Promise.all([
+            embeddingRecall({
+              query,
+              limit: embeddingLimit
+            }),
+            fullTextRecall({
+              query,
+              limit: fullTextLimit
+            })
+          ]);
+        totalCharsLength += charsLength;
+        embeddingRecallResList.push(embeddingRecallResults);
+        fullTextRecallResList.push(fullTextRecallResults);
+      })
+    );
+
+    // rrf concat
+    const rrfEmbRecall = datasetSearchResultConcat(
+      embeddingRecallResList.map((list) => ({ k: 60, list }))
+    ).slice(0, embeddingLimit);
+    const rrfFTRecall = datasetSearchResultConcat(
+      fullTextRecallResList.map((list) => ({ k: 60, list }))
+    ).slice(0, fullTextLimit);

     return {
       charsLength: totalCharsLength,
-      embeddingRecallResults: embeddingRecallResList[0],
-      fullTextRecallResults: fullTextRecallResList[0]
+      embeddingRecallResults: rrfEmbRecall,
+      fullTextRecallResults: rrfFTRecall
     };
   };

   /* main step */
   // count limit
-  const { embeddingLimit, fullTextLimit } = countRecallLimit();
+  const embeddingLimit = 60;
+  const fullTextLimit = 40;

   // recall
   const { embeddingRecallResults, fullTextRecallResults, charsLength } = await multiQueryRecall({
@@ -620,7 +583,7 @@ export async function searchDatasetData(props: {
       return true;
     });

     return reRankSearchResult({
-      query: rawQuery,
+      query: reRankQuery,
       data: filterSameDataResults
     });
   })();
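
The change above replaces single-list recall with reciprocal rank fusion (RRF) across the per-query result lists, instead of returning only the first query's results. A minimal, self-contained sketch of the merge that datasetSearchResultConcat presumably performs, assuming standard RRF scoring with the k = 60 seen in the hunk (the function name rrfConcat and the reduced item type are illustrative, not the project's actual implementation):

// Reciprocal rank fusion: each item scores 1 / (k + rank) in every list
// that contains it, and scores are summed across lists.
type RecallItem = { id: string };

function rrfConcat<T extends RecallItem>(lists: { k: number; list: T[] }[]): T[] {
  const scores = new Map<string, { score: number; data: T }>();
  for (const { k, list } of lists) {
    list.forEach((item, index) => {
      const rank = index + 1; // lists are assumed to arrive rank-ordered
      const prev = scores.get(item.id);
      scores.set(item.id, {
        score: (prev?.score ?? 0) + 1 / (k + rank),
        data: item
      });
    });
  }
  // Highest fused score first; the caller then slices back to its limit.
  return [...scores.values()]
    .sort((a, b) => b.score - a.score)
    .map((entry) => entry.data);
}

RRF favors chunks that rank well across several query variants, which is why the fused lists are sliced back to embeddingLimit and fullTextLimit afterwards.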

View File

@@ -3,10 +3,13 @@ import { formatModelPrice2Store } from '@/service/support/wallet/bill/utils';
 import type { SelectedDatasetType } from '@fastgpt/global/core/module/api.d';
 import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
 import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
-import { ModelTypeEnum, getVectorModel } from '@/service/core/ai/model';
+import { ModelTypeEnum, getLLMModel, getVectorModel } from '@/service/core/ai/model';
 import { searchDatasetData } from '@/service/core/dataset/data/controller';
 import { ModuleInputKeyEnum, ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
 import { DatasetSearchModeEnum } from '@fastgpt/global/core/dataset/constants';
+import { queryExtension } from '@fastgpt/service/core/ai/functions/queryExtension';
+import { getHistories } from '../utils';
+import { datasetSearchQueryExtension } from '@fastgpt/service/core/dataset/search/utils';

 type DatasetSearchProps = ModuleDispatchProps<{
   [ModuleInputKeyEnum.datasetSelectList]: SelectedDatasetType;
@@ -15,6 +18,9 @@ type DatasetSearchProps = ModuleDispatchProps<{
   [ModuleInputKeyEnum.datasetSearchMode]: `${DatasetSearchModeEnum}`;
   [ModuleInputKeyEnum.userChatInput]: string;
   [ModuleInputKeyEnum.datasetSearchUsingReRank]: boolean;
+  [ModuleInputKeyEnum.datasetSearchUsingExtensionQuery]: boolean;
+  [ModuleInputKeyEnum.datasetSearchExtensionModel]: string;
+  [ModuleInputKeyEnum.datasetSearchExtensionBg]: string;
 }>;

 export type DatasetSearchResponse = {
   [ModuleOutputKeyEnum.responseData]: moduleDispatchResType;
@@ -28,7 +34,19 @@ export async function dispatchDatasetSearch(
 ): Promise<DatasetSearchResponse> {
   const {
     teamId,
-    params: { datasets = [], similarity, limit = 1500, usingReRank, searchMode, userChatInput }
+    histories,
+    params: {
+      datasets = [],
+      similarity,
+      limit = 1500,
+      usingReRank,
+      searchMode,
+      userChatInput,
+      datasetSearchUsingExtensionQuery,
+      datasetSearchExtensionModel,
+      datasetSearchExtensionBg
+    }
   } = props as DatasetSearchProps;

   if (!Array.isArray(datasets)) {
@@ -43,15 +61,21 @@ export async function dispatchDatasetSearch(
     return Promise.reject('core.chat.error.User input empty');
   }

+  // query extension
+  const extensionModel =
+    datasetSearchUsingExtensionQuery && datasetSearchExtensionModel
+      ? getLLMModel(datasetSearchExtensionModel)
+      : undefined;
+  const { concatQueries, rewriteQuery, aiExtensionResult } = await datasetSearchQueryExtension({
+    query: userChatInput,
+    extensionModel,
+    extensionBg: datasetSearchExtensionBg,
+    histories: getHistories(6, histories)
+  });
+
   // get vector
   const vectorModel = getVectorModel(datasets[0]?.vectorModel?.model);

-  // const { queries: extensionQueries } = await searchQueryExtension({
-  //   query: userChatInput,
-  //   model: global.llmModels[0].model
-  // });
-
-  const concatQueries = [userChatInput];

   // start search
   const {
     searchRes,
@@ -60,7 +84,7 @@
     usingReRank: searchUsingReRank
   } = await searchDatasetData({
     teamId,
-    rawQuery: `${userChatInput}`,
+    reRankQuery: `${rewriteQuery}`,
     queries: concatQueries,
     model: vectorModel.model,
     similarity,
@@ -70,25 +94,45 @@
     usingReRank
   });

   // count bill results
+  // vector
   const { total, modelName } = formatModelPrice2Store({
     model: vectorModel.model,
     inputLen: charsLength,
     type: ModelTypeEnum.vector
   });
+  const responseData: moduleDispatchResType & { price: number } = {
+    price: total,
+    query: concatQueries.join('\n'),
+    model: modelName,
+    charsLength,
+    similarity: usingSimilarityFilter ? similarity : undefined,
+    limit,
+    searchMode,
+    searchUsingReRank: searchUsingReRank
+  };
+
+  if (aiExtensionResult) {
+    const { total, modelName } = formatModelPrice2Store({
+      model: aiExtensionResult.model,
+      inputLen: aiExtensionResult.inputTokens,
+      outputLen: aiExtensionResult.outputTokens,
+      type: ModelTypeEnum.llm
+    });
+
+    responseData.price += total;
+    responseData.inputTokens = aiExtensionResult.inputTokens;
+    responseData.outputTokens = aiExtensionResult.outputTokens;
+    responseData.extensionModel = modelName;
+    responseData.extensionResult =
+      aiExtensionResult.extensionQueries?.join('\n') ||
+      JSON.stringify(aiExtensionResult.extensionQueries);
+  }

   return {
     isEmpty: searchRes.length === 0 ? true : undefined,
     unEmpty: searchRes.length > 0 ? true : undefined,
     quoteQA: searchRes,
-    responseData: {
-      price: total,
-      query: concatQueries.join('\n'),
-      model: modelName,
-      charsLength,
-      similarity: usingSimilarityFilter ? similarity : undefined,
-      limit,
-      searchMode,
-      searchUsingReRank: searchUsingReRank
-    }
+    responseData
   };
 }
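
Taken together, these dispatch changes wire query extension into dataset search: the user input is expanded into several queries, the rewritten query drives re-ranking, and the extension LLM's token usage is billed on top of the vector model. A sketch of the result shape this code appears to consume, inferred from the fields read above (any field not visible in the hunk is a guess):

// Inferred from the destructuring and the aiExtensionResult reads above.
type DatasetSearchQueryExtensionResult = {
  concatQueries: string[]; // original input plus generated query variants
  rewriteQuery: string; // single rewritten query, used for re-ranking
  aiExtensionResult?: {
    model: string;
    inputTokens: number;
    outputTokens: number;
    extensionQueries?: string[];
  };
};

// When extension is disabled (no extensionModel), the helper presumably
// degrades to the raw user input, matching the removed
// `const concatQueries = [userChatInput];` behaviour.
function fallbackExtension(query: string): DatasetSearchQueryExtensionResult {
  return { concatQueries: [query], rewriteQuery: query };
}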

View File

@@ -1,11 +1,10 @@
 import type { ChatItemType, moduleDispatchResType } from '@fastgpt/global/core/chat/type.d';
 import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
 import { ModuleInputKeyEnum, ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
-import { getHistories } from '../utils';
-import { getAIApi } from '@fastgpt/service/core/ai/config';
-import { replaceVariable } from '@fastgpt/global/common/string/tools';
 import { ModelTypeEnum, getLLMModel } from '@/service/core/ai/model';
 import { formatModelPrice2Store } from '@/service/support/wallet/bill/utils';
+import { queryCfr } from '@fastgpt/service/core/ai/functions/cfr';
+import { getHistories } from '../utils';

 type Props = ModuleDispatchProps<{
   [ModuleInputKeyEnum.aiModel]: string;
@@ -34,57 +33,18 @@ export const dispatchCFR = async ({
     };
   }

-  const extractModel = getLLMModel(model);
+  const cfrModel = getLLMModel(model);
   const chatHistories = getHistories(history, histories);

-  const systemFewShot = systemPrompt
-    ? `Q: 对话背景。
-A: ${systemPrompt}
-`
-    : '';
-  const historyFewShot = chatHistories
-    .map((item) => {
-      const role = item.obj === 'Human' ? 'Q' : 'A';
-      return `${role}: ${item.value}`;
-    })
-    .join('\n');
-  const concatFewShot = `${systemFewShot}${historyFewShot}`.trim();
-
-  const ai = getAIApi({
-    timeout: 480000
+  const { cfrQuery, inputTokens, outputTokens } = await queryCfr({
+    chatBg: systemPrompt,
+    query: userChatInput,
+    histories: chatHistories,
+    model: cfrModel.model
   });
-
-  const result = await ai.chat.completions.create({
-    model: extractModel.model,
-    temperature: 0,
-    max_tokens: 150,
-    messages: [
-      {
-        role: 'user',
-        content: replaceVariable(defaultPrompt, {
-          query: `${userChatInput}`,
-          histories: concatFewShot
-        })
-      }
-    ],
-    stream: false
-  });
-
-  let answer = result.choices?.[0]?.message?.content || '';
-  // console.log(
-  //   replaceVariable(defaultPrompt, {
-  //     query: userChatInput,
-  //     histories: concatFewShot
-  //   })
-  // );
-  // console.log(answer);
-
-  const inputTokens = result.usage?.prompt_tokens || 0;
-  const outputTokens = result.usage?.completion_tokens || 0;

   const { total, modelName } = formatModelPrice2Store({
-    model: extractModel.model,
+    model: cfrModel.model,
     inputLen: inputTokens,
     outputLen: outputTokens,
     type: ModelTypeEnum.llm
@@ -97,85 +57,8 @@ A: ${systemPrompt}
       inputTokens,
       outputTokens,
       query: userChatInput,
-      textOutput: answer
+      textOutput: cfrQuery
     },
-    [ModuleOutputKeyEnum.text]: answer
+    [ModuleOutputKeyEnum.text]: cfrQuery
   };
 };
-
-const defaultPrompt = `请不要回答任何问题。
-你的任务是结合上下文,为当前问题,实现代词替换,确保问题描述的对象清晰明确。例如:
-历史记录:
-"""
-Q: 对话背景。
-A: 关于 FatGPT 的介绍和使用等问题。
-"""
-当前问题: 怎么下载
-输出: FastGPT 怎么下载?
-----------------
-历史记录:
-"""
-Q: 报错 "no connection"
-A: FastGPT 报错"no connection"可能是因为……
-"""
-当前问题: 怎么解决
-输出: FastGPT 报错"no connection"如何解决?
-----------------
-历史记录:
-"""
-Q: 作者是谁?
-A: FastGPT 的作者是 labring。
-"""
-当前问题: 介绍下他
-输出: 介绍下 FastGPT 的作者 labring。
-----------------
-历史记录:
-"""
-Q: 作者是谁?
-A: FastGPT 的作者是 labring。
-"""
-当前问题: 我想购买商业版。
-输出: FastGPT 商业版如何购买?
-----------------
-历史记录:
-"""
-Q: 对话背景。
-A: 关于 FatGPT 的介绍和使用等问题。
-"""
-当前问题: nh
-输出: nh
-----------------
-历史记录:
-"""
-Q: FastGPT 如何收费?
-A: FastGPT 收费可以参考……
-"""
-当前问题: 你知道 laf 么?
-输出: 你知道 laf 么?
-----------------
-历史记录:
-"""
-Q: FastGPT 的优势
-A: 1. 开源
-   2. 简便
-   3. 扩展性强
-"""
-当前问题: 介绍下第2点。
-输出: 介绍下 FastGPT 简便的优势。
-----------------
-历史记录:
-"""
-Q: 什么是 FastGPT
-A: FastGPT 是一个 RAG 平台。
-Q: 什么是 Sealos
-A: Sealos 是一个云操作系统。
-"""
-当前问题: 它们有什么关系?
-输出: FastGPT 和 Sealos 有什么关系?
-----------------
-历史记录:
-"""
-{{histories}}
-"""
-当前问题: {{query}}
-输出: `;
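
The few-shot prompt and the raw completion call are not gone; they moved into @fastgpt/service as queryCfr, which this module now calls. A sketch of the extracted helper reassembled from the removed lines (the argument and return shapes are confirmed by the call site above; the body is an assumption about how the relocated code looks):

import { getAIApi } from '@fastgpt/service/core/ai/config';
import { replaceVariable } from '@fastgpt/global/common/string/tools';

// The few-shot template deleted above, assumed to move along with the code.
declare const defaultPrompt: string;

export async function queryCfr({
  chatBg,
  query,
  histories,
  model
}: {
  chatBg?: string;
  query: string;
  histories: { obj: 'Human' | 'AI'; value: string }[]; // simplified ChatItemType
  model: string;
}) {
  // Rebuild the Q/A few-shot context exactly as the removed code did.
  const systemFewShot = chatBg ? `Q: 对话背景。\nA: ${chatBg}\n` : '';
  const historyFewShot = histories
    .map((item) => `${item.obj === 'Human' ? 'Q' : 'A'}: ${item.value}`)
    .join('\n');
  const concatFewShot = `${systemFewShot}${historyFewShot}`.trim();

  const ai = getAIApi({ timeout: 480000 });
  const result = await ai.chat.completions.create({
    model,
    temperature: 0,
    max_tokens: 150,
    messages: [
      {
        role: 'user',
        content: replaceVariable(defaultPrompt, {
          query: `${query}`,
          histories: concatFewShot
        })
      }
    ],
    stream: false
  });

  return {
    cfrQuery: result.choices?.[0]?.message?.content || '',
    inputTokens: result.usage?.prompt_tokens || 0,
    outputTokens: result.usage?.completion_tokens || 0
  };
}

Keeping the prompt and the API call in the service package lets the CFR module and the dataset-search query extension share one implementation instead of duplicating the template per module.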

View File

@@ -26,63 +26,40 @@ export const dispatchHttpRequest = async (props: HttpRequestProps): Promise<Http
     variables,
     outputs,
     params: {
-      system_httpMethod: httpMethod,
-      url: abandonUrl,
+      system_httpMethod: httpMethod = 'POST',
       system_httpReqUrl: httpReqUrl,
       system_httpHeader: httpHeader,
       ...body
     }
   } = props;

+  if (!httpReqUrl) {
+    return Promise.reject('Http url is empty');
+  }
+
   body = flatDynamicParams(body);

-  const { requestMethod, requestUrl, requestHeader, requestBody, requestQuery } = await (() => {
-    // 2024-2-12 clear
-    if (abandonUrl) {
-      return {
-        requestMethod: 'POST',
-        requestUrl: abandonUrl,
-        requestHeader: httpHeader,
-        requestBody: {
-          ...body,
-          appId,
-          chatId,
-          variables
-        },
-        requestQuery: {}
-      };
-    }
-    if (httpReqUrl) {
-      return {
-        requestMethod: httpMethod,
-        requestUrl: httpReqUrl,
-        requestHeader: httpHeader,
-        requestBody: {
-          appId,
-          chatId,
-          responseChatItemId,
-          variables,
-          data: body
-        },
-        requestQuery: {
-          appId,
-          chatId,
-          ...variables,
-          ...body
-        }
-      };
-    }
-    return Promise.reject('url is empty');
-  })();
+  const requestBody = {
+    appId,
+    chatId,
+    responseChatItemId,
+    variables,
+    data: body
+  };
+  const requestQuery = {
+    appId,
+    chatId,
+    ...variables,
+    ...body
+  };

   const formatBody = transformFlatJson({ ...requestBody });

   // parse header
   const headers = await (() => {
     try {
-      if (!requestHeader) return {};
-      return JSON.parse(requestHeader);
+      if (!httpHeader) return {};
+      return JSON.parse(httpHeader);
     } catch (error) {
       return Promise.reject('Header 为非法 JSON 格式');
     }
@@ -90,8 +67,8 @@ export const dispatchHttpRequest = async (props: HttpRequestProps): Promise<Http
   try {
     const response = await fetchData({
-      method: requestMethod,
-      url: requestUrl,
+      method: httpMethod,
+      url: httpReqUrl,
       headers,
       body: formatBody,
       query: requestQuery
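
After this cleanup the module always issues one request shape instead of branching on the abandoned url field. A purely illustrative example of what an external endpoint now receives (every value below is made up; only the key layout comes from the hunk):

// Body sent to the endpoint: platform context plus the flattened module inputs.
const exampleBody = {
  appId: 'app-id',
  chatId: 'chat-id',
  responseChatItemId: 'item-id',
  variables: { uid: 'u-1' },
  data: { city: 'Shanghai' } // flatDynamicParams output
};

// Query-string params: context plus variables and inputs spread flat.
const exampleQuery = {
  appId: 'app-id',
  chatId: 'chat-id',
  uid: 'u-1', // ...variables
  city: 'Shanghai' // ...body
};

Dropping the abandonUrl branch also removes the silent POST fallback: a missing system_httpReqUrl now fails fast with 'Http url is empty' before any request is built.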

View File

@@ -87,7 +87,10 @@ export const pushGenerateVectorBill = ({
   tmbId,
   charsLength,
   model,
-  source = BillSourceEnum.fastgpt
+  source = BillSourceEnum.fastgpt,
+  extensionModel,
+  extensionInputTokens,
+  extensionOutputTokens
 }: {
   billId?: string;
   teamId: string;
@@ -95,19 +98,43 @@ export const pushGenerateVectorBill = ({
   charsLength: number;
   model: string;
   source?: `${BillSourceEnum}`;
+  extensionModel?: string;
+  extensionInputTokens?: number;
+  extensionOutputTokens?: number;
 }) => {
-  let { total, modelName } = formatModelPrice2Store({
+  const { total: totalVector, modelName: vectorModelName } = formatModelPrice2Store({
     model,
     inputLen: charsLength,
     type: ModelTypeEnum.vector
   });
+
+  const { extensionTotal, extensionModelName } = (() => {
+    if (!extensionModel || !extensionInputTokens || !extensionOutputTokens)
+      return {
+        extensionTotal: 0,
+        extensionModelName: ''
+      };
+    const { total, modelName } = formatModelPrice2Store({
+      model: extensionModel,
+      inputLen: extensionInputTokens,
+      outputLen: extensionOutputTokens,
+      type: ModelTypeEnum.llm
+    });
+    return {
+      extensionTotal: total,
+      extensionModelName: modelName
+    };
+  })();
+
+  const total = totalVector + extensionTotal;

   // 插入 Bill 记录
   if (billId) {
     concatBill({
       teamId,
       tmbId,
-      total,
+      total: totalVector,
       billId,
       charsLength,
       listIndex: 0
@@ -123,9 +150,20 @@
       {
         moduleName: 'wallet.moduleName.index',
         amount: total,
-        model: modelName,
+        model: vectorModelName,
         charsLength
-      }
+      },
+      ...(extensionModel !== undefined
+        ? [
+            {
+              moduleName: extensionModelName,
+              amount: extensionTotal,
+              model: extensionModelName,
+              inputTokens: extensionInputTokens,
+              outputTokens: extensionOutputTokens
+            }
+          ]
+        : [])
     ]
   });
 }
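
The bill total is now the vector cost plus an optional LLM line item for the extension model. A worked sketch of the arithmetic, assuming formatModelPrice2Store multiplies lengths by per-unit prices (all prices below are invented for illustration; real values come from the model config):

// Hypothetical unit prices, chosen only to make the sums concrete.
const vectorCharsLength = 2000;
const vectorPricePerChar = 0.001;
const totalVector = vectorCharsLength * vectorPricePerChar; // 2

const extensionInputTokens = 300;
const extensionOutputTokens = 50;
const llmInputPricePerToken = 0.01;
const llmOutputPricePerToken = 0.03;
const extensionTotal =
  extensionInputTokens * llmInputPricePerToken +
  extensionOutputTokens * llmOutputPricePerToken; // 4.5

// Matches `const total = totalVector + extensionTotal;` above.
const total = totalVector + extensionTotal; // 6.5

Note that on the concatBill path only the vector share (totalVector) is appended to an existing billId, while the standalone bill carries the combined total and appends the extension model as its own list entry.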