mirror of
https://github.com/labring/FastGPT.git
synced 2025-08-02 12:48:30 +00:00
4.6.8-alpha (#804)
* perf: redirect request and err log replace perf: dataset openapi feat: session fix: retry input error feat: 468 doc sub page feat: standard sub perf: rerank tip perf: rerank tip perf: api sdk perf: openapi sub plan perf: sub ui fix: ts * perf: init log * fix: variable select * sub page * icon * perf: llm model config * perf: menu ux * perf: system store * perf: publish app name * fix: init data * perf: flow edit ux * fix: value type format and ux * fix prompt editor default value (#13) * fix prompt editor default value * fix prompt editor update when not focus * add key with variable --------- Co-authored-by: Archer <545436317@qq.com> * fix: value type * doc * i18n * import path * home page * perf: mongo session running * fix: ts * perf: use toast * perf: flow edit * perf: sse response * slider ui * fetch error * fix prompt editor rerender when not focus by key defaultvalue (#14) * perf: prompt editor * feat: dataset search concat * perf: doc * fix:ts * perf: doc * fix json editor onblur value (#15) * faq * vector model default config * ipv6 --------- Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
@@ -1,17 +1,11 @@
|
||||
export const getChatModel = (model?: string) => {
|
||||
return global.chatModels.find((item) => item.model === model) ?? global.chatModels[0];
|
||||
/**
 * Find the LLM config in `global.llmModels` whose `model` field equals the given id.
 * When nothing matches (or no id is passed) the first entry of the list is returned.
 */
export const getLLMModel = (model?: string) => {
  const matched = global.llmModels.find(({ model: candidate }) => candidate === model);
  return matched ?? global.llmModels[0];
};
|
||||
/**
 * Find the QA model config in `global.qaModels` whose `model` field equals the given id.
 * When nothing matches (or no id is passed) the first entry of the list is returned.
 */
export const getQAModel = (model?: string) => {
  // `??` (not `||`): only a missing match should trigger the fallback.
  return global.qaModels.find((item) => item.model === model) ?? global.qaModels[0];
};
|
||||
/**
 * Find the CQ model config in `global.cqModels` whose `model` field equals the given id.
 * When nothing matches (or no id is passed) the first entry of the list is returned.
 */
export const getCQModel = (model?: string) => {
  // `??` (not `||`): only a missing match should trigger the fallback.
  return global.cqModels.find((item) => item.model === model) ?? global.cqModels[0];
};
|
||||
/**
 * Find the extract model config in `global.extractModels` whose `model` field equals the given id.
 * When nothing matches (or no id is passed) the first entry of the list is returned.
 */
export const getExtractModel = (model?: string) => {
  // `??` (not `||`): only a missing match should trigger the fallback.
  return global.extractModels.find((item) => item.model === model) ?? global.extractModels[0];
};
|
||||
export const getQGModel = (model?: string) => {
|
||||
return global.qgModels.find((item) => item.model === model) || global.qgModels[0];
|
||||
/**
 * Resolve the LLM config to use for dataset processing.
 *
 * Only entries of `global.llmModels` flagged with `datasetProcess` can match `model`.
 * When none of them matches, the first entry of `global.llmModels` is returned,
 * whether or not that entry has the `datasetProcess` flag.
 *
 * Returns `undefined` when `global.llmModels` is unset or empty. The previous version
 * guarded the lookup with `?.` but still dereferenced `global.llmModels[0]` in the
 * fallback, so an unset list threw instead.
 */
export const getDatasetModel = (model?: string) => {
  const llmModels = global.llmModels ?? [];

  // One pass instead of filter + find: same match, no intermediate array.
  return llmModels.find((item) => item.datasetProcess && item.model === model) ?? llmModels[0];
};
|
||||
|
||||
export const getVectorModel = (model?: string) => {
|
||||
@@ -33,22 +27,14 @@ export function getReRankModel(model?: string) {
|
||||
}
|
||||
|
||||
export enum ModelTypeEnum {
|
||||
chat = 'chat',
|
||||
qa = 'qa',
|
||||
cq = 'cq',
|
||||
extract = 'extract',
|
||||
qg = 'qg',
|
||||
llm = 'llm',
|
||||
vector = 'vector',
|
||||
audioSpeech = 'audioSpeech',
|
||||
whisper = 'whisper',
|
||||
rerank = 'rerank'
|
||||
}
|
||||
export const getModelMap = {
|
||||
[ModelTypeEnum.chat]: getChatModel,
|
||||
[ModelTypeEnum.qa]: getQAModel,
|
||||
[ModelTypeEnum.cq]: getCQModel,
|
||||
[ModelTypeEnum.extract]: getExtractModel,
|
||||
[ModelTypeEnum.qg]: getQGModel,
|
||||
[ModelTypeEnum.llm]: getLLMModel,
|
||||
[ModelTypeEnum.vector]: getVectorModel,
|
||||
[ModelTypeEnum.audioSpeech]: getAudioSpeechModel,
|
||||
[ModelTypeEnum.whisper]: getWhisperModel,
|
||||
|
@@ -6,7 +6,7 @@ export const getChatModelNameListByModules = (modules: ModuleItemType[]): string
|
||||
return chatModules
|
||||
.map((item) => {
|
||||
const model = item.inputs.find((input) => input.key === 'model')?.value;
|
||||
return global.chatModels.find((item) => item.model === model)?.name || '';
|
||||
return global.llmModels.find((item) => item.model === model)?.name || '';
|
||||
})
|
||||
.filter(Boolean);
|
||||
};
|
||||
|
@@ -15,6 +15,7 @@ import {
|
||||
DatasetSearchModeMap,
|
||||
SearchScoreTypeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
|
||||
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
|
||||
import { jiebaSplit } from '@/service/common/string/jieba';
|
||||
import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
|
||||
@@ -33,6 +34,7 @@ import type {
|
||||
PushDatasetDataResponse
|
||||
} from '@fastgpt/global/core/dataset/api.d';
|
||||
import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
|
||||
import { getVectorModel } from '../../ai/model';
|
||||
|
||||
export async function pushDataToTrainingQueue(
|
||||
props: {
|
||||
@@ -43,7 +45,7 @@ export async function pushDataToTrainingQueue(
|
||||
const result = await pushDataListToTrainingQueue({
|
||||
...props,
|
||||
vectorModelList: global.vectorModels,
|
||||
qaModelList: global.qaModels
|
||||
datasetModelList: global.llmModels
|
||||
});
|
||||
|
||||
return result;
|
||||
@@ -92,7 +94,7 @@ export async function insertData2Dataset({
|
||||
indexes.map((item) =>
|
||||
insertDatasetDataVector({
|
||||
query: item.text,
|
||||
model,
|
||||
model: getVectorModel(model),
|
||||
teamId,
|
||||
datasetId,
|
||||
collectionId
|
||||
@@ -218,7 +220,7 @@ export async function updateData2Dataset({
|
||||
if (item.type === 'create') {
|
||||
const result = await insertDatasetDataVector({
|
||||
query: item.index.text,
|
||||
model,
|
||||
model: getVectorModel(model),
|
||||
teamId: mongoData.teamId,
|
||||
datasetId: mongoData.datasetId,
|
||||
collectionId: mongoData.collectionId
|
||||
@@ -233,7 +235,7 @@ export async function updateData2Dataset({
|
||||
collectionId: mongoData.collectionId,
|
||||
id: item.index.dataId,
|
||||
query: item.index.text,
|
||||
model
|
||||
model: getVectorModel(model)
|
||||
});
|
||||
item.index.dataId = result.insertId;
|
||||
|
||||
@@ -328,14 +330,15 @@ export async function searchDatasetData(props: {
|
||||
};
|
||||
const embeddingRecall = async ({ query, limit }: { query: string; limit: number }) => {
|
||||
const { vectors, charsLength } = await getVectorsByText({
|
||||
model,
|
||||
model: getVectorModel(model),
|
||||
input: query
|
||||
});
|
||||
|
||||
const { results } = await recallFromVectorStore({
|
||||
vectors,
|
||||
limit,
|
||||
datasetIds
|
||||
datasetIds,
|
||||
efSearch: global.systemEnv?.pgHNSWEfSearch
|
||||
});
|
||||
|
||||
// get q and a
|
||||
@@ -479,6 +482,7 @@ export async function searchDatasetData(props: {
|
||||
});
|
||||
|
||||
if (!Array.isArray(results)) {
|
||||
usingReRank = false;
|
||||
return [];
|
||||
}
|
||||
|
||||
@@ -498,6 +502,7 @@ export async function searchDatasetData(props: {
|
||||
|
||||
return mergeResult;
|
||||
} catch (error) {
|
||||
usingReRank = false;
|
||||
return [];
|
||||
}
|
||||
};
|
||||
@@ -585,66 +590,6 @@ export async function searchDatasetData(props: {
|
||||
fullTextRecallResults: fullTextRecallResList[0]
|
||||
};
|
||||
};
|
||||
/**
 * Merge several ranked result lists into one list with Reciprocal Rank Fusion.
 *
 * For every list an item appears in, it receives `1 / (k + rank)` (rank is 1-based);
 * the contributions are summed per `id` and the output is sorted by that sum, highest first.
 * When the same `id` occurs in more than one list, the fields of the first occurrence are kept
 * and the `score` entries are merged: entries of the same `type` keep the larger `value`,
 * entries of a new `type` are appended.
 * Every returned item gets one extra score entry `{ type: SearchScoreTypeEnum.rrf, value, index }`
 * where `value` is the summed contribution and `index` its position in the output.
 *
 * Empty lists are ignored. If no list is left the result is `[]`; if exactly one list is left,
 * that list is returned as is (same array, no rrf entry added).
 *
 * The input items and their `score` arrays are never modified: fused items are copies.
 *
 * @param arr - the ranked lists to fuse, each with its own `k` constant
 * @returns the fused, re-ranked items
 */
const rrfConcat = (
  arr: { k: number; list: SearchDataResponseItemType[] }[]
): SearchDataResponseItemType[] => {
  const lists = arr.filter((item) => item.list.length > 0);

  if (lists.length === 0) return [];
  if (lists.length === 1) return lists[0].list;

  // Keyed by item id; Map keeps first-seen insertion order, which the stable sort preserves on ties.
  const fused = new Map<string, { item: SearchDataResponseItemType; rrfScore: number }>();

  for (const { k, list } of lists) {
    list.forEach((data, index) => {
      const rank = index + 1;
      const contribution = 1 / (k + rank);
      const record = fused.get(data.id);

      if (!record) {
        // Copy the item and its score entries so later merging/pushing never touches the caller's data.
        fused.set(data.id, {
          item: { ...data, score: data.score.map((entry) => ({ ...entry })) },
          rrfScore: contribution
        });
        return;
      }

      // Merge the two score lists; for scores of the same type keep the maximum value.
      for (const incoming of data.score) {
        const sameType = record.item.score.find((entry) => entry.type === incoming.type);
        if (sameType) {
          sameType.value = Math.max(sameType.value, incoming.value);
        } else {
          record.item.score.push({ ...incoming });
        }
      }
      record.rrfScore += contribution;
    });
  }

  return Array.from(fused.values())
    .sort((a, b) => b.rrfScore - a.rrfScore)
    .map(({ item, rrfScore }, index) => {
      item.score.push({
        type: SearchScoreTypeEnum.rrf,
        value: rrfScore,
        index
      });
      return item;
    });
};
|
||||
|
||||
/* main step */
|
||||
// count limit
|
||||
@@ -681,7 +626,7 @@ export async function searchDatasetData(props: {
|
||||
})();
|
||||
|
||||
// embedding recall and fullText recall rrf concat
|
||||
const rrfConcatResults = rrfConcat([
|
||||
const rrfConcatResults = datasetSearchResultConcat([
|
||||
{ k: 60, list: embeddingRecallResults },
|
||||
{ k: 64, list: fullTextRecallResults },
|
||||
{ k: 60, list: reRankResults }
|
||||
@@ -709,9 +654,8 @@ export async function searchDatasetData(props: {
|
||||
});
|
||||
}
|
||||
if (searchMode === DatasetSearchModeEnum.embedding) {
|
||||
usingSimilarityFilter = true;
|
||||
return filterSameDataResults.filter((item) => {
|
||||
usingSimilarityFilter = true;
|
||||
|
||||
const embeddingScore = item.score.find(
|
||||
(item) => item.type === SearchScoreTypeEnum.embedding
|
||||
);
|
||||
|
Reference in New Issue
Block a user