v4.6.9-alpha (#918)

Co-authored-by: Mufei <327958099@qq.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
Authored by Archer on 2024-03-04 00:05:25 +08:00, committed by GitHub
parent f9f0b4bffd
commit 42a8184ea0
153 changed files with 4906 additions and 4307 deletions


@@ -35,7 +35,6 @@ import type {
 import { pushDataListToTrainingQueue } from '@fastgpt/service/core/dataset/training/controller';
 import { getVectorModel } from '@fastgpt/service/core/ai/model';
 import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
-import { startQueue } from '@/service/utils/tools';
 
 export async function pushDataToTrainingQueue(
   props: {
@@ -49,8 +48,6 @@ export async function pushDataToTrainingQueue(
     datasetModelList: global.llmModels
   });
 
-  startQueue();
-
   return result;
 }
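Note: the two hunks above remove the explicit `startQueue()` kick-off from `pushDataToTrainingQueue`. With this commit, newly inserted training records are instead picked up by the MongoDB change stream registered in `createDatasetTrainingMongoWatch` (the new file added at the end of this diff), so the push path only enqueues data.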
@@ -129,7 +126,7 @@ export async function insertData2Dataset({
   return {
     insertId: _id,
-    charsLength: result.reduce((acc, cur) => acc + cur.charsLength, 0)
+    tokens: result.reduce((acc, cur) => acc + cur.tokens, 0)
   };
 }
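Every remaining hunk in this file is the same rename: the `charsLength` figure the vector layer used to report is replaced by a `tokens` count. A minimal sketch of the shape change for callers, with illustrative types that are not the repo's actual exports:

// Illustrative only — the real result types live in the repo's type definitions.
type InsertResultBefore = { insertId: string; charsLength: number };
type InsertResultAfter = { insertId: string; tokens: number };

// Downstream usage accounting is now denominated in model tokens:
const reportUsage = (r: InsertResultAfter) =>
  console.log(`inserted ${r.insertId}: ${r.tokens} tokens`);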
@@ -240,11 +237,11 @@ export async function updateData2Dataset({
         return result;
       }
       return {
-        charsLength: 0
+        tokens: 0
       };
     })
   );
-  const charsLength = insertResult.reduce((acc, cur) => acc + cur.charsLength, 0);
+  const tokens = insertResult.reduce((acc, cur) => acc + cur.tokens, 0);
   // console.log(clonePatchResult2Insert);
   await mongoSessionRun(async (session) => {
     // update mongo
@@ -273,7 +270,7 @@ export async function updateData2Dataset({
   });
 
   return {
-    charsLength
+    tokens
   };
 }
@@ -343,7 +340,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
     };
   };
   const embeddingRecall = async ({ query, limit }: { query: string; limit: number }) => {
-    const { vectors, charsLength } = await getVectorsByText({
+    const { vectors, tokens } = await getVectorsByText({
       model: getVectorModel(model),
       input: query
     });
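`getVectorsByText` now surfaces `tokens` rather than a character count — presumably the usage figure reported by the embedding API itself. A sketch of where such a number comes from, assuming an OpenAI-compatible `/embeddings` endpoint; the `embedWithUsage` helper is hypothetical, not from this commit:

// Sketch (assumption: an OpenAI-compatible embeddings endpoint).
async function embedWithUsage(baseUrl: string, apiKey: string, model: string, input: string) {
  const res = await fetch(`${baseUrl}/embeddings`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
    body: JSON.stringify({ model, input })
  });
  const data = await res.json();
  return {
    vectors: data.data.map((d: { embedding: number[] }) => d.embedding),
    tokens: (data.usage?.total_tokens ?? 0) as number // the figure now returned as `tokens`
  };
}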
@@ -407,7 +404,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
     return {
       embeddingRecallResults: formatResult,
-      charsLength
+      tokens
     };
   };
 
   const fullTextRecall = async ({
@@ -552,22 +549,21 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
     // multi query recall
     const embeddingRecallResList: SearchDataResponseItemType[][] = [];
     const fullTextRecallResList: SearchDataResponseItemType[][] = [];
-    let totalCharsLength = 0;
+    let totalTokens = 0;
 
     await Promise.all(
       queries.map(async (query) => {
-        const [{ charsLength, embeddingRecallResults }, { fullTextRecallResults }] =
-          await Promise.all([
-            embeddingRecall({
-              query,
-              limit: embeddingLimit
-            }),
-            fullTextRecall({
-              query,
-              limit: fullTextLimit
-            })
-          ]);
-        totalCharsLength += charsLength;
+        const [{ tokens, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
+          embeddingRecall({
+            query,
+            limit: embeddingLimit
+          }),
+          fullTextRecall({
+            query,
+            limit: fullTextLimit
+          })
+        ]);
+        totalTokens += tokens;
 
         embeddingRecallResList.push(embeddingRecallResults);
         fullTextRecallResList.push(fullTextRecallResults);
@@ -583,7 +579,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
     ).slice(0, fullTextLimit);
 
     return {
-      charsLength: totalCharsLength,
+      tokens: totalTokens,
       embeddingRecallResults: rrfEmbRecall,
       fullTextRecallResults: rrfFTRecall
     };
@@ -594,7 +590,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
   const { embeddingLimit, fullTextLimit } = countRecallLimit();
 
   // recall
-  const { embeddingRecallResults, fullTextRecallResults, charsLength } = await multiQueryRecall({
+  const { embeddingRecallResults, fullTextRecallResults, tokens } = await multiQueryRecall({
     embeddingLimit,
     fullTextLimit
   });
@@ -666,7 +662,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
   return {
     searchRes: filterResultsByMaxTokens(scoreFilter, maxTokens),
-    charsLength,
+    tokens,
     searchMode,
     limit: maxTokens,
     similarity,
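Callers of `searchDatasetData` that previously read `charsLength` off the result must switch to `tokens`. A minimal consumer sketch; the `SearchResultSlice` type is illustrative, not from this commit:

// Illustrative slice of the search response after this commit:
interface SearchResultSlice {
  tokens: number; // was `charsLength` before this rename
  searchMode: string;
}

const logSearchUsage = ({ tokens, searchMode }: SearchResultSlice) =>
  console.log(`[${searchMode}] recall consumed ${tokens} embedding tokens`);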


@@ -0,0 +1,31 @@
+import { generateQA } from '@/service/events/generateQA';
+import { generateVector } from '@/service/events/generateVector';
+import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
+import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
+import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
+
+export const createDatasetTrainingMongoWatch = () => {
+  const changeStream = MongoDatasetTraining.watch();
+
+  changeStream.on('change', async (change) => {
+    try {
+      if (change.operationType === 'insert') {
+        const fullDocument = change.fullDocument as DatasetTrainingSchemaType;
+        const { mode } = fullDocument;
+        if (mode === TrainingModeEnum.qa) {
+          generateQA();
+        } else if (mode === TrainingModeEnum.chunk) {
+          generateVector();
+        }
+      }
+    } catch (error) {}
+  });
+};
+
+export const startTrainingQueue = (fast?: boolean) => {
+  const max = global.systemEnv?.qaMaxProcess || 10;
+  for (let i = 0; i < max; i++) {
+    generateQA();
+    generateVector();
+  }
+};
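Two things worth knowing about this new file: `MongoDatasetTraining.watch()` uses MongoDB change streams, which require a replica set or sharded cluster (a standalone mongod throws on `watch()`), and the stream only fires for documents inserted after it is opened. A sketch of how the two exports might be wired at server startup — the `initTraining` entry point and import path are hypothetical, not from this commit:

// Hypothetical startup wiring — path and function name are assumptions.
import { createDatasetTrainingMongoWatch, startTrainingQueue } from './datasetTrainingMongoWatch';

export function initTraining() {
  // Register the change stream once: each inserted training record wakes
  // the matching worker (generateQA for qa mode, generateVector for chunk).
  createDatasetTrainingMongoWatch();

  // Kick the workers once on boot to drain records queued while the server
  // was down — the change stream only covers inserts from now on.
  startTrainingQueue();
}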