mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-17 08:37:59 +00:00
@@ -147,6 +147,8 @@ export async function delCollectionAndRelatedSources({
|
||||
collectionId: { $in: collectionIds }
|
||||
});
|
||||
|
||||
await delay(2000);
|
||||
|
||||
// delete dataset.datas
|
||||
await MongoDatasetData.deleteMany({ teamId, collectionId: { $in: collectionIds } }, { session });
|
||||
// delete imgs
|
||||
|
@@ -66,11 +66,6 @@ export async function delDatasetRelevantData({
|
||||
if (!datasets.length) return;
|
||||
|
||||
const teamId = datasets[0].teamId;
|
||||
|
||||
if (!teamId) {
|
||||
return Promise.reject('teamId is required');
|
||||
}
|
||||
|
||||
const datasetIds = datasets.map((item) => String(item._id));
|
||||
|
||||
// Get _id, teamId, fileId, metadata.relatedImgId for all collections
|
||||
|
@@ -7,6 +7,10 @@ import {
|
||||
} from '@fastgpt/global/support/user/team/constant';
|
||||
import { DatasetCollectionName } from '../schema';
|
||||
import { DatasetColCollectionName } from '../collection/schema';
|
||||
import {
|
||||
DatasetDataIndexTypeEnum,
|
||||
DatasetDataIndexTypeMap
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
|
||||
export const DatasetDataCollectionName = 'dataset.datas';
|
||||
|
||||
@@ -50,6 +54,11 @@ const DatasetDataSchema = new Schema({
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetDataIndexTypeMap),
|
||||
default: DatasetDataIndexTypeEnum.custom
|
||||
},
|
||||
dataId: {
|
||||
type: String,
|
||||
required: true
|
||||
|
@@ -14,54 +14,22 @@ export const datasetSearchQueryExtension = async ({
|
||||
extensionBg?: string;
|
||||
histories?: ChatItemType[];
|
||||
}) => {
|
||||
const filterSamQuery = (queries: string[]) => {
|
||||
const set = new Set<string>();
|
||||
const filterSameQueries = queries.filter((item) => {
|
||||
// 删除所有的标点符号与空格等,只对文本进行比较
|
||||
const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
|
||||
if (set.has(str)) return false;
|
||||
set.add(str);
|
||||
return true;
|
||||
});
|
||||
|
||||
return filterSameQueries;
|
||||
};
|
||||
|
||||
let { queries, rewriteQuery, alreadyExtension } = (() => {
|
||||
// concat query
|
||||
let rewriteQuery =
|
||||
histories.length > 0
|
||||
? `${histories
|
||||
.map((item) => {
|
||||
return `${item.obj}: ${item.value}`;
|
||||
})
|
||||
.join('\n')}
|
||||
Human: ${query}
|
||||
`
|
||||
: query;
|
||||
|
||||
/* if query already extension, direct parse */
|
||||
try {
|
||||
const jsonParse = JSON.parse(query);
|
||||
const queries: string[] = Array.isArray(jsonParse) ? filterSamQuery(jsonParse) : [query];
|
||||
const alreadyExtension = Array.isArray(jsonParse);
|
||||
return {
|
||||
queries,
|
||||
rewriteQuery: alreadyExtension ? queries.join('\n') : rewriteQuery,
|
||||
alreadyExtension: alreadyExtension
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
queries: [query],
|
||||
rewriteQuery,
|
||||
alreadyExtension: false
|
||||
};
|
||||
}
|
||||
})();
|
||||
// concat query
|
||||
let queries = [query];
|
||||
let rewriteQuery =
|
||||
histories.length > 0
|
||||
? `${histories
|
||||
.map((item) => {
|
||||
return `${item.obj}: ${item.value}`;
|
||||
})
|
||||
.join('\n')}
|
||||
Human: ${query}
|
||||
`
|
||||
: query;
|
||||
|
||||
// ai extension
|
||||
const aiExtensionResult = await (async () => {
|
||||
if (!extensionModel || alreadyExtension) return;
|
||||
if (!extensionModel) return;
|
||||
const result = await queryExtension({
|
||||
chatBg: extensionBg,
|
||||
query,
|
||||
@@ -71,13 +39,23 @@ export const datasetSearchQueryExtension = async ({
|
||||
if (result.extensionQueries?.length === 0) return;
|
||||
return result;
|
||||
})();
|
||||
|
||||
if (aiExtensionResult) {
|
||||
queries = filterSamQuery(queries.concat(aiExtensionResult.extensionQueries));
|
||||
queries = queries.concat(aiExtensionResult.extensionQueries);
|
||||
rewriteQuery = queries.join('\n');
|
||||
}
|
||||
|
||||
const set = new Set<string>();
|
||||
const filterSameQueries = queries.filter((item) => {
|
||||
// 删除所有的标点符号与空格等,只对文本进行比较
|
||||
const str = hashStr(item.replace(/[^\p{L}\p{N}]/gu, ''));
|
||||
if (set.has(str)) return false;
|
||||
set.add(str);
|
||||
return true;
|
||||
});
|
||||
|
||||
return {
|
||||
concatQueries: queries,
|
||||
concatQueries: filterSameQueries,
|
||||
rewriteQuery,
|
||||
aiExtensionResult
|
||||
};
|
||||
|
@@ -57,7 +57,7 @@ export async function pushDataListToTrainingQueue({
|
||||
if (trainingMode === TrainingModeEnum.chunk) {
|
||||
const vectorModelData = vectorModelList?.find((item) => item.model === vectorModel);
|
||||
if (!vectorModelData) {
|
||||
return Promise.reject(`File model ${vectorModel} is inValid`);
|
||||
return Promise.reject(`Model ${vectorModel} is inValid`);
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -70,7 +70,7 @@ export async function pushDataListToTrainingQueue({
|
||||
if (trainingMode === TrainingModeEnum.qa) {
|
||||
const qaModelData = datasetModelList?.find((item) => item.model === agentModel);
|
||||
if (!qaModelData) {
|
||||
return Promise.reject(`Vector model ${agentModel} is inValid`);
|
||||
return Promise.reject(`Model ${agentModel} is inValid`);
|
||||
}
|
||||
return {
|
||||
maxToken: qaModelData.maxContext * 0.8,
|
||||
|
@@ -2,7 +2,7 @@
|
||||
import { connectionMongo, type Model } from '../../../common/mongo';
|
||||
const { Schema, model, models } = connectionMongo;
|
||||
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
|
||||
import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
|
||||
import { DatasetColCollectionName } from '../collection/schema';
|
||||
import { DatasetCollectionName } from '../schema';
|
||||
import {
|
||||
@@ -86,6 +86,11 @@ const TrainingDataSchema = new Schema({
|
||||
indexes: {
|
||||
type: [
|
||||
{
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetDataIndexTypeMap),
|
||||
required: true
|
||||
},
|
||||
text: {
|
||||
type: String,
|
||||
required: true
|
||||
|
Reference in New Issue
Block a user