mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-15 15:41:05 +00:00
feat: dataset index prefix (#5061)
This commit is contained in:
2
packages/global/core/dataset/controller.d.ts
vendored
2
packages/global/core/dataset/controller.d.ts
vendored
@@ -10,6 +10,7 @@ export type CreateDatasetDataProps = {
|
||||
a?: string;
|
||||
imageId?: string;
|
||||
indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
|
||||
indexPrefix?: string;
|
||||
};
|
||||
|
||||
export type UpdateDatasetDataProps = {
|
||||
@@ -21,6 +22,7 @@ export type UpdateDatasetDataProps = {
|
||||
dataId?: string; // pg data id
|
||||
})[];
|
||||
imageId?: string;
|
||||
indexPrefix?: string;
|
||||
};
|
||||
|
||||
export type PatchIndexesProps =
|
||||
|
@@ -7,9 +7,9 @@ export type PushDataToTrainingQueueProps = {
|
||||
datasetId: string;
|
||||
collectionId: string;
|
||||
|
||||
data: PushDatasetDataChunkProps[];
|
||||
mode?: TrainingModeEnum;
|
||||
data: PushDatasetDataChunkProps[];
|
||||
prompt?: string;
|
||||
|
||||
agentModel: string;
|
||||
vectorModel: string;
|
||||
|
3
packages/global/core/dataset/type.d.ts
vendored
3
packages/global/core/dataset/type.d.ts
vendored
@@ -36,6 +36,7 @@ export type ChunkSettingsType = {
|
||||
// Index enhance
|
||||
imageIndex?: boolean;
|
||||
autoIndexes?: boolean;
|
||||
indexPrefixTitle?: boolean;
|
||||
|
||||
// Chunk setting
|
||||
chunkSettingMode?: ChunkSettingModeEnum; // 系统参数/自定义参数
|
||||
@@ -184,8 +185,6 @@ export type DatasetTrainingSchemaType = {
|
||||
expireAt: Date;
|
||||
lockTime: Date;
|
||||
mode: TrainingModeEnum;
|
||||
model?: string;
|
||||
prompt?: string;
|
||||
dataId?: string;
|
||||
q: string;
|
||||
a: string;
|
||||
|
@@ -103,6 +103,7 @@ export const createCollectionAndInsertData = async ({
|
||||
delete formatCreateCollectionParams.chunkSize;
|
||||
delete formatCreateCollectionParams.chunkSplitter;
|
||||
delete formatCreateCollectionParams.indexSize;
|
||||
delete formatCreateCollectionParams.indexPrefixTitle;
|
||||
}
|
||||
}
|
||||
if (trainingType !== DatasetCollectionDataProcessModeEnum.qa) {
|
||||
@@ -223,7 +224,6 @@ export const createCollectionAndInsertData = async ({
|
||||
vlmModel: dataset.vlmModel,
|
||||
indexSize,
|
||||
mode: trainingMode,
|
||||
prompt: formatCreateCollectionParams.qaPrompt,
|
||||
billId: traingBillId,
|
||||
data: chunks.map((item, index) => ({
|
||||
...item,
|
||||
|
@@ -32,6 +32,7 @@ export const ChunkSettings = {
|
||||
|
||||
imageIndex: Boolean,
|
||||
autoIndexes: Boolean,
|
||||
indexPrefixTitle: Boolean,
|
||||
|
||||
chunkSettingMode: {
|
||||
type: String,
|
||||
|
@@ -27,23 +27,6 @@ export const lockTrainingDataByTeamId = async (teamId: string): Promise<any> =>
|
||||
} catch (error) {}
|
||||
};
|
||||
|
||||
export const pushDataListToTrainingQueueByCollectionId = async ({
|
||||
collectionId,
|
||||
...props
|
||||
}: Omit<PushDataToTrainingQueueProps, 'datasetId' | 'agentModel' | 'vectorModel' | 'vlmModel'>) => {
|
||||
const {
|
||||
dataset: { _id: datasetId, agentModel, vectorModel, vlmModel }
|
||||
} = await getCollectionWithDataset(collectionId);
|
||||
return pushDataListToTrainingQueue({
|
||||
...props,
|
||||
datasetId,
|
||||
collectionId,
|
||||
vectorModel,
|
||||
agentModel,
|
||||
vlmModel
|
||||
});
|
||||
};
|
||||
|
||||
export async function pushDataListToTrainingQueue({
|
||||
teamId,
|
||||
tmbId,
|
||||
@@ -53,7 +36,6 @@ export async function pushDataListToTrainingQueue({
|
||||
vectorModel,
|
||||
vlmModel,
|
||||
data,
|
||||
prompt,
|
||||
billId,
|
||||
mode = TrainingModeEnum.chunk,
|
||||
indexSize,
|
||||
@@ -149,8 +131,6 @@ export async function pushDataListToTrainingQueue({
|
||||
collectionId: collectionId,
|
||||
billId,
|
||||
mode: formatTrainingMode(item, mode),
|
||||
prompt,
|
||||
model,
|
||||
...(item.q && { q: item.q }),
|
||||
...(item.a && { a: item.a }),
|
||||
...(item.imageId && { imageId: item.imageId }),
|
||||
|
@@ -10,6 +10,7 @@ import {
|
||||
TeamMemberCollectionName
|
||||
} from '@fastgpt/global/support/user/team/constant';
|
||||
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
|
||||
import { DatasetDataCollectionName } from '../data/schema';
|
||||
|
||||
export const DatasetTrainingCollectionName = 'dataset_trainings';
|
||||
|
||||
@@ -54,8 +55,6 @@ const TrainingDataSchema = new Schema({
|
||||
default: 5
|
||||
},
|
||||
|
||||
model: String,
|
||||
prompt: String,
|
||||
q: {
|
||||
type: String,
|
||||
default: ''
|
||||
@@ -74,7 +73,10 @@ const TrainingDataSchema = new Schema({
|
||||
type: Number,
|
||||
default: 0
|
||||
},
|
||||
dataId: Schema.Types.ObjectId,
|
||||
dataId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetDataCollectionName
|
||||
},
|
||||
indexes: {
|
||||
type: [
|
||||
{
|
||||
@@ -105,6 +107,12 @@ TrainingDataSchema.virtual('collection', {
|
||||
foreignField: '_id',
|
||||
justOne: true
|
||||
});
|
||||
TrainingDataSchema.virtual('data', {
|
||||
ref: DatasetDataCollectionName,
|
||||
localField: 'dataId',
|
||||
foreignField: '_id',
|
||||
justOne: true
|
||||
});
|
||||
|
||||
try {
|
||||
// lock training data(teamId); delete training data
|
||||
|
@@ -111,6 +111,8 @@
|
||||
"import_param_setting": "Parameter settings",
|
||||
"import_select_file": "Select a file",
|
||||
"import_select_link": "Enter link",
|
||||
"index_prefix_title": "Add title to index",
|
||||
"index_prefix_title_tips": "Automatically add title names to all indexes",
|
||||
"index_size": "Index size",
|
||||
"index_size_tips": "When vectorized, the system will automatically further segment the blocks according to this size.",
|
||||
"input_required_field_to_select_baseurl": "Please enter the required information first",
|
||||
|
@@ -111,6 +111,8 @@
|
||||
"import_param_setting": "参数设置",
|
||||
"import_select_file": "选择文件",
|
||||
"import_select_link": "输入链接",
|
||||
"index_prefix_title": "将标题加入索引",
|
||||
"index_prefix_title_tips": "自动给所有索引加标题名",
|
||||
"index_size": "索引大小",
|
||||
"index_size_tips": "向量化时内容的长度,系统会自动按该大小对分块进行进一步的分割。",
|
||||
"input_required_field_to_select_baseurl": "请先输入必填信息",
|
||||
|
@@ -110,6 +110,8 @@
|
||||
"import_param_setting": "參數設定",
|
||||
"import_select_file": "選擇文件",
|
||||
"import_select_link": "輸入連結",
|
||||
"index_prefix_title": "將標題加入索引",
|
||||
"index_prefix_title_tips": "自動給所有索引加標題名",
|
||||
"index_size": "索引大小",
|
||||
"index_size_tips": "向量化時內容的長度,系統會自動按該大小對分塊進行進一步的分割。",
|
||||
"input_required_field_to_select_baseurl": "請先輸入必填信息",
|
||||
|
Reference in New Issue
Block a user