Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-22 12:20:34 +00:00)
perf: dataset import params code (#4875)
* perf: dataset import params code
* perf: api dataset code
* model
packages/global/core/dataset/api.d.ts (30 changes, vendored)
@@ -1,9 +1,11 @@
-import type { DatasetDataIndexItemType, DatasetSchemaType } from './type';
+import type { ChunkSettingsType, DatasetDataIndexItemType, DatasetSchemaType } from './type';
 import type {
   DatasetCollectionTypeEnum,
   DatasetCollectionDataProcessModeEnum,
   ChunkSettingModeEnum,
-  DataChunkSplitModeEnum
+  DataChunkSplitModeEnum,
+  ChunkTriggerConfigTypeEnum,
+  ParagraphChunkAIModeEnum
 } from './constants';
 import type { LLMModelItemType } from '../ai/model.d';
 import type { ParentIdType } from 'common/parentFolder/type';

@@ -32,26 +34,16 @@ export type DatasetUpdateBody = {
 };

 /* ================= collection ===================== */
-export type DatasetCollectionChunkMetadataType = {
+// Input + store params
+type DatasetCollectionStoreDataType = ChunkSettingsType & {
   parentId?: string;
-  trainingType?: DatasetCollectionDataProcessModeEnum;
-  imageIndex?: boolean;
-  autoIndexes?: boolean;
-
-  chunkSettingMode?: ChunkSettingModeEnum;
-  chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
-  indexSize?: number;
-
-  chunkSplitter?: string;
-  qaPrompt?: string;
+  customPdfParse?: boolean;
+
   metadata?: Record<string, any>;
-
-  customPdfParse?: boolean;
 };

 // create collection params
-export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
+export type CreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   name: string;
   type: DatasetCollectionTypeEnum;

@@ -72,7 +64,7 @@ export type CreateDatasetCollectionParams = DatasetCollectionChunkMetadataType &
   nextSyncTime?: Date;
 };

-export type ApiCreateDatasetCollectionParams = DatasetCollectionChunkMetadataType & {
+export type ApiCreateDatasetCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   tags?: string[];
 };

@@ -90,7 +82,7 @@ export type ApiDatasetCreateDatasetCollectionParams = ApiCreateDatasetCollection
 export type FileIdCreateDatasetCollectionParams = ApiCreateDatasetCollectionParams & {
   fileId: string;
 };
-export type reTrainingDatasetFileCollectionParams = DatasetCollectionChunkMetadataType & {
+export type reTrainingDatasetFileCollectionParams = DatasetCollectionStoreDataType & {
   datasetId: string;
   collectionId: string;
 };
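For reference, a create-collection payload using the consolidated type could look roughly like the sketch below. Field names come from the types in this diff; the import paths, the id and name values, and the DatasetCollectionTypeEnum.file / DatasetCollectionDataProcessModeEnum.chunk members are assumptions for illustration, not values taken from this commit.

import type { CreateDatasetCollectionParams } from './api';
import {
  DatasetCollectionTypeEnum,
  DatasetCollectionDataProcessModeEnum,
  ChunkSettingModeEnum,
  DataChunkSplitModeEnum
} from './constants';

// Hypothetical payload; every concrete value below is illustrative, not a FastGPT default.
const createParams: CreateDatasetCollectionParams = {
  datasetId: 'dataset-id-placeholder',
  name: 'Imported FAQ',
  type: DatasetCollectionTypeEnum.file, // enum member assumed, not shown in this diff
  customPdfParse: false,
  metadata: { source: 'manual-import' },

  // fields inherited from ChunkSettingsType via DatasetCollectionStoreDataType
  trainingType: DatasetCollectionDataProcessModeEnum.chunk, // enum member assumed
  chunkSettingMode: ChunkSettingModeEnum.custom,
  chunkSplitMode: DataChunkSplitModeEnum.size,
  chunkSize: 512,
  indexSize: 512
};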
packages/global/core/dataset/constants.ts

@@ -143,15 +143,25 @@ export const DatasetCollectionDataProcessModeMap = {
   }
 };

+export enum ChunkTriggerConfigTypeEnum {
+  minSize = 'minSize',
+  forceChunk = 'forceChunk',
+  maxSize = 'maxSize'
+}
 export enum ChunkSettingModeEnum {
   auto = 'auto',
   custom = 'custom'
 }

 export enum DataChunkSplitModeEnum {
+  paragraph = 'paragraph',
   size = 'size',
   char = 'char'
 }
+export enum ParagraphChunkAIModeEnum {
+  auto = 'auto',
+  force = 'force'
+}

 /* ------------ data -------------- */
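A rough sketch of how these enums could steer the import pipeline is shown below. This is not FastGPT's actual chunking code; the ChunkPlan shape, the helper names, and the 1000-character fallback are assumptions, and only the enum members visible in the hunk above are used.

import { ChunkTriggerConfigTypeEnum, DataChunkSplitModeEnum } from './constants';

type ChunkPlan = {
  chunkTriggerType?: ChunkTriggerConfigTypeEnum;
  chunkTriggerMinSize?: number;
  chunkSplitMode?: DataChunkSplitModeEnum;
  chunkSplitter?: string;
};

// Decide whether a text needs chunking at all.
const shouldChunk = (text: string, plan: ChunkPlan): boolean => {
  switch (plan.chunkTriggerType) {
    case ChunkTriggerConfigTypeEnum.forceChunk:
      return true; // always split
    case ChunkTriggerConfigTypeEnum.minSize:
      // split only once the text exceeds the configured minimum (fallback value is a guess)
      return text.length > (plan.chunkTriggerMinSize ?? 1000);
    case ChunkTriggerConfigTypeEnum.maxSize:
    default:
      // maxSize is resolved against the agent model upstream, so treat it as "split"
      return true;
  }
};

// Pick a delimiter according to the split mode.
const pickDelimiter = (plan: ChunkPlan): string | undefined => {
  if (plan.chunkSplitMode === DataChunkSplitModeEnum.paragraph) return '\n\n';
  if (plan.chunkSplitMode === DataChunkSplitModeEnum.char) return plan.chunkSplitter;
  return undefined; // size mode: cut purely by chunkSize, no delimiter
};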
packages/global/core/dataset/data/constants.ts

@@ -32,7 +32,7 @@ export const DatasetDataIndexMap: Record<
     color: 'red'
   },
   [DatasetDataIndexTypeEnum.image]: {
-    label: i18nT('common:data_index_image'),
+    label: i18nT('dataset:data_index_image'),
     color: 'purple'
   }
 };
packages/global/core/dataset/type.d.ts (45 changes, vendored)
@@ -8,26 +8,42 @@ import type {
   DatasetStatusEnum,
   DatasetTypeEnum,
   SearchScoreTypeEnum,
-  TrainingModeEnum
+  TrainingModeEnum,
+  ChunkSettingModeEnum
 } from './constants';
 import type { DatasetPermission } from '../../support/permission/dataset/controller';
 import { Permission } from '../../support/permission/controller';
 import type { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
 import type { SourceMemberType } from 'support/user/type';
 import type { DatasetDataIndexTypeEnum } from './data/constants';
-import type { ChunkSettingModeEnum } from './constants';

 export type ChunkSettingsType = {
-  trainingType: DatasetCollectionDataProcessModeEnum;
-  autoIndexes?: boolean;
+  trainingType?: DatasetCollectionDataProcessModeEnum;
+
+  // Chunk trigger
+  chunkTriggerType?: ChunkTriggerConfigTypeEnum;
+  chunkTriggerMinSize?: number; // maxSize from agent model, not store
+
+  // Data enhance
+  dataEnhanceCollectionName?: boolean; // Auto add collection name to data
+
+  // Index enhance
   imageIndex?: boolean;
+  autoIndexes?: boolean;

-  chunkSettingMode?: ChunkSettingModeEnum;
+  // Chunk setting
+  chunkSettingMode?: ChunkSettingModeEnum; // system params / custom params
   chunkSplitMode?: DataChunkSplitModeEnum;
-
+  // Paragraph split
+  paragraphChunkAIMode?: ParagraphChunkAIModeEnum;
+  paragraphChunkDeep?: number; // Paragraph deep
+  paragraphChunkMinSize?: number; // Paragraph min size, if too small, it will merge
+  paragraphChunkMaxSize?: number; // Paragraph max size, if too large, it will split
+  // Size split
   chunkSize?: number;
-  indexSize?: number;
-
+  // Char split
   chunkSplitter?: string;
+  indexSize?: number;

   qaPrompt?: string;
 };

@@ -66,7 +82,7 @@ export type DatasetSchemaType = {
   defaultPermission?: number;
 };

-export type DatasetCollectionSchemaType = {
+export type DatasetCollectionSchemaType = ChunkSettingsType & {
   _id: string;
   teamId: string;
   tmbId: string;

@@ -101,18 +117,7 @@ export type DatasetCollectionSchemaType = {

   // Parse settings
   customPdfParse?: boolean;
-  // Chunk settings
-  autoIndexes?: boolean;
-  imageIndex?: boolean;
-  trainingType: DatasetCollectionDataProcessModeEnum;
-
-  chunkSettingMode?: ChunkSettingModeEnum;
-  chunkSplitMode?: DataChunkSplitModeEnum;
-
-  chunkSize?: number;
-  indexSize?: number;
-  chunkSplitter?: string;
-  qaPrompt?: string;
 };

 export type DatasetCollectionTagsSchemaType = {
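A minimal sketch of a ChunkSettingsType value, grouped the same way as the reworked type above. All numeric thresholds and the import paths are illustrative assumptions, not FastGPT defaults; only enum members shown elsewhere in this commit are used.

import type { ChunkSettingsType } from './type';
import {
  ChunkTriggerConfigTypeEnum,
  ChunkSettingModeEnum,
  DataChunkSplitModeEnum,
  ParagraphChunkAIModeEnum
} from './constants';

// Illustrative values only; every number here is a guess for the sake of the example.
const chunkSettings: ChunkSettingsType = {
  // Chunk trigger: only split documents larger than 1000 chars
  chunkTriggerType: ChunkTriggerConfigTypeEnum.minSize,
  chunkTriggerMinSize: 1000,

  // Index enhance
  imageIndex: false,
  autoIndexes: false,

  // Chunk setting: custom params, paragraph-first splitting
  chunkSettingMode: ChunkSettingModeEnum.custom,
  chunkSplitMode: DataChunkSplitModeEnum.paragraph,

  // Paragraph split
  paragraphChunkAIMode: ParagraphChunkAIModeEnum.auto,
  paragraphChunkDeep: 4,
  paragraphChunkMinSize: 100,
  paragraphChunkMaxSize: 1024,

  // Size / char split fallbacks
  chunkSize: 512,
  chunkSplitter: '\n\n',
  indexSize: 512
};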