Files
FastGPT/packages/global/core/dataset/constants.ts
Archer c30f069f2f V4.9.11 feature (#4969)
* Feat: Images dataset collection (#4941)

* New pic (#4858)

* 更新数据集相关类型,添加图像文件ID和预览URL支持;优化数据集导入功能,新增图像数据集处理组件;修复部分国际化文本;更新文件上传逻辑以支持新功能。

* 与原先代码的差别

* 新增 V4.9.10 更新说明,支持 PG 设置`systemEnv.hnswMaxScanTuples`参数,优化 LLM stream 调用超时,修复全文检索多知识库排序问题。同时更新数据集索引,移除 datasetId 字段以简化查询。

* 更换成fileId_image逻辑,并增加训练队列匹配的逻辑

* 新增图片集合判断逻辑,优化预览URL生成流程,确保仅在数据集为图片集合时生成预览URL,并添加相关日志输出以便调试。

* Refactor Docker Compose configuration to comment out exposed ports for production environments, update image versions for pgvector, fastgpt, and mcp_server, and enhance Redis service with a health check. Additionally, standardize dataset collection labels in constants and improve internationalization strings across multiple languages.

* Enhance TrainingStates component by adding internationalization support for the imageParse training mode and update defaultCounts to include imageParse mode in trainingDetail API.

* Enhance dataset import context by adding additional steps for image dataset import process and improve internationalization strings for modal buttons in the useEditTitle hook.

* Update DatasetImportContext to conditionally render MyStep component based on data source type, improving the import process for non-image datasets.

* Refactor image dataset handling by improving internationalization strings, enhancing error messages, and streamlining the preview URL generation process.

* 图片上传到新建的 dataset_collection_images 表,逻辑跟随更改

* 修改了除了controller的其他部分问题

* 把图片数据集的逻辑整合到controller里面

* 补充i18n

* 补充i18n

* resolve评论:主要是上传逻辑的更改和组件复用

* 图片名称的图标显示

* 修改编译报错的命名问题

* 删除不需要的collectionid部分

* 多余文件的处理和改动一个删除按钮

* 除了loading和统一的imageId,其他都resolve掉的

* 处理图标报错

* 复用了MyPhotoView并采用全部替换的方式将imageFileId变成imageId

* 去除不必要文件修改

* 报错和字段修改

* 增加上传成功后删除临时文件的逻辑以及回退一些修改

* 删除path字段,将图片保存到gridfs内,并修改增删等操作的代码

* 修正编译错误

---------

Co-authored-by: archer <545436317@qq.com>

* perf: image dataset

* feat: insert image

* perf: image icon

* fix: training state

---------

Co-authored-by: Zhuangzai fa <143257420+ctrlz526@users.noreply.github.com>

* fix: ts (#4948)

* Thirddatasetmd (#4942)

* add thirddataset.md

* fix thirddataset.md

* fix

* delete wrong png

---------

Co-authored-by: dreamer6680 <146868355@qq.com>

* perf: api dataset code

* perf: log

* add secondary.tsx (#4946)

* add secondary.tsx

* fix

---------

Co-authored-by: dreamer6680 <146868355@qq.com>

* perf: multiple menu

* perf: i18n

* feat: parse queue (#4960)

* feat: parse queue

* feat: sync parse queue

* fix thirddataset.md (#4962)

* fix thirddataset-4.png (#4963)

* feat: Dataset template import (#4934)

* 模版导入部分除了文档还没写

* 修复模版导入的 build 错误

* Document production

* compress pictures

* Change some constants to variables

---------

Co-authored-by: Archer <545436317@qq.com>

* perf: template import

* doc

* llm paragraph

* bocha tool

* fix: del collection

---------

Co-authored-by: Zhuangzai fa <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: dreamer6680 <1468683855@qq.com>
Co-authored-by: dreamer6680 <146868355@qq.com>
2025-06-06 14:48:44 +08:00

307 lines
8.5 KiB
TypeScript

import { i18nT } from '../../../web/i18n/utils';
/* ------------ dataset -------------- */
/**
 * Kinds of dataset.
 *
 * A string enum in which every value equals its member name. The values are
 * also used (as a template-literal type) for the keys of `DatasetTypeMap`.
 */
export enum DatasetTypeEnum {
  folder = 'folder',
  dataset = 'dataset',
  // deep link
  websiteDataset = 'websiteDataset',
  externalFile = 'externalFile',
  apiDataset = 'apiDataset',
  feishu = 'feishu',
  yuque = 'yuque'
}
/* Shape of one entry in `ApiDatasetTypeMap`. */
type ApiDatasetTypeItem = {
  icon: string;
  avatar: string;
  label: any;
  collectionLabel: string;
  courseUrl?: string;
};

/**
 * Display metadata (icon, avatar, i18n label, collection label and course URL)
 * for the `apiDataset`, `feishu` and `yuque` dataset types.
 *
 * NOTE(review): the declared type is a `Record` over every `DatasetTypeEnum`
 * value, but this literal only defines three keys; the `@ts-ignore` directly
 * above the declaration presumably exists to silence that mismatch. Indexing
 * with any other dataset type yields `undefined` at runtime even though the
 * type says otherwise, so guard lookups accordingly.
 */
// @ts-ignore
export const ApiDatasetTypeMap: Record<`${DatasetTypeEnum}`, ApiDatasetTypeItem> = {
  [DatasetTypeEnum.apiDataset]: {
    icon: 'core/dataset/externalDatasetOutline',
    avatar: 'core/dataset/externalDatasetColor',
    label: i18nT('dataset:api_file'),
    collectionLabel: i18nT('common:File'),
    courseUrl: '/docs/guide/knowledge_base/api_dataset/'
  },
  [DatasetTypeEnum.feishu]: {
    icon: 'core/dataset/feishuDatasetOutline',
    avatar: 'core/dataset/feishuDatasetColor',
    label: i18nT('dataset:feishu_dataset'),
    collectionLabel: i18nT('common:File'),
    courseUrl: '/docs/guide/knowledge_base/lark_dataset/'
  },
  [DatasetTypeEnum.yuque]: {
    icon: 'core/dataset/yuqueDatasetOutline',
    avatar: 'core/dataset/yuqueDatasetColor',
    label: i18nT('dataset:yuque_dataset'),
    collectionLabel: i18nT('common:File'),
    courseUrl: '/docs/guide/knowledge_base/yuque_dataset/'
  }
};
/* Shape of one entry in `DatasetTypeMap`. */
type DatasetTypeItem = {
  icon: string;
  avatar: string;
  label: any;
  collectionLabel: string;
  courseUrl?: string;
};

/**
 * Display metadata for every dataset type.
 *
 * The entries of `ApiDatasetTypeMap` are spread in first, followed by the
 * entries for `folder`, `dataset`, `websiteDataset` and `externalFile`.
 * Only some entries carry a `courseUrl`.
 */
export const DatasetTypeMap: Record<`${DatasetTypeEnum}`, DatasetTypeItem> = {
  ...ApiDatasetTypeMap,
  [DatasetTypeEnum.folder]: {
    icon: 'common/folderFill',
    avatar: 'common/folderFill',
    label: i18nT('dataset:folder_dataset'),
    collectionLabel: i18nT('common:Folder')
  },
  [DatasetTypeEnum.dataset]: {
    icon: 'core/dataset/commonDatasetOutline',
    avatar: 'core/dataset/commonDatasetColor',
    label: i18nT('dataset:common_dataset'),
    collectionLabel: i18nT('common:File')
  },
  [DatasetTypeEnum.websiteDataset]: {
    icon: 'core/dataset/websiteDatasetOutline',
    avatar: 'core/dataset/websiteDatasetColor',
    label: i18nT('dataset:website_dataset'),
    collectionLabel: i18nT('common:Website'),
    courseUrl: '/docs/guide/knowledge_base/websync/'
  },
  [DatasetTypeEnum.externalFile]: {
    icon: 'core/dataset/externalDatasetOutline',
    avatar: 'core/dataset/externalDatasetColor',
    label: i18nT('dataset:external_file'),
    collectionLabel: i18nT('common:File')
  }
};
/**
 * Status values of a dataset.
 *
 * A string enum in which every value equals its member name; the members are
 * the keys of `DatasetStatusMap`.
 */
export enum DatasetStatusEnum {
  active = 'active',
  syncing = 'syncing',
  waiting = 'waiting',
  error = 'error'
}
/**
 * i18n label for each `DatasetStatusEnum` member.
 *
 * Each entry holds a single `label` produced by `i18nT`.
 */
export const DatasetStatusMap = {
  [DatasetStatusEnum.active]: { label: i18nT('common:core.dataset.status.active') },
  [DatasetStatusEnum.syncing]: { label: i18nT('common:core.dataset.status.syncing') },
  [DatasetStatusEnum.waiting]: { label: i18nT('common:core.dataset.status.waiting') },
  [DatasetStatusEnum.error]: { label: i18nT('dataset:status_error') }
};
/* ------------ collection -------------- */
/**
 * Kinds of dataset collection.
 *
 * A string enum in which every value equals its member name; the members are
 * the keys of `DatasetCollectionTypeMap`.
 */
export enum DatasetCollectionTypeEnum {
  folder = 'folder',
  virtual = 'virtual',
  file = 'file',
  // one link
  link = 'link',
  externalFile = 'externalFile',
  apiFile = 'apiFile',
  images = 'images'
}
/**
 * i18n display name for each `DatasetCollectionTypeEnum` member.
 *
 * Each entry holds a single `name` produced by `i18nT`.
 */
export const DatasetCollectionTypeMap = {
  [DatasetCollectionTypeEnum.folder]: { name: i18nT('common:core.dataset.folder') },
  [DatasetCollectionTypeEnum.file]: { name: i18nT('common:core.dataset.file') },
  [DatasetCollectionTypeEnum.externalFile]: { name: i18nT('common:core.dataset.externalFile') },
  [DatasetCollectionTypeEnum.link]: { name: i18nT('common:core.dataset.link') },
  [DatasetCollectionTypeEnum.virtual]: { name: i18nT('dataset:empty_collection') },
  [DatasetCollectionTypeEnum.apiFile]: { name: i18nT('common:core.dataset.apiFile') },
  [DatasetCollectionTypeEnum.images]: { name: i18nT('dataset:core.dataset.Image collection') }
};
/**
 * Result values for a collection sync.
 *
 * A string enum in which every value equals its member name; the members are
 * the keys of `DatasetCollectionSyncResultMap`.
 */
export enum DatasetCollectionSyncResultEnum {
  sameRaw = 'sameRaw',
  success = 'success',
  failed = 'failed'
}
/**
 * i18n label for each `DatasetCollectionSyncResultEnum` member.
 *
 * Each entry holds a single `label` produced by `i18nT`.
 */
export const DatasetCollectionSyncResultMap = {
  [DatasetCollectionSyncResultEnum.sameRaw]: {
    label: i18nT('common:core.dataset.collection.sync.result.sameRaw')
  },
  [DatasetCollectionSyncResultEnum.success]: {
    label: i18nT('common:core.dataset.collection.sync.result.success')
  },
  [DatasetCollectionSyncResultEnum.failed]: {
    label: i18nT('dataset:sync_collection_failed')
  }
};
/**
 * Data processing modes of a dataset collection.
 *
 * A string enum in which every value equals its member name; the members are
 * the keys of `DatasetCollectionDataProcessModeMap`.
 */
export enum DatasetCollectionDataProcessModeEnum {
  chunk = 'chunk',
  qa = 'qa',
  imageParse = 'imageParse',
  backup = 'backup',
  template = 'template',
  // abandoned
  auto = 'auto'
}
/**
 * i18n `label` and `tooltip` for each `DatasetCollectionDataProcessModeEnum`
 * member.
 *
 * As written, `imageParse` reuses the chunk-split tooltip key, and the
 * `backup` and `template` entries use the same key for label and tooltip.
 */
export const DatasetCollectionDataProcessModeMap = {
  [DatasetCollectionDataProcessModeEnum.chunk]: {
    label: i18nT('common:core.dataset.training.Chunk mode'),
    tooltip: i18nT('common:core.dataset.import.Chunk Split Tip')
  },
  [DatasetCollectionDataProcessModeEnum.qa]: {
    label: i18nT('common:core.dataset.training.QA mode'),
    tooltip: i18nT('common:core.dataset.import.QA Import Tip')
  },
  [DatasetCollectionDataProcessModeEnum.imageParse]: {
    label: i18nT('dataset:training.Image mode'),
    tooltip: i18nT('common:core.dataset.import.Chunk Split Tip')
  },
  [DatasetCollectionDataProcessModeEnum.auto]: {
    label: i18nT('common:core.dataset.training.Auto mode'),
    tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
  },
  [DatasetCollectionDataProcessModeEnum.backup]: {
    label: i18nT('dataset:backup_mode'),
    tooltip: i18nT('dataset:backup_mode')
  },
  [DatasetCollectionDataProcessModeEnum.template]: {
    label: i18nT('dataset:template_mode'),
    tooltip: i18nT('dataset:template_mode')
  }
};
/**
 * Chunk trigger configuration types.
 *
 * A string enum in which every value equals its member name.
 */
export enum ChunkTriggerConfigTypeEnum {
  minSize = 'minSize',
  forceChunk = 'forceChunk',
  maxSize = 'maxSize'
}
/**
 * Chunk setting modes.
 *
 * A string enum in which every value equals its member name.
 */
export enum ChunkSettingModeEnum {
  auto = 'auto',
  custom = 'custom'
}
/**
 * Chunk split modes.
 *
 * A string enum in which every value equals its member name.
 */
export enum DataChunkSplitModeEnum {
  paragraph = 'paragraph',
  size = 'size',
  char = 'char'
}
/**
 * Paragraph chunk AI modes.
 *
 * A string enum in which every value equals its member name.
 */
export enum ParagraphChunkAIModeEnum {
  auto = 'auto',
  force = 'force'
}
/* ------------ data -------------- */
/* ------------ training -------------- */
/**
 * Data sources for an import.
 *
 * A string enum in which every value equals its member name.
 */
export enum ImportDataSourceEnum {
  fileLocal = 'fileLocal',
  fileLink = 'fileLink',
  fileCustom = 'fileCustom',
  externalFile = 'externalFile',
  apiDataset = 'apiDataset',
  reTraining = 'reTraining',
  imageDataset = 'imageDataset'
}
/**
 * Training modes.
 *
 * A string enum in which every value equals its member name.
 */
export enum TrainingModeEnum {
  parse = 'parse',
  chunk = 'chunk',
  qa = 'qa',
  auto = 'auto',
  image = 'image',
  imageParse = 'imageParse'
}
/* ------------ search -------------- */
/**
 * Dataset search modes.
 *
 * A string enum in which every value equals its member name; the members are
 * the keys of `DatasetSearchModeMap`.
 */
export enum DatasetSearchModeEnum {
  embedding = 'embedding',
  fullTextRecall = 'fullTextRecall',
  mixedRecall = 'mixedRecall'
}
/**
 * Display metadata for each `DatasetSearchModeEnum` member.
 *
 * Every entry carries an icon path, an i18n `title` and `desc`, and a `value`
 * field that repeats the enum member the entry is keyed by.
 */
export const DatasetSearchModeMap = {
  [DatasetSearchModeEnum.embedding]: {
    icon: 'core/dataset/modeEmbedding',
    title: i18nT('common:core.dataset.search.mode.embedding'),
    desc: i18nT('common:core.dataset.search.mode.embedding desc'),
    value: DatasetSearchModeEnum.embedding
  },
  [DatasetSearchModeEnum.fullTextRecall]: {
    icon: 'core/dataset/fullTextRecall',
    title: i18nT('common:core.dataset.search.mode.fullTextRecall'),
    desc: i18nT('common:core.dataset.search.mode.fullTextRecall desc'),
    value: DatasetSearchModeEnum.fullTextRecall
  },
  [DatasetSearchModeEnum.mixedRecall]: {
    icon: 'core/dataset/mixedRecall',
    title: i18nT('common:core.dataset.search.mode.mixedRecall'),
    desc: i18nT('common:core.dataset.search.mode.mixedRecall desc'),
    value: DatasetSearchModeEnum.mixedRecall
  }
};
/**
 * Search score types.
 *
 * A string enum in which every value equals its member name; the members are
 * the keys of `SearchScoreTypeMap`.
 */
export enum SearchScoreTypeEnum {
  embedding = 'embedding',
  fullText = 'fullText',
  reRank = 'reRank',
  rrf = 'rrf'
}
/**
 * Display metadata for each `SearchScoreTypeEnum` member.
 *
 * Every entry carries an i18n `label` and `desc` plus a `showScore` flag,
 * which is `true` for `embedding` and `reRank` and `false` for `fullText`
 * and `rrf`.
 */
export const SearchScoreTypeMap = {
  [SearchScoreTypeEnum.embedding]: {
    label: i18nT('common:core.dataset.search.mode.embedding'),
    desc: i18nT('common:core.dataset.search.score.embedding desc'),
    showScore: true
  },
  [SearchScoreTypeEnum.fullText]: {
    label: i18nT('common:core.dataset.search.score.fullText'),
    desc: i18nT('common:core.dataset.search.score.fullText desc'),
    showScore: false
  },
  [SearchScoreTypeEnum.reRank]: {
    label: i18nT('common:core.dataset.search.score.reRank'),
    desc: i18nT('common:core.dataset.search.score.reRank desc'),
    showScore: true
  },
  [SearchScoreTypeEnum.rrf]: {
    label: i18nT('common:core.dataset.search.score.rrf'),
    desc: i18nT('common:core.dataset.search.score.rrf desc'),
    showScore: false
  }
};
/** Icon path for link collections. */
export const LinkCollectionIcon = 'common/linkBlue';
/** Icon path for custom collections; currently the same icon as `LinkCollectionIcon`. */
export const CustomCollectionIcon = LinkCollectionIcon;
/* source prefix */
/**
 * Source types used when reading dataset sources.
 *
 * A string enum in which every value equals its member name.
 */
export enum DatasetSourceReadTypeEnum {
  fileLocal = 'fileLocal',
  link = 'link',
  externalFile = 'externalFile',
  apiFile = 'apiFile',
  reTraining = 'reTraining'
}