Files
FastGPT/packages/global/core/dataset/utils.ts
Archer adf5377ebe Add image index and pdf parse (#3956)
* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
2025-03-06 18:28:03 +08:00

66 lines
1.6 KiB
TypeScript

import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';
import { DatasetDataIndexTypeEnum } from './data/constants';
export function getCollectionIcon(
type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file,
name = ''
) {
if (type === DatasetCollectionTypeEnum.folder) {
return 'common/folderFill';
}
if (type === DatasetCollectionTypeEnum.link) {
return 'common/linkBlue';
}
if (type === DatasetCollectionTypeEnum.virtual) {
return 'file/fill/manual';
}
return getFileIcon(name);
}
export function getSourceNameIcon({
sourceName,
sourceId
}: {
sourceName: string;
sourceId?: string;
}) {
try {
const fileIcon = getFileIcon(decodeURIComponent(sourceName.replace(/%/g, '%25')), '');
if (fileIcon) {
return fileIcon;
}
if (strIsLink(sourceId)) {
return 'common/linkBlue';
}
} catch (error) {}
return 'file/fill/file';
}
/* get dataset data default index */
export function getDefaultIndex(props?: { q?: string; a?: string }) {
const { q = '', a } = props || {};
return [
{
text: q,
type: DatasetDataIndexTypeEnum.default
},
...(a
? [
{
text: a,
type: DatasetDataIndexTypeEnum.default
}
]
: [])
];
}
export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
if (mode === TrainingModeEnum.qa) return data.length * 20;
if (mode === TrainingModeEnum.auto) return data.length * 5;
return data.length;
};