mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
Add image index and pdf parse (#3956)
* feat: think tag parse * feat: parse think tag test * feat: pdf parse ux * feat: doc2x parse * perf: rewrite training mode setting * feat: image parse queue * perf: image index * feat: image parse process * feat: add init sh * fix: ts
This commit is contained in:
@@ -2,12 +2,17 @@ import { MongoDatasetCollection } from './schema';
|
||||
import { ClientSession } from '../../../common/mongo';
|
||||
import { MongoDatasetCollectionTags } from '../tag/schema';
|
||||
import { readFromSecondary } from '../../../common/mongo/utils';
|
||||
import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
|
||||
import {
|
||||
CollectionWithDatasetType,
|
||||
DatasetCollectionSchemaType
|
||||
} from '@fastgpt/global/core/dataset/type';
|
||||
import {
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetCollectionSyncResultEnum,
|
||||
DatasetCollectionTypeEnum,
|
||||
DatasetSourceReadTypeEnum,
|
||||
DatasetTypeEnum
|
||||
DatasetTypeEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
|
||||
import { readDatasetSourceRawText } from '../read';
|
||||
@@ -160,6 +165,7 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
|
||||
})();
|
||||
const rawText = await readDatasetSourceRawText({
|
||||
teamId: collection.teamId,
|
||||
tmbId: collection.tmbId,
|
||||
...sourceReadType
|
||||
});
|
||||
|
||||
@@ -220,3 +226,24 @@ export const syncCollection = async (collection: CollectionWithDatasetType) => {
|
||||
|
||||
return DatasetCollectionSyncResultEnum.success;
|
||||
};
|
||||
|
||||
/**
 * Pick the training mode for a collection from its processing settings.
 *
 * Resolution order:
 *  - QA: handled as an independent process, so it is returned first.
 *  - Chunk: image index -> auto index -> plain chunk index.
 *
 * The image and auto index modes are only selected when
 * `global.feConfigs?.isPlus` is truthy; otherwise the result falls back to
 * the plain chunk mode.
 *
 * @param collection - The subset of collection fields that drive the choice:
 *   `trainingType`, plus the optional `autoIndexes` and `imageIndex` values.
 * @returns The matching `TrainingModeEnum` member.
 */
export const getTrainingModeByCollection = (collection: {
  trainingType: DatasetCollectionSchemaType['trainingType'];
  autoIndexes?: DatasetCollectionSchemaType['autoIndexes'];
  imageIndex?: DatasetCollectionSchemaType['imageIndex'];
}) => {
  const isQA = collection.trainingType === DatasetCollectionDataProcessModeEnum.qa;
  if (isQA) return TrainingModeEnum.qa;

  // Both index-based modes sit behind the same feature gate.
  const plusEnabled = Boolean(global.feConfigs?.isPlus);

  // Image index takes precedence over auto index.
  if (plusEnabled && collection.imageIndex) return TrainingModeEnum.image;
  if (plusEnabled && collection.autoIndexes) return TrainingModeEnum.auto;

  return TrainingModeEnum.chunk;
};
|
||||
|
Reference in New Issue
Block a user