External dataset (#1497)

* perf: read rawText and chunk code

* perf: read raw text

* perf: read rawtext

* perf: token count

* log
This commit is contained in:
Archer
2024-05-16 11:47:53 +08:00
committed by GitHub
parent d5073f98ab
commit c6d9b15897
36 changed files with 531 additions and 267 deletions

View File

@@ -170,3 +170,10 @@ export const SearchScoreTypeMap = {
// Presumably icon identifiers for custom and link collections (inferred from the names).
// NOTE(review): both constants currently hold the same value 'common/linkBlue' — confirm this is intentional.
export const CustomCollectionIcon = 'common/linkBlue';
export const LinkCollectionIcon = 'common/linkBlue';
/* source prefix */
/**
 * String enum of the read types a dataset source can have.
 * Each member's runtime value is identical to its member name.
 *
 * NOTE(review): the exact semantics of each member (how a local file, a link
 * or an external file is actually read) are not visible here — confirm
 * against the code that consumes this enum.
 */
export enum DatasetSourceReadTypeEnum {
fileLocal = 'fileLocal',
link = 'link',
externalFile = 'externalFile'
}

View File

@@ -0,0 +1,16 @@
import { DatasetSourceReadTypeEnum, ImportDataSourceEnum } from './constants';
// NOTE(review): presumably the leading header text ('index,content') that marks a raw-text backup — confirm against the code that reads or writes it.
export const rawTextBackupPrefix = 'index,content';
/**
 * Maps an import data source to the matching `DatasetSourceReadTypeEnum` value.
 *
 * - `csvTable` and `fileLocal` map to `fileLocal`
 * - `fileLink` maps to `link`
 * - `externalFile` maps to `externalFile`
 * - any other value falls back to `link`
 *
 * @param type - The import data source to translate.
 * @returns The read type associated with the given import source.
 */
export const importType2ReadType = (type: ImportDataSourceEnum) => {
  switch (type) {
    case ImportDataSourceEnum.csvTable:
    case ImportDataSourceEnum.fileLocal:
      return DatasetSourceReadTypeEnum.fileLocal;
    case ImportDataSourceEnum.fileLink:
      return DatasetSourceReadTypeEnum.link;
    case ImportDataSourceEnum.externalFile:
      return DatasetSourceReadTypeEnum.externalFile;
    default:
      // Unrecognised import sources are treated as links.
      return DatasetSourceReadTypeEnum.link;
  }
};