External dataset (#1497)

* perf: read rawText and chunk code

* perf: read raw text

* perf: read rawtext

* perf: token count

* log
This commit is contained in:
Archer
2024-05-16 11:47:53 +08:00
committed by GitHub
parent d5073f98ab
commit c6d9b15897
36 changed files with 531 additions and 267 deletions

View File

@@ -1,6 +1,7 @@
import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import {
DatasetSearchModeEnum,
DatasetSourceReadTypeEnum,
DatasetTypeEnum,
ImportDataSourceEnum,
TrainingModeEnum
@@ -75,22 +76,3 @@ export type SearchTestResponse = {
};
/* =========== training =========== */
/**
 * Parameter object describing one source to preview as chunks.
 * NOTE(review): purpose is inferred from the type name; confirm against the
 * endpoint that consumes it.
 */
export type PostPreviewFilesChunksProps = {
/** Import source kind; one of the `ImportDataSourceEnum` members. */
type: ImportDataSourceEnum;
/** Identifier of the source; presumably interpreted according to `type` (TODO confirm). */
sourceId: string;
/** Requested chunk size; the unit (characters vs. tokens) is not visible here (TODO confirm). */
chunkSize: number;
/** Overlap between adjacent chunks, expressed as a ratio; presumably relative to `chunkSize` (TODO confirm). */
overlapRatio: number;
/** Optional custom split string; may be omitted. */
customSplitChar?: string;
};
/**
 * Array of preview entries, each carrying a file id, a raw-text length and
 * that entry's chunks as plain strings.
 * NOTE(review): presumably one entry per previewed file; confirm with the producer.
 */
export type PostPreviewFilesChunksResponse = {
/** Identifier of the file this entry belongs to. */
fileId: string;
/** Length of the raw text; presumably measured in characters (TODO confirm). */
rawTextLength: number;
/** Chunk texts for this entry. */
chunks: string[];
}[];
/**
 * Array of preview entries for table-style data, each carrying a file id, a
 * total chunk count, a list of `q`/`a` chunks and an optional error text.
 * NOTE(review): presumably one entry per previewed file; confirm with the producer.
 */
export type PostPreviewTableChunksResponse = {
/** Identifier of the file this entry belongs to. */
fileId: string;
/** Total chunk count; may exceed `chunks.length` if only a sample is returned (TODO confirm). */
totalChunks: number;
/** Chunks as `q`/`a` string pairs (presumably question/answer) with their `chunkIndex`. */
chunks: { q: string; a: string; chunkIndex: number }[];
/** Optional error text; presumably set when this entry could not be processed (TODO confirm). */
errorText?: string;
}[];