Feat: pptx and xlsx loader (#1118)

* perf: plan tip

* perf: upload size controller

* feat: add image ttl index

* feat: new upload file ux

* remove file

* feat: support read pptx

* feat: support xlsx

* fix: rerank docker file
This commit is contained in:
Archer
2024-04-01 19:01:26 +08:00
committed by GitHub
parent f9d266a6af
commit 21288d1736
90 changed files with 2707 additions and 1678 deletions

View File

@@ -2,6 +2,7 @@ import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { addLog } from '../../../common/system/log';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { MongoDatasetTraining } from './schema';
import Papa from 'papaparse';
export const checkInvalidChunkAndLock = async ({
err,
@@ -39,3 +40,18 @@ export const checkInvalidChunkAndLock = async ({
}
return false;
};
/**
 * Turn raw CSV text into a list of question/answer chunks.
 *
 * The text is parsed with Papa Parse and every parsed row is treated as data:
 * the first cell becomes `q` and the second cell becomes `a`. A missing or
 * empty cell is normalized to an empty string. Rows in which both `q` and `a`
 * end up empty are dropped.
 *
 * @param rawText - CSV content as a single string.
 * @returns An object whose `chunks` array holds one `{ q, a }` entry per kept row.
 */
export const parseCsvTable2Chunks = (rawText: string) => {
  const rows = Papa.parse(rawText).data as string[][];
  const chunks: { q: string; a: string }[] = [];

  for (const row of rows) {
    const q = row[0] || '';
    const a = row[1] || '';

    // Skip rows that carry neither a question nor an answer.
    if (!q && !a) {
      continue;
    }

    chunks.push({ q, a });
  }

  return { chunks };
};