Files
FastGPT/packages/service/common/file/read/word.ts
Archer 21288d1736 Feat: pptx and xlsx loader (#1118)
* perf: plan tip

* perf: upload size controller

* feat: add image ttl index

* feat: new upload file ux

* remove file

* feat: support read pptx

* feat: support xlsx

* fix: rerank docker file
2024-04-01 19:01:26 +08:00

36 lines
788 B
TypeScript

import mammoth from 'mammoth';
import { htmlToMarkdown } from '../../string/markdown';
import { ReadFileByBufferParams, ReadFileResponse } from './type';
import { initMarkdownText } from './utils';
/**
 * Read a Word document from an in-memory buffer and produce markdown text.
 *
 * Pipeline: mammoth renders the buffer to HTML, `htmlToMarkdown` converts that
 * HTML to markdown, and `initMarkdownText` post-processes the markdown using
 * the supplied `teamId` and `metadata`.
 *
 * @param teamId - forwarded unchanged to `initMarkdownText`
 * @param buffer - document bytes handed to `mammoth.convertToHtml`
 * @param metadata - forwarded to `initMarkdownText`; defaults to an empty object
 * @returns an object holding the processed text as `rawText` and an empty
 *   `metadata` object (the incoming metadata is not echoed back)
 * @throws rejects with a fixed string message if any step fails; the
 *   underlying error is only written to the console
 */
export const readWordFile = async ({
  teamId,
  buffer,
  metadata = {}
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
  try {
    // Step 1: Word document -> HTML.
    const conversion = await mammoth.convertToHtml({ buffer });

    // Step 2: HTML -> markdown.
    const markdown = await htmlToMarkdown(conversion.value);

    // Step 3: project-specific markdown post-processing.
    const rawText = await initMarkdownText({ teamId, md: markdown, metadata });

    return { rawText, metadata: {} };
  } catch (err) {
    // The original failure is logged here; callers only receive the generic message.
    console.log('error doc read:', err);
    return Promise.reject('Can not read doc file, please convert to PDF');
  }
};