mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-27 00:17:31 +00:00

* perf: plan tip * perf: upload size controller * feat: add image ttl index * feat: new upload file ux * remove file * feat: support read pptx * feat: support xlsx * fix: rerank docker file
36 lines
788 B
TypeScript
36 lines
788 B
TypeScript
import mammoth from 'mammoth';
|
|
import { htmlToMarkdown } from '../../string/markdown';
|
|
import { ReadFileByBufferParams, ReadFileResponse } from './type';
|
|
import { initMarkdownText } from './utils';
|
|
|
|
/**
 * Read a Word (docx) document and produce markdown text.
 *
 * The buffer is converted to HTML with `mammoth.convertToHtml`, the HTML is turned into
 * markdown by `htmlToMarkdown`, and the markdown is then handed to `initMarkdownText`
 * together with `teamId` and `metadata`.
 *
 * @param teamId - forwarded unchanged to `initMarkdownText`
 * @param buffer - document content passed to `mammoth.convertToHtml`
 * @param metadata - forwarded to `initMarkdownText`; defaults to `{}`
 * @returns an object holding the processed text as `rawText` and an empty `metadata` object
 *
 * If any step fails, the underlying error is logged and the returned promise rejects with
 * a fixed string message (not an `Error` instance).
 */
export const readWordFile = async ({
  teamId,
  buffer,
  metadata = {}
}: ReadFileByBufferParams): Promise<ReadFileResponse> => {
  try {
    // docx -> HTML -> markdown -> post-processed text
    const converted = await mammoth.convertToHtml({ buffer });
    const markdown = await htmlToMarkdown(converted.value);
    const rawText = await initMarkdownText({ teamId, md: markdown, metadata });

    const response: ReadFileResponse = { rawText, metadata: {} };
    return response;
  } catch (err) {
    console.log('error doc read:', err);
    // Callers receive this plain string as the rejection reason.
    return Promise.reject('Can not read doc file, please convert to PDF');
  }
};
|