Files
FastGPT/packages/service/worker/readFile/extension/docx.ts
Finley Ge b712a821f8 fix: upload file (#2992)
* fix: upload file

* chore: remove wasm, support html image parse

* chore: adjust

* chore: move base64match function into htmlstr2md
2024-10-28 21:44:50 +08:00

43 lines
1.1 KiB
TypeScript

import mammoth, { images } from 'mammoth';
import { ReadRawTextByBuffer, ReadFileResponse, ImageType } from '../type';
import { html2md } from '../../htmlStr2Md/utils';
/**
 * Read a .docx buffer and convert it to markdown.
 *
 * The document is first converted to HTML with mammoth. Each image handed to
 * the image converter is read as base64, tagged with a freshly generated UUID
 * and stored in `imageList`; the resulting `<img>` element gets that UUID as
 * its `src` instead of the image data itself. The HTML is then passed through
 * `html2md` to obtain the final text.
 *
 * @param buffer - raw content of the .docx file
 * @returns `rawText` produced by `html2md`, plus the collected `imageList`
 *
 * On any failure the original error is logged and the returned promise is
 * rejected with a fixed string message (not an `Error` instance).
 */
export const readDocsFile = async ({ buffer }: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
  const imageList: ImageType[] = [];

  try {
    // Image converter given to mammoth: stash the image data in imageList and
    // leave only the UUID behind in the generated HTML.
    const collectImage = images.imgElement(async (image) => {
      const base64 = await image.readAsBase64String();
      const uuid = crypto.randomUUID();
      imageList.push({ uuid, base64, mime: image.contentType });
      return { src: uuid };
    });

    const conversion = await mammoth.convertToHtml({ buffer }, { convertImage: collectImage });
    const markdown = html2md(conversion.value);

    return { rawText: markdown.rawText, imageList };
  } catch (error) {
    console.log('error doc read:', error);
    return Promise.reject('Can not read doc file, please convert to PDF');
  }
};