Files
FastGPT/packages/service/worker/readFile/extension/docx.ts
Archer 565a966d19 Python Sandbox (#4380)
* Python3 Sandbox (#3944)

* update python box (#4251)

* update python box

* Adjust the height of the NodeCode border.

* update python sandbox and add test systemcall bash

* update sandbox

* add VERSION_RELEASE (#4376)

* save empty docx

* fix pythonbox log error

* fix: js template

---------

Co-authored-by: dogfar <37035781+dogfar@users.noreply.github.com>
Co-authored-by: gggaaallleee <91131304+gggaaallleee@users.noreply.github.com>
Co-authored-by: gggaaallleee <1293587368@qq.com>
2025-03-28 13:45:09 +08:00

44 lines
1.1 KiB
TypeScript

import mammoth, { images } from 'mammoth';
import { ReadRawTextByBuffer, ReadFileResponse, ImageType } from '../type';
import { html2md } from '../../htmlStr2Md/utils';
/**
 * Convert a .docx buffer to markdown.
 *
 * The document is first rendered to HTML by mammoth and the HTML is then
 * passed through `html2md`. Every embedded image is collected into
 * `imageList` (base64 payload + MIME type) and its `<img>` `src` is set to a
 * freshly generated UUID, so the image data can be matched to the element
 * through that UUID.
 *
 * @param buffer - Raw bytes of the .docx file.
 * @returns The markdown text together with the list of extracted images.
 * @throws Rejects with a plain string message when conversion fails; the
 *   underlying error is written to the console first.
 */
export const readDocsFile = async ({ buffer }: ReadRawTextByBuffer): Promise<ReadFileResponse> => {
  const imageList: ImageType[] = [];

  try {
    // Swap each embedded image for a UUID reference and keep its data aside.
    const collectImage = images.imgElement(async (image) => {
      const base64 = await image.readAsBase64String();
      const uuid = crypto.randomUUID();

      imageList.push({
        uuid,
        base64,
        mime: image.contentType
      });

      return { src: uuid };
    });

    const conversion = await mammoth.convertToHtml(
      { buffer },
      {
        // Keep empty paragraphs in the generated HTML.
        ignoreEmptyParagraphs: false,
        convertImage: collectImage
      }
    );

    const markdown = html2md(conversion.value);

    return {
      rawText: markdown.rawText,
      imageList
    };
  } catch (err) {
    console.log('error doc read:', err);
    return Promise.reject('Can not read doc file, please convert to PDF');
  }
};