Files
FastGPT/packages/service/worker/readFile/parseOffice.ts
Archer 051455238c V4.13.0 features (#5693)
* feat: concat usage code (#5657)

* feat: dataset parse queue (#5661)

* feat: chat usage concat (#5669)

* perf: search test usage

* feat: chat usage concat

* fix: ts

* fix: ts

* feat: chat node response store (#5675)

* feat: chat node response store

* limit export

* test

* add ai generate node (#5506)

* add node copilot

* apply code

* update dynamic input & output

* add code test

* usage

* dynamic input border render

* optimize input & output

* optimize code

* update style

* change card to popover

* prompt editor basic

* prompt editor

* handle key down

* update prompt

* merge

* fix

* fix

* fix

* perf: workflow performance (#5677)

* feat: chat node response store

* limit export

* perf: workflow performance

* remove log

* fix: app template get duplicate (#5682)

* fix: dynamic input lock & code param (#5680)

* fix: dynamic input lock & code param

* fix

* fix

* feat: multi node data sync & system tool hot-swapping (#5575)

* Enhance file upload functionality and system tool integration (#5257)

* Enhance file upload functionality and system tool integration

* Add supplementary documents and optimize the upload interface

* Refactor file plugin types and update upload configurations

* Refactor MinIO configuration variables and clean up API plugin handlers for improved readability and consistency

* File name change

* Refactor SystemTools component layout

* fix i18n

* fix

* fix

* fix

* optimize app logs sort (#5310)

* log keys config modal

* multiple select

* api

* fontsize

* code

* chatid

* fix build

* fix

* fix component

* change name

* log keys config

* fix

* delete unused

* fix

* chore: minio service class rewrite

* chore: s3 plugin upload

* feat: system global cache with multi node sync feature

* feat: cache

* chore: move images

* docs: update & remove useless code

* chore: resolve merge conflicts

* chore: adjust the code

* chore: adjust

* deps: upgrade @fastgpt-sdk/plugin to 0.1.17

* perf(s3): s3 config

* fix: cache syncKey refresh

* fix: update @fastgpt-sdk/plugin to v0.1.18 removing mongo definition for fixing vitest

* chore: adjust

---------

Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Archer <545436317@qq.com>

* perf: s3 api code

* fix: toolbox empty when second open modal

* feat: http tool set (#5599)

* feat: http toolSet manual create front end

* feat: http toolSet manual create i18n

* feat: http toolSet manual create back end

* feat: auth, as tool param, adapt mcp

* fix: delete unused httpPlugin

* fix: delete FlowNodeTypeEnum.httpPlugin

* fix: AppTypeEnum include httpToolSet and httpPlugin

* fix

* delete console

* fix

* output schema

* fix

* fix bg

* fix base url

* fix

---------

Co-authored-by: heheer <zhiyu44@qq.com>

* feat: app count

* perf: type check

* feat: catch error

* perf: plugin hot-swapping (#5688)

* perf: plugin hot-swapping

* chore: adjust code

* perf: cite data auth

* fix http toolset (#5689)

* temp

* fix http tool set

* fix

* template author hide

* dynamic IO ui

* fix: auth test

* fix dynamic input & output (#5690)

Co-authored-by: Archer <545436317@qq.com>

* fix: dynamic output id

* doc

* feat: model permission (#5666)

* feat(permission): model permission definition & api

* chore: support update model's collaborators

* feat: remove unauthedmodel when paste and import

* fix: type error

* fix: test setup global model list

* fix: http tool api

* chore: update fastgpt-sdk version

* chore: remove useless code

* chore: myModelList cache

* perf: user who is not manager can not configure model permission (FE)

* perf: model => Set

* feat: getMyModels moved to opensource code; cache the myModelList

* fix: type error

* fix dynamic input reference select type (#5694)

* remove unique index

* read file usage

* perf: connection error

* fix: abort token count

* fix: debug usage concat

* fix: immer clone object

* fix: immer clone object

* perf: throw error when error chat

* update audit i18n

* fix: 修复识别pptx文件后,返回内容顺序错乱问题 (#5696)

* fix: pptx sort error

* fix prompt editor (#5695)

* fix prompt editor

* fix

* fix: redis cache prefix (#5697)

* fix: redis cache prefix

* fix: cache

* fix: get model collaborator by model.model

* feat: hint for model per

* rename bucket name

* model ui

* doc

* doc

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com>
Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com>
Co-authored-by: Zeng Qingwen <143274079+fishwww-ww@users.noreply.github.com>
Co-authored-by: heheer <zhiyu44@qq.com>
Co-authored-by: Deepturn <33342819+Deepturn@users.noreply.github.com>
2025-09-24 22:40:31 +08:00

141 lines
4.1 KiB
TypeScript

import { getNanoid } from '@fastgpt/global/common/string/tools';
import fs from 'fs';
import decompress from 'decompress';
import { DOMParser } from '@xmldom/xmldom';
import { clearDirFiles } from '../../common/file/utils';
import { addLog } from '../../common/system/log';
/** Base directory for the temporary office file and its extraction folder. */
const DEFAULTDECOMPRESSSUBLOCATION = '/tmp';

/**
 * Build a fresh temp-file path of the form `<base dir>/<nanoid>.<ext>`.
 *
 * @param ext - File extension without the leading dot.
 * @returns The generated path; nothing is created on disk here.
 */
function getNewFileName(ext: string) {
  const baseName = [getNanoid(), ext].join('.');
  return [DEFAULTDECOMPRESSSUBLOCATION, baseName].join('/');
}
/**
 * Parse an XML string into a DOM document using a new `DOMParser` instance
 * with the `text/xml` MIME type.
 *
 * @param xml - Raw XML source.
 * @returns The document produced by `DOMParser.parseFromString`.
 */
const parseString = (xml: string) => new DOMParser().parseFromString(xml, 'text/xml');
/**
 * Extract the text of a .pptx archive.
 *
 * Only the slide and notes-slide XML entries are unpacked. They are ordered by
 * the first number found in their archive path, read back from disk, and the
 * `a:t` text of every `a:p` paragraph is collected.
 *
 * @param filepath - Path of the .pptx file on disk.
 * @param decompressPath - Directory the matching entries are extracted into.
 * @param encoding - Preferred encoding for reading the extracted XML; a failed
 *   read is retried as utf-8.
 * @returns One line per non-empty paragraph, joined with `\n` across all files.
 *   Rejects with '解析 PPT 失败' when no slide XML entry was extracted.
 */
const parsePowerPoint = async ({
  filepath,
  decompressPath,
  encoding
}: {
  filepath: string;
  decompressPath: string;
  encoding: BufferEncoding;
}) => {
  // Archive entries that may carry text we care about
  const allFilesRegex = /ppt\/(notesSlides|slides)\/(notesSlide|slide)\d+.xml/g;
  const slidesRegex = /ppt\/slides\/slide\d+.xml/g;

  // Unpack only the matching entries into decompressPath
  const files = await decompress(filepath, decompressPath, {
    filter: (entry) => entry.path.match(allFilesRegex) !== null
  });

  // At least one real slide must be present (an empty list trivially has none)
  const hasSlide = files.some((entry) => entry.path.match(slidesRegex) !== null);
  if (!hasSlide) {
    return Promise.reject('解析 PPT 失败');
  }

  // Order entries by the first run of digits in their path (0 when there is none).
  // The sort is done in place on the list returned by decompress.
  const leadingNumber = (entryPath: string) => {
    const digits = /\d+/.exec(entryPath);
    return digits ? parseInt(digits[0]) : 0;
  };
  const sortedFiles = files.sort(
    (left, right) => leadingNumber(left.path) - leadingNumber(right.path)
  );

  // Read one extracted file, retrying as utf-8 if the requested encoding fails
  const readXml = async (entryPath: string) => {
    const target = `${decompressPath}/${entryPath}`;
    try {
      return await fs.promises.readFile(target, encoding);
    } catch (err) {
      return await fs.promises.readFile(target, 'utf-8');
    }
  };
  const xmlContentArray = await Promise.all(sortedFiles.map((entry) => readXml(entry.path)));

  const textPerFile = xmlContentArray.map((xmlContent) => {
    const paragraphLines: string[] = [];

    for (const paragraph of Array.from(parseString(xmlContent).getElementsByTagName('a:p'))) {
      const textNodes = Array.from(paragraph.getElementsByTagName('a:t'));
      // Paragraphs without any a:t element contribute no line at all
      if (textNodes.length === 0) {
        continue;
      }

      // Concatenate the value of the first child node of every a:t element
      let line = '';
      for (const textNode of textNodes) {
        const value = textNode.childNodes[0] && textNode.childNodes[0].nodeValue;
        if (value) {
          line += value;
        }
      }
      paragraphLines.push(line);
    }

    return paragraphLines.join('\n');
  });

  return textPerFile.join('\n');
};
/**
 * Extract plain text from an office document held in memory.
 *
 * The buffer is written to a uniquely named temp file and handed to the
 * format-specific parser. The temp file and the extraction directory are
 * removed afterwards, whether parsing succeeded or failed.
 *
 * @param buffer - Raw bytes of the document.
 * @param encoding - Encoding forwarded to the parser for reading extracted XML.
 * @param extension - File extension without the dot; only `pptx` is handled.
 * @returns The extracted text.
 * @throws Rejects with '只能读取 .pptx 文件' for any other extension, or with
 *   the original error when writing or parsing fails (the error is logged first).
 */
export const parseOffice = async ({
  buffer,
  encoding,
  extension
}: {
  buffer: Buffer;
  encoding: BufferEncoding;
  extension: string;
}): Promise<string> => {
  // Reject unsupported formats before anything is written to disk
  if (extension !== 'pptx') {
    return Promise.reject('只能读取 .pptx 文件');
  }

  // Create the temp directory if it does not exist
  if (!fs.existsSync(DEFAULTDECOMPRESSSUBLOCATION)) {
    fs.mkdirSync(DEFAULTDECOMPRESSSUBLOCATION, { recursive: true });
  }

  const filepath = getNewFileName(extension);
  const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/${getNanoid()}`;

  try {
    // The encoding option is ignored for Buffer data, so the raw bytes are written as-is
    fs.writeFileSync(filepath, buffer);

    // `await` is required here: without it a rejection bypasses the catch below
    return await parsePowerPoint({ filepath, decompressPath, encoding });
  } catch (error) {
    addLog.error(`Load ppt error`, { error });
    // Re-throw the same value so callers keep receiving the original rejection
    throw error;
  } finally {
    // Always remove temp artifacts; either may be missing if an earlier step failed
    if (fs.existsSync(filepath)) {
      fs.unlinkSync(filepath);
    }
    if (fs.existsSync(decompressPath)) {
      clearDirFiles(decompressPath);
    }
  }
};