mirror of
https://github.com/labring/FastGPT.git
synced 2025-10-16 16:04:34 +00:00

* feat: concat usage code (#5657) * feat: dataset parse queue (#5661) * feat: chat usage concat (#5669) * perf: search test usage * feat: chat usage concat * fix: ts * fix: ts * feat: chat node response store (#5675) * feat: chat node response store * limit export * test * add ai generate node (#5506) * add node copilot * apply code * update dynamic input & output * add code test * usage * dynamic input border render * optimize input & output * optimize code * update style * change card to popover * prompt editor basic * prompt editor * handle key down * update prompt * merge * fix * fix * fix * perf: workflow performance (#5677) * feat: chat node response store * limit export * perf: workflow performance * remove log * fix: app template get duplicate (#5682) * fix: dynamic input lock & code param (#5680) * fix: dynamic input lock & code param * fix * fix * feat: multi node data sync & system tool hot-swapping (#5575) * Enhance file upload functionality and system tool integration (#5257) * Enhance file upload functionality and system tool integration * Add supplementary documents and optimize the upload interface * Refactor file plugin types and update upload configurations * Refactor MinIO configuration variables and clean up API plugin handlers for improved readability and consistency * File name change * Refactor SystemTools component layout * fix i18n * fix * fix * fix * optimize app logs sort (#5310) * log keys config modal * multiple select * api * fontsize * code * chatid * fix build * fix * fix component * change name * log keys config * fix * delete unused * fix * chore: minio service class rewrite * chore: s3 plugin upload * feat: system global cache with multi node sync feature * feat: cache * chore: move images * docs: update & remove useless code * chore: resolve merge conflicts * chore: adjust the code * chore: adjust * deps: upgrade @fastgpt-sdk/plugin to 0.1.17 * perf(s3): s3 config * fix: cache syncKey refresh * fix: update @fastgpt-sdk/plugin to 
v0.1.18 removing mongo definition for fixing vitest * chore: adjust --------- Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com> Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: Archer <545436317@qq.com> * perf: s3 api code * fix: toolbox empty when second open modal * feat: http tool set (#5599) * feat: http toolSet manual create front end * feat: http toolSet manual create i18n * feat: http toolSet manual create back end * feat: auth, as tool param, adapt mcp * fix: delete unused httpPlugin * fix: delete FlowNodeTypeEnum.httpPlugin * fix: AppTypeEnum include httpToolSet and httpPlugin * fix * delete console * fix * output schema * fix * fix bg * fix base url * fix --------- Co-authored-by: heheer <zhiyu44@qq.com> * feat: app count * perf: type check * feat: catch error * perf: plugin hot-swapping (#5688) * perf: plugin hot-swapping * chore: adjust code * perf: cite data auth * fix http toolset (#5689) * temp * fix http tool set * fix * template author hide * dynamic IO ui * fix: auth test * fix dynamic input & output (#5690) Co-authored-by: Archer <545436317@qq.com> * fix: dynamic output id * doc * feat: model permission (#5666) * feat(permission): model permission definition & api * chore: support update model's collaborators * feat: remove unauthedmodel when paste and import * fix: type error * fix: test setup global model list * fix: http tool api * chore: update fastgpt-sdk version * chore: remove useless code * chore: myModelList cache * perf: user who is not manager can not configure model permission (FE) * perf: model => Set * feat: getMyModels moved to opensource code; cache the myModelList * fix: type error * fix dynamic input reference select type (#5694) * remove unique index * read file usage * perf: connection error * fix: abort token count * fix: debug usage concat * fix: immer clone object * fix: immer clone object * perf: throw error when error chat * update audit i18n * fix: 修复识别pptx文件后,返回内容顺序错乱问题 (#5696) * fix: pptx 
sort error * fix prompt editor (#5695) * fix prompt editor * fix * fix: redis cache prefix (#5697) * fix: redis cache prefix * fix: cache * fix: get model collaborator by model.model * feat: hint for model per * rename bucket name * model ui * doc * doc --------- Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> Co-authored-by: Ctrlz <143257420+ctrlz526@users.noreply.github.com> Co-authored-by: Zeng Qingwen <143274079+fishwww-ww@users.noreply.github.com> Co-authored-by: heheer <zhiyu44@qq.com> Co-authored-by: Deepturn <33342819+Deepturn@users.noreply.github.com>
141 lines
4.1 KiB
TypeScript
141 lines
4.1 KiB
TypeScript
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
|
import fs from 'fs';
|
|
import decompress from 'decompress';
|
|
import { DOMParser } from '@xmldom/xmldom';
|
|
import { clearDirFiles } from '../../common/file/utils';
|
|
import { addLog } from '../../common/system/log';
|
|
|
|
/** Root directory under which the temporary office file and its extracted contents are created. */
const DEFAULTDECOMPRESSSUBLOCATION = '/tmp';
|
|
|
|
/**
 * Builds a fresh temp-file path of the form `<temp root>/<generated id>.<ext>`.
 *
 * @param ext File extension to append, without the leading dot.
 * @returns The path string; nothing is created on disk here.
 */
function getNewFileName(ext: string) {
  const uniqueId = getNanoid();
  const fileName = `${uniqueId}.${ext}`;
  return `${DEFAULTDECOMPRESSSUBLOCATION}/${fileName}`;
}
|
|
|
|
/**
 * Parses an XML string into a DOM document using a fresh `DOMParser`
 * with the `text/xml` MIME type.
 *
 * @param xml Raw XML source.
 * @returns The parsed document.
 */
const parseString = (xml: string) => new DOMParser().parseFromString(xml, 'text/xml');
|
|
|
|
/**
 * Extracts the text of a PowerPoint archive.
 *
 * The slide and notes-slide XML entries are decompressed into `decompressPath`,
 * ordered by the first number found in their path, and every `a:p` paragraph
 * that contains `a:t` text runs is turned into one output line.
 *
 * @param filepath Path of the .pptx file on disk.
 * @param decompressPath Directory passed to `decompress` as the extraction target.
 * @param encoding Encoding tried first when reading each extracted XML file.
 * @returns All paragraph lines of all entries, joined with `\n`.
 *          Rejects with the string '解析 PPT 失败' when no slide XML was extracted.
 */
const parsePowerPoint = async ({
  filepath,
  decompressPath,
  encoding
}: {
  filepath: string;
  decompressPath: string;
  encoding: BufferEncoding;
}) => {
  // Archive entries that hold the content of interest: slides and notes slides
  const allFilesRegex = /ppt\/(notesSlides|slides)\/(notesSlide|slide)\d+.xml/g;
  const slidesRegex = /ppt\/slides\/slide\d+.xml/g;

  // First run of digits in an entry path, or 0 when the path has none
  const slideNumberOf = (path: string) => {
    const digits = path.match(/\d+/);
    return digits ? parseInt(digits[0]) : 0;
  };

  // Reads one extracted entry; if the first read fails, retries once as utf-8
  const readEntry = async (entryPath: string) => {
    const fullPath = `${decompressPath}/${entryPath}`;
    try {
      return await fs.promises.readFile(fullPath, encoding);
    } catch (err) {
      return await fs.promises.readFile(fullPath, 'utf-8');
    }
  };

  // Only the entries matching allFilesRegex are kept by the filter
  const files = await decompress(filepath, decompressPath, {
    filter: (entry: { path: string }) => entry.path.match(allFilesRegex) !== null
  });

  // At least one slide XML file must be present among the extracted entries
  const hasSlideXml = files.some((file: { path: string }) => file.path.match(slidesRegex));
  if (files.length === 0 || !hasSlideXml) {
    return Promise.reject('解析 PPT 失败');
  }

  // Order entries by slide number so the text comes out in presentation order
  const orderedFiles = files.sort(
    (a: { path: string }, b: { path: string }) => slideNumberOf(a.path) - slideNumberOf(b.path)
  );

  const xmlContents = await Promise.all(
    orderedFiles.map((file: { path: string }) => readEntry(file.path))
  );

  const entryTexts = xmlContents.map((xml) => {
    const paragraphs = Array.from(parseString(xml).getElementsByTagName('a:p'));
    const lines: string[] = [];

    for (const paragraph of paragraphs) {
      const textRuns = Array.from(paragraph.getElementsByTagName('a:t'));
      // Paragraphs without any a:t element contribute no line at all
      if (textRuns.length === 0) continue;

      let line = '';
      for (const run of textRuns) {
        // Only the first child node of each a:t element is read
        const firstChild = run.childNodes[0];
        if (firstChild && firstChild.nodeValue) {
          line += firstChild.nodeValue;
        }
      }
      lines.push(line);
    }

    return lines.join('\n');
  });

  return entryTexts.join('\n');
};
|
|
|
|
/**
 * Extracts the text of an office document held in memory.
 *
 * The buffer is written to a temporary file, parsed, and the temporary file
 * and extraction directory are removed again whether parsing succeeds or fails.
 *
 * @param buffer Raw bytes of the document.
 * @param encoding Encoding forwarded to the parser for reading extracted XML.
 * @param extension File extension without the dot; only 'pptx' is supported.
 * @returns The extracted text.
 *          Rejects with the string '只能读取 .pptx 文件' for any other extension,
 *          and with the parser's own rejection value when parsing fails.
 */
export const parseOffice = async ({
  buffer,
  encoding,
  extension
}: {
  buffer: Buffer;
  encoding: BufferEncoding;
  extension: string;
}) => {
  // Reject unsupported types before touching the disk, so nothing needs cleaning up
  if (extension !== 'pptx') {
    return Promise.reject('只能读取 .pptx 文件');
  }

  // Create the temp root if it does not exist
  if (!fs.existsSync(DEFAULTDECOMPRESSSUBLOCATION)) {
    fs.mkdirSync(DEFAULTDECOMPRESSSUBLOCATION, { recursive: true });
  }

  // Temp file for the raw document and a sibling directory for its extracted entries
  const filepath = getNewFileName(extension);
  const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/${getNanoid()}`;

  try {
    // fs ignores the `encoding` option when the data is a Buffer, so the bytes are written as-is
    fs.writeFileSync(filepath, buffer);

    // `await` is required: without it the rejection bypasses this try block and
    // `finally` would delete the files while parsing is still in progress
    return await parsePowerPoint({ filepath, decompressPath, encoding });
  } catch (error) {
    addLog.error(`Load ppt error`, { error });
    // Rethrow the same value so callers still see the original failure
    throw error;
  } finally {
    // Best-effort cleanup: a cleanup failure is logged and must not mask the parse result
    try {
      if (fs.existsSync(filepath)) {
        fs.unlinkSync(filepath);
      }
    } catch (error) {
      addLog.error(`Remove ppt temp file error`, { error });
    }
    try {
      clearDirFiles(decompressPath);
    } catch (error) {
      addLog.error(`Clear ppt decompress dir error`, { error });
    }
  }
};
|