mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 13:03:50 +00:00

* feat: org CRUD (#3380) * feat: add org schema * feat: org manage UI * feat: OrgInfoModal * feat: org tree view * feat: org management * fix: init root org * feat: org permission for app * feat: org support for dataset * fix: disable org role control * styles: opt type signatures * fix: remove unused permission * feat: delete org collaborator * perf: Team org ui (#3499) * perf: org ui * perf: org ui * feat: org auth for app & dataset (#3498) * feat: auth org resource permission * feat: org auth support for app & dataset * perf: org permission check (#3500) * i18n (#3501) * name * i18n * feat: support dataset changeOwner (#3483) * feat: support dataset changeOwner * chore: update dataset change owner api * feat: permission manage UI for org (#3503) * perf: password check;perf: image upload check;perf: sso login check (#3509) * perf: password check * perf: image upload check * perf: sso login check * force show update notification modal & fix login page text (#3512) * fix login page English text * update notification modal * perf: notify account (#3515) * perf(plugin): improve searXNG empty result handling and documentation (#3507) * perf(plugin): improve searXNG empty result handling and documentation * 修改了文档和代码部分无搜索的结果的反馈 * refactor: org pathId (#3516) * optimize payment process (#3517) * feat: support wecom sso (#3518) * feat: support wecom sso * chore: remove unused wecom js-sdk dependency * fix qrcode script (#3520) * fix qrcode script * i18n * perf: full text collection and search code;perf: rename function (#3519) * perf: full text collection and search code * perf: rename function * perf: notify modal * remove invalid code * perf: sso login * perf: pay process * 4.8.18 test (#3524) * perf: remove local token * perf: index * perf: file encoding;perf: leave team code;@c121914yu perf: full text search code (#3528) * perf: text encoding * perf: leave team code * perf: full text search code * fix: http status * perf: embedding search and vector avatar * perf: 
async read file (#3531) * refactor: team permission manager (#3535) * perf: classify org, group and member * refactor: team per manager * fix: missing functions * 4.8.18 test (#3543) * perf: login check * doc * perf: llm model config * perf: team clb config * fix: MemberModal UI (#3553) * fix: adapt MemberModal title and icon * fix: adapt member modal * fix: search input placeholder * fix: add button text * perf: org permission (#3556) * docs:用户答疑的官方文档补充 (#3540) * docs:用户答疑的官方文档补充 * 问题回答的内容修补 * share link random avatar (#3541) * share link random avatar * fix * delete unused code * share page avatar (#3558) * feat: init 4818 * share page avatar * feat: tmp upgrade code (#3559) * feat: tmp upgrade code * fulltext search test * update action * full text tmp code (#3561) * full text tmp code * fix: init * fix: init * remove tmp code * remove tmp code * 4818-alpha * 4.8.18 test (#3562) * full text tmp code * fix: init * upgrade code * account log * account log * perf: dockerfile * upgrade code * chore: update docs app template submission (#3564) --------- Co-authored-by: a.e. <49438478+I-Info@users.noreply.github.com> Co-authored-by: Finley Ge <32237950+FinleyGe@users.noreply.github.com> Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com>
132 lines
3.8 KiB
TypeScript
132 lines
3.8 KiB
TypeScript
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
|
import fs from 'fs';
|
|
import decompress from 'decompress';
|
|
import { DOMParser } from '@xmldom/xmldom';
|
|
import { clearDirFiles } from '../../common/file/utils';
|
|
import { addLog } from '../../common/system/log';
|
|
|
|
/** Directory in which temp files and decompressed archives are placed. */
const DEFAULTDECOMPRESSSUBLOCATION = '/tmp';

/**
 * Build a new temp-file path inside the temp directory.
 *
 * @param ext File extension, without the leading dot.
 * @returns A path of the form `<temp dir>/<nanoid>.<ext>`.
 */
function getNewFileName(ext: string) {
  const uniqueId = getNanoid();
  return `${DEFAULTDECOMPRESSSUBLOCATION}/${uniqueId}.${ext}`;
}
|
|
|
|
/**
 * Parse an XML string into a DOM document using a fresh `DOMParser`.
 *
 * @param xml Raw XML text.
 * @returns The document produced by `parseFromString(xml, 'text/xml')`.
 */
const parseString = (xml: string) => new DOMParser().parseFromString(xml, 'text/xml');
|
|
|
|
/**
 * Extract the text of a .pptx archive.
 *
 * The archive is decompressed into `decompressPath`, keeping only the slide
 * and notes-slide XML entries. Every `a:p` paragraph that contains at least
 * one `a:t` text run becomes one output line.
 *
 * @param filepath Path of the .pptx file on disk.
 * @param decompressPath Directory the matching archive entries are extracted to.
 * @param encoding Encoding used to read the extracted XML; if reading with it
 *   fails, the file is read again as utf-8.
 * @returns The extracted text, paragraphs and files joined with `\n`.
 * @throws Rejects with the string '解析 PPT 失败' when the archive contains no
 *   slide XML entries.
 */
const parsePowerPoint = async ({
  filepath,
  decompressPath,
  encoding
}: {
  filepath: string;
  decompressPath: string;
  encoding: BufferEncoding;
}) => {
  // Archive entries that hold the content of interest. The dot before `xml`
  // is escaped so that only a literal ".xml" matches, and the patterns are
  // non-global because they are only used as yes/no tests.
  const allFilesRegex = /ppt\/(notesSlides|slides)\/(notesSlide|slide)\d+\.xml/;
  const slidesRegex = /ppt\/slides\/slide\d+\.xml/;

  // Extract only the matching entries into decompressPath.
  const files = await decompress(filepath, decompressPath, {
    filter: (x) => allFilesRegex.test(x.path)
  });

  // Verify that at least one slide XML file exists in the extracted files list.
  if (files.length === 0 || !files.some((file) => slidesRegex.test(file.path))) {
    return Promise.reject('解析 PPT 失败');
  }

  // Read every extracted XML file. The read must be awaited inside the `try`:
  // readFile reports failures by rejecting its promise, and a rejection is
  // only catchable here once it has been awaited.
  const xmlContentArray = await Promise.all(
    files.map(async (file) => {
      const xmlPath = `${decompressPath}/${file.path}`;
      try {
        return await fs.promises.readFile(xmlPath, encoding);
      } catch (err) {
        // Fall back to utf-8 when reading with the requested encoding fails.
        return fs.promises.readFile(xmlPath, 'utf-8');
      }
    })
  );

  const responseArr: string[] = xmlContentArray.map((xmlContent) => {
    /** Find paragraph nodes with a:p tags */
    const xmlParagraphNodesList = parseString(xmlContent).getElementsByTagName('a:p');

    return (
      Array.from(xmlParagraphNodesList)
        // Drop paragraph nodes that do not have any text nodes (identified by the a:t tag)
        .filter((paragraphNode) => paragraphNode.getElementsByTagName('a:t').length !== 0)
        .map((paragraphNode) => {
          /** Find text nodes with a:t tags */
          const xmlTextNodeList = paragraphNode.getElementsByTagName('a:t');
          // Concatenate the runs of one paragraph, skipping empty text nodes.
          return Array.from(xmlTextNodeList)
            .filter((textNode) => textNode.childNodes[0] && textNode.childNodes[0].nodeValue)
            .map((textNode) => textNode.childNodes[0].nodeValue)
            .join('');
        })
        .join('\n')
    );
  });

  return responseArr.join('\n');
};
|
|
|
|
/**
 * Extract the text of an office document held in memory.
 *
 * Only `.pptx` is supported. The buffer is written to a temp file, parsed,
 * and the temp file and decompress directory are removed afterwards, whether
 * parsing succeeded or failed.
 *
 * @param buffer Raw bytes of the document.
 * @param encoding Encoding passed on to the parser for reading extracted XML.
 * @param extension File extension without the leading dot.
 * @returns The extracted text.
 * @throws Rejects with the string '只能读取 .pptx 文件' for any extension other
 *   than `pptx`; rethrows whatever the parser rejects with.
 */
export const parseOffice = async ({
  buffer,
  encoding,
  extension
}: {
  buffer: Buffer;
  encoding: BufferEncoding;
  extension: string;
}) => {
  // Reject unsupported formats before anything is written to disk, so there
  // is nothing to clean up on this path.
  if (extension !== 'pptx') {
    return Promise.reject('只能读取 .pptx 文件');
  }

  // Create the temp directory if it does not exist.
  if (!fs.existsSync(DEFAULTDECOMPRESSSUBLOCATION)) {
    fs.mkdirSync(DEFAULTDECOMPRESSSUBLOCATION, { recursive: true });
  }

  // Temp file name and a unique directory for the decompressed entries.
  const filepath = getNewFileName(extension);
  const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/${getNanoid()}`;

  try {
    // The data is a Buffer, so its bytes are written as-is; an `encoding`
    // option would only apply to string data and is therefore omitted.
    fs.writeFileSync(filepath, buffer);

    // `await` here so that a rejection is seen by the catch/finally below.
    return await parsePowerPoint({ filepath, decompressPath, encoding });
  } catch (error) {
    addLog.error(`Load ppt error`, { error });
    // Rethrow unchanged so callers observe the same rejection value.
    throw error;
  } finally {
    // Always remove temp artifacts. Each may be missing if the failure
    // happened before it was created, hence the existence checks.
    if (fs.existsSync(filepath)) {
      fs.unlinkSync(filepath);
    }
    if (fs.existsSync(decompressPath)) {
      clearDirFiles(decompressPath);
    }
  }
};
|