fix: add try/catch fallback to utf-8 for pptx encoding (#1393)

This commit is contained in:
heheer
2024-05-08 18:10:37 +08:00
committed by GitHub
parent 2e468fc8ca
commit 7b75a99ba2
3 changed files with 21 additions and 11 deletions

View File

@@ -28,6 +28,6 @@ try {
console.log(error); console.log(error);
} }
export const MongoRwaTextBuffer: Model<RawTextBufferSchemaType> = export const MongoRawTextBuffer: Model<RawTextBufferSchemaType> =
models[collectionName] || model(collectionName, RawTextBufferSchema); models[collectionName] || model(collectionName, RawTextBufferSchema);
MongoRwaTextBuffer.syncIndexes(); MongoRawTextBuffer.syncIndexes();

View File

@@ -6,7 +6,7 @@ import { DatasetFileSchema } from '@fastgpt/global/core/dataset/type';
import { MongoFileSchema } from './schema'; import { MongoFileSchema } from './schema';
import { detectFileEncoding } from '@fastgpt/global/common/file/tools'; import { detectFileEncoding } from '@fastgpt/global/common/file/tools';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common'; import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { MongoRwaTextBuffer } from '../../buffer/rawText/schema'; import { MongoRawTextBuffer } from '../../buffer/rawText/schema';
import { readFileRawContent } from '../read/utils'; import { readFileRawContent } from '../read/utils';
import { PassThrough } from 'stream'; import { PassThrough } from 'stream';
@@ -162,7 +162,7 @@ export const readFileContentFromMongo = async ({
filename: string; filename: string;
}> => { }> => {
// read buffer // read buffer
const fileBuffer = await MongoRwaTextBuffer.findOne({ sourceId: fileId }).lean(); const fileBuffer = await MongoRawTextBuffer.findOne({ sourceId: fileId }).lean();
if (fileBuffer) { if (fileBuffer) {
return { return {
rawText: fileBuffer.rawText, rawText: fileBuffer.rawText,
@@ -208,7 +208,7 @@ export const readFileContentFromMongo = async ({
}); });
if (rawText.trim()) { if (rawText.trim()) {
MongoRwaTextBuffer.create({ MongoRawTextBuffer.create({
sourceId: fileId, sourceId: fileId,
rawText, rawText,
metadata: { metadata: {

View File

@@ -44,9 +44,13 @@ const parsePowerPoint = async ({
} }
// Returning an array of all the xml contents read using fs.readFileSync // Returning an array of all the xml contents read using fs.readFileSync
const xmlContentArray = files.map((file) => const xmlContentArray = files.map((file) => {
fs.readFileSync(`${decompressPath}/${file.path}`, encoding) try {
); return fs.readFileSync(`${decompressPath}/${file.path}`, encoding);
} catch (err) {
return fs.readFileSync(`${decompressPath}/${file.path}`, 'utf-8');
}
});
let responseArr: string[] = []; let responseArr: string[] = [];
@@ -95,9 +99,15 @@ export const parseOffice = async ({
// const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/test`; // const decompressPath = `${DEFAULTDECOMPRESSSUBLOCATION}/test`;
// write new file // write new file
fs.writeFileSync(filepath, buffer, { try {
encoding fs.writeFileSync(filepath, buffer, {
}); encoding
});
} catch (err) {
fs.writeFileSync(filepath, buffer, {
encoding: 'utf-8'
});
}
const text = await (async () => { const text = await (async () => {
try { try {