diff --git a/packages/web/common/file/utils.ts b/packages/web/common/file/utils.ts
index 38c1ae5ed..9b8a1618f 100644
--- a/packages/web/common/file/utils.ts
+++ b/packages/web/common/file/utils.ts
@@ -61,7 +61,18 @@ export const readFileRawText = ({
         reject(getErrText(err, 'Load file error'));
       };
 
-      reader.readAsText(file);
+      // Non-UTF-8 input is read as 'gb2312'; everything else (including 'unknown') as 'utf-8'.
+      detectFileEncoding(file)
+        .then((encoding) => {
+          reader.readAsText(
+            file,
+            ['iso-8859-1', 'windows-1252'].includes(encoding) ? 'gb2312' : 'utf-8'
+          );
+        })
+        .catch((err) => {
+          // The surrounding try/catch cannot observe an async rejection, so forward it.
+          reject(getErrText(err, 'Load file error'));
+        });
     } catch (error) {
       reject('The browser does not support file content reading');
     }
@@ -71,6 +82,42 @@ export const readFileRawText = ({
 export const readCsvRawText = async ({ file }: { file: File }) => {
   const rawText = await readFileRawText({ file });
   const csvArr = Papa.parse(rawText).data as string[][];
-
   return csvArr;
 };
+
+interface EncodingDetectionResult {
+  encoding: string | null;
+}
+
+/**
+ * Returns the first candidate label under which `buffer` decodes without a
+ * fatal error, or `{ encoding: null }` when every candidate throws.
+ *
+ * NOTE(review): the WHATWG Encoding Standard lists 'iso-8859-1' as a label of
+ * windows-1252, whose decoder should accept any byte sequence, so this likely
+ * only separates valid UTF-8 from everything else - confirm before relying on
+ * the 'windows-1252' or null results.
+ */
+function detectEncoding(buffer: ArrayBuffer): EncodingDetectionResult {
+  const encodings = ['utf-8', 'iso-8859-1', 'windows-1252'];
+  for (const encoding of encodings) {
+    try {
+      const decoder = new TextDecoder(encoding, { fatal: true });
+      decoder.decode(buffer);
+      return { encoding }; // decoded cleanly: report this label
+    } catch (e) {
+      // decoding failed: try the next candidate
+    }
+  }
+  return { encoding: null }; // no candidate decoded the buffer
+}
+
+/**
+ * Loads `file` into a buffer via `loadFile2Buffer` and returns the label
+ * reported by `detectEncoding`, or 'unknown' when it reports none.
+ */
+async function detectFileEncoding(file: File): Promise<string> {
+  const buffer = await loadFile2Buffer({ file });
+  const { encoding } = detectEncoding(buffer);
+  return encoding || 'unknown';
+}