Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -0,0 +1,27 @@
import axios from 'axios';
import { addLog } from '../../system/log';
import { serverRequestBaseUrl } from '../../api/serverRequest';
import { getFileContentTypeFromHeader, guessBase64ImageType } from '../utils';
export const getImageBase64 = async (url: string) => {
addLog.debug(`Load image to base64: ${url}`);
try {
const response = await axios.get(url, {
baseURL: serverRequestBaseUrl,
responseType: 'arraybuffer',
proxy: false
});
const base64 = Buffer.from(response.data, 'binary').toString('base64');
const imageType =
getFileContentTypeFromHeader(response.headers['content-type']) ||
guessBase64ImageType(base64);
return `data:${imageType};base64,${base64}`;
} catch (error) {
addLog.debug(`Load image to base64 failed: ${url}`);
console.log(error);
return Promise.reject(error);
}
};