dataset save raw file

This commit is contained in:
archer
2023-09-03 22:39:09 +08:00
parent 086ea83fac
commit a754ceaf3b
37 changed files with 347 additions and 144 deletions

View File

@@ -0,0 +1,35 @@
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { authUser } from '@/service/utils/auth';
import { PgClient } from '@/service/pg';
import { PgTrainingTableName } from '@/constants/plugin';
/**
 * API route that adds a `file_id` column to the training table when it is
 * not there yet.
 *
 * Flow:
 * 1. Authenticate the request via `authUser` with `authRoot: true`.
 * 2. Look up `file_id` for the training table in `information_schema.columns`
 *    (schema `public`).
 * 3. If the column is already listed, reply with a notice and stop.
 * 4. Otherwise run `ALTER TABLE ... ADD COLUMN file_id VARCHAR(100)` and reply
 *    with the raw result of that query.
 *
 * Any error thrown along the way is reported through `jsonRes` with code 500.
 *
 * @param req - Incoming Next.js API request (passed to `authUser`).
 * @param res - Next.js API response, written to through `jsonRes`.
 */
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
  try {
    await authUser({ req, authRoot: true });

    // Joined with '\n' so the statement text sent to Postgres is one
    // multi-line string, one clause per line.
    const columnLookupSql = [
      'SELECT 1',
      'FROM information_schema.columns',
      "WHERE table_schema = 'public'",
      `AND table_name = '${PgTrainingTableName}'`,
      "AND column_name = 'file_id'"
    ].join('\n');
    const lookup = await PgClient.query(columnLookupSql);

    if (lookup.rowCount > 0) {
      // The message reads "the file_id field already exists".
      return jsonRes(res, { data: '已经存在file_id字段' });
    }

    // The column is missing: create it and hand the query result back as-is.
    const alterResult = await PgClient.query(
      `ALTER TABLE ${PgTrainingTableName} ADD COLUMN file_id VARCHAR(100)`
    );
    jsonRes(res, { data: alterResult });
  } catch (error) {
    jsonRes(res, { code: 500, error });
  }
}

View File

@@ -9,12 +9,11 @@ import { startQueue } from '@/service/utils/tools';
import { PgClient } from '@/service/pg';
import { modelToolMap } from '@/utils/plugin';
import { getVectorModel } from '@/service/utils/data';
export type DateItemType = { a: string; q: string; source?: string };
import { DatasetItemType } from '@/types/plugin';
export type Props = {
kbId: string;
data: DateItemType[];
data: DatasetItemType[];
mode: `${TrainingModeEnum}`;
prompt?: string;
};
@@ -95,7 +94,7 @@ export async function pushDataToKb({
// 过滤重复的 qa 内容
const set = new Set();
const filterData: DateItemType[] = [];
const filterData: DatasetItemType[] = [];
data.forEach((item) => {
if (!item.q) return;
@@ -120,13 +119,10 @@ export async function pushDataToKb({
// 数据库去重
const insertData = (
await Promise.allSettled(
filterData.map(async ({ q, a = '', source }) => {
filterData.map(async (data) => {
let { q, a } = data;
if (mode !== TrainingModeEnum.index) {
return Promise.resolve({
q,
a,
source
});
return Promise.resolve(data);
}
if (!q) {
@@ -152,23 +148,17 @@ export async function pushDataToKb({
console.log(error);
error;
}
return Promise.resolve({
q,
a,
source
});
return Promise.resolve(data);
})
)
)
.filter((item) => item.status === 'fulfilled')
.map<DateItemType>((item: any) => item.value);
.map<DatasetItemType>((item: any) => item.value);
// 插入记录
const insertRes = await TrainingData.insertMany(
insertData.map((item) => ({
q: item.q,
a: item.a,
source: item.source,
...item,
userId,
kbId,
mode,

View File

@@ -41,7 +41,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
const response: any = await PgClient.query(
`BEGIN;
SET LOCAL ivfflat.probes = ${global.systemEnv.pgIvfflatProbe || 10};
select id,q,a,source,(vector <#> '[${
select id, q, a, source, file_id, (vector <#> '[${
vectors[0]
}]') * -1 AS score from ${PgTrainingTableName} where kb_id='${kbId}' AND user_id='${userId}' order by vector <#> '[${
vectors[0]
@@ -49,7 +49,9 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
COMMIT;`
);
jsonRes<Response>(res, { data: response?.[2]?.rows || [] });
jsonRes<Response>(res, {
data: response?.[2]?.rows || []
});
} catch (err) {
console.log(err);
jsonRes(res, {

View File

@@ -3,6 +3,7 @@ import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { GridFSStorage } from '@/service/lib/gridfs';
import { authFileToken } from './readUrl';
import jschardet from 'jschardet';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
@@ -12,6 +13,10 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const { fileId, userId } = await authFileToken(token);
if (!fileId) {
throw new Error('fileId is empty');
}
const gridFs = new GridFSStorage('dataset', userId);
const [file, buffer] = await Promise.all([
@@ -19,9 +24,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
gridFs.download(fileId)
]);
res.setHeader('encoding', file.encoding);
const encoding = jschardet.detect(buffer)?.encoding;
res.setHeader('encoding', encoding);
res.setHeader('Content-Type', file.contentType);
res.setHeader('Cache-Control', 'public, max-age=3600');
res.setHeader('Content-Disposition', `inline; filename="${encodeURIComponent(file.filename)}"`);
res.end(buffer);
} catch (error) {

View File

@@ -28,9 +28,10 @@ class UploadModel {
limits: {
fieldSize: maxSize
},
preservePath: true,
storage: multer.diskStorage({
filename: (_req, file, cb) => {
const { ext } = path.parse(file.originalname);
const { ext } = path.parse(decodeURIComponent(file.originalname));
cb(null, nanoid() + ext);
}
})
@@ -44,8 +45,13 @@ class UploadModel {
return reject(error);
}
// @ts-ignore
resolve({ files: req.files });
resolve({
// @ts-ignore
files: req.files?.map((file) => ({
...file,
originalname: decodeURIComponent(file.originalname)
}))
});
});
});
}
@@ -56,9 +62,9 @@ const upload = new UploadModel();
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { userId } = await authUser({ req });
const { userId } = await authUser({ req, authToken: true });
const { files } = await upload.doUpload(req, res);
const { files = [] } = await upload.doUpload(req, res);
const gridFs = new GridFSStorage('dataset', userId);

View File

@@ -30,7 +30,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const where: any = [['user_id', userId], 'AND', ['id', dataId]];
const searchRes = await PgClient.select<KbDataItemType>(PgTrainingTableName, {
fields: ['kb_id', 'id', 'q', 'a', 'source'],
fields: ['kb_id', 'id', 'q', 'a', 'source', 'file_id'],
where,
limit: 1
});

View File

@@ -43,7 +43,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const [searchRes, total] = await Promise.all([
PgClient.select<KbDataItemType>(PgTrainingTableName, {
fields: ['id', 'q', 'a', 'source'],
fields: ['id', 'q', 'a', 'source', 'file_id'],
where,
order: [{ field: 'id', mode: 'DESC' }],
limit: pageSize,

View File

@@ -8,10 +8,11 @@ import { insertKbItem, PgClient } from '@/service/pg';
import { modelToolMap } from '@/utils/plugin';
import { getVectorModel } from '@/service/utils/data';
import { getVector } from '@/pages/api/openapi/plugin/vector';
import { DatasetItemType } from '@/types/plugin';
export type Props = {
kbId: string;
data: { a: string; q: string; source?: string };
data: DatasetItemType;
};
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {