perf: redirect request and err log replace (#768)

perf: dataset openapi

Archer authored 2024-01-23 10:46:02 +08:00 · committed by GitHub
parent 379673cae1
commit 318116627c
4 changed files with 210 additions and 5 deletions

View File

@@ -342,7 +342,7 @@ data is the collection ID.
 {{< /tabs >}}
-### Create a plain text collection (Commercial Edition)
+### Create a plain text collection
 Pass in a piece of text to create a collection; the text is split into chunks according to what you pass in.
@@ -351,7 +351,7 @@ data is the collection ID.
 {{< markdownify >}}
 ```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/text' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/text' \
 --header 'Authorization: Bearer {{authorization}}' \
 --header 'Content-Type: application/json' \
 --data-raw '{
@@ -418,7 +418,7 @@ data is the collection ID.
 {{< /tab >}}
 {{< /tabs >}}
-### Create a link collection (Commercial Edition)
+### Create a link collection
 Pass in a web link to create a collection; the corresponding page is crawled first, and the crawled text is then split into chunks.
@@ -427,7 +427,7 @@ data is the collection ID.
 {{< markdownify >}}
 ```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/link' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/link' \
 --header 'Authorization: Bearer {{authorization}}' \
 --header 'Content-Type: application/json' \
 --data-raw '{
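
With the path moved out of `proApi`, the text-collection endpoint can be called directly against the open-source API. Below is a minimal sketch of such a call in TypeScript; it assumes FastGPT is reachable at localhost:3000, that `FASTGPT_API_KEY` holds a valid API key, and that the body fields mirror the destructuring in the new create/text handler further down in this commit.

```ts
// Sketch only: call the relocated open-source endpoint for text collections.
// The literal trainingType value and chunkSize default are assumptions.
async function createTextCollection(datasetId: string, name: string, text: string) {
  const res = await fetch('http://localhost:3000/api/core/dataset/collection/create/text', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${process.env.FASTGPT_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      datasetId,
      name,
      text,
      trainingType: 'chunk', // TrainingModeEnum.chunk; string value assumed
      chunkSize: 512
    })
  });
  const json = await res.json();
  return json.data; // expected shape: { collectionId, results }
}
```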

View File

@@ -0,0 +1,88 @@
/*
Create one dataset collection from a web link
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { reloadCollectionChunks } from '@fastgpt/service/core/dataset/collection/utils';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const {
link,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as LinkCreateDatasetCollectionParams;
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
datasetId: body.datasetId,
per: 'w'
});
// 1. check dataset limit
await checkDatasetLimit({
teamId,
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
insertLen: predictDataLimitLength(trainingType, new Array(10)) // page not crawled yet, so estimate quota usage with a 10-item placeholder
});
// 2. create collection
const collectionId = await createOneCollection({
...body,
name: link,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.link,
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
rawLink: link
});
// 3. create bill and start sync
const { billId } = await createTrainingBill({
teamId,
tmbId,
appName: 'core.dataset.collection.Sync Collection',
billSource: BillSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel).name,
agentModel: getQAModel(dataset.agentModel).name
});
await reloadCollectionChunks({
collectionId,
tmbId,
billId
});
jsonRes(res, {
data: { collectionId }
});
} catch (err) {
jsonRes(res, {
code: 500,
error: err
});
}
}
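
Because the page has not been crawled when this handler runs, it estimates quota usage with a ten-item placeholder, creates the collection, and lets `reloadCollectionChunks` fetch and index the page. A minimal sketch of calling the endpoint, assuming the same Bearer API key auth as the docs above and the field names from `LinkCreateDatasetCollectionParams`:

```ts
// Sketch only: create a link collection and read back its id.
// Field names follow the handler's destructuring (datasetId, link, chunkSize);
// URL and auth header match the curl example in the updated docs.
async function createLinkCollection(datasetId: string, link: string): Promise<string> {
  const res = await fetch('http://localhost:3000/api/core/dataset/collection/create/link', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${process.env.FASTGPT_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({ datasetId, link, chunkSize: 512 })
  });
  const { data } = await res.json();
  return data.collectionId; // the handler responds with { collectionId }
}
```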

View File

@@ -0,0 +1,117 @@
/*
Create one dataset collection from raw text
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
TrainingModeEnum,
DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataToTrainingQueue } from '@/service/core/dataset/data/controller';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const {
name,
text,
trainingType = TrainingModeEnum.chunk,
chunkSize = 512,
chunkSplitter,
qaPrompt,
...body
} = req.body as TextCreateDatasetCollectionParams;
const { teamId, tmbId, dataset } = await authDataset({
req,
authToken: true,
authApiKey: true,
datasetId: body.datasetId,
per: 'w'
});
// 1. split text to chunks
const { chunks } = splitText2Chunks({
text,
chunkLen: chunkSize,
overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0, // 20% overlap between adjacent chunks in chunk mode; no overlap otherwise
customReg: chunkSplitter ? [chunkSplitter] : []
});
// 2. check dataset limit
await checkDatasetLimit({
teamId,
freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
insertLen: predictDataLimitLength(trainingType, chunks)
});
// 3. create collection and training bill
const [collectionId, { billId }] = await Promise.all([
createOneCollection({
...body,
teamId,
tmbId,
type: DatasetCollectionTypeEnum.virtual,
name,
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
hashRawText: hashStr(text),
rawTextLength: text.length
}),
createTrainingBill({
teamId,
tmbId,
appName: name,
billSource: BillSourceEnum.training,
vectorModel: getVectorModel(dataset.vectorModel)?.name,
agentModel: getQAModel(dataset.agentModel)?.name
})
]);
// 4. push chunks to training queue
const insertResults = await pushDataToTrainingQueue({
teamId,
tmbId,
collectionId,
trainingMode: trainingType,
prompt: qaPrompt,
billId,
data: chunks.map((text, index) => ({
q: text,
chunkIndex: index
}))
});
jsonRes(res, {
data: { collectionId, results: insertResults }
});
} catch (err) {
jsonRes(res, {
code: 500,
error: err
});
}
}
export const config = {
api: {
bodyParser: {
sizeLimit: '10mb'
}
}
};
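
The client SDK change below only wires up the link helper; a matching helper for the new text endpoint is not part of this commit, but following the same `POST<T>` pattern it could look like this hypothetical sketch:

```ts
// Hypothetical companion to postCreateDatasetLinkCollection; not included in this commit.
// Follows the POST<T> helper pattern from the client API module shown below.
export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
  POST<{ collectionId: string; results: unknown }>(`/core/dataset/collection/create/text`, data);
```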

View File

@@ -76,7 +76,7 @@ export const getDatasetCollectionById = (id: string) =>
 export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
   POST<string>(`/core/dataset/collection/create`, data);
 export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
-  POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/link`, data);
+  POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
 export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
   POST(`/core/dataset/collection/update`, data);
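
A hedged usage sketch of the updated helper: the parameter names come from the server-side handler above, and it assumes `POST` resolves directly to the typed data payload.

```ts
// Sketch: import a web page through the relocated client helper.
// chunkSize default is an assumption; trainingType falls back to chunk mode on the server.
async function importWebPage(datasetId: string, link: string) {
  const { collectionId } = await postCreateDatasetLinkCollection({
    datasetId,
    link,
    chunkSize: 512
  });
  console.log('created link collection', collectionId);
  return collectionId;
}
```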