From 318116627cee767dbe5b8412b81d2da3c02296d6 Mon Sep 17 00:00:00 2001
From: Archer <545436317@qq.com>
Date: Tue, 23 Jan 2024 10:46:02 +0800
Subject: [PATCH] perf: redirect request and err log replace (#768)

perf: dataset openapi

---
 .../docs/development/openapi/dataset.md    |   8 +-
 .../core/dataset/collection/create/link.ts |  88 +++++++++++++
 .../core/dataset/collection/create/text.ts | 117 ++++++++++++++++++
 projects/app/src/web/core/dataset/api.ts   |   2 +-
 4 files changed, 210 insertions(+), 5 deletions(-)
 create mode 100644 projects/app/src/pages/api/core/dataset/collection/create/link.ts
 create mode 100644 projects/app/src/pages/api/core/dataset/collection/create/text.ts

diff --git a/docSite/content/docs/development/openapi/dataset.md b/docSite/content/docs/development/openapi/dataset.md
index 43f8ed94d..8fe90fb7d 100644
--- a/docSite/content/docs/development/openapi/dataset.md
+++ b/docSite/content/docs/development/openapi/dataset.md
@@ -342,7 +342,7 @@ data is the collection ID.
 
 {{< /tabs >}}
 
-### Create a plain text collection (Commercial Edition)
+### Create a plain text collection
 
 Pass in a piece of text to create a collection; the text will be split into chunks.
 
@@ -351,7 +351,7 @@ data is the collection ID.
 {{< markdownify >}}
 
 ```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/text' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/text' \
 --header 'Authorization: Bearer {{authorization}}' \
 --header 'Content-Type: application/json' \
 --data-raw '{
@@ -418,7 +418,7 @@ data is the collection ID.
 {{< /tab >}}
 {{< /tabs >}}
 
-### Create a link collection (Commercial Edition)
+### Create a link collection
 
 Pass in a web link to create a collection; the page content is fetched first, and the fetched text is then split into chunks.
 
@@ -427,7 +427,7 @@ data is the collection ID.
 {{< markdownify >}}
 
 ```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/link' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/link' \
 --header 'Authorization: Bearer {{authorization}}' \
 --header 'Content-Type: application/json' \
 --data-raw '{
diff --git a/projects/app/src/pages/api/core/dataset/collection/create/link.ts b/projects/app/src/pages/api/core/dataset/collection/create/link.ts
new file mode 100644
index 000000000..043410c64
--- /dev/null
+++ b/projects/app/src/pages/api/core/dataset/collection/create/link.ts
@@ -0,0 +1,88 @@
+/*
+  Create one dataset collection
+*/
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@fastgpt/service/common/response';
+import { connectToDatabase } from '@/service/mongo';
+import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
+import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
+import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
+import {
+  TrainingModeEnum,
+  DatasetCollectionTypeEnum
+} from '@fastgpt/global/core/dataset/constants';
+import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
+import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
+import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
+import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
+import { getQAModel, getVectorModel } from '@/service/core/ai/model';
+import { reloadCollectionChunks } from '@fastgpt/service/core/dataset/collection/utils';
+
+export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+  try {
+    await connectToDatabase();
+    const {
+      link,
+      trainingType = TrainingModeEnum.chunk,
+      chunkSize = 512,
+      chunkSplitter,
+      qaPrompt,
+      ...body
+    } = req.body as LinkCreateDatasetCollectionParams;
+
+    const { teamId, tmbId, dataset } = await authDataset({
+      req,
+      authToken: true,
+      authApiKey: true,
+      datasetId: body.datasetId,
+      per: 'w'
+    });
+
+    // 1. check dataset limit
+    await checkDatasetLimit({
+      teamId,
+      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
+      insertLen: predictDataLimitLength(trainingType, new Array(10))
+    });
+
+    // 2. create collection
+    const collectionId = await createOneCollection({
+      ...body,
+      name: link,
+      teamId,
+      tmbId,
+      type: DatasetCollectionTypeEnum.link,
+
+      trainingType,
+      chunkSize,
+      chunkSplitter,
+      qaPrompt,
+
+      rawLink: link
+    });
+
+    // 3. create bill and start sync
+    const { billId } = await createTrainingBill({
+      teamId,
+      tmbId,
+      appName: 'core.dataset.collection.Sync Collection',
+      billSource: BillSourceEnum.training,
+      vectorModel: getVectorModel(dataset.vectorModel).name,
+      agentModel: getQAModel(dataset.agentModel).name
+    });
+    await reloadCollectionChunks({
+      collectionId,
+      tmbId,
+      billId
+    });
+
+    jsonRes(res, {
+      data: { collectionId }
+    });
+  } catch (err) {
+    jsonRes(res, {
+      code: 500,
+      error: err
+    });
+  }
+}
diff --git a/projects/app/src/pages/api/core/dataset/collection/create/text.ts b/projects/app/src/pages/api/core/dataset/collection/create/text.ts
new file mode 100644
index 000000000..06beda81a
--- /dev/null
+++ b/projects/app/src/pages/api/core/dataset/collection/create/text.ts
@@ -0,0 +1,117 @@
+/*
+  Create one dataset collection
+*/
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@fastgpt/service/common/response';
+import { connectToDatabase } from '@/service/mongo';
+import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
+import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
+import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
+import {
+  TrainingModeEnum,
+  DatasetCollectionTypeEnum
+} from '@fastgpt/global/core/dataset/constants';
+import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
+import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
+import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
+import { pushDataToTrainingQueue } from '@/service/core/dataset/data/controller';
+import { hashStr } from '@fastgpt/global/common/string/tools';
+import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
+import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
+import { getQAModel, getVectorModel } from '@/service/core/ai/model';
+
+export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+  try {
+    await connectToDatabase();
+    const {
+      name,
+      text,
+      trainingType = TrainingModeEnum.chunk,
+      chunkSize = 512,
+      chunkSplitter,
+      qaPrompt,
+      ...body
+    } = req.body as TextCreateDatasetCollectionParams;
+
+    const { teamId, tmbId, dataset } = await authDataset({
+      req,
+      authToken: true,
+      authApiKey: true,
+      datasetId: body.datasetId,
+      per: 'w'
+    });
+
+    // 1. split text to chunks
+    const { chunks } = splitText2Chunks({
+      text,
+      chunkLen: chunkSize,
+      overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
+      customReg: chunkSplitter ? [chunkSplitter] : []
+    });
+
+    // 2. check dataset limit
+    await checkDatasetLimit({
+      teamId,
+      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
+      insertLen: predictDataLimitLength(trainingType, chunks)
+    });
+
+    // 3. create collection and training bill
+    const [collectionId, { billId }] = await Promise.all([
+      createOneCollection({
+        ...body,
+        teamId,
+        tmbId,
+        type: DatasetCollectionTypeEnum.virtual,
+
+        name,
+        trainingType,
+        chunkSize,
+        chunkSplitter,
+        qaPrompt,
+
+        hashRawText: hashStr(text),
+        rawTextLength: text.length
+      }),
+      createTrainingBill({
+        teamId,
+        tmbId,
+        appName: name,
+        billSource: BillSourceEnum.training,
+        vectorModel: getVectorModel(dataset.vectorModel)?.name,
+        agentModel: getQAModel(dataset.agentModel)?.name
+      })
+    ]);
+
+    // 4. push chunks to training queue
+    const insertResults = await pushDataToTrainingQueue({
+      teamId,
+      tmbId,
+      collectionId,
+      trainingMode: trainingType,
+      prompt: qaPrompt,
+      billId,
+      data: chunks.map((text, index) => ({
+        q: text,
+        chunkIndex: index
+      }))
+    });
+
+    jsonRes(res, {
+      data: { collectionId, results: insertResults }
+    });
+  } catch (err) {
+    jsonRes(res, {
+      code: 500,
+      error: err
+    });
+  }
+}
+
+export const config = {
+  api: {
+    bodyParser: {
+      sizeLimit: '10mb'
+    }
+  }
+};
diff --git a/projects/app/src/web/core/dataset/api.ts b/projects/app/src/web/core/dataset/api.ts
index e5104e285..a85806c59 100644
--- a/projects/app/src/web/core/dataset/api.ts
+++ b/projects/app/src/web/core/dataset/api.ts
@@ -76,7 +76,7 @@ export const getDatasetCollectionById = (id: string) =>
 export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
   POST(`/core/dataset/collection/create`, data);
 export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
-  POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/link`, data);
+  POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);
 export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
   POST(`/core/dataset/collection/update`, data);
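
Note: the patch rewires `postCreateDatasetLinkCollection` to the new open `create/link` route, but adds no web-client helper for the matching `create/text` route. A minimal sketch of what such a helper could look like, assuming the same `POST` wrapper that `api.ts` already uses; the helper name and the wrapper's import path below are illustrative, not part of this patch:

```typescript
// Illustrative sketch, not part of this patch: a client helper for the new
// text-collection endpoint, mirroring postCreateDatasetLinkCollection.
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
// Hypothetical path to the shared POST wrapper; adjust to the real location.
import { POST } from '@/web/common/api/request';

export const postCreateDatasetTextCollection = (data: TextCreateDatasetCollectionParams) =>
  POST<{ collectionId: string }>(`/core/dataset/collection/create/text`, data);
```

The `{ collectionId }` response type matches what the `text.ts` handler returns above; the handler also returns a `results` field, which a caller can ignore or add to the type as needed.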