perf: redirect request and err log replace (#768)
perf: dataset openapi
@@ -342,7 +342,7 @@ data is the collection ID.

{{< /tabs >}}

-### Create a plain text collection (Commercial Edition)
+### Create a plain text collection

Pass in a piece of text to create a collection; the text is split into chunks automatically.
@@ -351,7 +351,7 @@ data is the collection ID.
{{< markdownify >}}

```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/text' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/text' \
--header 'Authorization: Bearer {{authorization}}' \
--header 'Content-Type: application/json' \
--data-raw '{
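The hunk ends inside `--data-raw`, so the rest of the JSON body is not shown here. As a sketch only, a body consistent with the new text handler further down (field names taken from its `TextCreateDatasetCollectionParams` destructuring; every value is a placeholder) could look like:

```ts
// Hypothetical request body for POST /api/core/dataset/collection/create/text.
// Field names mirror the handler's destructuring; values are placeholders.
const textCreateBody = {
  datasetId: '<datasetId>', // target dataset, checked by authDataset
  name: 'My text collection',
  text: 'Raw text that the server splits into chunks...',
  trainingType: 'chunk', // server default: TrainingModeEnum.chunk
  chunkSize: 512, // server default
  chunkSplitter: undefined, // optional custom split pattern
  qaPrompt: undefined // optional prompt for QA training
};
```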
@@ -418,7 +418,7 @@ data is the collection ID.
{{< /tab >}}
{{< /tabs >}}

-### Create a link collection (Commercial Edition)
+### Create a link collection

Pass in a web link to create a collection; the page content is fetched first, and the fetched text is then split into chunks.
@@ -427,7 +427,7 @@ data is the collection ID.
{{< markdownify >}}

```bash
-curl --location --request POST 'http://localhost:3000/api/proApi/core/dataset/collection/create/link' \
+curl --location --request POST 'http://localhost:3000/api/core/dataset/collection/create/link' \
--header 'Authorization: Bearer {{authorization}}' \
--header 'Content-Type: application/json' \
--data-raw '{
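This hunk is also truncated inside `--data-raw`. A hedged sketch of the body, with field names taken from the link handler's `LinkCreateDatasetCollectionParams` destructuring (values are placeholders):

```ts
// Hypothetical request body for POST /api/core/dataset/collection/create/link.
const linkCreateBody = {
  datasetId: '<datasetId>', // target dataset
  link: 'https://example.com/page', // page to fetch and split
  trainingType: 'chunk', // server default: TrainingModeEnum.chunk
  chunkSize: 512 // server default
};
```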
@@ -0,0 +1,88 @@
/*
  Create one dataset collection
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { LinkCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
  TrainingModeEnum,
  DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { reloadCollectionChunks } from '@fastgpt/service/core/dataset/collection/utils';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    const {
      link,
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt,
      ...body
    } = req.body as LinkCreateDatasetCollectionParams;

    const { teamId, tmbId, dataset } = await authDataset({
      req,
      authToken: true,
      authApiKey: true,
      datasetId: body.datasetId,
      per: 'w'
    });

    // 1. check dataset limit
    await checkDatasetLimit({
      teamId,
      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
      insertLen: predictDataLimitLength(trainingType, new Array(10))
    });

    // 2. create collection
    const collectionId = await createOneCollection({
      ...body,
      name: link,
      teamId,
      tmbId,
      type: DatasetCollectionTypeEnum.link,

      trainingType,
      chunkSize,
      chunkSplitter,
      qaPrompt,

      rawLink: link
    });

    // 3. create bill and start sync
    const { billId } = await createTrainingBill({
      teamId,
      tmbId,
      appName: 'core.dataset.collection.Sync Collection',
      billSource: BillSourceEnum.training,
      vectorModel: getVectorModel(dataset.vectorModel).name,
      agentModel: getQAModel(dataset.agentModel).name
    });
    await reloadCollectionChunks({
      collectionId,
      tmbId,
      billId
    });

    jsonRes(res, {
      data: { collectionId }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}
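For illustration, a minimal client call against the relocated route (path and headers from the docs hunks above, body fields from this handler; the `{ data: { collectionId } }` envelope is inferred from the `jsonRes` call):

```ts
// Sketch: calling the new non-proApi link route. All values are placeholders.
const res = await fetch('http://localhost:3000/api/core/dataset/collection/create/link', {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <api key>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    datasetId: '<datasetId>',
    link: 'https://example.com/page'
  })
});
const { data } = await res.json(); // expected shape: { collectionId: string }
```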
@@ -0,0 +1,117 @@
/*
  Create one dataset collection
*/
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo';
import type { TextCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller';
import {
  TrainingModeEnum,
  DatasetCollectionTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { checkDatasetLimit } from '@fastgpt/service/support/permission/limit/dataset';
import { predictDataLimitLength } from '@fastgpt/global/core/dataset/utils';
import { pushDataToTrainingQueue } from '@/service/core/dataset/data/controller';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { createTrainingBill } from '@fastgpt/service/support/wallet/bill/controller';
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getQAModel, getVectorModel } from '@/service/core/ai/model';

export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
  try {
    await connectToDatabase();
    const {
      name,
      text,
      trainingType = TrainingModeEnum.chunk,
      chunkSize = 512,
      chunkSplitter,
      qaPrompt,
      ...body
    } = req.body as TextCreateDatasetCollectionParams;

    const { teamId, tmbId, dataset } = await authDataset({
      req,
      authToken: true,
      authApiKey: true,
      datasetId: body.datasetId,
      per: 'w'
    });

    // 1. split text to chunks
    const { chunks } = splitText2Chunks({
      text,
      chunkLen: chunkSize,
      overlapRatio: trainingType === TrainingModeEnum.chunk ? 0.2 : 0,
      customReg: chunkSplitter ? [chunkSplitter] : []
    });

    // 2. check dataset limit
    await checkDatasetLimit({
      teamId,
      freeSize: global.feConfigs?.subscription?.datasetStoreFreeSize,
      insertLen: predictDataLimitLength(trainingType, chunks)
    });

    // 3. create collection and training bill
    const [collectionId, { billId }] = await Promise.all([
      createOneCollection({
        ...body,
        teamId,
        tmbId,
        type: DatasetCollectionTypeEnum.virtual,

        name,
        trainingType,
        chunkSize,
        chunkSplitter,
        qaPrompt,

        hashRawText: hashStr(text),
        rawTextLength: text.length
      }),
      createTrainingBill({
        teamId,
        tmbId,
        appName: name,
        billSource: BillSourceEnum.training,
        vectorModel: getVectorModel(dataset.vectorModel)?.name,
        agentModel: getQAModel(dataset.agentModel)?.name
      })
    ]);

    // 4. push chunks to training queue
    const insertResults = await pushDataToTrainingQueue({
      teamId,
      tmbId,
      collectionId,
      trainingMode: trainingType,
      prompt: qaPrompt,
      billId,
      data: chunks.map((text, index) => ({
        q: text,
        chunkIndex: index
      }))
    });

    jsonRes(res, {
      data: { collectionId, results: insertResults }
    });
  } catch (err) {
    jsonRes(res, {
      code: 500,
      error: err
    });
  }
}

export const config = {
  api: {
    bodyParser: {
      sizeLimit: '10mb'
    }
  }
};
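The chunking step in this handler can be exercised on its own. A sketch using the same arguments as step 1 (the `splitText2Chunks` signature is taken from the code above; the return value is assumed to expose at least `chunks`):

```ts
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';

// Same options the handler passes: 512-character chunks with 20% overlap in
// chunk mode; overlapRatio drops to 0 for other training modes.
const { chunks } = splitText2Chunks({
  text: 'A long document body...',
  chunkLen: 512,
  overlapRatio: 0.2,
  customReg: [] // or [chunkSplitter] when a custom splitter is supplied
});
// chunks.length feeds predictDataLimitLength for the storage-limit check.
```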
@@ -76,7 +76,7 @@ export const getDatasetCollectionById = (id: string) =>
export const postDatasetCollection = (data: CreateDatasetCollectionParams) =>
  POST<string>(`/core/dataset/collection/create`, data);
export const postCreateDatasetLinkCollection = (data: LinkCreateDatasetCollectionParams) =>
-  POST<{ collectionId: string }>(`/proApi/core/dataset/collection/create/link`, data);
+  POST<{ collectionId: string }>(`/core/dataset/collection/create/link`, data);

export const putDatasetCollectionById = (data: UpdateDatasetCollectionParams) =>
  POST(`/core/dataset/collection/update`, data);
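A sketch of the updated helper in use (the `{ collectionId }` return type is declared in the hunk; the parameter fields come from the link handler, and the import path is an assumption):

```ts
// Import path is assumed; adjust to wherever this api module lives.
import { postCreateDatasetLinkCollection } from '@/web/core/dataset/api';

const { collectionId } = await postCreateDatasetLinkCollection({
  datasetId: '<datasetId>',
  link: 'https://example.com/page'
});
```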