mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
perf: dataset import params code (#4875)
* perf: dataset import params code * perf: api dataset code * model
This commit is contained in:
@@ -1,6 +1,54 @@
|
||||
{
|
||||
"provider": "Claude",
|
||||
"list": [
|
||||
{
|
||||
"model": "claude-sonnet-4-20250514",
|
||||
"name": "claude-sonnet-4-20250514",
|
||||
"maxContext": 200000,
|
||||
"maxResponse": 8000,
|
||||
"quoteMaxToken": 100000,
|
||||
"maxTemperature": 1,
|
||||
"showTopP": true,
|
||||
"showStopSign": true,
|
||||
"vision": true,
|
||||
"toolChoice": true,
|
||||
"functionCall": false,
|
||||
"defaultSystemChatPrompt": "",
|
||||
"datasetProcess": true,
|
||||
"usedInClassify": true,
|
||||
"customCQPrompt": "",
|
||||
"usedInExtractFields": true,
|
||||
"usedInQueryExtension": true,
|
||||
"customExtractPrompt": "",
|
||||
"usedInToolCall": true,
|
||||
"defaultConfig": {},
|
||||
"fieldMap": {},
|
||||
"type": "llm"
|
||||
},
|
||||
{
|
||||
"model": "claude-opus-4-20250514",
|
||||
"name": "claude-opus-4-20250514",
|
||||
"maxContext": 200000,
|
||||
"maxResponse": 4096,
|
||||
"quoteMaxToken": 100000,
|
||||
"maxTemperature": 1,
|
||||
"showTopP": true,
|
||||
"showStopSign": true,
|
||||
"vision": true,
|
||||
"toolChoice": true,
|
||||
"functionCall": false,
|
||||
"defaultSystemChatPrompt": "",
|
||||
"datasetProcess": true,
|
||||
"usedInClassify": true,
|
||||
"customCQPrompt": "",
|
||||
"usedInExtractFields": true,
|
||||
"usedInQueryExtension": true,
|
||||
"customExtractPrompt": "",
|
||||
"usedInToolCall": true,
|
||||
"defaultConfig": {},
|
||||
"fieldMap": {},
|
||||
"type": "llm"
|
||||
},
|
||||
{
|
||||
"model": "claude-3-7-sonnet-20250219",
|
||||
"name": "claude-3-7-sonnet-20250219",
|
||||
|
@@ -25,6 +25,30 @@
|
||||
"showTopP": true,
|
||||
"showStopSign": true
|
||||
},
|
||||
{
|
||||
"model": "gemini-2.5-flash-preview-04-17",
|
||||
"name": "gemini-2.5-flash-preview-04-17",
|
||||
"maxContext": 1000000,
|
||||
"maxResponse": 8000,
|
||||
"quoteMaxToken": 60000,
|
||||
"maxTemperature": 1,
|
||||
"vision": true,
|
||||
"toolChoice": true,
|
||||
"functionCall": false,
|
||||
"defaultSystemChatPrompt": "",
|
||||
"datasetProcess": true,
|
||||
"usedInClassify": true,
|
||||
"customCQPrompt": "",
|
||||
"usedInExtractFields": true,
|
||||
"usedInQueryExtension": true,
|
||||
"customExtractPrompt": "",
|
||||
"usedInToolCall": true,
|
||||
"defaultConfig": {},
|
||||
"fieldMap": {},
|
||||
"type": "llm",
|
||||
"showTopP": true,
|
||||
"showStopSign": true
|
||||
},
|
||||
{
|
||||
"model": "gemini-2.0-flash",
|
||||
"name": "gemini-2.0-flash",
|
||||
|
@@ -74,6 +74,15 @@ export const createCollectionAndInsertData = async ({
|
||||
llmModel: getLLMModel(dataset.agentModel)
|
||||
});
|
||||
const chunkSplitter = computeChunkSplitter(createCollectionParams);
|
||||
if (trainingType === DatasetCollectionDataProcessModeEnum.qa) {
|
||||
delete createCollectionParams.chunkTriggerType;
|
||||
delete createCollectionParams.chunkTriggerMinSize;
|
||||
delete createCollectionParams.dataEnhanceCollectionName;
|
||||
delete createCollectionParams.imageIndex;
|
||||
delete createCollectionParams.autoIndexes;
|
||||
delete createCollectionParams.indexSize;
|
||||
delete createCollectionParams.qaPrompt;
|
||||
}
|
||||
|
||||
// 1. split chunks
|
||||
const chunks = rawText2Chunks({
|
||||
|
@@ -163,7 +163,7 @@ export const readApiServerFileContent = async ({
|
||||
title?: string;
|
||||
rawText: string;
|
||||
}> => {
|
||||
const data = (
|
||||
return (
|
||||
await getApiDatasetRequest({
|
||||
apiServer,
|
||||
yuqueServer,
|
||||
@@ -175,10 +175,6 @@ export const readApiServerFileContent = async ({
|
||||
apiFileId,
|
||||
customPdfParse
|
||||
});
|
||||
if (data) {
|
||||
return data;
|
||||
}
|
||||
return Promise.reject(Error);
|
||||
};
|
||||
|
||||
export const rawText2Chunks = ({
|
||||
|
@@ -1,10 +1,12 @@
|
||||
import { getMongoModel, Schema } from '../../common/mongo';
|
||||
import {
|
||||
ChunkSettingModeEnum,
|
||||
ChunkTriggerConfigTypeEnum,
|
||||
DataChunkSplitModeEnum,
|
||||
DatasetCollectionDataProcessModeEnum,
|
||||
DatasetTypeEnum,
|
||||
DatasetTypeMap
|
||||
DatasetTypeMap,
|
||||
ParagraphChunkAIModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import {
|
||||
TeamCollectionName,
|
||||
@@ -15,12 +17,22 @@ import type { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d';
|
||||
export const DatasetCollectionName = 'datasets';
|
||||
|
||||
export const ChunkSettings = {
|
||||
imageIndex: Boolean,
|
||||
autoIndexes: Boolean,
|
||||
trainingType: {
|
||||
type: String,
|
||||
enum: Object.values(DatasetCollectionDataProcessModeEnum)
|
||||
},
|
||||
|
||||
chunkTriggerType: {
|
||||
type: String,
|
||||
enum: Object.values(ChunkTriggerConfigTypeEnum)
|
||||
},
|
||||
chunkTriggerMinSize: Number,
|
||||
|
||||
dataEnhanceCollectionName: Boolean,
|
||||
|
||||
imageIndex: Boolean,
|
||||
autoIndexes: Boolean,
|
||||
|
||||
chunkSettingMode: {
|
||||
type: String,
|
||||
enum: Object.values(ChunkSettingModeEnum)
|
||||
@@ -29,6 +41,13 @@ export const ChunkSettings = {
|
||||
type: String,
|
||||
enum: Object.values(DataChunkSplitModeEnum)
|
||||
},
|
||||
paragraphChunkAIMode: {
|
||||
type: String,
|
||||
enum: Object.values(ParagraphChunkAIModeEnum)
|
||||
},
|
||||
paragraphChunkDeep: Number,
|
||||
paragraphChunkMinSize: Number,
|
||||
paragraphChunkMaxSize: Number,
|
||||
chunkSize: Number,
|
||||
chunkSplitter: String,
|
||||
|
||||
@@ -115,9 +134,7 @@ const DatasetSchema = new Schema({
|
||||
|
||||
// abandoned
|
||||
autoSync: Boolean,
|
||||
externalReadUrl: {
|
||||
type: String
|
||||
},
|
||||
externalReadUrl: String,
|
||||
defaultPermission: Number
|
||||
});
|
||||
|
||||
|
Reference in New Issue
Block a user