mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
V4.8.15 feature (#3331)
* feat: add customize toolkit (#3205) * chaoyang * fix-auth * add toolkit * add order * plugin usage * fix * delete console: * Fix: Fix fullscreen preview top positioning and improve Markdown rendering logic (#3247) * 完成任务:修复全屏预览顶部固定问题,优化 Markdown 渲染逻辑 * 有问题修改 * 问题再修改 * 修正问题 * fix: plugin standalone display issue (#3254) * 4.8.15 test (#3246) * o1 config * perf: system plugin code * 调整系统插件代码。增加html 渲染安全配置。 (#3258) * perf: base64 picker * perf: list app or dataset * perf: plugin config code * 小窗适配等问题 (#3257) * 小窗适配等问题 * git问题 * 小窗剩余问题 * feat: system plugin auth and lock version (#3265) * feat: system plugin auth and lock version * update comment * 4.8.15 test (#3267) * tmp log * perf: login direct * perf: iframe html code * remove log * fix: plugin standalone display (#3277) * refactor: 页面拆分&i18n拆分 (#3281) * refactor: account组件拆成独立页面 * script: 新增i18n json文件创建脚本 * refactor: 页面i18n拆分 * i18n: add en&hant * 4.8.15 test (#3285) * tmp log * remove log * fix: watch avatar refresh * perf: i18n code * fix(plugin): use intro instead of userguide (#3290) * Universal SSO (#3292) * tmp log * remove log * feat: common oauth * readme * perf: sso provider * remove sso code * perf: refresh plugins * feat: add api dataset (#3272) * add api-dataset * fix api-dataset * fix api dataset * fix ts * perf: create collection code (#3301) * tmp log * remove log * perf: i18n change * update version doc * feat: question guide from chatId * perf: create collection code * fix: request api * fix: request api * fix: tts auth and response type (#3303) * perf: md splitter * fix: tts auth and response type * fix: api file dataset (#3307) * perf: api dataset init (#3310) * perf: collection schema * perf: api dataset init * refactor: 团队管理独立页面 (#3302) * ui: 团队管理独立页面 * 代码优化 * fix * perf: sync collection and ui check (#3314) * perf: sync collection * remove script * perf: update api server * perf: api dataset parent * perf: team ui * perf: team 18n * update team ui * perf: ui check * perf: i18n * fix: 
debug variables & cronjob & system plugin callback load (#3315) * fix: debug variables & cronjob & system plugin callback load * fix type * fix * fix * fix: plugin dataset quote;perf: system variables init (#3316) * fix: plugin dataset quote * perf: system variables init * perf: node templates ui;fix: dataset import ui (#3318) * fix: dataset import ui * perf: node templates ui * perf: ui refresh * feat:套餐改名和套餐跳转配置 (#3309) * fixing:except Sidebar * 去除了多余的代码 * 修正了套餐说明的代码 * 修正了误删除的show_git代码 * 修正了名字部分等代码 * 修正了问题,遗留了其他和ui讨论不一致的部分 * 4.8.15 test (#3319) * remove log * pref: bill ui * pref: bill ui * perf: log * html渲染文档 (#3270) * html渲染文档 * 文档有点小问题 * feat: doc (#3322) * 集合重训练 (#3282) * rebaser * 一点补充 * 小问题 * 其他问题修正,删除集合保留文件的参数还没找到... * reTraining * delete uesless * 删除了一行错误代码 * 集合重训练部分 * fixing * 删除console代码 * feat: navbar item config (#3326) * perf: custom navbar code;perf: retraining code;feat: api dataset and dataset api doc (#3329) * feat: api dataset and dataset api doc * perf: retraining code * perf: custom navbar code * fix: ts (#3330) * fix: ts * fix: ts * retraining ui * perf: api collection filter * perf: retrining button --------- Co-authored-by: heheer <heheer@sealos.io> Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com> Co-authored-by: papapatrick <109422393+Patrickill@users.noreply.github.com>
This commit is contained in:
@@ -5,6 +5,7 @@ import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
|
||||
import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
|
||||
import { getLLMModel } from '../model';
|
||||
import { llmCompletionsBodyFormat } from '../utils';
|
||||
import { addLog } from '../../../common/system/log';
|
||||
|
||||
/*
|
||||
query extension - 问题扩展
|
||||
@@ -183,7 +184,7 @@ A: ${chatBg}
|
||||
tokens: await countGptMessagesTokens(messages)
|
||||
};
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
addLog.error(`Query extension error`, error);
|
||||
return {
|
||||
rawQuery: query,
|
||||
extensionQueries: [],
|
||||
|
@@ -51,7 +51,6 @@ export function reRankRecall({
|
||||
}));
|
||||
})
|
||||
.catch((err) => {
|
||||
console.log(err);
|
||||
addLog.error('rerank error', err);
|
||||
|
||||
return [];
|
||||
|
@@ -5,39 +5,44 @@ import { getLLMModel } from '../ai/model';
|
||||
import { MongoApp } from './schema';
|
||||
|
||||
export const beforeUpdateAppFormat = <T extends AppSchema['modules'] | undefined>({
|
||||
nodes
|
||||
nodes,
|
||||
isPlugin
|
||||
}: {
|
||||
nodes: T;
|
||||
isPlugin: boolean;
|
||||
}) => {
|
||||
if (nodes) {
|
||||
let maxTokens = 3000;
|
||||
// Check dataset maxTokens
|
||||
if (isPlugin) {
|
||||
let maxTokens = 16000;
|
||||
|
||||
nodes.forEach((item) => {
|
||||
if (
|
||||
item.flowNodeType === FlowNodeTypeEnum.chatNode ||
|
||||
item.flowNodeType === FlowNodeTypeEnum.tools
|
||||
) {
|
||||
const model =
|
||||
item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
|
||||
const chatModel = getLLMModel(model);
|
||||
const quoteMaxToken = chatModel.quoteMaxToken || 3000;
|
||||
nodes.forEach((item) => {
|
||||
if (
|
||||
item.flowNodeType === FlowNodeTypeEnum.chatNode ||
|
||||
item.flowNodeType === FlowNodeTypeEnum.tools
|
||||
) {
|
||||
const model =
|
||||
item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
|
||||
const chatModel = getLLMModel(model);
|
||||
const quoteMaxToken = chatModel.quoteMaxToken || 16000;
|
||||
|
||||
maxTokens = Math.max(maxTokens, quoteMaxToken);
|
||||
}
|
||||
});
|
||||
maxTokens = Math.max(maxTokens, quoteMaxToken);
|
||||
}
|
||||
});
|
||||
|
||||
nodes.forEach((item) => {
|
||||
if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
|
||||
item.inputs.forEach((input) => {
|
||||
if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
|
||||
const val = input.value as number;
|
||||
if (val > maxTokens) {
|
||||
input.value = maxTokens;
|
||||
nodes.forEach((item) => {
|
||||
if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
|
||||
item.inputs.forEach((input) => {
|
||||
if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
|
||||
const val = input.value as number;
|
||||
if (val > maxTokens) {
|
||||
input.value = maxTokens;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
|
@@ -2,7 +2,6 @@ import { FlowNodeTemplateType } from '@fastgpt/global/core/workflow/type/node.d'
|
||||
import { FlowNodeTypeEnum, defaultNodeVersion } from '@fastgpt/global/core/workflow/node/constant';
|
||||
import { appData2FlowNodeIO, pluginData2FlowNodeIO } from '@fastgpt/global/core/workflow/utils';
|
||||
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
|
||||
import type { PluginRuntimeType } from '@fastgpt/global/core/workflow/runtime/type';
|
||||
import { FlowNodeTemplateTypeEnum } from '@fastgpt/global/core/workflow/constants';
|
||||
import { getHandleConfig } from '@fastgpt/global/core/workflow/template/utils';
|
||||
import { getNanoid } from '@fastgpt/global/common/string/tools';
|
||||
@@ -11,6 +10,9 @@ import { MongoApp } from '../schema';
|
||||
import { SystemPluginTemplateItemType } from '@fastgpt/global/core/workflow/type';
|
||||
import { getSystemPluginTemplates } from '../../../../plugins/register';
|
||||
import { getAppLatestVersion, getAppVersionById } from '../version/controller';
|
||||
import { PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
|
||||
import { MongoSystemPlugin } from './systemPluginSchema';
|
||||
import { PluginErrEnum } from '@fastgpt/global/common/error/code/plugin';
|
||||
|
||||
/*
|
||||
plugin id rule:
|
||||
@@ -37,15 +39,45 @@ export async function splitCombinePluginId(id: string) {
|
||||
|
||||
type ChildAppType = SystemPluginTemplateItemType & { teamId?: string };
|
||||
const getSystemPluginTemplateById = async (
|
||||
pluginId: string
|
||||
pluginId: string,
|
||||
versionId?: string
|
||||
): Promise<SystemPluginTemplateItemType> => {
|
||||
const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
|
||||
if (!item) return Promise.reject('plugin not found');
|
||||
if (!item) return Promise.reject(PluginErrEnum.unAuth);
|
||||
|
||||
return cloneDeep(item);
|
||||
const plugin = cloneDeep(item);
|
||||
|
||||
if (plugin.associatedPluginId) {
|
||||
// The verification plugin is set as a system plugin
|
||||
const systemPlugin = await MongoSystemPlugin.findOne(
|
||||
{ pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
|
||||
'associatedPluginId'
|
||||
).lean();
|
||||
if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
|
||||
|
||||
const app = await MongoApp.findById(plugin.associatedPluginId).lean();
|
||||
if (!app) return Promise.reject(PluginErrEnum.unAuth);
|
||||
|
||||
const version = versionId
|
||||
? await getAppVersionById({
|
||||
appId: plugin.associatedPluginId,
|
||||
versionId,
|
||||
app
|
||||
})
|
||||
: await getAppLatestVersion(plugin.associatedPluginId, app);
|
||||
if (!version.versionId) return Promise.reject('App version not found');
|
||||
|
||||
plugin.workflow = {
|
||||
nodes: version.nodes,
|
||||
edges: version.edges,
|
||||
chatConfig: version.chatConfig
|
||||
};
|
||||
plugin.version = versionId || String(version.versionId);
|
||||
}
|
||||
return plugin;
|
||||
};
|
||||
|
||||
/* format plugin modules to plugin preview module */
|
||||
/* Format plugin to workflow preview node data */
|
||||
export async function getChildAppPreviewNode({
|
||||
id
|
||||
}: {
|
||||
@@ -77,7 +109,9 @@ export async function getChildAppPreviewNode({
|
||||
templateType: FlowNodeTemplateTypeEnum.teamApp,
|
||||
version: version.versionId,
|
||||
originCost: 0,
|
||||
currentCost: 0
|
||||
currentCost: 0,
|
||||
hasTokenFee: false,
|
||||
pluginOrder: 0
|
||||
};
|
||||
} else {
|
||||
return getSystemPluginTemplateById(pluginId);
|
||||
@@ -147,10 +181,12 @@ export async function getChildAppRuntimeById(
|
||||
// 用不到
|
||||
version: item?.pluginData?.nodeVersion || defaultNodeVersion,
|
||||
originCost: 0,
|
||||
currentCost: 0
|
||||
currentCost: 0,
|
||||
hasTokenFee: false,
|
||||
pluginOrder: 0
|
||||
};
|
||||
} else {
|
||||
return getSystemPluginTemplateById(pluginId);
|
||||
return getSystemPluginTemplateById(pluginId, versionId);
|
||||
}
|
||||
})();
|
||||
|
||||
@@ -162,6 +198,7 @@ export async function getChildAppRuntimeById(
|
||||
showStatus: app.showStatus,
|
||||
currentCost: app.currentCost,
|
||||
nodes: app.workflow.nodes,
|
||||
edges: app.workflow.edges
|
||||
edges: app.workflow.edges,
|
||||
hasTokenFee: app.hasTokenFee
|
||||
};
|
||||
}
|
||||
|
35
packages/service/core/app/plugin/pluginGroupSchema.ts
Normal file
35
packages/service/core/app/plugin/pluginGroupSchema.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import { connectionMongo, getMongoModel } from '../../../common/mongo/index';
|
||||
import { PluginGroupSchemaType, TGroupType } from './type';
|
||||
const { Schema } = connectionMongo;
|
||||
|
||||
/** Name of the MongoDB collection (and mongoose model) that stores plugin groups. */
export const collectionName = 'app_plugin_groups';

/**
 * Mongoose schema describing one plugin group document.
 * NOTE(review): field meanings below are read off the field names and defaults only —
 * confirm against the code that reads/writes this collection.
 */
const PluginGroupSchema = new Schema({
  // Required identifier of the group (unique, see the index below).
  groupId: { type: String, required: true },
  // Optional avatar string; empty when not set.
  groupAvatar: { type: String, default: '' },
  // Required display name.
  groupName: { type: String, required: true },
  // Entries typed as TGroupType at compile time; the generic argument is erased,
  // so the runtime schema type is the plain `Array` constructor.
  groupTypes: { type: Array<TGroupType>, default: [] },
  // Numeric ordering value; 0 when not set.
  groupOrder: { type: Number, default: 0 }
});

// A groupId may appear in the collection only once.
PluginGroupSchema.index({ groupId: 1 }, { unique: true });

export const MongoPluginGroups = getMongoModel<PluginGroupSchemaType>(
  collectionName,
  PluginGroupSchema
);
|
@@ -25,12 +25,20 @@ const SystemPluginSchema = new Schema({
|
||||
type: Number,
|
||||
default: 0
|
||||
},
|
||||
hasTokenFee: {
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
pluginOrder: {
|
||||
type: Number,
|
||||
default: 0
|
||||
},
|
||||
customConfig: Object
|
||||
});
|
||||
|
||||
SystemPluginSchema.index({ pluginId: 1 });
|
||||
|
||||
export const MongoSystemPluginSchema = getMongoModel<SystemPluginConfigSchemaType>(
|
||||
export const MongoSystemPlugin = getMongoModel<SystemPluginConfigSchemaType>(
|
||||
collectionName,
|
||||
SystemPluginSchema
|
||||
);
|
||||
|
20
packages/service/core/app/plugin/type.d.ts
vendored
20
packages/service/core/app/plugin/type.d.ts
vendored
@@ -1,3 +1,4 @@
|
||||
import { SystemPluginListItemType } from '@fastgpt/global/core/app/type';
|
||||
import { FlowNodeTemplateTypeEnum } from '@fastgpt/global/core/workflow/constants';
|
||||
import {
|
||||
SystemPluginTemplateItemType,
|
||||
@@ -9,7 +10,9 @@ export type SystemPluginConfigSchemaType = {
|
||||
|
||||
originCost: number; // n points/one time
|
||||
currentCost: number;
|
||||
hasTokenFee: boolean;
|
||||
isActive: boolean;
|
||||
pluginOrder: number;
|
||||
inputConfig: SystemPluginTemplateItemType['inputConfig'];
|
||||
|
||||
customConfig?: {
|
||||
@@ -19,6 +22,21 @@ export type SystemPluginConfigSchemaType = {
|
||||
version: string;
|
||||
weight?: number;
|
||||
workflow: WorkflowTemplateBasicType;
|
||||
templateType: FlowNodeTemplateTypeEnum;
|
||||
templateType: string;
|
||||
associatedPluginId: string;
|
||||
userGuide: string;
|
||||
};
|
||||
};
|
||||
|
||||
export type TGroupType = {
|
||||
typeName: string;
|
||||
typeId: string;
|
||||
};
|
||||
|
||||
export type PluginGroupSchemaType = {
|
||||
groupId: string;
|
||||
groupAvatar: string;
|
||||
groupName: string;
|
||||
groupTypes: TGroupType[];
|
||||
groupOrder: number;
|
||||
};
|
||||
|
@@ -1,11 +1,11 @@
|
||||
import { PluginRuntimeType } from '@fastgpt/global/core/workflow/runtime/type';
|
||||
import { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
|
||||
import { splitCombinePluginId } from './controller';
|
||||
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
|
||||
import { PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
|
||||
|
||||
/*
|
||||
1. Commercial plugin: n points per times
|
||||
2. Other plugin: sum of children points
|
||||
Plugin points calculation:
|
||||
1. Return 0 if error
|
||||
2. Add configured points if commercial plugin
|
||||
3. Add sum of child nodes points
|
||||
*/
|
||||
export const computedPluginUsage = async ({
|
||||
plugin,
|
||||
@@ -16,13 +16,13 @@ export const computedPluginUsage = async ({
|
||||
childrenUsage: ChatNodeUsageType[];
|
||||
error?: boolean;
|
||||
}) => {
|
||||
const { source } = await splitCombinePluginId(plugin.id);
|
||||
|
||||
// Commercial plugin: n points per times
|
||||
if (source === PluginSourceEnum.commercial) {
|
||||
if (error) return 0;
|
||||
return plugin.currentCost ?? 0;
|
||||
if (error) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);
|
||||
const childrenIUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);
|
||||
|
||||
const pluginCurrentCose = plugin.currentCost ?? 0;
|
||||
|
||||
return plugin.hasTokenFee ? pluginCurrentCose + childrenIUsages : pluginCurrentCose;
|
||||
};
|
||||
|
143
packages/service/core/dataset/apiDataset/api.ts
Normal file
143
packages/service/core/dataset/apiDataset/api.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import type {
|
||||
APIFileContentResponse,
|
||||
APIFileListResponse,
|
||||
APIFileReadResponse,
|
||||
APIFileServer
|
||||
} from '@fastgpt/global/core/dataset/apiDataset';
|
||||
import axios, { Method } from 'axios';
|
||||
import { addLog } from '../../../common/system/log';
|
||||
import { readFileRawTextByUrl } from '../read';
|
||||
import { ParentIdType } from '@fastgpt/global/common/parentFolder/type';
|
||||
|
||||
/** Envelope that every API-dataset endpoint is expected to answer with. */
type ResponseDataType = {
  success: boolean;
  message: string;
  data: any;
};

/**
 * Build a small HTTP client for a user-configured API dataset server.
 *
 * Every request is sent to `apiServer.baseUrl` with a 60 second timeout, a JSON
 * content type and an `Authorization: Bearer <apiServer.authorization>` header.
 *
 * @param apiServer - Base URL and authorization token of the remote file server.
 * @returns An object with `getFileContent`, `listFiles` and `getFilePreviewUrl`.
 *          Each helper returns a promise that rejects (with a string or an object
 *          carrying `message`) when the request fails or the response is malformed.
 */
export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }) => {
  const instance = axios.create({
    baseURL: apiServer.baseUrl,
    timeout: 60000, // request timeout in milliseconds
    headers: {
      'content-type': 'application/json',
      Authorization: `Bearer ${apiServer.authorization}`
    }
  });

  /**
   * Validate the response envelope and unwrap its `data` field.
   * Rejects when the body is missing or when `success` is falsy.
   */
  const checkRes = (data: ResponseDataType) => {
    // `== null` also catches a `null` body, which would otherwise throw on `data.success`.
    if (data == null) {
      addLog.info('Api dataset data is empty');
      return Promise.reject('服务器异常');
    } else if (!data.success) {
      return Promise.reject(data);
    }
    return data.data;
  };

  /**
   * Normalise any failure (transport error or rejected envelope) into a rejection,
   * preferring a plain `{ message }` object when a message string can be found.
   */
  const responseError = (err: any) => {
    // Use the shared logger, like the other error paths in the service package.
    addLog.error('请求错误', err);

    if (!err) {
      return Promise.reject({ message: '未知错误' });
    }
    if (typeof err === 'string') {
      return Promise.reject({ message: err });
    }
    if (typeof err.message === 'string') {
      return Promise.reject({ message: err.message });
    }
    if (typeof err.data === 'string') {
      return Promise.reject({ message: err.data });
    }
    if (err?.response?.data) {
      return Promise.reject(err?.response?.data);
    }
    return Promise.reject(err);
  };

  /**
   * Send one request. `data` travels as the JSON body for POST/PUT and as query
   * parameters for every other method.
   */
  const request = <T>(url: string, data: any, method: Method): Promise<T> => {
    // Strip undefined fields on a copy so the caller's object is never mutated.
    const payload =
      data && typeof data === 'object' && !Array.isArray(data)
        ? Object.fromEntries(Object.entries(data).filter(([, value]) => value !== undefined))
        : data;

    // axios' `Method` type also allows lowercase verbs, so compare case-insensitively.
    const hasBody = ['POST', 'PUT'].includes(method.toUpperCase());

    return instance
      .request({
        url,
        method,
        data: hasBody ? payload : undefined,
        params: hasBody ? undefined : payload
      })
      .then((res) => checkRes(res.data))
      .catch((err) => responseError(err));
  };

  /**
   * List files from the remote server (`POST /v1/file/list`).
   * Rejects when the payload is not an array or when an entry lacks `id`, `name` or `type`.
   */
  const listFiles = async ({
    searchKey,
    parentId
  }: {
    searchKey?: string;
    parentId?: ParentIdType;
  }) => {
    const files = await request<APIFileListResponse>(
      `/v1/file/list`,
      {
        searchKey,
        parentId
      },
      'POST'
    );

    if (!Array.isArray(files)) {
      return Promise.reject('Invalid file list format');
    }
    // `!file` guards against null entries before their fields are read.
    if (files.some((file) => !file || !file.id || !file.name || typeof file.type === 'undefined')) {
      return Promise.reject('Invalid file data format');
    }
    return files;
  };

  /**
   * Fetch the text of one remote file (`GET /v1/file/content`).
   * Inline `content` wins; otherwise the `previewUrl` is read through
   * `readFileRawTextByUrl`. Rejects when the server supplies neither.
   */
  const getFileContent = async ({ teamId, apiFileId }: { teamId: string; apiFileId: string }) => {
    const data = await request<APIFileContentResponse>(
      `/v1/file/content`,
      { id: apiFileId },
      'GET'
    );
    // The server may answer `success: true` with an empty payload; fall through to the rejection below.
    const content = data?.content;
    const previewUrl = data?.previewUrl;

    if (content) {
      return content;
    }
    if (previewUrl) {
      const rawText = await readFileRawTextByUrl({
        teamId,
        url: previewUrl,
        relatedId: apiFileId
      });
      return rawText;
    }
    return Promise.reject('Invalid content type: content or previewUrl is required');
  };

  /**
   * Resolve the preview URL of one remote file (`GET /v1/file/read`).
   * Rejects when the response carries no string `url`.
   */
  const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }) => {
    const data = await request<APIFileReadResponse>(`/v1/file/read`, { id: apiFileId }, 'GET');
    const url = data?.url;

    if (!url || typeof url !== 'string') {
      return Promise.reject('Invalid response url');
    }

    return url;
  };

  return {
    getFileContent,
    listFiles,
    getFilePreviewUrl
  };
};
|
@@ -3,7 +3,8 @@ import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset
|
||||
import { MongoDatasetCollection } from './schema';
|
||||
import {
|
||||
CollectionWithDatasetType,
|
||||
DatasetCollectionSchemaType
|
||||
DatasetCollectionSchemaType,
|
||||
DatasetSchemaType
|
||||
} from '@fastgpt/global/core/dataset/type';
|
||||
import { MongoDatasetTraining } from '../training/schema';
|
||||
import { MongoDatasetData } from '../data/schema';
|
||||
@@ -13,7 +14,132 @@ import { delFileByFileIdList } from '../../../common/file/gridfs/controller';
|
||||
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
|
||||
import { ClientSession } from '../../../common/mongo';
|
||||
import { createOrGetCollectionTags } from './utils';
|
||||
import { rawText2Chunks } from '../read';
|
||||
import { checkDatasetLimit } from '../../../support/permission/teamLimit';
|
||||
import { predictDataLimitLength } from '../../../../global/core/dataset/utils';
|
||||
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
|
||||
import { createTrainingUsage } from '../../../support/wallet/usage/controller';
|
||||
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
|
||||
import { getLLMModel, getVectorModel } from '../../ai/model';
|
||||
import { pushDataListToTrainingQueue } from '../training/controller';
|
||||
import { MongoImage } from '../../../common/file/image/schema';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
|
||||
/**
 * Split raw text into chunks, create a dataset collection for it and push the
 * chunks to the training queue.
 *
 * Steps: split the text, check the team's dataset limit, then — inside a mongo
 * session — create the collection, create a training usage record, enqueue the
 * chunks and (when `relatedId` is given) unset `expiredTime` on the related images.
 *
 * @param dataset - Dataset the collection belongs to; supplies the vector/agent models.
 * @param rawText - Full text to split and train on.
 * @param relatedId - Optional id matched against `metadata.relatedId` of stored images.
 * @param createCollectionParams - Collection fields plus team/member ids and chunk settings.
 * @param isQAImport - Forwarded to the chunk splitter; defaults to false.
 * @param session - Existing mongo session to reuse; a new one is run when omitted.
 * @returns The new collection id and the training-queue insert results.
 */
export const createCollectionAndInsertData = async ({
  dataset,
  rawText,
  relatedId,
  createCollectionParams,
  isQAImport = false,
  session
}: {
  dataset: DatasetSchemaType;
  rawText: string;
  relatedId?: string;
  createCollectionParams: CreateOneCollectionParams;

  isQAImport?: boolean;
  session?: ClientSession;
}) => {
  const { teamId, tmbId } = createCollectionParams;
  // Chunk split params
  const trainingType = createCollectionParams.trainingType || TrainingModeEnum.chunk;
  const { chunkSize, chunkSplitter, qaPrompt, name: usageName } = createCollectionParams;

  // 1. split chunks (overlap is only applied in plain chunk mode)
  const isChunkMode = trainingType === TrainingModeEnum.chunk;
  const chunks = rawText2Chunks({
    rawText,
    chunkLen: chunkSize,
    overlapRatio: isChunkMode ? 0.2 : 0,
    customReg: chunkSplitter ? [chunkSplitter] : [],
    isQAImport
  });

  // 2. auth limit
  await checkDatasetLimit({
    teamId,
    insertLen: predictDataLimitLength(trainingType, chunks)
  });

  const createAndEnqueue = async (activeSession: ClientSession) => {
    // 3. create collection
    const { _id: collectionId } = await createOneCollection({
      ...createCollectionParams,

      hashRawText: hashStr(rawText),
      rawTextLength: rawText.length,
      session: activeSession
    });

    // 4. create training bill
    const { billId } = await createTrainingUsage({
      teamId,
      tmbId,
      appName: usageName,
      billSource: UsageSourceEnum.training,
      vectorModel: getVectorModel(dataset.vectorModel)?.name,
      agentModel: getLLMModel(dataset.agentModel)?.name,
      session: activeSession
    });

    // 5. insert to training queue, numbering chunks by their position
    const indexedChunks = chunks.map((item, index) => ({
      ...item,
      chunkIndex: index
    }));
    const insertResults = await pushDataListToTrainingQueue({
      teamId,
      tmbId,
      datasetId: dataset._id,
      collectionId,
      agentModel: dataset.agentModel,
      vectorModel: dataset.vectorModel,
      trainingMode: trainingType,
      prompt: qaPrompt,
      billId,
      data: indexedChunks,
      session: activeSession
    });

    // 6. remove related image ttl
    if (relatedId) {
      const imageFilter = {
        teamId,
        'metadata.relatedId': relatedId
      };
      // Remove expiredTime to avoid ttl expiration
      const removeTtl = {
        $unset: {
          expiredTime: 1
        }
      };
      await MongoImage.updateMany(imageFilter, removeTtl, { session: activeSession });
    }

    return {
      collectionId,
      insertResults
    };
  };

  // Reuse the caller's session when one is supplied; otherwise run in a fresh one.
  return session ? createAndEnqueue(session) : mongoSessionRun(createAndEnqueue);
};
|
||||
|
||||
export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
session?: ClientSession;
|
||||
};
|
||||
export async function createOneCollection({
|
||||
teamId,
|
||||
tmbId,
|
||||
@@ -33,18 +159,15 @@ export async function createOneCollection({
|
||||
externalFileId,
|
||||
externalFileUrl,
|
||||
|
||||
apiFileId,
|
||||
|
||||
hashRawText,
|
||||
rawTextLength,
|
||||
metadata = {},
|
||||
session,
|
||||
tags,
|
||||
...props
|
||||
}: CreateDatasetCollectionParams & {
|
||||
teamId: string;
|
||||
tmbId: string;
|
||||
[key: string]: any;
|
||||
session?: ClientSession;
|
||||
}) {
|
||||
createTime
|
||||
}: CreateOneCollectionParams) {
|
||||
// Create collection tags
|
||||
const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });
|
||||
|
||||
@@ -52,7 +175,6 @@ export async function createOneCollection({
|
||||
const [collection] = await MongoDatasetCollection.create(
|
||||
[
|
||||
{
|
||||
...props,
|
||||
teamId,
|
||||
tmbId,
|
||||
parentId: parentId || null,
|
||||
@@ -64,16 +186,18 @@ export async function createOneCollection({
|
||||
chunkSize,
|
||||
chunkSplitter,
|
||||
qaPrompt,
|
||||
metadata,
|
||||
|
||||
fileId,
|
||||
rawLink,
|
||||
...(fileId ? { fileId } : {}),
|
||||
...(rawLink ? { rawLink } : {}),
|
||||
...(externalFileId ? { externalFileId } : {}),
|
||||
externalFileUrl,
|
||||
...(externalFileUrl ? { externalFileUrl } : {}),
|
||||
...(apiFileId ? { apiFileId } : {}),
|
||||
|
||||
rawTextLength,
|
||||
hashRawText,
|
||||
metadata,
|
||||
tags: collectionTags
|
||||
tags: collectionTags,
|
||||
createTime
|
||||
}
|
||||
],
|
||||
{ session }
|
||||
@@ -116,7 +240,68 @@ export const delCollectionRelatedSource = async ({
|
||||
/**
|
||||
* delete collection and it related data
|
||||
*/
|
||||
export async function delCollectionAndRelatedSources({
|
||||
/**
 * Delete collections together with their training records, dataset data and vectors.
 *
 * The team id is taken from the first collection. Files and images that belong to
 * the collections are removed only when `delRelatedSource` is true.
 *
 * @param collections - Collections to delete; an empty list is a no-op.
 * @param session - Mongo session used for the related-source, data and collection deletes.
 * @param delRelatedSource - Whether to also delete related files and images.
 * @returns Resolves when everything is deleted; rejects with 'teamId is not exist'
 *          when the first collection has no team id.
 */
export async function delCollection({
  collections,
  session,
  delRelatedSource
}: {
  collections: (CollectionWithDatasetType | DatasetCollectionSchemaType)[];
  session: ClientSession;
  delRelatedSource: boolean;
}) {
  if (collections.length === 0) return;

  const teamId = collections[0].teamId;

  if (!teamId) return Promise.reject('teamId is not exist');

  // `datasetId` is either a plain id or a populated dataset document; de-duplicate the ids.
  const datasetIds = Array.from(
    new Set(
      collections.map((item) => {
        if (typeof item.datasetId === 'string') {
          return String(item.datasetId);
        }
        return String(item.datasetId._id);
      })
    )
  );
  const collectionIds = collections.map((item) => String(item._id));

  // delete training data
  // The filter key must be `datasetId`: a misspelt `datasetIds` key is either dropped
  // or matches nothing, depending on mongoose's strictQuery setting.
  // NOTE(review): this delete runs outside the session, as before — confirm that is intended.
  await MongoDatasetTraining.deleteMany({
    teamId,
    datasetId: { $in: datasetIds },
    collectionId: { $in: collectionIds }
  });

  /* file and imgs */
  if (delRelatedSource) {
    await delCollectionRelatedSource({ collections, session });
  }

  // delete dataset.datas
  await MongoDatasetData.deleteMany(
    { teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
    { session }
  );

  // delete collections
  await MongoDatasetCollection.deleteMany(
    {
      teamId,
      _id: { $in: collectionIds }
    },
    { session }
  );

  // no session delete: delete files, vector data
  await deleteDatasetDataVector({ teamId, datasetIds, collectionIds });
}
|
||||
|
||||
/**
|
||||
* delete delOnlyCollection
|
||||
*/
|
||||
export async function delOnlyCollection({
|
||||
collections,
|
||||
session
|
||||
}: {
|
||||
@@ -148,9 +333,6 @@ export async function delCollectionAndRelatedSources({
|
||||
collectionId: { $in: collectionIds }
|
||||
});
|
||||
|
||||
/* file and imgs */
|
||||
await delCollectionRelatedSource({ collections, session });
|
||||
|
||||
// delete dataset.datas
|
||||
await MongoDatasetData.deleteMany(
|
||||
{ teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },
|
||||
|
@@ -10,90 +10,100 @@ import {
|
||||
|
||||
export const DatasetColCollectionName = 'dataset_collections';
|
||||
|
||||
const DatasetCollectionSchema = new Schema({
|
||||
parentId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetColCollectionName,
|
||||
default: null
|
||||
},
|
||||
teamId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamCollectionName,
|
||||
required: true
|
||||
},
|
||||
tmbId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamMemberCollectionName,
|
||||
required: true
|
||||
},
|
||||
datasetId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetCollectionName,
|
||||
required: true
|
||||
},
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetCollectionTypeMap),
|
||||
required: true
|
||||
},
|
||||
name: {
|
||||
type: String,
|
||||
required: true
|
||||
},
|
||||
createTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
updateTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
forbid: {
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
const DatasetCollectionSchema = new Schema(
|
||||
{
|
||||
parentId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetColCollectionName,
|
||||
default: null
|
||||
},
|
||||
teamId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamCollectionName,
|
||||
required: true
|
||||
},
|
||||
tmbId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamMemberCollectionName,
|
||||
required: true
|
||||
},
|
||||
datasetId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetCollectionName,
|
||||
required: true
|
||||
},
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetCollectionTypeMap),
|
||||
required: true
|
||||
},
|
||||
name: {
|
||||
type: String,
|
||||
required: true
|
||||
},
|
||||
createTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
updateTime: {
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
forbid: {
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
|
||||
// chunk filed
|
||||
trainingType: {
|
||||
type: String,
|
||||
enum: Object.keys(TrainingTypeMap)
|
||||
},
|
||||
chunkSize: {
|
||||
type: Number,
|
||||
required: true
|
||||
},
|
||||
chunkSplitter: {
|
||||
type: String
|
||||
},
|
||||
qaPrompt: {
|
||||
type: String
|
||||
},
|
||||
ocrParse: Boolean,
|
||||
// chunk filed
|
||||
trainingType: {
|
||||
type: String,
|
||||
enum: Object.keys(TrainingTypeMap)
|
||||
},
|
||||
chunkSize: {
|
||||
type: Number,
|
||||
required: true
|
||||
},
|
||||
chunkSplitter: {
|
||||
type: String
|
||||
},
|
||||
qaPrompt: {
|
||||
type: String
|
||||
},
|
||||
ocrParse: Boolean,
|
||||
|
||||
tags: {
|
||||
type: [String],
|
||||
default: []
|
||||
},
|
||||
tags: {
|
||||
type: [String],
|
||||
default: []
|
||||
},
|
||||
|
||||
// local file collection
|
||||
fileId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: 'dataset.files'
|
||||
},
|
||||
// web link collection
|
||||
rawLink: String,
|
||||
// external collection
|
||||
externalFileId: String,
|
||||
// local file collection
|
||||
fileId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: 'dataset.files'
|
||||
},
|
||||
// web link collection
|
||||
rawLink: String,
|
||||
// api collection
|
||||
apiFileId: String,
|
||||
// external collection
|
||||
externalFileId: String,
|
||||
externalFileUrl: String, // external import url
|
||||
|
||||
// metadata
|
||||
rawTextLength: Number,
|
||||
hashRawText: String,
|
||||
externalFileUrl: String, // external import url
|
||||
metadata: {
|
||||
type: Object,
|
||||
default: {}
|
||||
// metadata
|
||||
rawTextLength: Number,
|
||||
hashRawText: String,
|
||||
metadata: {
|
||||
type: Object,
|
||||
default: {}
|
||||
}
|
||||
},
|
||||
{
|
||||
// Auto update updateTime
|
||||
timestamps: {
|
||||
updatedAt: 'updateTime'
|
||||
}
|
||||
}
|
||||
});
|
||||
);
|
||||
|
||||
try {
|
||||
// auth file
|
||||
|
@@ -1,17 +1,19 @@
|
||||
import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type.d';
|
||||
import { MongoDatasetCollection } from './schema';
|
||||
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { MongoDatasetTraining } from '../training/schema';
|
||||
import { urlsFetch } from '../../../common/string/cheerio';
|
||||
import {
|
||||
DatasetCollectionTypeEnum,
|
||||
TrainingModeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { ClientSession } from '../../../common/mongo';
|
||||
import { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api';
|
||||
import { MongoDatasetCollectionTags } from '../tag/schema';
|
||||
import { readFromSecondary } from '../../../common/mongo/utils';
|
||||
import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
|
||||
import {
|
||||
DatasetCollectionSyncResultEnum,
|
||||
DatasetCollectionTypeEnum,
|
||||
DatasetSourceReadTypeEnum,
|
||||
DatasetTypeEnum
|
||||
} from '@fastgpt/global/core/dataset/constants';
|
||||
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
|
||||
import { readDatasetSourceRawText } from '../read';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
|
||||
import { createCollectionAndInsertData, delCollection } from './controller';
|
||||
|
||||
/**
|
||||
* get all collection by top collectionId
|
||||
@@ -61,148 +63,6 @@ export function getCollectionUpdateTime({ name, time }: { time?: Date; name: str
|
||||
return new Date();
|
||||
}
|
||||
|
||||
/**
 * Resolve a dataset collection together with the raw text that backs it.
 *
 * The collection is taken from `collection` when supplied; otherwise it is
 * loaded by `collectionId` with its `datasetId` reference populated.
 * The text is `newRawText` when supplied; for a link collection that has a
 * `rawLink` the page is fetched through `urlsFetch`; every other case yields
 * an empty title and empty text.
 *
 * @returns the collection, the resolved `title` and `rawText`, and
 *          `isSameRawText`, which is truthy only when the text is non-empty
 *          and its hash equals the `hashRawText` stored on the collection.
 *          Rejects with 'Collection not found' when no collection is resolved.
 */
export const getCollectionAndRawText = async ({
  collectionId,
  collection,
  newRawText
}: {
  collectionId?: string;
  collection?: CollectionWithDatasetType;
  newRawText?: string;
}) => {
  // A collection handed in by the caller wins over a lookup by id.
  const col = collection
    ? collection
    : collectionId
      ? ((await MongoDatasetCollection.findById(collectionId).populate(
          'datasetId'
        )) as CollectionWithDatasetType)
      : null;

  if (!col) {
    return Promise.reject('Collection not found');
  }

  const resolveSource = async () => {
    // Caller-provided text: nothing to fetch and no title to report.
    if (newRawText) {
      return {
        title: '',
        rawText: newRawText
      };
    }

    // Link collection: fetch the page again, preferring the dataset-level selector.
    if (col.type === DatasetCollectionTypeEnum.link && col.rawLink) {
      const pages = await urlsFetch({
        urlList: [col.rawLink],
        selector: col.datasetId?.websiteConfig?.selector || col?.metadata?.webPageSelector
      });
      const firstPage = pages[0];

      return {
        title: firstPage?.title,
        rawText: firstPage?.content
      };
    }

    // Anything else (e.g. file collections) has no text to re-read here.
    return {
      title: '',
      rawText: ''
    };
  };

  const { title, rawText } = await resolveSource();

  // Empty text never counts as "same", even if the stored hash happens to match.
  const currentHash = hashStr(rawText);
  const isSameRawText = rawText && col.hashRawText === currentHash;

  return {
    collection: col,
    title,
    rawText,
    isSameRawText
  };
};
|
||||
|
||||
/*
  Re-split a collection's raw text and queue the chunks for training.

  - Resolves the text through getCollectionAndRawText (the `rawText` argument,
    when given, is used as the new text).
  - Returns { insertLen: 0 } without writing anything when the text is unchanged.
  - Otherwise inserts one training record per chunk and stores the new text
    length/hash (and the fetched title, if any) on the collection, both inside
    the supplied session.
  - Rejects with 'Training model error' when the collection's trainingType is
    neither chunk nor qa.
*/
export const reloadCollectionChunks = async ({
  collection,
  tmbId,
  billId,
  rawText,
  session
}: {
  collection: CollectionWithDatasetType;
  tmbId: string;
  billId?: string;
  rawText?: string;
  session: ClientSession;
}): Promise<PushDatasetDataResponse> => {
  const resolved = await getCollectionAndRawText({
    collection,
    newRawText: rawText
  });
  const { title, collection: col, isSameRawText } = resolved;
  const newRawText = resolved.rawText;

  // Nothing changed since the last load: skip the whole pipeline.
  if (isSameRawText) {
    return { insertLen: 0 };
  }

  // split data
  const splitResult = splitText2Chunks({
    text: newRawText,
    chunkLen: col.chunkSize || 512,
    customReg: col.chunkSplitter ? [col.chunkSplitter] : []
  });
  const chunks = splitResult.chunks;

  // The training mode decides which of the dataset's models handles the chunks.
  const pickModel = () => {
    switch (col.trainingType) {
      case TrainingModeEnum.chunk:
        return col.datasetId.vectorModel;
      case TrainingModeEnum.qa:
        return col.datasetId.agentModel;
      default:
        return Promise.reject('Training model error');
    }
  };
  const model = await pickModel();

  // insert to training queue
  const trainingRecords = chunks.map((chunkText, chunkIndex) => ({
    teamId: col.teamId,
    tmbId,
    datasetId: col.datasetId._id,
    collectionId: col._id,
    billId,
    mode: col.trainingType,
    prompt: '',
    model,
    q: chunkText,
    a: '',
    chunkIndex
  }));
  const inserted = await MongoDatasetTraining.insertMany(trainingRecords, { session });

  // update raw text; the name is only overwritten when a title was fetched
  const collectionPatch = {
    ...(title && { name: title }),
    rawTextLength: newRawText.length,
    hashRawText: hashStr(newRawText)
  };
  await MongoDatasetCollection.findByIdAndUpdate(col._id, collectionPatch, { session });

  return { insertLen: inserted.length };
};
|
||||
|
||||
export const createOrGetCollectionTags = async ({
|
||||
tags,
|
||||
datasetId,
|
||||
@@ -268,3 +128,88 @@ export const collectionTagsToTagLabel = async ({
|
||||
})
|
||||
.filter(Boolean);
|
||||
};
|
||||
|
||||
/**
 * Re-read the source of a collection and, when its text changed, replace the
 * collection with a freshly created one.
 *
 * Only link collections and collections of an api dataset are accepted; any
 * other combination rejects with `DatasetErrEnum.notSupportSync`.
 *
 * @returns `DatasetCollectionSyncResultEnum.sameRaw` when the collection has a
 *          stored hash equal to the hash of the newly read text, otherwise
 *          `DatasetCollectionSyncResultEnum.success` after the new collection
 *          was created and the old one deleted within one mongo session.
 */
export const syncCollection = async (collection: CollectionWithDatasetType) => {
  const dataset = collection.datasetId;
  const isLinkCollection = collection.type === DatasetCollectionTypeEnum.link;

  if (!isLinkCollection && dataset.type !== DatasetTypeEnum.apiDataset) {
    return Promise.reject(DatasetErrEnum.notSupportSync);
  }

  // Work out how the source has to be read: by link, or through the api server.
  const resolveSourceReadParams = async () => {
    if (isLinkCollection) {
      if (!collection.rawLink) return Promise.reject('rawLink is missing');

      return {
        type: DatasetSourceReadTypeEnum.link,
        sourceId: collection.rawLink,
        selector: collection.metadata?.webPageSelector
      };
    }

    if (!collection.apiFileId) return Promise.reject('apiFileId is missing');
    if (!dataset.apiServer) return Promise.reject('apiServer not found');

    return {
      type: DatasetSourceReadTypeEnum.apiFile,
      sourceId: collection.apiFileId,
      apiServer: dataset.apiServer
    };
  };

  // Get new text
  const sourceReadType = await resolveSourceReadParams();
  const rawText = await readDatasetSourceRawText({
    teamId: collection.teamId,
    ...sourceReadType
  });

  // Same text as last time (and a hash was stored): nothing to do.
  const hashRawText = hashStr(rawText);
  const isUnchanged = !!collection.hashRawText && hashRawText === collection.hashRawText;
  if (isUnchanged) {
    return DatasetCollectionSyncResultEnum.sameRaw;
  }

  // The replacement collection copies the old one's settings, with fresh text stats.
  const createCollectionParams = {
    teamId: collection.teamId,
    tmbId: collection.tmbId,
    datasetId: collection.datasetId._id,
    name: collection.name,
    type: collection.type,

    fileId: collection.fileId,
    rawLink: collection.rawLink,
    externalFileId: collection.externalFileId,
    externalFileUrl: collection.externalFileUrl,
    apiFileId: collection.apiFileId,

    rawTextLength: rawText.length,
    hashRawText,

    tags: collection.tags,
    createTime: collection.createTime,

    parentId: collection.parentId,
    trainingType: collection.trainingType,
    chunkSize: collection.chunkSize,
    chunkSplitter: collection.chunkSplitter,
    qaPrompt: collection.qaPrompt,
    metadata: collection.metadata
  };

  await mongoSessionRun(async (session) => {
    // Create new collection
    await createCollectionAndInsertData({
      session,
      dataset,
      rawText: rawText,
      createCollectionParams
    });

    // Delete old collection (related source is kept: the new collection reuses it)
    await delCollection({
      collections: [collection],
      delRelatedSource: false,
      session
    });
  });

  return DatasetCollectionSyncResultEnum.success;
};
|
||||
|
@@ -7,6 +7,8 @@ import { TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/
|
||||
import axios from 'axios';
|
||||
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
|
||||
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
|
||||
import { APIFileServer } from '@fastgpt/global/core/dataset/apiDataset';
|
||||
import { useApiDatasetRequest } from './apiDataset/api';
|
||||
|
||||
export const readFileRawTextByUrl = async ({
|
||||
teamId,
|
||||
@@ -15,7 +17,7 @@ export const readFileRawTextByUrl = async ({
|
||||
}: {
|
||||
teamId: string;
|
||||
url: string;
|
||||
relatedId?: string;
|
||||
relatedId: string; // externalFileId / apiFileId
|
||||
}) => {
|
||||
const response = await axios({
|
||||
method: 'get',
|
||||
@@ -40,9 +42,9 @@ export const readFileRawTextByUrl = async ({
|
||||
};
|
||||
|
||||
/*
|
||||
fileId - local file, read from mongo
|
||||
link - request
|
||||
externalFile = request read
|
||||
fileId - local file, read from mongo
|
||||
link - request
|
||||
externalFile/apiFile = request read
|
||||
*/
|
||||
export const readDatasetSourceRawText = async ({
|
||||
teamId,
|
||||
@@ -50,14 +52,17 @@ export const readDatasetSourceRawText = async ({
|
||||
sourceId,
|
||||
isQAImport,
|
||||
selector,
|
||||
relatedId
|
||||
externalFileId,
|
||||
apiServer
|
||||
}: {
|
||||
teamId: string;
|
||||
type: DatasetSourceReadTypeEnum;
|
||||
sourceId: string;
|
||||
isQAImport?: boolean;
|
||||
selector?: string;
|
||||
relatedId?: string;
|
||||
|
||||
isQAImport?: boolean; // csv data
|
||||
selector?: string; // link selector
|
||||
externalFileId?: string; // external file dataset
|
||||
apiServer?: APIFileServer; // api dataset
|
||||
}): Promise<string> => {
|
||||
if (type === DatasetSourceReadTypeEnum.fileLocal) {
|
||||
const { rawText } = await readFileContentFromMongo({
|
||||
@@ -75,10 +80,19 @@ export const readDatasetSourceRawText = async ({
|
||||
|
||||
return result[0]?.content || '';
|
||||
} else if (type === DatasetSourceReadTypeEnum.externalFile) {
|
||||
if (!externalFileId) return Promise.reject('FileId not found');
|
||||
const rawText = await readFileRawTextByUrl({
|
||||
teamId,
|
||||
url: sourceId,
|
||||
relatedId
|
||||
relatedId: externalFileId
|
||||
});
|
||||
return rawText;
|
||||
} else if (type === DatasetSourceReadTypeEnum.apiFile) {
|
||||
if (!apiServer) return Promise.reject('apiServer not found');
|
||||
const rawText = await readApiServerFileContent({
|
||||
apiServer,
|
||||
apiFileId: sourceId,
|
||||
teamId
|
||||
});
|
||||
return rawText;
|
||||
}
|
||||
@@ -86,6 +100,18 @@ export const readDatasetSourceRawText = async ({
|
||||
return '';
|
||||
};
|
||||
|
||||
/**
 * Read the content of one api-dataset file.
 *
 * Builds a request helper for `apiServer` via `useApiDatasetRequest` and
 * returns whatever its `getFileContent` yields for the given team and file id.
 */
export const readApiServerFileContent = async ({
  apiServer,
  apiFileId,
  teamId
}: {
  apiServer: APIFileServer;
  apiFileId: string;
  teamId: string;
}) => {
  const apiDatasetClient = useApiDatasetRequest({ apiServer });
  return apiDatasetClient.getFileContent({ teamId, apiFileId });
};
|
||||
|
||||
export const rawText2Chunks = ({
|
||||
rawText,
|
||||
isQAImport,
|
||||
|
@@ -83,15 +83,18 @@ const DatasetSchema = new Schema({
|
||||
}
|
||||
}
|
||||
},
|
||||
externalReadUrl: {
|
||||
type: String
|
||||
},
|
||||
inheritPermission: {
|
||||
type: Boolean,
|
||||
default: true
|
||||
},
|
||||
apiServer: {
|
||||
type: Object
|
||||
},
|
||||
|
||||
// abandoned
|
||||
externalReadUrl: {
|
||||
type: String
|
||||
},
|
||||
defaultPermission: Number
|
||||
});
|
||||
|
||||
|
@@ -28,8 +28,7 @@ export const checkInvalidChunkAndLock = async ({
|
||||
err?.type === 'invalid_request_error' ||
|
||||
err?.code === 500
|
||||
) {
|
||||
addLog.info('Lock training data');
|
||||
console.log(err);
|
||||
addLog.error('Lock training data', err);
|
||||
|
||||
try {
|
||||
await MongoDatasetTraining.findByIdAndUpdate(data._id, {
|
||||
|
@@ -72,7 +72,6 @@ import { dispatchLoopEnd } from './loop/runLoopEnd';
|
||||
import { dispatchLoopStart } from './loop/runLoopStart';
|
||||
import { dispatchFormInput } from './interactive/formInput';
|
||||
import { dispatchToolParams } from './agent/runTool/toolParams';
|
||||
import { responseWrite } from '../../../common/response';
|
||||
|
||||
const callbackMap: Record<FlowNodeTypeEnum, Function> = {
|
||||
[FlowNodeTypeEnum.workflowStart]: dispatchWorkflowStart,
|
||||
@@ -500,8 +499,7 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
|
||||
value = replaceEditorVariable({
|
||||
text: value,
|
||||
nodes: runtimeNodes,
|
||||
variables,
|
||||
runningNode: node
|
||||
variables
|
||||
});
|
||||
|
||||
// replace reference variables
|
||||
@@ -693,9 +691,17 @@ export function getSystemVariable({
|
||||
chatId,
|
||||
responseChatItemId,
|
||||
histories = [],
|
||||
uid
|
||||
uid,
|
||||
chatConfig
|
||||
}: Props): SystemVariablesType {
|
||||
const variables = chatConfig?.variables || [];
|
||||
const variablesMap = variables.reduce<Record<string, any>>((acc, item) => {
|
||||
acc[item.key] = valueTypeFormat(item.defaultValue, item.valueType);
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
return {
|
||||
...variablesMap,
|
||||
userId: uid,
|
||||
appId: String(runningAppInfo.id),
|
||||
chatId,
|
||||
|
@@ -23,7 +23,6 @@ type RunPluginProps = ModuleDispatchProps<{
|
||||
[key: string]: any;
|
||||
}>;
|
||||
type RunPluginResponse = DispatchNodeResultType<{}>;
|
||||
|
||||
export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPluginResponse> => {
|
||||
const {
|
||||
node: { pluginId, version },
|
||||
@@ -31,7 +30,6 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
|
||||
query,
|
||||
params: { system_forbid_stream = false, ...data } // Plugin input
|
||||
} = props;
|
||||
|
||||
if (!pluginId) {
|
||||
return Promise.reject('pluginId can not find');
|
||||
}
|
||||
@@ -54,7 +52,6 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
|
||||
acc[cur.key] = cur.isToolOutput === false ? false : true;
|
||||
return acc;
|
||||
}, {}) ?? {};
|
||||
|
||||
const runtimeNodes = storeNodes2RuntimeNodes(
|
||||
plugin.nodes,
|
||||
getWorkflowEntryNodeIds(plugin.nodes)
|
||||
@@ -79,7 +76,6 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
|
||||
...filterSystemVariables(props.variables),
|
||||
appId: String(plugin.id)
|
||||
};
|
||||
|
||||
const { flowResponses, flowUsages, assistantResponses, runTimes } = await dispatchWorkFlow({
|
||||
...props,
|
||||
// Rewrite stream mode
|
||||
@@ -105,9 +101,7 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
|
||||
runtimeNodes,
|
||||
runtimeEdges: initWorkflowEdgeStatus(plugin.edges)
|
||||
});
|
||||
|
||||
const output = flowResponses.find((item) => item.moduleType === FlowNodeTypeEnum.pluginOutput);
|
||||
|
||||
if (output) {
|
||||
output.moduleLogo = plugin.avatar;
|
||||
}
|
||||
@@ -117,7 +111,6 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
|
||||
childrenUsage: flowUsages,
|
||||
error: !!output?.pluginOutput?.error
|
||||
});
|
||||
|
||||
return {
|
||||
// 嵌套运行时,如果 childApp stream=false,实际上不会有任何内容输出给用户,所以不需要存储
|
||||
assistantResponses: system_forbid_stream ? [] : assistantResponses,
|
||||
|
@@ -20,7 +20,7 @@ export const dispatchAnswer = (props: Record<string, any>): AnswerResponse => {
|
||||
} = props as AnswerProps;
|
||||
|
||||
const formatText = typeof text === 'string' ? text : JSON.stringify(text, null, 2);
|
||||
const responseText = `\n${formatText}`.replaceAll('\\n', '\n');
|
||||
const responseText = `\n${formatText}`;
|
||||
|
||||
workflowStreamResponse?.({
|
||||
event: SseResponseEventEnum.fastAnswer,
|
||||
|
@@ -110,8 +110,7 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
|
||||
replaceEditorVariable({
|
||||
text,
|
||||
nodes: runtimeNodes,
|
||||
variables: allVariables,
|
||||
runningNode: node
|
||||
variables: allVariables
|
||||
}),
|
||||
allVariables
|
||||
);
|
||||
|
@@ -12,6 +12,7 @@ import { readRawContentByFileBuffer } from '../../../../common/file/read/utils';
|
||||
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
|
||||
import { ChatItemType, UserChatItemValueItemType } from '@fastgpt/global/core/chat/type';
|
||||
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
|
||||
import { addLog } from '../../../../common/system/log';
|
||||
|
||||
type Props = ModuleDispatchProps<{
|
||||
[NodeInputKeyEnum.fileUrlList]: string[];
|
||||
@@ -138,7 +139,7 @@ export const getFileContentFromLinks = async ({
|
||||
|
||||
return url;
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
addLog.warn(`Parse url error`, { error });
|
||||
return '';
|
||||
}
|
||||
})
|
||||
|
@@ -47,8 +47,7 @@ export const dispatchUpdateVariable = async (props: Props): Promise<Response> =>
|
||||
? replaceEditorVariable({
|
||||
text: formatValue,
|
||||
nodes: runtimeNodes,
|
||||
variables,
|
||||
runningNode: node
|
||||
variables
|
||||
})
|
||||
: formatValue;
|
||||
} else {
|
||||
|
@@ -134,7 +134,7 @@ export const checkQuoteQAValue = (quoteQA?: SearchDataResponseItemType[]) => {
|
||||
if (quoteQA.length === 0) {
|
||||
return [];
|
||||
}
|
||||
if (quoteQA.some((item) => !item.q || !item.datasetId)) {
|
||||
if (quoteQA.some((item) => !item.q)) {
|
||||
return undefined;
|
||||
}
|
||||
return quoteQA;
|
||||
|
Reference in New Issue
Block a user