V4.8.15 feature (#3331)

* feat: add customize toolkit (#3205)

* chaoyang

* fix-auth

* add toolkit

* add order

* plugin usage

* fix

* delete console:

* Fix: Fix fullscreen preview top positioning and improve Markdown rendering logic (#3247)

* 完成任务:修复全屏预览顶部固定问题,优化 Markdown 渲染逻辑

* 有问题修改

* 问题再修改

* 修正问题

* fix: plugin standalone display issue (#3254)

* 4.8.15 test (#3246)

* o1 config

* perf: system plugin code

* 调整系统插件代码。增加html 渲染安全配置。 (#3258)

* perf: base64 picker

* perf: list app or dataset

* perf: plugin config code

* 小窗适配等问题 (#3257)

* 小窗适配等问题

* git问题

* 小窗剩余问题

* feat: system plugin auth and lock version (#3265)

* feat: system plugin auth and lock version

* update comment

* 4.8.15 test (#3267)

* tmp log

* perf: login direct

* perf: iframe html code

* remove log

* fix: plugin standalone display (#3277)

* refactor: 页面拆分&i18n拆分 (#3281)

* refactor: account组件拆成独立页面

* script: 新增i18n json文件创建脚本

* refactor: 页面i18n拆分

* i18n: add en&hant

* 4.8.15 test (#3285)

* tmp log

* remove log

* fix: watch avatar refresh

* perf: i18n code

* fix(plugin): use intro instead of userguide (#3290)

* Universal SSO (#3292)

* tmp log

* remove log

* feat: common oauth

* readme

* perf: sso provider

* remove sso code

* perf: refresh plugins

* feat: add api dataset (#3272)

* add api-dataset

* fix api-dataset

* fix api dataset

* fix ts

* perf: create collection code (#3301)

* tmp log

* remove log

* perf: i18n change

* update version doc

* feat: question guide from chatId

* perf: create collection code

* fix: request api

* fix: request api

* fix: tts auth and response type (#3303)

* perf: md splitter

* fix: tts auth and response type

* fix: api file dataset (#3307)

* perf: api dataset init (#3310)

* perf: collection schema

* perf: api dataset init

* refactor: 团队管理独立页面 (#3302)

* ui: 团队管理独立页面

* 代码优化

* fix

* perf: sync collection and ui check (#3314)

* perf: sync collection

* remove script

* perf: update api server

* perf: api dataset parent

* perf: team ui

* perf: team i18n

* update team ui

* perf: ui check

* perf: i18n

* fix: debug variables & cronjob & system plugin callback load (#3315)

* fix: debug variables & cronjob & system plugin callback load

* fix type

* fix

* fix

* fix: plugin dataset quote;perf: system variables init (#3316)

* fix: plugin dataset quote

* perf: system variables init

* perf: node templates ui;fix: dataset import ui (#3318)

* fix: dataset import ui

* perf: node templates ui

* perf: ui refresh

* feat:套餐改名和套餐跳转配置 (#3309)

* fixing:except Sidebar

* 去除了多余的代码

* 修正了套餐说明的代码

* 修正了误删除的show_git代码

* 修正了名字部分等代码

* 修正了问题,遗留了其他和ui讨论不一致的部分

* 4.8.15 test (#3319)

* remove log

* perf: bill ui

* perf: bill ui

* perf: log

* html渲染文档 (#3270)

* html渲染文档

* 文档有点小问题

* feat: doc (#3322)

* 集合重训练 (#3282)

* rebaser

* 一点补充

* 小问题

* 其他问题修正,删除集合保留文件的参数还没找到...

* reTraining

* delete useless

* 删除了一行错误代码

* 集合重训练部分

* fixing

* 删除console代码

* feat: navbar item config (#3326)

* perf: custom navbar code;perf: retraining code;feat: api dataset and dataset api doc (#3329)

* feat: api dataset and dataset api doc

* perf: retraining code

* perf: custom navbar code

* fix: ts (#3330)

* fix: ts

* fix: ts

* retraining ui

* perf: api collection filter

* perf: retraining button

---------

Co-authored-by: heheer <heheer@sealos.io>
Co-authored-by: Jiangween <145003935+Jiangween@users.noreply.github.com>
Co-authored-by: papapatrick <109422393+Patrickill@users.noreply.github.com>
This commit is contained in:
Archer
2024-12-06 10:56:53 +08:00
committed by GitHub
parent b188544386
commit 1aebe5f185
307 changed files with 7383 additions and 3981 deletions

View File

@@ -5,6 +5,7 @@ import { countGptMessagesTokens } from '../../../common/string/tiktoken/index';
import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt';
import { getLLMModel } from '../model';
import { llmCompletionsBodyFormat } from '../utils';
import { addLog } from '../../../common/system/log';
/*
query extension - 问题扩展
@@ -183,7 +184,7 @@ A: ${chatBg}
tokens: await countGptMessagesTokens(messages)
};
} catch (error) {
console.log(error);
addLog.error(`Query extension error`, error);
return {
rawQuery: query,
extensionQueries: [],

View File

@@ -51,7 +51,6 @@ export function reRankRecall({
}));
})
.catch((err) => {
console.log(err);
addLog.error('rerank error', err);
return [];

View File

@@ -5,39 +5,44 @@ import { getLLMModel } from '../ai/model';
import { MongoApp } from './schema';
export const beforeUpdateAppFormat = <T extends AppSchema['modules'] | undefined>({
nodes
nodes,
isPlugin
}: {
nodes: T;
isPlugin: boolean;
}) => {
if (nodes) {
let maxTokens = 3000;
// Check dataset maxTokens
if (isPlugin) {
let maxTokens = 16000;
nodes.forEach((item) => {
if (
item.flowNodeType === FlowNodeTypeEnum.chatNode ||
item.flowNodeType === FlowNodeTypeEnum.tools
) {
const model =
item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
const chatModel = getLLMModel(model);
const quoteMaxToken = chatModel.quoteMaxToken || 3000;
nodes.forEach((item) => {
if (
item.flowNodeType === FlowNodeTypeEnum.chatNode ||
item.flowNodeType === FlowNodeTypeEnum.tools
) {
const model =
item.inputs.find((item) => item.key === NodeInputKeyEnum.aiModel)?.value || '';
const chatModel = getLLMModel(model);
const quoteMaxToken = chatModel.quoteMaxToken || 16000;
maxTokens = Math.max(maxTokens, quoteMaxToken);
}
});
maxTokens = Math.max(maxTokens, quoteMaxToken);
}
});
nodes.forEach((item) => {
if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
item.inputs.forEach((input) => {
if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
const val = input.value as number;
if (val > maxTokens) {
input.value = maxTokens;
nodes.forEach((item) => {
if (item.flowNodeType === FlowNodeTypeEnum.datasetSearchNode) {
item.inputs.forEach((input) => {
if (input.key === NodeInputKeyEnum.datasetMaxTokens) {
const val = input.value as number;
if (val > maxTokens) {
input.value = maxTokens;
}
}
}
});
}
});
});
}
});
}
}
return {

View File

@@ -2,7 +2,6 @@ import { FlowNodeTemplateType } from '@fastgpt/global/core/workflow/type/node.d'
import { FlowNodeTypeEnum, defaultNodeVersion } from '@fastgpt/global/core/workflow/node/constant';
import { appData2FlowNodeIO, pluginData2FlowNodeIO } from '@fastgpt/global/core/workflow/utils';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
import type { PluginRuntimeType } from '@fastgpt/global/core/workflow/runtime/type';
import { FlowNodeTemplateTypeEnum } from '@fastgpt/global/core/workflow/constants';
import { getHandleConfig } from '@fastgpt/global/core/workflow/template/utils';
import { getNanoid } from '@fastgpt/global/common/string/tools';
@@ -11,6 +10,9 @@ import { MongoApp } from '../schema';
import { SystemPluginTemplateItemType } from '@fastgpt/global/core/workflow/type';
import { getSystemPluginTemplates } from '../../../../plugins/register';
import { getAppLatestVersion, getAppVersionById } from '../version/controller';
import { PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
import { MongoSystemPlugin } from './systemPluginSchema';
import { PluginErrEnum } from '@fastgpt/global/common/error/code/plugin';
/*
plugin id rule:
@@ -37,15 +39,45 @@ export async function splitCombinePluginId(id: string) {
type ChildAppType = SystemPluginTemplateItemType & { teamId?: string };
const getSystemPluginTemplateById = async (
pluginId: string
pluginId: string,
versionId?: string
): Promise<SystemPluginTemplateItemType> => {
const item = getSystemPluginTemplates().find((plugin) => plugin.id === pluginId);
if (!item) return Promise.reject('plugin not found');
if (!item) return Promise.reject(PluginErrEnum.unAuth);
return cloneDeep(item);
const plugin = cloneDeep(item);
if (plugin.associatedPluginId) {
// The verification plugin is set as a system plugin
const systemPlugin = await MongoSystemPlugin.findOne(
{ pluginId: plugin.id, 'customConfig.associatedPluginId': plugin.associatedPluginId },
'associatedPluginId'
).lean();
if (!systemPlugin) return Promise.reject(PluginErrEnum.unAuth);
const app = await MongoApp.findById(plugin.associatedPluginId).lean();
if (!app) return Promise.reject(PluginErrEnum.unAuth);
const version = versionId
? await getAppVersionById({
appId: plugin.associatedPluginId,
versionId,
app
})
: await getAppLatestVersion(plugin.associatedPluginId, app);
if (!version.versionId) return Promise.reject('App version not found');
plugin.workflow = {
nodes: version.nodes,
edges: version.edges,
chatConfig: version.chatConfig
};
plugin.version = versionId || String(version.versionId);
}
return plugin;
};
/* format plugin modules to plugin preview module */
/* Format plugin to workflow preview node data */
export async function getChildAppPreviewNode({
id
}: {
@@ -77,7 +109,9 @@ export async function getChildAppPreviewNode({
templateType: FlowNodeTemplateTypeEnum.teamApp,
version: version.versionId,
originCost: 0,
currentCost: 0
currentCost: 0,
hasTokenFee: false,
pluginOrder: 0
};
} else {
return getSystemPluginTemplateById(pluginId);
@@ -147,10 +181,12 @@ export async function getChildAppRuntimeById(
// 用不到
version: item?.pluginData?.nodeVersion || defaultNodeVersion,
originCost: 0,
currentCost: 0
currentCost: 0,
hasTokenFee: false,
pluginOrder: 0
};
} else {
return getSystemPluginTemplateById(pluginId);
return getSystemPluginTemplateById(pluginId, versionId);
}
})();
@@ -162,6 +198,7 @@ export async function getChildAppRuntimeById(
showStatus: app.showStatus,
currentCost: app.currentCost,
nodes: app.workflow.nodes,
edges: app.workflow.edges
edges: app.workflow.edges,
hasTokenFee: app.hasTokenFee
};
}

View File

@@ -0,0 +1,35 @@
import { connectionMongo, getMongoModel } from '../../../common/mongo/index';
import { PluginGroupSchemaType, TGroupType } from './type';
const { Schema } = connectionMongo;
// Name of the Mongo collection that stores plugin groups.
export const collectionName = 'app_plugin_groups';
/*
  Plugin group document.
  - groupId / groupName are required.
  - groupAvatar, groupTypes and groupOrder fall back to '', [] and 0.
*/
const PluginGroupSchema = new Schema({
// Group identifier; unique across the collection (see the index below).
groupId: {
type: String,
required: true
},
groupAvatar: {
type: String,
default: ''
},
groupName: {
type: String,
required: true
},
// List of { typeName, typeId } entries belonging to this group.
groupTypes: {
type: Array<TGroupType>,
default: []
},
// Numeric order value. NOTE(review): presumably used to sort groups when listing - confirm against callers.
groupOrder: {
type: Number,
default: 0
}
});
// Enforce one document per groupId.
PluginGroupSchema.index({ groupId: 1 }, { unique: true });
// Model typed with PluginGroupSchemaType, bound to the collection name above.
export const MongoPluginGroups = getMongoModel<PluginGroupSchemaType>(
collectionName,
PluginGroupSchema
);

View File

@@ -25,12 +25,20 @@ const SystemPluginSchema = new Schema({
type: Number,
default: 0
},
hasTokenFee: {
type: Boolean,
default: false
},
pluginOrder: {
type: Number,
default: 0
},
customConfig: Object
});
SystemPluginSchema.index({ pluginId: 1 });
export const MongoSystemPluginSchema = getMongoModel<SystemPluginConfigSchemaType>(
export const MongoSystemPlugin = getMongoModel<SystemPluginConfigSchemaType>(
collectionName,
SystemPluginSchema
);

View File

@@ -1,3 +1,4 @@
import { SystemPluginListItemType } from '@fastgpt/global/core/app/type';
import { FlowNodeTemplateTypeEnum } from '@fastgpt/global/core/workflow/constants';
import {
SystemPluginTemplateItemType,
@@ -9,7 +10,9 @@ export type SystemPluginConfigSchemaType = {
originCost: number; // n points/one time
currentCost: number;
hasTokenFee: boolean;
isActive: boolean;
pluginOrder: number;
inputConfig: SystemPluginTemplateItemType['inputConfig'];
customConfig?: {
@@ -19,6 +22,21 @@ export type SystemPluginConfigSchemaType = {
version: string;
weight?: number;
workflow: WorkflowTemplateBasicType;
templateType: FlowNodeTemplateTypeEnum;
templateType: string;
associatedPluginId: string;
userGuide: string;
};
};
/* One type entry inside a plugin group: a display name plus its id. */
export type TGroupType = {
typeName: string;
typeId: string;
};
/* Shape of a plugin group record: identity, avatar, name, its type entries and an order value. */
export type PluginGroupSchemaType = {
groupId: string;
groupAvatar: string;
groupName: string;
groupTypes: TGroupType[];
groupOrder: number;
};

View File

@@ -1,11 +1,11 @@
import { PluginRuntimeType } from '@fastgpt/global/core/workflow/runtime/type';
import { ChatNodeUsageType } from '@fastgpt/global/support/wallet/bill/type';
import { splitCombinePluginId } from './controller';
import { PluginSourceEnum } from '@fastgpt/global/core/plugin/constants';
import { PluginRuntimeType } from '@fastgpt/global/core/plugin/type';
/*
1. Commercial plugin: n points per times
2. Other plugin: sum of children points
Plugin points calculation:
1. Return 0 if error
2. Add configured points if commercial plugin
3. Add sum of child nodes points
*/
export const computedPluginUsage = async ({
plugin,
@@ -16,13 +16,13 @@ export const computedPluginUsage = async ({
childrenUsage: ChatNodeUsageType[];
error?: boolean;
}) => {
const { source } = await splitCombinePluginId(plugin.id);
// Commercial plugin: n points per times
if (source === PluginSourceEnum.commercial) {
if (error) return 0;
return plugin.currentCost ?? 0;
if (error) {
return 0;
}
return childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);
const childrenIUsages = childrenUsage.reduce((sum, item) => sum + (item.totalPoints || 0), 0);
const pluginCurrentCose = plugin.currentCost ?? 0;
return plugin.hasTokenFee ? pluginCurrentCose + childrenIUsages : pluginCurrentCose;
};

View File

@@ -0,0 +1,143 @@
import type {
APIFileContentResponse,
APIFileListResponse,
APIFileReadResponse,
APIFileServer
} from '@fastgpt/global/core/dataset/apiDataset';
import axios, { Method } from 'axios';
import { addLog } from '../../../common/system/log';
import { readFileRawTextByUrl } from '../read';
import { ParentIdType } from '@fastgpt/global/common/parentFolder/type';
/*
  Envelope expected from the API dataset server:
  `success` flags the outcome, `message` carries the error text,
  and `data` holds the actual payload (untyped here).
*/
type ResponseDataType = {
success: boolean;
message: string;
data: any;
};
/**
 * Create request helpers bound to one API-dataset file server.
 *
 * All calls share a single axios instance configured with the server's base URL,
 * a JSON content type and a `Bearer` authorization header.
 *
 * @param apiServer - Connection settings; `baseUrl` and `authorization` are read.
 * @returns `getFileContent`, `listFiles` and `getFilePreviewUrl`. Each one rejects
 *          (never throws synchronously) when the request or its validation fails.
 */
export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }) => {
  const instance = axios.create({
    baseURL: apiServer.baseUrl,
    timeout: 60000, // request timeout
    headers: {
      'content-type': 'application/json',
      Authorization: `Bearer ${apiServer.authorization}`
    }
  });

  /**
   * Validate the response envelope and unwrap its `data` field.
   * Rejects when the body is missing or when `success` is falsy; because this runs
   * inside the `.then()` of `request`, those rejections also pass through `responseError`.
   */
  const checkRes = (data: ResponseDataType) => {
    if (data === undefined) {
      addLog.info('Api dataset data is empty');
      return Promise.reject('服务器异常');
    } else if (!data.success) {
      return Promise.reject(data);
    }
    return data.data;
  };

  /**
   * Normalize any failure into a rejected promise, preferring a `{ message }` object.
   * NOTE(review): the `err.message` check runs before the `err.response.data` check,
   * so an error carrying both is reported by its `message` - confirm this is intended.
   */
  const responseError = (err: any) => {
    // Use the project logger (like the rest of the service code) instead of console.log.
    addLog.error('Api dataset request error', err);

    if (!err) {
      return Promise.reject({ message: '未知错误' });
    }
    if (typeof err === 'string') {
      return Promise.reject({ message: err });
    }
    if (typeof err.message === 'string') {
      return Promise.reject({ message: err.message });
    }
    if (typeof err.data === 'string') {
      return Promise.reject({ message: err.data });
    }
    if (err?.response?.data) {
      return Promise.reject(err?.response?.data);
    }
    return Promise.reject(err);
  };

  /**
   * Send one request. `data` travels as the body for POST/PUT and as query
   * params for every other method.
   */
  const request = <T>(url: string, data: any, method: Method): Promise<T> => {
    // Strip undefined fields before sending
    for (const key in data) {
      if (data[key] === undefined) {
        delete data[key];
      }
    }

    // `Method` also allows lower-case verbs, so compare case-insensitively.
    const sendAsBody = ['POST', 'PUT'].includes(method.toUpperCase());

    return instance
      .request({
        url,
        method,
        data: sendAsBody ? data : undefined,
        params: sendAsBody ? undefined : data
      })
      .then((res) => checkRes(res.data))
      .catch((err) => responseError(err));
  };

  /**
   * List files on the server, optionally filtered by `searchKey` and `parentId`.
   * Rejects unless the payload is an array whose items all have `id`, `name` and a defined `type`.
   */
  const listFiles = async ({
    searchKey,
    parentId
  }: {
    searchKey?: string;
    parentId?: ParentIdType;
  }) => {
    const files = await request<APIFileListResponse>(
      `/v1/file/list`,
      {
        searchKey,
        parentId
      },
      'POST'
    );

    if (!Array.isArray(files)) {
      return Promise.reject('Invalid file list format');
    }
    if (files.some((file) => !file.id || !file.name || typeof file.type === 'undefined')) {
      return Promise.reject('Invalid file data format');
    }

    return files;
  };

  /**
   * Get the text of one file. Inline `content` wins; otherwise the file behind
   * `previewUrl` is downloaded and parsed via `readFileRawTextByUrl`.
   */
  const getFileContent = async ({ teamId, apiFileId }: { teamId: string; apiFileId: string }) => {
    const data = await request<APIFileContentResponse>(
      `/v1/file/content`,
      { id: apiFileId },
      'GET'
    );
    // Optional chaining: a success envelope without `data` must end in the
    // validation error below, not in a TypeError.
    const content = data?.content;
    const previewUrl = data?.previewUrl;

    if (content) {
      return content;
    }
    if (previewUrl) {
      const rawText = await readFileRawTextByUrl({
        teamId,
        url: previewUrl,
        relatedId: apiFileId
      });
      return rawText;
    }

    return Promise.reject('Invalid content type: content or previewUrl is required');
  };

  /**
   * Get the preview URL of one file; rejects when the server does not return a non-empty string.
   */
  const getFilePreviewUrl = async ({ apiFileId }: { apiFileId: string }) => {
    const result = await request<APIFileReadResponse>(`/v1/file/read`, { id: apiFileId }, 'GET');
    // Tolerate a missing payload so the check below reports it.
    const url = result?.url;

    if (!url || typeof url !== 'string') {
      return Promise.reject('Invalid response url');
    }

    return url;
  };

  return {
    getFileContent,
    listFiles,
    getFilePreviewUrl
  };
};

View File

@@ -3,7 +3,8 @@ import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset
import { MongoDatasetCollection } from './schema';
import {
CollectionWithDatasetType,
DatasetCollectionSchemaType
DatasetCollectionSchemaType,
DatasetSchemaType
} from '@fastgpt/global/core/dataset/type';
import { MongoDatasetTraining } from '../training/schema';
import { MongoDatasetData } from '../data/schema';
@@ -13,7 +14,132 @@ import { delFileByFileIdList } from '../../../common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { ClientSession } from '../../../common/mongo';
import { createOrGetCollectionTags } from './utils';
import { rawText2Chunks } from '../read';
import { checkDatasetLimit } from '../../../support/permission/teamLimit';
import { predictDataLimitLength } from '../../../../global/core/dataset/utils';
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
import { createTrainingUsage } from '../../../support/wallet/usage/controller';
import { UsageSourceEnum } from '@fastgpt/global/support/wallet/usage/constants';
import { getLLMModel, getVectorModel } from '../../ai/model';
import { pushDataListToTrainingQueue } from '../training/controller';
import { MongoImage } from '../../../common/file/image/schema';
import { hashStr } from '@fastgpt/global/common/string/tools';
/**
 * Split raw text into chunks, create a collection for it and queue the chunks for training.
 *
 * Steps: split text -> check the team's dataset limit -> (inside a mongo session)
 * create the collection, create a training usage record, push chunks to the training
 * queue and, when `relatedId` is given, unset `expiredTime` on the related images.
 *
 * @param dataset - Target dataset; `_id`, `vectorModel` and `agentModel` are read.
 * @param rawText - Full text of the source; also hashed and measured for the collection record.
 * @param relatedId - Optional id matched against `metadata.relatedId` of stored images.
 * @param createCollectionParams - Collection fields plus chunking options.
 * @param isQAImport - Forwarded to the chunk splitter.
 * @param session - Existing session to reuse; when omitted a new one is opened via `mongoSessionRun`.
 * @returns The new collection id and the training-queue insert result.
 */
export const createCollectionAndInsertData = async ({
  dataset,
  rawText,
  relatedId,
  createCollectionParams,
  isQAImport = false,
  session
}: {
  dataset: DatasetSchemaType;
  rawText: string;
  relatedId?: string;
  createCollectionParams: CreateOneCollectionParams;
  isQAImport?: boolean;
  session?: ClientSession;
}) => {
  const {
    teamId,
    tmbId,
    chunkSize,
    chunkSplitter,
    qaPrompt,
    name: usageName
  } = createCollectionParams;
  // Training mode defaults to plain chunking
  const trainingType = createCollectionParams.trainingType || TrainingModeEnum.chunk;
  const isChunkMode = trainingType === TrainingModeEnum.chunk;

  // Split the text; only chunk mode uses an overlap between neighbours
  const chunks = rawText2Chunks({
    rawText,
    chunkLen: chunkSize,
    overlapRatio: isChunkMode ? 0.2 : 0,
    customReg: chunkSplitter ? [chunkSplitter] : [],
    isQAImport
  });

  // Make sure the team is allowed to insert this much data
  const insertLen = predictDataLimitLength(trainingType, chunks);
  await checkDatasetLimit({ teamId, insertLen });

  const runInSession = async (activeSession: ClientSession) => {
    // Collection record
    const createdCollection = await createOneCollection({
      ...createCollectionParams,
      hashRawText: hashStr(rawText),
      rawTextLength: rawText.length,
      session: activeSession
    });
    const collectionId = createdCollection._id;

    // Training usage (bill) record
    const trainingUsage = await createTrainingUsage({
      teamId,
      tmbId,
      appName: usageName,
      billSource: UsageSourceEnum.training,
      vectorModel: getVectorModel(dataset.vectorModel)?.name,
      agentModel: getLLMModel(dataset.agentModel)?.name,
      session: activeSession
    });
    const billId = trainingUsage.billId;

    // Queue every chunk, tagged with its position in the text
    const indexedChunks = chunks.map((chunk, position) => ({
      ...chunk,
      chunkIndex: position
    }));
    const insertResults = await pushDataListToTrainingQueue({
      teamId,
      tmbId,
      datasetId: dataset._id,
      collectionId,
      agentModel: dataset.agentModel,
      vectorModel: dataset.vectorModel,
      trainingMode: trainingType,
      prompt: qaPrompt,
      billId,
      data: indexedChunks,
      session: activeSession
    });

    // Related images: drop expiredTime so the TTL no longer removes them
    if (relatedId) {
      const imageFilter = {
        teamId,
        'metadata.relatedId': relatedId
      };
      await MongoImage.updateMany(
        imageFilter,
        {
          $unset: {
            expiredTime: 1
          }
        },
        {
          session: activeSession
        }
      );
    }

    return {
      collectionId,
      insertResults
    };
  };

  // Reuse the caller's session when present, otherwise open one
  return session ? runInSession(session) : mongoSessionRun(runInSession);
};
/* Parameters for creating one collection: the shared create params plus the owning team/member ids and an optional mongo session. */
export type CreateOneCollectionParams = CreateDatasetCollectionParams & {
teamId: string;
tmbId: string;
session?: ClientSession;
};
export async function createOneCollection({
teamId,
tmbId,
@@ -33,18 +159,15 @@ export async function createOneCollection({
externalFileId,
externalFileUrl,
apiFileId,
hashRawText,
rawTextLength,
metadata = {},
session,
tags,
...props
}: CreateDatasetCollectionParams & {
teamId: string;
tmbId: string;
[key: string]: any;
session?: ClientSession;
}) {
createTime
}: CreateOneCollectionParams) {
// Create collection tags
const collectionTags = await createOrGetCollectionTags({ tags, teamId, datasetId, session });
@@ -52,7 +175,6 @@ export async function createOneCollection({
const [collection] = await MongoDatasetCollection.create(
[
{
...props,
teamId,
tmbId,
parentId: parentId || null,
@@ -64,16 +186,18 @@ export async function createOneCollection({
chunkSize,
chunkSplitter,
qaPrompt,
metadata,
fileId,
rawLink,
...(fileId ? { fileId } : {}),
...(rawLink ? { rawLink } : {}),
...(externalFileId ? { externalFileId } : {}),
externalFileUrl,
...(externalFileUrl ? { externalFileUrl } : {}),
...(apiFileId ? { apiFileId } : {}),
rawTextLength,
hashRawText,
metadata,
tags: collectionTags
tags: collectionTags,
createTime
}
],
{ session }
@@ -116,7 +240,68 @@ export const delCollectionRelatedSource = async ({
/**
* delete collection and it related data
*/
export async function delCollectionAndRelatedSources({
/**
 * Delete collections together with their training records, data rows and vectors.
 *
 * The team id is taken from the first collection. Data rows and collection
 * documents are removed inside `session`; training records and vector data are
 * removed without it.
 *
 * @param collections - Collections to delete; `datasetId` may be an id or a populated dataset.
 * @param session - Mongo session used for the data and collection deletes.
 * @param delRelatedSource - When true, also delete the related files and images.
 * @returns Nothing; resolves immediately for an empty list, rejects when the team id is missing.
 */
export async function delCollection({
  collections,
  session,
  delRelatedSource
}: {
  collections: (CollectionWithDatasetType | DatasetCollectionSchemaType)[];
  session: ClientSession;
  delRelatedSource: boolean;
}) {
  if (collections.length === 0) return;

  const teamId = collections[0].teamId;

  if (!teamId) return Promise.reject('teamId is not exist');

  // Unique dataset ids, whether datasetId is a plain id or a populated document
  const datasetIds = Array.from(
    new Set(
      collections.map((item) => {
        if (typeof item.datasetId === 'string') {
          return String(item.datasetId);
        }
        return String(item.datasetId._id);
      })
    )
  );
  const collectionIds = collections.map((item) => String(item._id));

  // delete training data
  // The filter key is `datasetId`, the field name used when pushing to the training
  // queue and on the collection schema. The previous `datasetIds` key is not a stored field.
  // NOTE(review): confirm against the training and data schemas.
  await MongoDatasetTraining.deleteMany({
    teamId,
    datasetId: { $in: datasetIds },
    collectionId: { $in: collectionIds }
  });

  /* file and imgs */
  if (delRelatedSource) {
    await delCollectionRelatedSource({ collections, session });
  }

  // delete dataset.datas
  await MongoDatasetData.deleteMany(
    { teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
    { session }
  );

  // delete collections
  await MongoDatasetCollection.deleteMany(
    {
      teamId,
      _id: { $in: collectionIds }
    },
    { session }
  );

  // no session delete: delete files, vector data
  await deleteDatasetDataVector({ teamId, datasetIds, collectionIds });
}
/**
* delete delOnlyCollection
*/
export async function delOnlyCollection({
collections,
session
}: {
@@ -148,9 +333,6 @@ export async function delCollectionAndRelatedSources({
collectionId: { $in: collectionIds }
});
/* file and imgs */
await delCollectionRelatedSource({ collections, session });
// delete dataset.datas
await MongoDatasetData.deleteMany(
{ teamId, datasetIds: { $in: datasetIds }, collectionId: { $in: collectionIds } },

View File

@@ -10,90 +10,100 @@ import {
export const DatasetColCollectionName = 'dataset_collections';
const DatasetCollectionSchema = new Schema({
parentId: {
type: Schema.Types.ObjectId,
ref: DatasetColCollectionName,
default: null
},
teamId: {
type: Schema.Types.ObjectId,
ref: TeamCollectionName,
required: true
},
tmbId: {
type: Schema.Types.ObjectId,
ref: TeamMemberCollectionName,
required: true
},
datasetId: {
type: Schema.Types.ObjectId,
ref: DatasetCollectionName,
required: true
},
type: {
type: String,
enum: Object.keys(DatasetCollectionTypeMap),
required: true
},
name: {
type: String,
required: true
},
createTime: {
type: Date,
default: () => new Date()
},
updateTime: {
type: Date,
default: () => new Date()
},
forbid: {
type: Boolean,
default: false
},
const DatasetCollectionSchema = new Schema(
{
parentId: {
type: Schema.Types.ObjectId,
ref: DatasetColCollectionName,
default: null
},
teamId: {
type: Schema.Types.ObjectId,
ref: TeamCollectionName,
required: true
},
tmbId: {
type: Schema.Types.ObjectId,
ref: TeamMemberCollectionName,
required: true
},
datasetId: {
type: Schema.Types.ObjectId,
ref: DatasetCollectionName,
required: true
},
type: {
type: String,
enum: Object.keys(DatasetCollectionTypeMap),
required: true
},
name: {
type: String,
required: true
},
createTime: {
type: Date,
default: () => new Date()
},
updateTime: {
type: Date,
default: () => new Date()
},
forbid: {
type: Boolean,
default: false
},
// chunk filed
trainingType: {
type: String,
enum: Object.keys(TrainingTypeMap)
},
chunkSize: {
type: Number,
required: true
},
chunkSplitter: {
type: String
},
qaPrompt: {
type: String
},
ocrParse: Boolean,
// chunk filed
trainingType: {
type: String,
enum: Object.keys(TrainingTypeMap)
},
chunkSize: {
type: Number,
required: true
},
chunkSplitter: {
type: String
},
qaPrompt: {
type: String
},
ocrParse: Boolean,
tags: {
type: [String],
default: []
},
tags: {
type: [String],
default: []
},
// local file collection
fileId: {
type: Schema.Types.ObjectId,
ref: 'dataset.files'
},
// web link collection
rawLink: String,
// external collection
externalFileId: String,
// local file collection
fileId: {
type: Schema.Types.ObjectId,
ref: 'dataset.files'
},
// web link collection
rawLink: String,
// api collection
apiFileId: String,
// external collection
externalFileId: String,
externalFileUrl: String, // external import url
// metadata
rawTextLength: Number,
hashRawText: String,
externalFileUrl: String, // external import url
metadata: {
type: Object,
default: {}
// metadata
rawTextLength: Number,
hashRawText: String,
metadata: {
type: Object,
default: {}
}
},
{
// Auto update updateTime
timestamps: {
updatedAt: 'updateTime'
}
}
});
);
try {
// auth file

View File

@@ -1,17 +1,19 @@
import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type.d';
import { MongoDatasetCollection } from './schema';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { MongoDatasetTraining } from '../training/schema';
import { urlsFetch } from '../../../common/string/cheerio';
import {
DatasetCollectionTypeEnum,
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { ClientSession } from '../../../common/mongo';
import { PushDatasetDataResponse } from '@fastgpt/global/core/dataset/api';
import { MongoDatasetCollectionTags } from '../tag/schema';
import { readFromSecondary } from '../../../common/mongo/utils';
import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
import {
DatasetCollectionSyncResultEnum,
DatasetCollectionTypeEnum,
DatasetSourceReadTypeEnum,
DatasetTypeEnum
} from '@fastgpt/global/core/dataset/constants';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { readDatasetSourceRawText } from '../read';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { mongoSessionRun } from '../../../common/mongo/sessionRun';
import { createCollectionAndInsertData, delCollection } from './controller';
/**
* get all collection by top collectionId
@@ -61,148 +63,6 @@ export function getCollectionUpdateTime({ name, time }: { time?: Date; name: str
return new Date();
}
/**
 * Resolve a collection (given directly or loaded by id) and obtain its raw text.
 *
 * Returns the collection, a title, the raw text and `isSameRawText`, which is
 * truthy only when the text is non-empty and its hash equals the stored `hashRawText`.
 * Rejects with 'Collection not found' when neither input yields a collection.
 */
export const getCollectionAndRawText = async ({
collectionId,
collection,
newRawText
}: {
collectionId?: string;
collection?: CollectionWithDatasetType;
newRawText?: string;
}) => {
// Prefer the passed collection; otherwise load it by id with its dataset populated.
const col = await (async () => {
if (collection) return collection;
if (collectionId) {
return (await MongoDatasetCollection.findById(collectionId).populate(
'datasetId'
)) as CollectionWithDatasetType;
}
return null;
})();
if (!col) {
return Promise.reject('Collection not found');
}
// Text source priority: explicit newRawText > freshly fetched link content > empty.
const { title, rawText } = await (async () => {
if (newRawText)
return {
title: '',
rawText: newRawText
};
// link
if (col.type === DatasetCollectionTypeEnum.link && col.rawLink) {
// crawl new data
// Selector comes from the dataset's website config, falling back to the collection metadata.
const result = await urlsFetch({
urlList: [col.rawLink],
selector: col.datasetId?.websiteConfig?.selector || col?.metadata?.webPageSelector
});
return {
title: result[0]?.title,
rawText: result[0]?.content
};
}
// file
return {
title: '',
rawText: ''
};
})();
const hashRawText = hashStr(rawText);
// Short-circuits on rawText, so the value is falsy (not necessarily `false`) for empty text.
const isSameRawText = rawText && col.hashRawText === hashRawText;
return {
collection: col,
title,
rawText,
isSameRawText
};
};
/*
  Reload a collection's chunks into the training queue.
  Resolves the raw text via getCollectionAndRawText, returns { insertLen: 0 } when the
  text is unchanged, otherwise splits it, inserts one training record per chunk and
  updates the collection's name (when a title was found), text length and hash.
  All writes use the given session.
*/
export const reloadCollectionChunks = async ({
collection,
tmbId,
billId,
rawText,
session
}: {
collection: CollectionWithDatasetType;
tmbId: string;
billId?: string;
rawText?: string;
session: ClientSession;
}): Promise<PushDatasetDataResponse> => {
const {
title,
rawText: newRawText,
collection: col,
isSameRawText
} = await getCollectionAndRawText({
collection,
newRawText: rawText
});
// Nothing to do when the text hash matches the stored one.
if (isSameRawText)
return {
insertLen: 0
};
// split data
// Chunk length falls back to 512 when the collection has no chunkSize.
const { chunks } = splitText2Chunks({
text: newRawText,
chunkLen: col.chunkSize || 512,
customReg: col.chunkSplitter ? [col.chunkSplitter] : []
});
// insert to training queue
// chunk mode uses the dataset's vector model, qa mode its agent model; anything else rejects.
const model = await (() => {
if (col.trainingType === TrainingModeEnum.chunk) return col.datasetId.vectorModel;
if (col.trainingType === TrainingModeEnum.qa) return col.datasetId.agentModel;
return Promise.reject('Training model error');
})();
// One training record per chunk: the chunk text goes to `q`, `a` stays empty.
const result = await MongoDatasetTraining.insertMany(
chunks.map((item, i) => ({
teamId: col.teamId,
tmbId,
datasetId: col.datasetId._id,
collectionId: col._id,
billId,
mode: col.trainingType,
prompt: '',
model,
q: item,
a: '',
chunkIndex: i
})),
{ session }
);
// update raw text
// The name is only overwritten when a non-empty title was obtained.
await MongoDatasetCollection.findByIdAndUpdate(
col._id,
{
...(title && { name: title }),
rawTextLength: newRawText.length,
hashRawText: hashStr(newRawText)
},
{ session }
);
return {
insertLen: result.length
};
};
export const createOrGetCollectionTags = async ({
tags,
datasetId,
@@ -268,3 +128,88 @@ export const collectionTagsToTagLabel = async ({
})
.filter(Boolean);
};
/**
 * Re-read the source of a collection and rebuild it when the text has changed.
 *
 * Only link collections and collections of an api dataset can be synced; anything
 * else rejects with `DatasetErrEnum.notSupportSync`. When the hash of the fresh text
 * equals the stored hash, nothing is written. Otherwise a new collection is created
 * from the old one's settings and the old one is deleted (its related source is kept),
 * both inside one mongo session.
 *
 * @param collection - Collection with its dataset populated on `datasetId`.
 * @returns `sameRaw` when the text is unchanged, otherwise `success`.
 */
export const syncCollection = async (collection: CollectionWithDatasetType) => {
  const dataset = collection.datasetId;
  const isLinkCollection = collection.type === DatasetCollectionTypeEnum.link;
  const isApiDataset = dataset.type === DatasetTypeEnum.apiDataset;

  if (!(isLinkCollection || isApiDataset)) {
    return Promise.reject(DatasetErrEnum.notSupportSync);
  }

  // Work out how the source has to be read
  const resolveSourceParams = async () => {
    if (collection.type === DatasetCollectionTypeEnum.link) {
      if (!collection.rawLink) return Promise.reject('rawLink is missing');

      return {
        type: DatasetSourceReadTypeEnum.link,
        sourceId: collection.rawLink,
        selector: collection.metadata?.webPageSelector
      };
    }

    if (!collection.apiFileId) return Promise.reject('apiFileId is missing');
    if (!dataset.apiServer) return Promise.reject('apiServer not found');

    return {
      type: DatasetSourceReadTypeEnum.apiFile,
      sourceId: collection.apiFileId,
      apiServer: dataset.apiServer
    };
  };
  const sourceParams = await resolveSourceParams();

  // Fetch the current text of the source
  const rawText = await readDatasetSourceRawText({
    teamId: collection.teamId,
    ...sourceParams
  });

  // Unchanged text: stop here
  const hashRawText = hashStr(rawText);
  const isUnchanged = !!collection.hashRawText && hashRawText === collection.hashRawText;
  if (isUnchanged) {
    return DatasetCollectionSyncResultEnum.sameRaw;
  }

  await mongoSessionRun(async (session) => {
    // New collection carrying over the old one's settings
    await createCollectionAndInsertData({
      session,
      dataset,
      rawText,
      createCollectionParams: {
        teamId: collection.teamId,
        tmbId: collection.tmbId,
        datasetId: collection.datasetId._id,
        name: collection.name,
        type: collection.type,

        fileId: collection.fileId,
        rawLink: collection.rawLink,
        externalFileId: collection.externalFileId,
        externalFileUrl: collection.externalFileUrl,
        apiFileId: collection.apiFileId,

        rawTextLength: rawText.length,
        hashRawText,

        tags: collection.tags,
        createTime: collection.createTime,
        parentId: collection.parentId,

        trainingType: collection.trainingType,
        chunkSize: collection.chunkSize,
        chunkSplitter: collection.chunkSplitter,
        qaPrompt: collection.qaPrompt,
        metadata: collection.metadata
      }
    });

    // Old collection goes away; its related source is kept
    await delCollection({
      collections: [collection],
      delRelatedSource: false,
      session
    });
  });

  return DatasetCollectionSyncResultEnum.success;
};

View File

@@ -7,6 +7,8 @@ import { TextSplitProps, splitText2Chunks } from '@fastgpt/global/common/string/
import axios from 'axios';
import { readRawContentByFileBuffer } from '../../common/file/read/utils';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { APIFileServer } from '@fastgpt/global/core/dataset/apiDataset';
import { useApiDatasetRequest } from './apiDataset/api';
export const readFileRawTextByUrl = async ({
teamId,
@@ -15,7 +17,7 @@ export const readFileRawTextByUrl = async ({
}: {
teamId: string;
url: string;
relatedId?: string;
relatedId: string; // externalFileId / apiFileId
}) => {
const response = await axios({
method: 'get',
@@ -40,9 +42,9 @@ export const readFileRawTextByUrl = async ({
};
/*
fileId - local file, read from mongo
link - request
externalFile = request read
fileId - local file, read from mongo
link - request
externalFile/apiFile = request read
*/
export const readDatasetSourceRawText = async ({
teamId,
@@ -50,14 +52,17 @@ export const readDatasetSourceRawText = async ({
sourceId,
isQAImport,
selector,
relatedId
externalFileId,
apiServer
}: {
teamId: string;
type: DatasetSourceReadTypeEnum;
sourceId: string;
isQAImport?: boolean;
selector?: string;
relatedId?: string;
isQAImport?: boolean; // csv data
selector?: string; // link selector
externalFileId?: string; // external file dataset
apiServer?: APIFileServer; // api dataset
}): Promise<string> => {
if (type === DatasetSourceReadTypeEnum.fileLocal) {
const { rawText } = await readFileContentFromMongo({
@@ -75,10 +80,19 @@ export const readDatasetSourceRawText = async ({
return result[0]?.content || '';
} else if (type === DatasetSourceReadTypeEnum.externalFile) {
if (!externalFileId) return Promise.reject('FileId not found');
const rawText = await readFileRawTextByUrl({
teamId,
url: sourceId,
relatedId
relatedId: externalFileId
});
return rawText;
} else if (type === DatasetSourceReadTypeEnum.apiFile) {
if (!apiServer) return Promise.reject('apiServer not found');
const rawText = await readApiServerFileContent({
apiServer,
apiFileId: sourceId,
teamId
});
return rawText;
}
@@ -86,6 +100,18 @@ export const readDatasetSourceRawText = async ({
return '';
};
/**
 * Read the content of one file through the dataset's API server.
 *
 * Thin wrapper: builds a request helper with `useApiDatasetRequest` for the
 * given server and delegates to its `getFileContent`.
 *
 * @param apiServer - API file server configuration of the dataset.
 * @param apiFileId - Id of the file to read on that server.
 * @param teamId - Team the read is performed for; forwarded to `getFileContent`.
 * @returns Whatever `getFileContent` resolves to (used as the raw text by the
 *   caller in `readDatasetSourceRawText`).
 */
export const readApiServerFileContent = async ({
  apiServer,
  apiFileId,
  teamId
}: {
  apiServer: APIFileServer;
  apiFileId: string;
  teamId: string;
}) => {
  const apiDatasetRequest = useApiDatasetRequest({ apiServer });

  // Call as a method on the helper object so any `this` binding is preserved.
  return apiDatasetRequest.getFileContent({ teamId, apiFileId });
};
export const rawText2Chunks = ({
rawText,
isQAImport,

View File

@@ -83,15 +83,18 @@ const DatasetSchema = new Schema({
}
}
},
externalReadUrl: {
type: String
},
inheritPermission: {
type: Boolean,
default: true
},
apiServer: {
type: Object
},
// abandoned
externalReadUrl: {
type: String
},
defaultPermission: Number
});

View File

@@ -28,8 +28,7 @@ export const checkInvalidChunkAndLock = async ({
err?.type === 'invalid_request_error' ||
err?.code === 500
) {
addLog.info('Lock training data');
console.log(err);
addLog.error('Lock training data', err);
try {
await MongoDatasetTraining.findByIdAndUpdate(data._id, {

View File

@@ -72,7 +72,6 @@ import { dispatchLoopEnd } from './loop/runLoopEnd';
import { dispatchLoopStart } from './loop/runLoopStart';
import { dispatchFormInput } from './interactive/formInput';
import { dispatchToolParams } from './agent/runTool/toolParams';
import { responseWrite } from '../../../common/response';
const callbackMap: Record<FlowNodeTypeEnum, Function> = {
[FlowNodeTypeEnum.workflowStart]: dispatchWorkflowStart,
@@ -500,8 +499,7 @@ export async function dispatchWorkFlow(data: Props): Promise<DispatchFlowRespons
value = replaceEditorVariable({
text: value,
nodes: runtimeNodes,
variables,
runningNode: node
variables
});
// replace reference variables
@@ -693,9 +691,17 @@ export function getSystemVariable({
chatId,
responseChatItemId,
histories = [],
uid
uid,
chatConfig
}: Props): SystemVariablesType {
const variables = chatConfig?.variables || [];
const variablesMap = variables.reduce<Record<string, any>>((acc, item) => {
acc[item.key] = valueTypeFormat(item.defaultValue, item.valueType);
return acc;
}, {});
return {
...variablesMap,
userId: uid,
appId: String(runningAppInfo.id),
chatId,

View File

@@ -23,7 +23,6 @@ type RunPluginProps = ModuleDispatchProps<{
[key: string]: any;
}>;
type RunPluginResponse = DispatchNodeResultType<{}>;
export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPluginResponse> => {
const {
node: { pluginId, version },
@@ -31,7 +30,6 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
query,
params: { system_forbid_stream = false, ...data } // Plugin input
} = props;
if (!pluginId) {
return Promise.reject('pluginId can not find');
}
@@ -54,7 +52,6 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
acc[cur.key] = cur.isToolOutput === false ? false : true;
return acc;
}, {}) ?? {};
const runtimeNodes = storeNodes2RuntimeNodes(
plugin.nodes,
getWorkflowEntryNodeIds(plugin.nodes)
@@ -79,7 +76,6 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
...filterSystemVariables(props.variables),
appId: String(plugin.id)
};
const { flowResponses, flowUsages, assistantResponses, runTimes } = await dispatchWorkFlow({
...props,
// Rewrite stream mode
@@ -105,9 +101,7 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
runtimeNodes,
runtimeEdges: initWorkflowEdgeStatus(plugin.edges)
});
const output = flowResponses.find((item) => item.moduleType === FlowNodeTypeEnum.pluginOutput);
if (output) {
output.moduleLogo = plugin.avatar;
}
@@ -117,7 +111,6 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
childrenUsage: flowUsages,
error: !!output?.pluginOutput?.error
});
return {
// 嵌套运行时,如果 childApp stream=false实际上不会有任何内容输出给用户所以不需要存储
assistantResponses: system_forbid_stream ? [] : assistantResponses,

View File

@@ -20,7 +20,7 @@ export const dispatchAnswer = (props: Record<string, any>): AnswerResponse => {
} = props as AnswerProps;
const formatText = typeof text === 'string' ? text : JSON.stringify(text, null, 2);
const responseText = `\n${formatText}`.replaceAll('\\n', '\n');
const responseText = `\n${formatText}`;
workflowStreamResponse?.({
event: SseResponseEventEnum.fastAnswer,

View File

@@ -110,8 +110,7 @@ export const dispatchHttp468Request = async (props: HttpRequestProps): Promise<H
replaceEditorVariable({
text,
nodes: runtimeNodes,
variables: allVariables,
runningNode: node
variables: allVariables
}),
allVariables
);

View File

@@ -12,6 +12,7 @@ import { readRawContentByFileBuffer } from '../../../../common/file/read/utils';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { ChatItemType, UserChatItemValueItemType } from '@fastgpt/global/core/chat/type';
import { parseFileExtensionFromUrl } from '@fastgpt/global/common/string/tools';
import { addLog } from '../../../../common/system/log';
type Props = ModuleDispatchProps<{
[NodeInputKeyEnum.fileUrlList]: string[];
@@ -138,7 +139,7 @@ export const getFileContentFromLinks = async ({
return url;
} catch (error) {
console.log(error);
addLog.warn(`Parse url error`, { error });
return '';
}
})

View File

@@ -47,8 +47,7 @@ export const dispatchUpdateVariable = async (props: Props): Promise<Response> =>
? replaceEditorVariable({
text: formatValue,
nodes: runtimeNodes,
variables,
runningNode: node
variables
})
: formatValue;
} else {

View File

@@ -134,7 +134,7 @@ export const checkQuoteQAValue = (quoteQA?: SearchDataResponseItemType[]) => {
if (quoteQA.length === 0) {
return [];
}
if (quoteQA.some((item) => !item.q || !item.datasetId)) {
if (quoteQA.some((item) => !item.q)) {
return undefined;
}
return quoteQA;