Files
FastGPT/packages/service/core/dataset/controller.ts
Archer a6fbfac96f Yuque dataset baseurl (#4742)
* Yuque dataset baseurl (#4512)

* feat: add API dataset support and internationalization

* Add uuid, slug, parent_uuid and children fields in apiDataset.d.ts (sketched below)
* Update the English, Simplified Chinese and Traditional Chinese dataset.json files with additional prompt messages
* Implement directory selection in the ApiDatasetForm component, with support for fetching Yuque paths
* Add a BaseUrlSelector component for selecting the root directory
* Implement the getpath API to resolve paths from the Yuque server
* Update related API calls to keep them compatible and functionally complete
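
A rough illustration of the type change described above: the file item type in apiDataset.d.ts gains the four fields below. The surrounding type name and the optionality of each field are assumptions; only the field names come from the change list.

// Hypothetical item type; only uuid, slug, parent_uuid and children come from the change list.
type ApiDatasetFileItem = {
  id: string;
  name: string;
  uuid?: string;                   // Yuque node uuid
  slug?: string;                   // Yuque document slug
  parent_uuid?: string;            // uuid of the parent node, used to build the directory tree
  children?: ApiDatasetFileItem[]; // nested entries when the node is a folder
};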

* feat: update baseUrl handling for the Yuque server

* Make the baseUrl property of YuqueServer required in apiDataset.d.ts (see the sketch below)
* Update the ApiDatasetForm component, adjusting baseUrl state management and path-loading logic
* Add a getcatalog API to fetch the dataset catalog
* Adjust related API calls so baseUrl is passed and used correctly
* Return the Chinese label "根目录" (root directory) as the path value for the root
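
A minimal sketch of the shape change described above. The userId and token fields are assumptions; the required baseUrl and the "根目录" root label are what the list describes.

// YuqueServer after the change: userId/token are assumed fields, baseUrl is now required.
type YuqueServer = {
  userId: string;
  token: string;
  baseUrl: string; // required: catalog and path requests are resolved against it
};

// getpath-style helper: an empty path resolves to the Chinese root label "根目录".
const formatYuquePath = (segments: string[]): string =>
  segments.length === 0 ? '根目录' : segments.join('/');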

* feat: update dataset API call logic

* Replace getFeishuAndYuqueDatasetFileList with getProApiDatasetFileListRequest to unify the API call path (see the sketch below)
* Update related files to use the new API correctly
* Clean up code structure to improve readability and maintainability
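
A hedged sketch of what the unified call site might look like after this change; the parameter and return shapes in the declaration are assumptions, only the function name getProApiDatasetFileListRequest comes from the change list.

// Assumed signature; only the function name is taken from the change list.
declare function getProApiDatasetFileListRequest(params: {
  yuqueServer?: { userId: string; token: string; baseUrl: string };
  parentId?: string | null;
}): Promise<{ id: string; name: string; hasChild?: boolean }[]>;

// Hypothetical call site: Feishu/Yuque listings now go through the same request
// instead of the removed getFeishuAndYuqueDatasetFileList.
const listYuqueFiles = (yuqueServer: { userId: string; token: string; baseUrl: string }) =>
  getProApiDatasetFileListRequest({ yuqueServer, parentId: null });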

* Code cleanup: remove debug logging from ApiDatasetForm, BaseUrlSelector and the related APIs

* Remove unnecessary console.log statements to keep the code tidy
* Keep the API logic clear and avoid redundant output

* Update dataset-related types and API logic

- Add the ApiDatasetDetailResponse type in apiDataset.d.ts and remove unneeded fields (see the type sketch below).
- Add the DETAIL operation type and related parameter types in proApi.ts.
- Update ApiDatasetForm.tsx for the new API call logic and unify how paths are fetched.
- Update the BaseUrlSelector component and simplify its directory-selection logic.
- Refine the path handling in getpath.ts and getcatalog.ts so API calls stay consistent and correct.
- Remove unnecessary code and comments to improve readability.
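
A hedged sketch of the new types described above; only the names ApiDatasetDetailResponse and the DETAIL operation come from the list, every field and the other enum members are assumptions.

// Assumed shape: detail of a single API dataset node; every field is a guess.
type ApiDatasetDetailResponse = {
  id: string;
  name: string;
  parentId?: string | null;
};

// proApi.ts is described as gaining a DETAIL operation; LIST/CONTENT here are
// placeholders for whatever operations already existed, not the real members.
enum ProApiDatasetOperation {
  LIST = 'list',
  CONTENT = 'content',
  DETAIL = 'detail'
}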

* Clean up debug logging in the ApiDatasetForm component, removing console.log statements to improve tidiness and readability.

* fix

* update apidatasetform

* remove console

* update

* update

* update editapiservermodal

* update i18n

* add type

* update getpath

* add type

* perf: yuque dataset baseurl

* perf: remove rerank records

* fix: ts

---------

Co-authored-by: dreamer6680 <1468683855@qq.com>
2025-05-05 18:37:14 +08:00

118 lines
3.2 KiB
TypeScript

import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from './collection/schema';
import { MongoDataset } from './schema';
import { delCollectionRelatedSource } from './collection/controller';
import { ClientSession } from '../../common/mongo';
import { MongoDatasetTraining } from './training/schema';
import { MongoDatasetData } from './data/schema';
import { deleteDatasetDataVector } from '../../common/vectorDB/controller';
import { MongoDatasetDataText } from './data/dataTextSchema';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { retryFn } from '@fastgpt/global/common/system/utils';

/* ============= dataset ========== */
/* find all datasetId by top datasetId */
export async function findDatasetAndAllChildren({
teamId,
datasetId,
fields
}: {
teamId: string;
datasetId: string;
fields?: string;
}): Promise<DatasetSchemaType[]> {
const find = async (id: string) => {
const children = await MongoDataset.find(
{
teamId,
parentId: id
},
fields
).lean();
let datasets = children;
for (const child of children) {
const grandChildrenIds = await find(child._id);
datasets = datasets.concat(grandChildrenIds);
}
return datasets;
};
const [dataset, childDatasets] = await Promise.all([
MongoDataset.findById(datasetId).lean(),
find(datasetId)
]);
if (!dataset) {
return Promise.reject('Dataset not found');
}
return [dataset, ...childDatasets];
}
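
/* Get a collection together with its parent dataset document (populated) */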
export async function getCollectionWithDataset(collectionId: string) {
const data = await MongoDatasetCollection.findById(collectionId)
.populate<{ dataset: DatasetSchemaType }>('dataset')
.lean();
if (!data) {
return Promise.reject(DatasetErrEnum.unExistCollection);
}
return data;
}

/* delete all data by datasetIds */
export async function delDatasetRelevantData({
datasets,
session
}: {
datasets: DatasetSchemaType[];
session: ClientSession;
}) {
if (!datasets.length) return;
const teamId = datasets[0].teamId;
if (!teamId) {
return Promise.reject('TeamId is required');
}
const datasetIds = datasets.map((item) => item._id);
// Get _id, teamId, fileId, metadata.relatedImgId for all collections
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds }
},
'_id teamId datasetId fileId metadata'
).lean();
await retryFn(async () => {
await Promise.all([
// delete training data
MongoDatasetTraining.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}),
// Delete dataset_data_texts
MongoDatasetDataText.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}),
// delete dataset_datas
MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
// Delete Image and file
delCollectionRelatedSource({ collections }),
// Delete vector data
deleteDatasetDataVector({ teamId, datasetIds })
]);
});
// delete collections
await MongoDatasetCollection.deleteMany({
teamId,
datasetId: { $in: datasetIds }
}).session(session);
}
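
For context beyond the file itself, a minimal usage sketch of the two exported helpers, assuming a plain mongoose transaction and a final MongoDataset.deleteMany as the caller's last step (both assumptions, not shown in this file).

import mongoose from 'mongoose';
import { MongoDataset } from './schema';
import { findDatasetAndAllChildren, delDatasetRelevantData } from './controller';

// Hypothetical caller: remove a dataset and everything nested under it.
export async function deleteDatasetTree(teamId: string, datasetId: string) {
  // Collect the target dataset plus all of its descendants.
  const datasets = await findDatasetAndAllChildren({ teamId, datasetId });

  const session = await mongoose.startSession();
  try {
    await session.withTransaction(async () => {
      // Remove training queues, data texts, data rows, related files/images,
      // vectors and finally the collections (collections use the session).
      await delDatasetRelevantData({ datasets, session });
      // Dropping the dataset documents themselves is an assumption about the
      // caller's final step; it is not part of delDatasetRelevantData.
      await MongoDataset.deleteMany(
        { _id: { $in: datasets.map((d) => d._id) } },
        { session }
      );
    });
  } finally {
    await session.endSession();
  }
}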