Mirror of https://github.com/labring/FastGPT.git
172 lines · 4.7 KiB · TypeScript
import { type DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from './collection/schema';
import { MongoDataset } from './schema';
import { delCollectionRelatedSource } from './collection/controller';
import { type ClientSession } from '../../common/mongo';
import { MongoDatasetTraining } from './training/schema';
import { MongoDatasetData } from './data/schema';
import { deleteDatasetDataVector } from '../../common/vectorDB/controller';
import { MongoDatasetDataText } from './data/dataTextSchema';
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
import { retryFn } from '@fastgpt/global/common/system/utils';
import { clearDatasetImages } from './image/utils';
import { MongoDatasetCollectionTags } from './tag/schema';
import { removeDatasetSyncJobScheduler } from './datasetSync';
import { mongoSessionRun } from '../../common/mongo/sessionRun';
import { removeImageByPath } from '../../common/file/image/controller';
import { UserError } from '@fastgpt/global/common/error/utils';
import { getS3DatasetSource } from '../../common/s3/sources/dataset';

/* ============= dataset ========== */
/* Find a dataset and all of its descendant datasets by the top datasetId */
export async function findDatasetAndAllChildren({
  teamId,
  datasetId,
  fields
}: {
  teamId: string;
  datasetId: string;
  fields?: string;
}): Promise<DatasetSchemaType[]> {
  const find = async (id: string) => {
    const children = await MongoDataset.find(
      {
        teamId,
        parentId: id
      },
      fields
    ).lean();

    let datasets = children;

    for (const child of children) {
      const grandChildren = await find(child._id);
      datasets = datasets.concat(grandChildren);
    }

    return datasets;
  };
  const [dataset, childDatasets] = await Promise.all([
    MongoDataset.findById(datasetId).lean(),
    find(datasetId)
  ]);

  if (!dataset) {
    return Promise.reject(new UserError('Dataset not found'));
  }

  return [dataset, ...childDatasets];
}
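
/*
  Usage sketch (hypothetical caller, not part of this module): gather a folder
  dataset and every descendant before a bulk operation. `fields` is a Mongoose
  projection string, so only the listed paths are returned.

    const datasets = await findDatasetAndAllChildren({
      teamId,
      datasetId: folderId,
      fields: '_id teamId avatar'
    });
    // datasets[0] is the root dataset; the rest are its descendants.
*/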

export async function getCollectionWithDataset(collectionId: string) {
  const data = await MongoDatasetCollection.findById(collectionId)
    .populate<{ dataset: DatasetSchemaType }>('dataset')
    .lean();
  if (!data) {
    return Promise.reject(DatasetErrEnum.unExistCollection);
  }
  return data;
}
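
/*
  Usage sketch (hypothetical caller): `populate` pulls the parent dataset
  document along with the collection, so both come back from one query.

    const collection = await getCollectionWithDataset(collectionId);
    const { vectorModel } = collection.dataset; // field assumed from DatasetSchemaType
*/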

/* Delete all data belonging to the given datasets */
export async function delDatasetRelevantData({
  datasets,
  session
}: {
  datasets: DatasetSchemaType[];
  session: ClientSession;
}) {
  if (!datasets.length) return;

  const teamId = datasets[0].teamId;

  if (!teamId) {
    return Promise.reject(new UserError('TeamId is required'));
  }

  const datasetIds = datasets.map((item) => item._id);

  // Get _id, teamId, fileId, metadata.relatedImgId for all collections
  const collections = await MongoDatasetCollection.find(
    {
      teamId,
      datasetId: { $in: datasetIds }
    },
    '_id teamId datasetId fileId metadata'
  ).lean();

  await retryFn(async () => {
    await Promise.all([
      // Delete training data
      MongoDatasetTraining.deleteMany({
        teamId,
        datasetId: { $in: datasetIds }
      }),
      // Delete dataset_data_texts
      MongoDatasetDataText.deleteMany({
        teamId,
        datasetId: { $in: datasetIds }
      }),
      // Delete dataset_datas
      MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
      // Delete collection images and files
      delCollectionRelatedSource({ collections }),
      // Delete dataset images
      clearDatasetImages(datasetIds),
      // Delete vector data
      deleteDatasetDataVector({ teamId, datasetIds })
    ]);
  });

  // Delete collections
  await MongoDatasetCollection.deleteMany({
    teamId,
    datasetId: { $in: datasetIds }
  }).session(session);

  // Delete the S3 objects stored under each dataset's prefix
  for (const datasetId of datasetIds) {
    await getS3DatasetSource().deleteDatasetFilesByPrefix({ datasetId });
  }
}
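
/*
  Design note (a reading of the code above, not authoritative): the bulk data
  deletions are idempotent, so they run inside retryFn without the transaction;
  only the collection removal is bound to the caller's session, and the S3
  prefix cleanup runs last, once the Mongo records are gone.
*/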

export const deleteDatasets = async ({
  teamId,
  datasets
}: {
  teamId: string;
  datasets: DatasetSchemaType[];
}) => {
  const datasetIds = datasets.map((d) => d._id);

  // Delete collection tags
  await MongoDatasetCollectionTags.deleteMany({
    teamId,
    datasetId: { $in: datasetIds }
  });

  // Remove cron jobs
  await Promise.all(
    datasets.map((dataset) => {
      return removeDatasetSyncJobScheduler(dataset._id);
    })
  );

  // Delete all dataset data and vector data
  await mongoSessionRun(async (session) => {
    // Delete dataset data
    await delDatasetRelevantData({ datasets, session });

    // Delete datasets
    await MongoDataset.deleteMany(
      {
        _id: { $in: datasetIds }
      },
      { session }
    );

    // Delete dataset avatars
    for (const dataset of datasets) {
      await removeImageByPath(dataset.avatar, session);
    }
  });
};
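
/*
  Usage sketch (hypothetical caller, e.g. a delete API handler): resolve the
  folder and its descendants first, then pass the whole list so children are
  removed together with their parent.

    const datasets = await findDatasetAndAllChildren({ teamId, datasetId });
    await deleteDatasets({ teamId, datasets });
*/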