Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-22 12:20:34 +00:00)
New docs structure and dataset i18n (#551)
* perf: check balance
* md
* lock way
* i18n
* docs
* doc
* i18n
* update doc
* feat: one link sync
* feat: one link sync
* feat: one link sync
* feat: one link sync
* feat: one link sync
* feat: one link sync
* feat: one link sync
@@ -44,7 +44,6 @@ const DatasetCollectionSchema = new Schema({
     enum: Object.keys(DatasetCollectionTypeMap),
     required: true
   },
   name: {
     type: String,
     required: true
@@ -1,5 +1,11 @@
+import type { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type.d';
 import { MongoDatasetCollection } from './schema';
-import { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type';
+import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type.d';
+import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
+import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
+import { MongoDatasetTraining } from '../training/schema';
+import { urlsFetch } from '@fastgpt/global/common/file/tools';
+import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant';
 
 /**
  * get all collection by top collectionId
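One detail worth noting in the hunk above: the ParentTreePathItemType import becomes a type-only import. An import type declaration is guaranteed to be erased from the emitted JavaScript, so the declaration-only .d module is never resolved at runtime. A standalone sketch of the distinction (not part of this commit):

// Type-only import: erased at compile time, so the declaration-only
// .d module is never require()d at runtime.
import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type.d';

// The type can still be used freely in type positions.
const breadcrumb: ParentTreePathItemType[] = [];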
@@ -58,3 +64,64 @@ export function getCollectionUpdateTime({ name, time }: { time?: Date; name: str
   if (name.startsWith('手动') || ['manual', 'mark'].includes(name)) return new Date('2999/9/9');
   return new Date();
 }
+
+/* link collection start load data */
+export const loadingOneChunkCollection = async ({
+  collectionId,
+  tmbId,
+  billId,
+  rawText
+}: {
+  collectionId: string;
+  tmbId: string;
+  billId?: string;
+  rawText?: string;
+}) => {
+  const collection = (await MongoDatasetCollection.findById(collectionId).populate(
+    'datasetId'
+  )) as CollectionWithDatasetType;
+
+  if (!collection) {
+    return Promise.reject(DatasetErrEnum.unCreateCollection);
+  }
+
+  const newRawText = await (async () => {
+    if (rawText) return rawText;
+    // link
+    if (collection.type === DatasetCollectionTypeEnum.link && collection.rawLink) {
+      // crawl new data
+      const result = await urlsFetch({
+        urlList: [collection.rawLink],
+        selector: collection.datasetId?.websiteConfig?.selector
+      });
+
+      return result[0].content;
+    }
+    // file
+
+    return '';
+  })();
+
+  // split data
+  const { chunks } = splitText2Chunks({
+    text: newRawText,
+    chunkLen: collection.chunkSize || 512
+  });
+
+  // insert to training queue
+  await MongoDatasetTraining.insertMany(
+    chunks.map((item, i) => ({
+      teamId: collection.teamId,
+      tmbId,
+      datasetId: collection.datasetId._id,
+      collectionId: collection._id,
+      billId,
+      mode: collection.trainingType,
+      prompt: '',
+      model: collection.datasetId.vectorModel,
+      q: item,
+      a: '',
+      chunkIndex: i
+    }))
+  );
+};
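For orientation, a minimal sketch of how the new helper might be invoked when a link collection should be re-synced; the relative import path and the wrapper function are assumptions for illustration, not part of this commit:

// Hypothetical caller, for illustration only; the import path is assumed.
import { loadingOneChunkCollection } from './collection/utils';

async function resyncLinkCollection(collectionId: string, tmbId: string) {
  // With rawText omitted, the helper re-crawls collection.rawLink,
  // splits the result into chunks, and enqueues them for training.
  await loadingOneChunkCollection({ collectionId, tmbId });
}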
@@ -41,8 +41,8 @@ export async function delCollectionRelevantData({
   collectionIds: string[];
   fileIds: string[];
 }) {
-  collectionIds = collectionIds.map((item) => String(item));
-  const filterFileIds = fileIds.filter(Boolean);
+  collectionIds = collectionIds.filter(Boolean).map((item) => String(item));
+  const filterFileIds = fileIds.filter(Boolean).map((item) => String(item));
 
   // delete training data
   await MongoDatasetTraining.deleteMany({
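The two rewritten lines guard against falsy ids before stringifying them. A quick standalone illustration of the difference (values made up):

// Without the filter, falsy entries turn into bogus query values.
const ids = ['65a1f0', undefined as unknown as string, ''];
ids.map((item) => String(item)); // ['65a1f0', 'undefined', '']
ids.filter(Boolean).map((item) => String(item)); // ['65a1f0']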
packages/service/core/dataset/training/controller.ts (new file, +21 lines)
@@ -0,0 +1,21 @@
+import { delay } from '@fastgpt/global/common/system/utils';
+import { MongoDatasetTraining } from './schema';
+
+export const lockTrainingDataByTeamId = async (teamId: string, retry = 3): Promise<any> => {
+  try {
+    await MongoDatasetTraining.updateMany(
+      {
+        teamId
+      },
+      {
+        lockTime: new Date('2999/5/5')
+      }
+    );
+  } catch (error) {
+    if (retry > 0) {
+      await delay(1000);
+      return lockTrainingDataByTeamId(teamId, retry - 1);
+    }
+    return Promise.reject(error);
+  }
+};
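A sketch of how a caller might use the new lock, for example when a team's balance check fails; the trigger and the import path are assumptions inferred from the commit message's "perf: check balance", not shown in this diff:

// Hypothetical trigger, for illustration only; the import path is assumed.
import { lockTrainingDataByTeamId } from './training/controller';

async function onInsufficientBalance(teamId: string) {
  // Pushing lockTime far into the future makes the training queue skip
  // this team's pending rows; the helper retries up to three times with
  // a one-second delay before rejecting.
  await lockTrainingDataByTeamId(teamId);
}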