External dataset (#1485)

* fix: revert version

* feat: external collection

* import context

* external ui

* doc

* fix: ts

* clear invalid data

* feat: rename sub name

* fix: node if else edge remove

* fix: init

* api size

* fix: if else node refresh
Author: Archer
Date: 2024-05-15 10:19:51 +08:00 (committed by GitHub)
Parent: fb04889a31
Commit: cd876251b7
74 changed files with 1882 additions and 1353 deletions

View File

@@ -1,6 +1,10 @@
import { GET, POST, PUT, DELETE } from '@/web/common/api/request';
import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type.d';
- import type { DatasetItemType, DatasetListItemType } from '@fastgpt/global/core/dataset/type.d';
+ import type {
+ DatasetItemType,
+ DatasetListItemType,
+ DatasetSimpleItemType
+ } from '@fastgpt/global/core/dataset/type.d';
import type {
GetDatasetCollectionsProps,
GetDatasetDataListProps,
@@ -39,13 +43,13 @@ import type { getDatasetTrainingQueueResponse } from '@/pages/api/core/dataset/t
import type { rebuildEmbeddingBody } from '@/pages/api/core/dataset/training/rebuildEmbedding';
/* ======================== dataset ======================= */
- export const getDatasets = (data: { parentId?: string; type?: `${DatasetTypeEnum}` }) =>
+ export const getDatasets = (data: { parentId?: string; type?: DatasetTypeEnum }) =>
GET<DatasetListItemType[]>(`/core/dataset/list`, data);
/**
* get type=dataset list
*/
- export const getAllDataset = () => GET<DatasetListItemType[]>(`/core/dataset/allDataset`);
+ export const getAllDataset = () => GET<DatasetSimpleItemType[]>(`/core/dataset/allDataset`);
export const getDatasetPaths = (parentId?: string) =>
GET<ParentTreePathItemType[]>('/core/dataset/paths', { parentId });
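
For reference, the list helpers above are now typed against DatasetTypeEnum and getAllDataset resolves to the slimmer DatasetSimpleItemType[]. A minimal usage sketch (the wrapper function below is illustrative, not code from this commit):

import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { getAllDataset, getDatasets } from '@/web/core/dataset/api';

// illustrative: load the folder view list plus the lightweight "all datasets" list
async function loadDatasetLists(parentId?: string) {
  // the type filter is the enum itself rather than a template-literal string
  const folderDatasets = await getDatasets({ parentId, type: DatasetTypeEnum.dataset });
  // pickers that only need basic fields use the reduced DatasetSimpleItemType[] payload
  const allDatasets = await getAllDataset();
  return { folderDatasets, allDatasets };
}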

View File

@@ -11,6 +11,8 @@ import { useQuery } from '@tanstack/react-query';
import React, { useMemo, useState } from 'react';
import { useTranslation } from 'next-i18next';
import { useLoading } from '@fastgpt/web/hooks/useLoading';
+ import { useContextSelector } from 'use-context-selector';
+ import { DatasetPageContext } from '../context/datasetPageContext';
const SelectCollections = ({
datasetId,
@@ -37,7 +39,8 @@ const SelectCollections = ({
}) => {
const { t } = useTranslation();
const theme = useTheme();
- const { datasetDetail, loadDatasetDetail } = useDatasetStore();
+ const { loadDatasetDetail } = useContextSelector(DatasetPageContext, (v) => v);
const { Loading } = useLoading();
const [selectedDatasetCollectionIds, setSelectedDatasetCollectionIds] =
useState<string[]>(defaultSelectedId);

View File

@@ -1,4 +1,5 @@
import { defaultQAModels, defaultVectorModels } from '@fastgpt/global/core/ai/model';
+ import { DatasetTypeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
import type {
DatasetCollectionItemType,
DatasetItemType
@@ -11,7 +12,7 @@ export const defaultDatasetDetail: DatasetItemType = {
teamId: '',
tmbId: '',
updateTime: new Date(),
- type: 'dataset',
+ type: DatasetTypeEnum.dataset,
avatar: '/icon/logo.svg',
name: '',
intro: '',
@@ -34,7 +35,7 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
teamId: '',
tmbId: '',
updateTime: new Date(),
- type: 'dataset',
+ type: DatasetTypeEnum.dataset,
avatar: '/icon/logo.svg',
name: '',
intro: '',
@@ -51,7 +52,7 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
sourceName: '',
sourceId: '',
createTime: new Date(),
- trainingType: 'chunk',
+ trainingType: TrainingModeEnum.chunk,
chunkSize: 0
};

View File

@@ -1,11 +1,23 @@
import { useQuery } from '@tanstack/react-query';
- import { ReactNode, useMemo } from 'react';
+ import { ReactNode, useMemo, useState } from 'react';
import { useTranslation } from 'next-i18next';
import { createContext } from 'use-context-selector';
- import { getDatasetTrainingQueue, getTrainingQueueLen } from '../api';
- import { useDatasetStore } from '../store/dataset';
+ import {
+ getDatasetById,
+ getDatasetTrainingQueue,
+ getTrainingQueueLen,
+ putDatasetById
+ } from '../api';
+ import { defaultDatasetDetail } from '../constants';
+ import { DatasetUpdateBody } from '@fastgpt/global/core/dataset/api';
+ import { DatasetItemType } from '@fastgpt/global/core/dataset/type';
type DatasetPageContextType = {
+ datasetId: string;
+ datasetDetail: DatasetItemType;
+ loadDatasetDetail: (id: string) => Promise<DatasetItemType>;
+ updateDataset: (data: DatasetUpdateBody) => Promise<void>;
vectorTrainingMap: {
colorSchema: string;
tip: string;
@@ -19,10 +31,6 @@ type DatasetPageContextType = {
refetchDatasetTraining: () => void;
};
- type DatasetPageContextValueType = {
- datasetId: string;
- };
export const DatasetPageContext = createContext<DatasetPageContextType>({
vectorTrainingMap: {
colorSchema: '',
@@ -36,19 +44,46 @@ export const DatasetPageContext = createContext<DatasetPageContextType>({
trainingCount: 0,
refetchDatasetTraining: function (): void {
throw new Error('Function not implemented.');
+ },
+ datasetId: '',
+ datasetDetail: defaultDatasetDetail,
+ loadDatasetDetail: function (id: string): Promise<DatasetItemType> {
+ throw new Error('Function not implemented.');
+ },
+ updateDataset: function (data: DatasetUpdateBody): Promise<void> {
+ throw new Error('Function not implemented.');
}
});
export const DatasetPageContextProvider = ({
children,
- value
+ datasetId
}: {
children: ReactNode;
- value: DatasetPageContextValueType;
+ datasetId: string;
}) => {
const { t } = useTranslation();
- const { datasetId } = value;
- const { datasetDetail } = useDatasetStore();
+ // dataset detail
+ const [datasetDetail, setDatasetDetail] = useState(defaultDatasetDetail);
+ const loadDatasetDetail = async (id: string) => {
+ const data = await getDatasetById(id);
+ setDatasetDetail(data);
+ return data;
+ };
+ const updateDataset = async (data: DatasetUpdateBody) => {
+ await putDatasetById(data);
+ if (datasetId === data.id) {
+ setDatasetDetail((state) => ({
+ ...state,
+ ...data
+ }));
+ }
+ };
// global queue
const { data: { vectorTrainingCount = 0, agentTrainingCount = 0 } = {} } = useQuery(
@@ -108,6 +143,11 @@ export const DatasetPageContextProvider = ({
});
const contextValue: DatasetPageContextType = {
+ datasetId,
+ datasetDetail,
+ loadDatasetDetail,
+ updateDataset,
vectorTrainingMap,
agentTrainingMap,
rebuildingCount,
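
With datasetDetail, loadDatasetDetail and updateDataset moved off the zustand store and onto DatasetPageContext, child components read them through use-context-selector. A minimal consumer sketch (the hook, its relative import path, and the name field are assumptions, not code from this commit):

import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '../context/datasetPageContext';

// illustrative hook: select only the slices it needs so unrelated context updates don't re-render consumers
function useDatasetRename() {
  const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
  const updateDataset = useContextSelector(DatasetPageContext, (v) => v.updateDataset);

  // `name` is assumed to be an allowed DatasetUpdateBody field; updateDataset calls the API
  // and, when the id matches the current dataset, patches the provider's local detail state
  return (name: string) => updateDataset({ id: datasetDetail._id, name });
}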

View File

@@ -0,0 +1,11 @@
+ import { ReactNode } from 'react';
+ import { createContext } from 'use-context-selector';
+ type ContextType = {};
+ export const Context = createContext<ContextType>({});
+ export const ContextProvider = ({ children }: { children: ReactNode }) => {
+ const contextValue: ContextType = {};
+ return <Context.Provider value={contextValue}>{children}</Context.Provider>;
+ };
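
The file above is an empty context scaffold added in this commit; it is presumably meant to be copied and specialized the same way DatasetPageContext is. An illustrative specialization, with every name assumed rather than taken from the diff:

import { ReactNode, useState } from 'react';
import { createContext, useContextSelector } from 'use-context-selector';

type ImportContextType = {
  sources: string[];
  addSource: (id: string) => void;
};

export const ImportContext = createContext<ImportContextType>({
  sources: [],
  addSource: () => {}
});

export const ImportContextProvider = ({ children }: { children: ReactNode }) => {
  const [sources, setSources] = useState<string[]>([]);
  const addSource = (id: string) => setSources((prev) => [...prev, id]);

  return (
    <ImportContext.Provider value={{ sources, addSource }}>{children}</ImportContext.Provider>
  );
};

// consumers subscribe to a single slice instead of the whole context value
export const useImportSources = () => useContextSelector(ImportContext, (v) => v.sources);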

View File

@@ -1,18 +0,0 @@
- import { ReactNode } from 'react';
- import { createContext } from 'use-context-selector';
- type DatasetContextType = {};
- type DatasetContextValueType = {};
- export const DatasetContext = createContext<DatasetContextType>({});
- export const DatasetContextProvider = ({
- children,
- value
- }: {
- children: ReactNode;
- value: DatasetContextValueType;
- }) => {
- return <DatasetContext.Provider value={value}>{children}</DatasetContext.Provider>;
- };

View File

@@ -1,30 +1,18 @@
import { create } from 'zustand';
import { devtools, persist } from 'zustand/middleware';
import { immer } from 'zustand/middleware/immer';
- import type { DatasetItemType, DatasetListItemType } from '@fastgpt/global/core/dataset/type.d';
- import {
- getAllDataset,
- getDatasets,
- getDatasetById,
- putDatasetById,
- postWebsiteSync
- } from '@/web/core/dataset/api';
- import { defaultDatasetDetail } from '../constants';
- import type { DatasetUpdateBody } from '@fastgpt/global/core/dataset/api.d';
- import { DatasetStatusEnum } from '@fastgpt/global/core/dataset/constants';
- import { postCreateTrainingUsage } from '@/web/support/wallet/usage/api';
- import { checkTeamWebSyncLimit } from '@/web/support/user/team/api';
+ import type {
+ DatasetListItemType,
+ DatasetSimpleItemType
+ } from '@fastgpt/global/core/dataset/type.d';
+ import { getAllDataset, getDatasets } from '@/web/core/dataset/api';
type State = {
- allDatasets: DatasetListItemType[];
- loadAllDatasets: () => Promise<DatasetListItemType[]>;
+ allDatasets: DatasetSimpleItemType[];
+ loadAllDatasets: () => Promise<DatasetSimpleItemType[]>;
myDatasets: DatasetListItemType[];
- loadDatasets: (parentId?: string) => Promise<any>;
- setDatasets(val: DatasetListItemType[]): void;
- datasetDetail: DatasetItemType;
- loadDatasetDetail: (id: string, init?: boolean) => Promise<DatasetItemType>;
- updateDataset: (data: DatasetUpdateBody) => Promise<any>;
- startWebsiteSync: () => Promise<any>;
+ loadMyDatasets: (parentId?: string) => Promise<any>;
+ setMyDatasets(val: DatasetListItemType[]): void;
};
export const useDatasetStore = create<State>()(
@@ -40,66 +28,17 @@ export const useDatasetStore = create<State>()(
return res;
},
myDatasets: [],
- async loadDatasets(parentId = '') {
+ async loadMyDatasets(parentId = '') {
const res = await getDatasets({ parentId });
set((state) => {
state.myDatasets = res;
});
return res;
},
- setDatasets(val) {
+ setMyDatasets(val) {
set((state) => {
state.myDatasets = val;
});
},
- datasetDetail: defaultDatasetDetail,
- async loadDatasetDetail(id: string, init = false) {
- if (!id || (id === get().datasetDetail._id && !init)) return get().datasetDetail;
- const data = await getDatasetById(id);
- set((state) => {
- state.datasetDetail = data;
- });
- return data;
- },
- async updateDataset(data) {
- await putDatasetById(data);
- if (get().datasetDetail._id === data.id) {
- set((state) => {
- state.datasetDetail = {
- ...get().datasetDetail,
- ...data
- };
- });
- }
- set((state) => {
- state.myDatasets = state.myDatasets = state.myDatasets.map((item) =>
- item._id === data.id
- ? {
- ...item,
- ...data
- }
- : item
- );
- });
- },
- async startWebsiteSync() {
- await checkTeamWebSyncLimit();
- const billId = await postCreateTrainingUsage({
- name: 'core.dataset.training.Website Sync',
- datasetId: get().datasetDetail._id
- });
- return postWebsiteSync({ datasetId: get().datasetDetail._id, billId }).then(() => {
- get().updateDataset({
- id: get().datasetDetail._id,
- status: DatasetStatusEnum.syncing
- });
- });
- }
})),
{
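
After this change the store only tracks the dataset lists: detail loading, dataset updates and the website-sync trigger were removed here (dataset detail and updates now live in DatasetPageContext). A minimal sketch of the remaining surface (the wrapper hook and the store's import path are illustrative):

import { useDatasetStore } from '@/web/core/dataset/store/dataset';

// illustrative: list screens only need the renamed list helpers now
function useDatasetLists(parentId?: string) {
  const { myDatasets, loadMyDatasets, allDatasets, loadAllDatasets } = useDatasetStore();

  const refresh = async () => {
    await loadMyDatasets(parentId); // renamed from loadDatasets
    await loadAllDatasets(); // now resolves to DatasetSimpleItemType[]
  };

  return { myDatasets, allDatasets, refresh };
}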

View File

@@ -1,13 +1,8 @@
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
- import { ImportProcessWayEnum, ImportSourceTypeEnum } from './constants';
+ import { ImportProcessWayEnum } from './constants';
import { UseFormReturn } from 'react-hook-form';
- export type ImportDataComponentProps = {
- activeStep: number;
- goToNext: () => void;
- };
export type ImportSourceItemType = {
id: string;
@@ -17,10 +12,10 @@ export type ImportSourceItemType = {
// source
sourceName: string;
sourceSize?: string;
icon: string;
// file
sourceSize?: string;
isUploading?: boolean;
uploadedFileRate?: number;
dbFileId?: string; // file ID stored in the database; this ID is also the relateId in the metadata of images and collections
@@ -31,6 +26,10 @@ export type ImportSourceItemType = {
// custom text
rawText?: string;
+ // external file
+ sourceUrl?: string;
+ externalId?: string;
};
export type ImportSourceParamsType = UseFormReturn<