feat: model config required check;feat: dataset text model default setting (#3866)

* feat: model config required check

* feat: dataset text model default setting

* perf: collection list count

* fix: ts

* remove index count
This commit is contained in:
Archer
2025-02-24 19:55:49 +08:00
committed by GitHub
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions

View File

@@ -8,7 +8,7 @@ import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { getWebDefaultModel } from '@/web/common/system/utils';
import { getWebDefaultLLMModel } from '@/web/common/system/utils';
type Props = {
llmModelType?: `${LLMModelTypeEnum}`;
@@ -40,7 +40,7 @@ const SettingLLMModel = ({
[llmModelList, llmModelType]
);
const defaultModel = useMemo(() => {
return getWebDefaultModel(modelList).model;
return getWebDefaultLLMModel(modelList).model;
}, [modelList]);
// Set default model

View File

@@ -59,6 +59,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { useRefresh } from '../../../../../../packages/web/hooks/useRefresh';
import { Prompt_CQJson, Prompt_ExtractJson } from '@fastgpt/global/core/ai/prompt/agent';
import MyDivider from '@fastgpt/web/components/common/MyDivider';
const MyModal = dynamic(() => import('@fastgpt/web/components/common/MyModal'));
@@ -730,7 +731,12 @@ const ModelEditModal = ({
<Td>{t('common:core.ai.Max context')}</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="maxContext" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="maxContext"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -740,6 +746,7 @@ const ModelEditModal = ({
<Flex justifyContent={'flex-end'}>
<MyNumberInput
register={register}
isRequired
name="quoteMaxToken"
{...InputStyles}
/>
@@ -750,7 +757,12 @@ const ModelEditModal = ({
<Td>{t('common:core.chat.response.module maxToken')}</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="maxResponse" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="maxResponse"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -760,6 +772,7 @@ const ModelEditModal = ({
<Flex justifyContent={'flex-end'}>
<MyNumberInput
register={register}
isRequired
name="maxTemperature"
step={0.1}
{...InputStyles}
@@ -838,7 +851,12 @@ const ModelEditModal = ({
</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="defaultToken" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="defaultToken"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -846,7 +864,12 @@ const ModelEditModal = ({
<Td>{t('common:core.ai.Max context')}</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="maxToken" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="maxToken"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -1214,6 +1237,7 @@ const DefaultModelModal = ({
const {
defaultModels,
llmModelList,
datasetModelList,
embeddingModelList,
ttsModelList,
sttModelList,
@@ -1334,6 +1358,29 @@ const DefaultModelModal = ({
/>
</Box>
</Box>
<MyDivider />
<Box>
<Flex {...labelStyles} alignItems={'center'}>
<Box mr={0.5}>{t('common:core.ai.model.Dataset Agent Model')}</Box>
<QuestionTip label={t('common:dataset_text_model_tip')} />
</Flex>
<Box flex={1}>
<AIModelSelector
bg="myGray.50"
value={defaultData.datasetTextLLM?.model}
list={datasetModelList.map((item) => ({
value: item.model,
label: item.name
}))}
onchange={(e) => {
setDefaultData((state) => ({
...state,
datasetTextLLM: datasetModelList.find((item) => item.model === e)
}));
}}
/>
</Box>
</Box>
</ModalBody>
<ModalFooter>
<Button variant={'whiteBase'} mr={4} onClick={onClose}>
@@ -1347,7 +1394,9 @@ const DefaultModelModal = ({
[ModelTypeEnum.embedding]: defaultData.embedding?.model,
[ModelTypeEnum.tts]: defaultData.tts?.model,
[ModelTypeEnum.stt]: defaultData.stt?.model,
[ModelTypeEnum.rerank]: defaultData.rerank?.model
[ModelTypeEnum.rerank]: defaultData.rerank?.model,
datasetTextLLM: defaultData.datasetTextLLM?.model,
datasetImageLLM: defaultData.datasetImageLLM?.model
})
}
>

View File

@@ -5,7 +5,7 @@ import { llmModelTypeFilterMap } from '@fastgpt/global/core/ai/constants';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { useContextSelector } from 'use-context-selector';
import { WorkflowContext } from '@/pageComponents/app/detail/WorkflowComponents/context';
import { getWebDefaultModel } from '@/web/common/system/utils';
import { getWebDefaultLLMModel } from '@/web/common/system/utils';
const SelectAiModelRender = ({ item, nodeId }: RenderInputProps) => {
const { llmModelList } = useSystemStore();
@@ -23,7 +23,7 @@ const SelectAiModelRender = ({ item, nodeId }: RenderInputProps) => {
[llmModelList, item.llmModelType]
);
const defaultModel = useMemo(() => {
return getWebDefaultModel(modelList).model;
return getWebDefaultLLMModel(modelList).model;
}, [modelList]);
const onChangeModel = useCallback(

View File

@@ -10,7 +10,7 @@ import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import {
delDatasetCollectionTag,
getDatasetCollectionTags,
getScrollCollectionList,
getDatasetCollections,
getTagUsage,
postAddTagsToCollections,
updateDatasetCollectionTag
@@ -146,7 +146,7 @@ const TagManageModal = ({ onClose }: { onClose: () => void }) => {
scrollDataList: collectionsList,
ScrollList: ScrollListCollections,
isLoading: collectionsListLoading
} = useVirtualScrollPagination(getScrollCollectionList, {
} = useVirtualScrollPagination(getDatasetCollections, {
refreshDeps: [searchText],
// debounceWait: 300,
@@ -156,6 +156,7 @@ const TagManageModal = ({ onClose }: { onClose: () => void }) => {
pageSize: 30,
defaultParams: {
datasetId: datasetDetail._id,
simple: true,
searchText
}
});

View File

@@ -195,7 +195,7 @@ const CollectionCard = () => {
<Tr>
<Th py={4}>{t('common:common.Name')}</Th>
<Th py={4}>{t('dataset:collection.Training type')}</Th>
<Th py={4}>{t('common:dataset.collections.Data Amount')}</Th>
<Th py={4}>{t('dataset:collection_data_count')}</Th>
<Th py={4}>{t('dataset:collection.Create update time')}</Th>
<Th py={4}>{t('common:common.Status')}</Th>
<Th py={4}>{t('dataset:Enable')}</Th>

View File

@@ -29,10 +29,8 @@ import Markdown from '@/components/Markdown';
import { useMemoizedFn } from 'ahooks';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import { TabEnum } from './NavBar';
import {
DatasetCollectionTypeEnum,
ImportDataSourceEnum
} from '@fastgpt/global/core/dataset/constants';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
const DataCard = () => {
const theme = useTheme();
@@ -76,19 +74,17 @@ const DataCard = () => {
const [editDataId, setEditDataId] = useState<string>();
// get file info
const { data: collection } = useQuery(
['getDatasetCollectionById', collectionId],
() => getDatasetCollectionById(collectionId),
{
onError: () => {
router.replace({
query: {
datasetId
}
});
}
const { data: collection } = useRequest2(() => getDatasetCollectionById(collectionId), {
refreshDeps: [collectionId],
manual: false,
onError: () => {
router.replace({
query: {
datasetId
}
});
}
);
});
const canWrite = useMemo(() => datasetDetail.permission.hasWritePer, [datasetDetail]);
@@ -182,7 +178,10 @@ const DataCard = () => {
<Flex align={'center'} color={'myGray.500'}>
<MyIcon name="common/list" mr={2} w={'18px'} />
<Box as={'span'} fontSize={['sm', '14px']} fontWeight={'500'}>
{t('common:core.dataset.data.Total Amount', { total })}
{t('dataset:data_amount', {
dataAmount: total,
indexAmount: collection?.indexAmount ?? '-'
})}
</Box>
</Flex>
<Box flex={1} mr={1} />

View File

@@ -164,12 +164,12 @@ const Info = ({ datasetId }: { datasetId: string }) => {
</Flex>
<Box mt={5} w={'100%'}>
<Flex alignItems={'center'} fontSize={'mini'}>
<FormLabel fontWeight={'500'} flex={'1 0 0'}>
<Flex alignItems={'center'}>
<FormLabel fontWeight={'500'} flex={'1 0 0'} fontSize={'mini'}>
{t('common:core.ai.model.Vector Model')}
</FormLabel>
<MyTooltip label={t('dataset:vector_model_max_tokens_tip')}>
<Box>
<Box fontSize={'mini'}>
{t('dataset:chunk_max_tokens')}: {vectorModel.maxToken}
</Box>
</MyTooltip>

View File

@@ -21,7 +21,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import { getDocPath } from '@/web/common/system/doc';
import { datasetTypeCourseMap } from '@/web/core/dataset/constants';
import ApiDatasetForm from '../ApiDatasetForm';
import { getWebDefaultModel } from '@/web/common/system/utils';
import { getWebDefaultEmbeddingModel, getWebDefaultLLMModel } from '@/web/common/system/utils';
export type CreateDatasetType =
| DatasetTypeEnum.dataset
@@ -40,7 +40,6 @@ const CreateModal = ({
type: CreateDatasetType;
}) => {
const { t } = useTranslation();
const { toast } = useToast();
const router = useRouter();
const { defaultModels, embeddingModelList, datasetModelList } = useSystemStore();
const { isPc } = useSystem();
@@ -79,8 +78,10 @@ const CreateModal = ({
avatar: datasetTypeMap[type].icon,
name: '',
intro: '',
vectorModel: defaultModels.embedding?.model,
agentModel: getWebDefaultModel(datasetModelList)?.model
vectorModel:
defaultModels.embedding?.model || getWebDefaultEmbeddingModel(embeddingModelList)?.model,
agentModel:
defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model
}
});
const { register, setValue, handleSubmit, watch } = form;

View File

@@ -15,6 +15,8 @@ export type updateDefaultBody = {
[ModelTypeEnum.tts]?: string;
[ModelTypeEnum.stt]?: string;
[ModelTypeEnum.rerank]?: string;
datasetTextLLM?: string;
datasetImageLLM?: string;
};
// Response type of the update-default handler below; it declares no fields.
export type updateDefaultResponse = {};
@@ -25,10 +27,21 @@ async function handler(
): Promise<updateDefaultResponse> {
await authSystemAdmin({ req });
const { llm, embedding, tts, stt, rerank } = req.body;
const { llm, embedding, tts, stt, rerank, datasetTextLLM, datasetImageLLM } = req.body;
await mongoSessionRun(async (session) => {
await MongoSystemModel.updateMany({}, { $unset: { 'metadata.isDefault': 1 } }, { session });
// Remove all default flags
await MongoSystemModel.updateMany(
{},
{
$unset: {
'metadata.isDefault': 1,
'metadata.isDefaultDatasetTextModel': 1,
'metadata.isDefaultDatasetImageModel': 1
}
},
{ session }
);
if (llm) {
await MongoSystemModel.updateOne(
@@ -37,6 +50,20 @@ async function handler(
{ session }
);
}
if (datasetTextLLM) {
await MongoSystemModel.updateOne(
{ model: datasetTextLLM },
{ $set: { 'metadata.isDefaultDatasetTextModel': true } },
{ session }
);
}
if (datasetImageLLM) {
await MongoSystemModel.updateOne(
{ model: datasetImageLLM },
{ $set: { 'metadata.isDefaultDatasetImageModel': true } },
{ session }
);
}
if (embedding) {
await MongoSystemModel.updateOne(
{ model: embedding },

View File

@@ -11,6 +11,7 @@ import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';
async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
const { id } = req.query as { id: string };
@@ -29,12 +30,16 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
});
// get file
const file = collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined;
const [file, indexAmount] = await Promise.all([
collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined,
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
]);
return {
...collection,
indexAmount: indexAmount ?? 0,
...getCollectionSourceData(collection),
tags: await collectionTagsToTagLabel({
datasetId: collection.datasetId,

View File

@@ -1,12 +1,10 @@
import type { NextApiRequest } from 'next';
import { DatasetTrainingCollectionName } from '@fastgpt/service/core/dataset/training/schema';
import { Types } from '@fastgpt/service/common/mongo';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import type { GetDatasetCollectionsProps } from '@/global/core/api/datasetReq';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { DatasetDataCollectionName } from '@fastgpt/service/core/dataset/data/schema';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -14,6 +12,8 @@ import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
async function handler(
req: NextApiRequest
@@ -77,6 +77,8 @@ async function handler(
.sort({
updateTime: -1
})
.skip(offset)
.limit(pageSize)
.lean();
return {
@@ -88,6 +90,7 @@ async function handler(
tags: item.tags
}),
dataAmount: 0,
indexAmount: 0,
trainingAmount: 0,
permission
}))
@@ -96,75 +99,62 @@ async function handler(
};
}
const [collections, total]: [DatasetCollectionsListItemType[], number] = await Promise.all([
MongoDatasetCollection.aggregate([
{
$match: match
},
{
$sort: { updateTime: -1 }
},
{
$skip: offset
},
{
$limit: pageSize
},
// count training data
{
$lookup: {
from: DatasetTrainingCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [{ $eq: ['$teamId', '$$team_id'] }, { $eq: ['$collectionId', '$$id'] }]
}
}
},
{ $count: 'count' }
],
as: 'trainingCount'
}
},
// count collection total data
{
$lookup: {
from: DatasetDataCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ['$teamId', '$$team_id'] },
{ $eq: ['$datasetId', '$$dataset_id'] },
{ $eq: ['$collectionId', '$$id'] }
]
}
}
},
{ $count: 'count' }
],
as: 'dataCount'
}
},
{
$project: {
...selectField,
dataAmount: {
$ifNull: [{ $arrayElemAt: ['$dataCount.count', 0] }, 0]
},
trainingAmount: {
$ifNull: [{ $arrayElemAt: ['$trainingCount.count', 0] }, 0]
const [collections, total]: [DatasetCollectionSchemaType[], number] = await Promise.all([
MongoDatasetCollection.find(match, undefined, { ...readFromSecondary })
.select(selectField)
.sort({ updateTime: -1 })
.skip(offset)
.limit(pageSize)
.lean(),
MongoDatasetCollection.countDocuments(match, { ...readFromSecondary })
]);
const collectionIds = collections.map((item) => item._id);
// Compute data amount
const [trainingAmount, dataAmount]: [
{ _id: string; count: number }[],
{ _id: string; count: number }[]
] = await Promise.all([
MongoDatasetCollection.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
]),
MongoDatasetCollection.countDocuments(match, {
...readFromSecondary
})
),
MongoDatasetData.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
)
]);
const list = await Promise.all(
@@ -174,11 +164,14 @@ async function handler(
datasetId,
tags: item.tags
}),
trainingAmount:
trainingAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
dataAmount: dataAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
permission
}))
);
if (list.find((item) => item.trainingAmount > 0)) {
if (list.some((item) => item.trainingAmount > 0)) {
startTrainingQueue();
}

View File

@@ -94,6 +94,7 @@ async function handler(
...item,
dataAmount: 0,
trainingAmount: 0,
indexAmount: 0,
permission
}))
),

View File

@@ -1,4 +1,4 @@
import { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { EmbeddingModelItemType, LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { useSystemStore } from './useSystemStore';
import { getWebReqUrl } from '@fastgpt/web/common/system/utils';
@@ -49,7 +49,7 @@ export const getWebLLMModel = (model?: string) => {
return list.find((item) => item.model === model || item.name === model) ?? defaultModels.llm!;
};
export const getWebDefaultModel = (llmList: LLMModelItemType[] = []) => {
export const getWebDefaultLLMModel = (llmList: LLMModelItemType[] = []) => {
const list = llmList.length > 0 ? llmList : useSystemStore.getState().llmModelList;
const defaultModels = useSystemStore.getState().defaultModels;
@@ -57,3 +57,13 @@ export const getWebDefaultModel = (llmList: LLMModelItemType[] = []) => {
? defaultModels.llm
: list[0];
};
/**
 * Pick the embedding model to use as the default on the web side.
 *
 * @param embeddingList Candidate models. When empty (or omitted), the
 *   `embeddingModelList` held in the system store is used instead.
 * @returns The store's configured default embedding model when one is set and
 *   a candidate with the same `model` value exists; otherwise the first
 *   candidate (which is `undefined` when the candidate list is empty).
 */
export const getWebDefaultEmbeddingModel = (embeddingList: EmbeddingModelItemType[] = []) => {
  const store = useSystemStore.getState();
  const candidates = embeddingList.length > 0 ? embeddingList : store.embeddingModelList;
  const configured = store.defaultModels.embedding;

  // Only honour the configured default if it is actually among the candidates.
  if (configured && candidates.some((entry) => entry.model === configured.model)) {
    return configured;
  }

  return candidates[0];
};

View File

@@ -56,7 +56,6 @@ import type {
import type { UpdateDatasetDataProps } from '@fastgpt/global/core/dataset/controller';
import type { DatasetFolderCreateBody } from '@/pages/api/core/dataset/folder/create';
import type { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
import type { GetScrollCollectionsProps } from '@/pages/api/core/dataset/collection/scrollList';
import type {
GetApiDatasetFileListProps,
GetApiDatasetFileListResponse
@@ -173,11 +172,6 @@ export const getTagUsage = (datasetId: string) =>
GET<TagUsageType[]>(`/proApi/core/dataset/tag/tagUsage?datasetId=${datasetId}`);
/**
 * Issue a GET request to the `getAllTags` endpoint for the given dataset.
 *
 * @param datasetId Value sent as the `datasetId` query parameter.
 * @returns The pending request, typed as `{ list: DatasetTagType[] }`.
 */
export const getAllTags = (datasetId: string) => {
  const url = `/proApi/core/dataset/tag/getAllTags?datasetId=${datasetId}`;
  return GET<{ list: DatasetTagType[] }>(url);
};
export const getScrollCollectionList = (data: GetScrollCollectionsProps) =>
POST<PaginationResponse<DatasetCollectionsListItemType>>(
`/core/dataset/collection/scrollList`,
data
);
/* =============================== data ==================================== */
/* get dataset list */

View File

@@ -59,7 +59,8 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
createTime: new Date(),
trainingType: TrainingModeEnum.chunk,
chunkSize: 0,
permission: new DatasetPermission()
permission: new DatasetPermission(),
indexAmount: 0
};
export enum ImportProcessWayEnum {