feat: model config required check; feat: dataset text model default setting (#3866)

* feat: model config required check

* feat: dataset text model default setting

* perf: collection list count

* fix: ts

* remove index count
Archer
2025-02-24 19:55:49 +08:00
committed by GitHub
parent 3bfe802c48
commit 255764400f
32 changed files with 356 additions and 192 deletions

View File

@@ -0,0 +1,22 @@
---
title: 'V4.8.23 (In Progress)'
description: 'FastGPT V4.8.23 Release Notes'
icon: 'upgrade'
draft: false
toc: true
weight: 802
---
## 🚀 New Features
1. Added a default "dataset text understanding model" configuration.
## ⚙️ Improvements
1. Added required-field validation to the model configuration form.
2. Reworked how collection list data counts are computed.
## 🐛 Fixes
1. Tag filtering did not correctly filter collections inside sub-folders.

View File

@@ -17,6 +17,8 @@ type BaseModelItemType = {
isActive?: boolean;
isCustom?: boolean;
isDefault?: boolean;
isDefaultDatasetTextModel?: boolean;
isDefaultDatasetImageModel?: boolean;
// If requestUrl is set, the model is requested directly
requestUrl?: string;
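The two flags added to `BaseModelItemType` are what the admin UI and the load-time default resolution (further down) read. A minimal sketch of a model entry carrying the new flags; every field value here is illustrative, not taken from the commit:

```ts
// Hypothetical model config entry. Only the isDefaultDataset* flags are new;
// the remaining fields follow the existing BaseModelItemType shape.
const exampleModel = {
  model: 'gpt-4o-mini',
  name: 'gpt-4o-mini',
  isActive: true,
  isCustom: false,
  isDefault: false,
  isDefaultDatasetTextModel: true, // default model for dataset text preprocessing
  isDefaultDatasetImageModel: false // would mark the default image-understanding model
};
```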

View File

@@ -192,6 +192,7 @@ export type DatasetCollectionItemType = CollectionWithDatasetType & {
sourceId?: string;
file?: DatasetFileSchema;
permission: DatasetPermission;
indexAmount: number;
};
/* ================= data ===================== */

View File

@@ -1 +1,4 @@
export const FastGPTProUrl = process.env.PRO_URL ? `${process.env.PRO_URL}/api` : '';
export const isFastGPTMainService = !!process.env.PRO_URL;
// @ts-ignore
export const isFastGPTProService = () => !!global.systemConfig;
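`isFastGPTMainService` (the open-source service, identified by a configured `PRO_URL`) and `isFastGPTProService` (the pro server, detected at runtime via `global.systemConfig`) split what used to be a single `FastGPTProUrl` truthiness check. A rough sketch of the intended branching, mirroring the `createUsage` change further down; the function name is illustrative:

```ts
import { isFastGPTMainService, isFastGPTProService } from './constants';

// Hypothetical caller: decide where a billing record should go.
async function reportUsage(data: unknown) {
  if (isFastGPTMainService) {
    // Running in the main service: forward to the pro server over HTTP.
  } else if (isFastGPTProService()) {
    // Running in the pro server itself: handle locally (e.g. queue the write).
  }
}
```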

View File

@@ -21,6 +21,7 @@ export const recallFromVectorStore = Vector.embRecall;
export const getVectorDataByTime = Vector.getVectorDataByTime;
export const getVectorCountByTeamId = Vector.getVectorCountByTeamId;
export const getVectorCountByDatasetId = Vector.getVectorCountByDatasetId;
export const getVectorCountByCollectionId = Vector.getVectorCountByCollectionId;
export const insertDatasetDataVector = async ({
model,

View File

@@ -321,6 +321,23 @@ export class MilvusCtrl {
return total;
};
getVectorCountByCollectionId = async (
teamId: string,
datasetId: string,
collectionId: string
) => {
const client = await this.getClient();
const result = await client.query({
collection_name: DatasetVectorTableName,
output_fields: ['count(*)'],
filter: `(teamId == "${String(teamId)}") and (datasetId == "${String(datasetId)}") and (collectionId == "${String(collectionId)}")`
});
const total = result.data?.[0]?.['count(*)'] as number;
return total;
};
getVectorDataByTime = async (start: Date, end: Date) => {
const client = await this.getClient();

View File

@@ -240,6 +240,23 @@ export class PgVectorCtrl {
where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]]
});
return total;
};
getVectorCountByCollectionId = async (
teamId: string,
datasetId: string,
collectionId: string
) => {
const total = await PgClient.count(DatasetVectorTableName, {
where: [
['team_id', String(teamId)],
'and',
['dataset_id', String(datasetId)],
'and',
['collection_id', String(collectionId)]
]
});
return total;
};
}
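Milvus and PG now expose the same per-collection count, so API code can stay storage-agnostic by calling the shared controller export (the import path appears in the collection detail handler below). A usage sketch with placeholder ids:

```ts
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';

// Dispatches to the Milvus count(*) query or the PG COUNT, depending on
// which vector store the deployment is configured with.
const indexAmount = await getVectorCountByCollectionId(teamId, datasetId, collectionId);
```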

View File

@@ -52,6 +52,12 @@ export const loadSystemModels = async (init = false) => {
if (model.isDefault) {
global.systemDefaultModel.llm = model;
}
if (model.isDefaultDatasetTextModel) {
global.systemDefaultModel.datasetTextLLM = model;
}
if (model.isDefaultDatasetImageModel) {
global.systemDefaultModel.datasetImageLLM = model;
}
} else if (model.type === ModelTypeEnum.embedding) {
global.embeddingModelMap.set(model.model, model);
global.embeddingModelMap.set(model.name, model);
@@ -134,6 +140,16 @@ export const loadSystemModels = async (init = false) => {
if (!global.systemDefaultModel.llm) {
global.systemDefaultModel.llm = Array.from(global.llmModelMap.values())[0];
}
if (!global.systemDefaultModel.datasetTextLLM) {
global.systemDefaultModel.datasetTextLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.datasetProcess
);
}
if (!global.systemDefaultModel.datasetImageLLM) {
global.systemDefaultModel.datasetImageLLM = Array.from(global.llmModelMap.values()).find(
(item) => item.vision
);
}
if (!global.systemDefaultModel.embedding) {
global.systemDefaultModel.embedding = Array.from(global.embeddingModelMap.values())[0];
}
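Resolution order for the two new defaults, as implemented above: a model explicitly flagged via `isDefaultDatasetTextModel` / `isDefaultDatasetImageModel` wins; otherwise the first loaded LLM with the matching capability (`datasetProcess` for text, `vision` for images) is used. A condensed restatement of that fallback, not a separate API:

```ts
// Equivalent fallback logic in one place (llmModels stands in for llmModelMap values).
const datasetTextLLM =
  llmModels.find((m) => m.isDefaultDatasetTextModel) ??
  llmModels.find((m) => m.datasetProcess);
const datasetImageLLM =
  llmModels.find((m) => m.isDefaultDatasetImageModel) ??
  llmModels.find((m) => m.vision);
```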

View File

@@ -22,6 +22,9 @@ export type SystemModelItemType =
export type SystemDefaultModelType = {
[ModelTypeEnum.llm]?: LLMModelItemType;
datasetTextLLM?: LLMModelItemType;
datasetImageLLM?: LLMModelItemType;
[ModelTypeEnum.embedding]?: EmbeddingModelItemType;
[ModelTypeEnum.tts]?: TTSModelType;
[ModelTypeEnum.stt]?: STTModelType;

View File

@@ -201,61 +201,6 @@ export async function searchDatasetData(
};
};
async function getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds
}: {
teamId: string;
datasetIds: string[];
parentCollectionIds: string[];
}): Promise<string[]> {
if (!parentCollectionIds.length) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set(collections.map((item) => String(item._id)));
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
}
/*
Collection metadata filter
Tag filter:
@@ -263,6 +208,63 @@ export async function searchDatasetData(
2. 'and' tags and null cannot coexist; otherwise an empty array is returned
*/
const filterCollectionByMetadata = async (): Promise<string[] | undefined> => {
const getAllCollectionIds = async ({
parentCollectionIds
}: {
parentCollectionIds?: string[];
}): Promise<string[] | undefined> => {
if (!parentCollectionIds) return;
if (parentCollectionIds.length === 0) {
return [];
}
const collections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
_id: { $in: parentCollectionIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const resultIds = new Set<string>();
collections.forEach((item) => {
if (item.type !== 'folder') {
resultIds.add(String(item._id));
}
});
const folderIds = collections
.filter((item) => item.type === 'folder')
.map((item) => String(item._id));
// Get all child collection ids
if (folderIds.length) {
const childCollections = await MongoDatasetCollection.find(
{
teamId,
datasetId: { $in: datasetIds },
parentId: { $in: folderIds }
},
'_id type',
{
...readFromSecondary
}
).lean();
const childIds = await getAllCollectionIds({
parentCollectionIds: childCollections.map((item) => String(item._id))
});
childIds?.forEach((id) => resultIds.add(id));
}
return Array.from(resultIds);
};
if (!collectionFilterMatch || !global.feConfigs.isPlus) return;
let tagCollectionIdList: string[] | undefined = undefined;
@@ -382,7 +384,7 @@ export async function searchDatasetData(
}
// Concat tag and time
const finalIds = (() => {
const collectionIds = (() => {
if (tagCollectionIdList && createTimeCollectionIdList) {
return tagCollectionIdList.filter((id) =>
(createTimeCollectionIdList as string[]).includes(id)
@@ -392,13 +394,9 @@ export async function searchDatasetData(
return tagCollectionIdList || createTimeCollectionIdList;
})();
return finalIds
? await getAllCollectionIds({
teamId,
datasetIds,
parentCollectionIds: finalIds
})
: undefined;
return await getAllCollectionIds({
parentCollectionIds: collectionIds
});
} catch (error) {}
};
const embeddingRecall = async ({

View File

@@ -8,12 +8,12 @@ import { i18nT } from '../../../../web/i18n/utils';
import { pushConcatBillTask, pushReduceTeamAiPointsTask } from './utils';
import { POST } from '../../../common/api/plusRequest';
import { FastGPTProUrl } from '../../../common/system/constants';
import { isFastGPTMainService } from '../../../common/system/constants';
export async function createUsage(data: CreateUsageProps) {
try {
// In FastGPT server
if (FastGPTProUrl) {
if (isFastGPTMainService) {
await POST('/support/wallet/usage/createUsage', data);
} else if (global.reduceAiPointsQueue) {
// In FastGPT pro server
@@ -31,7 +31,7 @@ export async function createUsage(data: CreateUsageProps) {
export async function concatUsage(data: ConcatUsageProps) {
try {
// In FastGPT server
if (FastGPTProUrl) {
if (isFastGPTMainService) {
await POST('/support/wallet/usage/concatUsage', data);
} else if (global.reduceAiPointsQueue) {
const {

View File

@@ -547,7 +547,6 @@
"core.dataset.data.Main Content": "Main Content",
"core.dataset.data.Search data placeholder": "Search Related Data",
"core.dataset.data.Too Long": "Total Length Exceeded",
"core.dataset.data.Total Amount": "{{total}} Groups",
"core.dataset.data.group": "Group",
"core.dataset.data.unit": "Items",
"core.dataset.embedding model tip": "The index model can convert natural language into vectors for semantic search.\nNote that different index models cannot be used together. Once an index model is selected, it cannot be changed.",
@@ -860,7 +859,6 @@
"dataset.collections.Collection Embedding": "{{total}} Indexes",
"dataset.collections.Confirm to delete the folder": "Confirm to Delete This Folder and All Its Contents?",
"dataset.collections.Create And Import": "Create/Import",
"dataset.collections.Data Amount": "Total Data",
"dataset.collections.Select Collection": "Select File",
"dataset.collections.Select One Collection To Store": "Select a File to Store",
"dataset.data.Can not edit": "No Edit Permission",
@@ -876,6 +874,7 @@
"dataset.dataset_name": "Dataset Name",
"dataset.deleteFolderTips": "Confirm to Delete This Folder and All Its Contained Datasets? Data Cannot Be Recovered After Deletion, Please Confirm!",
"dataset.test.noResult": "No Search Results",
"dataset_text_model_tip": "Used for text processing in the knowledge base preprocessing stage, such as automatic supplementary indexing, Q&A pair extraction.",
"deep_rag_search": "In-depth search",
"delete_api": "Are you sure you want to delete this API key? \nAfter deletion, the key will become invalid immediately and the corresponding conversation log will not be deleted. Please confirm!",
"embedding_model_not_config": "No index model is detected",

View File

@@ -7,6 +7,7 @@
"close_auto_sync": "Are you sure you want to turn off automatic sync?",
"collection.Create update time": "Creation/Update Time",
"collection.Training type": "Training",
"collection_data_count": "Data amount",
"collection_not_support_retraining": "This collection type does not support retuning parameters",
"collection_not_support_sync": "This collection does not support synchronization",
"collection_sync": "Sync data",
@@ -20,6 +21,7 @@
"custom_data_process_params": "Custom",
"custom_data_process_params_desc": "Customize data processing rules",
"data.ideal_chunk_length": "ideal block length",
"data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
"data_process_params": "Params",
"data_process_setting": "Processing config",
"dataset.Unsupported operation": "dataset.Unsupported operation",

View File

@@ -550,7 +550,6 @@
"core.dataset.data.Main Content": "主要内容",
"core.dataset.data.Search data placeholder": "搜索相关数据",
"core.dataset.data.Too Long": "总长度超长了",
"core.dataset.data.Total Amount": "{{total}} 组",
"core.dataset.data.group": "组",
"core.dataset.data.unit": "条",
"core.dataset.embedding model tip": "索引模型可以将自然语言转成向量,用于进行语义检索。\n注意不同索引模型无法一起使用选择完索引模型后将无法修改。",
@@ -863,7 +862,6 @@
"dataset.collections.Collection Embedding": "{{total}} 组索引中",
"dataset.collections.Confirm to delete the folder": "确认删除该文件夹及里面所有内容?",
"dataset.collections.Create And Import": "新建/导入",
"dataset.collections.Data Amount": "数据总量",
"dataset.collections.Select Collection": "选择文件",
"dataset.collections.Select One Collection To Store": "选择一个文件进行存储",
"dataset.data.Can not edit": "无编辑权限",
@@ -879,6 +877,7 @@
"dataset.dataset_name": "知识库名称",
"dataset.deleteFolderTips": "确认删除该文件夹及其包含的所有知识库?删除后数据无法恢复,请确认!",
"dataset.test.noResult": "搜索结果为空",
"dataset_text_model_tip": "用于知识库预处理阶段的文本处理,例如自动补充索引、问答对提取。",
"deep_rag_search": "深度搜索",
"delete_api": "确认删除该API密钥删除后该密钥立即失效对应的对话日志不会删除请确认",
"embedding_model_not_config": "检测到没有可用的索引模型",
@@ -944,9 +943,9 @@
"model_moka": "Moka-AI",
"model_moonshot": "月之暗面",
"model_other": "其他",
"model_ppio": "PPIO 派欧云",
"model_qwen": "阿里千问",
"model_siliconflow": "硅基流动",
"model_ppio": "PPIO 派欧云",
"model_sparkdesk": "讯飞星火",
"model_stepfun": "阶跃星辰",
"model_yi": "零一万物",

View File

@@ -7,6 +7,7 @@
"close_auto_sync": "确认关闭自动同步功能?",
"collection.Create update time": "创建/更新时间",
"collection.Training type": "训练模式",
"collection_data_count": "数据量",
"collection_not_support_retraining": "该集合类型不支持重新调整参数",
"collection_not_support_sync": "该集合不支持同步",
"collection_sync": "立即同步",
@@ -20,6 +21,7 @@
"custom_data_process_params": "自定义",
"custom_data_process_params_desc": "自定义设置数据处理规则",
"data.ideal_chunk_length": "理想分块长度",
"data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
"data_process_params": "处理参数",
"data_process_setting": "数据处理配置",
"dataset.Unsupported operation": "操作不支持",

View File

@@ -546,7 +546,6 @@
"core.dataset.data.Main Content": "主要內容",
"core.dataset.data.Search data placeholder": "搜尋相關資料",
"core.dataset.data.Too Long": "總長度超出上限",
"core.dataset.data.Total Amount": "{{total}} 組",
"core.dataset.data.group": "組",
"core.dataset.data.unit": "筆",
"core.dataset.embedding model tip": "索引模型可以將自然語言轉換成向量,用於進行語意搜尋。\n注意不同索引模型無法一起使用。選擇索引模型後就無法修改。",
@@ -860,7 +859,6 @@
"dataset.collections.Collection Embedding": "{{total}} 個索引",
"dataset.collections.Confirm to delete the folder": "確認刪除此資料夾及其所有內容?",
"dataset.collections.Create And Import": "建立或匯入",
"dataset.collections.Data Amount": "資料總量",
"dataset.collections.Select Collection": "選擇檔案",
"dataset.collections.Select One Collection To Store": "選擇一個檔案進行儲存",
"dataset.data.Can not edit": "無編輯權限",
@@ -876,6 +874,7 @@
"dataset.dataset_name": "知識庫名稱",
"dataset.deleteFolderTips": "確認刪除此資料夾及其包含的所有知識庫?刪除後資料無法復原,請確認!",
"dataset.test.noResult": "搜尋結果為空",
"dataset_text_model_tip": "用於知識庫預處理階段的文本處理,例如自動補充索引、問答對提取。",
"deep_rag_search": "深度搜索",
"delete_api": "確認刪除此 API 金鑰?\n刪除後該金鑰將立即失效對應的對話記錄不會被刪除請確認",
"embedding_model_not_config": "檢測到沒有可用的索引模型",

View File

@@ -7,6 +7,7 @@
"close_auto_sync": "確認關閉自動同步功能?",
"collection.Create update time": "建立/更新時間",
"collection.Training type": "分段模式",
"collection_data_count": "數據量",
"collection_not_support_retraining": "此集合類型不支援重新調整參數",
"collection_not_support_sync": "該集合不支援同步",
"collection_sync": "立即同步",
@@ -20,6 +21,7 @@
"custom_data_process_params": "自訂",
"custom_data_process_params_desc": "自訂資料處理規則",
"data.ideal_chunk_length": "理想分塊長度",
"data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引",
"data_process_params": "處理參數",
"data_process_setting": "資料處理設定",
"dataset.Unsupported operation": "操作不支持",

View File

@@ -8,7 +8,7 @@ import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { getWebDefaultModel } from '@/web/common/system/utils';
import { getWebDefaultLLMModel } from '@/web/common/system/utils';
type Props = {
llmModelType?: `${LLMModelTypeEnum}`;
@@ -40,7 +40,7 @@ const SettingLLMModel = ({
[llmModelList, llmModelType]
);
const defaultModel = useMemo(() => {
return getWebDefaultModel(modelList).model;
return getWebDefaultLLMModel(modelList).model;
}, [modelList]);
// Set default model

View File

@@ -59,6 +59,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { useRefresh } from '../../../../../../packages/web/hooks/useRefresh';
import { Prompt_CQJson, Prompt_ExtractJson } from '@fastgpt/global/core/ai/prompt/agent';
import MyDivider from '@fastgpt/web/components/common/MyDivider';
const MyModal = dynamic(() => import('@fastgpt/web/components/common/MyModal'));
@@ -730,7 +731,12 @@ const ModelEditModal = ({
<Td>{t('common:core.ai.Max context')}</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="maxContext" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="maxContext"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -740,6 +746,7 @@ const ModelEditModal = ({
<Flex justifyContent={'flex-end'}>
<MyNumberInput
register={register}
isRequired
name="quoteMaxToken"
{...InputStyles}
/>
@@ -750,7 +757,12 @@ const ModelEditModal = ({
<Td>{t('common:core.chat.response.module maxToken')}</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="maxResponse" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="maxResponse"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -760,6 +772,7 @@ const ModelEditModal = ({
<Flex justifyContent={'flex-end'}>
<MyNumberInput
register={register}
isRequired
name="maxTemperature"
step={0.1}
{...InputStyles}
@@ -838,7 +851,12 @@ const ModelEditModal = ({
</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="defaultToken" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="defaultToken"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -846,7 +864,12 @@ const ModelEditModal = ({
<Td>{t('common:core.ai.Max context')}</Td>
<Td textAlign={'right'}>
<Flex justifyContent={'flex-end'}>
<MyNumberInput register={register} name="maxToken" {...InputStyles} />
<MyNumberInput
register={register}
isRequired
name="maxToken"
{...InputStyles}
/>
</Flex>
</Td>
</Tr>
@@ -1214,6 +1237,7 @@ const DefaultModelModal = ({
const {
defaultModels,
llmModelList,
datasetModelList,
embeddingModelList,
ttsModelList,
sttModelList,
@@ -1334,6 +1358,29 @@ const DefaultModelModal = ({
/>
</Box>
</Box>
<MyDivider />
<Box>
<Flex {...labelStyles} alignItems={'center'}>
<Box mr={0.5}>{t('common:core.ai.model.Dataset Agent Model')}</Box>
<QuestionTip label={t('common:dataset_text_model_tip')} />
</Flex>
<Box flex={1}>
<AIModelSelector
bg="myGray.50"
value={defaultData.datasetTextLLM?.model}
list={datasetModelList.map((item) => ({
value: item.model,
label: item.name
}))}
onchange={(e) => {
setDefaultData((state) => ({
...state,
datasetTextLLM: datasetModelList.find((item) => item.model === e)
}));
}}
/>
</Box>
</Box>
</ModalBody>
<ModalFooter>
<Button variant={'whiteBase'} mr={4} onClick={onClose}>
@@ -1347,7 +1394,9 @@ const DefaultModelModal = ({
[ModelTypeEnum.embedding]: defaultData.embedding?.model,
[ModelTypeEnum.tts]: defaultData.tts?.model,
[ModelTypeEnum.stt]: defaultData.stt?.model,
[ModelTypeEnum.rerank]: defaultData.rerank?.model
[ModelTypeEnum.rerank]: defaultData.rerank?.model,
datasetTextLLM: defaultData.datasetTextLLM?.model,
datasetImageLLM: defaultData.datasetImageLLM?.model
})
}
>

View File

@@ -5,7 +5,7 @@ import { llmModelTypeFilterMap } from '@fastgpt/global/core/ai/constants';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { useContextSelector } from 'use-context-selector';
import { WorkflowContext } from '@/pageComponents/app/detail/WorkflowComponents/context';
import { getWebDefaultModel } from '@/web/common/system/utils';
import { getWebDefaultLLMModel } from '@/web/common/system/utils';
const SelectAiModelRender = ({ item, nodeId }: RenderInputProps) => {
const { llmModelList } = useSystemStore();
@@ -23,7 +23,7 @@ const SelectAiModelRender = ({ item, nodeId }: RenderInputProps) => {
[llmModelList, item.llmModelType]
);
const defaultModel = useMemo(() => {
return getWebDefaultModel(modelList).model;
return getWebDefaultLLMModel(modelList).model;
}, [modelList]);
const onChangeModel = useCallback(

View File

@@ -10,7 +10,7 @@ import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils';
import {
delDatasetCollectionTag,
getDatasetCollectionTags,
getScrollCollectionList,
getDatasetCollections,
getTagUsage,
postAddTagsToCollections,
updateDatasetCollectionTag
@@ -146,7 +146,7 @@ const TagManageModal = ({ onClose }: { onClose: () => void }) => {
scrollDataList: collectionsList,
ScrollList: ScrollListCollections,
isLoading: collectionsListLoading
} = useVirtualScrollPagination(getScrollCollectionList, {
} = useVirtualScrollPagination(getDatasetCollections, {
refreshDeps: [searchText],
// debounceWait: 300,
@@ -156,6 +156,7 @@ const TagManageModal = ({ onClose }: { onClose: () => void }) => {
pageSize: 30,
defaultParams: {
datasetId: datasetDetail._id,
simple: true,
searchText
}
});

View File

@@ -195,7 +195,7 @@ const CollectionCard = () => {
<Tr>
<Th py={4}>{t('common:common.Name')}</Th>
<Th py={4}>{t('dataset:collection.Training type')}</Th>
<Th py={4}>{t('common:dataset.collections.Data Amount')}</Th>
<Th py={4}>{t('dataset:collection_data_count')}</Th>
<Th py={4}>{t('dataset:collection.Create update time')}</Th>
<Th py={4}>{t('common:common.Status')}</Th>
<Th py={4}>{t('dataset:Enable')}</Th>

View File

@@ -29,10 +29,8 @@ import Markdown from '@/components/Markdown';
import { useMemoizedFn } from 'ahooks';
import { useScrollPagination } from '@fastgpt/web/hooks/useScrollPagination';
import { TabEnum } from './NavBar';
import {
DatasetCollectionTypeEnum,
ImportDataSourceEnum
} from '@fastgpt/global/core/dataset/constants';
import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
const DataCard = () => {
const theme = useTheme();
@@ -76,19 +74,17 @@ const DataCard = () => {
const [editDataId, setEditDataId] = useState<string>();
// get file info
const { data: collection } = useQuery(
['getDatasetCollectionById', collectionId],
() => getDatasetCollectionById(collectionId),
{
onError: () => {
router.replace({
query: {
datasetId
}
});
}
const { data: collection } = useRequest2(() => getDatasetCollectionById(collectionId), {
refreshDeps: [collectionId],
manual: false,
onError: () => {
router.replace({
query: {
datasetId
}
});
}
);
});
const canWrite = useMemo(() => datasetDetail.permission.hasWritePer, [datasetDetail]);
@@ -182,7 +178,10 @@ const DataCard = () => {
<Flex align={'center'} color={'myGray.500'}>
<MyIcon name="common/list" mr={2} w={'18px'} />
<Box as={'span'} fontSize={['sm', '14px']} fontWeight={'500'}>
{t('common:core.dataset.data.Total Amount', { total })}
{t('dataset:data_amount', {
dataAmount: total,
indexAmount: collection?.indexAmount ?? '-'
})}
</Box>
</Flex>
<Box flex={1} mr={1} />

View File

@@ -164,12 +164,12 @@ const Info = ({ datasetId }: { datasetId: string }) => {
</Flex>
<Box mt={5} w={'100%'}>
<Flex alignItems={'center'} fontSize={'mini'}>
<FormLabel fontWeight={'500'} flex={'1 0 0'}>
<Flex alignItems={'center'}>
<FormLabel fontWeight={'500'} flex={'1 0 0'} fontSize={'mini'}>
{t('common:core.ai.model.Vector Model')}
</FormLabel>
<MyTooltip label={t('dataset:vector_model_max_tokens_tip')}>
<Box>
<Box fontSize={'mini'}>
{t('dataset:chunk_max_tokens')}: {vectorModel.maxToken}
</Box>
</MyTooltip>

View File

@@ -21,7 +21,7 @@ import MyIcon from '@fastgpt/web/components/common/Icon';
import { getDocPath } from '@/web/common/system/doc';
import { datasetTypeCourseMap } from '@/web/core/dataset/constants';
import ApiDatasetForm from '../ApiDatasetForm';
import { getWebDefaultModel } from '@/web/common/system/utils';
import { getWebDefaultEmbeddingModel, getWebDefaultLLMModel } from '@/web/common/system/utils';
export type CreateDatasetType =
| DatasetTypeEnum.dataset
@@ -40,7 +40,6 @@ const CreateModal = ({
type: CreateDatasetType;
}) => {
const { t } = useTranslation();
const { toast } = useToast();
const router = useRouter();
const { defaultModels, embeddingModelList, datasetModelList } = useSystemStore();
const { isPc } = useSystem();
@@ -79,8 +78,10 @@ const CreateModal = ({
avatar: datasetTypeMap[type].icon,
name: '',
intro: '',
vectorModel: defaultModels.embedding?.model,
agentModel: getWebDefaultModel(datasetModelList)?.model
vectorModel:
defaultModels.embedding?.model || getWebDefaultEmbeddingModel(embeddingModelList)?.model,
agentModel:
defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model
}
});
const { register, setValue, handleSubmit, watch } = form;

View File

@@ -15,6 +15,8 @@ export type updateDefaultBody = {
[ModelTypeEnum.tts]?: string;
[ModelTypeEnum.stt]?: string;
[ModelTypeEnum.rerank]?: string;
datasetTextLLM?: string;
datasetImageLLM?: string;
};
export type updateDefaultResponse = {};
@@ -25,10 +27,21 @@ async function handler(
): Promise<updateDefaultResponse> {
await authSystemAdmin({ req });
const { llm, embedding, tts, stt, rerank } = req.body;
const { llm, embedding, tts, stt, rerank, datasetTextLLM, datasetImageLLM } = req.body;
await mongoSessionRun(async (session) => {
await MongoSystemModel.updateMany({}, { $unset: { 'metadata.isDefault': 1 } }, { session });
// Remove all default flags
await MongoSystemModel.updateMany(
{},
{
$unset: {
'metadata.isDefault': 1,
'metadata.isDefaultDatasetTextModel': 1,
'metadata.isDefaultDatasetImageModel': 1
}
},
{ session }
);
if (llm) {
await MongoSystemModel.updateOne(
@@ -37,6 +50,20 @@ async function handler(
{ session }
);
}
if (datasetTextLLM) {
await MongoSystemModel.updateOne(
{ model: datasetTextLLM },
{ $set: { 'metadata.isDefaultDatasetTextModel': true } },
{ session }
);
}
if (datasetImageLLM) {
await MongoSystemModel.updateOne(
{ model: datasetImageLLM },
{ $set: { 'metadata.isDefaultDatasetImageModel': true } },
{ session }
);
}
if (embedding) {
await MongoSystemModel.updateOne(
{ model: embedding },

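The handler first clears every default flag with one `updateMany`, then re-sets one flag per provided field inside the same session, so a default can never point at two models at once. A hypothetical request body exercising the two new fields (model names are placeholders):

```ts
// Shape follows updateDefaultBody above.
const body = {
  llm: 'gpt-4o-mini',
  embedding: 'text-embedding-3-small',
  datasetTextLLM: 'gpt-4o-mini', // default dataset text model
  datasetImageLLM: 'gpt-4o' // default dataset image (vision) model
};
```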
View File

@@ -11,6 +11,7 @@ import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
import { DatasetCollectionItemType } from '@fastgpt/global/core/dataset/type';
import { CommonErrEnum } from '@fastgpt/global/common/error/code/common';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { getVectorCountByCollectionId } from '@fastgpt/service/common/vectorStore/controller';
async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType> {
const { id } = req.query as { id: string };
@@ -29,12 +30,16 @@ async function handler(req: NextApiRequest): Promise<DatasetCollectionItemType>
});
// get file
const file = collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined;
const [file, indexAmount] = await Promise.all([
collection?.fileId
? await getFileById({ bucketName: BucketNameEnum.dataset, fileId: collection.fileId })
: undefined,
getVectorCountByCollectionId(collection.teamId, collection.datasetId, collection._id)
]);
return {
...collection,
indexAmount: indexAmount ?? 0,
...getCollectionSourceData(collection),
tags: await collectionTagsToTagLabel({
datasetId: collection.datasetId,

View File

@@ -1,12 +1,10 @@
import type { NextApiRequest } from 'next';
import { DatasetTrainingCollectionName } from '@fastgpt/service/core/dataset/training/schema';
import { Types } from '@fastgpt/service/common/mongo';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import type { GetDatasetCollectionsProps } from '@/global/core/api/datasetReq';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constants';
import { authDataset } from '@fastgpt/service/support/permission/dataset/auth';
import { DatasetDataCollectionName } from '@fastgpt/service/core/dataset/data/schema';
import { startTrainingQueue } from '@/service/core/dataset/training/utils';
import { NextAPI } from '@/service/middleware/entry';
import { ReadPermissionVal } from '@fastgpt/global/support/permission/constant';
@@ -14,6 +12,8 @@ import { readFromSecondary } from '@fastgpt/service/common/mongo/utils';
import { collectionTagsToTagLabel } from '@fastgpt/service/core/dataset/collection/utils';
import { PaginationResponse } from '@fastgpt/web/common/fetch/type';
import { parsePaginationRequest } from '@fastgpt/service/common/api/pagination';
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
async function handler(
req: NextApiRequest
@@ -77,6 +77,8 @@ async function handler(
.sort({
updateTime: -1
})
.skip(offset)
.limit(pageSize)
.lean();
return {
@@ -88,6 +90,7 @@ async function handler(
tags: item.tags
}),
dataAmount: 0,
indexAmount: 0,
trainingAmount: 0,
permission
}))
@@ -96,75 +99,62 @@ async function handler(
};
}
const [collections, total]: [DatasetCollectionsListItemType[], number] = await Promise.all([
MongoDatasetCollection.aggregate([
{
$match: match
},
{
$sort: { updateTime: -1 }
},
{
$skip: offset
},
{
$limit: pageSize
},
// count training data
{
$lookup: {
from: DatasetTrainingCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [{ $eq: ['$teamId', '$$team_id'] }, { $eq: ['$collectionId', '$$id'] }]
}
}
},
{ $count: 'count' }
],
as: 'trainingCount'
}
},
// count collection total data
{
$lookup: {
from: DatasetDataCollectionName,
let: { id: '$_id', team_id: match.teamId, dataset_id: match.datasetId },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ['$teamId', '$$team_id'] },
{ $eq: ['$datasetId', '$$dataset_id'] },
{ $eq: ['$collectionId', '$$id'] }
]
}
}
},
{ $count: 'count' }
],
as: 'dataCount'
}
},
{
$project: {
...selectField,
dataAmount: {
$ifNull: [{ $arrayElemAt: ['$dataCount.count', 0] }, 0]
},
trainingAmount: {
$ifNull: [{ $arrayElemAt: ['$trainingCount.count', 0] }, 0]
const [collections, total]: [DatasetCollectionSchemaType[], number] = await Promise.all([
MongoDatasetCollection.find(match, undefined, { ...readFromSecondary })
.select(selectField)
.sort({ updateTime: -1 })
.skip(offset)
.limit(pageSize)
.lean(),
MongoDatasetCollection.countDocuments(match, { ...readFromSecondary })
]);
const collectionIds = collections.map((item) => item._id);
// Compute data amount
const [trainingAmount, dataAmount]: [
{ _id: string; count: number }[],
{ _id: string; count: number }[]
] = await Promise.all([
// Training queue counts live in the dataset-training collection.
MongoDatasetTraining.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
]),
MongoDatasetCollection.countDocuments(match, {
...readFromSecondary
})
),
MongoDatasetData.aggregate(
[
{
$match: {
teamId: match.teamId,
datasetId: match.datasetId,
collectionId: { $in: collectionIds }
}
},
{
$group: {
_id: '$collectionId',
count: { $sum: 1 }
}
}
],
{
...readFromSecondary
}
)
]);
const list = await Promise.all(
@@ -174,11 +164,14 @@ async function handler(
datasetId,
tags: item.tags
}),
trainingAmount:
trainingAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
dataAmount: dataAmount.find((amount) => String(amount._id) === String(item._id))?.count || 0,
permission
}))
);
if (list.find((item) => item.trainingAmount > 0)) {
if (list.some((item) => item.trainingAmount > 0)) {
startTrainingQueue();
}
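Each page now costs a plain paginated `find` plus two grouped aggregates over that page's collection ids, instead of two `$lookup` sub-pipelines per collection row. One further micro-optimization that is not in this commit, purely an illustration: index the aggregate results by collection id so the per-row lookup is O(1) instead of a linear `.find`:

```ts
// Hypothetical follow-up: build maps once, then read counts per row.
const trainingMap = new Map(trainingAmount.map((a) => [String(a._id), a.count]));
const dataMap = new Map(dataAmount.map((a) => [String(a._id), a.count]));
// trainingAmount: trainingMap.get(String(item._id)) ?? 0,
// dataAmount: dataMap.get(String(item._id)) ?? 0,
```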

View File

@@ -94,6 +94,7 @@ async function handler(
...item,
dataAmount: 0,
trainingAmount: 0,
indexAmount: 0,
permission
}))
),

View File

@@ -1,4 +1,4 @@
import { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { EmbeddingModelItemType, LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import { useSystemStore } from './useSystemStore';
import { getWebReqUrl } from '@fastgpt/web/common/system/utils';
@@ -49,7 +49,7 @@ export const getWebLLMModel = (model?: string) => {
return list.find((item) => item.model === model || item.name === model) ?? defaultModels.llm!;
};
export const getWebDefaultModel = (llmList: LLMModelItemType[] = []) => {
export const getWebDefaultLLMModel = (llmList: LLMModelItemType[] = []) => {
const list = llmList.length > 0 ? llmList : useSystemStore.getState().llmModelList;
const defaultModels = useSystemStore.getState().defaultModels;
@@ -57,3 +57,13 @@ export const getWebDefaultModel = (llmList: LLMModelItemType[] = []) => {
? defaultModels.llm
: list[0];
};
export const getWebDefaultEmbeddingModel = (embeddingList: EmbeddingModelItemType[] = []) => {
const list =
embeddingList.length > 0 ? embeddingList : useSystemStore.getState().embeddingModelList;
const defaultModels = useSystemStore.getState().defaultModels;
return defaultModels.embedding &&
list.find((item) => item.model === defaultModels.embedding?.model)
? defaultModels.embedding
: list[0];
};
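Both helpers share one contract: prefer the configured default model when it is present in the supplied list, otherwise fall back to the first list entry. `CreateModal` (earlier in this commit) layers the store defaults on top, e.g.:

```ts
// Defaulting as used in CreateModal, with names from the diffs above.
const vectorModel =
  defaultModels.embedding?.model || getWebDefaultEmbeddingModel(embeddingModelList)?.model;
const agentModel =
  defaultModels.datasetTextLLM?.model || getWebDefaultLLMModel(datasetModelList)?.model;
```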

View File

@@ -56,7 +56,6 @@ import type {
import type { UpdateDatasetDataProps } from '@fastgpt/global/core/dataset/controller';
import type { DatasetFolderCreateBody } from '@/pages/api/core/dataset/folder/create';
import type { PaginationProps, PaginationResponse } from '@fastgpt/web/common/fetch/type';
import type { GetScrollCollectionsProps } from '@/pages/api/core/dataset/collection/scrollList';
import type {
GetApiDatasetFileListProps,
GetApiDatasetFileListResponse
@@ -173,11 +172,6 @@ export const getTagUsage = (datasetId: string) =>
GET<TagUsageType[]>(`/proApi/core/dataset/tag/tagUsage?datasetId=${datasetId}`);
export const getAllTags = (datasetId: string) =>
GET<{ list: DatasetTagType[] }>(`/proApi/core/dataset/tag/getAllTags?datasetId=${datasetId}`);
export const getScrollCollectionList = (data: GetScrollCollectionsProps) =>
POST<PaginationResponse<DatasetCollectionsListItemType>>(
`/core/dataset/collection/scrollList`,
data
);
/* =============================== data ==================================== */
/* get dataset list */

View File

@@ -59,7 +59,8 @@ export const defaultCollectionDetail: DatasetCollectionItemType = {
createTime: new Date(),
trainingType: TrainingModeEnum.chunk,
chunkSize: 0,
permission: new DatasetPermission()
permission: new DatasetPermission(),
indexAmount: 0
};
export enum ImportProcessWayEnum {