perf: yuque dataset (#5040)

* perf: yuque dataset

* doc
This commit is contained in:
Archer
2025-06-16 18:01:59 +08:00
committed by GitHub
parent 450d0a54fe
commit 7981b61ca9
9 changed files with 139 additions and 53 deletions

View File

@@ -14,6 +14,8 @@ weight: 788
3. 问题分类和内容提取,提示词中自动加入上一轮结果进行额外引导。 3. 问题分类和内容提取,提示词中自动加入上一轮结果进行额外引导。
4. 判断器支持变量引用。 4. 判断器支持变量引用。
5. 商业版支持知识库分块时,LLM 进行自动分段识别。 5. 商业版支持知识库分块时,LLM 进行自动分段识别。
6. Admin 管理员数据看板。
7. 豆包 1.6 系列模型,更新 qwen 模型配置。
## ⚙️ 优化 ## ⚙️ 优化
@@ -25,6 +27,7 @@ weight: 788
6. MCP 工具调用,使用 Raw schema 进行工具调用,保障完整性。 6. MCP 工具调用,使用 Raw schema 进行工具调用,保障完整性。
7. 删除知识库文件时,如果文件不存在,不会阻断删除。 7. 删除知识库文件时,如果文件不存在,不会阻断删除。
8. 升级 MCP SDK,兼容最新的 HTTPStreamable。 8. 升级 MCP SDK,兼容最新的 HTTPStreamable。
9. 语雀文档库,递归获取文档类型目录下的数据。
## 🐛 修复 ## 🐛 修复

View File

@@ -1,6 +1,72 @@
{ {
"provider": "Doubao", "provider": "Doubao",
"list": [ "list": [
{
"model": "Doubao-Seed-1.6",
"name": "Doubao-Seed-1.6",
"maxContext": 220000,
"maxResponse": 16000,
"quoteMaxToken": 220000,
"maxTemperature": 1,
"showTopP": true,
"showStopSign": true,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"usedInExtractFields": true,
"usedInQueryExtension": true,
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm"
},
{
"model": "Doubao-Seed-1.6-thinking",
"name": "Doubao-Seed-1.6-thinking",
"maxContext": 220000,
"maxResponse": 16000,
"quoteMaxToken": 220000,
"maxTemperature": 1,
"showTopP": true,
"showStopSign": true,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"usedInExtractFields": true,
"usedInQueryExtension": true,
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm"
},
{
"model": "Doubao-Seed-1.6-flash",
"name": "Doubao-Seed-1.6-flash",
"maxContext": 220000,
"maxResponse": 16000,
"quoteMaxToken": 220000,
"maxTemperature": 1,
"showTopP": true,
"showStopSign": true,
"vision": true,
"toolChoice": true,
"functionCall": false,
"defaultSystemChatPrompt": "",
"datasetProcess": true,
"usedInClassify": true,
"usedInExtractFields": true,
"usedInQueryExtension": true,
"usedInToolCall": true,
"defaultConfig": {},
"fieldMap": {},
"type": "llm"
},
{ {
"model": "Doubao-1.5-lite-32k", "model": "Doubao-1.5-lite-32k",
"name": "Doubao-1.5-lite-32k", "name": "Doubao-1.5-lite-32k",

View File

@@ -4,9 +4,9 @@
{ {
"model": "qwen-max", "model": "qwen-max",
"name": "Qwen-max", "name": "Qwen-max",
"maxContext": 32000, "maxContext": 128000,
"maxResponse": 4000, "maxResponse": 8000,
"quoteMaxToken": 6000, "quoteMaxToken": 120000,
"maxTemperature": 1, "maxTemperature": 1,
"vision": false, "vision": false,
"toolChoice": true, "toolChoice": true,
@@ -27,10 +27,10 @@
{ {
"model": "qwen-vl-max", "model": "qwen-vl-max",
"name": "qwen-vl-max", "name": "qwen-vl-max",
"maxContext": 32000, "maxContext": 128000,
"maxResponse": 2000, "maxResponse": 8000,
"quoteMaxToken": 20000, "quoteMaxToken": 120000,
"maxTemperature": 1.2, "maxTemperature": 1,
"vision": true, "vision": true,
"toolChoice": false, "toolChoice": false,
"functionCall": false, "functionCall": false,
@@ -49,9 +49,9 @@
{ {
"model": "qwen-plus", "model": "qwen-plus",
"name": "Qwen-plus", "name": "Qwen-plus",
"maxContext": 64000, "maxContext": 128000,
"maxResponse": 8000, "maxResponse": 8000,
"quoteMaxToken": 60000, "quoteMaxToken": 120000,
"maxTemperature": 1, "maxTemperature": 1,
"vision": false, "vision": false,
"toolChoice": true, "toolChoice": true,
@@ -72,10 +72,10 @@
{ {
"model": "qwen-vl-plus", "model": "qwen-vl-plus",
"name": "qwen-vl-plus", "name": "qwen-vl-plus",
"maxContext": 32000, "maxContext": 128000,
"maxResponse": 2000, "maxResponse": 8000,
"quoteMaxToken": 20000, "quoteMaxToken": 120000,
"maxTemperature": 1.2, "maxTemperature": 1,
"vision": true, "vision": true,
"toolChoice": false, "toolChoice": false,
"functionCall": false, "functionCall": false,
@@ -92,9 +92,9 @@
{ {
"model": "qwen-turbo", "model": "qwen-turbo",
"name": "Qwen-turbo", "name": "Qwen-turbo",
"maxContext": 128000, "maxContext": 1000000,
"maxResponse": 8000, "maxResponse": 8000,
"quoteMaxToken": 100000, "quoteMaxToken": 1000000,
"maxTemperature": 1, "maxTemperature": 1,
"vision": false, "vision": false,
"toolChoice": true, "toolChoice": true,
@@ -487,9 +487,9 @@
{ {
"model": "qwen-long", "model": "qwen-long",
"name": "qwen-long", "name": "qwen-long",
"maxContext": 100000, "maxContext": 10000000,
"maxResponse": 6000, "maxResponse": 6000,
"quoteMaxToken": 10000, "quoteMaxToken": 10000000,
"maxTemperature": 1, "maxTemperature": 1,
"vision": false, "vision": false,
"toolChoice": false, "toolChoice": false,

View File

@@ -106,7 +106,7 @@ export const useApiDatasetRequest = ({ apiServer }: { apiServer: APIFileServer }
const formattedFiles = files.map((file) => ({ const formattedFiles = files.map((file) => ({
...file, ...file,
hasChild: file.type === 'folder' hasChild: file.hasChild ?? file.type === 'folder'
})); }));
return formattedFiles; return formattedFiles;

View File

@@ -198,6 +198,7 @@ export const useYuqueDatasetRequest = ({ yuqueServer }: { yuqueServer: YuqueServ
}: { }: {
apiFileId: string; apiFileId: string;
}): Promise<ApiFileReadContentResponse> => { }): Promise<ApiFileReadContentResponse> => {
if (typeof apiFileId !== 'string') return Promise.reject('Invalid file id');
const [parentId, fileId] = apiFileId.split(/-(.*?)-(.*)/); const [parentId, fileId] = apiFileId.split(/-(.*?)-(.*)/);
const data = await request<{ title: string; body: string }>( const data = await request<{ title: string; body: string }>(

View File

@@ -167,7 +167,7 @@ export const readApiServerFileContent = async ({
}; };
export const rawText2Chunks = async ({ export const rawText2Chunks = async ({
rawText, rawText = '',
chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize, chunkTriggerType = ChunkTriggerConfigTypeEnum.minSize,
chunkTriggerMinSize = 1000, chunkTriggerMinSize = 1000,
backupParse, backupParse,

View File

@@ -140,7 +140,7 @@ const DataTableComponent = ({
model: item.model, model: item.model,
totalCalls: item.totalCalls, totalCalls: item.totalCalls,
errorCalls: item.errorCalls, errorCalls: item.errorCalls,
totalCost: item.totalCost, totalCost: Math.floor(item.totalCost),
avgResponseTime: successCalls > 0 ? item.totalResponseTime / successCalls / 1000 : 0, avgResponseTime: successCalls > 0 ? item.totalResponseTime / successCalls / 1000 : 0,
avgTtfb: successCalls > 0 ? item.totalTtfb / successCalls / 1000 : 0 avgTtfb: successCalls > 0 ? item.totalTtfb / successCalls / 1000 : 0
}); });
@@ -201,7 +201,7 @@ const DataTableComponent = ({
model: modelName, model: modelName,
totalCalls: item.totalCalls, totalCalls: item.totalCalls,
errorCalls: item.errorCalls, errorCalls: item.errorCalls,
totalCost: item.totalCost, totalCost: Math.floor(item.totalCost),
avgResponseTime: successCalls > 0 ? item.totalResponseTime / successCalls / 1000 : 0, avgResponseTime: successCalls > 0 ? item.totalResponseTime / successCalls / 1000 : 0,
avgTtfb: successCalls > 0 ? item.totalTtfb / successCalls / 1000 : 0 avgTtfb: successCalls > 0 ? item.totalTtfb / successCalls / 1000 : 0
}); });

View File

@@ -349,7 +349,7 @@ const ModelDashboard = ({ Tab }: { Tab: React.ReactNode }) => {
inputTokens, inputTokens,
outputTokens, outputTokens,
totalTokens, totalTokens,
totalCost, totalCost: Math.floor(totalCost),
avgResponseTime: Math.round(avgResponseTime * 100) / 100, avgResponseTime: Math.round(avgResponseTime * 100) / 100,
avgTtfb: Math.round(avgTtfb * 100) / 100, avgTtfb: Math.round(avgTtfb * 100) / 100,
maxRpm, maxRpm,

View File

@@ -70,8 +70,10 @@ const CustomAPIFileInput = () => {
} }
); );
const { data: existIdList = [] } = useRequest2( const { data: existIdList = new Set() } = useRequest2(
() => getApiDatasetFileListExistId({ datasetId: datasetDetail._id }), async () => {
return new Set<string>(await getApiDatasetFileListExistId({ datasetId: datasetDetail._id }));
},
{ {
manual: false manual: false
} }
@@ -89,7 +91,12 @@ const CustomAPIFileInput = () => {
const allFiles: APIFileItem[] = []; const allFiles: APIFileItem[] = [];
for (const file of files) { for (const file of files) {
if (file.type === 'folder') { if (sources.some((item) => item.apiFileId === file.id)) {
allFiles.push(file);
continue;
}
if (file.hasChild) {
const folderFiles = await getApiDatasetFileList({ const folderFiles = await getApiDatasetFileList({
datasetId: datasetDetail._id, datasetId: datasetDetail._id,
parentId: file?.id parentId: file?.id
@@ -97,27 +104,28 @@ const CustomAPIFileInput = () => {
const subFiles = await getFilesRecursively(folderFiles); const subFiles = await getFilesRecursively(folderFiles);
allFiles.push(...subFiles); allFiles.push(...subFiles);
} else {
allFiles.push(file);
} }
allFiles.push(file);
} }
return allFiles; return allFiles;
}; };
const allFiles = await getFilesRecursively(selectFiles); const allFiles = await getFilesRecursively(selectFiles);
const uniqueFiles = allFiles.filter(
(item, index, array) =>
!existIdList.has(item.id) && array.findIndex((file) => file.id === item.id) === index
);
setSources( setSources(
allFiles uniqueFiles.map((item) => ({
.filter((item) => !existIdList.includes(item.id)) id: item.id,
.map((item) => ({ apiFileId: item.id,
id: item.id, apiFile: item,
apiFileId: item.id, createStatus: 'waiting',
apiFile: item, sourceName: item.name,
createStatus: 'waiting', icon: getSourceNameIcon({ sourceName: item.name }) as any
sourceName: item.name, }))
icon: getSourceNameIcon({ sourceName: item.name }) as any
}))
); );
}, },
{ {
@@ -147,15 +155,24 @@ const CustomAPIFileInput = () => {
[selectFiles] [selectFiles]
); );
const handleSelectAll = useCallback(() => { const isAllSelected = useMemo(() => {
const isAllSelected = fileList.length === selectFiles.length; return fileList.every(
(item) => existIdList.has(item.id) || selectFiles.some((file) => file.id === item.id)
);
}, [fileList, selectFiles, existIdList]);
const handleSelectAll = useCallback(() => {
if (isAllSelected) { if (isAllSelected) {
setSelectFiles([]); setSelectFiles((state) =>
state.filter((file) => !fileList.find((item) => item.id === file.id))
);
} else { } else {
setSelectFiles(fileList); setSelectFiles((state) => [
...state.filter((file) => !fileList.find((item) => item.id === file.id)),
...fileList.filter((item) => !existIdList.has(item.id))
]);
} }
}, [fileList, selectFiles]); }, [isAllSelected, fileList, existIdList]);
return ( return (
<MyBox isLoading={loading} position="relative" h="full"> <MyBox isLoading={loading} position="relative" h="full">
@@ -193,23 +210,22 @@ const CustomAPIFileInput = () => {
fontSize={'sm'} fontSize={'sm'}
fontWeight={'medium'} fontWeight={'medium'}
color={'myGray.900'} color={'myGray.900'}
onClick={(e) => { // onClick={(e) => {
if (!(e.target as HTMLElement).closest('.checkbox')) { // if (!(e.target as HTMLElement).closest('.checkbox')) {
handleSelectAll(); // handleSelectAll();
} // }
}} // }}
> >
<Checkbox <Checkbox
className="checkbox" className="checkbox"
mr={2} mr={2}
isChecked={fileList.length === selectFiles.length} isChecked={isAllSelected}
onChange={handleSelectAll} onChange={handleSelectAll}
/> />
{t('common:Select_all')} {t('common:Select_all')}
</Flex> </Flex>
{fileList.map((item) => { {fileList.map((item) => {
const isFolder = item.type === 'folder'; const isExists = existIdList.has(item.id);
const isExists = existIdList.includes(item.id);
const isChecked = isExists || selectFiles.some((file) => file.id === item.id); const isChecked = isExists || selectFiles.some((file) => file.id === item.id);
return ( return (
@@ -243,9 +259,9 @@ const CustomAPIFileInput = () => {
/> />
<MyIcon <MyIcon
name={ name={
!isFolder item.type === 'folder'
? (getSourceNameIcon({ sourceName: item.name }) as any) ? 'common/folderFill'
: 'common/folderFill' : (getSourceNameIcon({ sourceName: item.name }) as any)
} }
w={'18px'} w={'18px'}
mr={1.5} mr={1.5}