Mirror of https://github.com/labring/FastGPT.git (synced 2025-07-22 04:06:18 +00:00)
Add image index and pdf parse (#3956)
* feat: think tag parse
* feat: parse think tag test
* feat: pdf parse ux
* feat: doc2x parse
* perf: rewrite training mode setting
* feat: image parse queue
* perf: image index
* feat: image parse process
* feat: add init sh
* fix: ts
@@ -1,31 +0,0 @@
export const retryRun = <T>(fn: () => T, retry = 2): T => {
  try {
    return fn();
  } catch (error) {
    if (retry > 0) {
      return retryRun(fn, retry - 1);
    }
    throw error;
  }
};

export const batchRun = async <T>(arr: T[], fn: (arr: T) => any, batchSize = 10) => {
  const batchArr = new Array(batchSize).fill(null);
  const result: any[] = [];

  const batchFn = async () => {
    const data = arr.shift();
    if (data) {
      result.push(await fn(data));
      return batchFn();
    }
  };

  await Promise.all(
    batchArr.map(async () => {
      await batchFn();
    })
  );

  return result;
};
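These helpers are deleted here and re-created under system/utils (see the import change in the next hunk). For orientation, batchRun caps concurrency by spawning batchSize recursive workers that shift items off the shared array. A minimal usage sketch, assuming the relocated export; the URL list is hypothetical:

```ts
import { batchRun } from '../system/utils';

// Hypothetical work items.
const urls = Array.from({ length: 100 }, (_, i) => `https://example.com/docs/${i}`);

const main = async () => {
  // At most 10 fetches are in flight at once; results arrive in completion order.
  const statuses = await batchRun(urls, async (url) => (await fetch(url)).status, 10);
  console.log(statuses.length);
};
main();
```

Note that `batchFn` stops a worker as soon as `arr.shift()` yields a falsy value, so falsy elements such as `''` or `0` end that worker early and never reach `fn`.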
@@ -1,4 +1,4 @@
import { batchRun } from '../fn/utils';
import { batchRun } from '../system/utils';
import { getNanoid, simpleText } from './tools';
import type { ImageType } from '../../../service/worker/readFile/type';

@@ -37,6 +37,80 @@ export const simpleMarkdownText = (rawText: string) => {
  return rawText.trim();
};

export const htmlTable2Md = (content: string): string => {
  return content.replace(/<table>[\s\S]*?<\/table>/g, (htmlTable) => {
    try {
      // Clean up whitespace and newlines
      const cleanHtml = htmlTable.replace(/\n\s*/g, '');
      const rows = cleanHtml.match(/<tr>(.*?)<\/tr>/g);
      if (!rows) return htmlTable;

      // Parse table data
      let tableData: string[][] = [];
      let maxColumns = 0;

      // Try to convert to markdown table
      rows.forEach((row, rowIndex) => {
        if (!tableData[rowIndex]) {
          tableData[rowIndex] = [];
        }
        let colIndex = 0;
        const cells = row.match(/<td.*?>(.*?)<\/td>/g) || [];

        cells.forEach((cell) => {
          while (tableData[rowIndex][colIndex]) {
            colIndex++;
          }
          const colspan = parseInt(cell.match(/colspan="(\d+)"/)?.[1] || '1');
          const rowspan = parseInt(cell.match(/rowspan="(\d+)"/)?.[1] || '1');
          const content = cell.replace(/<td.*?>|<\/td>/g, '').trim();

          for (let i = 0; i < rowspan; i++) {
            for (let j = 0; j < colspan; j++) {
              if (!tableData[rowIndex + i]) {
                tableData[rowIndex + i] = [];
              }
              tableData[rowIndex + i][colIndex + j] = i === 0 && j === 0 ? content : '^^';
            }
          }
          colIndex += colspan;
          maxColumns = Math.max(maxColumns, colIndex);
        });

        for (let i = 0; i < maxColumns; i++) {
          if (!tableData[rowIndex][i]) {
            tableData[rowIndex][i] = ' ';
          }
        }
      });
      const chunks: string[] = [];

      const headerCells = tableData[0]
        .slice(0, maxColumns)
        .map((cell) => (cell === '^^' ? ' ' : cell || ' '));
      const headerRow = '| ' + headerCells.join(' | ') + ' |';
      chunks.push(headerRow);

      const separator = '| ' + Array(headerCells.length).fill('---').join(' | ') + ' |';
      chunks.push(separator);

      tableData.slice(1).forEach((row) => {
        const paddedRow = row
          .slice(0, maxColumns)
          .map((cell) => (cell === '^^' ? ' ' : cell || ' '));
        while (paddedRow.length < maxColumns) {
          paddedRow.push(' ');
        }
        chunks.push('| ' + paddedRow.join(' | ') + ' |');
      });

      return chunks.join('\n');
    } catch (error) {
      return htmlTable;
    }
  });
};

/**
 * format markdown
 * 1. upload base64
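To illustrate the `'^^'` placeholder: cells covered by a rowspan/colspan are filled with `'^^'` during parsing and rendered as blanks, so the markdown grid stays rectangular. A sketch of the expected transformation (import path assumed; output traced from the code above):

```ts
import { htmlTable2Md } from './markdown'; // assuming the string utils file above

const html = `
<table>
  <tr><td rowspan="2">A</td><td>B</td></tr>
  <tr><td>C</td></tr>
</table>`;

console.log(htmlTable2Md(html));
// | A | B |
// | --- | --- |
// |   | C |
```

The second row's first cell is occupied by A's rowspan, so the parser skips past the `'^^'` marker and places C in the next free column.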
packages/global/common/system/types/index.d.ts (vendored, 30 lines changed)
@@ -43,10 +43,14 @@ export type FastGPTConfigFileType = {
export type FastGPTFeConfigsType = {
  show_workorder?: boolean;
  show_emptyChat?: boolean;
  isPlus?: boolean;
  register_method?: ['email' | 'phone' | 'sync'];
  login_method?: ['email' | 'phone']; // Attention: login method is different from oauth
  find_password_method?: ['email' | 'phone'];
  bind_notification_method?: ['email' | 'phone'];
  googleClientVerKey?: string;

  show_emptyChat?: boolean;
  show_appStore?: boolean;
  show_git?: boolean;
  show_pay?: boolean;

@@ -57,15 +61,19 @@ export type FastGPTFeConfigsType = {
  show_aiproxy?: boolean;
  concatMd?: string;

  concatMd?: string;
  docUrl?: string;
  openAPIDocUrl?: string;
  systemPluginCourseUrl?: string;
  appTemplateCourse?: string;
  customApiDomain?: string;
  customSharePageDomain?: string;

  systemTitle?: string;
  systemDescription?: string;
  googleClientVerKey?: string;
  isPlus?: boolean;
  scripts?: { [key: string]: string }[];
  favicon?: string;

  sso?: {
    icon?: string;
    title?: string;

@@ -91,13 +99,14 @@ export type FastGPTFeConfigsType = {
  exportDatasetLimitMinutes?: number;
  websiteSyncLimitMinuted?: number;
};
  scripts?: { [key: string]: string }[];
  favicon?: string;
  customApiDomain?: string;
  customSharePageDomain?: string;

  uploadFileMaxAmount?: number;
  uploadFileMaxSize?: number;

  // Compute by systemEnv.customPdfParse
  showCustomPdfParse?: boolean;
  customPdfParsePrice?: number;

  lafEnv?: string;
  navbarItems?: NavbarItemType[];
  externalProviderWorkflowVariables?: ExternalProviderWorkflowVarType[];

@@ -107,9 +116,18 @@ export type SystemEnvType = {
  openapiPrefix?: string;
  vectorMaxProcess: number;
  qaMaxProcess: number;
  vlmMaxProcess: number;
  pgHNSWEfSearch: number;
  tokenWorkers: number; // token count max worker

  oneapiUrl?: string;
  chatApiKey?: string;

  customPdfParse?: {
    url?: string;
    key?: string;

    doc2xKey?: string;
    price?: number; // n points/1 page
  };
};
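The new SystemEnvType.customPdfParse block is what a deployment sets to route PDF parsing to an external service or Doc2X. A hedged sketch of the relevant fragment, assuming it lives in the systemEnv section of FastGPT's config.json (field names come from the diff; values are placeholders):

```json
{
  "systemEnv": {
    "vectorMaxProcess": 10,
    "qaMaxProcess": 10,
    "vlmMaxProcess": 10,
    "customPdfParse": {
      "url": "http://pdf-parse.example.com/v1/parse",
      "key": "your-api-key",
      "doc2xKey": "sk-xxxx",
      "price": 0
    }
  }
}
```

The derived front-end flags (showCustomPdfParse, customPdfParsePrice) are computed from this block, per the `// Compute by systemEnv.customPdfParse` comment above.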
@@ -16,3 +16,24 @@ export const retryFn = async <T>(fn: () => Promise<T>, retryTimes = 3): Promise<
    return Promise.reject(error);
  }
};

export const batchRun = async <T>(arr: T[], fn: (arr: T) => any, batchSize = 10) => {
  const batchArr = new Array(batchSize).fill(null);
  const result: any[] = [];

  const batchFn = async () => {
    const data = arr.shift();
    if (data) {
      result.push(await fn(data));
      return batchFn();
    }
  };

  await Promise.all(
    batchArr.map(async () => {
      await batchFn();
    })
  );

  return result;
};
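retryFn (shown as context above) retries a rejected promise up to retryTimes before propagating the error; combined with the relocated batchRun it gives resilient batched processing. A minimal sketch, assuming both exports from system/utils and a hypothetical embedding call:

```ts
import { retryFn, batchRun } from '../system/utils';

// Hypothetical flaky async call, e.g. an embedding request.
const embed = async (text: string): Promise<number[]> => {
  /* ... call a vector model API here ... */
  return [0.1, 0.2];
};

const chunks = ['chunk one', 'chunk two', 'chunk three'];

// Each chunk is retried up to 3 times; at most 5 chunks run concurrently.
const vectors = await batchRun(chunks, (chunk) => retryFn(() => embed(chunk), 3), 5);
```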
@@ -22,7 +22,7 @@ export const defaultQAModels: LLMModelItemType[] = [
    maxTemperature: 1.2,
    charsPointsPrice: 0,
    censor: false,
    vision: false,
    vision: true,
    datasetProcess: true,
    toolChoice: true,
    functionCall: false,

@@ -59,10 +59,17 @@ export const defaultSTTModels: STTModelType[] = [
export const getModelFromList = (
  modelList: { provider: ModelProviderIdType; name: string; model: string }[],
  model: string
) => {
):
  | {
      avatar: string;
      provider: ModelProviderIdType;
      name: string;
      model: string;
    }
  | undefined => {
  const modelData = modelList.find((item) => item.model === model) ?? modelList[0];
  if (!modelData) {
    throw new Error('No Key model is configured');
    return;
  }
  const provider = getModelProvider(modelData.provider);
  return {
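Since getModelFromList now declares `| undefined` in its return type (and falls back to modelList[0] when the requested model is missing), callers must narrow before property access. A small sketch with hypothetical data; the provider id and import path are assumptions:

```ts
import { getModelFromList } from './model'; // path assumed

// Hypothetical list; provider ids come from ModelProviderIdType.
const modelList = [
  { provider: 'openai' as ModelProviderIdType, name: 'GPT-4o mini', model: 'gpt-4o-mini' }
];

const modelData = getModelFromList(modelList, 'gpt-4o-mini');

// At runtime the function throws when modelList is empty, but the declared
// `| undefined` still forces callers to narrow before use.
if (!modelData) throw new Error('No model configured');

console.log(modelData.avatar, modelData.provider, modelData.name);
```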
packages/global/core/dataset/api.d.ts (vendored, 19 lines changed)
@@ -1,5 +1,5 @@
import { DatasetDataIndexItemType, DatasetSchemaType } from './type';
import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
import { DatasetCollectionTypeEnum, DatasetCollectionDataProcessModeEnum } from './constants';
import type { LLMModelItemType } from '../ai/model.d';
import { ParentIdType } from 'common/parentFolder/type';

@@ -10,9 +10,11 @@ export type DatasetUpdateBody = {
  name?: string;
  avatar?: string;
  intro?: string;
  agentModel?: LLMModelItemType;
  status?: DatasetSchemaType['status'];

  agentModel?: string;
  vlmModel?: string;

  websiteConfig?: DatasetSchemaType['websiteConfig'];
  externalReadUrl?: DatasetSchemaType['externalReadUrl'];
  defaultPermission?: DatasetSchemaType['defaultPermission'];

@@ -27,7 +29,10 @@
/* ================= collection ===================== */
export type DatasetCollectionChunkMetadataType = {
  parentId?: string;
  trainingType?: TrainingModeEnum;
  customPdfParse?: boolean;
  trainingType?: DatasetCollectionDataProcessModeEnum;
  imageIndex?: boolean;
  autoIndexes?: boolean;
  chunkSize?: number;
  chunkSplitter?: string;
  qaPrompt?: string;

@@ -131,9 +136,15 @@ export type PostWebsiteSyncParams = {
export type PushDatasetDataProps = {
  collectionId: string;
  data: PushDatasetDataChunkProps[];
  trainingMode: TrainingModeEnum;
  trainingType?: DatasetCollectionDataProcessModeEnum;
  autoIndexes?: boolean;
  imageIndex?: boolean;
  prompt?: string;

  billId?: string;

  // Abandon
  trainingMode?: DatasetCollectionDataProcessModeEnum;
};
export type PushDatasetDataResponse = {
  insertLen: number;
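Under the new shape, a push request names the processing mode via trainingType and toggles the extra index passes separately; trainingMode survives only as a deprecated alias. A hedged sketch of a request body (the id and q/a content are placeholders; imageIndex presumably enables the new image-parse pass and autoIndexes the auto-generated indexes):

```ts
import { DatasetCollectionDataProcessModeEnum } from './constants';
import type { PushDatasetDataProps } from './api';

// Hypothetical payload: chunk processing plus both new index passes.
const body: PushDatasetDataProps = {
  collectionId: '67aabbccddeeff0011223344', // placeholder id
  trainingType: DatasetCollectionDataProcessModeEnum.chunk,
  autoIndexes: true,
  imageIndex: true,
  data: [{ q: 'What is FastGPT?', a: 'An open-source LLM knowledge-base platform.' }]
};
```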
@@ -1,4 +1,4 @@
import { DatasetCollectionTypeEnum, TrainingModeEnum, TrainingTypeMap } from '../constants';
import { DatasetCollectionTypeEnum } from '../constants';
import { DatasetCollectionSchemaType } from '../type';

export const getCollectionSourceData = (collection?: DatasetCollectionSchemaType) => {

@@ -16,9 +16,3 @@ export const getCollectionSourceData = (collection?: DatasetCollectionSchemaType
export const checkCollectionIsFolder = (type: DatasetCollectionTypeEnum) => {
  return type === DatasetCollectionTypeEnum.folder || type === DatasetCollectionTypeEnum.virtual;
};

export const getTrainingTypeLabel = (type?: TrainingModeEnum) => {
  if (!type) return '';
  if (!TrainingTypeMap[type]) return '';
  return TrainingTypeMap[type].label;
};
@@ -109,6 +109,26 @@ export const DatasetCollectionSyncResultMap = {
  }
};

export enum DatasetCollectionDataProcessModeEnum {
  chunk = 'chunk',
  qa = 'qa',
  auto = 'auto' // abandon
}
export const DatasetCollectionDataProcessModeMap = {
  [DatasetCollectionDataProcessModeEnum.chunk]: {
    label: i18nT('common:core.dataset.training.Chunk mode'),
    tooltip: i18nT('common:core.dataset.import.Chunk Split Tip')
  },
  [DatasetCollectionDataProcessModeEnum.qa]: {
    label: i18nT('common:core.dataset.training.QA mode'),
    tooltip: i18nT('common:core.dataset.import.QA Import Tip')
  },
  [DatasetCollectionDataProcessModeEnum.auto]: {
    label: i18nT('common:core.dataset.training.Auto mode'),
    tooltip: i18nT('common:core.dataset.training.Auto mode Tip')
  }
};

/* ------------ data -------------- */

/* ------------ training -------------- */

@@ -124,28 +144,11 @@ export enum ImportDataSourceEnum {

export enum TrainingModeEnum {
  chunk = 'chunk',
  qa = 'qa',
  auto = 'auto',
  qa = 'qa'
  image = 'image'
}

export const TrainingTypeMap = {
  [TrainingModeEnum.chunk]: {
    label: i18nT('common:core.dataset.training.Chunk mode'),
    tooltip: i18nT('common:core.dataset.import.Chunk Split Tip'),
    openSource: true
  },
  [TrainingModeEnum.auto]: {
    label: i18nT('common:core.dataset.training.Auto mode'),
    tooltip: i18nT('common:core.dataset.training.Auto mode Tip'),
    openSource: false
  },
  [TrainingModeEnum.qa]: {
    label: i18nT('common:core.dataset.training.QA mode'),
    tooltip: i18nT('common:core.dataset.import.QA Import Tip'),
    openSource: true
  }
};

/* ------------ search -------------- */
export enum DatasetSearchModeEnum {
  embedding = 'embedding',
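The net effect of these two hunks, as far as the diff shows: DatasetCollectionDataProcessModeEnum now carries the collection-level processing choice (chunk or qa, with auto retained only as an abandoned value), while the slimmed-down TrainingModeEnum tracks training-queue stages, dropping auto and its label map and gaining image for the new image-parse queue. A lookup sketch using the new map:

```ts
import {
  DatasetCollectionDataProcessModeEnum,
  DatasetCollectionDataProcessModeMap
} from './constants';

const mode = DatasetCollectionDataProcessModeEnum.chunk;
// Each entry carries i18n keys for the UI; resolved at render time.
const { label, tooltip } = DatasetCollectionDataProcessModeMap[mode];
console.log(label, tooltip);
```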
packages/global/core/dataset/controller.d.ts (vendored, 25 lines changed)
@@ -20,9 +20,22 @@ export type UpdateDatasetDataProps = {
  })[];
};

export type PatchIndexesProps = {
  type: 'create' | 'update' | 'delete' | 'unChange';
  index: Omit<DatasetDataIndexItemType, 'dataId'> & {
    dataId?: string;
  };
};
export type PatchIndexesProps =
  | {
      type: 'create';
      index: Omit<DatasetDataIndexItemType, 'dataId'> & {
        dataId?: string;
      };
    }
  | {
      type: 'update';
      index: DatasetDataIndexItemType;
    }
  | {
      type: 'delete';
      index: DatasetDataIndexItemType;
    }
  | {
      type: 'unChange';
      index: DatasetDataIndexItemType;
    };
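Rewriting PatchIndexesProps as a discriminated union lets TypeScript narrow index per operation: only create may omit dataId, while the other branches guarantee a full DatasetDataIndexItemType. A consumer sketch; the persistence helpers are hypothetical:

```ts
import type { PatchIndexesProps } from './controller';

// Hypothetical persistence helpers, declared only for the sketch.
declare function insertIndex(index: { text: string; dataId?: string }): Promise<void>;
declare function updateIndex(dataId: string, text: string): Promise<void>;
declare function deleteIndex(dataId: string): Promise<void>;

function applyPatch(patch: PatchIndexesProps) {
  switch (patch.type) {
    case 'create':
      // Only this branch allows a missing dataId.
      return insertIndex(patch.index);
    case 'update':
      // Narrowed: patch.index includes a required dataId here.
      return updateIndex(patch.index.dataId, patch.index.text);
    case 'delete':
      return deleteIndex(patch.index.dataId);
    case 'unChange':
      return; // nothing to do
  }
}
```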
packages/global/core/dataset/data/constants.ts (new file, 42 lines)
@@ -0,0 +1,42 @@
import { i18nT } from '../../../../web/i18n/utils';

export enum DatasetDataIndexTypeEnum {
  default = 'default',
  custom = 'custom',
  summary = 'summary',
  question = 'question',
  image = 'image'
}

export const DatasetDataIndexMap: Record<
  `${DatasetDataIndexTypeEnum}`,
  {
    label: any;
    color: string;
  }
> = {
  [DatasetDataIndexTypeEnum.default]: {
    label: i18nT('dataset:data_index_default'),
    color: 'gray'
  },
  [DatasetDataIndexTypeEnum.custom]: {
    label: i18nT('dataset:data_index_custom'),
    color: 'blue'
  },
  [DatasetDataIndexTypeEnum.summary]: {
    label: i18nT('dataset:data_index_summary'),
    color: 'green'
  },
  [DatasetDataIndexTypeEnum.question]: {
    label: i18nT('dataset:data_index_question'),
    color: 'red'
  },
  [DatasetDataIndexTypeEnum.image]: {
    label: i18nT('dataset:data_index_image'),
    color: 'purple'
  }
};
export const defaultDatasetIndexData = DatasetDataIndexMap[DatasetDataIndexTypeEnum.custom];
export const getDatasetIndexMapData = (type: `${DatasetDataIndexTypeEnum}`) => {
  return DatasetDataIndexMap[type] || defaultDatasetIndexData;
};
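getDatasetIndexMapData falls back to the custom entry, so UI code can resolve any stored string without guarding against stale or unknown values. A short sketch:

```ts
import { getDatasetIndexMapData } from './data/constants';

// Known type: resolves to the image entry (purple badge).
const imageMeta = getDatasetIndexMapData('image');

// Unknown/legacy value: falls back to the custom entry (blue badge).
const legacyMeta = getDatasetIndexMapData('whatever' as any);
```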
packages/global/core/dataset/training/type.d.ts (new file, vendored, 20 lines)
@@ -0,0 +1,20 @@
import { PushDatasetDataChunkProps } from '../api';
import { TrainingModeEnum } from '../constants';

export type PushDataToTrainingQueueProps = {
  teamId: string;
  tmbId: string;
  datasetId: string;
  collectionId: string;

  mode?: TrainingModeEnum;
  data: PushDatasetDataChunkProps[];
  prompt?: string;

  agentModel: string;
  vectorModel: string;
  vlmModel?: string;

  billId?: string;
  session?: ClientSession;
};
packages/global/core/dataset/type.d.ts (vendored, 45 lines changed)
@@ -2,6 +2,7 @@ import type { LLMModelItemType, EmbeddingModelItemType } from '../../core/ai/mod
import { PermissionTypeEnum } from '../../support/permission/constant';
import { PushDatasetDataChunkProps } from './api';
import {
  DatasetCollectionDataProcessModeEnum,
  DatasetCollectionTypeEnum,
  DatasetStatusEnum,
  DatasetTypeEnum,

@@ -12,6 +13,7 @@ import { DatasetPermission } from '../../support/permission/dataset/controller';
import { Permission } from '../../support/permission/controller';
import { APIFileServer, FeishuServer, YuqueServer } from './apiDataset';
import { SourceMemberType } from 'support/user/type';
import { DatasetDataIndexTypeEnum } from './data/constants';

export type DatasetSchemaType = {
  _id: string;

@@ -23,11 +25,14 @@ export type DatasetSchemaType = {

  avatar: string;
  name: string;
  vectorModel: string;
  agentModel: string;
  intro: string;
  type: `${DatasetTypeEnum}`;
  status: `${DatasetStatusEnum}`;

  vectorModel: string;
  agentModel: string;
  vlmModel?: string;

  websiteConfig?: {
    url: string;
    selector: string;

@@ -52,26 +57,22 @@ export type DatasetCollectionSchemaType = {
  parentId?: string;
  name: string;
  type: DatasetCollectionTypeEnum;
  createTime: Date;
  updateTime: Date;
  forbid?: boolean;

  trainingType: TrainingModeEnum;
  chunkSize: number;
  chunkSplitter?: string;
  qaPrompt?: string;
  ocrParse?: boolean;

  tags?: string[];

  createTime: Date;
  updateTime: Date;

  // Status
  forbid?: boolean;
  nextSyncTime?: Date;

  // Collection metadata
  fileId?: string; // local file id
  rawLink?: string; // link url
  externalFileId?: string; //external file id
  apiFileId?: string; // api file id
  externalFileUrl?: string; // external import url

  nextSyncTime?: Date;

  rawTextLength?: number;
  hashRawText?: string;
  metadata?: {

@@ -80,6 +81,16 @@ export type DatasetCollectionSchemaType = {

    [key: string]: any;
  };

  // Parse settings
  customPdfParse?: boolean;
  // Chunk settings
  autoIndexes?: boolean;
  imageIndex?: boolean;
  trainingType: DatasetCollectionDataProcessModeEnum;
  chunkSize: number;
  chunkSplitter?: string;
  qaPrompt?: string;
};

export type DatasetCollectionTagsSchemaType = {

@@ -90,7 +101,7 @@ export type DatasetCollectionTagsSchemaType = {
};

export type DatasetDataIndexItemType = {
  defaultIndex: boolean;
  type: `${DatasetDataIndexTypeEnum}`;
  dataId: string; // pg data id
  text: string;
};

@@ -141,6 +152,7 @@ export type DatasetTrainingSchemaType = {
  chunkIndex: number;
  weight: number;
  indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
  retryCount: number;
};

export type CollectionWithDatasetType = DatasetCollectionSchemaType & {

@@ -169,9 +181,10 @@ export type DatasetListItemType = {
  sourceMember?: SourceMemberType;
};

export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel'> & {
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel' | 'vlmModel'> & {
  vectorModel: EmbeddingModelItemType;
  agentModel: LLMModelItemType;
  vlmModel?: LLMModelItemType;
  permission: DatasetPermission;
};
@@ -1,6 +1,7 @@
import { TrainingModeEnum, DatasetCollectionTypeEnum } from './constants';
import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';
import { DatasetDataIndexTypeEnum } from './data/constants';

export function getCollectionIcon(
  type: DatasetCollectionTypeEnum = DatasetCollectionTypeEnum.file,

@@ -38,14 +39,23 @@ export function getSourceNameIcon({
}

/* get dataset data default index */
export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: string }) {
  const { q = '', a, dataId } = props || {};
  const qaStr = `${q}\n${a}`.trim();
  return {
    defaultIndex: true,
    text: a ? qaStr : q,
    dataId
  };
export function getDefaultIndex(props?: { q?: string; a?: string }) {
  const { q = '', a } = props || {};

  return [
    {
      text: q,
      type: DatasetDataIndexTypeEnum.default
    },
    ...(a
      ? [
          {
            text: a,
            type: DatasetDataIndexTypeEnum.default
          }
        ]
      : [])
  ];
}

export const predictDataLimitLength = (mode: TrainingModeEnum, data: any[]) => {
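getDefaultIndex now returns an array of typed index items instead of one concatenated q/a blob, so question and answer can be indexed separately. A quick sketch of the new shape (import path assumed):

```ts
import { getDefaultIndex } from './utils'; // path assumed

getDefaultIndex({ q: 'What is RAG?', a: 'Retrieval-augmented generation.' });
// New return value: one default-type index per field.
// [
//   { text: 'What is RAG?', type: 'default' },
//   { text: 'Retrieval-augmented generation.', type: 'default' }
// ]
```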
@@ -10,7 +10,8 @@ export enum UsageSourceEnum {
  wecom = 'wecom',
  feishu = 'feishu',
  dingtalk = 'dingtalk',
  official_account = 'official_account'
  official_account = 'official_account',
  pdfParse = 'pdfParse'
}

export const UsageSourceMap = {

@@ -43,5 +44,8 @@ export const UsageSourceMap = {
  },
  [UsageSourceEnum.dingtalk]: {
    label: i18nT('account_usage:dingtalk')
  },
  [UsageSourceEnum.pdfParse]: {
    label: i18nT('account_usage:pdf_parse')
  }
};
@@ -7,6 +7,7 @@ export type UsageListItemCountType = {
  outputTokens?: number;
  charsLength?: number;
  duration?: number;
  pages?: number;

  // deprecated
  tokens?: number;