Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
Author: Archer
Date: 2025-03-03 23:08:29 +08:00
Committed by: archer
Parent: 08b6f594df
Commit: adf5377ebe
106 changed files with 2337 additions and 1454 deletions

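The first item in the message, "think tag parse", refers to splitting a model's <think>...</think> reasoning block away from its visible answer. That code is not part of the hunks shown below, so the following is only a minimal, hypothetical TypeScript sketch of the idea; the function name and return shape are assumptions, not FastGPT's API.

// Hypothetical sketch only: not FastGPT's implementation.
// Splits a <think>...</think> block (model reasoning) from the rest of the output.
export const parseThinkTag = (text: string): { think: string; answer: string } => {
  const match = text.match(/<think>([\s\S]*?)<\/think>/);
  if (!match) return { think: '', answer: text };
  return {
    think: match[1].trim(),
    answer: text.replace(match[0], '').trim()
  };
};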

@@ -1,6 +1,7 @@
 import { NodeOutputKeyEnum } from '@fastgpt/global/core/workflow/constants';
 import { DispatchNodeResponseKeyEnum } from '@fastgpt/global/core/workflow/runtime/constants';
 import type {
+  ChatDispatchProps,
   DispatchNodeResultType,
   RuntimeNodeItemType
 } from '@fastgpt/global/core/workflow/runtime/type';
@@ -46,7 +47,7 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
     query,
     requestOrigin,
     chatConfig,
-    runningAppInfo: { teamId },
+    runningUserInfo,
     externalProvider,
     params: {
       model,
@@ -99,10 +100,10 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
   const globalFiles = chatValue2RuntimePrompt(query).files;
   const { documentQuoteText, userFiles } = await getMultiInput({
+    runningUserInfo,
     histories: chatHistories,
     requestOrigin,
     maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
-    teamId,
     fileLinks,
     inputFiles: globalFiles,
     hasReadFilesTool
@@ -289,19 +290,19 @@ export const dispatchRunTools = async (props: DispatchToolModuleProps): Promise<
 };
 const getMultiInput = async ({
+  runningUserInfo,
   histories,
   fileLinks,
   requestOrigin,
   maxFiles,
-  teamId,
   inputFiles,
   hasReadFilesTool
 }: {
+  runningUserInfo: ChatDispatchProps['runningUserInfo'];
   histories: ChatItemType[];
   fileLinks?: string[];
   requestOrigin?: string;
   maxFiles: number;
-  teamId: string;
   inputFiles: UserChatItemValueItemType['file'][];
   hasReadFilesTool: boolean;
 }) => {
@@ -329,7 +330,8 @@ const getMultiInput = async ({
     urls,
     requestOrigin,
     maxFiles,
-    teamId
+    teamId: runningUserInfo.teamId,
+    tmbId: runningUserInfo.tmbId
   });
   return {

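The pattern in the hunks above repeats through the rest of this commit: instead of destructuring teamId out of runningAppInfo and passing it down, the dispatcher forwards the caller's runningUserInfo object, and the leaf helper reads both teamId and tmbId from it before calling the file reader. A compressed sketch of that shape, with the types simplified (the real props carry far more fields than shown here):

// Simplified stand-ins; the real types live in @fastgpt/global and are much richer.
type RunningUserInfo = { teamId: string; tmbId: string };
type Props = { runningUserInfo: RunningUserInfo; urls: string[] };

// Leaf helper: picks the ids it needs from the single object.
const getFilesForUser = async ({ runningUserInfo, urls }: Props) => {
  const { teamId, tmbId } = runningUserInfo;
  return urls.map((url) => ({ url, teamId, tmbId })); // stand-in for getFileContentFromLinks
};

// Dispatcher: no longer needs to know which ids the helper wants.
const dispatch = async (props: Props) => {
  return getFilesForUser(props);
};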

@@ -11,7 +11,10 @@ import { formatModelChars2Points } from '../../../../support/wallet/usage/utils'
 import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
 import { postTextCensor } from '../../../../common/api/requestPlusApi';
 import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants';
-import type { DispatchNodeResultType } from '@fastgpt/global/core/workflow/runtime/type';
+import type {
+  ChatDispatchProps,
+  DispatchNodeResultType
+} from '@fastgpt/global/core/workflow/runtime/type';
 import { countGptMessagesTokens } from '../../../../common/string/tiktoken/index';
 import {
   chats2GPTMessages,
@@ -69,7 +72,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
     histories,
     node: { name },
     query,
-    runningAppInfo: { teamId },
+    runningUserInfo,
     workflowStreamResponse,
     chatConfig,
     params: {
@@ -121,7 +124,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
       stringQuoteText,
       requestOrigin,
       maxFiles: chatConfig?.fileSelectConfig?.maxFiles || 20,
-      teamId
+      runningUserInfo
     })
   ]);
@@ -355,7 +358,7 @@ async function getMultiInput({
   stringQuoteText,
   requestOrigin,
   maxFiles,
-  teamId
+  runningUserInfo
 }: {
@@ -363,7 +366,7 @@ async function getMultiInput({
   stringQuoteText?: string; // file quote
   requestOrigin?: string;
   maxFiles: number;
-  teamId: string;
+  runningUserInfo: ChatDispatchProps['runningUserInfo'];
 }) {
   // Legacy version adaptation ====>
   if (stringQuoteText) {
@@ -400,7 +403,8 @@ async function getMultiInput({
     urls,
     requestOrigin,
     maxFiles,
-    teamId
+    teamId: runningUserInfo.teamId,
+    tmbId: runningUserInfo.tmbId
   });
   return {

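Both getMultiInput helpers now type the new parameter with an indexed access type, ChatDispatchProps['runningUserInfo'], so the helper's signature follows the dispatch props automatically if that field ever changes. A small self-contained illustration of the pattern; the interface below is a stand-in, not the real ChatDispatchProps:

// Stand-in interface; the real ChatDispatchProps is imported from
// @fastgpt/global/core/workflow/runtime/type.
interface ChatDispatchProps {
  requestOrigin?: string;
  runningUserInfo: { teamId: string; tmbId: string };
}

// Indexed access type: reuse the exact type of one field.
const describeUser = (user: ChatDispatchProps['runningUserInfo']): string =>
  `team=${user.teamId} member=${user.tmbId}`;

console.log(describeUser({ teamId: 'team_1', tmbId: 'member_1' }));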

@@ -45,7 +45,7 @@ ${content.slice(0, 100)}${content.length > 100 ? '......' : ''}
 export const dispatchReadFiles = async (props: Props): Promise<Response> => {
   const {
     requestOrigin,
-    runningAppInfo: { teamId },
+    runningUserInfo: { teamId, tmbId },
     histories,
     chatConfig,
     node: { version },
@@ -61,7 +61,8 @@ export const dispatchReadFiles = async (props: Props): Promise<Response> => {
     urls: [...fileUrlList, ...filesFromHistories],
     requestOrigin,
     maxFiles,
-    teamId
+    teamId,
+    tmbId
   });
   return {
@@ -105,12 +106,14 @@ export const getFileContentFromLinks = async ({
   urls,
   requestOrigin,
   maxFiles,
-  teamId
+  teamId,
+  tmbId
 }: {
   urls: string[];
   requestOrigin?: string;
   maxFiles: number;
   teamId: string;
+  tmbId: string;
 }) => {
   const parseUrlList = urls
     // Remove invalid urls
@@ -205,6 +208,7 @@ export const getFileContentFromLinks = async ({
         extension,
         isQAImport: false,
         teamId,
+        tmbId,
         buffer,
         encoding
       });
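With this change getFileContentFromLinks requires tmbId next to teamId, and dispatchReadFiles destructures both straight out of runningUserInfo. A hedged usage sketch of a caller follows; the argument values are illustrative and the return shape is not visible in this diff.

// Illustrative caller only; getFileContentFromLinks is the export shown above.
const readLinkedFiles = async (runningUserInfo: { teamId: string; tmbId: string }) => {
  return getFileContentFromLinks({
    urls: ['https://example.com/report.pdf'],
    requestOrigin: 'https://example.com',
    maxFiles: 20,
    teamId: runningUserInfo.teamId,
    tmbId: runningUserInfo.tmbId // newly required alongside teamId
  });
};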