Test parse cite and add tool call parallel (#4737)

* add quote response filter (#4727)

* chatting

* add quote response filter

* add test

* remove comment

* perf: cite hidden

* perf: format llm response

* feat: comment

* update default chunk size

* update default chunk size

---------

Co-authored-by: heheer <heheer@sealos.io>
Author: Archer
Date: 2025-04-30 17:43:50 +08:00
Committed by: GitHub
Parent: 683ab6c17d
Commit: fdd4e9edbd
53 changed files with 1131 additions and 716 deletions


@@ -4,7 +4,7 @@ import { ChatItemType } from '@fastgpt/global/core/chat/type';
import { countGptMessagesTokens, countPromptTokens } from '../../../common/string/tiktoken/index';
import { chats2GPTMessages } from '@fastgpt/global/core/chat/adapt';
import { getLLMModel } from '../model';
-import { llmCompletionsBodyFormat, llmResponseToAnswerText } from '../utils';
+import { llmCompletionsBodyFormat, formatLLMResponse } from '../utils';
import { addLog } from '../../../common/system/log';
import { filterGPTMessageByMaxContext } from '../../chat/utils';
import json5 from 'json5';
@@ -170,7 +170,7 @@ assistant: ${chatBg}
const { response } = await createChatCompletion({
body: llmCompletionsBodyFormat(
{
-stream: false,
+stream: true,
model: modelData.model,
temperature: 0.1,
messages
@@ -178,7 +178,7 @@ assistant: ${chatBg}
modelData
)
});
-const { text: answer, usage } = await llmResponseToAnswerText(response);
+const { text: answer, usage } = await formatLLMResponse(response);
const inputTokens = usage?.prompt_tokens || (await countGptMessagesTokens(messages));
const outputTokens = usage?.completion_tokens || (await countPromptTokens(answer));
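
The hunk above switches the completion call to `stream: true` and replaces `llmResponseToAnswerText` with `formatLLMResponse`, which must now aggregate a streamed response into the `{ text, usage }` shape the call site consumes. The sketch below is a minimal, hypothetical illustration of such a helper, assuming an OpenAI-compatible chunk stream; the type and function names are placeholders, not the actual FastGPT implementation.

```ts
// Hypothetical sketch only — not the actual FastGPT helper. Assumes an
// OpenAI-compatible stream where each chunk carries a content delta and the
// final chunk may carry token usage.
type StreamChunk = {
  choices?: { delta?: { content?: string | null } }[];
  usage?: { prompt_tokens: number; completion_tokens: number } | null;
};

export const formatLLMResponseSketch = async (stream: AsyncIterable<StreamChunk>) => {
  let text = '';
  let usage: StreamChunk['usage'] = undefined;

  for await (const chunk of stream) {
    // Append each streamed content delta to build the final answer text.
    text += chunk.choices?.[0]?.delta?.content ?? '';
    // Providers that support it attach token usage to the last chunk.
    if (chunk.usage) usage = chunk.usage;
  }

  return { text, usage };
};
```

If the provider never reports usage, the call site above still works: it falls back to counting tokens locally via `countGptMessagesTokens` and `countPromptTokens` when `usage` is undefined.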