Files
FastGPT/packages/service/core/chat/utils.ts
Archer 064c64e74c V4.6.9-first commit (#899)
* perf: insert mongo dataset data session

* perf: dataset data index

* remove delay

* rename bill schema

* rename bill record

* perf: bill table

* perf: prompt

* perf: sub plan

* change the usage count

* feat: usage bill

* publish usages

* doc

* 新增团队聊天功能 (#20)

* perf: doc

* feat 添加标签部分

feat 信息团队标签配置

feat 新增团队同步管理

feat team分享页面

feat 完成team分享页面

feat 实现模糊搜索

style 格式化

fix 修复迷糊匹配

style 样式修改

fix 团队标签功能修复

* fix 修复鉴权功能

* merge 合并代码

* fix 修复引用错误

* fix 修复pr问题

* fix 修复ts格式问题

---------

Co-authored-by: archer <545436317@qq.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* update extra plan

* fix: ts

* format

* perf: bill field

* feat: standard plan

* fix: ts

* feat 个人账号页面修改 (#22)

* feat 添加标签部分

feat 信息团队标签配置

feat 新增团队同步管理

feat team分享页面

feat 完成team分享页面

feat 实现模糊搜索

style 格式化

fix 修复迷糊匹配

style 样式修改

fix 团队标签功能修复

* fix 修复鉴权功能

* merge 合并代码

* fix 修复引用错误

* fix 修复pr问题

* fix 修复ts格式问题

* feat 修改个人账号页

---------

Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>

* sub plan page (#23)

* fix chunk index; error page text

* feat: dataset process Integral prediction

* feat: stand plan field

* feat: sub plan limit

* perf: index

* query extension

* perf: share link push app name

* perf: plan point unit

* perf: get sub plan

* perf: account page

* feat 新增套餐详情弹窗代码 (#24)

* merge 合并代码

* fix 新增套餐详情弹框

* fix 修复pr问题

* feat: change http node input to prompt editor (#21)

* feat: change http node input to prompt editor

* fix

* split PromptEditor to HttpInput

* Team plans (#25)

* perf: pay check

* perf: team plan test

* plan limit check

* replace sensitive text

* perf: fix some null

* collection null check

* perf: plans modal

* perf: http module

* pacakge (#26)

* individuation page and pay modal amount (#27)

* feat: individuation page

* team chat config

* pay modal

* plan count and replace invalid chars (#29)

* fix: user oneapi

* fix: training queue

* fix: qa queue

* perf: remove space chars

* replace invalid chars

* change httpinput dropdown menu (#28)

* perf: http

* reseet free plan

* perf: plan code to packages

* remove llm config to package

* perf: code

* perf: faq

* fix: get team plan

---------

Co-authored-by: yst <77910600+yu-and-liu@users.noreply.github.com>
Co-authored-by: liuxingwan <liuxingwan.lxw@alibaba-inc.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
2024-02-28 13:19:15 +08:00

184 lines
5.0 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum, IMG_BLOCK_KEY } from '@fastgpt/global/core/chat/constants';
import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt';
import type {
ChatCompletionContentPart,
ChatMessageItemType
} from '@fastgpt/global/core/ai/type.d';
import axios from 'axios';
/* slice chat context by tokens */
export function ChatContextFilter({
messages = [],
maxTokens
}: {
messages: ChatItemType[];
maxTokens: number;
}) {
if (!Array.isArray(messages)) {
return [];
}
const rawTextLen = messages.reduce((sum, item) => sum + item.value.length, 0);
// If the text length is less than half of the maximum token, no calculation is required
if (rawTextLen < maxTokens * 0.5) {
return messages;
}
// filter startWith system prompt
const chatStartIndex = messages.findIndex((item) => item.obj !== ChatRoleEnum.System);
const systemPrompts: ChatItemType[] = messages.slice(0, chatStartIndex);
const chatPrompts: ChatItemType[] = messages.slice(chatStartIndex);
// reduce token of systemPrompt
maxTokens -= countMessagesTokens({
messages: systemPrompts
});
// 根据 tokens 截断内容
const chats: ChatItemType[] = [];
// 从后往前截取对话内容
for (let i = chatPrompts.length - 1; i >= 0; i--) {
const item = chatPrompts[i];
chats.unshift(item);
const tokens = countPromptTokens(item.value, adaptRole_Chat2Message(item.obj));
maxTokens -= tokens;
/* 整体 tokens 超出范围, system必须保留 */
if (maxTokens <= 0) {
if (chats.length > 1) {
chats.shift();
}
break;
}
}
return [...systemPrompts, ...chats];
}
export const replaceValidChars = (str: string) => {
const reg = /[\s\r\n]+/g;
return str.replace(reg, '');
};
export const countMessagesChars = (messages: ChatItemType[]) => {
return messages.reduce((sum, item) => sum + replaceValidChars(item.value).length, 0);
};
export const countGptMessagesChars = (messages: ChatMessageItemType[]) =>
messages.reduce((sum, item) => sum + replaceValidChars(item.content).length, 0);
/**
string to vision model. Follow the markdown code block rule for interception:
@rule:
```img-block
{src:""}
{src:""}
```
```file-block
{name:"",src:""},
{name:"",src:""}
```
@example:
Whats in this image?
```img-block
{src:"https://1.png"}
```
@return
[
{ type: 'text', text: 'Whats in this image?' },
{
type: 'image_url',
image_url: {
url: 'https://1.png'
}
}
]
*/
export async function formatStr2ChatContent(str: string) {
const content: ChatCompletionContentPart[] = [];
let lastIndex = 0;
const regex = new RegExp(`\`\`\`(${IMG_BLOCK_KEY})\\n([\\s\\S]*?)\`\`\``, 'g');
const imgKey: 'image_url' = 'image_url';
let match;
while ((match = regex.exec(str)) !== null) {
// add previous text
if (match.index > lastIndex) {
const text = str.substring(lastIndex, match.index).trim();
if (text) {
content.push({ type: 'text', text });
}
}
const blockType = match[1].trim();
if (blockType === IMG_BLOCK_KEY) {
const blockContentLines = match[2].trim().split('\n');
const jsonLines = blockContentLines.map((item) => {
try {
return JSON.parse(item) as { src: string };
} catch (error) {
return { src: '' };
}
});
for (const item of jsonLines) {
if (!item.src) throw new Error("image block's content error");
}
content.push(
...jsonLines.map((item) => ({
type: imgKey,
image_url: {
url: item.src
}
}))
);
}
lastIndex = regex.lastIndex;
}
// add remaining text
if (lastIndex < str.length) {
const remainingText = str.substring(lastIndex).trim();
if (remainingText) {
content.push({ type: 'text', text: remainingText });
}
}
// Continuous text type content, if type=text, merge them
for (let i = 0; i < content.length - 1; i++) {
const currentContent = content[i];
const nextContent = content[i + 1];
if (currentContent.type === 'text' && nextContent.type === 'text') {
currentContent.text += nextContent.text;
content.splice(i + 1, 1);
i--;
}
}
if (content.length === 1 && content[0].type === 'text') {
return content[0].text;
}
if (!content) return null;
// load img to base64
for await (const item of content) {
if (item.type === imgKey && item[imgKey]?.url) {
const response = await axios.get(item[imgKey].url, {
responseType: 'arraybuffer'
});
const base64 = Buffer.from(response.data).toString('base64');
item[imgKey].url = `data:${response.headers['content-type']};base64,${base64}`;
}
}
return content ? content : null;
}