4.8-alpha fix (#1424)

Archer
2024-05-09 22:48:44 +08:00
committed by GitHub
parent 6463427d93
commit 434af56abd
19 changed files with 255 additions and 148 deletions

View File

@@ -1,24 +1,61 @@
import { getErrText } from '../error/utils';
import { replaceRegChars } from './tools';
/**
* Split text into chunks.
* chunkLen - length of one chunk. max: 3500
* overlapLen - size of the overlap shared with the preceding and following chunks
* chunkLen > overlapLen
* markdown-aware
*/
export const splitText2Chunks = (props: {
export const CUSTOM_SPLIT_SIGN = '-----CUSTOM_SPLIT_SIGN-----';
type SplitProps = {
text: string;
chunkLen: number;
overlapRatio?: number;
customReg?: string[];
}): {
};
type SplitResponse = {
chunks: string[];
chars: number;
overlapRatio?: number;
} => {
};
// Check whether a string is formatted as a markdown table
const strIsMdTable = (str: string) => {
const regex = /^(\|.*\|[\r]*)$/m;
return regex.test(str);
};
const markdownTableSplit = (props: SplitProps): SplitResponse => {
let { text = '', chunkLen } = props;
const splitText2Lines = text.split('\n');
const header = splitText2Lines[0];
const headerSize = header.split('|').length - 2;
const mdSplitString = `| ${new Array(headerSize)
.fill(0)
.map(() => '---')
.join(' | ')} |`;
const chunks: string[] = [];
let chunk = `${header}
${mdSplitString}
`;
for (let i = 2; i < splitText2Lines.length; i++) {
if (chunk.length + splitText2Lines[i].length > chunkLen * 1.2) {
chunks.push(chunk);
chunk = `${header}
${mdSplitString}
`;
}
chunk += `${splitText2Lines[i]}\n`;
}
// push the final, partially-filled chunk as well
chunks.push(chunk);
return {
chunks,
chars: chunks.reduce((sum, chunk) => sum + chunk.length, 0)
};
};
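For illustration, a minimal usage sketch (table contents invented; markdownTableSplit is module-private, so exercising it directly assumes a test export, or going through the exported entry point below). Each chunk re-emits the header and separator rows, so every chunk is a self-contained markdown table:

const table = [
  '| name | role |',
  '| --- | --- |',
  '| Alice | admin |',
  '| Bob | viewer |'
].join('\n');

// With chunkLen 40 this yields two chunks; both begin with
// '| name | role |' followed by the '| --- | --- |' separator row.
const { chunks, chars } = markdownTableSplit({ text: table, chunkLen: 40 });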
const commonSplit = (props: SplitProps): SplitResponse => {
let { text = '', chunkLen, overlapRatio = 0.2, customReg = [] } = props;
const splitMarker = 'SPLIT_HERE_SPLIT_HERE';
const codeBlockMarker = 'CODE_BLOCK_LINE_MARKER';
const overlapLen = Math.round(chunkLen * overlapRatio);
@@ -253,3 +290,29 @@ export const splitText2Chunks = (props: {
throw new Error(getErrText(err));
}
};
/**
* Split text into chunks.
* chunkLen - length of one chunk. max: 3500
* overlapLen - size of the overlap shared with the preceding and following chunks
* chunkLen > overlapLen
* markdown-aware
*/
export const splitText2Chunks = (props: SplitProps): SplitResponse => {
let { text = '' } = props;
const splitWithCustomSign = text.split(CUSTOM_SPLIT_SIGN);
const splitResult = splitWithCustomSign.map((item) => {
if (strIsMdTable(item)) {
return markdownTableSplit({ ...props, text: item });
}
return commonSplit({ ...props, text: item });
});
return {
chunks: splitResult.map((item) => item.chunks).flat(),
chars: splitResult.reduce((sum, item) => sum + item.chars, 0)
};
};
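A hedged usage sketch of the rewritten entry point (sample text invented): the input is first split on CUSTOM_SPLIT_SIGN, then each segment is routed to the table splitter or the common splitter:

import { splitText2Chunks, CUSTOM_SPLIT_SIGN } from '@fastgpt/global/common/string/textSplitter';

// One markdown-table segment and one prose segment, pre-joined by the sign
// (this is what the xlsx reader below produces for multi-sheet files).
const text = [
  '| q | a |\n| --- | --- |\n| hi | hello |',
  'Plain prose that goes through commonSplit.'
].join(CUSTOM_SPLIT_SIGN);

const { chunks, chars } = splitText2Chunks({ text, chunkLen: 500 });
// chunks: flattened results from both segments; chars: total characters across all chunks.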

View File

@@ -57,7 +57,7 @@ export const countGptMessagesTokens = (
// Check for memory leaks
// addLog.info(`Count token time: ${Date.now() - start}, token: ${data}`);
// console.log(Object.keys(global.tiktokenWorker.callbackMap));
// console.log(process.memoryUsage());
};
worker.postMessage({

View File

@@ -15,6 +15,6 @@ export type InsertVectorProps = {
export type EmbeddingRecallProps = {
teamId: string;
datasetIds: string[];
similarity?: number;
efSearch?: number;
// similarity?: number;
// efSearch?: number;
};

View File

@@ -129,17 +129,15 @@ export const embeddingRecall = async (
): Promise<{
results: EmbeddingRecallItemType[];
}> => {
const { teamId, datasetIds, vectors, limit, similarity = 0, retry = 2, efSearch = 100 } = props;
const { datasetIds, vectors, limit, retry = 2 } = props;
try {
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${efSearch};
SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
select id, collection_id, vector <#> '[${vectors[0]}]' AS score
from ${PgDatasetTableName}
where team_id='${teamId}'
AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
AND vector <#> '[${vectors[0]}]' < -${similarity}
where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
order by score limit ${limit};
COMMIT;`
);
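With this change, ef_search is no longer a per-call parameter; it is read from the global system config, falling back to 100. A rough sketch of populating that knob (config shape assumed; only the pgHNSWEfSearch field comes from the diff):

// Higher ef_search improves HNSW recall at the cost of query latency.
(global as any).systemEnv = {
  ...((global as any).systemEnv || {}),
  pgHNSWEfSearch: 100 // 100 is the fallback used in the query above
};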

View File

@@ -85,8 +85,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
teamId,
datasetIds,
vectors,
limit,
efSearch: global.systemEnv?.pgHNSWEfSearch
limit
});
// get q and a

View File

@@ -127,8 +127,8 @@ const completions = async ({
});
const answer = data.choices?.[0].message?.content || '';
console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2));
console.log(answer, '----');
// console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2));
// console.log(answer, '----');
const id =
agents.find((item) => answer.includes(item.key))?.key ||

View File

@@ -10,9 +10,13 @@ export const readCsvRawText = async (params: ReadRawTextByBuffer): Promise<ReadF
const header = csvArr[0];
const formatText = header
? csvArr.map((item) => item.map((item, i) => `${header[i]}:${item}`).join('\n')).join('\n')
: '';
// format to md table
const formatText = `| ${header.join(' | ')} |
| ${header.map(() => '---').join(' | ')} |
${csvArr
.slice(1)
.map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`)
.join('\n')}`;
return {
rawText,
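To see what the new format produces, a small sketch with an invented CSV (the formatting code mirrors the diff above): the reader now emits a markdown table instead of header:value pairs, escaping embedded newlines so each record stays on a single table row.

import Papa from 'papaparse';

const rawText = 'name,note\nAlice,"line1\nline2"';
const csvArr = Papa.parse(rawText).data as string[][];
const header = csvArr[0];

const formatText = `| ${header.join(' | ')} |
| ${header.map(() => '---').join(' | ')} |
${csvArr
  .slice(1)
  .map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`)
  .join('\n')}`;
// => | name | note |
//    | --- | --- |
//    | Alice | line1\nline2 |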

View File

@@ -1,3 +1,4 @@
import { CUSTOM_SPLIT_SIGN } from '@fastgpt/global/common/string/textSplitter';
import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
import xlsx from 'node-xlsx';
import Papa from 'papaparse';
@@ -18,25 +19,25 @@ export const readXlsxRawText = async ({
});
const rawText = format2Csv.map((item) => item.csvText).join('\n');
const formatText = format2Csv
.map((item) => {
const csvArr = Papa.parse(item.csvText).data as string[][];
const header = csvArr[0];
const formatText = header
? csvArr
.map((item) =>
item
.map((item, i) => (item ? `${header[i]}:${item}` : ''))
.filter(Boolean)
.join('\n')
)
.join('\n')
: '';
if (!header) return;
return `${item.title}\n${formatText}`;
const formatText = `| ${header.join(' | ')} |
| ${header.map(() => '---').join(' | ')} |
${csvArr
.slice(1)
.map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`)
.join('\n')}`;
return formatText;
})
.join('\n');
.filter(Boolean)
.join(CUSTOM_SPLIT_SIGN);
return {
rawText: rawText,
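Joining the per-sheet tables with CUSTOM_SPLIT_SIGN keeps sheets separate downstream; a short sketch with invented sheet contents:

// Each sheet is rendered as its own markdown table (as in the diff above);
// the sign keeps sheets apart so splitText2Chunks can chunk each one independently.
const sheetTables = [
  '| id | name |\n| --- | --- |\n| 1 | Alice |',
  '| sku | qty |\n| --- | --- |\n| A1 | 3 |'
];
const formatText = sheetTables.join(CUSTOM_SPLIT_SIGN);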

View File

@@ -67,5 +67,5 @@ parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
});
}
global?.close?.();
process.exit();
});

View File

@@ -15,6 +15,5 @@ parentPort?.on('message', (params: { html: string }) => {
data: error
});
}
global?.close?.();
process.exit();
});

View File

@@ -26,91 +26,96 @@ parentPort?.on(
tools?: ChatCompletionTool[];
functionCall?: ChatCompletionCreateParams.Function[];
}) => {
const start = Date.now();
/* count one prompt tokens */
const countPromptTokens = (
prompt: string | ChatCompletionContentPart[] | null | undefined = '',
role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
) => {
const promptText = (() => {
if (!prompt) return '';
if (typeof prompt === 'string') return prompt;
let promptText = '';
prompt.forEach((item) => {
if (item.type === 'text') {
promptText += item.text;
} else if (item.type === 'image_url') {
promptText += item.image_url.url;
}
});
return promptText;
})();
const text = `${role}\n${promptText}`.trim();
try {
const encodeText = enc.encode(text);
const supplementaryToken = role ? 4 : 0;
return encodeText.length + supplementaryToken;
} catch (error) {
return text.length;
}
};
const countToolsTokens = (
tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
) => {
if (!tools || tools.length === 0) return 0;
const toolText = tools
? JSON.stringify(tools)
.replace('"', '')
.replace('\n', '')
.replace(/( ){2,}/g, ' ')
: '';
return enc.encode(toolText).length;
};
const total =
messages.reduce((sum, item) => {
// Include the text of tool calls and function calls
const functionCallPrompt = (() => {
let prompt = '';
if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
const toolCalls = item.tool_calls;
prompt +=
toolCalls
?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
?.join('') || '';
const functionCall = item.function_call;
prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
}
return prompt;
try {
/* count one prompt tokens */
const countPromptTokens = (
prompt: string | ChatCompletionContentPart[] | null | undefined = '',
role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
) => {
const promptText = (() => {
if (!prompt) return '';
if (typeof prompt === 'string') return prompt;
let promptText = '';
prompt.forEach((item) => {
if (item.type === 'text') {
promptText += item.text;
} else if (item.type === 'image_url') {
promptText += item.image_url.url;
}
});
return promptText;
})();
const contentPrompt = (() => {
if (!item.content) return '';
if (typeof item.content === 'string') return item.content;
return item.content
.map((item) => {
if (item.type === 'text') return item.text;
return '';
})
.join('');
})();
const text = `${role}\n${promptText}`.trim();
return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
}, 0) +
countToolsTokens(tools) +
countToolsTokens(functionCall);
try {
const encodeText = enc.encode(text);
const supplementaryToken = role ? 4 : 0;
return encodeText.length + supplementaryToken;
} catch (error) {
return text.length;
}
};
const countToolsTokens = (
tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
) => {
if (!tools || tools.length === 0) return 0;
parentPort?.postMessage({
id,
type: 'success',
data: total
});
const toolText = tools
? JSON.stringify(tools)
.replace('"', '')
.replace('\n', '')
.replace(/( ){2,}/g, ' ')
: '';
global?.close?.();
return enc.encode(toolText).length;
};
const total =
messages.reduce((sum, item) => {
// Include the text of tool calls and function calls
const functionCallPrompt = (() => {
let prompt = '';
if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
const toolCalls = item.tool_calls;
prompt +=
toolCalls
?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
?.join('') || '';
const functionCall = item.function_call;
prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
}
return prompt;
})();
const contentPrompt = (() => {
if (!item.content) return '';
if (typeof item.content === 'string') return item.content;
return item.content
.map((item) => {
if (item.type === 'text') return item.text;
return '';
})
.join('');
})();
return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
}, 0) +
countToolsTokens(tools) +
countToolsTokens(functionCall);
parentPort?.postMessage({
id,
type: 'success',
data: total
});
} catch (error) {
parentPort?.postMessage({
id,
type: 'success',
data: 0
});
}
}
);
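The effective change here is that the whole handler is now wrapped in try/catch: on any failure the worker still replies, posting a token count of 0, so the caller's pending promise resolves instead of hanging. Reduced to its shape (the countTokens helper is a hypothetical stand-in for the reduce above):

import { parentPort } from 'worker_threads';

// Stand-in for the messages.reduce(...) + countToolsTokens(...) total above.
declare function countTokens(messages: unknown[]): number;

parentPort?.on('message', ({ id, messages }: { id: string; messages: unknown[] }) => {
  try {
    const total = countTokens(messages); // enc.encode may throw on unusual input
    parentPort?.postMessage({ id, type: 'success', data: total });
  } catch (error) {
    // Reply anyway so the requesting side is not left waiting; 0 is the fallback.
    parentPort?.postMessage({ id, type: 'success', data: 0 });
  }
});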

View File

@@ -25,9 +25,12 @@ export const runWorker = <T = any>(name: WorkerNameEnum, params?: Record<string,
});
worker.on('error', (err) => {
worker.terminate();
reject(err);
worker.terminate();
});
worker.on('messageerror', (err) => {
reject(err);
worker.terminate();
});
});
};
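The fix rejects the promise before terminating the thread and adds a messageerror handler, so a worker that crashes or sends an unserializable message can no longer leave the caller hanging. A minimal sketch of the pattern (wrapper name and surrounding code assumed):

import { Worker } from 'worker_threads';

const waitForWorker = <T>(worker: Worker) =>
  new Promise<T>((resolve, reject) => {
    worker.on('message', (data: T) => resolve(data));
    worker.on('error', (err) => {
      reject(err); // settle the promise first...
      worker.terminate(); // ...then tear the thread down
    });
    worker.on('messageerror', (err) => {
      reject(err); // deserialization failures previously went unhandled
      worker.terminate();
    });
  });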

View File

@@ -1,10 +1,32 @@
### FastGPT V4.7.1
### FastGPT V4.8
1. New - Full voice input configuration. Voice input can be toggled per app (including on share pages), with optional auto-send after voice input and optional automatic (streaming) voice playback of the reply.
2. New - Pptx and xlsx file reading. All file parsing now runs server-side, which consumes more server resources and limits how much content can be previewed at upload time.
3. New - Laf cloud function integration: cloud functions from a Laf account can be used as HTTP modules.
4. Changed - The CSV import template no longer validates headers; the first two columns are read automatically.
5. Fixed - Bug in question-completion chat history.
6. [Read the workflow (advanced orchestration) introduction](https://doc.fastgpt.in/docs/workflow/intro)
7. [Documentation](https://doc.fastgpt.in/docs/intro/)
8. [Commercial edition](https://doc.fastgpt.in/docs/commercial/)
The focus of this release is a rebuild of workflows (advanced orchestration), making them simpler and more powerful. Because the new workflow engine differs substantially from the old one, some workflows still need to be rebuilt by hand despite a degree of automatic conversion. Please upgrade to the new version soon, then debug and republish your workflows as needed.
❗ Important notes:
1⃣ Old workflows keep working after the upgrade. Opening an old workflow shows an automatic-conversion prompt; after rearranging it, click "Publish" to publish the new workflow.
2⃣ Workflow auto-save does not take effect until the new workflow has been published.
3⃣ Apps and plugins gain a version field that marks whether they target the new or the old workflow engine, for compatibility.
✨ New feature highlights:
1⃣ Condition node: if/elseIf/else branching for more flexible workflow control.
2⃣ Variable-update node: workflow output variables and global variables can be modified dynamically at run time.
3⃣ Workflow auto-save and version management: changes are saved automatically, with history viewing and rollback.
4⃣ Workflow debug mode: more intuitive and efficient, with single-node or step-by-step execution and live input/output data.
5⃣ Scheduled app execution: simple configuration for all kinds of scheduled tasks.
🛠️ Other improvements and fixes:
- Improved workflow node wiring with four-directional connections, making loop workflows easier to build.
- Significantly better performance when passing context data through a workflow.
- In simple mode, configuration changes refresh the debug panel automatically, with no manual save.
- Improved worker process management, with token-counting jobs dispatched across workers for better efficiency.
- Tool calls support the string, boolean, and number data types.
- Tightened the size parameter limit on the completions API.
- Refactored the Node.js API middleware and server-side code.
- Chat history length is kept even and the maximum is raised to 50 turns, avoiding odd-length histories that some models reject.
- An error in an HTTP node now aborts the run, so the failure cannot propagate.
- Fixed: tool call names could not start with a digit.
- Fixed: share-link query parameter caching bug.
- Fixed: compatibility issues between tool calls and the HTTP module.
- [Read the workflow (advanced orchestration) introduction](https://doc.fastgpt.in/docs/workflow/intro)
- [Documentation](https://doc.fastgpt.in/docs/intro/)
- [Commercial edition](https://doc.fastgpt.in/docs/commercial/)

View File

@@ -55,6 +55,8 @@ const MessageInput = ({
const { t } = useTranslation();
const havInput = !!inputValue || fileList.length > 0;
const hasFileUploading = fileList.some((item) => !item.url);
const canSendMessage = havInput && !hasFileUploading;
/* file selector and upload */
const { File, onOpen: onOpenSelectFile } = useSelectFile({
@@ -142,7 +144,8 @@ const MessageInput = ({
);
/* on send */
const handleSend = useCallback(async () => {
const handleSend = async () => {
if (!canSendMessage) return;
const textareaValue = TextareaDom.current?.value || '';
onSendMessage({
@@ -150,7 +153,7 @@ const MessageInput = ({
files: fileList
});
replaceFile([]);
}, [TextareaDom, fileList, onSendMessage, replaceFile]);
};
/* whisper init */
const {
@@ -466,16 +469,20 @@ const MessageInput = ({
h={['28px', '32px']}
w={['28px', '32px']}
borderRadius={'md'}
bg={isSpeaking || isChatting ? '' : !havInput ? '#E5E5E5' : 'primary.500'}
bg={
isSpeaking || isChatting
? ''
: !havInput || hasFileUploading
? '#E5E5E5'
: 'primary.500'
}
cursor={havInput ? 'pointer' : 'not-allowed'}
lineHeight={1}
onClick={() => {
if (isChatting) {
return onStop();
}
if (havInput) {
return handleSend();
}
return handleSend();
}}
>
{isChatting ? (

View File

@@ -53,7 +53,7 @@ const Markdown = ({
);
const formatSource = source
.replace(/\\n/g, '\n&nbsp;')
// .replace(/\\n/g, '\n')
.replace(/(http[s]?:\/\/[^\s。]+)([。,])/g, '$1 $2')
.replace(/\n*(\[QUOTE SIGN\]\(.*\))/g, '$1');
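For reference, what the two active replacements do (sample input invented): literal `\n` sequences become real line breaks padded with `&nbsp;`, and a space is inserted between a URL and trailing full-width punctuation so autolinking does not swallow the punctuation:

const sample = '见 http://example.com。下一句\\n继续';
const formatted = sample
  .replace(/\\n/g, '\n&nbsp;')
  .replace(/(http[s]?:\/\/[^\s。]+)([。,])/g, '$1 $2');
// => '见 http://example.com 。下一句\n&nbsp;继续'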

View File

@@ -40,3 +40,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
res.status(500).send(getErrText(err));
}
}
export const config = {
api: {
responseLimit: '16mb'
}
};

View File

@@ -362,12 +362,6 @@ async function handler(req: NextApiRequest, res: NextApiResponse) {
}
export default NextAPI(handler);
export const config = {
api: {
responseLimit: '20mb'
}
};
const authShareChat = async ({
chatId,
...data
@@ -526,3 +520,9 @@ const authHeaderRequest = async ({
canWrite
};
};
export const config = {
api: {
responseLimit: '20mb'
}
};

View File

@@ -28,7 +28,7 @@ export default React.memo(FileLocal);
const csvTemplate = `"第一列内容","第二列内容"
"必填列","可选列。CSV 中请注意内容不能包含双引号,双引号是列分割符号"
"只会第一和第二列内容导入,其余列会被忽略",""
"只会第一和第二列内容导入,其余列会被忽略",""
"结合人工智能的演进历程,AIGC的发展大致可以分为三个阶段即:早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期),以及快速发展展阶段(21世纪10年代中期至今)。",""
"AIGC发展分为几个阶段","早期萌芽阶段(20世纪50年代至90年代中期)、沉淀积累阶段(20世纪90年代中期至21世纪10年代中期)、快速发展展阶段(21世纪10年代中期至今)"`;

View File

@@ -32,7 +32,7 @@ export const useSearchTestStore = create<State>()(
datasetTestList: [],
pushDatasetTestItem(data) {
set((state) => {
state.datasetTestList = [data, ...state.datasetTestList].slice(0, 100);
state.datasetTestList = [data, ...state.datasetTestList].slice(0, 50);
});
},
delDatasetTestItemById(id) {