mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
4.8-alpha fix (#1424)
This commit is contained in:
@@ -57,7 +57,7 @@ export const countGptMessagesTokens = (
|
||||
|
||||
// 检测是否有内存泄漏
|
||||
// addLog.info(`Count token time: ${Date.now() - start}, token: ${data}`);
|
||||
// console.log(Object.keys(global.tiktokenWorker.callbackMap));
|
||||
// console.log(process.memoryUsage());
|
||||
};
|
||||
|
||||
worker.postMessage({
|
||||
|
@@ -15,6 +15,6 @@ export type InsertVectorProps = {
|
||||
export type EmbeddingRecallProps = {
|
||||
teamId: string;
|
||||
datasetIds: string[];
|
||||
similarity?: number;
|
||||
efSearch?: number;
|
||||
// similarity?: number;
|
||||
// efSearch?: number;
|
||||
};
|
||||
|
@@ -129,17 +129,15 @@ export const embeddingRecall = async (
|
||||
): Promise<{
|
||||
results: EmbeddingRecallItemType[];
|
||||
}> => {
|
||||
const { teamId, datasetIds, vectors, limit, similarity = 0, retry = 2, efSearch = 100 } = props;
|
||||
const { datasetIds, vectors, limit, retry = 2 } = props;
|
||||
|
||||
try {
|
||||
const results: any = await PgClient.query(
|
||||
`BEGIN;
|
||||
SET LOCAL hnsw.ef_search = ${efSearch};
|
||||
SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
|
||||
select id, collection_id, vector <#> '[${vectors[0]}]' AS score
|
||||
from ${PgDatasetTableName}
|
||||
where team_id='${teamId}'
|
||||
AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
|
||||
AND vector <#> '[${vectors[0]}]' < -${similarity}
|
||||
where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
|
||||
order by score limit ${limit};
|
||||
COMMIT;`
|
||||
);
|
||||
|
@@ -85,8 +85,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
|
||||
teamId,
|
||||
datasetIds,
|
||||
vectors,
|
||||
limit,
|
||||
efSearch: global.systemEnv?.pgHNSWEfSearch
|
||||
limit
|
||||
});
|
||||
|
||||
// get q and a
|
||||
|
@@ -127,8 +127,8 @@ const completions = async ({
|
||||
});
|
||||
const answer = data.choices?.[0].message?.content || '';
|
||||
|
||||
console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2));
|
||||
console.log(answer, '----');
|
||||
// console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2));
|
||||
// console.log(answer, '----');
|
||||
|
||||
const id =
|
||||
agents.find((item) => answer.includes(item.key))?.key ||
|
||||
|
@@ -10,9 +10,13 @@ export const readCsvRawText = async (params: ReadRawTextByBuffer): Promise<ReadF
|
||||
|
||||
const header = csvArr[0];
|
||||
|
||||
const formatText = header
|
||||
? csvArr.map((item) => item.map((item, i) => `${header[i]}:${item}`).join('\n')).join('\n')
|
||||
: '';
|
||||
// format to md table
|
||||
const formatText = `| ${header.join(' | ')} |
|
||||
| ${header.map(() => '---').join(' | ')} |
|
||||
${csvArr
|
||||
.slice(1)
|
||||
.map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`)
|
||||
.join('\n')}`;
|
||||
|
||||
return {
|
||||
rawText,
|
||||
|
@@ -1,3 +1,4 @@
|
||||
import { CUSTOM_SPLIT_SIGN } from '@fastgpt/global/common/string/textSplitter';
|
||||
import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
|
||||
import xlsx from 'node-xlsx';
|
||||
import Papa from 'papaparse';
|
||||
@@ -18,25 +19,25 @@ export const readXlsxRawText = async ({
|
||||
});
|
||||
|
||||
const rawText = format2Csv.map((item) => item.csvText).join('\n');
|
||||
|
||||
const formatText = format2Csv
|
||||
.map((item) => {
|
||||
const csvArr = Papa.parse(item.csvText).data as string[][];
|
||||
const header = csvArr[0];
|
||||
|
||||
const formatText = header
|
||||
? csvArr
|
||||
.map((item) =>
|
||||
item
|
||||
.map((item, i) => (item ? `${header[i]}:${item}` : ''))
|
||||
.filter(Boolean)
|
||||
.join('\n')
|
||||
)
|
||||
.join('\n')
|
||||
: '';
|
||||
if (!header) return;
|
||||
|
||||
return `${item.title}\n${formatText}`;
|
||||
const formatText = `| ${header.join(' | ')} |
|
||||
| ${header.map(() => '---').join(' | ')} |
|
||||
${csvArr
|
||||
.slice(1)
|
||||
.map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`)
|
||||
.join('\n')}`;
|
||||
|
||||
return formatText;
|
||||
})
|
||||
.join('\n');
|
||||
.filter(Boolean)
|
||||
.join(CUSTOM_SPLIT_SIGN);
|
||||
|
||||
return {
|
||||
rawText: rawText,
|
||||
|
@@ -67,5 +67,5 @@ parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
|
||||
});
|
||||
}
|
||||
|
||||
global?.close?.();
|
||||
process.exit();
|
||||
});
|
||||
|
@@ -15,6 +15,5 @@ parentPort?.on('message', (params: { html: string }) => {
|
||||
data: error
|
||||
});
|
||||
}
|
||||
|
||||
global?.close?.();
|
||||
process.exit();
|
||||
});
|
||||
|
@@ -26,91 +26,96 @@ parentPort?.on(
|
||||
tools?: ChatCompletionTool[];
|
||||
functionCall?: ChatCompletionCreateParams.Function[];
|
||||
}) => {
|
||||
const start = Date.now();
|
||||
/* count one prompt tokens */
|
||||
const countPromptTokens = (
|
||||
prompt: string | ChatCompletionContentPart[] | null | undefined = '',
|
||||
role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
|
||||
) => {
|
||||
const promptText = (() => {
|
||||
if (!prompt) return '';
|
||||
if (typeof prompt === 'string') return prompt;
|
||||
let promptText = '';
|
||||
prompt.forEach((item) => {
|
||||
if (item.type === 'text') {
|
||||
promptText += item.text;
|
||||
} else if (item.type === 'image_url') {
|
||||
promptText += item.image_url.url;
|
||||
}
|
||||
});
|
||||
return promptText;
|
||||
})();
|
||||
|
||||
const text = `${role}\n${promptText}`.trim();
|
||||
|
||||
try {
|
||||
const encodeText = enc.encode(text);
|
||||
const supplementaryToken = role ? 4 : 0;
|
||||
return encodeText.length + supplementaryToken;
|
||||
} catch (error) {
|
||||
return text.length;
|
||||
}
|
||||
};
|
||||
const countToolsTokens = (
|
||||
tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
|
||||
) => {
|
||||
if (!tools || tools.length === 0) return 0;
|
||||
|
||||
const toolText = tools
|
||||
? JSON.stringify(tools)
|
||||
.replace('"', '')
|
||||
.replace('\n', '')
|
||||
.replace(/( ){2,}/g, ' ')
|
||||
: '';
|
||||
|
||||
return enc.encode(toolText).length;
|
||||
};
|
||||
|
||||
const total =
|
||||
messages.reduce((sum, item) => {
|
||||
// Evaluates the text of toolcall and functioncall
|
||||
const functionCallPrompt = (() => {
|
||||
let prompt = '';
|
||||
if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
|
||||
const toolCalls = item.tool_calls;
|
||||
prompt +=
|
||||
toolCalls
|
||||
?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
|
||||
?.join('') || '';
|
||||
|
||||
const functionCall = item.function_call;
|
||||
prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
|
||||
}
|
||||
return prompt;
|
||||
try {
|
||||
/* count one prompt tokens */
|
||||
const countPromptTokens = (
|
||||
prompt: string | ChatCompletionContentPart[] | null | undefined = '',
|
||||
role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
|
||||
) => {
|
||||
const promptText = (() => {
|
||||
if (!prompt) return '';
|
||||
if (typeof prompt === 'string') return prompt;
|
||||
let promptText = '';
|
||||
prompt.forEach((item) => {
|
||||
if (item.type === 'text') {
|
||||
promptText += item.text;
|
||||
} else if (item.type === 'image_url') {
|
||||
promptText += item.image_url.url;
|
||||
}
|
||||
});
|
||||
return promptText;
|
||||
})();
|
||||
|
||||
const contentPrompt = (() => {
|
||||
if (!item.content) return '';
|
||||
if (typeof item.content === 'string') return item.content;
|
||||
return item.content
|
||||
.map((item) => {
|
||||
if (item.type === 'text') return item.text;
|
||||
return '';
|
||||
})
|
||||
.join('');
|
||||
})();
|
||||
const text = `${role}\n${promptText}`.trim();
|
||||
|
||||
return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
|
||||
}, 0) +
|
||||
countToolsTokens(tools) +
|
||||
countToolsTokens(functionCall);
|
||||
try {
|
||||
const encodeText = enc.encode(text);
|
||||
const supplementaryToken = role ? 4 : 0;
|
||||
return encodeText.length + supplementaryToken;
|
||||
} catch (error) {
|
||||
return text.length;
|
||||
}
|
||||
};
|
||||
const countToolsTokens = (
|
||||
tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
|
||||
) => {
|
||||
if (!tools || tools.length === 0) return 0;
|
||||
|
||||
parentPort?.postMessage({
|
||||
id,
|
||||
type: 'success',
|
||||
data: total
|
||||
});
|
||||
const toolText = tools
|
||||
? JSON.stringify(tools)
|
||||
.replace('"', '')
|
||||
.replace('\n', '')
|
||||
.replace(/( ){2,}/g, ' ')
|
||||
: '';
|
||||
|
||||
global?.close?.();
|
||||
return enc.encode(toolText).length;
|
||||
};
|
||||
|
||||
const total =
|
||||
messages.reduce((sum, item) => {
|
||||
// Evaluates the text of toolcall and functioncall
|
||||
const functionCallPrompt = (() => {
|
||||
let prompt = '';
|
||||
if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
|
||||
const toolCalls = item.tool_calls;
|
||||
prompt +=
|
||||
toolCalls
|
||||
?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
|
||||
?.join('') || '';
|
||||
|
||||
const functionCall = item.function_call;
|
||||
prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
|
||||
}
|
||||
return prompt;
|
||||
})();
|
||||
|
||||
const contentPrompt = (() => {
|
||||
if (!item.content) return '';
|
||||
if (typeof item.content === 'string') return item.content;
|
||||
return item.content
|
||||
.map((item) => {
|
||||
if (item.type === 'text') return item.text;
|
||||
return '';
|
||||
})
|
||||
.join('');
|
||||
})();
|
||||
|
||||
return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
|
||||
}, 0) +
|
||||
countToolsTokens(tools) +
|
||||
countToolsTokens(functionCall);
|
||||
|
||||
parentPort?.postMessage({
|
||||
id,
|
||||
type: 'success',
|
||||
data: total
|
||||
});
|
||||
} catch (error) {
|
||||
parentPort?.postMessage({
|
||||
id,
|
||||
type: 'success',
|
||||
data: 0
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
|
@@ -25,9 +25,12 @@ export const runWorker = <T = any>(name: WorkerNameEnum, params?: Record<string,
|
||||
});
|
||||
|
||||
worker.on('error', (err) => {
|
||||
worker.terminate();
|
||||
|
||||
reject(err);
|
||||
worker.terminate();
|
||||
});
|
||||
worker.on('messageerror', (err) => {
|
||||
reject(err);
|
||||
worker.terminate();
|
||||
});
|
||||
});
|
||||
};
|
||||
|
Reference in New Issue
Block a user