4.8-alpha fix (#1424)

This commit is contained in:
Archer
2024-05-09 22:48:44 +08:00
committed by GitHub
parent 6463427d93
commit 434af56abd
19 changed files with 255 additions and 148 deletions

View File

@@ -57,7 +57,7 @@ export const countGptMessagesTokens = (
// 检测是否有内存泄漏
// addLog.info(`Count token time: ${Date.now() - start}, token: ${data}`);
// console.log(Object.keys(global.tiktokenWorker.callbackMap));
// console.log(process.memoryUsage());
};
worker.postMessage({

View File

@@ -15,6 +15,6 @@ export type InsertVectorProps = {
export type EmbeddingRecallProps = {
teamId: string;
datasetIds: string[];
similarity?: number;
efSearch?: number;
// similarity?: number;
// efSearch?: number;
};

View File

@@ -129,17 +129,15 @@ export const embeddingRecall = async (
): Promise<{
results: EmbeddingRecallItemType[];
}> => {
const { teamId, datasetIds, vectors, limit, similarity = 0, retry = 2, efSearch = 100 } = props;
const { datasetIds, vectors, limit, retry = 2 } = props;
try {
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${efSearch};
SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
select id, collection_id, vector <#> '[${vectors[0]}]' AS score
from ${PgDatasetTableName}
where team_id='${teamId}'
AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
AND vector <#> '[${vectors[0]}]' < -${similarity}
where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
order by score limit ${limit};
COMMIT;`
);

View File

@@ -85,8 +85,7 @@ export async function searchDatasetData(props: SearchDatasetDataProps) {
teamId,
datasetIds,
vectors,
limit,
efSearch: global.systemEnv?.pgHNSWEfSearch
limit
});
// get q and a

View File

@@ -127,8 +127,8 @@ const completions = async ({
});
const answer = data.choices?.[0].message?.content || '';
console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2));
console.log(answer, '----');
// console.log(JSON.stringify(chats2GPTMessages({ messages, reserveId: false }), null, 2));
// console.log(answer, '----');
const id =
agents.find((item) => answer.includes(item.key))?.key ||

View File

@@ -10,9 +10,13 @@ export const readCsvRawText = async (params: ReadRawTextByBuffer): Promise<ReadF
const header = csvArr[0];
const formatText = header
? csvArr.map((item) => item.map((item, i) => `${header[i]}:${item}`).join('\n')).join('\n')
: '';
// format to md table
const formatText = `| ${header.join(' | ')} |
| ${header.map(() => '---').join(' | ')} |
${csvArr
.slice(1)
.map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`)
.join('\n')}`;
return {
rawText,

View File

@@ -1,3 +1,4 @@
import { CUSTOM_SPLIT_SIGN } from '@fastgpt/global/common/string/textSplitter';
import { ReadRawTextByBuffer, ReadFileResponse } from '../type';
import xlsx from 'node-xlsx';
import Papa from 'papaparse';
@@ -18,25 +19,25 @@ export const readXlsxRawText = async ({
});
const rawText = format2Csv.map((item) => item.csvText).join('\n');
const formatText = format2Csv
.map((item) => {
const csvArr = Papa.parse(item.csvText).data as string[][];
const header = csvArr[0];
const formatText = header
? csvArr
.map((item) =>
item
.map((item, i) => (item ? `${header[i]}:${item}` : ''))
.filter(Boolean)
.join('\n')
)
.join('\n')
: '';
if (!header) return;
return `${item.title}\n${formatText}`;
const formatText = `| ${header.join(' | ')} |
| ${header.map(() => '---').join(' | ')} |
${csvArr
.slice(1)
.map((row) => `| ${row.map((item) => item.replace(/\n/g, '\\n')).join(' | ')} |`)
.join('\n')}`;
return formatText;
})
.join('\n');
.filter(Boolean)
.join(CUSTOM_SPLIT_SIGN);
return {
rawText: rawText,

View File

@@ -67,5 +67,5 @@ parentPort?.on('message', async (props: ReadRawTextProps<Uint8Array>) => {
});
}
global?.close?.();
process.exit();
});

View File

@@ -15,6 +15,5 @@ parentPort?.on('message', (params: { html: string }) => {
data: error
});
}
global?.close?.();
process.exit();
});

View File

@@ -26,91 +26,96 @@ parentPort?.on(
tools?: ChatCompletionTool[];
functionCall?: ChatCompletionCreateParams.Function[];
}) => {
const start = Date.now();
/* count one prompt tokens */
const countPromptTokens = (
prompt: string | ChatCompletionContentPart[] | null | undefined = '',
role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
) => {
const promptText = (() => {
if (!prompt) return '';
if (typeof prompt === 'string') return prompt;
let promptText = '';
prompt.forEach((item) => {
if (item.type === 'text') {
promptText += item.text;
} else if (item.type === 'image_url') {
promptText += item.image_url.url;
}
});
return promptText;
})();
const text = `${role}\n${promptText}`.trim();
try {
const encodeText = enc.encode(text);
const supplementaryToken = role ? 4 : 0;
return encodeText.length + supplementaryToken;
} catch (error) {
return text.length;
}
};
const countToolsTokens = (
tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
) => {
if (!tools || tools.length === 0) return 0;
const toolText = tools
? JSON.stringify(tools)
.replace('"', '')
.replace('\n', '')
.replace(/( ){2,}/g, ' ')
: '';
return enc.encode(toolText).length;
};
const total =
messages.reduce((sum, item) => {
// Evaluates the text of toolcall and functioncall
const functionCallPrompt = (() => {
let prompt = '';
if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
const toolCalls = item.tool_calls;
prompt +=
toolCalls
?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
?.join('') || '';
const functionCall = item.function_call;
prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
}
return prompt;
try {
/* count one prompt tokens */
const countPromptTokens = (
prompt: string | ChatCompletionContentPart[] | null | undefined = '',
role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
) => {
const promptText = (() => {
if (!prompt) return '';
if (typeof prompt === 'string') return prompt;
let promptText = '';
prompt.forEach((item) => {
if (item.type === 'text') {
promptText += item.text;
} else if (item.type === 'image_url') {
promptText += item.image_url.url;
}
});
return promptText;
})();
const contentPrompt = (() => {
if (!item.content) return '';
if (typeof item.content === 'string') return item.content;
return item.content
.map((item) => {
if (item.type === 'text') return item.text;
return '';
})
.join('');
})();
const text = `${role}\n${promptText}`.trim();
return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
}, 0) +
countToolsTokens(tools) +
countToolsTokens(functionCall);
try {
const encodeText = enc.encode(text);
const supplementaryToken = role ? 4 : 0;
return encodeText.length + supplementaryToken;
} catch (error) {
return text.length;
}
};
const countToolsTokens = (
tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
) => {
if (!tools || tools.length === 0) return 0;
parentPort?.postMessage({
id,
type: 'success',
data: total
});
const toolText = tools
? JSON.stringify(tools)
.replace('"', '')
.replace('\n', '')
.replace(/( ){2,}/g, ' ')
: '';
global?.close?.();
return enc.encode(toolText).length;
};
const total =
messages.reduce((sum, item) => {
// Evaluates the text of toolcall and functioncall
const functionCallPrompt = (() => {
let prompt = '';
if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
const toolCalls = item.tool_calls;
prompt +=
toolCalls
?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
?.join('') || '';
const functionCall = item.function_call;
prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
}
return prompt;
})();
const contentPrompt = (() => {
if (!item.content) return '';
if (typeof item.content === 'string') return item.content;
return item.content
.map((item) => {
if (item.type === 'text') return item.text;
return '';
})
.join('');
})();
return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
}, 0) +
countToolsTokens(tools) +
countToolsTokens(functionCall);
parentPort?.postMessage({
id,
type: 'success',
data: total
});
} catch (error) {
parentPort?.postMessage({
id,
type: 'success',
data: 0
});
}
}
);

View File

@@ -25,9 +25,12 @@ export const runWorker = <T = any>(name: WorkerNameEnum, params?: Record<string,
});
worker.on('error', (err) => {
worker.terminate();
reject(err);
worker.terminate();
});
worker.on('messageerror', (err) => {
reject(err);
worker.terminate();
});
});
};