fix: 抽取概要

This commit is contained in:
archer
2023-03-26 22:43:39 +08:00
parent 3e4487ad9a
commit 249ed18d15
3 changed files with 33 additions and 36 deletions

View File

@@ -13,7 +13,7 @@ const DataDetail = ({ dataName, dataId }: { dataName: string; dataId: string })
data: dataItems data: dataItems
} = usePaging<DataItemSchema>({ } = usePaging<DataItemSchema>({
api: getDataItems, api: getDataItems,
pageSize: 5, pageSize: 10,
params: { params: {
dataId dataId
} }

View File

@@ -12,7 +12,7 @@ export async function generateAbstract(next = false): Promise<any> {
const systemPrompt: ChatCompletionRequestMessage = { const systemPrompt: ChatCompletionRequestMessage = {
role: 'system', role: 'system',
content: `我会向你发送一段长文本请从中总结出3~10个摘要,尽量详细,请按以下格式返回: "(1):"\n"(2):"\n"(3):"\n` content: `请从长文本中总结出5至15个摘要,尽量详细,请务必按以下格式返回: "(1):"\n"(2):"\n"(3):"\n`
}; };
let dataItem: DataItemSchema | null = null; let dataItem: DataItemSchema | null = null;
@@ -57,37 +57,29 @@ export async function generateAbstract(next = false): Promise<any> {
// 获取 openai 请求实例 // 获取 openai 请求实例
const chatAPI = getOpenAIApi(userApiKey || systemKey); const chatAPI = getOpenAIApi(userApiKey || systemKey);
// 请求 chatgpt 获取摘要 // 请求 chatgpt 获取摘要
const abstractResponse = await Promise.allSettled( const abstractResponse = await chatAPI.createChatCompletion(
[0.5, 1].map((temperature) => {
chatAPI.createChatCompletion( model: ChatModelNameEnum.GPT35,
temperature: 0.8,
n: 1,
messages: [
systemPrompt,
{ {
model: ChatModelNameEnum.GPT35, role: 'user',
temperature: temperature, content: dataItem?.text || ''
n: 1,
messages: [
systemPrompt,
{
role: 'user',
content: dataItem?.text || ''
}
]
},
{
timeout: 120000,
httpsAgent
} }
) ]
) },
{
timeout: 120000,
httpsAgent
}
); );
// 过滤出成功的响应
const successAbstracts = abstractResponse.filter((item) => item.status === 'fulfilled');
// 提取摘要内容 // 提取摘要内容
const rawContents: string[] = successAbstracts.map( const rawContent: string = abstractResponse?.data.choices[0].message?.content || '';
(item: any) => item?.value?.data.choices[0].message?.content || ''
);
// 从 content 中提取摘要内容 // 从 content 中提取摘要内容
const splitContents = rawContents.map((content) => splitText(content)).flat(); const splitContents = splitText(rawContent);
// 生成词向量 // 生成词向量
const vectorResponse = await Promise.allSettled( const vectorResponse = await Promise.allSettled(
@@ -101,7 +93,13 @@ export async function generateAbstract(next = false): Promise<any> {
// 筛选成功的向量请求 // 筛选成功的向量请求
const vectorSuccessResponse = vectorResponse const vectorSuccessResponse = vectorResponse
.map((item: any, i) => { .map((item: any, i) => {
if (item.status !== 'fulfilled') return ''; if (item.status !== 'fulfilled') {
console.log('获取词向量错误: ', item);
return {
abstract: splitContents[i].abstract,
abstractVector: ''
};
}
return { return {
abstract: splitContents[i].abstract, abstract: splitContents[i].abstract,
abstractVector: item?.value?.data?.data?.[0]?.embedding abstractVector: item?.value?.data?.data?.[0]?.embedding
@@ -113,9 +111,7 @@ export async function generateAbstract(next = false): Promise<any> {
await DataItem.findByIdAndUpdate(dataItem._id, { await DataItem.findByIdAndUpdate(dataItem._id, {
status: 0, status: 0,
$push: { $push: {
rawResponse: { rawResponse: rawContent,
$each: rawContents
},
result: { result: {
$each: vectorSuccessResponse $each: vectorSuccessResponse
} }
@@ -124,21 +120,21 @@ export async function generateAbstract(next = false): Promise<any> {
// 计费 // 计费
!userApiKey && !userApiKey &&
splitContents.length > 0 && vectorResponse.length > 0 &&
pushSplitDataBill({ pushSplitDataBill({
userId: dataItem.userId, userId: dataItem.userId,
type: 'abstract', type: 'abstract',
text: text:
systemPrompt.content + systemPrompt.content +
dataItem.text + dataItem.text +
rawContents.join('') + rawContent +
rawContents.join('').substring(0, Math.floor(dataItem.text.length / 10)) // 向量价格是gpt35的1/10 rawContent.substring(0, Math.floor(dataItem.text.length / 10)) // 向量价格是gpt35的1/10
}); });
console.log( console.log(
'生成摘要成功time:', '生成摘要成功time:',
`${(Date.now() - startTime) / 1000}s`, `${(Date.now() - startTime) / 1000}s`,
'摘要数量:', '摘要数量:',
splitContents.length vectorResponse.length
); );
} catch (error: any) { } catch (error: any) {
console.log('error: 生成摘要错误', dataItem?._id); console.log('error: 生成摘要错误', dataItem?._id);

View File

@@ -1,6 +1,7 @@
import { Schema, model, models, Model } from 'mongoose'; import { Schema, model, models, Model } from 'mongoose';
import { modelList } from '@/constants/model'; import { modelList } from '@/constants/model';
import { BillSchema as BillType } from '@/types/mongoSchema'; import { BillSchema as BillType } from '@/types/mongoSchema';
import { BillTypeMap } from '@/constants/user';
const BillSchema = new Schema({ const BillSchema = new Schema({
userId: { userId: {
@@ -10,7 +11,7 @@ const BillSchema = new Schema({
}, },
type: { type: {
type: String, type: String,
enum: ['chat', 'splitData', 'return'], enum: Object.keys(BillTypeMap),
required: true required: true
}, },
modelName: { modelName: {