mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-25 06:14:06 +00:00
feat: 摘要拆分
This commit is contained in:
@@ -3,12 +3,13 @@ import { RequestPaging } from '../types/index';
|
|||||||
import { Obj2Query } from '@/utils/tools';
|
import { Obj2Query } from '@/utils/tools';
|
||||||
import type { DataListItem } from '@/types/data';
|
import type { DataListItem } from '@/types/data';
|
||||||
import type { PagingData } from '../types/index';
|
import type { PagingData } from '../types/index';
|
||||||
import { DataItemSchema } from '@/types/mongoSchema';
|
import type { DataItemSchema } from '@/types/mongoSchema';
|
||||||
|
import type { CreateDataProps } from '@/pages/data/components/CreateDataModal';
|
||||||
|
|
||||||
export const getDataList = (data: RequestPaging) =>
|
export const getDataList = (data: RequestPaging) =>
|
||||||
GET<PagingData<DataListItem>>(`/data/getDataList?${Obj2Query(data)}`);
|
GET<PagingData<DataListItem>>(`/data/getDataList?${Obj2Query(data)}`);
|
||||||
|
|
||||||
export const postData = (name: string) => POST<string>(`/data/postData?name=${name}`);
|
export const postData = (data: CreateDataProps) => POST<string>(`/data/postData`, data);
|
||||||
|
|
||||||
export const postSplitData = (dataId: string, text: string) =>
|
export const postSplitData = (dataId: string, text: string) =>
|
||||||
POST(`/data/splitData`, { dataId, text });
|
POST(`/data/splitData`, { dataId, text });
|
||||||
|
6
src/constants/data.ts
Normal file
6
src/constants/data.ts
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
import type { DataType } from '@/types/data';
|
||||||
|
|
||||||
|
// Display label (Chinese) for each dataset type: QA = question/answer splitting,
// abstract = summary generation. The keys of this map are also used as the allowed
// values of the `type` field in the Data and DataItem mongoose schemas.
export const DataTypeTextMap: Record<DataType, string> = {
|
||||||
|
QA: '问答拆分',
|
||||||
|
abstract: '摘要总结'
|
||||||
|
};
|
@@ -1,6 +1,8 @@
|
|||||||
export enum BillTypeEnum {
|
export enum BillTypeEnum {
|
||||||
chat = 'chat',
|
chat = 'chat',
|
||||||
splitData = 'splitData',
|
splitData = 'splitData',
|
||||||
|
QA = 'QA',
|
||||||
|
abstract = 'abstract',
|
||||||
return = 'return'
|
return = 'return'
|
||||||
}
|
}
|
||||||
export enum PageTypeEnum {
|
export enum PageTypeEnum {
|
||||||
@@ -11,6 +13,8 @@ export enum PageTypeEnum {
|
|||||||
|
|
||||||
export const BillTypeMap: Record<`${BillTypeEnum}`, string> = {
|
export const BillTypeMap: Record<`${BillTypeEnum}`, string> = {
|
||||||
[BillTypeEnum.chat]: '对话',
|
[BillTypeEnum.chat]: '对话',
|
||||||
[BillTypeEnum.splitData]: '文本拆分',
|
[BillTypeEnum.splitData]: 'QA拆分',
|
||||||
|
[BillTypeEnum.QA]: 'QA拆分',
|
||||||
|
[BillTypeEnum.abstract]: '摘要总结',
|
||||||
[BillTypeEnum.return]: '退款'
|
[BillTypeEnum.return]: '退款'
|
||||||
};
|
};
|
||||||
|
@@ -2,11 +2,12 @@ import type { NextApiRequest, NextApiResponse } from 'next';
|
|||||||
import { jsonRes } from '@/service/response';
|
import { jsonRes } from '@/service/response';
|
||||||
import { connectToDatabase, Data } from '@/service/mongo';
|
import { connectToDatabase, Data } from '@/service/mongo';
|
||||||
import { authToken } from '@/service/utils/tools';
|
import { authToken } from '@/service/utils/tools';
|
||||||
|
import type { DataType } from '@/types/data';
|
||||||
|
|
||||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||||
try {
|
try {
|
||||||
let { name } = req.query as { name: string };
|
let { name, type } = req.body as { name: string; type: DataType };
|
||||||
if (!name) {
|
if (!name || !type) {
|
||||||
throw new Error('参数错误');
|
throw new Error('参数错误');
|
||||||
}
|
}
|
||||||
await connectToDatabase();
|
await connectToDatabase();
|
||||||
@@ -18,7 +19,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|||||||
// 生成 data 集合
|
// 生成 data 集合
|
||||||
const data = await Data.create({
|
const data = await Data.create({
|
||||||
userId,
|
userId,
|
||||||
name
|
name,
|
||||||
|
type
|
||||||
});
|
});
|
||||||
|
|
||||||
jsonRes(res, {
|
jsonRes(res, {
|
||||||
|
@@ -1,9 +1,11 @@
|
|||||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||||
import { jsonRes } from '@/service/response';
|
import { jsonRes } from '@/service/response';
|
||||||
import { connectToDatabase, Data, DataItem } from '@/service/mongo';
|
import { connectToDatabase, DataItem, Data } from '@/service/mongo';
|
||||||
import { authToken } from '@/service/utils/tools';
|
import { authToken } from '@/service/utils/tools';
|
||||||
import { generateQA } from '@/service/events/generateQA';
|
import { generateQA } from '@/service/events/generateQA';
|
||||||
|
import { generateAbstract } from '@/service/events/generateAbstract';
|
||||||
|
|
||||||
|
/* 拆分数据成QA */
|
||||||
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
export default async function handler(req: NextApiRequest, res: NextApiResponse) {
|
||||||
try {
|
try {
|
||||||
let { text, dataId } = req.body as { text: string; dataId: string };
|
let { text, dataId } = req.body as { text: string; dataId: string };
|
||||||
@@ -17,14 +19,20 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|||||||
|
|
||||||
const userId = await authToken(authorization);
|
const userId = await authToken(authorization);
|
||||||
|
|
||||||
|
const DataRecord = await Data.findById(dataId);
|
||||||
|
|
||||||
|
if (!DataRecord) {
|
||||||
|
throw new Error('找不到数据集');
|
||||||
|
}
|
||||||
|
|
||||||
const dataItems: any[] = [];
|
const dataItems: any[] = [];
|
||||||
|
|
||||||
// 格式化文本长度
|
// 每 1000 字符一组
|
||||||
for (let i = 0; i <= text.length / 1000; i++) {
|
for (let i = 0; i <= text.length / 1000; i++) {
|
||||||
dataItems.push({
|
dataItems.push({
|
||||||
temperature: 0,
|
|
||||||
userId,
|
userId,
|
||||||
dataId,
|
dataId,
|
||||||
|
type: DataRecord.type,
|
||||||
text: text.slice(i * 1000, (i + 1) * 1000),
|
text: text.slice(i * 1000, (i + 1) * 1000),
|
||||||
status: 1
|
status: 1
|
||||||
});
|
});
|
||||||
@@ -33,10 +41,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|||||||
// 批量插入数据
|
// 批量插入数据
|
||||||
await DataItem.insertMany(dataItems);
|
await DataItem.insertMany(dataItems);
|
||||||
|
|
||||||
generateQA();
|
try {
|
||||||
|
generateQA();
|
||||||
|
generateAbstract();
|
||||||
|
} catch (error) {
|
||||||
|
error;
|
||||||
|
}
|
||||||
|
|
||||||
jsonRes(res, {
|
jsonRes(res, {
|
||||||
data: dataItems.length
|
data: ''
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
jsonRes(res, {
|
jsonRes(res, {
|
||||||
|
@@ -13,14 +13,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
|
|||||||
// await DataItem.updateMany(
|
// await DataItem.updateMany(
|
||||||
// {},
|
// {},
|
||||||
// {
|
// {
|
||||||
// times: 2
|
// type: 'QA'
|
||||||
|
// // times: 2
|
||||||
// }
|
// }
|
||||||
// );
|
// );
|
||||||
|
|
||||||
await Data.updateMany(
|
await Data.updateMany(
|
||||||
{},
|
{},
|
||||||
{
|
{
|
||||||
isDeleted: false
|
type: 'QA'
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@@ -8,10 +8,21 @@ import {
|
|||||||
ModalBody,
|
ModalBody,
|
||||||
ModalCloseButton,
|
ModalCloseButton,
|
||||||
Button,
|
Button,
|
||||||
Input
|
Input,
|
||||||
|
Select,
|
||||||
|
FormControl,
|
||||||
|
FormErrorMessage
|
||||||
} from '@chakra-ui/react';
|
} from '@chakra-ui/react';
|
||||||
import { postData } from '@/api/data';
|
import { postData } from '@/api/data';
|
||||||
import { useMutation } from '@tanstack/react-query';
|
import { useMutation } from '@tanstack/react-query';
|
||||||
|
import { useForm, SubmitHandler } from 'react-hook-form';
|
||||||
|
import { DataType } from '@/types/data';
|
||||||
|
import { DataTypeTextMap } from '@/constants/data';
|
||||||
|
|
||||||
|
export interface CreateDataProps {
|
||||||
|
name: string;
|
||||||
|
type: DataType;
|
||||||
|
}
|
||||||
|
|
||||||
const CreateDataModal = ({
|
const CreateDataModal = ({
|
||||||
onClose,
|
onClose,
|
||||||
@@ -21,9 +32,20 @@ const CreateDataModal = ({
|
|||||||
onSuccess: () => void;
|
onSuccess: () => void;
|
||||||
}) => {
|
}) => {
|
||||||
const [inputVal, setInputVal] = useState('');
|
const [inputVal, setInputVal] = useState('');
|
||||||
|
const {
|
||||||
|
getValues,
|
||||||
|
register,
|
||||||
|
handleSubmit,
|
||||||
|
formState: { errors }
|
||||||
|
} = useForm<CreateDataProps>({
|
||||||
|
defaultValues: {
|
||||||
|
name: '',
|
||||||
|
type: 'abstract'
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
const { isLoading, mutate } = useMutation({
|
const { isLoading, mutate } = useMutation({
|
||||||
mutationFn: (name: string) => postData(name),
|
mutationFn: (e: CreateDataProps) => postData(e),
|
||||||
onSuccess() {
|
onSuccess() {
|
||||||
onSuccess();
|
onSuccess();
|
||||||
onClose();
|
onClose();
|
||||||
@@ -37,23 +59,33 @@ const CreateDataModal = ({
|
|||||||
<ModalHeader>创建数据集</ModalHeader>
|
<ModalHeader>创建数据集</ModalHeader>
|
||||||
<ModalCloseButton />
|
<ModalCloseButton />
|
||||||
|
|
||||||
<ModalBody display={'flex'}>
|
<ModalBody>
|
||||||
<Input
|
<FormControl mb={8} isInvalid={!!errors.name}>
|
||||||
value={inputVal}
|
<Input
|
||||||
onChange={(e) => setInputVal(e.target.value)}
|
placeholder="数据集名称"
|
||||||
placeholder={'数据集名称'}
|
{...register('name', {
|
||||||
></Input>
|
required: '数据集名称不能为空'
|
||||||
|
})}
|
||||||
|
/>
|
||||||
|
<FormErrorMessage position={'absolute'} fontSize="xs">
|
||||||
|
{!!errors.name && errors.name.message}
|
||||||
|
</FormErrorMessage>
|
||||||
|
</FormControl>
|
||||||
|
<FormControl>
|
||||||
|
<Select placeholder="数据集类型" {...register('type', {})}>
|
||||||
|
{Object.entries(DataTypeTextMap).map(([key, value]) => (
|
||||||
|
<option key={key} value={key}>
|
||||||
|
{value}
|
||||||
|
</option>
|
||||||
|
))}
|
||||||
|
</Select>
|
||||||
|
</FormControl>
|
||||||
</ModalBody>
|
</ModalBody>
|
||||||
<ModalFooter>
|
<ModalFooter>
|
||||||
<Button colorScheme={'gray'} onClick={onClose}>
|
<Button colorScheme={'gray'} onClick={onClose}>
|
||||||
取消
|
取消
|
||||||
</Button>
|
</Button>
|
||||||
<Button
|
<Button ml={3} isLoading={isLoading} onClick={handleSubmit(mutate as any)}>
|
||||||
ml={3}
|
|
||||||
isDisabled={inputVal === ''}
|
|
||||||
isLoading={isLoading}
|
|
||||||
onClick={() => mutate(inputVal)}
|
|
||||||
>
|
|
||||||
确认
|
确认
|
||||||
</Button>
|
</Button>
|
||||||
</ModalFooter>
|
</ModalFooter>
|
||||||
|
@@ -22,6 +22,7 @@ import { useToast } from '@/hooks/useToast';
|
|||||||
import { useLoading } from '@/hooks/useLoading';
|
import { useLoading } from '@/hooks/useLoading';
|
||||||
import { formatPrice } from '@/utils/user';
|
import { formatPrice } from '@/utils/user';
|
||||||
import { modelList, ChatModelNameEnum } from '@/constants/model';
|
import { modelList, ChatModelNameEnum } from '@/constants/model';
|
||||||
|
import { encode, decode } from 'gpt-token-utils';
|
||||||
|
|
||||||
const fileExtension = '.txt,.doc,.docx,.pdf,.md';
|
const fileExtension = '.txt,.doc,.docx,.pdf,.md';
|
||||||
|
|
||||||
@@ -106,6 +107,7 @@ const ImportDataModal = ({
|
|||||||
.join('\n')
|
.join('\n')
|
||||||
.replace(/\n+/g, '\n');
|
.replace(/\n+/g, '\n');
|
||||||
setFileText(fileTexts);
|
setFileText(fileTexts);
|
||||||
|
console.log(encode(fileTexts));
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
toast({
|
toast({
|
||||||
@@ -161,7 +163,9 @@ const ImportDataModal = ({
|
|||||||
placeholder={'请粘贴或输入需要处理的文本'}
|
placeholder={'请粘贴或输入需要处理的文本'}
|
||||||
onChange={(e) => setTextInput(e.target.value)}
|
onChange={(e) => setTextInput(e.target.value)}
|
||||||
/>
|
/>
|
||||||
<Box mt={2}>一共 {textInput.length} 个字</Box>
|
<Box mt={2}>
|
||||||
|
一共 {textInput.length} 个字,{encode(textInput).length} 个tokens
|
||||||
|
</Box>
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
{activeTab === 'doc' && (
|
{activeTab === 'doc' && (
|
||||||
@@ -174,12 +178,15 @@ const ImportDataModal = ({
|
|||||||
border={'1px solid '}
|
border={'1px solid '}
|
||||||
borderColor={'blackAlpha.200'}
|
borderColor={'blackAlpha.200'}
|
||||||
borderRadius={'md'}
|
borderRadius={'md'}
|
||||||
|
fontSize={'sm'}
|
||||||
>
|
>
|
||||||
<Button onClick={onOpen}>选择文件</Button>
|
<Button onClick={onOpen}>选择文件</Button>
|
||||||
<Box mt={2}>支持 {fileExtension} 文件</Box>
|
<Box mt={2}>支持 {fileExtension} 文件</Box>
|
||||||
{fileText && (
|
{fileText && (
|
||||||
<>
|
<>
|
||||||
<Box mt={2}>一共 {fileText.length} 个字</Box>
|
<Box mt={2}>
|
||||||
|
一共 {fileText.length} 个字,{encode(fileText).length} 个tokens
|
||||||
|
</Box>
|
||||||
<Box
|
<Box
|
||||||
maxH={'300px'}
|
maxH={'300px'}
|
||||||
w={'100%'}
|
w={'100%'}
|
||||||
|
@@ -22,7 +22,7 @@ const DataDetail = ({ dataName, dataId }: { dataName: string; dataId: string })
|
|||||||
return (
|
return (
|
||||||
<Card py={4} h={'100%'} display={'flex'} flexDirection={'column'}>
|
<Card py={4} h={'100%'} display={'flex'} flexDirection={'column'}>
|
||||||
<Box px={6} fontSize={'xl'} fontWeight={'bold'}>
|
<Box px={6} fontSize={'xl'} fontWeight={'bold'}>
|
||||||
{dataName} 拆分结果
|
{dataName} 结果
|
||||||
</Box>
|
</Box>
|
||||||
<ScrollData
|
<ScrollData
|
||||||
flex={'1 0 0'}
|
flex={'1 0 0'}
|
||||||
@@ -38,8 +38,13 @@ const DataDetail = ({ dataName, dataId }: { dataName: string; dataId: string })
|
|||||||
<Box key={item._id}>
|
<Box key={item._id}>
|
||||||
{item.result.map((result, i) => (
|
{item.result.map((result, i) => (
|
||||||
<Box key={i} mb={3}>
|
<Box key={i} mb={3}>
|
||||||
<Box fontWeight={'bold'}>Q: {result.q}</Box>
|
{item.type === 'QA' && (
|
||||||
<Box>A: {result.a}</Box>
|
<>
|
||||||
|
<Box fontWeight={'bold'}>Q: {result.q}</Box>
|
||||||
|
<Box>A: {result.a}</Box>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
{item.type === 'abstract' && <Box fontSize={'sm'}>{result.abstract}</Box>}
|
||||||
</Box>
|
</Box>
|
||||||
))}
|
))}
|
||||||
</Box>
|
</Box>
|
||||||
|
@@ -28,13 +28,14 @@ import { useRouter } from 'next/router';
|
|||||||
import { useConfirm } from '@/hooks/useConfirm';
|
import { useConfirm } from '@/hooks/useConfirm';
|
||||||
import { useRequest } from '@/hooks/useRequest';
|
import { useRequest } from '@/hooks/useRequest';
|
||||||
import { DataItemSchema } from '@/types/mongoSchema';
|
import { DataItemSchema } from '@/types/mongoSchema';
|
||||||
|
import { DataTypeTextMap } from '@/constants/data';
|
||||||
import { customAlphabet } from 'nanoid';
|
import { customAlphabet } from 'nanoid';
|
||||||
const nanoid = customAlphabet('.,', 1);
|
const nanoid = customAlphabet('.,', 1);
|
||||||
|
|
||||||
const CreateDataModal = dynamic(() => import('./components/CreateDataModal'));
|
const CreateDataModal = dynamic(() => import('./components/CreateDataModal'));
|
||||||
const ImportDataModal = dynamic(() => import('./components/ImportDataModal'));
|
const ImportDataModal = dynamic(() => import('./components/ImportDataModal'));
|
||||||
|
|
||||||
export type ExportDataType = 'jsonl';
|
export type ExportDataType = 'jsonl' | 'txt';
|
||||||
|
|
||||||
const DataList = () => {
|
const DataList = () => {
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
@@ -84,21 +85,26 @@ const DataList = () => {
|
|||||||
let text = '';
|
let text = '';
|
||||||
// 生成 jsonl
|
// 生成 jsonl
|
||||||
data.forEach((item) => {
|
data.forEach((item) => {
|
||||||
const result = JSON.stringify({
|
if (res.type === 'jsonl' && item.q && item.a) {
|
||||||
prompt: `${item.q.toLocaleLowerCase()}${nanoid()}</s>`,
|
const result = JSON.stringify({
|
||||||
completion: ` ${item.a}###`
|
prompt: `${item.q.toLocaleLowerCase()}${nanoid()}</s>`,
|
||||||
});
|
completion: ` ${item.a}###`
|
||||||
text += `${result}\n`;
|
});
|
||||||
|
text += `${result}\n`;
|
||||||
|
} else if (res.type === 'txt' && item.abstract) {
|
||||||
|
text += `${item.abstract}\n`;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
// 去掉最后一个 \n
|
// 去掉最后一个 \n
|
||||||
text = text.substring(0, text.length - 1);
|
text = text.substring(0, text.length - 1);
|
||||||
|
|
||||||
// 导出为文件
|
// 导出为文件
|
||||||
const blob = new Blob([text], { type: 'application/json;charset=utf-8' });
|
const blob = new Blob([text], { type: 'application/json;charset=utf-8' });
|
||||||
|
|
||||||
// 创建下载链接
|
// 创建下载链接
|
||||||
const downloadLink = document.createElement('a');
|
const downloadLink = document.createElement('a');
|
||||||
downloadLink.href = window.URL.createObjectURL(blob);
|
downloadLink.href = window.URL.createObjectURL(blob);
|
||||||
downloadLink.download = 'file.jsonl';
|
downloadLink.download = `data.${res.type}`;
|
||||||
|
|
||||||
// 添加链接到页面并触发下载
|
// 添加链接到页面并触发下载
|
||||||
document.body.appendChild(downloadLink);
|
document.body.appendChild(downloadLink);
|
||||||
@@ -138,6 +144,7 @@ const DataList = () => {
|
|||||||
<Thead>
|
<Thead>
|
||||||
<Tr>
|
<Tr>
|
||||||
<Th>集合名</Th>
|
<Th>集合名</Th>
|
||||||
|
<Th>类型</Th>
|
||||||
<Th>创建时间</Th>
|
<Th>创建时间</Th>
|
||||||
<Th>训练中 / 总数据</Th>
|
<Th>训练中 / 总数据</Th>
|
||||||
<Th></Th>
|
<Th></Th>
|
||||||
@@ -158,6 +165,7 @@ const DataList = () => {
|
|||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
</Td>
|
</Td>
|
||||||
|
<Td>{DataTypeTextMap[item.type || 'QA']}</Td>
|
||||||
<Td>{dayjs(item.createTime).format('YYYY/MM/DD HH:mm')}</Td>
|
<Td>{dayjs(item.createTime).format('YYYY/MM/DD HH:mm')}</Td>
|
||||||
<Td>
|
<Td>
|
||||||
{item.trainingData} / {item.totalData}
|
{item.trainingData} / {item.totalData}
|
||||||
@@ -187,9 +195,18 @@ const DataList = () => {
|
|||||||
导出
|
导出
|
||||||
</MenuButton>
|
</MenuButton>
|
||||||
<MenuList>
|
<MenuList>
|
||||||
<MenuItem onClick={() => handleExportData({ data: item, type: 'jsonl' })}>
|
{item.type === 'QA' && (
|
||||||
jsonl
|
<MenuItem
|
||||||
</MenuItem>
|
onClick={() => handleExportData({ data: item, type: 'jsonl' })}
|
||||||
|
>
|
||||||
|
jsonl
|
||||||
|
</MenuItem>
|
||||||
|
)}
|
||||||
|
{item.type === 'abstract' && (
|
||||||
|
<MenuItem onClick={() => handleExportData({ data: item, type: 'txt' })}>
|
||||||
|
txt
|
||||||
|
</MenuItem>
|
||||||
|
)}
|
||||||
</MenuList>
|
</MenuList>
|
||||||
</Menu>
|
</Menu>
|
||||||
|
|
||||||
|
@@ -97,7 +97,7 @@ const ModelEditForm = ({ formHooks }: { formHooks: UseFormReturn<ModelSchema> })
|
|||||||
<Box mb={1}>系统提示词</Box>
|
<Box mb={1}>系统提示词</Box>
|
||||||
<Textarea
|
<Textarea
|
||||||
rows={6}
|
rows={6}
|
||||||
maxLength={500}
|
maxLength={-1}
|
||||||
{...register('systemPrompt')}
|
{...register('systemPrompt')}
|
||||||
placeholder={
|
placeholder={
|
||||||
'模型默认的 prompt 词,通过调整该内容,可以生成一个限定范围的模型。\n\n注意,改功能会影响对话的整体朝向!'
|
'模型默认的 prompt 词,通过调整该内容,可以生成一个限定范围的模型。\n\n注意,改功能会影响对话的整体朝向!'
|
||||||
|
177
src/service/events/generateAbstract.ts
Normal file
177
src/service/events/generateAbstract.ts
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
import { DataItem } from '@/service/mongo';
|
||||||
|
import { getOpenAIApi } from '@/service/utils/chat';
|
||||||
|
import { httpsAgent, getOpenApiKey } from '@/service/utils/tools';
|
||||||
|
import type { ChatCompletionRequestMessage } from 'openai';
|
||||||
|
import { DataItemSchema } from '@/types/mongoSchema';
|
||||||
|
import { ChatModelNameEnum } from '@/constants/model';
|
||||||
|
import { pushSplitDataBill } from '@/service/events/pushBill';
|
||||||
|
|
||||||
|
/**
 * Background worker that generates abstracts (summaries) for queued data items.
 *
 * Each run picks one DataItem of type `abstract` that is not idle (status != 0)
 * and still has retries left, asks the chat model for summaries at two
 * temperatures, requests an embedding for every extracted summary, stores the raw
 * responses and the results on the item, bills the user when the system key was
 * used, and finally schedules itself again. The chain stops when no item is left.
 *
 * `global.generatingAbstract` is used as a process-wide "already running" flag.
 *
 * @param next - true when the worker re-schedules itself; bypasses the flag check.
 * @returns A promise that resolves with no value once this single run is finished.
 */
export async function generateAbstract(next = false): Promise<any> {
  if (global.generatingAbstract && !next) return;
  global.generatingAbstract = true;

  const systemPrompt: ChatCompletionRequestMessage = {
    role: 'system',
    content: `我会向你发送一段长文本,请从中总结出3~10个摘要,尽量详细,请按以下格式返回: "(1):"\n"(2):"\n"(3):"\n`
  };
  let dataItem: DataItemSchema | null = null;

  try {
    // Find one item that still needs an abstract.
    dataItem = await DataItem.findOne({
      status: { $ne: 0 },
      times: { $gt: 0 },
      type: 'abstract'
    });

    if (!dataItem) {
      console.log('没有需要生成 【摘要】 的数据');
      global.generatingAbstract = false;
      return;
    }

    // Mark the item as "generating".
    await DataItem.findByIdAndUpdate(dataItem._id, {
      status: 2
    });

    // Resolve the OpenAI key (the user's own key or the system key).
    let userApiKey, systemKey;
    try {
      const key = await getOpenApiKey(dataItem.userId);
      userApiKey = key.userApiKey;
      systemKey = key.systemKey;
    } catch (error) {
      // No usable key (e.g. balance exhausted): set all of this user's records to idle.
      // The filter and the update must be separate arguments; a single merged object
      // does not express "filter by user, then set status".
      await DataItem.updateMany({ userId: dataItem.userId }, { status: 0 });
      throw new Error('获取 openai key 失败');
    }

    console.log('正在生成一组摘要, ID:', dataItem._id);

    const startTime = Date.now();
    // Captured in a const so the closures below do not depend on the mutable `dataItem`.
    const sourceText = dataItem.text || '';

    // OpenAI client instance.
    const chatAPI = getOpenAIApi(userApiKey || systemKey);
    // Ask the chat model for summaries, once per temperature.
    const abstractResponse = await Promise.allSettled(
      [0.5, 1].map((temperature) =>
        chatAPI.createChatCompletion(
          {
            model: ChatModelNameEnum.GPT35,
            temperature: temperature,
            n: 1,
            messages: [
              systemPrompt,
              {
                role: 'user',
                content: sourceText
              }
            ]
          },
          {
            timeout: 120000,
            httpsAgent
          }
        )
      )
    );

    // Keep only the fulfilled responses.
    const successAbstracts = abstractResponse.filter((item) => item.status === 'fulfilled');
    if (successAbstracts.length === 0) {
      // allSettled never rejects, so a total failure must be surfaced explicitly;
      // otherwise the item would be marked finished with no result and never retried.
      throw new Error('摘要请求全部失败');
    }
    // Raw text returned by the model.
    const rawContents: string[] = successAbstracts.map(
      (item: any) => item?.value?.data.choices[0].message?.content || ''
    );
    // Individual summaries extracted from the raw text.
    const splitContents = rawContents.map((content) => splitText(content)).flat();

    // Request an embedding vector for every summary.
    const vectorResponse = await Promise.allSettled(
      splitContents.map((item) =>
        chatAPI.createEmbedding({
          model: 'text-embedding-ada-002',
          input: item.abstract
        })
      )
    );
    // Keep the summaries whose embedding request succeeded, paired with their vector.
    const vectorSuccessResponse = vectorResponse.flatMap((item: any, i) =>
      item.status === 'fulfilled'
        ? [
            {
              abstract: splitContents[i].abstract,
              abstractVector: item?.value?.data?.data?.[0]?.embedding
            }
          ]
        : []
    );

    // Store the results and mark the item as finished.
    await DataItem.findByIdAndUpdate(dataItem._id, {
      status: 0,
      $push: {
        rawResponse: {
          $each: rawContents
        },
        result: {
          $each: vectorSuccessResponse
        }
      }
    });

    // Billing: only when the system key was used and something was produced.
    !userApiKey &&
      splitContents.length > 0 &&
      pushSplitDataBill({
        userId: dataItem.userId,
        type: 'abstract',
        text:
          systemPrompt.content +
          dataItem.text +
          rawContents.join('') +
          rawContents.join('').substring(0, Math.floor(dataItem.text.length / 10)) // embeddings cost 1/10 of gpt35
      });
    console.log(
      '生成摘要成功,time:',
      `${(Date.now() - startTime) / 1000}s`,
      '摘要数量:',
      splitContents.length
    );
  } catch (error: unknown) {
    console.log('error: 生成摘要错误', dataItem?._id);
    console.log('response:', error);

    if (!dataItem?._id) {
      // The lookup itself failed. Re-running immediately would spin in a tight loop,
      // so clear the flag and let a later trigger start the worker again.
      global.generatingAbstract = false;
      return;
    }

    try {
      await DataItem.findByIdAndUpdate(dataItem._id, {
        // `times` is the value before the decrement below, so retries remain only if it is > 1.
        status: dataItem.times > 1 ? 1 : 0,
        $inc: {
          // One retry consumed.
          times: -1
        }
      });
    } catch (updateError: unknown) {
      // The retry bookkeeping could not be saved. Stop instead of looping on the same
      // item, and clear the flag so the worker is not blocked forever.
      console.log('error: 生成摘要错误', dataItem._id, updateError);
      global.generatingAbstract = false;
      return;
    }
  }

  // Continue with the next queued item (not awaited, so the promise chain does not grow).
  void generateAbstract(true);
}
|
||||||
|
|
||||||
|
/**
 * Extracts the numbered abstracts from a model response.
 *
 * The response is expected to contain entries of the form `(1): text`, one per
 * line. Both ASCII and full-width punctuation (`(1):`) are accepted. The text
 * captured for each entry runs to the end of its line and is trimmed; entries
 * with no text are dropped.
 *
 * @param text - Raw response content returned by the model.
 * @returns One `{ abstract }` object per non-empty entry, in order of appearance;
 *          an empty array when the text contains no entry in the expected format.
 */
function splitText(text: string) {
  // "(" or "(", digits, ")" or ")", ":" or ":", then the rest of the line.
  const regex = /[(\uFF08]\s*\d+\s*[)\uFF09]\s*[:\uFF1A]\s*(.*)/g;

  const result: { abstract: string }[] = [];
  for (const match of text.matchAll(regex)) {
    const abstract = (match[1] ?? '').trim();
    if (abstract) {
      result.push({ abstract });
    }
  }

  return result;
}
|
@@ -20,7 +20,8 @@ export async function generateQA(next = false): Promise<any> {
|
|||||||
// 找出一个需要生成的 dataItem
|
// 找出一个需要生成的 dataItem
|
||||||
dataItem = await DataItem.findOne({
|
dataItem = await DataItem.findOne({
|
||||||
status: { $ne: 0 },
|
status: { $ne: 0 },
|
||||||
times: { $gt: 0 }
|
times: { $gt: 0 },
|
||||||
|
type: 'QA'
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!dataItem) {
|
if (!dataItem) {
|
||||||
@@ -49,62 +50,72 @@ export async function generateQA(next = false): Promise<any> {
|
|||||||
throw new Error('获取 openai key 失败');
|
throw new Error('获取 openai key 失败');
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('正在生成一个QA, ID:', dataItem._id, 'temperature: ', dataItem.temperature / 100);
|
console.log('正在生成一组QA, ID:', dataItem._id);
|
||||||
|
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
||||||
// 获取 openai 请求实例
|
// 获取 openai 请求实例
|
||||||
const chatAPI = getOpenAIApi(userApiKey || systemKey);
|
const chatAPI = getOpenAIApi(userApiKey || systemKey);
|
||||||
// 请求 chatgpt 获取回答
|
// 请求 chatgpt 获取回答
|
||||||
const response = await chatAPI.createChatCompletion(
|
const response = await Promise.allSettled(
|
||||||
{
|
[0, 0.5, 0.8].map((temperature) =>
|
||||||
model: ChatModelNameEnum.GPT35,
|
chatAPI.createChatCompletion(
|
||||||
temperature: dataItem.temperature / 100,
|
|
||||||
n: 1,
|
|
||||||
messages: [
|
|
||||||
systemPrompt,
|
|
||||||
{
|
{
|
||||||
role: 'user',
|
model: ChatModelNameEnum.GPT35,
|
||||||
content: dataItem.text
|
temperature: temperature,
|
||||||
|
n: 1,
|
||||||
|
messages: [
|
||||||
|
systemPrompt,
|
||||||
|
{
|
||||||
|
role: 'user',
|
||||||
|
content: dataItem?.text || ''
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
timeout: 120000,
|
||||||
|
httpsAgent
|
||||||
}
|
}
|
||||||
]
|
)
|
||||||
},
|
)
|
||||||
{
|
);
|
||||||
timeout: 120000,
|
// 过滤出成功的响应
|
||||||
httpsAgent
|
const successResponse = response.filter((item) => item.status === 'fulfilled');
|
||||||
}
|
// 提取响应内容
|
||||||
|
const rawContents: string[] = successResponse.map(
|
||||||
|
(item: any) => item?.value?.data.choices[0].message?.content || ''
|
||||||
);
|
);
|
||||||
const content = response.data.choices[0].message?.content;
|
|
||||||
// 从 content 中提取 QA
|
// 从 content 中提取 QA
|
||||||
const splitResponse = splitText(content || '');
|
const splitResponses = rawContents.map((content) => splitText(content)).flat();
|
||||||
// 插入数据库,并修改状态
|
// 插入数据库,并修改状态
|
||||||
await DataItem.findByIdAndUpdate(dataItem._id, {
|
await DataItem.findByIdAndUpdate(dataItem._id, {
|
||||||
status: dataItem.temperature >= 90 ? 0 : 1, // 需要生成 4 组内容。0,0.3,0.6,0.9
|
status: 0,
|
||||||
temperature: dataItem.temperature >= 90 ? dataItem.temperature : dataItem.temperature + 30,
|
|
||||||
$push: {
|
$push: {
|
||||||
rawResponse: content,
|
rawResponse: {
|
||||||
|
$each: rawContents
|
||||||
|
},
|
||||||
result: {
|
result: {
|
||||||
$each: splitResponse
|
$each: splitResponses
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
// 计费
|
// 计费
|
||||||
!userApiKey &&
|
!userApiKey &&
|
||||||
splitResponse.length > 0 &&
|
splitResponses.length > 0 &&
|
||||||
pushSplitDataBill({
|
pushSplitDataBill({
|
||||||
userId: dataItem.userId,
|
userId: dataItem.userId,
|
||||||
text: systemPrompt.content + dataItem.text + content
|
type: 'QA',
|
||||||
|
text: systemPrompt.content + dataItem.text + rawContents.join('')
|
||||||
});
|
});
|
||||||
console.log(
|
console.log(
|
||||||
'生成QA成功,time:',
|
'生成QA成功,time:',
|
||||||
`${(Date.now() - startTime) / 1000}s`,
|
`${(Date.now() - startTime) / 1000}s`,
|
||||||
'QA数量:',
|
'QA数量:',
|
||||||
splitResponse.length
|
splitResponses.length
|
||||||
);
|
);
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.log('error: 生成QA错误', dataItem?._id);
|
console.log('error: 生成QA错误', dataItem?._id);
|
||||||
console.log('response:', error?.response);
|
console.log('response:', error?.response);
|
||||||
// 重置状态
|
|
||||||
if (dataItem?._id) {
|
if (dataItem?._id) {
|
||||||
await DataItem.findByIdAndUpdate(dataItem._id, {
|
await DataItem.findByIdAndUpdate(dataItem._id, {
|
||||||
status: dataItem.times > 0 ? 1 : 0, // 还有重试次数则可以继续进行
|
status: dataItem.times > 0 ? 1 : 0, // 还有重试次数则可以继续进行
|
||||||
|
@@ -2,6 +2,7 @@ import { connectToDatabase, Bill, User } from '../mongo';
|
|||||||
import { modelList, ChatModelNameEnum } from '@/constants/model';
|
import { modelList, ChatModelNameEnum } from '@/constants/model';
|
||||||
import { encode } from 'gpt-token-utils';
|
import { encode } from 'gpt-token-utils';
|
||||||
import { formatPrice } from '@/utils/user';
|
import { formatPrice } from '@/utils/user';
|
||||||
|
import type { DataType } from '@/types/data';
|
||||||
|
|
||||||
export const pushChatBill = async ({
|
export const pushChatBill = async ({
|
||||||
modelName,
|
modelName,
|
||||||
@@ -59,7 +60,15 @@ export const pushChatBill = async ({
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
export const pushSplitDataBill = async ({ userId, text }: { userId: string; text: string }) => {
|
export const pushSplitDataBill = async ({
|
||||||
|
userId,
|
||||||
|
text,
|
||||||
|
type
|
||||||
|
}: {
|
||||||
|
userId: string;
|
||||||
|
text: string;
|
||||||
|
type: DataType;
|
||||||
|
}) => {
|
||||||
await connectToDatabase();
|
await connectToDatabase();
|
||||||
|
|
||||||
let billId;
|
let billId;
|
||||||
@@ -83,7 +92,7 @@ export const pushSplitDataBill = async ({ userId, text }: { userId: string; text
|
|||||||
// 插入 Bill 记录
|
// 插入 Bill 记录
|
||||||
const res = await Bill.create({
|
const res = await Bill.create({
|
||||||
userId,
|
userId,
|
||||||
type: 'splitData',
|
type,
|
||||||
modelName: ChatModelNameEnum.GPT35,
|
modelName: ChatModelNameEnum.GPT35,
|
||||||
textLen: text.length,
|
textLen: text.length,
|
||||||
tokenLen: tokens.length,
|
tokenLen: tokens.length,
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
import { Schema, model, models, Model } from 'mongoose';
|
import { Schema, model, models, Model } from 'mongoose';
|
||||||
import { DataItemSchema as Datatype } from '@/types/mongoSchema';
|
import { DataSchema as Datatype } from '@/types/mongoSchema';
|
||||||
|
import { DataTypeTextMap } from '@/constants/data';
|
||||||
|
|
||||||
const DataSchema = new Schema({
|
const DataSchema = new Schema({
|
||||||
userId: {
|
userId: {
|
||||||
@@ -15,6 +16,11 @@ const DataSchema = new Schema({
|
|||||||
type: Date,
|
type: Date,
|
||||||
default: () => new Date()
|
default: () => new Date()
|
||||||
},
|
},
|
||||||
|
type: {
|
||||||
|
type: String,
|
||||||
|
required: true,
|
||||||
|
enum: Object.keys(DataTypeTextMap)
|
||||||
|
},
|
||||||
isDeleted: {
|
isDeleted: {
|
||||||
type: Boolean,
|
type: Boolean,
|
||||||
default: false
|
default: false
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
import type { DataItemSchema as DataItemType } from '@/types/mongoSchema';
|
import type { DataItemSchema as DataItemType } from '@/types/mongoSchema';
|
||||||
import { Schema, model, models, Model } from 'mongoose';
|
import { Schema, model, models, Model } from 'mongoose';
|
||||||
|
import { DataTypeTextMap } from '@/constants/data';
|
||||||
|
|
||||||
const DataItemSchema = new Schema({
|
const DataItemSchema = new Schema({
|
||||||
userId: {
|
userId: {
|
||||||
@@ -12,19 +13,23 @@ const DataItemSchema = new Schema({
|
|||||||
ref: 'data',
|
ref: 'data',
|
||||||
required: true
|
required: true
|
||||||
},
|
},
|
||||||
|
type: {
|
||||||
|
type: String,
|
||||||
|
required: true,
|
||||||
|
enum: Object.keys(DataTypeTextMap)
|
||||||
|
},
|
||||||
times: {
|
times: {
|
||||||
|
// 剩余重试次数
|
||||||
type: Number,
|
type: Number,
|
||||||
default: 3
|
default: 3
|
||||||
},
|
},
|
||||||
text: {
|
text: {
|
||||||
|
// 文本内容
|
||||||
type: String,
|
type: String,
|
||||||
required: true
|
required: true
|
||||||
},
|
},
|
||||||
temperature: {
|
|
||||||
type: Number,
|
|
||||||
required: true
|
|
||||||
},
|
|
||||||
rawResponse: {
|
rawResponse: {
|
||||||
|
// 原始拆分结果
|
||||||
type: [String],
|
type: [String],
|
||||||
default: []
|
default: []
|
||||||
},
|
},
|
||||||
@@ -33,11 +38,21 @@ const DataItemSchema = new Schema({
|
|||||||
{
|
{
|
||||||
q: {
|
q: {
|
||||||
type: String,
|
type: String,
|
||||||
required: true
|
default: ''
|
||||||
},
|
},
|
||||||
a: {
|
a: {
|
||||||
type: String,
|
type: String,
|
||||||
required: true
|
default: ''
|
||||||
|
},
|
||||||
|
abstract: {
|
||||||
|
// 摘要
|
||||||
|
type: String,
|
||||||
|
default: ''
|
||||||
|
},
|
||||||
|
abstractVector: {
|
||||||
|
// 摘要对应的向量
|
||||||
|
type: [Number],
|
||||||
|
default: []
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@@ -1,5 +1,7 @@
|
|||||||
import mongoose from 'mongoose';
|
import mongoose from 'mongoose';
|
||||||
import { generateQA } from './events/generateQA';
|
import { generateQA } from './events/generateQA';
|
||||||
|
import { generateAbstract } from './events/generateAbstract';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 连接 MongoDB 数据库
|
* 连接 MongoDB 数据库
|
||||||
*/
|
*/
|
||||||
@@ -24,8 +26,8 @@ export async function connectToDatabase(): Promise<void> {
|
|||||||
global.mongodb = null;
|
global.mongodb = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 递归 QA 生成
|
|
||||||
generateQA();
|
generateQA();
|
||||||
|
generateAbstract();
|
||||||
}
|
}
|
||||||
|
|
||||||
export * from './models/authCode';
|
export * from './models/authCode';
|
||||||
|
2
src/types/data.d.ts
vendored
2
src/types/data.d.ts
vendored
@@ -1,5 +1,7 @@
|
|||||||
import type { DataSchema } from './mongoSchema';
|
import type { DataSchema } from './mongoSchema';
|
||||||
|
|
||||||
|
export type DataType = 'QA' | 'abstract';
|
||||||
|
|
||||||
export interface DataListItem extends DataSchema {
|
export interface DataListItem extends DataSchema {
|
||||||
trainingData: number;
|
trainingData: number;
|
||||||
totalData: number;
|
totalData: number;
|
||||||
|
1
src/types/index.d.ts
vendored
1
src/types/index.d.ts
vendored
@@ -3,6 +3,7 @@ import type { Mongoose } from 'mongoose';
|
|||||||
declare global {
|
declare global {
|
||||||
var mongodb: Mongoose | string | null;
|
var mongodb: Mongoose | string | null;
|
||||||
var generatingQA: boolean;
|
var generatingQA: boolean;
|
||||||
|
var generatingAbstract: boolean;
|
||||||
var QRCode: any;
|
var QRCode: any;
|
||||||
interface Window {
|
interface Window {
|
||||||
['pdfjs-dist/build/pdf']: any;
|
['pdfjs-dist/build/pdf']: any;
|
||||||
|
9
src/types/mongoSchema.d.ts
vendored
9
src/types/mongoSchema.d.ts
vendored
@@ -1,5 +1,6 @@
|
|||||||
import type { ChatItemType } from './chat';
|
import type { ChatItemType } from './chat';
|
||||||
import { ModelStatusEnum, TrainingStatusEnum, ChatModelNameEnum } from '@/constants/model';
|
import { ModelStatusEnum, TrainingStatusEnum, ChatModelNameEnum } from '@/constants/model';
|
||||||
|
import type { DataType } from './data';
|
||||||
|
|
||||||
export type ServiceName = 'openai';
|
export type ServiceName = 'openai';
|
||||||
|
|
||||||
@@ -102,19 +103,21 @@ export interface DataSchema {
|
|||||||
userId: string;
|
userId: string;
|
||||||
name: string;
|
name: string;
|
||||||
createTime: string;
|
createTime: string;
|
||||||
|
type: DataType;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface DataItemSchema {
|
export interface DataItemSchema {
|
||||||
_id: string;
|
_id: string;
|
||||||
userId: string;
|
userId: string;
|
||||||
dataId: string;
|
dataId: string;
|
||||||
|
type: DataType;
|
||||||
times: number;
|
times: number;
|
||||||
temperature: number;
|
|
||||||
text: string;
|
text: string;
|
||||||
rawResponse: string[];
|
rawResponse: string[];
|
||||||
result: {
|
result: {
|
||||||
q: string;
|
q?: string;
|
||||||
a: string;
|
a?: string;
|
||||||
|
abstract?: string;
|
||||||
}[];
|
}[];
|
||||||
status: 0 | 1 | 2;
|
status: 0 | 1 | 2;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user