mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-23 13:03:50 +00:00
feat: kb data source
This commit is contained in:
@@ -8,8 +8,9 @@ CREATE TABLE IF NOT EXISTS modelData (
|
|||||||
vector VECTOR(1536) NOT NULL,
|
vector VECTOR(1536) NOT NULL,
|
||||||
user_id VARCHAR(50) NOT NULL,
|
user_id VARCHAR(50) NOT NULL,
|
||||||
kb_id VARCHAR(50) NOT NULL,
|
kb_id VARCHAR(50) NOT NULL,
|
||||||
|
source VARCHAR(100),
|
||||||
q TEXT NOT NULL,
|
q TEXT NOT NULL,
|
||||||
a TEXT NOT NULL
|
a TEXT NOT NULL,
|
||||||
);
|
);
|
||||||
-- 索引设置,按需取
|
-- 索引设置,按需取
|
||||||
-- CREATE INDEX IF NOT EXISTS modelData_userId_index ON modelData USING HASH (user_id);
|
-- CREATE INDEX IF NOT EXISTS modelData_userId_index ON modelData USING HASH (user_id);
|
||||||
|
@@ -4,7 +4,6 @@ import type { ImageProps } from '@chakra-ui/react';
|
|||||||
import { LOGO_ICON } from '@/constants/chat';
|
import { LOGO_ICON } from '@/constants/chat';
|
||||||
|
|
||||||
const Avatar = ({ w = '30px', ...props }: ImageProps) => {
|
const Avatar = ({ w = '30px', ...props }: ImageProps) => {
|
||||||
console.log(props.src);
|
|
||||||
return (
|
return (
|
||||||
<Image
|
<Image
|
||||||
fallbackSrc={LOGO_ICON}
|
fallbackSrc={LOGO_ICON}
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
import type { NextApiRequest, NextApiResponse } from 'next';
|
import type { NextApiRequest, NextApiResponse } from 'next';
|
||||||
import type { KbDataItemType } from '@/types/plugin';
|
|
||||||
import { jsonRes } from '@/service/response';
|
import { jsonRes } from '@/service/response';
|
||||||
import { connectToDatabase, TrainingData } from '@/service/mongo';
|
import { connectToDatabase, TrainingData } from '@/service/mongo';
|
||||||
import { authUser } from '@/service/utils/auth';
|
import { authUser } from '@/service/utils/auth';
|
||||||
@@ -9,9 +8,11 @@ import { TrainingModeEnum } from '@/constants/plugin';
|
|||||||
import { startQueue } from '@/service/utils/tools';
|
import { startQueue } from '@/service/utils/tools';
|
||||||
import { PgClient } from '@/service/pg';
|
import { PgClient } from '@/service/pg';
|
||||||
|
|
||||||
|
type DateItemType = { a: string; q: string; source?: string };
|
||||||
|
|
||||||
export type Props = {
|
export type Props = {
|
||||||
kbId: string;
|
kbId: string;
|
||||||
data: { a: KbDataItemType['a']; q: KbDataItemType['q'] }[];
|
data: DateItemType[];
|
||||||
mode: `${TrainingModeEnum}`;
|
mode: `${TrainingModeEnum}`;
|
||||||
prompt?: string;
|
prompt?: string;
|
||||||
};
|
};
|
||||||
@@ -63,10 +64,7 @@ export async function pushDataToKb({
|
|||||||
|
|
||||||
// 过滤重复的 qa 内容
|
// 过滤重复的 qa 内容
|
||||||
const set = new Set();
|
const set = new Set();
|
||||||
const filterData: {
|
const filterData: DateItemType[] = [];
|
||||||
a: string;
|
|
||||||
q: string;
|
|
||||||
}[] = [];
|
|
||||||
|
|
||||||
data.forEach((item) => {
|
data.forEach((item) => {
|
||||||
const text = item.q + item.a;
|
const text = item.q + item.a;
|
||||||
@@ -79,11 +77,12 @@ export async function pushDataToKb({
|
|||||||
// 数据库去重
|
// 数据库去重
|
||||||
const insertData = (
|
const insertData = (
|
||||||
await Promise.allSettled(
|
await Promise.allSettled(
|
||||||
filterData.map(async ({ q, a = '' }) => {
|
filterData.map(async ({ q, a = '', source }) => {
|
||||||
if (mode !== TrainingModeEnum.index) {
|
if (mode !== TrainingModeEnum.index) {
|
||||||
return Promise.resolve({
|
return Promise.resolve({
|
||||||
q,
|
q,
|
||||||
a
|
a,
|
||||||
|
source
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -112,19 +111,21 @@ export async function pushDataToKb({
|
|||||||
}
|
}
|
||||||
return Promise.resolve({
|
return Promise.resolve({
|
||||||
q,
|
q,
|
||||||
a
|
a,
|
||||||
|
source
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.filter((item) => item.status === 'fulfilled')
|
.filter((item) => item.status === 'fulfilled')
|
||||||
.map<{ q: string; a: string }>((item: any) => item.value);
|
.map<DateItemType>((item: any) => item.value);
|
||||||
|
|
||||||
// 插入记录
|
// 插入记录
|
||||||
await TrainingData.insertMany(
|
await TrainingData.insertMany(
|
||||||
insertData.map((item) => ({
|
insertData.map((item) => ({
|
||||||
q: item.q,
|
q: item.q,
|
||||||
a: item.a,
|
a: item.a,
|
||||||
|
source: item.source,
|
||||||
userId,
|
userId,
|
||||||
kbId,
|
kbId,
|
||||||
mode,
|
mode,
|
||||||
|
@@ -32,6 +32,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
|
|||||||
await PgClient.update('modelData', {
|
await PgClient.update('modelData', {
|
||||||
where: [['id', dataId], 'AND', ['user_id', userId]],
|
where: [['id', dataId], 'AND', ['user_id', userId]],
|
||||||
values: [
|
values: [
|
||||||
|
{ key: 'source', value: '手动修改' },
|
||||||
{ key: 'a', value: a.replace(/'/g, '"') },
|
{ key: 'a', value: a.replace(/'/g, '"') },
|
||||||
...(q
|
...(q
|
||||||
? [
|
? [
|
||||||
|
@@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response';
|
|||||||
import { connectToDatabase } from '@/service/mongo';
|
import { connectToDatabase } from '@/service/mongo';
|
||||||
import { authUser } from '@/service/utils/auth';
|
import { authUser } from '@/service/utils/auth';
|
||||||
import { PgClient } from '@/service/pg';
|
import { PgClient } from '@/service/pg';
|
||||||
import type { PgKBDataItemType } from '@/types/pg';
|
import type { KbDataItemType } from '@/types/plugin';
|
||||||
|
|
||||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||||
try {
|
try {
|
||||||
@@ -21,8 +21,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||||||
|
|
||||||
const where: any = [['user_id', userId], 'AND', ['id', dataId]];
|
const where: any = [['user_id', userId], 'AND', ['id', dataId]];
|
||||||
|
|
||||||
const searchRes = await PgClient.select<PgKBDataItemType>('modelData', {
|
const searchRes = await PgClient.select<KbDataItemType>('modelData', {
|
||||||
fields: ['id', 'q', 'a'],
|
fields: ['id', 'q', 'a', 'source'],
|
||||||
where,
|
where,
|
||||||
limit: 1
|
limit: 1
|
||||||
});
|
});
|
||||||
|
@@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response';
|
|||||||
import { connectToDatabase } from '@/service/mongo';
|
import { connectToDatabase } from '@/service/mongo';
|
||||||
import { authUser } from '@/service/utils/auth';
|
import { authUser } from '@/service/utils/auth';
|
||||||
import { PgClient } from '@/service/pg';
|
import { PgClient } from '@/service/pg';
|
||||||
import type { PgKBDataItemType } from '@/types/pg';
|
import type { KbDataItemType } from '@/types/plugin';
|
||||||
|
|
||||||
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
|
||||||
try {
|
try {
|
||||||
@@ -31,11 +31,16 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
|
|||||||
['user_id', userId],
|
['user_id', userId],
|
||||||
'AND',
|
'AND',
|
||||||
['kb_id', kbId],
|
['kb_id', kbId],
|
||||||
...(searchText ? ['AND', `(q LIKE '%${searchText}%' OR a LIKE '%${searchText}%')`] : [])
|
...(searchText
|
||||||
|
? [
|
||||||
|
'AND',
|
||||||
|
`(q LIKE '%${searchText}%' OR a LIKE '%${searchText}%' OR source LIKE '%${searchText}%')`
|
||||||
|
]
|
||||||
|
: [])
|
||||||
];
|
];
|
||||||
|
|
||||||
const searchRes = await PgClient.select<PgKBDataItemType>('modelData', {
|
const searchRes = await PgClient.select<KbDataItemType>('modelData', {
|
||||||
fields: ['id', 'q', 'a'],
|
fields: ['id', 'q', 'a', 'source'],
|
||||||
where,
|
where,
|
||||||
order: [{ field: 'id', mode: 'DESC' }],
|
order: [{ field: 'id', mode: 'DESC' }],
|
||||||
limit: pageSize,
|
limit: pageSize,
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
import React, { useCallback, useState, useRef } from 'react';
|
import React, { useCallback, useState, useRef, useEffect } from 'react';
|
||||||
import {
|
import {
|
||||||
Box,
|
Box,
|
||||||
TableContainer,
|
TableContainer,
|
||||||
@@ -56,7 +56,7 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
|||||||
const { toast } = useToast();
|
const { toast } = useToast();
|
||||||
|
|
||||||
const {
|
const {
|
||||||
data: modelDataList,
|
data: kbDataList,
|
||||||
isLoading,
|
isLoading,
|
||||||
Pagination,
|
Pagination,
|
||||||
total,
|
total,
|
||||||
@@ -72,11 +72,6 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
|||||||
defaultRequest: false
|
defaultRequest: false
|
||||||
});
|
});
|
||||||
|
|
||||||
useQuery(['getKbData', kbId], () => {
|
|
||||||
getData(1);
|
|
||||||
return null;
|
|
||||||
});
|
|
||||||
|
|
||||||
const [editInputData, setEditInputData] = useState<InputDataType>();
|
const [editInputData, setEditInputData] = useState<InputDataType>();
|
||||||
|
|
||||||
const {
|
const {
|
||||||
@@ -101,20 +96,14 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
|||||||
);
|
);
|
||||||
|
|
||||||
const refetchData = useCallback(
|
const refetchData = useCallback(
|
||||||
(num = 1) => {
|
(num = pageNum) => {
|
||||||
getData(num);
|
getData(num);
|
||||||
refetch();
|
refetch();
|
||||||
return null;
|
return null;
|
||||||
},
|
},
|
||||||
[getData, refetch]
|
[getData, pageNum, refetch]
|
||||||
);
|
);
|
||||||
|
|
||||||
// interval get data
|
|
||||||
useQuery(['refetchData'], () => refetchData(pageNum), {
|
|
||||||
refetchInterval: 5000,
|
|
||||||
enabled: qaListLen > 0 || vectorListLen > 0
|
|
||||||
});
|
|
||||||
|
|
||||||
// get al data and export csv
|
// get al data and export csv
|
||||||
const { mutate: onclickExport, isLoading: isLoadingExport = false } = useMutation({
|
const { mutate: onclickExport, isLoading: isLoadingExport = false } = useMutation({
|
||||||
mutationFn: () => getExportDataList(kbId),
|
mutationFn: () => getExportDataList(kbId),
|
||||||
@@ -148,6 +137,17 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// interval get data
|
||||||
|
useQuery(['refetchData'], () => refetchData(1), {
|
||||||
|
refetchInterval: 5000,
|
||||||
|
enabled: qaListLen > 0 || vectorListLen > 0
|
||||||
|
});
|
||||||
|
useQuery(['getKbData', kbId], () => {
|
||||||
|
setSearchText('');
|
||||||
|
getData(1);
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Box position={'relative'}>
|
<Box position={'relative'}>
|
||||||
<Flex>
|
<Flex>
|
||||||
@@ -239,18 +239,22 @@ const DataCard = ({ kbId }: { kbId: string }) => {
|
|||||||
</Tooltip>
|
</Tooltip>
|
||||||
</Th>
|
</Th>
|
||||||
<Th>补充知识</Th>
|
<Th>补充知识</Th>
|
||||||
|
<Th>来源</Th>
|
||||||
<Th>操作</Th>
|
<Th>操作</Th>
|
||||||
</Tr>
|
</Tr>
|
||||||
</Thead>
|
</Thead>
|
||||||
<Tbody>
|
<Tbody>
|
||||||
{modelDataList.map((item) => (
|
{kbDataList.map((item) => (
|
||||||
<Tr key={item.id}>
|
<Tr key={item.id} fontSize={'sm'}>
|
||||||
<Td>
|
<Td>
|
||||||
<Box {...tdStyles.current}>{item.q}</Box>
|
<Box {...tdStyles.current}>{item.q}</Box>
|
||||||
</Td>
|
</Td>
|
||||||
<Td>
|
<Td>
|
||||||
<Box {...tdStyles.current}>{item.a || '-'}</Box>
|
<Box {...tdStyles.current}>{item.a || '-'}</Box>
|
||||||
</Td>
|
</Td>
|
||||||
|
<Td maxW={'15%'} whiteSpace={'pre-wrap'} userSelect={'all'}>
|
||||||
|
{item.source?.trim() || '-'}
|
||||||
|
</Td>
|
||||||
<Td>
|
<Td>
|
||||||
<IconButton
|
<IconButton
|
||||||
mr={5}
|
mr={5}
|
||||||
|
@@ -56,13 +56,14 @@ const InputDataModal = ({
|
|||||||
try {
|
try {
|
||||||
const { insertLen } = await postKbDataFromList({
|
const { insertLen } = await postKbDataFromList({
|
||||||
kbId,
|
kbId,
|
||||||
|
mode: TrainingModeEnum.index,
|
||||||
data: [
|
data: [
|
||||||
{
|
{
|
||||||
a: e.a,
|
a: e.a,
|
||||||
q: e.q
|
q: e.q,
|
||||||
|
source: '手动录入'
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
mode: TrainingModeEnum.index
|
|
||||||
});
|
});
|
||||||
|
|
||||||
if (insertLen === 0) {
|
if (insertLen === 0) {
|
||||||
|
@@ -37,6 +37,7 @@ const SelectJsonModal = ({
|
|||||||
const { toast } = useToast();
|
const { toast } = useToast();
|
||||||
const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: false });
|
const { File, onOpen } = useSelectFile({ fileType: '.csv', multiple: false });
|
||||||
const [fileData, setFileData] = useState<{ q: string; a: string }[]>([]);
|
const [fileData, setFileData] = useState<{ q: string; a: string }[]>([]);
|
||||||
|
const [fileName, setFileName] = useState('');
|
||||||
const [successData, setSuccessData] = useState(0);
|
const [successData, setSuccessData] = useState(0);
|
||||||
const { openConfirm, ConfirmChild } = useConfirm({
|
const { openConfirm, ConfirmChild } = useConfirm({
|
||||||
content: '确认导入该数据集?'
|
content: '确认导入该数据集?'
|
||||||
@@ -46,6 +47,7 @@ const SelectJsonModal = ({
|
|||||||
async (e: File[]) => {
|
async (e: File[]) => {
|
||||||
const file = e[0];
|
const file = e[0];
|
||||||
setSelecting(true);
|
setSelecting(true);
|
||||||
|
setFileName(file.name);
|
||||||
try {
|
try {
|
||||||
const { header, data } = await readCsvContent(file);
|
const { header, data } = await readCsvContent(file);
|
||||||
if (header[0] !== 'question' || header[1] !== 'answer') {
|
if (header[0] !== 'question' || header[1] !== 'answer') {
|
||||||
@@ -75,11 +77,14 @@ const SelectJsonModal = ({
|
|||||||
let success = 0;
|
let success = 0;
|
||||||
|
|
||||||
// subsection import
|
// subsection import
|
||||||
const step = 50;
|
const step = 100;
|
||||||
for (let i = 0; i < fileData.length; i += step) {
|
for (let i = 0; i < fileData.length; i += step) {
|
||||||
const { insertLen } = await postKbDataFromList({
|
const { insertLen } = await postKbDataFromList({
|
||||||
kbId,
|
kbId,
|
||||||
data: fileData.slice(i, i + step),
|
data: fileData.slice(i, i + step).map((item) => ({
|
||||||
|
...item,
|
||||||
|
source: fileName
|
||||||
|
})),
|
||||||
mode: TrainingModeEnum.index
|
mode: TrainingModeEnum.index
|
||||||
});
|
});
|
||||||
success += insertLen || 0;
|
success += insertLen || 0;
|
||||||
@@ -129,13 +134,14 @@ const SelectJsonModal = ({
|
|||||||
>
|
>
|
||||||
点击下载csv模板
|
点击下载csv模板
|
||||||
</Box>
|
</Box>
|
||||||
<Flex alignItems={'center'}>
|
<Box>
|
||||||
<Button isLoading={selecting} isDisabled={uploading} onClick={onOpen}>
|
<Button isLoading={selecting} isDisabled={uploading} onClick={onOpen}>
|
||||||
选择 csv 问答对
|
选择 csv 问答对
|
||||||
</Button>
|
</Button>
|
||||||
|
<Box mt={4}>
|
||||||
<Box ml={4}>一共 {fileData.length} 组数据(下面最多展示100组)</Box>
|
【{fileName}】一共有 {fileData.length} 组数据(下面最多展示100组)
|
||||||
</Flex>
|
</Box>
|
||||||
|
</Box>
|
||||||
</Box>
|
</Box>
|
||||||
<Box flex={'3 0 0'} h={'100%'} overflow={'auto'} p={2} backgroundColor={'blackAlpha.50'}>
|
<Box flex={'3 0 0'} h={'100%'} overflow={'auto'} p={2} backgroundColor={'blackAlpha.50'}>
|
||||||
{fileData.slice(0, 100).map((item, index) => (
|
{fileData.slice(0, 100).map((item, index) => (
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
import React, { useState, useCallback, useMemo } from 'react';
|
import React, { useState, useCallback } from 'react';
|
||||||
import {
|
import {
|
||||||
Box,
|
Box,
|
||||||
Flex,
|
Flex,
|
||||||
@@ -54,15 +54,17 @@ const SelectFileModal = ({
|
|||||||
const [prompt, setPrompt] = useState('');
|
const [prompt, setPrompt] = useState('');
|
||||||
const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true });
|
const { File, onOpen } = useSelectFile({ fileType: fileExtension, multiple: true });
|
||||||
const [mode, setMode] = useState<`${TrainingModeEnum}`>(TrainingModeEnum.index);
|
const [mode, setMode] = useState<`${TrainingModeEnum}`>(TrainingModeEnum.index);
|
||||||
const [fileTextArr, setFileTextArr] = useState<string[]>(['']);
|
const [files, setFiles] = useState<{ filename: string; text: string }[]>([
|
||||||
|
{ filename: '文本1', text: '' }
|
||||||
|
]);
|
||||||
const [splitRes, setSplitRes] = useState<{
|
const [splitRes, setSplitRes] = useState<{
|
||||||
tokens: number;
|
tokens: number;
|
||||||
chunks: string[];
|
chunks: { filename: string; value: string }[];
|
||||||
successChunks: number;
|
successChunks: number;
|
||||||
}>({
|
}>({
|
||||||
tokens: 0,
|
tokens: 0,
|
||||||
chunks: [],
|
successChunks: 0,
|
||||||
successChunks: 0
|
chunks: []
|
||||||
});
|
});
|
||||||
const { openConfirm, ConfirmChild } = useConfirm({
|
const { openConfirm, ConfirmChild } = useConfirm({
|
||||||
content: `确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,未完成的任务会被直接清除。一共 ${
|
content: `确认导入该文件,需要一定时间进行拆解,该任务无法终止!如果余额不足,未完成的任务会被直接清除。一共 ${
|
||||||
@@ -78,21 +80,21 @@ const SelectFileModal = ({
|
|||||||
files.forEach((file) => {
|
files.forEach((file) => {
|
||||||
promise = promise.then(async () => {
|
promise = promise.then(async () => {
|
||||||
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
|
const extension = file?.name?.split('.')?.pop()?.toLowerCase();
|
||||||
let text = '';
|
const text = await (async () => {
|
||||||
switch (extension) {
|
switch (extension) {
|
||||||
case 'txt':
|
case 'txt':
|
||||||
case 'md':
|
case 'md':
|
||||||
text = await readTxtContent(file);
|
return readTxtContent(file);
|
||||||
break;
|
case 'pdf':
|
||||||
case 'pdf':
|
return readPdfContent(file);
|
||||||
text = await readPdfContent(file);
|
case 'doc':
|
||||||
break;
|
case 'docx':
|
||||||
case 'doc':
|
return readDocContent(file);
|
||||||
case 'docx':
|
}
|
||||||
text = await readDocContent(file);
|
return '';
|
||||||
break;
|
})();
|
||||||
}
|
|
||||||
text && setFileTextArr((state) => [text].concat(state));
|
text && setFiles((state) => [{ filename: file.name, text }].concat(state));
|
||||||
return;
|
return;
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -115,11 +117,13 @@ const SelectFileModal = ({
|
|||||||
|
|
||||||
// subsection import
|
// subsection import
|
||||||
let success = 0;
|
let success = 0;
|
||||||
const step = 50;
|
const step = 100;
|
||||||
for (let i = 0; i < splitRes.chunks.length; i += step) {
|
for (let i = 0; i < splitRes.chunks.length; i += step) {
|
||||||
const { insertLen } = await postKbDataFromList({
|
const { insertLen } = await postKbDataFromList({
|
||||||
kbId,
|
kbId,
|
||||||
data: splitRes.chunks.slice(i, i + step).map((text) => ({ q: text, a: '' })),
|
data: splitRes.chunks
|
||||||
|
.slice(i, i + step)
|
||||||
|
.map((item) => ({ q: item.value, a: '', source: item.filename })),
|
||||||
prompt: `下面是"${prompt || '一段长文本'}"`,
|
prompt: `下面是"${prompt || '一段长文本'}"`,
|
||||||
mode
|
mode
|
||||||
});
|
});
|
||||||
@@ -149,26 +153,32 @@ const SelectFileModal = ({
|
|||||||
const onclickImport = useCallback(async () => {
|
const onclickImport = useCallback(async () => {
|
||||||
setBtnLoading(true);
|
setBtnLoading(true);
|
||||||
try {
|
try {
|
||||||
let promise = Promise.resolve();
|
const splitRes = files
|
||||||
|
.map((item) =>
|
||||||
const splitRes = await Promise.all(
|
splitText_token({
|
||||||
fileTextArr
|
text: item.text,
|
||||||
.filter((item) => item)
|
...modeMap[mode]
|
||||||
.map((item) =>
|
})
|
||||||
splitText_token({
|
)
|
||||||
text: item,
|
.map((item, i) => ({
|
||||||
...modeMap[mode]
|
...item,
|
||||||
})
|
filename: files[i].filename
|
||||||
)
|
}))
|
||||||
);
|
.filter((item) => item.tokens > 0);
|
||||||
|
|
||||||
setSplitRes({
|
setSplitRes({
|
||||||
tokens: splitRes.reduce((sum, item) => sum + item.tokens, 0),
|
tokens: splitRes.reduce((sum, item) => sum + item.tokens, 0),
|
||||||
chunks: splitRes.map((item) => item.chunks).flat(),
|
chunks: splitRes
|
||||||
|
.map((item) =>
|
||||||
|
item.chunks.map((chunk) => ({
|
||||||
|
filename: item.filename,
|
||||||
|
value: chunk
|
||||||
|
}))
|
||||||
|
)
|
||||||
|
.flat(),
|
||||||
successChunks: 0
|
successChunks: 0
|
||||||
});
|
});
|
||||||
|
|
||||||
await promise;
|
|
||||||
openConfirm(mutate)();
|
openConfirm(mutate)();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
toast({
|
toast({
|
||||||
@@ -177,7 +187,7 @@ const SelectFileModal = ({
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
setBtnLoading(false);
|
setBtnLoading(false);
|
||||||
}, [fileTextArr, mode, mutate, openConfirm, toast]);
|
}, [files, mode, mutate, openConfirm, toast]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Modal isOpen={true} onClose={onClose} isCentered>
|
<Modal isOpen={true} onClose={onClose} isCentered>
|
||||||
@@ -204,7 +214,7 @@ const SelectFileModal = ({
|
|||||||
>
|
>
|
||||||
<Box mt={2} px={5} maxW={['100%', '70%']} textAlign={'justify'} color={'blackAlpha.600'}>
|
<Box mt={2} px={5} maxW={['100%', '70%']} textAlign={'justify'} color={'blackAlpha.600'}>
|
||||||
支持 {fileExtension} 文件。Gpt会自动对文本进行 QA 拆分,需要较长训练时间,拆分需要消耗
|
支持 {fileExtension} 文件。Gpt会自动对文本进行 QA 拆分,需要较长训练时间,拆分需要消耗
|
||||||
tokens,账号余额不足时,未拆分的数据会被删除。一个{fileTextArr.length}个文本。
|
tokens,账号余额不足时,未拆分的数据会被删除。一个{files.length}个文本。
|
||||||
</Box>
|
</Box>
|
||||||
{/* 拆分模式 */}
|
{/* 拆分模式 */}
|
||||||
<Flex w={'100%'} px={5} alignItems={'center'} mt={4}>
|
<Flex w={'100%'} px={5} alignItems={'center'} mt={4}>
|
||||||
@@ -235,26 +245,26 @@ const SelectFileModal = ({
|
|||||||
)}
|
)}
|
||||||
{/* 文本内容 */}
|
{/* 文本内容 */}
|
||||||
<Box flex={'1 0 0'} px={5} h={0} w={'100%'} overflowY={'auto'} mt={4}>
|
<Box flex={'1 0 0'} px={5} h={0} w={'100%'} overflowY={'auto'} mt={4}>
|
||||||
{fileTextArr.slice(0, 100).map((item, i) => (
|
{files.slice(0, 100).map((item, i) => (
|
||||||
<Box key={i} mb={5}>
|
<Box key={i} mb={5}>
|
||||||
<Box mb={1}>文本{i + 1}</Box>
|
<Box mb={1}>{item.filename}</Box>
|
||||||
<Textarea
|
<Textarea
|
||||||
placeholder="文件内容,空内容会自动忽略"
|
placeholder="文件内容,空内容会自动忽略"
|
||||||
maxLength={-1}
|
maxLength={-1}
|
||||||
rows={10}
|
rows={10}
|
||||||
fontSize={'xs'}
|
fontSize={'xs'}
|
||||||
whiteSpace={'pre-wrap'}
|
whiteSpace={'pre-wrap'}
|
||||||
value={item}
|
value={item.text}
|
||||||
onChange={(e) => {
|
onChange={(e) => {
|
||||||
setFileTextArr([
|
setFiles([
|
||||||
...fileTextArr.slice(0, i),
|
...files.slice(0, i),
|
||||||
e.target.value,
|
{ ...item, text: e.target.value },
|
||||||
...fileTextArr.slice(i + 1)
|
...files.slice(i + 1)
|
||||||
]);
|
]);
|
||||||
}}
|
}}
|
||||||
onBlur={(e) => {
|
onBlur={(e) => {
|
||||||
if (fileTextArr.length > 1 && e.target.value === '') {
|
if (files.length > 1 && e.target.value === '') {
|
||||||
setFileTextArr((state) => [...state.slice(0, i), ...state.slice(i + 1)]);
|
setFiles((state) => [...state.slice(0, i), ...state.slice(i + 1)]);
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
@@ -272,7 +282,7 @@ const SelectFileModal = ({
|
|||||||
取消
|
取消
|
||||||
</Button>
|
</Button>
|
||||||
<Button
|
<Button
|
||||||
isDisabled={uploading || btnLoading || fileTextArr[0] === ''}
|
isDisabled={uploading || btnLoading || files[0]?.text === ''}
|
||||||
onClick={onclickImport}
|
onClick={onclickImport}
|
||||||
>
|
>
|
||||||
{uploading ? (
|
{uploading ? (
|
||||||
|
@@ -61,7 +61,8 @@ export async function generateQA(): Promise<any> {
|
|||||||
userId: 1,
|
userId: 1,
|
||||||
kbId: 1,
|
kbId: 1,
|
||||||
prompt: 1,
|
prompt: 1,
|
||||||
q: 1
|
q: 1,
|
||||||
|
source: 1
|
||||||
});
|
});
|
||||||
|
|
||||||
// task preemption
|
// task preemption
|
||||||
@@ -137,7 +138,10 @@ A2:
|
|||||||
// 创建 向量生成 队列
|
// 创建 向量生成 队列
|
||||||
await pushDataToKb({
|
await pushDataToKb({
|
||||||
kbId,
|
kbId,
|
||||||
data: responseList,
|
data: responseList.map((item) => ({
|
||||||
|
...item,
|
||||||
|
source: data.source
|
||||||
|
})),
|
||||||
userId,
|
userId,
|
||||||
mode: TrainingModeEnum.index
|
mode: TrainingModeEnum.index
|
||||||
});
|
});
|
||||||
|
@@ -57,7 +57,8 @@ export async function generateVector(): Promise<any> {
|
|||||||
userId: 1,
|
userId: 1,
|
||||||
kbId: 1,
|
kbId: 1,
|
||||||
q: 1,
|
q: 1,
|
||||||
a: 1
|
a: 1,
|
||||||
|
source: 1
|
||||||
});
|
});
|
||||||
|
|
||||||
// task preemption
|
// task preemption
|
||||||
@@ -91,6 +92,7 @@ export async function generateVector(): Promise<any> {
|
|||||||
data: vectors.map((vector, i) => ({
|
data: vectors.map((vector, i) => ({
|
||||||
q: dataItems[i].q,
|
q: dataItems[i].q,
|
||||||
a: dataItems[i].a,
|
a: dataItems[i].a,
|
||||||
|
source: data.source,
|
||||||
vector
|
vector
|
||||||
}))
|
}))
|
||||||
});
|
});
|
||||||
|
@@ -38,8 +38,9 @@ const TrainingDataSchema = new Schema({
|
|||||||
type: String,
|
type: String,
|
||||||
default: ''
|
default: ''
|
||||||
},
|
},
|
||||||
vectorList: {
|
source: {
|
||||||
type: Object
|
type: String,
|
||||||
|
default: ''
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@@ -172,12 +172,14 @@ export const insertKbItem = ({
|
|||||||
vector: number[];
|
vector: number[];
|
||||||
q: string;
|
q: string;
|
||||||
a: string;
|
a: string;
|
||||||
|
source?: string;
|
||||||
}[];
|
}[];
|
||||||
}) => {
|
}) => {
|
||||||
return PgClient.insert('modelData', {
|
return PgClient.insert('modelData', {
|
||||||
values: data.map((item) => [
|
values: data.map((item) => [
|
||||||
{ key: 'user_id', value: userId },
|
{ key: 'user_id', value: userId },
|
||||||
{ key: 'kb_id', value: kbId },
|
{ key: 'kb_id', value: kbId },
|
||||||
|
{ key: 'source', value: item.source?.slice(0, 30)?.trim() || '' },
|
||||||
{ key: 'q', value: item.q.replace(/'/g, '"') },
|
{ key: 'q', value: item.q.replace(/'/g, '"') },
|
||||||
{ key: 'a', value: item.a.replace(/'/g, '"') },
|
{ key: 'a', value: item.a.replace(/'/g, '"') },
|
||||||
{ key: 'vector', value: `[${item.vector}]` }
|
{ key: 'vector', value: `[${item.vector}]` }
|
||||||
|
1
src/types/mongoSchema.d.ts
vendored
1
src/types/mongoSchema.d.ts
vendored
@@ -78,6 +78,7 @@ export interface TrainingDataSchema {
|
|||||||
prompt: string;
|
prompt: string;
|
||||||
q: string;
|
q: string;
|
||||||
a: string;
|
a: string;
|
||||||
|
source: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ChatSchema {
|
export interface ChatSchema {
|
||||||
|
7
src/types/pg.d.ts
vendored
7
src/types/pg.d.ts
vendored
@@ -1,7 +0,0 @@
|
|||||||
export interface PgKBDataItemType {
|
|
||||||
id: string;
|
|
||||||
q: string;
|
|
||||||
a: string;
|
|
||||||
user_id: string;
|
|
||||||
kb_id: string;
|
|
||||||
}
|
|
||||||
|
3
src/types/plugin.d.ts
vendored
3
src/types/plugin.d.ts
vendored
@@ -10,8 +10,7 @@ export interface KbDataItemType {
|
|||||||
id: string;
|
id: string;
|
||||||
q: string; // 提问词
|
q: string; // 提问词
|
||||||
a: string; // 原文
|
a: string; // 原文
|
||||||
kbId: string;
|
source: string;
|
||||||
userId: string;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export type TextPluginRequestParams = {
|
export type TextPluginRequestParams = {
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
import mammoth from 'mammoth';
|
import mammoth from 'mammoth';
|
||||||
import Papa from 'papaparse';
|
import Papa from 'papaparse';
|
||||||
import { getOpenAiEncMap } from './plugin/openai';
|
import { getOpenAiEncMap } from './plugin/openai';
|
||||||
|
import { getErrText } from './tools';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 读取 txt 文件内容
|
* 读取 txt 文件内容
|
||||||
@@ -145,7 +146,7 @@ export const fileDownload = ({
|
|||||||
* slideLen - The size of the before and after Text
|
* slideLen - The size of the before and after Text
|
||||||
* maxLen > slideLen
|
* maxLen > slideLen
|
||||||
*/
|
*/
|
||||||
export const splitText_token = async ({
|
export const splitText_token = ({
|
||||||
text,
|
text,
|
||||||
maxLen,
|
maxLen,
|
||||||
slideLen
|
slideLen
|
||||||
@@ -184,8 +185,8 @@ export const splitText_token = async ({
|
|||||||
chunks,
|
chunks,
|
||||||
tokens
|
tokens
|
||||||
};
|
};
|
||||||
} catch (error) {
|
} catch (err) {
|
||||||
return Promise.reject(error);
|
throw new Error(getErrText(err));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user