This commit is contained in:
Archer
2023-10-11 17:18:43 +08:00
committed by GitHub
parent d0041a98b4
commit bcf9491999
51 changed files with 852 additions and 460 deletions

View File

@@ -15,16 +15,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
await connectToDatabase();
await authUser({ req, authRoot: true });
console.log('add index');
await PgClient.query(
`
ALTER TABLE modeldata
ALTER COLUMN source TYPE VARCHAR(256),
ALTER COLUMN file_id TYPE VARCHAR(256);
CREATE INDEX IF NOT EXISTS modelData_fileId_index ON modeldata (file_id);
`
);
console.log('index success');
console.log('count rows');
// Fetch the distinct file_id values
const { rows } = await PgClient.query(`SELECT DISTINCT file_id
@@ -36,8 +26,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
await init(rows.slice(i, i + limit), initFileIds);
console.log(i);
}
console.log('filter success');
console.log('start update');
for (let i = 0; i < initFileIds.length; i++) {
await PgClient.query(`UPDATE ${PgDatasetTableName}
@@ -49,9 +37,11 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const { rows: emptyIds } = await PgClient.query(
`SELECT id FROM ${PgDatasetTableName} WHERE file_id IS NULL OR file_id=''`
);
console.log('filter success');
console.log(emptyIds.length);
await delay(5000);
console.log('start update');
async function start(start: number) {
for (let i = start; i < emptyIds.length; i += limit) {
@@ -65,12 +55,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
start(i);
}
// await PgClient.query(
// `UPDATE ${PgDatasetTableName}
// SET file_id = '${DatasetSpecialIdEnum.manual}'
// WHERE file_id IS NULL OR file_id = ''`
// );
console.log('update success');
jsonRes(res, {

View File

@@ -4,17 +4,17 @@ import { jsonRes } from '@/service/response';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { App } from '@/service/models/app';
import { AppModuleItemType } from '@/types/app';
export type Props = {
name: string;
avatar?: string;
modules: AppModuleItemType[];
};
import type { CreateAppParams } from '@/types/app';
import { AppTypeEnum } from '@/constants/app';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
const { name, avatar, modules } = req.body as Props;
const {
name = 'APP',
avatar,
type = AppTypeEnum.advanced,
modules
} = req.body as CreateAppParams;
if (!name || !Array.isArray(modules)) {
throw new Error('缺少参数');
@@ -38,7 +38,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
avatar,
name,
userId,
modules
modules,
type
});
jsonRes(res, {

View File

@@ -9,7 +9,7 @@ import { authApp } from '@/service/utils/auth';
/* Update an app */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
const { name, avatar, type, chat, share, intro, modules } = req.body as AppUpdateParams;
const { name, avatar, type, share, intro, modules } = req.body as AppUpdateParams;
const { appId } = req.query as { appId: string };
if (!appId) {
@@ -37,7 +37,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
type,
avatar,
intro,
chat,
...(share && {
'share.isShare': share.isShare,
'share.isShareDetail': share.isShareDetail

View File

@@ -13,6 +13,7 @@ import { getVectorModel } from '@/service/utils/data';
import { getVector } from '@/pages/api/openapi/plugin/vector';
import { DatasetDataItemType } from '@/types/core/dataset/data';
import { countPromptTokens } from '@/utils/common/tiktoken';
import { authFileIdValid } from '@/service/dataset/auth';
export type Props = {
kbId: string;
@@ -72,6 +73,8 @@ export async function getVectorAndInsertDataset(
return Promise.reject('已经存在完全一致的数据');
}
await authFileIdValid(data.file_id);
const { vectors } = await getVector({
model: kb.vectorModel,
input: [q],

View File

@@ -1,16 +1,17 @@
/* push data to training queue */
import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { connectToDatabase, TrainingData, KB } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { authKb } from '@/service/utils/auth';
import { withNextCors } from '@/service/utils/tools';
import { PgDatasetTableName, TrainingModeEnum } from '@/constants/plugin';
import { TrainingModeEnum } from '@/constants/plugin';
import { startQueue } from '@/service/utils/tools';
import { PgClient } from '@/service/pg';
import { getVectorModel } from '@/service/utils/data';
import { DatasetDataItemType } from '@/types/core/dataset/data';
import { countPromptTokens } from '@/utils/common/tiktoken';
import type { PushDataProps, PushDataResponse } from '@/api/core/dataset/data.d';
import { authFileIdValid } from '@/service/dataset/auth';
const modeMap = {
[TrainingModeEnum.index]: true,
@@ -80,69 +81,49 @@ export async function pushDataToKb({
[TrainingModeEnum.qa]: global.qaModel.maxToken * 0.8
};
// 过滤重复的 qa 内容
// filter repeat or equal content
const set = new Set();
const filterData: DatasetDataItemType[] = [];
const filterResult: Record<string, DatasetDataItemType[]> = {
success: [],
overToken: [],
fileIdInvalid: [],
error: []
};
data.forEach((item) => {
if (!item.q) return;
await Promise.all(
data.map(async (item) => {
if (!item.q) {
filterResult.error.push(item);
return;
}
const text = item.q + item.a;
const text = item.q + item.a;
// count q token
const token = countPromptTokens(item.q, 'system');
// count q token
const token = countPromptTokens(item.q, 'system');
if (token > modeMaxToken[mode]) {
return;
}
if (token > modeMaxToken[mode]) {
filterResult.overToken.push(item);
return;
}
if (!set.has(text)) {
filterData.push(item);
set.add(text);
}
});
try {
await authFileIdValid(item.file_id);
} catch (error) {
filterResult.fileIdInvalid.push(item);
return;
}
// 数据库去重
const insertData = (
await Promise.allSettled(
filterData.map(async (data) => {
let { q, a } = data;
if (mode !== TrainingModeEnum.index) {
return Promise.resolve(data);
}
if (!q) {
return Promise.reject('q为空');
}
q = q.replace(/\\n/g, '\n').trim().replace(/'/g, '"');
a = a.replace(/\\n/g, '\n').trim().replace(/'/g, '"');
// Exactly the same data, not push
try {
const { rows } = await PgClient.query(`
SELECT COUNT(*) > 0 AS exists
FROM ${PgDatasetTableName}
WHERE md5(q)=md5('${q}') AND md5(a)=md5('${a}') AND user_id='${userId}' AND kb_id='${kbId}'
`);
const exists = rows[0]?.exists || false;
if (exists) {
return Promise.reject('已经存在');
}
} catch (error) {
console.log(error);
}
return Promise.resolve(data);
})
)
)
.filter((item) => item.status === 'fulfilled')
.map<DatasetDataItemType>((item: any) => item.value);
if (!set.has(text)) {
filterResult.success.push(item);
set.add(text);
}
})
);
// Insert the records
const insertRes = await TrainingData.insertMany(
insertData.map((item) => ({
filterResult.success.map((item) => ({
...item,
userId,
kbId,
@@ -154,9 +135,11 @@ export async function pushDataToKb({
);
insertRes.length > 0 && startQueue();
delete filterResult.success;
return {
insertLen: insertRes.length
insertLen: insertRes.length,
...filterResult
};
}

View File

@@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response';
import { connectToDatabase, TrainingData } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { GridFSStorage } from '@/service/lib/gridfs';
import { PgClient } from '@/service/pg';
import { PgClient, updateDataFileId } from '@/service/pg';
import { PgDatasetTableName } from '@/constants/plugin';
import { FileStatusEnum } from '@/constants/dataset';
import { strIsLink } from '@fastgpt/common/tools/str';
@@ -35,8 +35,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
.join(' ')}
${searchText ? `AND source ILIKE '%${searchText}%'` : ''}`;
const [{ rows }, { rowCount: total }] = await Promise.all([
PgClient.query(`SELECT file_id, COUNT(*) AS count
let [{ rows }, { rowCount: total }] = await Promise.all([
PgClient.query<{ file_id: string; count: number }>(`SELECT file_id, COUNT(*) AS count
FROM ${PgDatasetTableName}
where ${pgWhere}
GROUP BY file_id
@@ -49,6 +49,21 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
`)
]);
// If fileId is invalid, reset it to manual.
// A file_id counts as valid when it is a link or exactly 24 characters long
// (presumably a stored file's id; confirm against the file storage layer).
await Promise.all(
  rows.map((row) => {
    // A NULL file_id comes back as a non-string value; reading `.length` on it
    // would throw and reject the whole request, so skip it here.
    if (typeof row.file_id !== 'string') return;

    if (!strIsLink(row.file_id) && row.file_id.length !== 24) {
      return updateDataFileId({
        oldFileId: row.file_id,
        userId,
        newFileId: DatasetSpecialIdEnum.manual
      });
    }
  })
);
// Keep only link or file rows; rows without a usable file_id are dropped
// instead of crashing the filter.
rows = rows.filter(
  (row) =>
    typeof row.file_id === 'string' && (strIsLink(row.file_id) || row.file_id.length === 24)
);
// find files
const gridFs = new GridFSStorage('dataset', userId);
const collection = gridFs.Collection();
@@ -96,6 +111,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const data = await Promise.all([
getSpecialData(),
...rows.map(async (row) => {
if (!row.file_id) return null;
// link data
if (strIsLink(row.file_id)) {
const { rows } = await PgClient.select(PgDatasetTableName, {

View File

@@ -47,7 +47,6 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
data: response?.[2]?.rows || []
});
} catch (err) {
console.log(err);
jsonRes(res, {
code: 500,
error: err

View File

@@ -2,7 +2,7 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@/service/response';
import { authBalanceByUid, authUser } from '@/service/utils/auth';
import { withNextCors } from '@/service/utils/tools';
import { getAIChatApi, axiosConfig } from '@fastgpt/core/ai/config';
import { getAIApi } from '@fastgpt/core/ai/config';
import { pushGenerateVectorBill } from '@/service/common/bill/push';
type Props = {
@@ -54,29 +54,31 @@ export async function getVector({
}
// Get the AI API client
const chatAPI = getAIChatApi();
const ai = getAIApi();
// Convert the input content into vectors
const result = await chatAPI
.createEmbedding(
const result = await ai.embeddings
.create(
{
model,
input
},
{
timeout: 60000,
...axiosConfig()
timeout: 60000
}
)
.then(async (res) => {
if (!res.data?.data?.[0]?.embedding) {
console.log(res.data);
if (!res.data) {
return Promise.reject('Embedding API 404');
}
if (!res?.data?.[0]?.embedding) {
console.log(res?.data);
// @ts-ignore
return Promise.reject(res.data?.err?.message || 'Embedding API Error');
}
return {
tokenLen: res.data.usage.total_tokens || 0,
vectors: await Promise.all(res.data.data.map((item) => unityDimensional(item.embedding)))
tokenLen: res.usage.total_tokens || 0,
vectors: await Promise.all(res.data.map((item) => unityDimensional(item.embedding)))
};
});

View File

@@ -5,7 +5,7 @@ import { User } from '@/service/models/user';
import { connectToDatabase } from '@/service/mongo';
import { authUser } from '@/service/utils/auth';
import { UserUpdateParams } from '@/types/user';
import { axiosConfig, getAIChatApi, openaiBaseUrl } from '@fastgpt/core/ai/config';
import { getAIApi, openaiBaseUrl } from '@fastgpt/core/ai/config';
/* update user info */
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
@@ -22,20 +22,15 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
const baseUrl = openaiAccount?.baseUrl || openaiBaseUrl;
openaiAccount.baseUrl = baseUrl;
const chatAPI = getAIChatApi(openaiAccount);
const ai = getAIApi(openaiAccount);
const response = await chatAPI.createChatCompletion(
{
model: 'gpt-3.5-turbo',
max_tokens: 1,
messages: [{ role: 'user', content: 'hi' }]
},
{
...axiosConfig(openaiAccount)
}
);
if (response?.data?.choices?.[0]?.message?.content === undefined) {
throw new Error(JSON.stringify(response?.data));
const response = await ai.chat.completions.create({
model: 'gpt-3.5-turbo',
max_tokens: 1,
messages: [{ role: 'user', content: 'hi' }]
});
if (response?.choices?.[0]?.message?.content === undefined) {
throw new Error('Key response is empty');
}
}

View File

@@ -6,6 +6,7 @@ import dynamic from 'next/dynamic';
import { defaultApp } from '@/constants/model';
import { useToast } from '@/hooks/useToast';
import { useQuery } from '@tanstack/react-query';
import { feConfigs } from '@/store/static';
import Tabs from '@/components/Tabs';
import SideTabs from '@/components/SideTabs';
@@ -52,7 +53,9 @@ const AppDetail = ({ currentTab }: { currentTab: `${TabEnum}` }) => {
const tabList = useMemo(
() => [
{ label: '简易配置', id: TabEnum.basicEdit, icon: 'overviewLight' },
{ label: '高级编排', id: TabEnum.adEdit, icon: 'settingLight' },
...(feConfigs?.hide_app_flow
? []
: [{ label: '高级编排', id: TabEnum.adEdit, icon: 'settingLight' }]),
{ label: '外部使用', id: TabEnum.outLink, icon: 'shareLight' },
{ label: '对话日志', id: TabEnum.logs, icon: 'logsLight' },
{ label: '立即对话', id: TabEnum.startChat, icon: 'chat' }

View File

@@ -21,6 +21,7 @@ import { useRouter } from 'next/router';
import { appTemplates } from '@/constants/flow/ModuleTemplate';
import { useGlobalStore } from '@/store/global';
import { useRequest } from '@/hooks/useRequest';
import { feConfigs } from '@/store/static';
import Avatar from '@/components/Avatar';
import MyTooltip from '@/components/MyTooltip';
import MyModal from '@/components/MyModal';
@@ -74,10 +75,15 @@ const CreateModal = ({ onClose, onSuccess }: { onClose: () => void; onSuccess: (
const { mutate: onclickCreate, isLoading: creating } = useRequest({
mutationFn: async (data: FormType) => {
const template = appTemplates.find((item) => item.id === data.templateId);
if (!template) {
return Promise.reject('模板不存在');
}
return postCreateApp({
avatar: data.avatar,
name: data.name,
modules: appTemplates.find((item) => item.id === data.templateId)?.modules || []
type: template.type,
modules: template.modules || []
});
},
onSuccess(id: string) {
@@ -118,48 +124,52 @@ const CreateModal = ({ onClose, onSuccess }: { onClose: () => void; onSuccess: (
})}
/>
</Flex>
<Box mt={[4, 7]} mb={[0, 3]} color={'myGray.800'} fontWeight={'bold'}>
</Box>
<Grid
userSelect={'none'}
gridTemplateColumns={['repeat(1,1fr)', 'repeat(2,1fr)']}
gridGap={[2, 4]}
>
{appTemplates.map((item) => (
<Card
key={item.id}
border={theme.borders.base}
p={3}
borderRadius={'md'}
cursor={'pointer'}
boxShadow={'sm'}
{...(getValues('templateId') === item.id
? {
bg: 'myWhite.600'
}
: {
_hover: {
boxShadow: 'md'
}
})}
onClick={() => {
setValue('templateId', item.id);
setRefresh((state) => !state);
}}
{!feConfigs?.hide_app_flow && (
<>
<Box mt={[4, 7]} mb={[0, 3]} color={'myGray.800'} fontWeight={'bold'}>
</Box>
<Grid
userSelect={'none'}
gridTemplateColumns={['repeat(1,1fr)', 'repeat(2,1fr)']}
gridGap={[2, 4]}
>
<Flex alignItems={'center'}>
<Avatar src={item.avatar} borderRadius={'md'} w={'20px'} />
<Box ml={3} fontWeight={'bold'}>
{item.name}
</Box>
</Flex>
<Box fontSize={'sm'} mt={4}>
{item.intro}
</Box>
</Card>
))}
</Grid>
{appTemplates.map((item) => (
<Card
key={item.id}
border={theme.borders.base}
p={3}
borderRadius={'md'}
cursor={'pointer'}
boxShadow={'sm'}
{...(getValues('templateId') === item.id
? {
bg: 'myWhite.600'
}
: {
_hover: {
boxShadow: 'md'
}
})}
onClick={() => {
setValue('templateId', item.id);
setRefresh((state) => !state);
}}
>
<Flex alignItems={'center'}>
<Avatar src={item.avatar} borderRadius={'md'} w={'20px'} />
<Box ml={3} fontWeight={'bold'}>
{item.name}
</Box>
</Flex>
<Box fontSize={'sm'} mt={4}>
{item.intro}
</Box>
</Card>
))}
</Grid>
</>
)}
</ModalBody>
<ModalFooter>

View File

@@ -263,6 +263,10 @@ export function RawFileText({ fileId, filename = '', ...props }: RawFileTextProp
const { setLoading } = useGlobalStore();
const hasFile = useMemo(() => fileId && !datasetSpecialIds.includes(fileId), [fileId]);
const formatName = useMemo(
() => (filename.startsWith('kb') ? t(filename) : filename),
[filename, t]
);
return (
<MyTooltip label={hasFile ? t('file.Click to view file') || '' : ''} shouldWrapChildren={false}>
@@ -293,7 +297,7 @@ export function RawFileText({ fileId, filename = '', ...props }: RawFileTextProp
: {})}
{...props}
>
{t(filename)}
{formatName}
</Box>
</MyTooltip>
);