doc and config rerank (#475)

This commit is contained in:
Archer
2023-11-16 10:46:47 +08:00
committed by GitHub
parent cd3acb44ab
commit 16103029f5
18 changed files with 158 additions and 55 deletions

View File

@@ -317,6 +317,9 @@
},
"deleteDatasetTips": "Are you sure you want to delete the knowledge base? Data cannot be recovered after deletion. Please confirm!",
"deleteFolderTips": "Are you sure you want to delete this folder and all the knowledge bases it contains? Data cannot be recovered after deletion. Please confirm!",
"recall": {
"rerank": "Rerank"
},
"test": {
"noResult": "Search results are empty"
}

View File

@@ -317,6 +317,9 @@
},
"deleteDatasetTips": "确认删除该知识库?删除后数据无法恢复,请确认!",
"deleteFolderTips": "确认删除该文件夹及其包含的所有知识库?删除后数据无法恢复,请确认!",
"recall": {
"rerank": "结果重排"
},
"test": {
"noResult": "搜索结果为空"
}

View File

@@ -9,7 +9,8 @@ import {
useTheme,
Textarea,
Grid,
Divider
Divider,
Switch
} from '@chakra-ui/react';
import Avatar from '@/components/Avatar';
import { useForm } from 'react-hook-form';
@@ -30,6 +31,7 @@ export type KbParamsType = {
searchSimilarity: number;
searchLimit: number;
searchEmptyText: string;
rerank: boolean;
};
export const DatasetSelectModal = ({
@@ -225,10 +227,11 @@ export const DatasetSelectModal = ({
);
};
export const KbParamsModal = ({
export const DatasetParamsModal = ({
searchEmptyText,
searchLimit,
searchSimilarity,
rerank,
onClose,
onChange
}: KbParamsType & { onClose: () => void; onChange: (e: KbParamsType) => void }) => {
@@ -237,7 +240,8 @@ export const KbParamsModal = ({
defaultValues: {
searchEmptyText,
searchLimit,
searchSimilarity
searchSimilarity,
rerank
}
});
@@ -245,6 +249,24 @@ export const KbParamsModal = ({
<MyModal isOpen={true} onClose={onClose} title={'搜索参数调整'} minW={['90vw', '600px']}>
<Flex flexDirection={'column'}>
<ModalBody>
{feConfigs?.isPlus && (
<Box display={['block', 'flex']} py={5} pt={[0, 5]}>
<Box flex={'0 0 100px'} mb={[8, 0]}>
<MyTooltip label={'将召回的结果进行进一步重排,可增加召回率'} forceShow>
<QuestionOutlineIcon ml={1} />
</MyTooltip>
</Box>
<Switch
size={'lg'}
isChecked={getValues('rerank')}
onChange={(e) => {
setValue('rerank', e.target.checked);
setRefresh(!refresh);
}}
/>
</Box>
)}
<Box display={['block', 'flex']} py={5} pt={[0, 5]}>
<Box flex={'0 0 100px'} mb={[8, 0]}>

View File

@@ -14,7 +14,8 @@ import {
useDisclosure,
Button,
useTheme,
Grid
Grid,
Switch
} from '@chakra-ui/react';
import { FlowNodeInputTypeEnum } from '@fastgpt/global/core/module/node/constant';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
@@ -35,6 +36,7 @@ import type { SelectedDatasetType } from '@fastgpt/global/core/module/api.d';
import { useQuery } from '@tanstack/react-query';
import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d';
import type { EditFieldModeType, EditFieldType } from '../modules/FieldEditModal';
import { feConfigs } from '@/web/common/system/staticData';
const FieldEditModal = dynamic(() => import('../modules/FieldEditModal'));
const SelectAppModal = dynamic(() => import('../../SelectAppModal'));
@@ -163,7 +165,10 @@ const RenderInput = ({
editFiledType?: EditFieldModeType;
}) => {
const sortInputs = useMemo(
() => flowInputList.sort((a, b) => (a.key === FlowNodeInputTypeEnum.switch ? -1 : 1)),
() =>
flowInputList
.filter((item) => !item.plusField || feConfigs.isPlus)
.sort((a, b) => (a.key === FlowNodeInputTypeEnum.switch ? -1 : 1)),
[flowInputList]
);
return (
@@ -187,6 +192,9 @@ const RenderInput = ({
{item.type === FlowNodeInputTypeEnum.input && (
<TextInputRender item={item} moduleId={moduleId} />
)}
{item.type === FlowNodeInputTypeEnum.switch && (
<SwitchRender item={item} moduleId={moduleId} />
)}
{item.type === FlowNodeInputTypeEnum.textarea && (
<TextareaRender item={item} moduleId={moduleId} />
)}
@@ -277,6 +285,26 @@ var TextInputRender = React.memo(function TextInputRender({ item, moduleId }: Re
);
});
/**
 * Renders a flow-node input as a large on/off switch.
 *
 * Toggling the switch calls `onChangeNode` with an `updateInput` action for
 * `moduleId`, keyed by `item.key`, whose payload is a copy of `item` with
 * `value` replaced by the new checked state.
 */
var SwitchRender = React.memo(function SwitchRender({ item, moduleId }: RenderProps) {
  // Coerce to a strict boolean: if `item.value` is undefined, passing it straight
  // through would give the Switch `isChecked={undefined}` and leave it uncontrolled
  // until the first toggle writes a boolean back.
  const checked = Boolean(item.value);

  return (
    <Switch
      size={'lg'}
      isChecked={checked}
      onChange={(e) => {
        onChangeNode({
          moduleId,
          type: 'updateInput',
          key: item.key,
          value: {
            ...item,
            value: e.target.checked
          }
        });
      }}
    />
  );
});
var TextareaRender = React.memo(function TextareaRender({ item, moduleId }: RenderProps) {
return (
<Textarea

View File

@@ -292,6 +292,14 @@ export const DatasetSearchModule: FlowModuleTemplateType = {
{ label: '20', value: 20 }
]
},
{
key: 'rerank',
type: FlowNodeInputTypeEnum.switch,
label: '结果重排',
description: '将召回的结果进行进一步重排,可增加召回率',
plusField: true,
value: false
},
Input_Template_UserChatInput
],
outputs: [

View File

@@ -22,6 +22,7 @@ export type SearchTestProps = {
datasetId: string;
text: string;
limit?: number;
rerank?: boolean;
};
/* ======= collections =========== */

View File

@@ -16,7 +16,7 @@ import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try {
await connectToDatabase();
const { datasetId, text, limit = 20 } = req.body as SearchTestProps;
const { datasetId, text, limit = 20, rerank } = req.body as SearchTestProps;
if (!datasetId || !text) {
throw new Error('缺少参数');
@@ -38,7 +38,8 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
text,
model: dataset.vectorModel,
limit: Math.min(limit, 50),
datasetIds: [datasetId]
datasetIds: [datasetId],
rerank
});
// push bill

View File

@@ -52,7 +52,7 @@ import MyIcon from '@/components/Icon';
import ChatBox, { type ComponentRef, type StartChatFnProps } from '@/components/ChatBox';
import { addVariable } from '@/components/core/module/VariableEditModal';
import { KbParamsModal } from '@/components/core/module/DatasetSelectModal';
import { DatasetParamsModal } from '@/components/core/module/DatasetSelectModal';
import { AppTypeEnum } from '@fastgpt/global/core/app/constants';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useAppStore } from '@/web/core/app/store/useAppStore';
@@ -585,15 +585,15 @@ const Settings = ({ appId }: { appId: string }) => {
)}
{isOpenKbParams && (
<KbParamsModal
searchEmptyText={getValues('dataset.searchEmptyText')}
searchLimit={getValues('dataset.searchLimit')}
searchSimilarity={getValues('dataset.searchSimilarity')}
<DatasetParamsModal
{...getValues('dataset')}
onClose={onCloseKbParams}
onChange={({ searchEmptyText, searchLimit, searchSimilarity }) => {
setValue('dataset.searchEmptyText', searchEmptyText);
setValue('dataset.searchLimit', searchLimit);
setValue('dataset.searchSimilarity', searchSimilarity);
onChange={(e) => {
setValue('dataset', {
...getValues('dataset'),
...e
});
setRefresh((state) => !state);
}}
/>

View File

@@ -1,5 +1,5 @@
import React, { useEffect, useMemo, useState } from 'react';
import { Box, Textarea, Button, Flex, useTheme, Grid, Progress } from '@chakra-ui/react';
import { Box, Textarea, Button, Flex, useTheme, Grid, Progress, Switch } from '@chakra-ui/react';
import { useDatasetStore } from '@/web/core/dataset/store/dataset';
import { useSearchTestStore, SearchTestStoreItemType } from '@/web/core/dataset/store/searchTest';
import { getDatasetDataItemById, postSearchText } from '@/web/core/dataset/api';
@@ -15,6 +15,7 @@ import MyTooltip from '@/components/MyTooltip';
import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
import { useTranslation } from 'next-i18next';
import { feConfigs } from '@/web/common/system/staticData';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
const Test = ({ datasetId }: { datasetId: string }) => {
@@ -28,6 +29,7 @@ const Test = ({ datasetId }: { datasetId: string }) => {
const [inputText, setInputText] = useState('');
const [datasetTestItem, setDatasetTestItem] = useState<SearchTestStoreItemType>();
const [editInputData, setEditInputData] = useState<InputDataType & { collectionId: string }>();
const [rerank, setRerank] = useState(false);
const kbTestHistory = useMemo(
() => datasetTestList.filter((item) => item.datasetId === datasetId),
@@ -35,7 +37,7 @@ const Test = ({ datasetId }: { datasetId: string }) => {
);
const { mutate, isLoading } = useRequest({
mutationFn: () => postSearchText({ datasetId, text: inputText.trim() }),
mutationFn: () => postSearchText({ datasetId, text: inputText.trim(), rerank, limit: 20 }),
onSuccess(res: SearchDataResponseItemType[]) {
if (!res || res.length === 0) {
return toast({
@@ -91,7 +93,13 @@ const Test = ({ datasetId }: { datasetId: string }) => {
onChange={(e) => setInputText(e.target.value)}
/>
<Flex alignItems={'center'} justifyContent={'flex-end'}>
<Box mr={3} color={'myGray.500'}>
{feConfigs?.isPlus && (
<Flex alignItems={'center'}>
{t('dataset.recall.rerank')}
<Switch ml={1} isChecked={rerank} onChange={(e) => setRerank(e.target.checked)} />
</Flex>
)}
<Box mx={3} color={'myGray.500'}>
{inputText.length}
</Box>
<Button isDisabled={inputText === ''} isLoading={isLoading} onClick={mutate}>

View File

@@ -131,13 +131,15 @@ export async function searchDatasetData({
model,
similarity = 0,
limit,
datasetIds = []
datasetIds = [],
rerank = false
}: {
text: string;
model: string;
similarity?: number; // min distance
limit: number;
datasetIds: string[];
rerank?: boolean;
}) {
const { vectors, tokenLen } = await getVectorsByText({
model,
@@ -219,6 +221,13 @@ export async function searchDatasetData({
return true;
});
if (!rerank) {
return {
searchRes: filterData.slice(0, limit),
tokenLen
};
}
// ReRank result
const reRankResult = await reRankSearchResult({
query: text,

View File

@@ -208,7 +208,7 @@ function filterQuote({
source: item.sourceName,
sourceId: String(item.sourceId || 'UnKnow'),
index: index + 1,
score: item.score.toFixed(4)
score: item.score?.toFixed(4)
});
}
const sliceResult = sliceMessagesTB({

View File

@@ -11,6 +11,7 @@ type DatasetSearchProps = ModuleDispatchProps<{
datasets: SelectedDatasetType;
similarity: number;
limit: number;
rerank: boolean;
userChatInput: string;
}>;
export type KBSearchResponse = {
@@ -20,9 +21,9 @@ export type KBSearchResponse = {
quoteQA: SearchDataResponseItemType[];
};
export async function dispatchDatasetSearch(props: Record<string, any>): Promise<KBSearchResponse> {
export async function dispatchDatasetSearch(props: DatasetSearchProps): Promise<KBSearchResponse> {
const {
inputs: { datasets = [], similarity = 0.4, limit = 5, userChatInput }
inputs: { datasets = [], similarity = 0.4, limit = 5, rerank, userChatInput }
} = props as DatasetSearchProps;
if (datasets.length === 0) {
@@ -41,7 +42,8 @@ export async function dispatchDatasetSearch(props: Record<string, any>): Promise
model: vectorModel.model,
similarity,
limit,
datasetIds: datasets.map((item) => item.datasetId)
datasetIds: datasets.map((item) => item.datasetId),
rerank
});
return {

View File

@@ -95,12 +95,13 @@ export const streamFetch = ({
});
read();
} catch (err: any) {
if (err?.message === 'The user aborted a request.') {
if (abortSignal.signal.aborted) {
return resolve({
responseText,
responseData
});
}
reject({
responseText,
message: getErrText(err, '请求异常')

View File

@@ -20,6 +20,7 @@ export type EditFormType = {
searchSimilarity: number;
searchLimit: number;
searchEmptyText: string;
rerank: boolean;
};
guide: {
welcome: {
@@ -49,7 +50,8 @@ export const getDefaultAppForm = (): EditFormType => {
list: [],
searchSimilarity: 0.4,
searchLimit: 5,
searchEmptyText: ''
searchEmptyText: '',
rerank: false
},
guide: {
welcome: {
@@ -136,6 +138,11 @@ export const appModules2Form = (modules: ModuleItemType[]) => {
inputs: module.inputs,
key: 'limit'
});
updateVal({
formKey: 'dataset.rerank',
inputs: module.inputs,
key: 'rerank'
});
// empty text
const emptyOutputs = module.outputs.find((item) => item.key === 'isEmpty')?.targets || [];
const emptyOutput = emptyOutputs[0];
@@ -475,6 +482,15 @@ const kbTemplate = (formData: EditFormType): ModuleItemType[] => [
type: FlowNodeInputTypeEnum.target,
label: '用户问题',
connected: true
},
{
key: 'rerank',
type: FlowNodeInputTypeEnum.switch,
label: '结果重排',
description: '将召回的结果进行进一步重排,可增加召回率',
plusField: true,
connected: true,
value: formData.dataset.rerank
}
],
outputs: [