feat: pg vector 0.8.0;perf: app pdf enhance parse (#3962)

* perf: app pdf enhance parse

* feat: pg vector 0.8.0

* update schema default

* model sort and default image

* perf: i18n

* perf: ui tip
This commit is contained in:
Archer
2025-03-04 13:43:50 +08:00
committed by archer
parent adf5377ebe
commit 54eb5c0547
33 changed files with 181 additions and 66 deletions

View File

@@ -1,6 +1,6 @@
{
"name": "app",
"version": "4.8.23",
"version": "4.9.0",
"private": false,
"scripts": {
"dev": "next dev",

View File

@@ -51,13 +51,13 @@ export const navbarWidth = '64px';
const Layout = ({ children }: { children: JSX.Element }) => {
const router = useRouter();
const { toast } = useToast();
const { t } = useTranslation();
const { Loading } = useLoading();
const { loading, feConfigs, notSufficientModalType, llmModelList, embeddingModelList } =
useSystemStore();
const { isPc } = useSystem();
const { userInfo, teamPlanStatus, isUpdateNotification, setIsUpdateNotification } =
useUserStore();
const { userInfo, isUpdateNotification, setIsUpdateNotification } = useUserStore();
const { setUserDefaultLng } = useI18nLng();
const isChatPage = useMemo(
@@ -87,7 +87,6 @@ const Layout = ({ children }: { children: JSX.Element }) => {
});
// Check model invalid
const { toast } = useToast();
useDebounceEffect(
() => {
if (userInfo?.username === 'root') {

View File

@@ -110,7 +110,7 @@ const OneRowSelector = ({ list, onchange, disableTip, ...props }: Props) => {
</Box>
);
};
const MultipleRowSelector = ({ list, onchange, disableTip, ...props }: Props) => {
const MultipleRowSelector = ({ list, onchange, disableTip, placeholder, ...props }: Props) => {
const { t } = useTranslation();
const { llmModelList, embeddingModelList, ttsModelList, sttModelList, reRankModelList } =
useSystemStore();
@@ -124,7 +124,7 @@ const MultipleRowSelector = ({ list, onchange, disableTip, ...props }: Props) =>
];
return list.map((item) => getModelFromList(allModels, item.value)!).filter(Boolean);
}, [llmModelList, embeddingModelList, ttsModelList, sttModelList, reRankModelList]);
}, [llmModelList, embeddingModelList, ttsModelList, sttModelList, reRankModelList, list]);
const [value, setValue] = useState<string[]>([]);
@@ -174,7 +174,7 @@ const MultipleRowSelector = ({ list, onchange, disableTip, ...props }: Props) =>
}
return renderList.filter((item) => item.children.length > 0);
}, [avatarSize, list, modelList]);
}, [avatarSize, list, modelList, t]);
const onSelect = useCallback(
(e: string[]) => {
@@ -184,7 +184,9 @@ const MultipleRowSelector = ({ list, onchange, disableTip, ...props }: Props) =>
);
const SelectedModel = useMemo(() => {
if (!props.value) return <>{t('common:not_model_config')}</>;
const modelData = getModelFromList(modelList, props.value);
if (!modelData) return <>{t('common:not_model_config')}</>;
setValue([modelData.provider, props.value]);
@@ -201,7 +203,7 @@ const MultipleRowSelector = ({ list, onchange, disableTip, ...props }: Props) =>
<Box>{modelData?.name}</Box>
</HStack>
);
}, [modelList, props.value, avatarSize]);
}, [modelList, props.value, t, avatarSize]);
return (
<Box
@@ -217,6 +219,7 @@ const MultipleRowSelector = ({ list, onchange, disableTip, ...props }: Props) =>
list={selectorList}
onSelect={onSelect}
value={value}
placeholder={placeholder}
rowMinWidth="160px"
ButtonProps={{
isDisabled: !!disableTip,

View File

@@ -9,7 +9,8 @@ import {
HStack,
Switch,
ModalFooter,
BoxProps
BoxProps,
Checkbox
} from '@chakra-ui/react';
import React, { useMemo } from 'react';
import { useTranslation } from 'next-i18next';
@@ -22,6 +23,8 @@ import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import { useMount } from 'ahooks';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import MyDivider from '@fastgpt/web/components/common/MyDivider';
const FileSelect = ({
forbidVision = false,
@@ -95,6 +98,42 @@ const FileSelect = ({
}}
/>
</HStack>
{value.canSelectFile && feConfigs.showCustomPdfParse && (
<>
<HStack justifyContent={'end'} spacing={1} mt={2}>
<Checkbox
isChecked={value.customPdfParse}
onChange={(e) => {
onChange({
...value,
customPdfParse: e.target.checked
});
}}
>
<FormLabel>{t('app:pdf_enhance_parse')}</FormLabel>
</Checkbox>
<QuestionTip label={t('app:pdf_enhance_parse_tips')} />
{feConfigs?.show_pay && (
<MyTag
type={'borderSolid'}
borderColor={'myGray.200'}
bg={'myGray.100'}
color={'primary.600'}
py={1.5}
borderRadius={'md'}
px={3}
whiteSpace={'wrap'}
ml={1}
>
{t('app:pdf_enhance_parse_price', {
price: feConfigs.customPdfParsePrice || 0
})}
</MyTag>
)}
</HStack>
<MyDivider my={2} />
</>
)}
<HStack mt={6}>
<FormLabel flex={'1 0 0'}>{t('app:image_upload')}</FormLabel>
{forbidVision ? (

View File

@@ -563,8 +563,10 @@ const DefaultModelModal = ({
embeddingModelList,
ttsModelList,
sttModelList,
reRankModelList
reRankModelList,
getVlmModelList
} = useSystemStore();
const vlmModelList = useMemo(() => getVlmModelList(), [getVlmModelList]);
// Create a copy of defaultModels for local state management
const [defaultData, setDefaultData] = useState(defaultModels);
@@ -703,6 +705,28 @@ const DefaultModelModal = ({
/>
</Box>
</Box>
<Box>
<Flex mt={4} {...labelStyles} alignItems={'center'}>
<Box mr={0.5}>{t('account_model:vlm_model')}</Box>
<QuestionTip label={t('account_model:vlm_model_tip')} />
</Flex>
<Box flex={1}>
<AIModelSelector
bg="myGray.50"
value={defaultData.datasetImageLLM?.model}
list={vlmModelList.map((item) => ({
value: item.model,
label: item.name
}))}
onchange={(e) => {
setDefaultData((state) => ({
...state,
datasetImageLLM: vlmModelList.find((item) => item.model === e)
}));
}}
/>
</Box>
</Box>
</ModalBody>
<ModalFooter>
<Button variant={'whiteBase'} mr={4} onClick={onClose}>

View File

@@ -35,21 +35,17 @@ import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
import { shadowLight } from '@fastgpt/web/styles/theme';
import AIModelSelector from '@/components/Select/AIModelSelector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { useToast } from '@fastgpt/web/hooks/useToast';
function DataProcess() {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
const { toast } = useToast();
const {
goToNext,
processParamsForm,
chunkSizeField,
minChunkSize,
maxChunkSize,
priceTip,
chunkSize
} = useContextSelector(DatasetImportContext, (v) => v);
const { goToNext, processParamsForm, chunkSizeField, minChunkSize, maxChunkSize } =
useContextSelector(DatasetImportContext, (v) => v);
const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const { getValues, setValue, register, watch } = processParamsForm;
const trainingType = watch('trainingType');
const chunkSettingMode = watch('chunkSettingMode');
@@ -177,9 +173,16 @@ function DataProcess() {
<QuestionTip label={t('dataset:auto_indexes_tips')} />
</HStack>
<HStack flex={'1'} spacing={1}>
<Checkbox {...register('imageIndex')}>
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
</Checkbox>
<MyTooltip
label={!datasetDetail?.vlmModel ? t('common:error_vlm_not_config') : ''}
>
<Checkbox
isDisabled={!datasetDetail?.vlmModel}
{...register('imageIndex')}
>
<FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
</Checkbox>
</MyTooltip>
<QuestionTip label={t('dataset:image_auto_parse_tips')} />
</HStack>
</HStack>

View File

@@ -37,7 +37,7 @@ const Info = ({ datasetId }: { datasetId: string }) => {
const { t } = useTranslation();
const { datasetDetail, loadDatasetDetail, updateDataset, rebuildingCount, trainingCount } =
useContextSelector(DatasetPageContext, (v) => v);
const { feConfigs, datasetModelList, embeddingModelList, getVllmModelList } = useSystemStore();
const { feConfigs, datasetModelList, embeddingModelList, getVlmModelList } = useSystemStore();
const [editedDataset, setEditedDataset] = useState<EditResourceInfoFormType>();
const [editedAPIDataset, setEditedAPIDataset] = useState<EditAPIDatasetInfoFormType>();
@@ -52,7 +52,7 @@ const Info = ({ datasetId }: { datasetId: string }) => {
const vectorModel = watch('vectorModel');
const agentModel = watch('agentModel');
const vllmModelList = useMemo(() => getVllmModelList(), [getVllmModelList]);
const vllmModelList = useMemo(() => getVlmModelList(), [getVlmModelList]);
const vlmModel = watch('vlmModel');
const { ConfirmModal: ConfirmDelModal } = useConfirm({

View File

@@ -40,7 +40,7 @@ const CreateModal = ({
}) => {
const { t } = useTranslation();
const router = useRouter();
const { feConfigs, defaultModels, embeddingModelList, datasetModelList, getVllmModelList } =
const { feConfigs, defaultModels, embeddingModelList, datasetModelList, getVlmModelList } =
useSystemStore();
const { isPc } = useSystem();
@@ -71,7 +71,7 @@ const CreateModal = ({
const filterNotHiddenVectorModelList = embeddingModelList.filter((item) => !item.hidden);
const vllmModelList = useMemo(() => getVllmModelList(), [getVllmModelList]);
const vllmModelList = useMemo(() => getVlmModelList(), [getVlmModelList]);
const form = useForm<CreateDatasetParams>({
defaultValues: {

View File

@@ -5,6 +5,8 @@ import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection
import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/data/constants';
import { PgClient } from '@fastgpt/service/common/vectorStore/pg';
import { PG_ADDRESS } from '@fastgpt/service/common/vectorStore/constants';
// Change every collection whose trainingType=auto to trainingType=chunk
const updateCollections = async () => {
@@ -48,10 +50,19 @@ const updateData = async () => {
}
]);
};
/**
 * Issues `ALTER EXTENSION vector UPDATE` through `PgClient`.
 *
 * Nothing is executed when `PG_ADDRESS` is falsy; the function then
 * resolves without touching the database client.
 */
const upgradePgVector = async () => {
  if (PG_ADDRESS) {
    // Statement text is kept exactly as before, surrounding newlines included.
    const upgradeStatement = `
ALTER EXTENSION vector UPDATE;
`;
    await PgClient.query(upgradeStatement);
  }
};
async function handler(req: NextApiRequest, _res: NextApiResponse) {
await authCert({ req, authRoot: true });
console.log('升级 PG vector 插件');
await upgradePgVector();
console.log('变更所有 collection 的 trainingType 为 chunk');
await updateCollections();

View File

@@ -53,7 +53,7 @@ type State = {
defaultModels: SystemDefaultModelType;
llmModelList: LLMModelItemType[];
datasetModelList: LLMModelItemType[];
getVllmModelList: () => LLMModelItemType[];
getVlmModelList: () => LLMModelItemType[];
embeddingModelList: EmbeddingModelItemType[];
ttsModelList: TTSModelType[];
reRankModelList: ReRankModelItemType[];
@@ -135,7 +135,7 @@ export const useSystemStore = create<State>()(
ttsModelList: [],
reRankModelList: [],
sttModelList: [],
getVllmModelList: () => {
getVlmModelList: () => {
return get().llmModelList.filter((item) => item.vision);
},
initStaticData(res) {