mirror of https://github.com/labring/FastGPT.git
synced 2025-07-23 05:12:39 +00:00
fix: queue (#4485)
@@ -62,3 +62,4 @@ curl --location --request POST 'https://{{host}}/api/admin/initv494' \
 ## 🐛 Fixes
 
 1. When searching apps/knowledge bases, folders could not be clicked to open the next level.
+2. When re-training, parameters were not initialized successfully.
@@ -8,6 +8,7 @@ import { MILVUS_ADDRESS, PG_ADDRESS, OCEANBASE_ADDRESS } from './constants';
 import { MilvusCtrl } from './milvus/class';
 import { setRedisCache, getRedisCache, delRedisCache, CacheKeyEnum } from '../redis/cache';
 import { throttle } from 'lodash';
+import { retryFn } from '@fastgpt/global/common/system/utils';
 
 const getVectorObj = () => {
   if (PG_ADDRESS) return new PgVectorCtrl();
@@ -55,6 +56,7 @@ export const insertDatasetDataVector = async ({
   query: string;
   model: EmbeddingModelItemType;
 }) => {
+  return retryFn(async () => {
     const { vectors, tokens } = await getVectorsByText({
       model,
       input: query,
@@ -71,6 +73,7 @@ export const insertDatasetDataVector = async ({
       tokens,
       insertId
     };
+  });
 };
 
 export const deleteDatasetDataVector = async (props: DelDatasetVectorCtrlProps) => {
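The hunks above wrap the body of insertDatasetDataVector in retryFn, so a transient embedding or vector-store failure is retried instead of failing the whole queue item. As a rough sketch of a helper with the shape this diff assumes (the real retryFn is imported from @fastgpt/global/common/system/utils and its signature and back-off may differ):

```ts
// Hypothetical sketch of a retry helper; the signature and back-off here are
// assumptions, not the actual implementation in @fastgpt/global.
export const retryFn = async <T>(fn: () => Promise<T>, retryTimes = 3): Promise<T> => {
  try {
    return await fn();
  } catch (error) {
    if (retryTimes <= 0) return Promise.reject(error);
    // Wait briefly so transient errors (rate limits, connection resets) can clear.
    await new Promise((resolve) => setTimeout(resolve, 500));
    return retryFn(fn, retryTimes - 1);
  }
};
```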
@@ -115,6 +115,7 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
   const chunkSplitMode = watch('chunkSplitMode');
   const autoIndexes = watch('autoIndexes');
   const indexSize = watch('indexSize');
+  const imageIndex = watch('imageIndex');
 
   const trainingModeList = useMemo(() => {
     const list = Object.entries(DatasetCollectionDataProcessModeMap);
@@ -225,7 +226,11 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
       <HStack gap={[3, 7]}>
         <HStack flex={'1'} spacing={1}>
           <MyTooltip label={!feConfigs?.isPlus ? t('common:commercial_function_tip') : ''}>
-            <Checkbox isDisabled={!feConfigs?.isPlus} {...register('autoIndexes')}>
+            <Checkbox
+              isDisabled={!feConfigs?.isPlus}
+              isChecked={autoIndexes}
+              {...register('autoIndexes')}
+            >
               <FormLabel>{t('dataset:auto_indexes')}</FormLabel>
             </Checkbox>
           </MyTooltip>
@@ -243,6 +248,7 @@ const CollectionChunkForm = ({ form }: { form: UseFormReturn<CollectionChunkForm
         >
           <Checkbox
             isDisabled={!feConfigs?.isPlus || !datasetDetail?.vlmModel}
+            isChecked={imageIndex}
             {...register('imageIndex')}
           >
             <FormLabel>{t('dataset:image_auto_parse')}</FormLabel>
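Both checkboxes in this form now pass the value read via watch() to isChecked, making them controlled so that programmatic form updates (for example, re-training re-initialising the parameters) are reflected in the UI. A reduced, hypothetical example of the same pattern outside the real component:

```tsx
// Illustrative only: a react-hook-form checkbox made controlled with watch().
// The component and field names are invented for this example.
import { Checkbox } from '@chakra-ui/react';
import { useForm } from 'react-hook-form';

type FormValues = { autoIndexes: boolean };

export function AutoIndexesCheckbox() {
  const { register, watch, reset } = useForm<FormValues>({
    defaultValues: { autoIndexes: false }
  });
  const autoIndexes = watch('autoIndexes');

  return (
    <>
      {/* isChecked keeps the rendered state in sync when the value changes
          outside of direct user input (reset, setValue, ...). */}
      <Checkbox isChecked={autoIndexes} {...register('autoIndexes')}>
        Auto indexes
      </Checkbox>
      <button onClick={() => reset({ autoIndexes: true })}>Reset with auto indexes on</button>
    </>
  );
}
```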
@@ -20,17 +20,14 @@ import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
 import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
 import { shadowLight } from '@fastgpt/web/styles/theme';
 import CollectionChunkForm from '../../Form/CollectionChunkForm';
-import { DatasetCollectionDataProcessModeEnum } from '@fastgpt/global/core/dataset/constants';
 
 function DataProcess() {
   const { t } = useTranslation();
   const { feConfigs } = useSystemStore();
 
-  const { goToNext, processParamsForm, chunkSize } = useContextSelector(
-    DatasetImportContext,
-    (v) => v
-  );
-  const { register } = processParamsForm;
+  const { goToNext, processParamsForm } = useContextSelector(DatasetImportContext, (v) => v);
+  const { register, watch } = processParamsForm;
+  const customPdfParseValue = watch('customPdfParse');
 
   const Title = useCallback(({ title }: { title: string }) => {
     return (
@@ -66,7 +63,7 @@ function DataProcess() {
         >
           {feConfigs.showCustomPdfParse && (
             <HStack spacing={1}>
-              <Checkbox {...register('customPdfParse')}>
+              <Checkbox isChecked={customPdfParseValue} {...register('customPdfParse')}>
                 <FormLabel>{t('dataset:pdf_enhance_parse')}</FormLabel>
               </Checkbox>
               <QuestionTip label={t('dataset:pdf_enhance_parse_tips')} />
@@ -17,7 +17,7 @@ import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
 import { countPromptTokens } from '@fastgpt/service/common/string/tiktoken';
 
 const formatIndexes = async ({
-  indexes,
+  indexes = [],
   q,
   a = '',
   indexSize,
@@ -66,7 +66,6 @@ const formatIndexes = async ({
     ];
   };
 
-  indexes = indexes || [];
   // If index not type, set it to custom
   indexes = indexes
     .map((item) => ({
@@ -93,7 +92,7 @@ const formatIndexes = async ({
   indexes = indexes.filter((item) => item.type !== DatasetDataIndexTypeEnum.default);
   indexes.push(...concatDefaultIndexes);
 
-  // Filter same text
+  // Remove same text
   indexes = indexes.filter(
     (item, index, self) => index === self.findIndex((t) => t.text === item.text)
   );
@@ -101,12 +100,16 @@ const formatIndexes = async ({
   const chekcIndexes = (
     await Promise.all(
       indexes.map(async (item) => {
+        if (item.type === DatasetDataIndexTypeEnum.default) {
+          return item;
+        }
+
         // If oversize tokens, split it
        const tokens = await countPromptTokens(item.text);
-        if (tokens > indexSize) {
+        if (tokens > maxIndexSize) {
          const splitText = splitText2Chunks({
            text: item.text,
-            chunkSize: 512,
+            chunkSize: indexSize,
            maxSize: maxIndexSize
          }).chunks;
          return splitText.map((text) => ({
@@ -114,6 +117,7 @@ const formatIndexes = async ({
            type: item.type
          }));
        }
+
        return item;
       })
     )
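Within formatIndexes, default-type indexes are now returned untouched, the oversize check compares against maxIndexSize rather than the requested indexSize, and oversized text is split into chunks of the user-selected indexSize instead of a hard-coded 512. A standalone sketch of that check-and-split step follows; the helper names mirror the diff, but the parameter shapes and types here are assumptions for illustration:

```ts
// Simplified sketch of the oversize-index split. The helpers are injected so the
// snippet stays self-contained; their real signatures may differ.
type DatasetIndex = { type: string; text: string };

const splitOversizeIndex = async ({
  item,
  indexSize,
  maxIndexSize,
  countPromptTokens,
  splitText2Chunks
}: {
  item: DatasetIndex;
  indexSize: number; // user-selected target index size
  maxIndexSize: number; // upper bound the embedding model can take
  countPromptTokens: (text: string) => Promise<number>;
  splitText2Chunks: (p: { text: string; chunkSize: number; maxSize: number }) => { chunks: string[] };
}): Promise<DatasetIndex[]> => {
  const tokens = await countPromptTokens(item.text);

  // Only split when the text no longer fits the embedding model's window.
  if (tokens > maxIndexSize) {
    const { chunks } = splitText2Chunks({
      text: item.text,
      chunkSize: indexSize,
      maxSize: maxIndexSize
    });
    return chunks.map((text) => ({ text, type: item.type }));
  }

  return [item];
};
```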
@@ -164,8 +168,15 @@ export async function insertData2Dataset({
   });
 
   // insert to vector store
-  const result = await Promise.all(
-    newIndexes.map(async (item) => {
+  const results: {
+    tokens: number;
+    index: {
+      dataId: string;
+      type: `${DatasetDataIndexTypeEnum}`;
+      text: string;
+    };
+  }[] = [];
+  for await (const item of newIndexes) {
     const result = await insertDatasetDataVector({
       query: item.text,
       model: embModel,
@@ -173,15 +184,14 @@ export async function insertData2Dataset({
       datasetId,
       collectionId
     });
-    return {
+    results.push({
       tokens: result.tokens,
       index: {
         ...item,
         dataId: result.insertId
       }
-    };
-    })
-  );
+    });
+  }
 
   // 2. Create mongo data
   const [{ _id }] = await MongoDatasetData.create(
@@ -194,7 +204,7 @@ export async function insertData2Dataset({
         q,
         a,
         chunkIndex,
-        indexes: result.map((item) => item.index)
+        indexes: results.map((item) => item.index)
       }
     ],
     { session, ordered: true }
@@ -216,7 +226,7 @@ export async function insertData2Dataset({
 
   return {
     insertId: _id,
-    tokens: result.reduce((acc, cur) => acc + cur.tokens, 0)
+    tokens: results.reduce((acc, cur) => acc + cur.tokens, 0)
   };
 }
 
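The recurring change in insertData2Dataset, and again in updateData2Dataset and rebuildData below, replaces Promise.all over every index with a for await loop that pushes into an explicitly typed results array, so vector inserts run one at a time rather than all at once; this serialisation is presumably the queue behaviour the commit title refers to. A minimal, hypothetical before/after illustration of the shape:

```ts
// Illustrative only: serialising async work instead of running it concurrently.
// insertVector is a stand-in for insertDatasetDataVector, invented for this sketch.
type InsertResult = { tokens: number; insertId: string };

const insertVector = async (text: string): Promise<InsertResult> => ({
  tokens: text.length,
  insertId: Math.random().toString(36).slice(2)
});

// Before: every insert starts at once, which can overload the vector store.
const insertAllConcurrently = (texts: string[]) =>
  Promise.all(texts.map((text) => insertVector(text)));

// After: one insert at a time, results collected into a typed array.
const insertAllSequentially = async (texts: string[]) => {
  const results: InsertResult[] = [];
  for await (const text of texts) {
    results.push(await insertVector(text));
  }
  return results;
};
```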
@@ -303,10 +313,12 @@ export async function updateData2Dataset({
   await mongoData.save();
 
   // 5. insert vector
-  const insertResult = await Promise.all(
-    patchResult
-      .filter((item) => item.type === 'create' || item.type === 'update')
-      .map(async (item) => {
+  const insertResults: {
+    tokens: number;
+  }[] = [];
+  for await (const item of patchResult) {
+    if (item.type === 'delete' || item.type === 'unChange') continue;
+
     // insert new vector and update dateId
     const result = await insertDatasetDataVector({
       query: item.index.text,
@@ -316,12 +328,12 @@ export async function updateData2Dataset({
       collectionId: mongoData.collectionId
     });
     item.index.dataId = result.insertId;
-    return {
+    insertResults.push({
       tokens: result.tokens
-    };
-    })
-  );
-  const tokens = insertResult.reduce((acc, cur) => acc + cur.tokens, 0);
+    });
+  }
+
+  const tokens = insertResults.reduce((acc, cur) => acc + cur.tokens, 0);
 
   const newIndexes = patchResult
     .filter((item) => item.type !== 'delete')
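In updateData2Dataset the old .filter().map() chain becomes an early continue inside the loop, skipping items typed 'delete' or 'unChange' before any vector work is done. A tiny standalone illustration; the item shape is assumed:

```ts
// Assumed, reduced shape; real patchResult items carry more fields.
type PatchItem = { type: 'create' | 'update' | 'delete' | 'unChange'; text: string };

const processPatches = async (
  patchResult: PatchItem[],
  handle: (item: PatchItem) => Promise<void>
) => {
  for await (const item of patchResult) {
    // Skip items that need no new vector, mirroring the old .filter() step.
    if (item.type === 'delete' || item.type === 'unChange') continue;
    await handle(item);
  }
};
```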
@@ -200,8 +200,12 @@ const rebuildData = async ({
 
   // update vector, update dataset_data rebuilding status, delete data from training
   // 1. Insert new vector to dataset_data
-  const updateResult = await Promise.all(
-    mongoData.indexes.map(async (index, i) => {
+  const updateResult: {
+    tokens: number;
+    insertId: string;
+  }[] = [];
+  let i = 0;
+  for await (const index of mongoData.indexes) {
     const result = await insertDatasetDataVector({
       query: index.text,
       model: getEmbeddingModel(trainingData.model),
@@ -210,9 +214,10 @@ const rebuildData = async ({
       collectionId: mongoData.collectionId
     });
     mongoData.indexes[i].dataId = result.insertId;
-    return result;
-    })
-  );
+    updateResult.push(result);
+    i++;
+  }
+
   const { tokens } = await mongoSessionRun(async (session) => {
     // 2. Ensure that the training data is deleted after the Mongo update is successful
     await mongoData.save({ session });