V4.14.4 dev (#6058)

* perf: faq

* index

* delete dataset

* delete dataset

* perf: delete dataset

* init

* fix: outLink UID (#6048)

* perf: query extension

* fix: s3 configs (#6050)

* fix: s3 configs

* s3

---------

Co-authored-by: archer <545436317@qq.com>

* s3 valid string check

* perf: completion api

* fix: model test

* perf: init

* fix: init

* fix: init shell

* fix: faq

---------

Co-authored-by: Roy <whoeverimf5@gmail.com>
This commit is contained in:
Archer
2025-12-08 21:02:38 +08:00
committed by GitHub
parent 44f95038b0
commit bdee2db74a
21 changed files with 599 additions and 618 deletions
@@ -13,7 +13,7 @@ import { useTextCosine } from '../hooks/useTextCosine';
This module can eliminate referential ambiguity and expand queries based on context to improve retrieval.
Submodular Optimization Mode: Generate multiple candidate queries, then use a submodular algorithm to select the optimal query combination.
*/
const title = global.feConfigs?.systemTitle || 'FastAI';
const title = global.feConfigs?.systemTitle || 'Nginx';
const defaultPrompt = `## 你的任务
你作为一个向量检索助手,你的任务是结合历史记录,为"原问题"生成{{count}}个不同版本的"检索词"。这些检索词应该从不同角度探索主题,以提高向量检索的语义丰富度和精度。
@@ -230,7 +230,7 @@ assistant: ${chatBg}
.replace(/ /g, '');
try {
const queries = json5.parse(jsonStr) as string[];
let queries = json5.parse(jsonStr) as string[];
if (!Array.isArray(queries) || queries.length === 0) {
return {
@@ -248,6 +248,8 @@ assistant: ${chatBg}
const { lazyGreedyQuerySelection, embeddingModel: useEmbeddingModel } = useTextCosine({
embeddingModel
});
queries = queries.map((item) => String(item));
const { selectedData: selectedQueries, embeddingTokens } = await lazyGreedyQuerySelection({
originalText: query,
candidates: queries,
+15 -4
View File
@@ -81,7 +81,7 @@ export const createLLMResponse = async <T extends CompletionsBodyType>(
return requestMessages;
})();
const requestBody = await llmCompletionsBodyFormat({
const { requestBody, modelData } = await llmCompletionsBodyFormat({
...body,
messages: rewriteMessages
});
@@ -89,6 +89,7 @@ export const createLLMResponse = async <T extends CompletionsBodyType>(
// console.log(JSON.stringify(requestBody, null, 2));
const { response, isStreamResponse, getEmptyResponseTip } = await createChatCompletion({
body: requestBody,
modelData,
userKey,
options: {
headers: {
@@ -491,10 +492,16 @@ const llmCompletionsBodyFormat = async <T extends CompletionsBodyType>({
parallel_tool_calls,
toolCallMode,
...body
}: LLMRequestBodyType<T>): Promise<InferCompletionsBody<T>> => {
}: LLMRequestBodyType<T>): Promise<{
requestBody: InferCompletionsBody<T>;
modelData: LLMModelItemType;
}> => {
const modelData = getLLMModel(body.model);
if (!modelData) {
return body as unknown as InferCompletionsBody<T>;
return {
requestBody: body as unknown as InferCompletionsBody<T>,
modelData
};
}
const response_format = (() => {
@@ -548,7 +555,10 @@ const llmCompletionsBodyFormat = async <T extends CompletionsBodyType>({
});
}
return requestBody as unknown as InferCompletionsBody<T>;
return {
requestBody: requestBody as unknown as InferCompletionsBody<T>,
modelData
};
};
const createChatCompletion = async ({
modelData,
@@ -579,6 +589,7 @@ const createChatCompletion = async ({
try {
// Rewrite model
const modelConstantsData = modelData || getLLMModel(body.model);
if (!modelConstantsData) {
return Promise.reject(`${body.model} not found`);
}
@@ -115,6 +115,25 @@ export async function delDatasetRelevantData({
// Delete vector data
await deleteDatasetDataVector({ teamId, datasetIds });
// Delete dataset_data_texts in batches by datasetId
for (const datasetId of datasetIds) {
await MongoDatasetDataText.deleteMany({
teamId,
datasetId
}).maxTimeMS(300000); // Reduce timeout for single batch
}
// Delete dataset_datas in batches by datasetId
for (const datasetId of datasetIds) {
await MongoDatasetData.deleteMany({
teamId,
datasetId
}).maxTimeMS(300000);
}
await delCollectionRelatedSource({ collections });
// Delete vector data
await deleteDatasetDataVector({ teamId, datasetIds });
// delete collections
await MongoDatasetCollection.deleteMany({
teamId,
@@ -5,15 +5,11 @@ import { addDays } from 'date-fns';
import { isS3ObjectKey, jwtSignS3ObjectKey } from '../../../common/s3/utils';
export const formatDatasetDataValue = ({
teamId,
datasetId,
q,
a,
imageId,
imageDescMap
}: {
teamId: string;
datasetId: string;
q: string;
a?: string;
imageId?: string;
@@ -73,8 +69,6 @@ export const getFormatDatasetCiteList = (list: DatasetDataSchemaType[]) => {
return list.map((item) => ({
_id: item._id,
...formatDatasetDataValue({
teamId: item.teamId,
datasetId: item.datasetId,
q: item.q,
a: item.a,
imageId: item.imageId
@@ -555,8 +555,6 @@ export async function searchDatasetData(
id: String(data._id),
updateTime: data.updateTime,
...formatDatasetDataValue({
teamId,
datasetId: data.datasetId,
q: data.q,
a: data.a,
imageId: data.imageId,
@@ -727,8 +725,6 @@ export async function searchDatasetData(
collectionId: String(data.collectionId),
updateTime: data.updateTime,
...formatDatasetDataValue({
teamId,
datasetId: data.datasetId,
q: data.q,
a: data.a,
imageId: data.imageId,
@@ -14,6 +14,7 @@ import { i18nT } from '../../../../../web/i18n/utils';
import { filterDatasetsByTmbId } from '../../../dataset/utils';
import { getDatasetSearchToolResponsePrompt } from '../../../../../global/core/ai/prompt/dataset';
import { getNodeErrResponse } from '../utils';
import { addLog } from '../../../../common/system/log';
type DatasetSearchProps = ModuleDispatchProps<{
[NodeInputKeyEnum.datasetSelectList]: SelectedDatasetType[];
@@ -49,7 +50,6 @@ export async function dispatchDatasetSearch(
const {
runningAppInfo: { teamId },
runningUserInfo: { tmbId },
uid,
histories,
node,
params: {
@@ -281,6 +281,7 @@ export async function dispatchDatasetSearch(
: 'No results'
};
} catch (error) {
addLog.error(`[Dataset search] error`, error);
return getNodeErrResponse({ error });
}
}