This commit is contained in:
Archer
2023-12-11 15:12:14 +08:00
committed by GitHub
parent 84cf6b5658
commit d2d7eac9e0
105 changed files with 1091 additions and 801 deletions

View File

@@ -13,15 +13,7 @@ import { jiebaSplit } from '../utils';
import { reRankRecall } from '../../ai/rerank';
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
export async function insertData2Pg({
mongoDataId,
input,
model,
teamId,
tmbId,
datasetId,
collectionId
}: {
export async function insertData2Pg(props: {
mongoDataId: string;
input: string;
model: string;
@@ -29,42 +21,42 @@ export async function insertData2Pg({
tmbId: string;
datasetId: string;
collectionId: string;
}) {
let retry = 2;
async function insertPg(): Promise<{ insertId: string; vectors: number[][]; tokenLen: number }> {
try {
// get vector
const { vectors, tokenLen } = await getVectorsByText({
model,
input: [input]
});
const { rows } = await PgClient.insert(PgDatasetTableName, {
values: [
[
{ key: 'vector', value: `[${vectors[0]}]` },
{ key: 'team_id', value: String(teamId) },
{ key: 'tmb_id', value: String(tmbId) },
{ key: 'dataset_id', value: datasetId },
{ key: 'collection_id', value: collectionId },
{ key: 'data_id', value: String(mongoDataId) }
]
retry?: number;
}): Promise<{ insertId: string; vectors: number[][]; tokenLen: number }> {
const { mongoDataId, input, model, teamId, tmbId, datasetId, collectionId, retry = 3 } = props;
try {
// get vector
const { vectors, tokenLen } = await getVectorsByText({
model,
input: [input]
});
const { rows } = await PgClient.insert(PgDatasetTableName, {
values: [
[
{ key: 'vector', value: `[${vectors[0]}]` },
{ key: 'team_id', value: String(teamId) },
{ key: 'tmb_id', value: String(tmbId) },
{ key: 'dataset_id', value: datasetId },
{ key: 'collection_id', value: collectionId },
{ key: 'data_id', value: String(mongoDataId) }
]
});
return {
insertId: rows[0].id,
vectors,
tokenLen
};
} catch (error) {
if (--retry < 0) {
return Promise.reject(error);
}
await delay(500);
return insertPg();
]
});
return {
insertId: rows[0].id,
vectors,
tokenLen
};
} catch (error) {
if (retry <= 0) {
return Promise.reject(error);
}
await delay(500);
return insertData2Pg({
...props,
retry: retry - 1
});
}
return insertPg();
}
export async function updatePgDataById({
@@ -128,8 +120,9 @@ export async function searchDatasetData(props: SearchProps) {
}
const rerank =
searchMode === DatasetSearchModeEnum.embeddingReRank ||
searchMode === DatasetSearchModeEnum.embFullTextReRank;
global.reRankModels?.[0] &&
(searchMode === DatasetSearchModeEnum.embeddingReRank ||
searchMode === DatasetSearchModeEnum.embFullTextReRank);
const oneChunkToken = 50;
const { embeddingLimit, fullTextLimit } = (() => {
@@ -188,8 +181,6 @@ export async function searchDatasetData(props: SearchProps) {
return true;
});
// token slice
if (!rerank) {
return {
searchRes: filterResultsByMaxTokens(
@@ -264,7 +255,7 @@ export async function embeddingRecall({
{
_id: { $in: filterRows.map((item) => item.data_id?.trim()) }
},
'datasetId collectionId q a indexes'
'datasetId collectionId q a chunkIndex indexes'
).lean()
]);
const formatResult = filterRows
@@ -281,6 +272,7 @@ export async function embeddingRecall({
id: String(data._id),
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
indexes: data.indexes,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
@@ -322,7 +314,8 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro
collectionId: 1,
q: 1,
a: 1,
indexes: 1
indexes: 1,
chunkIndex: 1
}
)
.sort({ score: { $meta: 'textScore' } })
@@ -354,6 +347,7 @@ export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchPro
sourceId: collection?.fileId || collection?.rawLink,
q: item.q,
a: item.a,
chunkIndex: item.chunkIndex,
indexes: item.indexes,
// @ts-ignore
score: item.score
@@ -395,8 +389,6 @@ export async function reRankSearchResult({
return mergeResult;
} catch (error) {
console.log(error);
return data;
}
}

View File

@@ -4,7 +4,7 @@ import { DatasetDataIndexTypeEnum, TrainingModeEnum } from '@fastgpt/global/core
import { sendOneInform } from '../support/user/inform/api';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import type { ChatMessageItemType } from '@fastgpt/global/core/ai/type.d';
import { addLog } from '@fastgpt/service/common/mongo/controller';
import { addLog } from '@fastgpt/service/common/system/log';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { Prompt_AgentQA } from '@/global/core/prompt/agent';
@@ -56,6 +56,7 @@ export async function generateQA(): Promise<any> {
collectionId: 1,
q: 1,
model: 1,
chunkIndex: 1,
billId: 1,
prompt: 1
})
@@ -130,7 +131,7 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
const ai = getAIApi(undefined, 600000);
const chatResponse = await ai.chat.completions.create({
model,
temperature: 0.01,
temperature: 0.3,
messages,
stream: false
});
@@ -144,7 +145,10 @@ ${replaceVariable(Prompt_AgentQA.fixedText, { text })}`;
teamId: data.teamId,
tmbId: data.tmbId,
collectionId: data.collectionId,
data: qaArr,
data: qaArr.map((item) => ({
...item,
chunkIndex: data.chunkIndex
})),
mode: TrainingModeEnum.chunk,
billId: data.billId
});

View File

@@ -2,7 +2,7 @@ import { insertData2Dataset } from '@/service/core/dataset/data/controller';
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { sendOneInform } from '../support/user/inform/api';
import { addLog } from '@fastgpt/service/common/mongo/controller';
import { addLog } from '@fastgpt/service/common/system/log';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';

View File

@@ -223,6 +223,7 @@ function filterQuote({
score: item.score?.toFixed(4)
});
}
const sliceResult = sliceMessagesTB({
maxTokens: model.quoteMaxToken,
messages: quoteQA.map((item, index) => ({
@@ -234,13 +235,30 @@ function filterQuote({
// slice filterSearch
const filterQuoteQA = quoteQA.slice(0, sliceResult.length);
// Group filterQuoteQA by collectionId, then sort each group by chunkIndex in ascending order
const sortQuoteQAMap: Record<string, SearchDataResponseItemType[]> = {};
filterQuoteQA.forEach((item) => {
if (sortQuoteQAMap[item.collectionId]) {
sortQuoteQAMap[item.collectionId].push(item);
} else {
sortQuoteQAMap[item.collectionId] = [item];
}
});
const sortQuoteQAList = Object.values(sortQuoteQAMap).flat();
sortQuoteQAList.sort((a, b) => {
if (a.collectionId === b.collectionId) {
return a.chunkIndex - b.chunkIndex;
}
return 0;
});
const quoteText =
filterQuoteQA.length > 0
? `${filterQuoteQA.map((item, index) => getValue(item, index)).join('\n')}`
: '';
return {
filterQuoteQA,
filterQuoteQA: sortQuoteQAList,
quoteText
};
}

View File

@@ -45,9 +45,16 @@ export async function autChatCrud({
}
// req auth
const { tmbId, role } = await authUserRole(props);
const { teamId, tmbId, role } = await authUserRole(props);
if (String(teamId) !== String(chat.teamId)) return Promise.reject(ChatErrEnum.unAuthChat);
if (role === TeamMemberRoleEnum.owner) return { uid: outLinkUid };
if (String(tmbId) === String(chat.tmbId)) return { uid: outLinkUid };
// admin
if (per === 'r' && role === TeamMemberRoleEnum.admin) return { uid: outLinkUid };
return Promise.reject(ChatErrEnum.unAuthChat);
})();

View File

@@ -1,5 +1,4 @@
import { DatasetDataItemType, DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
import { AuthResponseType } from '@fastgpt/global/support/permission/type';
import { DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { AuthModeType } from '@fastgpt/service/support/permission/type';
@@ -27,6 +26,7 @@ export async function authDatasetData({
id: String(datasetData._id),
q: datasetData.q,
a: datasetData.a,
chunkIndex: datasetData.chunkIndex,
indexes: datasetData.indexes,
datasetId: String(datasetData.datasetId),
collectionId: String(datasetData.collectionId),

View File

@@ -2,7 +2,7 @@ import { BillSourceEnum, PRICE_SCALE } from '@fastgpt/global/support/wallet/bill
import { getAudioSpeechModel, getQAModel } from '@/service/core/ai/model';
import type { ChatHistoryItemResType } from '@fastgpt/global/core/chat/type.d';
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import { addLog } from '@fastgpt/service/common/mongo/controller';
import { addLog } from '@fastgpt/service/common/system/log';
import type { ConcatBillProps, CreateBillProps } from '@fastgpt/global/support/wallet/bill/api.d';
import { defaultQGModels } from '@fastgpt/global/core/ai/model';
import { POST } from '@fastgpt/service/common/api/plusRequest';

View File

@@ -3,7 +3,7 @@ import { MongoApp } from '@fastgpt/service/core/app/schema';
import { ChatSourceEnum } from '@fastgpt/global/core/chat/constants';
import { MongoChatItem } from '@fastgpt/service/core/chat/chatItemSchema';
import { MongoChat } from '@fastgpt/service/core/chat/chatSchema';
import { addLog } from '@fastgpt/service/common/mongo/controller';
import { addLog } from '@fastgpt/service/common/system/log';
import { chatContentReplaceBlock } from '@fastgpt/global/core/chat/utils';
type Props = {