4.6.5 - Coreference Resolution Module (#631)

This commit is contained in:
Archer
2023-12-22 10:47:31 +08:00
committed by GitHub
parent 41115a96c0
commit cd682d4275
112 changed files with 4163 additions and 2700 deletions

View File

@@ -0,0 +1,25 @@
import { existsSync, readFileSync } from 'fs';
/**
 * Read a config file from the data directory.
 *
 * In development, a developer-local override named `<base>.local.<ext>`
 * (e.g. `config.json` -> `config.local.json`) is preferred when it exists
 * under `data/`. In production, files are read from the fixed `/app/data`
 * container mount.
 *
 * @param name config file name, e.g. `config.json`
 * @returns file content as a UTF-8 string
 */
export const readConfigData = (name: string) => {
  const isDev = process.env.NODE_ENV === 'development';

  // Insert `.local` before the last extension. The original split on the
  // first '.', which mangled multi-dot names (`app.config.json` ->
  // `app.local.config`) and produced `x.local.undefined` for names with no
  // extension. Extension-less names now simply get a `.local` suffix.
  const dotIndex = name.lastIndexOf('.');
  const devName =
    dotIndex > 0 ? `${name.slice(0, dotIndex)}.local${name.slice(dotIndex)}` : `${name}.local`;

  const filename = (() => {
    if (isDev) {
      // prefer the developer-local override when present
      if (existsSync(`data/${devName}`)) {
        return `data/${devName}`;
      }
      return `data/${name}`;
    }
    // production path (fixed container mount)
    return `/app/data/${name}`;
  })();

  return readFileSync(filename, 'utf-8');
};

View File

@@ -1,62 +1,26 @@
import {
defaultAudioSpeechModels,
defaultChatModels,
defaultCQModels,
defaultExtractModels,
defaultQAModels,
defaultQGModels,
defaultVectorModels
} from '@fastgpt/global/core/ai/model';
/**
 * Resolve a chat model config by model id.
 * Falls back to the first configured model (or the packaged default) when
 * `model` is undefined or not found.
 */
export const getChatModel = (model?: string) => {
  // Removed an unreachable duplicate `return` left behind by a bad merge.
  return (
    (global.chatModels || defaultChatModels).find((item) => item.model === model) ||
    global.chatModels?.[0] ||
    defaultChatModels[0]
  );
};
/**
 * Resolve a QA model config by model id, with fallback to the first
 * configured model or the packaged default.
 */
export const getQAModel = (model?: string) => {
  // Removed an unreachable duplicate `return` left behind by a bad merge.
  return (
    (global.qaModels || defaultQAModels).find((item) => item.model === model) ||
    global.qaModels?.[0] ||
    defaultQAModels[0]
  );
};
/**
 * Resolve a classify-question (CQ) model config by model id, with fallback
 * to the first configured model or the packaged default.
 */
export const getCQModel = (model?: string) => {
  // Removed an unreachable duplicate `return` left behind by a bad merge.
  return (
    (global.cqModels || defaultCQModels).find((item) => item.model === model) ||
    global.cqModels?.[0] ||
    defaultCQModels[0]
  );
};
/**
 * Resolve a content-extraction model config by model id, with fallback to
 * the first configured model or the packaged default.
 */
export const getExtractModel = (model?: string) => {
  // Removed an unreachable duplicate `return` left behind by a bad merge.
  return (
    (global.extractModels || defaultExtractModels).find((item) => item.model === model) ||
    global.extractModels?.[0] ||
    defaultExtractModels[0]
  );
};
/**
 * Resolve a question-generation (QG) model config by model id, with
 * fallback to the first configured model or the packaged default.
 */
export const getQGModel = (model?: string) => {
  // Removed an unreachable duplicate `return` left behind by a bad merge.
  return (
    (global.qgModels || defaultQGModels).find((item) => item.model === model) ||
    global.qgModels?.[0] ||
    defaultQGModels[0]
  );
};
/**
 * Resolve a vector (embedding) model config by model id, with fallback to
 * the first configured model or the packaged default.
 */
export const getVectorModel = (model?: string) => {
  // Removed an unreachable duplicate `return` left behind by a bad merge,
  // and made the search source fall back to the packaged defaults so this
  // getter is consistent with its siblings when globals are not loaded yet.
  return (
    (global.vectorModels || defaultVectorModels).find((item) => item.model === model) ||
    global.vectorModels?.[0] ||
    defaultVectorModels[0]
  );
};
export function getAudioSpeechModel(model?: string) {
return (
global.audioSpeechModels.find((item) => item.model === model) ||
global.audioSpeechModels?.[0] ||
defaultAudioSpeechModels[0]
global.audioSpeechModels.find((item) => item.model === model) || global.audioSpeechModels[0]
);
}

View File

@@ -12,6 +12,7 @@ import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { jiebaSplit } from '../utils';
import { reRankRecall } from '../../ai/rerank';
import { countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { hashStr } from '@fastgpt/global/common/string/tools';
export async function insertData2Pg(props: {
mongoDataId: string;
@@ -98,34 +99,40 @@ export async function updatePgDataById({
// ------------------ search start ------------------
type SearchProps = {
text: string;
model: string;
similarity?: number; // min distance
limit: number; // max Token limit
datasetIds: string[];
searchMode?: `${DatasetSearchModeEnum}`;
};
export async function searchDatasetData(props: SearchProps) {
export async function searchDatasetData(
props: SearchProps & { rawQuery: string; queries: string[] }
) {
let {
text,
rawQuery,
queries,
model,
similarity = 0,
limit: maxTokens,
searchMode = DatasetSearchModeEnum.embedding
searchMode = DatasetSearchModeEnum.embedding,
datasetIds = []
} = props;
searchMode = global.systemEnv?.pluginBaseUrl ? searchMode : DatasetSearchModeEnum.embedding;
/* init params */
searchMode = global.systemEnv?.pluginBaseUrl ? searchMode : DatasetSearchModeEnum.embedding;
// Compatible with topk limit
if (maxTokens < 50) {
maxTokens = 1500;
}
const rerank =
global.reRankModels?.[0] &&
(searchMode === DatasetSearchModeEnum.embeddingReRank ||
searchMode === DatasetSearchModeEnum.embFullTextReRank);
let set = new Set<string>();
const oneChunkToken = 50;
const { embeddingLimit, fullTextLimit } = (() => {
/* function */
const countRecallLimit = () => {
const oneChunkToken = 50;
const estimatedLen = Math.max(20, Math.ceil(maxTokens / oneChunkToken));
// Increase search range, reduce hnsw loss. 20 ~ 100
@@ -148,34 +155,295 @@ export async function searchDatasetData(props: SearchProps) {
embeddingLimit: Math.min(80, Math.max(50, estimatedLen * 2)),
fullTextLimit: Math.min(40, Math.max(20, estimatedLen))
};
})();
};
const embeddingRecall = async ({ query, limit }: { query: string; limit: number }) => {
const { vectors, tokenLen } = await getVectorsByText({
model,
input: [query]
});
const [{ tokenLen, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
embeddingRecall({
...props,
rerank,
limit: embeddingLimit
}),
fullTextRecall({
...props,
limit: fullTextLimit
})
]);
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 100};
select id, collection_id, data_id, (vector <#> '[${vectors[0]}]') * -1 AS score
from ${PgDatasetTableName}
where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
${rerank ? '' : `AND vector <#> '[${vectors[0]}]' < -${similarity}`}
order by score desc limit ${limit};
COMMIT;`
);
// concat embedding and fullText recall result
let set = new Set<string>(embeddingRecallResults.map((item) => item.id));
const concatRecallResults = embeddingRecallResults;
fullTextRecallResults.forEach((item) => {
if (!set.has(item.id) && item.score >= similarity) {
concatRecallResults.push(item);
set.add(item.id);
const rows = results?.[2]?.rows as PgSearchRawType[];
// concat same data_id
const filterRows: PgSearchRawType[] = [];
let set = new Set<string>();
for (const row of rows) {
if (!set.has(row.data_id)) {
filterRows.push(row);
set.add(row.data_id);
}
}
// get q and a
const [collections, dataList] = await Promise.all([
MongoDatasetCollection.find(
{
_id: { $in: filterRows.map((item) => item.collection_id) }
},
'name fileId rawLink'
).lean(),
MongoDatasetData.find(
{
_id: { $in: filterRows.map((item) => item.data_id?.trim()) }
},
'datasetId collectionId q a chunkIndex indexes'
).lean()
]);
const formatResult = filterRows
.map((item) => {
const collection = collections.find(
(collection) => String(collection._id) === item.collection_id
);
const data = dataList.find((data) => String(data._id) === item.data_id);
// if collection or data UnExist, the relational mongo data already deleted
if (!collection || !data) return null;
return {
id: String(data._id),
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
indexes: data.indexes,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
sourceName: collection.name || '',
sourceId: collection?.fileId || collection?.rawLink,
score: item.score
};
})
.filter((item) => item !== null) as SearchDataResponseItemType[];
return {
embeddingRecallResults: formatResult,
tokenLen
};
};
const fullTextRecall = async ({
query,
limit
}: {
query: string;
limit: number;
}): Promise<{
fullTextRecallResults: SearchDataResponseItemType[];
tokenLen: number;
}> => {
if (limit === 0) {
return {
fullTextRecallResults: [],
tokenLen: 0
};
}
let searchResults = (
await Promise.all(
datasetIds.map((id) =>
MongoDatasetData.find(
{
datasetId: id,
$text: { $search: jiebaSplit({ text: query }) }
},
{
score: { $meta: 'textScore' },
_id: 1,
datasetId: 1,
collectionId: 1,
q: 1,
a: 1,
indexes: 1,
chunkIndex: 1
}
)
.sort({ score: { $meta: 'textScore' } })
.limit(limit)
.lean()
)
)
).flat() as (DatasetDataSchemaType & { score: number })[];
// resort
searchResults.sort((a, b) => b.score - a.score);
searchResults.slice(0, limit);
const collections = await MongoDatasetCollection.find(
{
_id: { $in: searchResults.map((item) => item.collectionId) }
},
'_id name fileId rawLink'
);
return {
fullTextRecallResults: searchResults.map((item) => {
const collection = collections.find((col) => String(col._id) === String(item.collectionId));
return {
id: String(item._id),
datasetId: String(item.datasetId),
collectionId: String(item.collectionId),
sourceName: collection?.name || '',
sourceId: collection?.fileId || collection?.rawLink,
q: item.q,
a: item.a,
chunkIndex: item.chunkIndex,
indexes: item.indexes,
// @ts-ignore
score: item.score
};
}),
tokenLen: 0
};
};
const reRankSearchResult = async ({
data,
query
}: {
data: SearchDataResponseItemType[];
query: string;
}): Promise<SearchDataResponseItemType[]> => {
try {
const results = await reRankRecall({
query,
inputs: data.map((item) => ({
id: item.id,
text: `${item.q}\n${item.a}`
}))
});
if (!Array.isArray(results)) return data;
// add new score to data
const mergeResult = results
.map((item) => {
const target = data.find((dataItem) => dataItem.id === item.id);
if (!target) return null;
return {
...target,
score: item.score || target.score
};
})
.filter(Boolean) as SearchDataResponseItemType[];
return mergeResult;
} catch (error) {
return data;
}
};
const filterResultsByMaxTokens = (list: SearchDataResponseItemType[], maxTokens: number) => {
const results: SearchDataResponseItemType[] = [];
let totalTokens = 0;
for (let i = 0; i < list.length; i++) {
const item = list[i];
totalTokens += countPromptTokens(item.q + item.a);
if (totalTokens > maxTokens + 500) {
break;
}
results.push(item);
if (totalTokens > maxTokens) {
break;
}
}
return results.length === 0 ? list.slice(0, 1) : results;
};
const multiQueryRecall = async ({
embeddingLimit,
fullTextLimit
}: {
embeddingLimit: number;
fullTextLimit: number;
}) => {
// In a group n recall, as long as one of the data appears minAmount of times, it is retained
const getIntersection = (resultList: SearchDataResponseItemType[][], minAmount = 1) => {
minAmount = Math.min(resultList.length, minAmount);
const map: Record<
string,
{
amount: number;
data: SearchDataResponseItemType;
}
> = {};
for (const list of resultList) {
for (const item of list) {
map[item.id] = map[item.id]
? {
amount: map[item.id].amount + 1,
data: item
}
: {
amount: 1,
data: item
};
}
}
return Object.values(map)
.filter((item) => item.amount >= minAmount)
.map((item) => item.data);
};
// multi query recall
const embeddingRecallResList: SearchDataResponseItemType[][] = [];
const fullTextRecallResList: SearchDataResponseItemType[][] = [];
let embTokens = 0;
for await (const query of queries) {
const [{ tokenLen, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
embeddingRecall({
query,
limit: embeddingLimit
}),
fullTextRecall({
query,
limit: fullTextLimit
})
]);
embTokens += tokenLen;
embeddingRecallResList.push(embeddingRecallResults);
fullTextRecallResList.push(fullTextRecallResults);
}
return {
tokens: embTokens,
embeddingRecallResults: getIntersection(embeddingRecallResList, 2),
fullTextRecallResults: getIntersection(fullTextRecallResList, 2)
};
};
/* main step */
// count limit
const { embeddingLimit, fullTextLimit } = countRecallLimit();
// recall
const { embeddingRecallResults, fullTextRecallResults, tokens } = await multiQueryRecall({
embeddingLimit,
fullTextLimit
});
// concat recall results
set = new Set<string>(embeddingRecallResults.map((item) => item.id));
const concatRecallResults = embeddingRecallResults.concat(
fullTextRecallResults.filter((item) => !set.has(item.id))
);
// remove same q and a data
set = new Set<string>();
const filterSameDataResults = concatRecallResults.filter((item) => {
const str = `${item.q}${item.a}`.trim();
// 删除所有的标点符号与空格等,只对文本进行比较
const str = hashStr(`${item.q}${item.a}`.replace(/[^\p{L}\p{N}]/gu, ''));
if (set.has(str)) return false;
set.add(str);
return true;
@@ -187,14 +455,14 @@ export async function searchDatasetData(props: SearchProps) {
filterSameDataResults.filter((item) => item.score >= similarity),
maxTokens
),
tokenLen
tokenLen: tokens
};
}
// ReRank result
// ReRank results
const reRankResults = (
await reRankSearchResult({
query: text,
query: rawQuery,
data: filterSameDataResults
})
).filter((item) => item.score > similarity);
@@ -204,210 +472,7 @@ export async function searchDatasetData(props: SearchProps) {
reRankResults.filter((item) => item.score >= similarity),
maxTokens
),
tokenLen
tokenLen: tokens
};
}
export async function embeddingRecall({
text,
model,
similarity = 0,
limit,
datasetIds = [],
rerank = false
}: SearchProps & { rerank: boolean }) {
const { vectors, tokenLen } = await getVectorsByText({
model,
input: [text]
});
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 100};
select id, collection_id, data_id, (vector <#> '[${vectors[0]}]') * -1 AS score
from ${PgDatasetTableName}
where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
${rerank ? '' : `AND vector <#> '[${vectors[0]}]' < -${similarity}`}
order by score desc limit ${limit};
COMMIT;`
);
const rows = results?.[2]?.rows as PgSearchRawType[];
// concat same data_id
const filterRows: PgSearchRawType[] = [];
let set = new Set<string>();
for (const row of rows) {
if (!set.has(row.data_id)) {
filterRows.push(row);
set.add(row.data_id);
}
}
// get q and a
const [collections, dataList] = await Promise.all([
MongoDatasetCollection.find(
{
_id: { $in: filterRows.map((item) => item.collection_id) }
},
'name fileId rawLink'
).lean(),
MongoDatasetData.find(
{
_id: { $in: filterRows.map((item) => item.data_id?.trim()) }
},
'datasetId collectionId q a chunkIndex indexes'
).lean()
]);
const formatResult = filterRows
.map((item) => {
const collection = collections.find(
(collection) => String(collection._id) === item.collection_id
);
const data = dataList.find((data) => String(data._id) === item.data_id);
// if collection or data UnExist, the relational mongo data already deleted
if (!collection || !data) return null;
return {
id: String(data._id),
q: data.q,
a: data.a,
chunkIndex: data.chunkIndex,
indexes: data.indexes,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
sourceName: collection.name || '',
sourceId: collection?.fileId || collection?.rawLink,
score: item.score
};
})
.filter((item) => item !== null) as SearchDataResponseItemType[];
return {
embeddingRecallResults: formatResult,
tokenLen
};
}
export async function fullTextRecall({ text, limit, datasetIds = [] }: SearchProps): Promise<{
fullTextRecallResults: SearchDataResponseItemType[];
tokenLen: number;
}> {
if (limit === 0) {
return {
fullTextRecallResults: [],
tokenLen: 0
};
}
let searchResults = (
await Promise.all(
datasetIds.map((id) =>
MongoDatasetData.find(
{
datasetId: id,
$text: { $search: jiebaSplit({ text }) }
},
{
score: { $meta: 'textScore' },
_id: 1,
datasetId: 1,
collectionId: 1,
q: 1,
a: 1,
indexes: 1,
chunkIndex: 1
}
)
.sort({ score: { $meta: 'textScore' } })
.limit(limit)
.lean()
)
)
).flat() as (DatasetDataSchemaType & { score: number })[];
// resort
searchResults.sort((a, b) => b.score - a.score);
searchResults.slice(0, limit);
const collections = await MongoDatasetCollection.find(
{
_id: { $in: searchResults.map((item) => item.collectionId) }
},
'_id name fileId rawLink'
);
return {
fullTextRecallResults: searchResults.map((item) => {
const collection = collections.find((col) => String(col._id) === String(item.collectionId));
return {
id: String(item._id),
datasetId: String(item.datasetId),
collectionId: String(item.collectionId),
sourceName: collection?.name || '',
sourceId: collection?.fileId || collection?.rawLink,
q: item.q,
a: item.a,
chunkIndex: item.chunkIndex,
indexes: item.indexes,
// @ts-ignore
score: item.score
};
}),
tokenLen: 0
};
}
// plus reRank search result
export async function reRankSearchResult({
data,
query
}: {
data: SearchDataResponseItemType[];
query: string;
}): Promise<SearchDataResponseItemType[]> {
try {
const results = await reRankRecall({
query,
inputs: data.map((item) => ({
id: item.id,
text: `${item.q}\n${item.a}`
}))
});
if (!Array.isArray(results)) return data;
// add new score to data
const mergeResult = results
.map((item) => {
const target = data.find((dataItem) => dataItem.id === item.id);
if (!target) return null;
return {
...target,
score: item.score || target.score
};
})
.filter(Boolean) as SearchDataResponseItemType[];
return mergeResult;
} catch (error) {
return data;
}
}
export function filterResultsByMaxTokens(list: SearchDataResponseItemType[], maxTokens: number) {
const results: SearchDataResponseItemType[] = [];
let totalTokens = 0;
for (let i = 0; i < list.length; i++) {
const item = list[i];
totalTokens += countPromptTokens(item.q + item.a);
if (totalTokens > maxTokens + 200) {
break;
}
results.push(item);
if (totalTokens > maxTokens) {
break;
}
}
return results;
}
// ------------------ search end ------------------

View File

@@ -170,9 +170,11 @@ export async function generateVector(): Promise<any> {
err.response?.data?.error?.type === 'invalid_request_error' ||
err?.code === 500
) {
addLog.info('invalid message format', {
dataItem
});
addLog.info('Lock training data');
console.log(err?.code);
console.log(err.response?.data?.error?.type);
console.log(err?.message);
try {
await MongoDatasetTraining.findByIdAndUpdate(data._id, {
lockTime: new Date('2998/5/5')

View File

@@ -5,7 +5,7 @@ import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import type { ClassifyQuestionAgentItemType } from '@fastgpt/global/core/module/type.d';
import { ModuleInputKeyEnum, ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { Prompt_CQJson } from '@/global/core/prompt/agent';
import { FunctionModelItemType } from '@fastgpt/global/core/ai/model.d';
@@ -43,8 +43,8 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
const chatHistories = getHistories(history, histories);
const { arg, tokens } = await (async () => {
if (cqModel.functionCall) {
return functionCall({
if (cqModel.toolChoice) {
return toolChoice({
...props,
histories: chatHistories,
cqModel
@@ -73,7 +73,7 @@ export const dispatchClassifyQuestion = async (props: Props): Promise<CQResponse
};
};
async function functionCall({
async function toolChoice({
user,
cqModel,
histories,

View File

@@ -5,17 +5,19 @@ import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import type { ContextExtractAgentItemType } from '@fastgpt/global/core/module/type';
import { ModuleInputKeyEnum, ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { Prompt_ExtractJson } from '@/global/core/prompt/agent';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { FunctionModelItemType } from '@fastgpt/global/core/ai/model.d';
import { getHistories } from '../utils';
import { getExtractModel } from '@/service/core/ai/model';
type Props = ModuleDispatchProps<{
[ModuleInputKeyEnum.history]?: ChatItemType[];
[ModuleInputKeyEnum.contextExtractInput]: string;
[ModuleInputKeyEnum.extractKeys]: ContextExtractAgentItemType[];
[ModuleInputKeyEnum.description]: string;
[ModuleInputKeyEnum.aiModel]: string;
}>;
type Response = {
[ModuleOutputKeyEnum.success]?: boolean;
@@ -30,19 +32,19 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
const {
user,
histories,
inputs: { content, history = 6, description, extractKeys }
inputs: { content, history = 6, model, description, extractKeys }
} = props;
if (!content) {
return Promise.reject('Input is empty');
}
const extractModel = global.extractModels[0];
const extractModel = getExtractModel(model);
const chatHistories = getHistories(history, histories);
const { arg, tokens, rawResponse } = await (async () => {
if (extractModel.functionCall) {
return functionCall({
const { arg, tokens } = await (async () => {
if (extractModel.toolChoice) {
return toolChoice({
...props,
histories: chatHistories,
extractModel
@@ -60,6 +62,9 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
if (!extractKeys.find((item) => item.key === key)) {
delete arg[key];
}
if (arg[key] === '') {
delete arg[key];
}
}
// auth fields
@@ -91,7 +96,7 @@ export async function dispatchContentExtract(props: Props): Promise<Response> {
};
}
async function functionCall({
async function toolChoice({
extractModel,
user,
histories,
@@ -101,17 +106,19 @@ async function functionCall({
...histories,
{
obj: ChatRoleEnum.Human,
value: `<任务描述>
value: `你的任务:
"""
${description || '根据用户要求获取适当的 JSON 字符串。'}
"""
要求:
"""
- 如果字段为空,你返回空字符串。
- 不要换行。
- 结合历史记录和文本进行获取。
</任务描述>
- 字符串不要换行。
- 结合上下文和当前问题进行获取。
"""
<文本>
${content}
</文本>`
当前问题: "${content}"`
}
];
const filterMessages = ChatContextFilter({

View File

@@ -16,7 +16,7 @@ import { adaptChat2GptMessages } from '@fastgpt/global/core/chat/adapt';
import { Prompt_QuotePromptList, Prompt_QuoteTemplateList } from '@/global/core/prompt/AIChat';
import type { AIChatModuleProps } from '@fastgpt/global/core/module/node/type.d';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { responseWrite, responseWriteController } from '@fastgpt/service/common/response';
import { getChatModel, ModelTypeEnum } from '@/service/core/ai/model';
import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';

View File

@@ -2,11 +2,12 @@ import type { moduleDispatchResType } from '@fastgpt/global/core/chat/type.d';
import { countModelPrice } from '@/service/support/wallet/bill/utils';
import type { SelectedDatasetType } from '@fastgpt/global/core/module/api.d';
import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { ModelTypeEnum } from '@/service/core/ai/model';
import { searchDatasetData } from '@/service/core/dataset/data/pg';
import { ModuleInputKeyEnum, ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
import { DatasetSearchModeEnum } from '@fastgpt/global/core/dataset/constant';
import { searchQueryExtension } from '@fastgpt/service/core/ai/functions/queryExtension';
type DatasetSearchProps = ModuleDispatchProps<{
[ModuleInputKeyEnum.datasetSelectList]: SelectedDatasetType;
@@ -26,24 +27,34 @@ export async function dispatchDatasetSearch(
props: DatasetSearchProps
): Promise<DatasetSearchResponse> {
const {
teamId,
tmbId,
inputs: { datasets = [], similarity = 0.4, limit = 5, searchMode, userChatInput }
} = props as DatasetSearchProps;
if (!Array.isArray(datasets)) {
return Promise.reject('Quote type error');
}
if (datasets.length === 0) {
return Promise.reject("You didn't choose the knowledge base");
return Promise.reject('core.chat.error.Select dataset empty');
}
if (!userChatInput) {
return Promise.reject('Your input is empty');
return Promise.reject('core.chat.error.User question empty');
}
// get vector
const vectorModel = datasets[0]?.vectorModel || global.vectorModels[0];
// const { queries: extensionQueries } = await searchQueryExtension({
// query: userChatInput,
// model: global.chatModels[0].model
// });
const concatQueries = [userChatInput];
// start search
const { searchRes, tokenLen } = await searchDatasetData({
text: userChatInput,
rawQuery: userChatInput,
queries: concatQueries,
model: vectorModel.model,
similarity,
limit,
@@ -61,7 +72,7 @@ export async function dispatchDatasetSearch(
tokens: tokenLen,
type: ModelTypeEnum.vector
}),
query: userChatInput,
query: concatQueries.join('\n'),
model: vectorModel.name,
tokens: tokenLen,
similarity,

View File

@@ -1,12 +1,11 @@
import { NextApiResponse } from 'next';
import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
import { ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
import { RunningModuleItemType } from '@/types/app';
import { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ChatHistoryItemResType, ChatItemType } from '@fastgpt/global/core/chat/type.d';
import type { ChatDispatchProps, RunningModuleItemType } from '@fastgpt/global/core/module/type.d';
import { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import type { ChatHistoryItemResType } from '@fastgpt/global/core/chat/type.d';
import { FlowNodeInputTypeEnum, FlowNodeTypeEnum } from '@fastgpt/global/core/module/node/constant';
import { ModuleItemType } from '@fastgpt/global/core/module/type';
import { UserType } from '@fastgpt/global/support/user/type';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { responseWrite } from '@fastgpt/service/common/response';
import { sseResponseEventEnum } from '@fastgpt/service/common/response/constant';
@@ -22,11 +21,12 @@ import { dispatchClassifyQuestion } from './agent/classifyQuestion';
import { dispatchContentExtract } from './agent/extract';
import { dispatchHttpRequest } from './tools/http';
import { dispatchAppRequest } from './tools/runApp';
import { dispatchCFR } from './tools/cfr';
import { dispatchRunPlugin } from './plugin/run';
import { dispatchPluginInput } from './plugin/runInput';
import { dispatchPluginOutput } from './plugin/runOutput';
const callbackMap: Record<string, Function> = {
const callbackMap: Record<`${FlowNodeTypeEnum}`, Function> = {
[FlowNodeTypeEnum.historyNode]: dispatchHistory,
[FlowNodeTypeEnum.questionInput]: dispatchChatInput,
[FlowNodeTypeEnum.answerNode]: dispatchAnswer,
@@ -38,38 +38,28 @@ const callbackMap: Record<string, Function> = {
[FlowNodeTypeEnum.runApp]: dispatchAppRequest,
[FlowNodeTypeEnum.pluginModule]: dispatchRunPlugin,
[FlowNodeTypeEnum.pluginInput]: dispatchPluginInput,
[FlowNodeTypeEnum.pluginOutput]: dispatchPluginOutput
[FlowNodeTypeEnum.pluginOutput]: dispatchPluginOutput,
[FlowNodeTypeEnum.cfr]: dispatchCFR,
// none
[FlowNodeTypeEnum.userGuide]: () => Promise.resolve(),
[FlowNodeTypeEnum.variable]: () => Promise.resolve()
};
/* running */
export async function dispatchModules({
res,
teamId,
tmbId,
user,
appId,
modules,
chatId,
responseChatItemId,
histories = [],
startParams = {},
variables = {},
user,
stream = false,
detail = false
}: {
res: NextApiResponse;
teamId: string;
tmbId: string;
user: UserType;
appId: string;
detail = false,
...props
}: ChatDispatchProps & {
modules: ModuleItemType[];
chatId?: string;
responseChatItemId?: string;
histories: ChatItemType[];
startParams?: Record<string, any>;
variables?: Record<string, any>;
stream?: boolean;
detail?: boolean;
}) {
// set sse response headers
if (stream) {
@@ -196,25 +186,21 @@ export async function dispatchModules({
module.inputs.forEach((item: any) => {
params[item.key] = item.value;
});
const props: ModuleDispatchProps<Record<string, any>> = {
const dispatchData: ModuleDispatchProps<Record<string, any>> = {
...props,
res,
teamId,
tmbId,
user,
appId,
chatId,
responseChatItemId,
stream,
detail,
variables,
histories,
user,
stream,
detail,
outputs: module.outputs,
inputs: params
};
const dispatchRes: Record<string, any> = await (async () => {
if (callbackMap[module.flowType]) {
return callbackMap[module.flowType](props);
return callbackMap[module.flowType](dispatchData);
}
return {};
})();

View File

@@ -1,6 +1,6 @@
import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { getHistories } from '../utils';
export type HistoryProps = ModuleDispatchProps<{
maxContext?: number;

View File

@@ -1,5 +1,5 @@
import { ModuleInputKeyEnum } from '@fastgpt/global/core/module/constants';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
export type UserChatInputProps = ModuleDispatchProps<{
[ModuleInputKeyEnum.userChatInput]: string;
}>;

View File

@@ -1,4 +1,4 @@
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { dispatchModules } from '../index';
import { FlowNodeTypeEnum } from '@fastgpt/global/core/module/node/constant';
import {
@@ -21,6 +21,7 @@ type RunPluginResponse = {
export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPluginResponse> => {
const {
mode,
teamId,
tmbId,
inputs: { pluginId, ...data }
@@ -71,6 +72,7 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
if (output) {
output.moduleLogo = plugin.avatar;
}
console.log(responseData.length);
return {
answerText,
@@ -79,7 +81,14 @@ export const dispatchRunPlugin = async (props: RunPluginProps): Promise<RunPlugi
moduleLogo: plugin.avatar,
price: responseData.reduce((sum, item) => sum + (item.price || 0), 0),
runningTime: responseData.reduce((sum, item) => sum + (item.runningTime || 0), 0),
pluginOutput: output?.pluginOutput
pluginOutput: output?.pluginOutput,
pluginDetail:
mode === 'test' && plugin.teamId === teamId
? responseData.filter((item) => {
const filterArr = [FlowNodeTypeEnum.pluginOutput];
return !filterArr.includes(item.moduleType as any);
})
: undefined
},
...(output ? output.pluginOutput : {})
};

View File

@@ -1,4 +1,4 @@
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
export type PluginInputProps = ModuleDispatchProps<{
[key: string]: any;

View File

@@ -1,5 +1,5 @@
import type { moduleDispatchResType } from '@fastgpt/global/core/chat/type.d';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
export type PluginOutputProps = ModuleDispatchProps<{

View File

@@ -1,7 +1,7 @@
import { sseResponseEventEnum } from '@fastgpt/service/common/response/constant';
import { responseWrite } from '@fastgpt/service/common/response';
import { textAdaptGptResponse } from '@/utils/adapt';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
export type AnswerProps = ModuleDispatchProps<{
text: string;

View File

@@ -0,0 +1,167 @@
import type { ChatItemType, moduleDispatchResType } from '@fastgpt/global/core/chat/type.d';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { ModuleInputKeyEnum, ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
import { getHistories } from '../utils';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { getExtractModel } from '@/service/core/ai/model';
// Input contract for the coreference-resolution (CFR) module: the model id,
// an optional dialog background (system prompt), the chat history (either the
// items themselves or a max-count — see getHistories), and the raw user question.
type Props = ModuleDispatchProps<{
  [ModuleInputKeyEnum.aiModel]: string;
  [ModuleInputKeyEnum.aiSystemPrompt]?: string;
  [ModuleInputKeyEnum.history]?: ChatItemType[] | number;
  [ModuleInputKeyEnum.userChatInput]: string;
}>;
// Output: the rewritten (coreference-resolved) question, plus optional
// billing/debug metadata consumed by the response pipeline.
type Response = {
  [ModuleOutputKeyEnum.text]: string;
  [ModuleOutputKeyEnum.responseData]?: moduleDispatchResType;
};
/**
 * Coreference resolution (CFR): asks an LLM to rewrite the user's latest
 * question so pronouns/elliptical references are replaced with the concrete
 * entities from earlier turns (few-shot prompted via `defaultPrompt`).
 *
 * Resolves to `{ text, responseData? }`; rejects when the question is empty.
 * When there is no history and no system prompt there is nothing to resolve
 * against, so the input is returned unchanged (and no model call is billed).
 */
export const dispatchCFR = async ({
  histories,
  inputs: { model, systemPrompt, history, userChatInput }
}: Props): Promise<Response> => {
  if (!userChatInput) {
    return Promise.reject('Question is empty');
  }

  // No conversational context: pass the question through untouched.
  if (histories.length === 0 && !systemPrompt) {
    return {
      [ModuleOutputKeyEnum.text]: userChatInput
    };
  }

  const extractModel = getExtractModel(model);
  const chatHistories = getHistories(history, histories);

  // The optional system prompt is injected as the first Q/A pair of the
  // few-shot history block so the model treats it as dialog background.
  const systemFewShot = systemPrompt
    ? `Q: 对话背景。
A: ${systemPrompt}
`
    : '';
  const historyFewShot = chatHistories
    .map((item) => {
      const role = item.obj === 'Human' ? 'Q' : 'A';
      return `${role}: ${item.value}`;
    })
    .join('\n');
  const concatFewShot = `${systemFewShot}${historyFewShot}`.trim();

  // 480s timeout — presumably generous for slow self-hosted models; TODO confirm.
  const ai = getAIApi(undefined, 480000);

  const result = await ai.chat.completions.create({
    model: extractModel.model,
    temperature: 0, // deterministic rewrite
    max_tokens: 150,
    messages: [
      {
        role: 'user',
        content: replaceVariable(defaultPrompt, {
          query: userChatInput,
          histories: concatFewShot
        })
      }
    ],
    stream: false
  });

  // `??` (not `||`) for nullish defaults; values are never reassigned, so const.
  const answer = result.choices?.[0]?.message?.content ?? '';
  const tokens = result.usage?.total_tokens ?? 0;

  return {
    [ModuleOutputKeyEnum.responseData]: {
      price: extractModel.price * tokens,
      model: extractModel.name || '',
      tokens,
      query: userChatInput,
      textOutput: answer
    },
    [ModuleOutputKeyEnum.text]: answer
  };
};
// Few-shot prompt template for coreference resolution. `{{histories}}` and
// `{{query}}` are filled in via replaceVariable at call time. The examples
// also teach the model to pass through inputs it cannot or should not rewrite
// (gibberish, topic changes, already-complete questions).
// NOTE(review): "FatGPT" in two example backgrounds looks like a typo for
// "FastGPT" — this is runtime prompt text, so confirm intent before changing.
const defaultPrompt = `请不要回答任何问题。
你的任务是结合上下文,为当前问题,实现代词替换,确保问题描述的对象清晰明确。例如:
历史记录:
"""
Q: 对话背景。
A: 关于 FatGPT 的介绍和使用等问题。
"""
当前问题: 怎么下载
输出: FastGPT 怎么下载?
----------------
历史记录:
"""
Q: 报错 "no connection"
A: FastGPT 报错"no connection"可能是因为……
"""
当前问题: 怎么解决
输出: FastGPT 报错"no connection"如何解决?
----------------
历史记录:
"""
Q: 作者是谁?
A: FastGPT 的作者是 labring。
"""
当前问题: 介绍下他
输出: 介绍下 FastGPT 的作者 labring。
----------------
历史记录:
"""
Q: 作者是谁?
A: FastGPT 的作者是 labring。
"""
当前问题: 我想购买商业版。
输出: FastGPT 商业版如何购买?
----------------
历史记录:
"""
Q: 对话背景。
A: 关于 FatGPT 的介绍和使用等问题。
"""
当前问题: nh
输出: nh
----------------
历史记录:
"""
Q: FastGPT 如何收费?
A: FastGPT 收费可以参考……
"""
当前问题: 你知道 laf 么?
输出: 你知道 laf 么?
----------------
历史记录:
"""
Q: FastGPT 的优势
A: 1. 开源
   2. 简便
   3. 扩展性强
"""
当前问题: 介绍下第2点。
输出: 介绍下 FastGPT 简便的优势。
----------------
历史记录:
"""
Q: 什么是 FastGPT
A: FastGPT 是一个 RAG 平台。
Q: 什么是 Sealos
A: Sealos 是一个云操作系统。
"""
当前问题: 它们有什么关系?
输出: FastGPT 和 Sealos 有什么关系?
----------------
历史记录:
"""
{{histories}}
"""
当前问题: {{query}}
输出: `;

View File

@@ -1,5 +1,5 @@
import type { moduleDispatchResType } from '@fastgpt/global/core/chat/type.d';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { ModuleInputKeyEnum, ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
import axios from 'axios';
import { flatDynamicParams } from '../utils';

View File

@@ -1,5 +1,5 @@
import type { moduleDispatchResType, ChatItemType } from '@fastgpt/global/core/chat/type.d';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import type { ModuleDispatchProps } from '@fastgpt/global/core/module/type.d';
import { SelectAppItemType } from '@fastgpt/global/core/module/type';
import { dispatchModules } from '../index';
import { MongoApp } from '@fastgpt/service/core/app/schema';
@@ -7,12 +7,12 @@ import { responseWrite } from '@fastgpt/service/common/response';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { sseResponseEventEnum } from '@fastgpt/service/common/response/constant';
import { textAdaptGptResponse } from '@/utils/adapt';
import { ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
import { ModuleInputKeyEnum, ModuleOutputKeyEnum } from '@fastgpt/global/core/module/constants';
import { getHistories } from '../utils';
type Props = ModuleDispatchProps<{
userChatInput: string;
history?: ChatItemType[];
[ModuleInputKeyEnum.userChatInput]: string;
[ModuleInputKeyEnum.history]?: ChatItemType[] | number;
app: SelectAppItemType;
}>;
type Response = {
@@ -28,7 +28,7 @@ export const dispatchAppRequest = async (props: Props): Promise<Response> => {
stream,
detail,
histories,
inputs: { userChatInput, history = [], app }
inputs: { userChatInput, history, app }
} = props;
if (!userChatInput) {

View File

@@ -1,10 +1,9 @@
import { BillSourceEnum, PRICE_SCALE } from '@fastgpt/global/support/wallet/bill/constants';
import { getAudioSpeechModel, getQAModel } from '@/service/core/ai/model';
import { getAudioSpeechModel, getQAModel, getVectorModel } from '@/service/core/ai/model';
import type { ChatHistoryItemResType } from '@fastgpt/global/core/chat/type.d';
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import { addLog } from '@fastgpt/service/common/system/log';
import type { ConcatBillProps, CreateBillProps } from '@fastgpt/global/support/wallet/bill/api.d';
import { defaultQGModels } from '@fastgpt/global/core/ai/model';
import { POST } from '@fastgpt/service/common/api/plusRequest';
import { PostReRankProps } from '@fastgpt/global/core/ai/api';
@@ -113,8 +112,7 @@ export const pushGenerateVectorBill = ({
source?: `${BillSourceEnum}`;
}) => {
// 计算价格. 至少为1
const vectorModel =
global.vectorModels.find((item) => item.model === model) || global.vectorModels[0];
const vectorModel = getVectorModel(model);
const unitPrice = vectorModel.price || 0.2;
let total = unitPrice * tokenLen;
total = total > 1 ? total : 1;
@@ -158,7 +156,7 @@ export const pushQuestionGuideBill = ({
teamId: string;
tmbId: string;
}) => {
const qgModel = global.qgModels?.[0] || defaultQGModels[0];
const qgModel = global.qgModels[0];
const total = qgModel.price * tokens;
createBill({
teamId,