Archer
2023-11-15 11:36:25 +08:00
committed by GitHub
parent 592e1a93a2
commit bfd8be5df0
181 changed files with 2499 additions and 1552 deletions

View File

@@ -1,68 +0,0 @@
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import type { NextApiResponse } from 'next';
import { countMessagesTokens, countPromptTokens } from '@/global/common/tiktoken';
import { adaptRole_Chat2Message } from '@/utils/common/adapt/message';
export type ChatCompletionResponseType = {
streamResponse: any;
responseMessages: ChatItemType[];
responseText: string;
totalTokens: number;
};
export type StreamResponseType = {
chatResponse: any;
messages: ChatItemType[];
res: NextApiResponse;
model: string;
[key: string]: any;
};
/* slice chat context by tokens */
export function ChatContextFilter({
messages = [],
maxTokens
}: {
messages: ChatItemType[];
maxTokens: number;
}) {
if (!Array.isArray(messages)) {
return [];
}
const rawTextLen = messages.reduce((sum, item) => sum + item.value.length, 0);
// If the text length is less than half of the maximum token count, no calculation is required
if (rawTextLen < maxTokens * 0.5) {
return messages;
}
// split off the leading system prompts
const chatStartIndex = messages.findIndex((item) => item.obj !== ChatRoleEnum.System);
const systemPrompts: ChatItemType[] = messages.slice(0, chatStartIndex);
const chatPrompts: ChatItemType[] = messages.slice(chatStartIndex);
// reduce token of systemPrompt
maxTokens -= countMessagesTokens({
messages: systemPrompts
});
// truncate the content according to the token budget
const chats: ChatItemType[] = [];
// take chat messages from newest to oldest
for (let i = chatPrompts.length - 1; i >= 0; i--) {
const item = chatPrompts[i];
chats.unshift(item);
const tokens = countPromptTokens(item.value, adaptRole_Chat2Message(item.obj));
maxTokens -= tokens;
/* total tokens exceeded the limit; the system prompts must be kept */
if (maxTokens <= 0) {
chats.shift();
break;
}
}
return [...systemPrompts, ...chats];
}
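For reference, a minimal usage sketch of ChatContextFilter (illustrative values only; real ChatItemType items may carry extra fields): the system prompts always survive, and older turns are dropped first once the token budget is exhausted.
// Hypothetical usage -- message contents are illustrative placeholders.
const history = [
  { obj: ChatRoleEnum.System, value: 'You are a helpful assistant.' },
  { obj: ChatRoleEnum.Human, value: 'First question ...' },
  { obj: ChatRoleEnum.AI, value: 'First answer ...' },
  { obj: ChatRoleEnum.Human, value: 'Latest question ...' }
] as ChatItemType[];
// Keeps the system prompts and fills the remaining budget from the newest turn backwards.
const trimmed = ChatContextFilter({ messages: history, maxTokens: 2000 });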

View File

@@ -1,106 +1,19 @@
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import { getVectorsByText } from '@/service/core/ai/vector';
import { PgClient } from '@fastgpt/service/common/pg';
import { delay } from '@/utils/tools';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import {
DatasetDataItemType,
PgDataItemType,
PgRawDataItemType
} from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
CreateDatasetDataProps,
PatchIndexesProps,
UpdateDatasetDataProps
} from '@fastgpt/global/core/dataset/controller';
import { deletePgDataById, insertData2Pg, updatePgDataById } from './pg';
import { Types } from 'mongoose';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
export async function formatPgRawData(data: PgRawDataItemType) {
return {
id: data.id,
q: data.q,
a: data.a,
teamId: data.team_id,
tmbId: data.tmb_id,
datasetId: data.dataset_id,
collectionId: data.collection_id
};
}
/* get */
export async function getDatasetPgData({ id }: { id: string }): Promise<PgDataItemType> {
const { rows } = await PgClient.select<PgRawDataItemType>(PgDatasetTableName, {
fields: ['id', 'q', 'a', 'team_id', 'tmb_id', 'dataset_id', 'collection_id'],
where: [['id', id]],
limit: 1
});
const row = rows[0];
if (!row) return Promise.reject('Data not found');
return formatPgRawData(row);
}
export async function getPgDataWithCollection({
pgDataList
}: {
pgDataList: PgRawDataItemType[];
}): Promise<DatasetDataItemType[]> {
const collections = await MongoDatasetCollection.find(
{
_id: { $in: pgDataList.map((item) => item.collection_id) }
},
'_id name datasetId metadata'
).lean();
return pgDataList.map((item) => {
const collection = collections.find(
(collection) => String(collection._id) === item.collection_id
);
return {
id: item.id,
q: item.q,
a: item.a,
datasetId: collection?.datasetId || '',
collectionId: item.collection_id,
sourceName: collection?.name || '',
sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink
};
});
}
type Props = {
q: string;
a?: string;
model: string;
};
/**
 * update q or a
 */
/**
 * insert data.
* 1. create data id
* 2. insert pg
* 3. create mongo data
*/
export async function updateData2Dataset({ dataId, q, a = '', model }: Props & { dataId: string }) {
const { vectors = [], tokenLen = 0 } = await (async () => {
if (q) {
return getVectorsByText({
input: [q],
model
});
}
return { vectors: [[]], tokenLen: 0 };
})();
await PgClient.update(PgDatasetTableName, {
where: [['id', dataId]],
values: [
{ key: 'a', value: a.replace(/'/g, '"') },
...(q
? [
{ key: 'q', value: q.replace(/'/g, '"') },
{ key: 'vector', value: `[${vectors[0]}]` }
]
: [])
]
});
return {
vectors,
tokenLen
};
}
/* insert data to pg */
export async function insertData2Dataset({
teamId,
tmbId,
@@ -108,61 +21,215 @@ export async function insertData2Dataset({
collectionId,
q,
a = '',
indexes,
model
}: Props & {
teamId: string;
tmbId: string;
datasetId: string;
collectionId: string;
}: CreateDatasetDataProps & {
model: string;
}) {
if (!q || !datasetId || !collectionId || !model) {
return Promise.reject('q, datasetId, collectionId and model are required');
}
const { vectors, tokenLen } = await getVectorsByText({
model,
input: [q]
q = q.trim();
a = a.trim();
const id = new Types.ObjectId();
const qaStr = `${q}\n${a}`.trim();
// if no indexes are provided, create a default index
indexes =
Array.isArray(indexes) && indexes.length > 0
? indexes.map((index) => ({
...index,
dataId: undefined,
defaultIndex: indexes?.length === 1 && index.text === qaStr ? true : index.defaultIndex
}))
: [getDefaultIndex({ q, a })];
// insert to pg
const result = await Promise.all(
indexes.map((item) =>
insertData2Pg({
mongoDataId: String(id),
input: item.text,
model,
teamId,
tmbId,
datasetId,
collectionId
})
)
);
// create mongo
const { _id } = await MongoDatasetData.create({
_id: id,
teamId,
tmbId,
datasetId,
collectionId,
q,
a,
indexes: indexes.map((item, i) => ({
...item,
dataId: result[i].insertId
}))
});
let retry = 2;
async function insertPg(): Promise<string> {
try {
const { rows } = await PgClient.insert(PgDatasetTableName, {
values: [
[
{ key: 'vector', value: `[${vectors[0]}]` },
{ key: 'team_id', value: String(teamId) },
{ key: 'tmb_id', value: String(tmbId) },
{ key: 'q', value: q },
{ key: 'a', value: a },
{ key: 'dataset_id', value: datasetId },
{ key: 'collection_id', value: collectionId }
]
]
});
return rows[0].id;
} catch (error) {
if (--retry < 0) {
return Promise.reject(error);
}
await delay(500);
return insertPg();
}
}
const insertId = await insertPg();
return {
insertId,
tokenLen,
vectors
insertId: _id,
tokenLen: result.reduce((acc, cur) => acc + cur.tokenLen, 0)
};
}
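A hedged usage sketch of the new insertData2Dataset flow (all ids and the model name below are placeholders): one mongo document is created per data item, plus one pg vector row per index.
// Illustrative call only -- the ids and embedding model are hypothetical.
const { insertId, tokenLen } = await insertData2Dataset({
  teamId: 'teamId',
  tmbId: 'tmbId',
  datasetId: 'datasetId',
  collectionId: 'collectionId',
  q: 'What is FastGPT?',
  a: 'An LLM application platform.',
  indexes: [], // empty -> getDefaultIndex({ q, a }) builds a default index
  model: 'text-embedding-ada-002'
});
// insertId is the mongo _id; tokenLen sums the embedding tokens across all indexes.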
/**
 * delete data by collectionIds
 */
/**
 * update data
* 1. compare indexes
* 2. update pg data
* 3. update mongo data
*/
export async function updateData2Dataset({
dataId,
q,
a,
indexes,
model
}: UpdateDatasetDataProps & { model: string }) {
if (!Array.isArray(indexes)) {
return Promise.reject('indexes is required');
}
const qaStr = `${q}\n${a}`.trim();
// patch index and update pg
const mongoData = await MongoDatasetData.findById(dataId);
if (!mongoData) return Promise.reject('Data not found');
// make sure there is at least one index
if (indexes.length === 0) {
const databaseDefaultIndex = mongoData.indexes.find((index) => index.defaultIndex);
indexes = [
getDefaultIndex({
q,
a,
dataId: databaseDefaultIndex ? String(databaseDefaultIndex.dataId) : undefined
})
];
}
// patch indexes, create, update, delete
const patchResult: PatchIndexesProps[] = [];
// delete any database index that no longer appears in the new indexes
for (const item of mongoData.indexes) {
const index = indexes.find((index) => index.dataId === item.dataId);
if (!index) {
patchResult.push({
type: 'delete',
index: item
});
}
}
for (const item of indexes) {
const index = mongoData.indexes.find((index) => index.dataId === item.dataId);
// in database, update
if (index) {
// the index text was edited manually; update it
if (index.text !== item.text) {
patchResult.push({
type: 'update',
index: item
});
} else if (index.defaultIndex && index.text !== qaStr) {
// update default index
patchResult.push({
type: 'update',
index: {
...item,
type:
item.type === DatasetDataIndexTypeEnum.qa && !a
? DatasetDataIndexTypeEnum.chunk
: item.type,
text: qaStr
}
});
}
} else {
// not in database, create
patchResult.push({
type: 'create',
index: item
});
}
}
const result = await Promise.all(
patchResult.map(async (item) => {
if (item.type === 'create') {
const result = await insertData2Pg({
mongoDataId: dataId,
input: item.index.text,
model,
teamId: mongoData.teamId,
tmbId: mongoData.tmbId,
datasetId: mongoData.datasetId,
collectionId: mongoData.collectionId
});
item.index.dataId = result.insertId;
return result;
}
if (item.type === 'update' && item.index.dataId) {
return updatePgDataById({
id: item.index.dataId,
input: item.index.text,
model
});
}
if (item.type === 'delete' && item.index.dataId) {
return deletePgDataById(['id', item.index.dataId]);
}
return {
tokenLen: 0
};
})
);
const tokenLen = result.reduce((acc, cur) => acc + cur.tokenLen, 0);
// update mongo
mongoData.q = q || mongoData.q;
mongoData.a = a ?? mongoData.a;
// @ts-ignore
mongoData.indexes = indexes;
await mongoData.save();
return {
tokenLen
};
}
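The patch computation above is a three-way diff over index dataIds; here is the same idea in isolation, as a simplified sketch with hypothetical names (not part of this commit):
// Simplified sketch: classify indexes into create / update / delete sets.
type IndexLike = { dataId?: string; text: string };

function diffIndexes(oldIndexes: IndexLike[], newIndexes: IndexLike[]) {
  const toDelete = oldIndexes.filter((o) => !newIndexes.some((n) => n.dataId === o.dataId));
  const toCreate = newIndexes.filter((n) => !oldIndexes.some((o) => o.dataId === n.dataId));
  const toUpdate = newIndexes.filter((n) => {
    const o = oldIndexes.find((old) => old.dataId === n.dataId);
    return !!o && o.text !== n.text;
  });
  return { toCreate, toUpdate, toDelete };
}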
/* delete all data by datasetIds */
export async function delDataByDatasetId({ datasetIds }: { datasetIds: string[] }) {
datasetIds = datasetIds.map((item) => String(item));
// delete pg data
await deletePgDataById(`dataset_id IN ('${datasetIds.join("','")}')`);
// delete dataset.datas
await MongoDatasetData.deleteMany({ datasetId: { $in: datasetIds } });
}
/**
* delete all data by collectionIds
*/
export async function delDataByCollectionId({ collectionIds }: { collectionIds: string[] }) {
const ids = collectionIds.map((item) => String(item));
return PgClient.delete(PgDatasetTableName, {
where: [`collection_id IN ('${ids.join("','")}')`]
});
// delete pg data
await deletePgDataById(`collection_id IN ('${ids.join("','")}')`);
// delete dataset.datas
await MongoDatasetData.deleteMany({ collectionId: { $in: ids } });
}
/**
* delete one data by mongoDataId
*/
export async function deleteDataByDataId(mongoDataId: string) {
await deletePgDataById(['data_id', mongoDataId]);
await MongoDatasetData.findByIdAndDelete(mongoDataId);
}

View File

@@ -0,0 +1,281 @@
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type.d';
import { PgClient } from '@fastgpt/service/common/pg';
import { getVectorsByText } from '@/service/core/ai/vector';
import { delay } from '@/utils/tools';
import { PgSearchRawType } from '@fastgpt/global/core/dataset/api';
import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { POST } from '@fastgpt/service/common/api/plusRequest';
import { PostReRankResponse } from '@fastgpt/global/core/ai/api';
export async function insertData2Pg({
mongoDataId,
input,
model,
teamId,
tmbId,
datasetId,
collectionId
}: {
mongoDataId: string;
input: string;
model: string;
teamId: string;
tmbId: string;
datasetId: string;
collectionId: string;
}) {
let retry = 2;
async function insertPg(): Promise<{ insertId: string; vectors: number[][]; tokenLen: number }> {
try {
// get vector
const { vectors, tokenLen } = await getVectorsByText({
model,
input: [input]
});
const { rows } = await PgClient.insert(PgDatasetTableName, {
values: [
[
{ key: 'vector', value: `[${vectors[0]}]` },
{ key: 'team_id', value: String(teamId) },
{ key: 'tmb_id', value: String(tmbId) },
{ key: 'dataset_id', value: datasetId },
{ key: 'collection_id', value: collectionId },
{ key: 'data_id', value: String(mongoDataId) }
]
]
});
return {
insertId: rows[0].id,
vectors,
tokenLen
};
} catch (error) {
if (--retry < 0) {
return Promise.reject(error);
}
await delay(500);
return insertPg();
}
}
return insertPg();
}
export async function updatePgDataById({
id,
input,
model
}: {
id: string;
input: string;
model: string;
}) {
let retry = 2;
async function updatePg(): Promise<{ vectors: number[][]; tokenLen: number }> {
try {
// get vector
const { vectors, tokenLen } = await getVectorsByText({
model,
input: [input]
});
// update pg
await PgClient.update(PgDatasetTableName, {
where: [['id', id]],
values: [{ key: 'vector', value: `[${vectors[0]}]` }]
});
return {
vectors,
tokenLen
};
} catch (error) {
if (--retry < 0) {
return Promise.reject(error);
}
await delay(500);
return updatePg();
}
}
return updatePg();
}
export async function deletePgDataById(
where: ['id' | 'dataset_id' | 'collection_id' | 'data_id', string] | string
) {
let retry = 2;
async function deleteData(): Promise<any> {
try {
await PgClient.delete(PgDatasetTableName, {
where: [where]
});
} catch (error) {
if (--retry < 0) {
return Promise.reject(error);
}
await delay(500);
return deleteData();
}
}
await deleteData();
return {
tokenLen: 0
};
}
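insertData2Pg, updatePgDataById and deletePgDataById all repeat the same retry-with-delay pattern (two retries, 500 ms apart). A sketch of how it could be factored out; withRetry is a hypothetical helper, not part of this commit:
// Hypothetical helper: retry an async operation a fixed number of times.
async function withRetry<T>(fn: () => Promise<T>, retries = 2, ms = 500): Promise<T> {
  try {
    return await fn();
  } catch (error) {
    if (retries <= 0) return Promise.reject(error);
    await delay(ms);
    return withRetry(fn, retries - 1, ms);
  }
}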
// search
export async function searchDatasetData({
text,
model,
similarity = 0,
limit,
datasetIds = []
}: {
text: string;
model: string;
similarity?: number; // minimum similarity score
limit: number;
datasetIds: string[];
}) {
const { vectors, tokenLen } = await getVectorsByText({
model,
input: [text]
});
const minLimit = global.systemEnv.pluginBaseUrl ? Math.max(50, limit * 4) : limit * 2;
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 100};
select id, collection_id, data_id, (vector <#> '[${
vectors[0]
}]') * -1 AS score from ${PgDatasetTableName} where dataset_id IN (${datasetIds
.map((id) => `'${String(id)}'`)
.join(',')}) AND vector <#> '[${vectors[0]}]' < -${similarity} order by vector <#> '[${
vectors[0]
}]' limit ${minLimit};
COMMIT;`
);
const rows = results?.[2]?.rows as PgSearchRawType[];
// deduplicate rows that share the same data_id
const filterRows: PgSearchRawType[] = [];
let set = new Set<string>();
for (const row of rows) {
if (!set.has(row.data_id)) {
filterRows.push(row);
set.add(row.data_id);
}
}
// get q and a
const [collections, dataList] = await Promise.all([
MongoDatasetCollection.find(
{
_id: { $in: filterRows.map((item) => item.collection_id) }
},
'name metadata'
).lean(),
MongoDatasetData.find(
{
_id: { $in: filterRows.map((item) => item.data_id?.trim()) }
},
'datasetId collectionId q a indexes'
).lean()
]);
const formatResult = filterRows
.map((item) => {
const collection = collections.find(
(collection) => String(collection._id) === item.collection_id
);
const data = dataList.find((data) => String(data._id) === item.data_id);
// if the collection or data does not exist, the related mongo data has already been deleted
if (!collection || !data) return null;
return {
id: String(data._id),
q: data.q,
a: data.a,
indexes: data.indexes,
datasetId: String(data.datasetId),
collectionId: String(data.collectionId),
sourceName: collection.name || '',
sourceId: collection.metadata?.fileId || collection.metadata?.rawLink,
score: item.score
};
})
.filter((item) => item !== null) as SearchDataResponseItemType[];
// remove entries with identical q and a
set = new Set<string>();
const filterData = formatResult.filter((item) => {
const str = `${item.q}${item.a}`.trim();
if (set.has(str)) return false;
set.add(str);
return true;
});
// ReRank result
const reRankResult = await reRankSearchResult({
query: text,
data: filterData
});
// similarity filter
const filterReRankResult = reRankResult.filter((item) => item.score > similarity);
// concat rerank and embedding data
set = new Set<string>(filterReRankResult.map((item) => item.id));
const concatResult = filterReRankResult.concat(
filterData.filter((item) => {
if (set.has(item.id)) return false;
set.add(item.id);
return true;
})
);
return {
searchRes: concatResult.slice(0, limit),
tokenLen
};
}
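A note on the score math above: pgvector's <#> operator returns the negative inner product, so multiplying by -1 recovers the similarity, and the "vector <#> ... < -similarity" predicate keeps only rows above the threshold. A hedged usage sketch follows (the embedding model and dataset id are placeholders):
// Illustrative call only -- the embedding model and dataset id are hypothetical.
const { searchRes, tokenLen } = await searchDatasetData({
  text: 'How do I deploy FastGPT?',
  model: 'text-embedding-ada-002',
  similarity: 0.4,
  limit: 5,
  datasetIds: ['650f1f1f1f1f1f1f1f1f1f1f']
});
// searchRes holds at most `limit` items, rerank-ordered when a rerank service is configured.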
// rerank search results via the plus service
export async function reRankSearchResult({
data,
query
}: {
data: SearchDataResponseItemType[];
query: string;
}): Promise<SearchDataResponseItemType[]> {
if (!global.systemEnv.pluginBaseUrl) return data;
try {
const result = await POST<PostReRankResponse>('/core/ai/retrival/rerank', {
query,
inputs: data.map((item) => ({
id: item.id,
text: `${item.q}\n${item.a}`.trim()
}))
});
const mergeResult = result
.map((item) => {
const target = data.find((dataItem) => dataItem.id === item.id);
if (!target) return null;
return {
...target,
score: item.score ?? target.score
};
})
.filter((item) => item) as SearchDataResponseItemType[];
return mergeResult;
} catch (error) {
console.log(error);
return data;
}
}
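reRankSearchResult degrades gracefully: without a configured pluginBaseUrl, or on any request failure, the embedding-ranked data passes through unchanged. A brief illustrative call (query and input are placeholders):
// Illustrative only -- `filterData` stands for the deduplicated embedding results above.
const reRanked = await reRankSearchResult({ query: 'deploy fastgpt', data: filterData });
// Items the rerank service omits are dropped from mergeResult, which is why
// searchDatasetData concats the leftover embedding results afterwards.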

View File

@@ -1,11 +1,5 @@
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import {
SearchDataResponseItemType,
SearchDataResultItemType
} from '@fastgpt/global/core/dataset/type';
import { PgClient } from '@fastgpt/service/common/pg';
import { getVectorsByText } from '../../ai/vector';
import { getPgDataWithCollection } from './controller';
/**
 * Check whether an identical value already exists
@@ -30,75 +24,3 @@ export async function hasSameValue({
return Promise.reject('Identical data already exists');
}
}
/**
 * count the number of data rows in each collection
*/
export async function countCollectionData({
collectionIds,
datasetId
}: {
collectionIds: string[];
datasetId?: string;
}) {
collectionIds = collectionIds.map((item) => String(item));
if (collectionIds.length === 0) return [];
const { rows } = await PgClient.query(`
SELECT
${collectionIds
.map((id) => `SUM(CASE WHEN collection_id = '${id}' THEN 1 ELSE 0 END) AS count${id}`)
.join(',')}
FROM ${PgDatasetTableName}
WHERE collection_id IN (${collectionIds.map((id) => `'${id}'`).join(',')})
${datasetId ? `AND dataset_id='${String(datasetId)}'` : ''}
`);
const values = Object.values(rows[0]).map((item) => Number(item));
return values;
}
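For two collection ids, the (now removed) query above expands to roughly the SQL sketched in the comment below, returning one count column per collection in a single round trip (ids are illustrative):
// With collectionIds = ['a1', 'b2'], the generated SQL is approximately:
//   SELECT
//     SUM(CASE WHEN collection_id = 'a1' THEN 1 ELSE 0 END) AS counta1,
//     SUM(CASE WHEN collection_id = 'b2' THEN 1 ELSE 0 END) AS countb2
//   FROM <PgDatasetTableName>
//   WHERE collection_id IN ('a1','b2');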
export async function searchDatasetData({
text,
model,
similarity = 0,
limit,
datasetIds = []
}: {
text: string;
model: string;
similarity?: number;
limit: number;
datasetIds: string[];
}) {
const { vectors, tokenLen } = await getVectorsByText({
model,
input: [text]
});
const results: any = await PgClient.query(
`BEGIN;
SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 100};
select id, q, a, collection_id, (vector <#> '[${
vectors[0]
}]') * -1 AS score from ${PgDatasetTableName} where dataset_id IN (${datasetIds
.map((id) => `'${String(id)}'`)
.join(',')}) AND vector <#> '[${vectors[0]}]' < -${similarity} order by vector <#> '[${
vectors[0]
}]' limit ${limit};
COMMIT;`
);
const rows = results?.[2]?.rows as SearchDataResultItemType[];
const collectionsData = await getPgDataWithCollection({ pgDataList: rows });
const searchRes: SearchDataResponseItemType[] = collectionsData.map((item, index) => ({
...item,
score: rows[index].score
}));
return {
searchRes,
tokenLen
};
}

View File

@@ -1,16 +1,17 @@
import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema';
import { pushQABill } from '@/service/support/wallet/bill/push';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { DatasetDataIndexTypeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { sendOneInform } from '../support/user/inform/api';
import { getAIApi } from '@fastgpt/service/core/ai/config';
import type { ChatMessageItemType } from '@fastgpt/global/core/ai/type.d';
import { addLog } from '@fastgpt/service/common/mongo/controller';
import { splitText2Chunks } from '@/global/common/string/tools';
import { replaceVariable } from '@/global/common/string/tools';
import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { Prompt_AgentQA } from '@/global/core/prompt/agent';
import { pushDataToDatasetCollection } from '@/pages/api/core/dataset/data/pushData';
import { getErrText } from '@fastgpt/global/common/error/utils';
import { authTeamBalance } from '../support/permission/auth/bill';
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api.d';
const reduceQueue = () => {
global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;
@@ -43,7 +44,7 @@ export async function generateQA(): Promise<any> {
teamId: 1,
tmbId: 1,
datasetId: 1,
datasetCollectionId: 1,
collectionId: 1,
q: 1,
model: 1,
billId: 1,
@@ -71,7 +72,7 @@ export async function generateQA(): Promise<any> {
if (done) {
reduceQueue();
global.vectorQueueLen <= 0 && console.log(`【Index】Task done`);
global.vectorQueueLen <= 0 && console.log(`【QA】Task Done`);
return;
}
if (error || !data) {
@@ -87,15 +88,20 @@ export async function generateQA(): Promise<any> {
try {
sendOneInform({
type: 'system',
title: 'Index generation task suspended',
title: 'Text training task suspended',
content:
'The account balance is insufficient, so the index generation task has been suspended; it will resume after recharging. Suspended tasks are deleted after 7 days.',
'The team account balance is insufficient, so the text training task has been suspended; it will resume after recharging. Suspended tasks are deleted after 7 days.',
tmbId: data.tmbId
});
console.log('Insufficient balance, pausing the vector generation task');
await MongoDatasetTraining.findById(data._id, {
lockTime: new Date('2999/5/5')
});
console.log('Insufficient balance, pausing the 【QA】 generation task');
await MongoDatasetTraining.updateMany(
{
teamId: data.teamId
},
{
lockTime: new Date('2999/5/5')
}
);
} catch (error) {}
reduceQueue();
return generateQA();
@@ -123,18 +129,18 @@ export async function generateQA(): Promise<any> {
messages,
stream: false
});
const answer = chatResponse.choices?.[0].message?.content;
const answer = chatResponse.choices?.[0].message?.content || '';
const totalTokens = chatResponse.usage?.total_tokens || 0;
const qaArr = formatSplitText(answer || ''); // formatted QA pairs
const qaArr = formatSplitText(answer, text); // formatted QA pairs
// get vector and insert
await pushDataToDatasetCollection({
teamId: data.teamId,
tmbId: data.tmbId,
collectionId: data.datasetCollectionId,
collectionId: data.collectionId,
data: qaArr,
mode: TrainingModeEnum.index,
mode: TrainingModeEnum.chunk,
billId: data.billId
});
@@ -198,31 +204,44 @@ export async function generateQA(): Promise<any> {
/**
 * Check whether the text is returned in the expected format
*/
function formatSplitText(text: string) {
function formatSplitText(text: string, rawText: string) {
text = text.replace(/\\n/g, '\n'); // convert escaped "\n" sequences into real line breaks
const regex = /Q\d+:(\s*)(.*)(\s*)A\d+:(\s*)([\s\S]*?)(?=Q|$)/g; // regex that matches numbered Q/A pairs
const matches = text.matchAll(regex); // collect all matches
const result = []; // the final result
const result: PushDatasetDataChunkProps[] = []; // the final result
for (const match of matches) {
const q = match[2];
const a = match[5];
if (q && a) {
// add to the result only when both Q and A exist
const q = match[2] || '';
const a = match[5] || '';
if (q) {
result.push({
q: `${q}\n${a.trim().replace(/\n\s*/g, '\n')}`,
a: ''
q,
a,
indexes: [
{
defaultIndex: true,
type: DatasetDataIndexTypeEnum.qa,
text: `${q}\n${a.trim().replace(/\n\s*/g, '\n')}`
}
]
});
}
}
// empty result: fall back to splitting the raw text into chunks
if (result.length === 0) {
const splitRes = splitText2Chunks({ text: text, maxLen: 500 });
splitRes.chunks.forEach((item) => {
const splitRes = splitText2Chunks({ text: rawText, maxLen: 500 });
splitRes.chunks.forEach((chunk) => {
result.push({
q: item,
a: ''
q: chunk,
a: '',
indexes: [
{
defaultIndex: true,
type: DatasetDataIndexTypeEnum.chunk,
text: chunk
}
]
});
});
}
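For reference, the regex in formatSplitText accepts model output in this numbered shape (sample content is a made-up example); anything that fails to match falls back to the splitText2Chunks branch above:
// Sample of the Q/A format the regex matches -- content is illustrative only.
const sample = `Q1: What is FastGPT?
A1: An open-source LLM application platform.
Q2: How are QA pairs stored?
A2: As q/a fields plus a default qa-type index.`;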

View File

@@ -27,7 +27,7 @@ export async function generateVector(): Promise<any> {
const data = (
await MongoDatasetTraining.findOneAndUpdate(
{
mode: TrainingModeEnum.index,
mode: TrainingModeEnum.chunk,
lockTime: { $lte: new Date(Date.now() - 1 * 60 * 1000) }
},
{
@@ -39,9 +39,10 @@ export async function generateVector(): Promise<any> {
teamId: 1,
tmbId: 1,
datasetId: 1,
datasetCollectionId: 1,
collectionId: 1,
q: 1,
a: 1,
indexes: 1,
model: 1,
billId: 1
})
@@ -57,7 +58,8 @@ export async function generateVector(): Promise<any> {
data,
dataItem: {
q: data.q.replace(/[\x00-\x08]/g, ' '),
a: data.a?.replace(/[\x00-\x08]/g, ' ') || ''
a: data.a?.replace(/[\x00-\x08]/g, ' ') || '',
indexes: data.indexes
}
};
} catch (error) {
@@ -70,7 +72,7 @@ export async function generateVector(): Promise<any> {
if (done) {
reduceQueue();
global.vectorQueueLen <= 0 && console.log(`【Index】Task done`);
global.vectorQueueLen <= 0 && console.log(`【index】Task done`);
return;
}
if (error || !data) {
@@ -86,15 +88,20 @@ export async function generateVector(): Promise<any> {
try {
sendOneInform({
type: 'system',
title: 'Index generation task suspended',
title: 'Text training task suspended',
content:
'The account balance is insufficient, so the index generation task has been suspended; it will resume after recharging. Suspended tasks are deleted after 7 days.',
'The team account balance is insufficient, so the text training task has been suspended; it will resume after recharging. Suspended tasks are deleted after 7 days.',
tmbId: data.tmbId
});
console.log('Insufficient balance, pausing the vector generation task');
await MongoDatasetTraining.findById(data._id, {
lockTime: new Date('2999/5/5')
});
console.log('Insufficient balance, pausing the vector generation task');
await MongoDatasetTraining.updateMany(
{
teamId: data.teamId
},
{
lockTime: new Date('2999/5/5')
}
);
} catch (error) {}
reduceQueue();
return generateVector();
@@ -108,9 +115,10 @@ export async function generateVector(): Promise<any> {
teamId: data.teamId,
tmbId: data.teamId,
datasetId: data.datasetId,
collectionId: data.datasetCollectionId,
collectionId: data.collectionId,
q: dataItem.q,
a: dataItem.a,
indexes: dataItem.indexes,
model: data.model
});
// push bill

View File

@@ -1,5 +1,5 @@
import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
import { ChatContextFilter } from '@/service/common/tiktoken';
import { adaptChat2GptMessages } from '@fastgpt/global/core/chat/adapt';
import { ChatContextFilter } from '@fastgpt/service/core/chat/utils';
import type { moduleDispatchResType, ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum, TaskResponseKeyEnum } from '@fastgpt/global/core/chat/constants';
import { getAIApi } from '@fastgpt/service/core/ai/config';
@@ -7,7 +7,7 @@ import type { ClassifyQuestionAgentItemType } from '@fastgpt/global/core/module/
import { SystemInputEnum } from '@/constants/app';
import { FlowNodeSpecialInputKeyEnum } from '@fastgpt/global/core/module/node/constant';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import { replaceVariable } from '@/global/common/string/tools';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { Prompt_CQJson } from '@/global/core/prompt/agent';
import { FunctionModelItemType } from '@fastgpt/global/core/ai/model.d';
import { getCQModel } from '@/service/core/ai/model';

View File

@@ -1,5 +1,5 @@
import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
import { ChatContextFilter } from '@/service/common/tiktoken';
import { adaptChat2GptMessages } from '@fastgpt/global/core/chat/adapt';
import { ChatContextFilter } from '@fastgpt/service/core/chat/utils';
import type { moduleDispatchResType, ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum, TaskResponseKeyEnum } from '@fastgpt/global/core/chat/constants';
import { getAIApi } from '@fastgpt/service/core/ai/config';
@@ -7,7 +7,7 @@ import type { ContextExtractAgentItemType } from '@fastgpt/global/core/module/ty
import { ContextExtractEnum } from '@/constants/flow/flowField';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import { Prompt_ExtractJson } from '@/global/core/prompt/agent';
import { replaceVariable } from '@/global/common/string/tools';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { FunctionModelItemType } from '@fastgpt/global/core/ai/model.d';
type Props = ModuleDispatchProps<{

View File

@@ -1,5 +1,5 @@
import type { NextApiResponse } from 'next';
import { ChatContextFilter } from '@/service/common/tiktoken';
import { ChatContextFilter } from '@fastgpt/service/core/chat/utils';
import type { moduleDispatchResType, ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { sseResponseEventEnum } from '@fastgpt/service/common/response/constant';
@@ -12,11 +12,11 @@ import type { ChatModelItemType } from '@fastgpt/global/core/ai/model.d';
import { postTextCensor } from '@/service/common/censor';
import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constant';
import type { ModuleItemType } from '@fastgpt/global/core/module/type.d';
import { countMessagesTokens, sliceMessagesTB } from '@/global/common/tiktoken';
import { adaptChat2GptMessages } from '@/utils/common/adapt/message';
import { countMessagesTokens, sliceMessagesTB } from '@fastgpt/global/common/string/tiktoken';
import { adaptChat2GptMessages } from '@fastgpt/global/core/chat/adapt';
import { Prompt_QuotePromptList, Prompt_QuoteTemplateList } from '@/global/core/prompt/AIChat';
import type { AIChatProps } from '@/types/core/aiChat';
import { replaceVariable } from '@/global/common/string/tools';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import { responseWrite, responseWriteController } from '@fastgpt/service/common/response';
import { getChatModel, ModelTypeEnum } from '@/service/core/ai/model';
@@ -112,6 +112,7 @@ export const dispatchChatCompletion = async (props: ChatProps): Promise<ChatResp
temperature,
max_tokens,
stream,
seed: temperature < 0.3 ? 1 : undefined,
messages: [
...(modelConstantsData.defaultSystemChatPrompt
? [

View File

@@ -1,11 +1,11 @@
import type { moduleDispatchResType } from '@fastgpt/global/core/chat/type.d';
import { TaskResponseKeyEnum } from '@fastgpt/global/core/chat/constants';
import { countModelPrice } from '@/service/support/wallet/bill/utils';
import type { SelectedDatasetType } from '@/types/core/dataset';
import type { SelectedDatasetType } from '@fastgpt/global/core/module/api.d';
import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
import type { ModuleDispatchProps } from '@/types/core/chat/type';
import { ModelTypeEnum } from '@/service/core/ai/model';
import { searchDatasetData } from '@/service/core/dataset/data/utils';
import { searchDatasetData } from '@/service/core/dataset/data/pg';
type DatasetSearchProps = ModuleDispatchProps<{
datasets: SelectedDatasetType;
@@ -22,8 +22,6 @@ export type KBSearchResponse = {
export async function dispatchDatasetSearch(props: Record<string, any>): Promise<KBSearchResponse> {
const {
teamId,
tmbId,
inputs: { datasets = [], similarity = 0.4, limit = 5, userChatInput }
} = props as DatasetSearchProps;

View File

@@ -7,7 +7,7 @@ import { FlowNodeTypeEnum } from '@fastgpt/global/core/module/node/constant';
import { ModuleItemType } from '@fastgpt/global/core/module/type';
import { UserType } from '@fastgpt/global/support/user/type';
import { TaskResponseKeyEnum } from '@fastgpt/global/core/chat/constants';
import { replaceVariable } from '@/global/common/string/tools';
import { replaceVariable } from '@fastgpt/global/common/string/tools';
import { responseWrite } from '@fastgpt/service/common/response';
import { sseResponseEventEnum } from '@fastgpt/service/common/response/constant';
import { getSystemTime } from '@fastgpt/global/common/time/timezone';

View File

@@ -34,7 +34,7 @@ export const dispatchAppRequest = async (props: Props): Promise<Response> => {
const appData = await MongoApp.findOne({
_id: app.id,
userId: user._id
teamId: user.team.teamId
});
if (!appData) {

View File

@@ -40,20 +40,18 @@ async function initRootUser() {
await MongoUser.findOneAndUpdate(
{ username: 'root' },
{
password: hashStr(psw),
balance: 999999 * PRICE_SCALE
password: hashStr(psw)
}
);
} else {
const { _id } = await MongoUser.create({
username: 'root',
password: hashStr(psw),
balance: 999999 * PRICE_SCALE
password: hashStr(psw)
});
rootId = _id;
}
// init root team
await createDefaultTeam({ userId: rootId, maxSize: 1 });
await createDefaultTeam({ userId: rootId, maxSize: 1, balance: 9999 * PRICE_SCALE });
console.log(`root user init:`, {
username: 'root',

View File

@@ -1,36 +1,43 @@
import { getDatasetPgData } from '@/service/core/dataset/data/controller';
import { PgDataItemType } from '@fastgpt/global/core/dataset/type';
import { DatasetDataItemType, DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type';
import { AuthResponseType } from '@fastgpt/global/support/permission/type';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { parseHeaderCert } from '@fastgpt/service/support/permission/controller';
import { AuthModeType } from '@fastgpt/service/support/permission/type';
/* data shares the same permissions as its collection */
export async function authDatasetData({
dataId,
...props
}: AuthModeType & {
dataId: string;
}): Promise<
AuthResponseType & {
datasetData: PgDataItemType;
}
> {
const result = await parseHeaderCert(props);
const { tmbId } = result;
}) {
// get pg data
const datasetData = await getDatasetPgData({ id: dataId });
const datasetData = await MongoDatasetData.findById(dataId);
const isOwner = String(datasetData.tmbId) === tmbId;
// data has the same permissions as collection
const { canWrite } = await authDatasetCollection({
if (!datasetData) {
return Promise.reject('Data not found');
}
const result = await authDatasetCollection({
...props,
collectionId: datasetData.collectionId
});
const data: DatasetDataItemType = {
id: String(datasetData._id),
q: datasetData.q,
a: datasetData.a,
indexes: datasetData.indexes,
datasetId: String(datasetData.datasetId),
collectionId: String(datasetData.collectionId),
sourceName: result.collection.name || '',
sourceId: result.collection.metadata?.fileId || result.collection.metadata?.rawLink,
isOwner: String(datasetData.tmbId) === result.tmbId,
canWrite: result.canWrite
};
return {
...result,
datasetData,
isOwner,
canWrite
datasetData: data
};
}

View File

@@ -1,7 +1,10 @@
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
import { MongoUser } from '@fastgpt/service/support/user/schema';
import { UserType } from '@fastgpt/global/support/user/type';
import { getTeamInfoByTmbId } from '@fastgpt/service/support/user/team/controller';
import {
getTeamInfoByTmbId,
getUserDefaultTeam
} from '@fastgpt/service/support/user/team/controller';
export async function getUserDetail({
tmbId,
@@ -10,7 +13,15 @@ export async function getUserDetail({
tmbId?: string;
userId?: string;
}): Promise<UserType> {
const team = await getTeamInfoByTmbId({ tmbId, userId });
const team = await (async () => {
if (tmbId) {
return getTeamInfoByTmbId({ tmbId });
}
if (userId) {
return getUserDefaultTeam({ userId });
}
return Promise.reject(ERROR_ENUM.unAuthorization);
})();
const user = await MongoUser.findById(team.userId);
if (!user) {

View File

@@ -1,5 +1,5 @@
import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants';
import { getAudioSpeechModel, getModelMap, ModelTypeEnum } from '@/service/core/ai/model';
import { getAudioSpeechModel } from '@/service/core/ai/model';
import type { ChatHistoryItemResType } from '@fastgpt/global/core/chat/api.d';
import { formatPrice } from '@fastgpt/global/support/wallet/bill/tools';
import { addLog } from '@fastgpt/service/common/mongo/controller';

View File

@@ -11,7 +11,7 @@ type Props = {
teamId: string;
tmbId: string;
variables?: Record<string, any>;
isOwner: boolean;
updateUseTime: boolean;
source: `${ChatSourceEnum}`;
shareId?: string;
content: [ChatItemType, ChatItemType];
@@ -23,7 +23,7 @@ export async function saveChat({
teamId,
tmbId,
variables,
isOwner,
updateUseTime,
source,
shareId,
content
@@ -76,7 +76,7 @@ export async function saveChat({
);
}
if (isOwner && source === ChatSourceEnum.online) {
if (updateUseTime && source === ChatSourceEnum.online) {
promise.push(
MongoApp.findByIdAndUpdate(appId, {
updateTime: new Date()