mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-22 12:20:34 +00:00
v4.6-3 (#471)
This commit is contained in:
53
packages/service/core/chat/utils.ts
Normal file
53
packages/service/core/chat/utils.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
|
||||
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
|
||||
import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
|
||||
import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt';
|
||||
|
||||
/* slice chat context by tokens */
|
||||
export function ChatContextFilter({
|
||||
messages = [],
|
||||
maxTokens
|
||||
}: {
|
||||
messages: ChatItemType[];
|
||||
maxTokens: number;
|
||||
}) {
|
||||
if (!Array.isArray(messages)) {
|
||||
return [];
|
||||
}
|
||||
const rawTextLen = messages.reduce((sum, item) => sum + item.value.length, 0);
|
||||
|
||||
// If the text length is less than half of the maximum token, no calculation is required
|
||||
if (rawTextLen < maxTokens * 0.5) {
|
||||
return messages;
|
||||
}
|
||||
|
||||
// filter startWith system prompt
|
||||
const chatStartIndex = messages.findIndex((item) => item.obj !== ChatRoleEnum.System);
|
||||
const systemPrompts: ChatItemType[] = messages.slice(0, chatStartIndex);
|
||||
const chatPrompts: ChatItemType[] = messages.slice(chatStartIndex);
|
||||
|
||||
// reduce token of systemPrompt
|
||||
maxTokens -= countMessagesTokens({
|
||||
messages: systemPrompts
|
||||
});
|
||||
|
||||
// 根据 tokens 截断内容
|
||||
const chats: ChatItemType[] = [];
|
||||
|
||||
// 从后往前截取对话内容
|
||||
for (let i = chatPrompts.length - 1; i >= 0; i--) {
|
||||
const item = chatPrompts[i];
|
||||
chats.unshift(item);
|
||||
|
||||
const tokens = countPromptTokens(item.value, adaptRole_Chat2Message(item.obj));
|
||||
maxTokens -= tokens;
|
||||
|
||||
/* 整体 tokens 超出范围, system必须保留 */
|
||||
if (maxTokens <= 0) {
|
||||
chats.shift();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return [...systemPrompts, ...chats];
|
||||
}
|
@@ -56,8 +56,7 @@ const DatasetCollectionSchema = new Schema({
|
||||
ref: 'dataset.files'
|
||||
},
|
||||
rawLink: {
|
||||
type: String,
|
||||
default: ''
|
||||
type: String
|
||||
},
|
||||
// 451 初始化
|
||||
pgCollectionId: {
|
||||
|
@@ -1,5 +1,25 @@
|
||||
import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
|
||||
import { MongoDatasetCollection } from './collection/schema';
|
||||
import { MongoDataset } from './schema';
|
||||
|
||||
/* ============= dataset ========== */
|
||||
/* find all datasetId by top datasetId */
|
||||
export async function findDatasetIdTreeByTopDatasetId(
|
||||
id: string,
|
||||
result: string[] = []
|
||||
): Promise<string[]> {
|
||||
let allChildrenIds = [...result];
|
||||
|
||||
// find children
|
||||
const children = await MongoDataset.find({ parentId: id });
|
||||
|
||||
for (const child of children) {
|
||||
const grandChildrenIds = await findDatasetIdTreeByTopDatasetId(child._id, result);
|
||||
allChildrenIds = allChildrenIds.concat(grandChildrenIds);
|
||||
}
|
||||
|
||||
return [String(id), ...allChildrenIds];
|
||||
}
|
||||
|
||||
export async function getCollectionWithDataset(collectionId: string) {
|
||||
const data = (
|
||||
|
78
packages/service/core/dataset/data/schema.ts
Normal file
78
packages/service/core/dataset/data/schema.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
import { connectionMongo, type Model } from '../../../common/mongo';
|
||||
const { Schema, model, models } = connectionMongo;
|
||||
import { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type.d';
|
||||
import {
|
||||
TeamCollectionName,
|
||||
TeamMemberCollectionName
|
||||
} from '@fastgpt/global/support/user/team/constant';
|
||||
import { DatasetCollectionName } from '../schema';
|
||||
import { DatasetColCollectionName } from '../collection/schema';
|
||||
import { DatasetDataIndexTypeMap } from '@fastgpt/global/core/dataset/constant';
|
||||
|
||||
export const DatasetDataCollectionName = 'dataset.datas';
|
||||
|
||||
const DatasetDataSchema = new Schema({
|
||||
teamId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamCollectionName,
|
||||
required: true
|
||||
},
|
||||
tmbId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: TeamMemberCollectionName,
|
||||
required: true
|
||||
},
|
||||
datasetId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetCollectionName,
|
||||
required: true
|
||||
},
|
||||
collectionId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetColCollectionName,
|
||||
required: true
|
||||
},
|
||||
q: {
|
||||
type: String,
|
||||
required: true
|
||||
},
|
||||
a: {
|
||||
type: String,
|
||||
default: ''
|
||||
},
|
||||
indexes: {
|
||||
type: [
|
||||
{
|
||||
defaultIndex: {
|
||||
type: Boolean,
|
||||
default: false
|
||||
},
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetDataIndexTypeMap),
|
||||
required: true
|
||||
},
|
||||
dataId: {
|
||||
type: String,
|
||||
required: true
|
||||
},
|
||||
text: {
|
||||
type: String,
|
||||
required: true
|
||||
}
|
||||
}
|
||||
],
|
||||
default: []
|
||||
}
|
||||
});
|
||||
|
||||
try {
|
||||
DatasetDataSchema.index({ userId: 1 });
|
||||
DatasetDataSchema.index({ datasetId: 1 });
|
||||
DatasetDataSchema.index({ collectionId: 1 });
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
|
||||
export const MongoDatasetData: Model<DatasetDataSchemaType> =
|
||||
models[DatasetDataCollectionName] || model(DatasetDataCollectionName, DatasetDataSchema);
|
@@ -2,7 +2,7 @@
|
||||
import { connectionMongo, type Model } from '../../../common/mongo';
|
||||
const { Schema, model, models } = connectionMongo;
|
||||
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
|
||||
import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
|
||||
import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
|
||||
import { DatasetColCollectionName } from '../collection/schema';
|
||||
import { DatasetCollectionName } from '../schema';
|
||||
import {
|
||||
@@ -33,12 +33,13 @@ const TrainingDataSchema = new Schema({
|
||||
ref: DatasetCollectionName,
|
||||
required: true
|
||||
},
|
||||
datasetCollectionId: {
|
||||
collectionId: {
|
||||
type: Schema.Types.ObjectId,
|
||||
ref: DatasetColCollectionName,
|
||||
required: true
|
||||
},
|
||||
billId: {
|
||||
// concat bill
|
||||
type: String,
|
||||
default: ''
|
||||
},
|
||||
@@ -48,6 +49,7 @@ const TrainingDataSchema = new Schema({
|
||||
required: true
|
||||
},
|
||||
expireAt: {
|
||||
// It will be deleted after 7 days
|
||||
type: Date,
|
||||
default: () => new Date()
|
||||
},
|
||||
@@ -56,6 +58,7 @@ const TrainingDataSchema = new Schema({
|
||||
default: () => new Date('2000/1/1')
|
||||
},
|
||||
model: {
|
||||
// ai model
|
||||
type: String,
|
||||
required: true
|
||||
},
|
||||
@@ -71,13 +74,29 @@ const TrainingDataSchema = new Schema({
|
||||
a: {
|
||||
type: String,
|
||||
default: ''
|
||||
},
|
||||
indexes: {
|
||||
type: [
|
||||
{
|
||||
type: {
|
||||
type: String,
|
||||
enum: Object.keys(DatasetDataIndexTypeMap),
|
||||
required: true
|
||||
},
|
||||
text: {
|
||||
type: String,
|
||||
required: true
|
||||
}
|
||||
}
|
||||
],
|
||||
default: []
|
||||
}
|
||||
});
|
||||
|
||||
try {
|
||||
TrainingDataSchema.index({ lockTime: 1 });
|
||||
TrainingDataSchema.index({ userId: 1 });
|
||||
TrainingDataSchema.index({ datasetCollectionId: 1 });
|
||||
TrainingDataSchema.index({ collectionId: 1 });
|
||||
TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 });
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
|
Reference in New Issue
Block a user