Commit: bfd8be5df0
Author: Archer (committed by GitHub)
Date: 2023-11-15 11:36:25 +08:00
Parent: 592e1a93a2
181 changed files with 2499 additions and 1552 deletions


@@ -0,0 +1,53 @@
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt';
/* slice chat context by tokens */
export function ChatContextFilter({
  messages = [],
  maxTokens
}: {
  messages: ChatItemType[];
  maxTokens: number;
}) {
  if (!Array.isArray(messages)) {
    return [];
  }

  const rawTextLen = messages.reduce((sum, item) => sum + item.value.length, 0);

  // If the raw text length is less than half of the token budget, no counting is required
  if (rawTextLen < maxTokens * 0.5) {
    return messages;
  }

  // split off the leading system prompts
  const chatStartIndex = messages.findIndex((item) => item.obj !== ChatRoleEnum.System);
  const systemPrompts: ChatItemType[] = messages.slice(0, chatStartIndex);
  const chatPrompts: ChatItemType[] = messages.slice(chatStartIndex);

  // reserve tokens for the system prompts
  maxTokens -= countMessagesTokens({
    messages: systemPrompts
  });

  // truncate the conversation by tokens
  const chats: ChatItemType[] = [];

  // walk the conversation from newest to oldest
  for (let i = chatPrompts.length - 1; i >= 0; i--) {
    const item = chatPrompts[i];
    chats.unshift(item);

    const tokens = countPromptTokens(item.value, adaptRole_Chat2Message(item.obj));
    maxTokens -= tokens;

    /* total tokens exceed the budget; the system prompts must be kept */
    if (maxTokens <= 0) {
      chats.shift();
      break;
    }
  }

  return [...systemPrompts, ...chats];
}
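
A minimal usage sketch for the new helper (not part of the commit); the message contents and the 2000-token budget are illustrative, and the objects are assumed to carry only the `obj` (role) and `value` (text) fields the function reads:

const filtered = ChatContextFilter({
  messages: [
    { obj: ChatRoleEnum.System, value: 'You are a helpful assistant.' },
    { obj: ChatRoleEnum.Human, value: 'Summarize the release notes.' },
    { obj: ChatRoleEnum.AI, value: 'Here is a summary...' }
  ] as ChatItemType[],
  maxTokens: 2000
});
// system prompts always survive; older chat turns are dropped first when over budget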


@@ -56,8 +56,7 @@ const DatasetCollectionSchema = new Schema({
    ref: 'dataset.files'
  },
  rawLink: {
-    type: String,
-    default: ''
+    type: String
  },
  // 451 initialization
  pgCollectionId: {


@@ -1,5 +1,25 @@
import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from './collection/schema';
import { MongoDataset } from './schema';
/* ============= dataset ========== */
/* find every datasetId under a top-level datasetId */
export async function findDatasetIdTreeByTopDatasetId(
  id: string,
  result: string[] = []
): Promise<string[]> {
  let allChildrenIds = [...result];

  // find direct children
  const children = await MongoDataset.find({ parentId: id });

  // recurse into each child and collect the grandchildren ids
  for (const child of children) {
    const grandChildrenIds = await findDatasetIdTreeByTopDatasetId(child._id, result);
    allChildrenIds = allChildrenIds.concat(grandChildrenIds);
  }

  return [String(id), ...allChildrenIds];
}
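
A quick usage sketch (not part of the commit); `topDatasetId` is a hypothetical id, and the deleteMany call merely illustrates fanning the collected ids out into a single query:

const datasetIds = await findDatasetIdTreeByTopDatasetId(topDatasetId);
// e.g. remove every collection that lives anywhere in the dataset tree
await MongoDatasetCollection.deleteMany({ datasetId: { $in: datasetIds } });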
export async function getCollectionWithDataset(collectionId: string) {
  const data = (


@@ -0,0 +1,78 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type.d';
import {
  TeamCollectionName,
  TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
import { DatasetCollectionName } from '../schema';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetDataIndexTypeMap } from '@fastgpt/global/core/dataset/constant';

export const DatasetDataCollectionName = 'dataset.datas';

const DatasetDataSchema = new Schema({
  teamId: {
    type: Schema.Types.ObjectId,
    ref: TeamCollectionName,
    required: true
  },
  tmbId: {
    type: Schema.Types.ObjectId,
    ref: TeamMemberCollectionName,
    required: true
  },
  datasetId: {
    type: Schema.Types.ObjectId,
    ref: DatasetCollectionName,
    required: true
  },
  collectionId: {
    type: Schema.Types.ObjectId,
    ref: DatasetColCollectionName,
    required: true
  },
  q: {
    type: String,
    required: true
  },
  a: {
    type: String,
    default: ''
  },
  indexes: {
    type: [
      {
        defaultIndex: {
          type: Boolean,
          default: false
        },
        type: {
          type: String,
          enum: Object.keys(DatasetDataIndexTypeMap),
          required: true
        },
        dataId: {
          type: String,
          required: true
        },
        text: {
          type: String,
          required: true
        }
      }
    ],
    default: []
  }
});

try {
  DatasetDataSchema.index({ userId: 1 });
  DatasetDataSchema.index({ datasetId: 1 });
  DatasetDataSchema.index({ collectionId: 1 });
} catch (error) {
  console.log(error);
}

export const MongoDatasetData: Model<DatasetDataSchemaType> =
  models[DatasetDataCollectionName] || model(DatasetDataCollectionName, DatasetDataSchema);
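
A minimal write sketch for the new model (not part of the commit); the ids are placeholders from surrounding context, and the 'chunk' index type and dataId value are assumptions, with type constrained to a key of DatasetDataIndexTypeMap:

await MongoDatasetData.create({
  teamId,        // placeholder ObjectIds
  tmbId,
  datasetId,
  collectionId,
  q: 'What does ChatContextFilter do?',
  a: 'It trims chat history to a token budget while keeping system prompts.',
  indexes: [
    {
      defaultIndex: true,
      type: 'chunk', // assumed key of DatasetDataIndexTypeMap
      dataId: 'vector-point-id-placeholder', // id of the matching vector-store point
      text: 'What does ChatContextFilter do?'
    }
  ]
});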


@@ -2,7 +2,7 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
-import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
+import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetCollectionName } from '../schema';
import {
@@ -33,12 +33,13 @@ const TrainingDataSchema = new Schema({
    ref: DatasetCollectionName,
    required: true
  },
-  datasetCollectionId: {
+  collectionId: {
    type: Schema.Types.ObjectId,
    ref: DatasetColCollectionName,
    required: true
  },
  billId: {
    // the associated bill
    type: String,
    default: ''
  },
@@ -48,6 +49,7 @@ const TrainingDataSchema = new Schema({
    required: true
  },
  expireAt: {
    // It will be deleted after 7 days
    type: Date,
    default: () => new Date()
  },
@@ -56,6 +58,7 @@ const TrainingDataSchema = new Schema({
    default: () => new Date('2000/1/1')
  },
  model: {
    // ai model
    type: String,
    required: true
  },
@@ -71,13 +74,29 @@ const TrainingDataSchema = new Schema({
  a: {
    type: String,
    default: ''
  },
  indexes: {
    type: [
      {
        type: {
          type: String,
          enum: Object.keys(DatasetDataIndexTypeMap),
          required: true
        },
        text: {
          type: String,
          required: true
        }
      }
    ],
    default: []
  }
});
try {
  TrainingDataSchema.index({ lockTime: 1 });
  TrainingDataSchema.index({ userId: 1 });
-  TrainingDataSchema.index({ datasetCollectionId: 1 });
+  TrainingDataSchema.index({ collectionId: 1 });
  TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 * 60 }); // 7 days, in seconds
} catch (error) {
  console.log(error);
}
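
A hedged sketch of what a training row might look like after this commit (not part of the diff); the model export name MongoDatasetTraining, the ids, the model string, and the 'chunk' index type are all illustrative assumptions:

await MongoDatasetTraining.create({
  teamId,          // placeholder ObjectIds
  tmbId,
  datasetId,
  collectionId,    // renamed from datasetCollectionId in this commit
  billId: '',
  model: 'gpt-3.5-turbo', // illustrative ai model name
  q: 'raw chunk text to be processed',
  a: '',
  indexes: [
    { type: 'chunk', text: 'raw chunk text to be processed' } // type assumed from DatasetDataIndexTypeMap
  ]
});
// rows are reaped by the TTL index on expireAt after 7 days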