This commit is contained in:
Archer
2023-10-22 23:54:04 +08:00
committed by GitHub
parent 3091a90df6
commit a3534407bf
365 changed files with 7266 additions and 6055 deletions

View File

@@ -0,0 +1,17 @@
import type { UserModelSchema } from '@fastgpt/global/support/user/type';
import OpenAI from 'openai';
export const openaiBaseUrl = process.env.OPENAI_BASE_URL || 'https://api.openai.com/v1';
export const baseUrl = process.env.ONEAPI_URL || openaiBaseUrl;
export const systemAIChatKey = process.env.CHAT_API_KEY || '';
export const getAIApi = (props?: UserModelSchema['openaiAccount'], timeout = 6000) => {
return new OpenAI({
apiKey: props?.key || systemAIChatKey,
baseURL: props?.baseUrl || baseUrl,
httpAgent: global.httpsAgent,
timeout,
maxRetries: 2
});
};

View File

@@ -0,0 +1,57 @@
import type { ChatCompletionRequestMessage } from '@fastgpt/global/core/ai/type.d';
import { getAIApi } from '../config';
export const Prompt_QuestionGuide = `我不太清楚问你什么问题,请帮我生成 3 个问题引导我继续提问。问题的长度应小于20个字符按 JSON 格式返回: ["问题1", "问题2", "问题3"]`;
export async function createQuestionGuide({
messages,
model
}: {
messages: ChatCompletionRequestMessage[];
model: string;
}) {
const ai = getAIApi(undefined, 48000);
const data = await ai.chat.completions.create({
model: model,
temperature: 0,
max_tokens: 200,
messages: [
...messages,
{
role: 'user',
content: Prompt_QuestionGuide
}
],
stream: false
});
const answer = data.choices?.[0]?.message?.content || '';
const totalTokens = data.usage?.total_tokens || 0;
const start = answer.indexOf('[');
const end = answer.lastIndexOf(']');
if (start === -1 || end === -1) {
return {
result: [],
tokens: totalTokens
};
}
const jsonStr = answer
.substring(start, end + 1)
.replace(/(\\n|\\)/g, '')
.replace(/ /g, '');
try {
return {
result: JSON.parse(jsonStr),
tokens: totalTokens
};
} catch (error) {
return {
result: [],
tokens: totalTokens
};
}
}

View File

@@ -0,0 +1,26 @@
import { ERROR_ENUM } from '@fastgpt/global/common/error/errorCode';
import { MongoDatasetCollection } from './collection/schema';
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type';
export async function authCollection({
collectionId,
userId
}: {
collectionId: string;
userId: string;
}) {
const collection = await MongoDatasetCollection.findOne({
_id: collectionId,
userId
})
.populate('datasetId')
.lean();
if (collection) {
return {
...collection,
dataset: collection.datasetId as unknown as DatasetSchemaType
};
}
return Promise.reject(ERROR_ENUM.unAuthDataset);
}

View File

@@ -0,0 +1,66 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetCollectionSchemaType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetCollectionTypeMap } from '@fastgpt/global/core/dataset/constant';
import { DatasetCollectionName } from '../schema';
export const DatasetColCollectionName = 'dataset.collections';
const DatasetCollectionSchema = new Schema({
parentId: {
type: Schema.Types.ObjectId,
ref: DatasetColCollectionName,
default: null
},
userId: {
type: Schema.Types.ObjectId,
ref: 'user',
required: true
},
datasetId: {
type: Schema.Types.ObjectId,
ref: DatasetCollectionName,
required: true
},
name: {
type: String,
required: true
},
type: {
type: String,
enum: Object.keys(DatasetCollectionTypeMap),
required: true
},
updateTime: {
type: Date,
default: () => new Date()
},
metadata: {
type: {
fileId: {
type: Schema.Types.ObjectId,
ref: 'dataset.files'
},
rawLink: {
type: String,
default: ''
},
// 451 初始化
pgCollectionId: {
type: String
}
},
default: {}
}
});
try {
DatasetCollectionSchema.index({ datasetId: 1 });
DatasetCollectionSchema.index({ userId: 1 });
DatasetCollectionSchema.index({ updateTime: -1 });
} catch (error) {
console.log(error);
}
export const MongoDatasetCollection: Model<DatasetCollectionSchemaType> =
models[DatasetColCollectionName] || model(DatasetColCollectionName, DatasetCollectionSchema);

View File

@@ -0,0 +1,62 @@
import { MongoDatasetCollection } from './schema';
import { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder/type';
/**
* get all collection by top collectionId
*/
export async function findCollectionAndChild(id: string, fields = '_id parentId name metadata') {
async function find(id: string) {
// find children
const children = await MongoDatasetCollection.find({ parentId: id }, fields);
let collections = children;
for (const child of children) {
const grandChildrenIds = await find(child._id);
collections = collections.concat(grandChildrenIds);
}
return collections;
}
const [collection, childCollections] = await Promise.all([
MongoDatasetCollection.findById(id, fields),
find(id)
]);
if (!collection) {
return Promise.reject('Collection not found');
}
return [collection, ...childCollections];
}
export async function getDatasetCollectionPaths({
parentId = '',
userId
}: {
parentId?: string;
userId: string;
}): Promise<ParentTreePathItemType[]> {
async function find(parentId?: string): Promise<ParentTreePathItemType[]> {
if (!parentId) {
return [];
}
const parent = await MongoDatasetCollection.findOne({ _id: parentId, userId }, 'name parentId');
if (!parent) return [];
const paths = await find(parent.parentId);
paths.push({ parentId, parentName: parent.name });
return paths;
}
return await find(parentId);
}
export function getCollectionUpdateTime({ name, time }: { time?: Date; name: string }) {
if (time) return time;
if (name.startsWith('手动') || ['manual', 'mark'].includes(name)) return new Date('2999/9/9');
return new Date();
}

View File

@@ -0,0 +1,55 @@
import { connectionMongo, type Model } from '../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d';
import { DatasetTypeMap } from '@fastgpt/global/core/dataset/constant';
export const DatasetCollectionName = 'datasets';
const DatasetSchema = new Schema({
parentId: {
type: Schema.Types.ObjectId,
ref: DatasetCollectionName,
default: null
},
userId: {
type: Schema.Types.ObjectId,
ref: 'user',
required: true
},
updateTime: {
type: Date,
default: () => new Date()
},
avatar: {
type: String,
default: '/icon/logo.svg'
},
name: {
type: String,
required: true
},
vectorModel: {
type: String,
required: true,
default: 'text-embedding-ada-002'
},
type: {
type: String,
enum: Object.keys(DatasetTypeMap),
required: true,
default: 'dataset'
},
tags: {
type: [String],
default: []
}
});
try {
DatasetSchema.index({ userId: 1 });
} catch (error) {
console.log(error);
}
export const MongoDataset: Model<DatasetSchemaType> =
models[DatasetCollectionName] || model(DatasetCollectionName, DatasetSchema);

View File

@@ -0,0 +1,72 @@
/* 模型的知识库 */
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetCollectionName } from '../schema';
export const DatasetTrainingCollectionName = 'dataset.trainings';
const TrainingDataSchema = new Schema({
userId: {
type: Schema.Types.ObjectId,
ref: 'user',
required: true
},
datasetId: {
type: Schema.Types.ObjectId,
ref: DatasetCollectionName,
required: true
},
datasetCollectionId: {
type: Schema.Types.ObjectId,
ref: DatasetColCollectionName,
required: true
},
billId: {
type: String,
default: ''
},
mode: {
type: String,
enum: Object.keys(TrainingTypeMap),
required: true
},
expireAt: {
type: Date,
default: () => new Date()
},
lockTime: {
type: Date,
default: () => new Date('2000/1/1')
},
model: {
type: String,
required: true
},
prompt: {
// qa split prompt
type: String,
default: ''
},
q: {
type: String,
required: true
},
a: {
type: String,
default: ''
}
});
try {
TrainingDataSchema.index({ lockTime: 1 });
TrainingDataSchema.index({ userId: 1 });
TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 });
} catch (error) {
console.log(error);
}
export const MongoDatasetTraining: Model<DatasetTrainingSchemaType> =
models[DatasetTrainingCollectionName] || model(DatasetTrainingCollectionName, TrainingDataSchema);