This commit is contained in:
Archer
2023-11-15 21:35:50 +08:00
committed by GitHub
parent bfd8be5df0
commit cd3acb44ab
39 changed files with 457 additions and 160 deletions

View File

@@ -63,8 +63,8 @@ export const splitText2Chunks = (props: { text: string; maxLen: number; overlapL
let chunks: string[] = [];
for (let i = 0; i < splitTexts.length; i++) {
let text = splitTexts[i];
let chunkToken = countPromptTokens(lastChunk, '');
const textToken = countPromptTokens(text, '');
let chunkToken = lastChunk.length;
const textToken = text.length;
// next chunk is too large / new chunk is too large(The current chunk must be smaller than maxLen)
if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) {

View File

@@ -1,4 +1,4 @@
import type { VectorModelItemType } from '../../core/ai/model.d';
import type { LLMModelItemType, VectorModelItemType } from '../../core/ai/model.d';
import { PermissionTypeEnum } from '../../support/permission/constant';
import { PushDatasetDataChunkProps } from './api';
import {
@@ -19,6 +19,7 @@ export type DatasetSchemaType = {
avatar: string;
name: string;
vectorModel: string;
agentModel: string;
tags: string[];
type: `${DatasetTypeEnum}`;
permission: `${PermissionTypeEnum}`;
@@ -84,8 +85,9 @@ export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datas
};
/* ================= dataset ===================== */
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel'> & {
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel'> & {
vectorModel: VectorModelItemType;
agentModel: LLMModelItemType;
isOwner: boolean;
canWrite: boolean;
};

View File

@@ -3,6 +3,8 @@ import { BillListItemType } from './type';
export type CreateTrainingBillProps = {
name: string;
vectorModel?: string;
agentModel?: string;
};
export type ConcatBillProps = {

View File

@@ -61,7 +61,6 @@ const AppSchema = new Schema({
try {
AppSchema.index({ updateTime: -1 });
AppSchema.index({ 'share.collection': -1 });
} catch (error) {
console.log(error);
}

View File

@@ -69,7 +69,6 @@ const DatasetCollectionSchema = new Schema({
try {
DatasetCollectionSchema.index({ datasetId: 1 });
DatasetCollectionSchema.index({ userId: 1 });
DatasetCollectionSchema.index({ updateTime: -1 });
} catch (error) {
console.log(error);

View File

@@ -48,6 +48,11 @@ const DatasetSchema = new Schema({
required: true,
default: 'text-embedding-ada-002'
},
agentModel: {
type: String,
required: true,
default: 'gpt-3.5-turbo-16k'
},
type: {
type: String,
enum: Object.keys(DatasetTypeMap),

View File

@@ -95,7 +95,7 @@ const TrainingDataSchema = new Schema({
try {
TrainingDataSchema.index({ lockTime: 1 });
TrainingDataSchema.index({ userId: 1 });
TrainingDataSchema.index({ datasetId: 1 });
TrainingDataSchema.index({ collectionId: 1 });
TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 });
} catch (error) {