4.6.8-alpha (#804)

* perf: redirect request and err log replace

perf: dataset openapi

feat: session

fix: retry input error

feat: 468 doc

sub page

feat: standard sub

perf: rerank tip

perf: rerank tip

perf: api sdk

perf: openapi

sub plan

perf: sub ui

fix: ts

* perf: init log

* fix: variable select

* sub page

* icon

* perf: llm model config

* perf: menu ux

* perf: system store

* perf: publish app name

* fix: init data

* perf: flow edit ux

* fix: value type format and ux

* fix prompt editor default value (#13)

* fix prompt editor default value

* fix prompt editor update when not focused

* add key with variable

---------

Co-authored-by: Archer <545436317@qq.com>

* fix: value type

* doc

* i18n

* import path

* home page

* perf: mongo session running

* fix: ts

* perf: use toast

* perf: flow edit

* perf: sse response

* slider ui

* fetch error

* fix prompt editor rerender when not focused by key defaultValue (#14)

* perf: prompt editor

* feat: dataset search concat

* perf: doc

* fix: ts

* perf: doc

* fix json editor onblur value (#15)

* faq

* vector model default config

* ipv6

---------

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
Archer
2024-02-01 21:57:41 +08:00
committed by GitHub
parent fc19c4cf09
commit 34602b25df
285 changed files with 10345 additions and 11223 deletions

View File

@@ -6,10 +6,14 @@ export const baseUrl = process.env.ONEAPI_URL || openaiBaseUrl;
export const systemAIChatKey = process.env.CHAT_API_KEY || '';
export const getAIApi = (props?: UserModelSchema['openaiAccount'], timeout = 60000) => {
export const getAIApi = (props?: {
userKey?: UserModelSchema['openaiAccount'];
timeout?: number;
}) => {
const { userKey, timeout } = props || {};
return new OpenAI({
apiKey: props?.key || systemAIChatKey,
baseURL: props?.baseUrl || baseUrl,
apiKey: userKey?.key || systemAIChatKey,
baseURL: userKey?.baseUrl || baseUrl,
httpAgent: global.httpsAgent,
timeout,
maxRetries: 2

View File

@@ -1,15 +1,13 @@
import { VectorModelItemType } from '@fastgpt/global/core/ai/model.d';
import { getAIApi } from '../config';
export type GetVectorProps = {
model: string;
type GetVectorProps = {
model: VectorModelItemType;
input: string;
};
// text to vector
export async function getVectorsByText({
model = 'text-embedding-ada-002',
input
}: GetVectorProps) {
export async function getVectorsByText({ model, input }: GetVectorProps) {
if (!input) {
return Promise.reject({
code: 500,
@@ -23,7 +21,8 @@ export async function getVectorsByText({
// input text to vector
const result = await ai.embeddings
.create({
model,
...model.defaultConfig,
model: model.model,
input: [input]
})
.then(async (res) => {

View File

@@ -10,10 +10,12 @@ export async function createQuestionGuide({
messages: ChatMessageItemType[];
model: string;
}) {
const ai = getAIApi(undefined, 480000);
const ai = getAIApi({
timeout: 480000
});
const data = await ai.chat.completions.create({
model: model,
temperature: 0,
temperature: 0.1,
max_tokens: 200,
messages: [
...messages,

View File

@@ -17,7 +17,9 @@ OUTPUT:
`;
export const searchQueryExtension = async ({ query, model }: { query: string; model: string }) => {
const ai = getAIApi(undefined, 480000);
const ai = getAIApi({
timeout: 480000
});
const result = await ai.chat.completions.create({
model,

View File

@@ -90,7 +90,7 @@ try {
close custom feedback;
*/
ChatItemSchema.index({ appId: 1, chatId: 1, dataId: 1 }, { background: true });
ChatItemSchema.index({ time: -1 }, { background: true });
ChatItemSchema.index({ time: -1, obj: 1 }, { background: true });
ChatItemSchema.index({ userGoodFeedback: 1 }, { background: true });
ChatItemSchema.index({ userBadFeedback: 1 }, { background: true });
ChatItemSchema.index({ customFeedbacks: 1 }, { background: true });

View File

@@ -15,6 +15,7 @@ import { delImgByRelatedId } from '../../../common/file/image/controller';
import { deleteDatasetDataVector } from '../../../common/vectorStore/controller';
import { delFileByFileIdList } from '../../../common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { ClientSession } from '../../../common/mongo';
export async function createOneCollection({
teamId,
@@ -35,41 +36,53 @@ export async function createOneCollection({
hashRawText,
rawTextLength,
metadata = {},
session,
...props
}: CreateDatasetCollectionParams & { teamId: string; tmbId: string; [key: string]: any }) {
const { _id } = await MongoDatasetCollection.create({
...props,
teamId,
tmbId,
parentId: parentId || null,
datasetId,
name,
type,
}: CreateDatasetCollectionParams & {
teamId: string;
tmbId: string;
[key: string]: any;
session?: ClientSession;
}) {
const [collection] = await MongoDatasetCollection.create(
[
{
...props,
teamId,
tmbId,
parentId: parentId || null,
datasetId,
name,
type,
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
trainingType,
chunkSize,
chunkSplitter,
qaPrompt,
fileId,
rawLink,
fileId,
rawLink,
rawTextLength,
hashRawText,
metadata
});
rawTextLength,
hashRawText,
metadata
}
],
{ session }
);
// create default collection
if (type === DatasetCollectionTypeEnum.folder) {
await createDefaultCollection({
datasetId,
parentId: _id,
parentId: collection._id,
teamId,
tmbId
tmbId,
session
});
}
return _id;
return collection;
}
// create default collection
@@ -78,34 +91,43 @@ export function createDefaultCollection({
datasetId,
parentId,
teamId,
tmbId
tmbId,
session
}: {
name?: '手动录入' | '手动标注';
datasetId: string;
parentId?: string;
teamId: string;
tmbId: string;
session?: ClientSession;
}) {
return MongoDatasetCollection.create({
name,
teamId,
tmbId,
datasetId,
parentId,
type: DatasetCollectionTypeEnum.virtual,
trainingType: TrainingModeEnum.chunk,
chunkSize: 0,
updateTime: new Date('2099')
});
return MongoDatasetCollection.create(
[
{
name,
teamId,
tmbId,
datasetId,
parentId,
type: DatasetCollectionTypeEnum.virtual,
trainingType: TrainingModeEnum.chunk,
chunkSize: 0,
updateTime: new Date('2099')
}
],
{ session }
);
}
/**
* delete collection and its related data
*/
export async function delCollectionAndRelatedSources({
collections
collections,
session
}: {
collections: (CollectionWithDatasetType | DatasetCollectionSchemaType)[];
session: ClientSession;
}) {
if (collections.length === 0) return;
@@ -128,24 +150,25 @@ export async function delCollectionAndRelatedSources({
await delay(2000);
// delete dataset.datas
await MongoDatasetData.deleteMany({ teamId, collectionId: { $in: collectionIds } });
// delete pg data
await deleteDatasetDataVector({ teamId, collectionIds });
// delete file and imgs
await Promise.all([
delImgByRelatedId({
teamId,
relateIds: relatedImageIds
}),
delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList
})
]);
await MongoDatasetData.deleteMany({ teamId, collectionId: { $in: collectionIds } }, { session });
// delete imgs
await delImgByRelatedId({
teamId,
relateIds: relatedImageIds,
session
});
// delete collections
await MongoDatasetCollection.deleteMany({
_id: { $in: collectionIds }
await MongoDatasetCollection.deleteMany(
{
_id: { $in: collectionIds }
},
{ session }
);
// no session delete: delete files, vector data
await deleteDatasetDataVector({ teamId, collectionIds });
await delFileByFileIdList({
bucketName: BucketNameEnum.dataset,
fileIdList
});
}

View File

@@ -9,6 +9,7 @@ import {
TrainingModeEnum
} from '@fastgpt/global/core/dataset/constants';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { ClientSession } from '../../../common/mongo';
/**
* get all collections by top collectionId
@@ -149,17 +150,17 @@ export const getCollectionAndRawText = async ({
/* link collection start load data */
export const reloadCollectionChunks = async ({
collectionId,
collection,
tmbId,
billId,
rawText
rawText,
session
}: {
collectionId?: string;
collection?: CollectionWithDatasetType;
collection: CollectionWithDatasetType;
tmbId: string;
billId?: string;
rawText?: string;
session: ClientSession;
}) => {
const {
title,
@@ -168,7 +169,6 @@ export const reloadCollectionChunks = async ({
isSameRawText
} = await getCollectionAndRawText({
collection,
collectionId,
newRawText: rawText
});
@@ -186,6 +186,7 @@ export const reloadCollectionChunks = async ({
if (col.trainingType === TrainingModeEnum.qa) return col.datasetId.agentModel;
return Promise.reject('Training model error');
})();
await MongoDatasetTraining.insertMany(
chunks.map((item, i) => ({
teamId: col.teamId,
@@ -199,13 +200,18 @@ export const reloadCollectionChunks = async ({
q: item,
a: '',
chunkIndex: i
}))
})),
{ session }
);
// update raw text
await MongoDatasetCollection.findByIdAndUpdate(col._id, {
...(title && { name: title }),
rawTextLength: newRawText.length,
hashRawText: hashStr(newRawText)
});
await MongoDatasetCollection.findByIdAndUpdate(
col._id,
{
...(title && { name: title }),
rawTextLength: newRawText.length,
hashRawText: hashStr(newRawText)
},
{ session }
);
};

View File

@@ -2,6 +2,7 @@ import { CollectionWithDatasetType, DatasetSchemaType } from '@fastgpt/global/co
import { MongoDatasetCollection } from './collection/schema';
import { MongoDataset } from './schema';
import { delCollectionAndRelatedSources } from './collection/controller';
import { ClientSession } from '../../common/mongo';
/* ============= dataset ========== */
/* find all datasetId by top datasetId */
@@ -55,7 +56,13 @@ export async function getCollectionWithDataset(collectionId: string) {
}
/* delete all data by datasetIds */
export async function delDatasetRelevantData({ datasets }: { datasets: DatasetSchemaType[] }) {
export async function delDatasetRelevantData({
datasets,
session
}: {
datasets: DatasetSchemaType[];
session: ClientSession;
}) {
if (!datasets.length) return;
const teamId = datasets[0].teamId;
@@ -70,5 +77,5 @@ export async function delDatasetRelevantData({ datasets }: { datasets: DatasetSc
'_id teamId fileId metadata'
).lean();
await delCollectionAndRelatedSources({ collections });
await delCollectionAndRelatedSources({ collections, session });
}

View File

@@ -40,12 +40,12 @@ export async function pushDataListToTrainingQueue({
trainingMode = TrainingModeEnum.chunk,
vectorModelList = [],
qaModelList = []
datasetModelList = []
}: {
teamId: string;
tmbId: string;
vectorModelList: VectorModelItemType[];
qaModelList: LLMModelItemType[];
datasetModelList: LLMModelItemType[];
} & PushDatasetDataProps): Promise<PushDatasetDataResponse> {
const {
datasetId: { _id: datasetId, vectorModel, agentModel }
@@ -68,7 +68,7 @@ export async function pushDataListToTrainingQueue({
}
if (trainingMode === TrainingModeEnum.qa) {
const qaModelData = qaModelList?.find((item) => item.model === agentModel);
const qaModelData = datasetModelList?.find((item) => item.model === agentModel);
if (!qaModelData) {
return Promise.reject(`Model ${agentModel} is inValid`);
}
@@ -150,7 +150,7 @@ export async function pushDataListToTrainingQueue({
model,
q: item.q,
a: item.a,
chunkIndex: item.chunkIndex ?? i,
chunkIndex: item.chunkIndex ?? 0,
weight: weight ?? 0,
indexes: item.indexes
}))