Mirror of https://github.com/labring/FastGPT.git, synced 2025-07-23 05:12:39 +00:00

Commit: v4.6-3 (#471)
packages/global/common/string/textSplitter.ts (new file, 131 lines)
@@ -0,0 +1,131 @@
import { getErrText } from '../error/utils';
import { countPromptTokens } from './tiktoken';

/**
 * Split text into chunks.
 * maxLen - max size of one chunk (up to 3500)
 * overlapLen - size of the overlap carried between adjacent chunks
 * requires maxLen > overlapLen
 * markdown headings are preferred split points
 */
export const splitText2Chunks = (props: { text: string; maxLen: number; overlapLen?: number }) => {
  const { text = '', maxLen, overlapLen = Math.floor(maxLen * 0.2) } = props;
  const tempMarker = 'SPLIT_HERE_SPLIT_HERE';

  const stepReg: Record<number, RegExp> = {
    0: /^(#\s[^\n]+)\n/gm,
    1: /^(##\s[^\n]+)\n/gm,
    2: /^(###\s[^\n]+)\n/gm,
    3: /^(####\s[^\n]+)\n/gm,

    4: /(\n\n)/g,
    5: /([\n])/g,
    6: /([。]|(?<![^a-zA-Z])\.\s)/g,
    7: /([!?]|!\s|\?\s)/g,
    8: /([;]|;\s)/g,
    9: /([,]|,\s)/g
  };

  const splitTextRecursively = ({
    text = '',
    step,
    lastChunk,
    overlayChunk
  }: {
    text: string;
    step: number;
    lastChunk: string;
    overlayChunk: string;
  }) => {
    if (text.length <= maxLen) {
      return [text];
    }
    const reg = stepReg[step];
    const isMarkdownSplit = step < 4;

    if (!reg) {
      // no finer separator left: fall back to fixed-size slices with overlap
      const chunks: string[] = [];
      let chunk = '';
      for (let i = 0; i < text.length; i += maxLen - overlapLen) {
        chunk = text.slice(i, i + maxLen);
        chunks.push(chunk);
      }
      return chunks;
    }

    // split text by the separator of the current step
    const splitTexts = text
      .replace(reg, isMarkdownSplit ? `${tempMarker}$1` : `$1${tempMarker}`)
      .split(`${tempMarker}`)
      .filter((part) => part);

    let chunks: string[] = [];
    for (let i = 0; i < splitTexts.length; i++) {
      let text = splitTexts[i];
      let chunkToken = countPromptTokens(lastChunk, '');
      const textToken = countPromptTokens(text, '');

      // the next part is too large, or merging it would exceed the budget (a chunk must stay below maxLen * 1.4)
      if (textToken >= maxLen || chunkToken + textToken > maxLen * 1.4) {
        // the accumulated chunk is already large enough: push it instead of carrying it forward
        if (chunkToken > maxLen * 0.7) {
          chunks.push(lastChunk);
          lastChunk = '';
          overlayChunk = '';
        }
        // re-split the oversized part with the next, finer separator
        const innerChunks = splitTextRecursively({
          text,
          step: step + 1,
          lastChunk,
          overlayChunk
        });
        if (innerChunks.length === 0) continue;
        chunks = chunks.concat(innerChunks);
        lastChunk = '';
        overlayChunk = '';
        continue;
      }

      // the part fits: append it to the current chunk
      lastChunk += text;
      chunkToken += textToken; // definitely less than 1.4 * maxLen

      // within overlapLen of maxLen: also keep this part as overlap for the next chunk
      if (
        overlapLen !== 0 &&
        !isMarkdownSplit &&
        chunkToken >= maxLen - overlapLen &&
        textToken < overlapLen
      ) {
        overlayChunk += text;
      }
      if (chunkToken >= maxLen) {
        chunks.push(lastChunk);
        lastChunk = overlayChunk;
        overlayChunk = '';
      }
    }

    /* if the last chunk is independent, push it to chunks too */
    if (lastChunk && chunks[chunks.length - 1] && !chunks[chunks.length - 1].endsWith(lastChunk)) {
      chunks.push(lastChunk);
    }

    return chunks;
  };

  try {
    const chunks = splitTextRecursively({ text, step: 0, lastChunk: '', overlayChunk: '' });

    const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0);

    return {
      chunks,
      tokens
    };
  } catch (err) {
    throw new Error(getErrText(err));
  }
};
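A minimal usage sketch of splitText2Chunks, assuming the @fastgpt/global import alias used elsewhere in this commit; the input string is hypothetical:

import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';

const mdText = '# Title\n\nSome long markdown content...'; // hypothetical input
// overlapLen defaults to 20% of maxLen when omitted
const { chunks, tokens } = splitText2Chunks({ text: mdText, maxLen: 500 });
console.log(chunks.length, tokens);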
packages/global/common/string/tiktoken/cl100k_base.json (new file, 11 lines)
File diff suppressed because one or more lines are too long
packages/global/common/string/tiktoken/index.ts (new file, 84 lines)
@@ -0,0 +1,84 @@
/* Only the gpt-3.5-turbo tokenizer (cl100k_base) is used */
import type { ChatItemType } from '../../../core/chat/type';
import { Tiktoken } from 'js-tiktoken/lite';
import { adaptChat2GptMessages } from '../../../core/chat/adapt';
import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constant';
import encodingJson from './cl100k_base.json';

/* init the shared Tiktoken instance */
export function getTikTokenEnc() {
  if (typeof window !== 'undefined' && window.TikToken) {
    return window.TikToken;
  }
  if (typeof global !== 'undefined' && global.TikToken) {
    return global.TikToken;
  }

  const enc = new Tiktoken(encodingJson);

  if (typeof window !== 'undefined') {
    window.TikToken = enc;
  }
  if (typeof global !== 'undefined') {
    global.TikToken = enc;
  }

  return enc;
}

/* count the tokens of one prompt */
export function countPromptTokens(
  prompt = '',
  role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
) {
  const enc = getTikTokenEnc();
  const text = `${role}\n${prompt}`;
  try {
    const encodeText = enc.encode(text);
    return encodeText.length + 3; // rough allowance for the role tokens
  } catch (error) {
    return text.length;
  }
}

/* count messages tokens */
export function countMessagesTokens({ messages }: { messages: ChatItemType[] }) {
  const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });

  let totalTokens = 0;
  for (let i = 0; i < adaptMessages.length; i++) {
    const item = adaptMessages[i];
    const tokens = countPromptTokens(item.content, item.role);
    totalTokens += tokens;
  }

  return totalTokens;
}

/* slice messages from top to bottom by maxTokens */
export function sliceMessagesTB({
  messages,
  maxTokens
}: {
  messages: ChatItemType[];
  maxTokens: number;
}) {
  const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
  let reduceTokens = maxTokens;
  let result: ChatItemType[] = [];

  for (let i = 0; i < adaptMessages.length; i++) {
    const item = adaptMessages[i];

    const tokens = countPromptTokens(item.content, item.role);
    reduceTokens -= tokens;

    if (reduceTokens > 0) {
      result.push(messages[i]);
    } else {
      break;
    }
  }

  return result.length === 0 && messages[0] ? [messages[0]] : result;
}
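A usage sketch of the token helpers; the @fastgpt/global alias matches imports elsewhere in this commit, and the history list is hypothetical:

import { countPromptTokens, sliceMessagesTB } from '@fastgpt/global/common/string/tiktoken';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';

const n = countPromptTokens('Hello world', 'user'); // encoded length + 3 as a role allowance

const history: ChatItemType[] = []; // hypothetical chat history
// keeps messages from the top until the token budget is spent
const visible = sliceMessagesTB({ messages: history, maxTokens: 2000 });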
packages/global/common/string/tiktoken/type.d.ts (new file, vendored, 5 lines)
@@ -0,0 +1,5 @@
import type { Tiktoken } from 'js-tiktoken';

declare global {
  var TikToken: Tiktoken;
}
@@ -1,13 +1,15 @@
import crypto from 'crypto';

/* check string is a web link */
export function strIsLink(str?: string) {
  if (!str) return false;
  if (/^((http|https)?:\/\/|www\.|\/)[^\s/$.?#].[^\s]*$/i.test(str)) return true;
  return false;
}

-export const hashStr = (psw: string) => {
-  return crypto.createHash('sha256').update(psw).digest('hex');
+/* hash string */
+export const hashStr = (str: string) => {
+  return crypto.createHash('sha256').update(str).digest('hex');
};

/* simple text, remove chinese space and extra \n */
@@ -20,3 +22,16 @@ export const simpleText = (text: string) => {

  return text;
};
+
+/*
+  replace {{variable}} to value
+*/
+export function replaceVariable(text: string, obj: Record<string, string | number>) {
+  for (const key in obj) {
+    const val = obj[key];
+    if (!['string', 'number'].includes(typeof val)) continue;
+
+    text = text.replace(new RegExp(`{{(${key})}}`, 'g'), String(val));
+  }
+  return text || '';
+}
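A quick sketch of replaceVariable as defined above (the @fastgpt/global alias is assumed):

import { replaceVariable } from '@fastgpt/global/common/string/tools';

// {{key}} placeholders are replaced globally; values that are not string/number are skipped
const out = replaceVariable('Hi {{name}}, you have {{n}} tasks', { name: 'Ada', n: 3 });
// out === 'Hi Ada, you have 3 tasks'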
packages/global/core/ai/api.d.ts (new file, vendored, 5 lines)
@@ -0,0 +1,5 @@
export type PostReRankProps = {
  query: string;
  inputs: { id: string; text: string }[];
};
export type PostReRankResponse = { id: string; score: number }[];
packages/global/core/chat/adapt.ts (new file, 40 lines)
@@ -0,0 +1,40 @@
import type { ChatItemType } from '../../core/chat/type.d';
import { ChatRoleEnum } from '../../core/chat/constants';
import { ChatCompletionRequestMessageRoleEnum } from '../../core/ai/constant';
import type { ChatMessageItemType } from '../../core/ai/type.d';

const chat2Message = {
  [ChatRoleEnum.AI]: ChatCompletionRequestMessageRoleEnum.Assistant,
  [ChatRoleEnum.Human]: ChatCompletionRequestMessageRoleEnum.User,
  [ChatRoleEnum.System]: ChatCompletionRequestMessageRoleEnum.System,
  [ChatRoleEnum.Function]: ChatCompletionRequestMessageRoleEnum.Function,
  [ChatRoleEnum.Tool]: ChatCompletionRequestMessageRoleEnum.Tool
};
const message2Chat = {
  [ChatCompletionRequestMessageRoleEnum.System]: ChatRoleEnum.System,
  [ChatCompletionRequestMessageRoleEnum.User]: ChatRoleEnum.Human,
  [ChatCompletionRequestMessageRoleEnum.Assistant]: ChatRoleEnum.AI,
  [ChatCompletionRequestMessageRoleEnum.Function]: ChatRoleEnum.Function,
  [ChatCompletionRequestMessageRoleEnum.Tool]: ChatRoleEnum.Tool
};

export function adaptRole_Chat2Message(role: `${ChatRoleEnum}`) {
  return chat2Message[role];
}
export function adaptRole_Message2Chat(role: `${ChatCompletionRequestMessageRoleEnum}`) {
  return message2Chat[role];
}

export const adaptChat2GptMessages = ({
  messages,
  reserveId
}: {
  messages: ChatItemType[];
  reserveId: boolean;
}): ChatMessageItemType[] => {
  return messages.map((item) => ({
    ...(reserveId && { dataId: item.dataId }),
    role: chat2Message[item.obj],
    content: item.value || ''
  }));
};
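A sketch of the adapter; the import alias is assumed and the history list is hypothetical:

import { adaptChat2GptMessages } from '@fastgpt/global/core/chat/adapt';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';

const history: ChatItemType[] = []; // hypothetical chat history
// each item's obj role (Human/AI/System/...) is mapped to an OpenAI role (user/assistant/system/...);
// dataId is carried over only when reserveId is true
const gptMessages = adaptChat2GptMessages({ messages: history, reserveId: true });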
packages/global/core/dataset/api.d.ts (new file, vendored, 20 lines)
@@ -0,0 +1,20 @@
import { DatasetDataIndexItemType } from './type';

/* ================= dataset ===================== */

/* ================= collection ===================== */

/* ================= data ===================== */
export type PgSearchRawType = {
  id: string;
  team_id: string;
  tmb_id: string;
  collection_id: string;
  data_id: string;
  score: number;
};
export type PushDatasetDataChunkProps = {
  q: string; // embedding content
  a?: string; // bonus content
  indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
};
@@ -36,29 +36,54 @@ export const DatasetCollectionTypeMap = {
  }
};

-export enum TrainingModeEnum {
-  'qa' = 'qa',
-  'index' = 'index'
+export enum DatasetDataIndexTypeEnum {
+  chunk = 'chunk',
+  qa = 'qa',
+  summary = 'summary',
+  hypothetical = 'hypothetical',
+  custom = 'custom'
}
-export const TrainingTypeMap = {
-  [TrainingModeEnum.qa]: 'qa',
-  [TrainingModeEnum.index]: 'index'
-};
-
-export enum DatasetSpecialIdEnum {
-  manual = 'manual',
-  mark = 'mark'
-}
-export const datasetSpecialIdMap = {
-  [DatasetSpecialIdEnum.manual]: {
-    name: 'kb.Manual Data',
-    sourceName: 'kb.Manual Input'
+export const DatasetDataIndexTypeMap = {
+  [DatasetDataIndexTypeEnum.chunk]: {
+    name: 'dataset.data.indexes.chunk'
  },
-  [DatasetSpecialIdEnum.mark]: {
-    name: 'kb.Mark Data',
-    sourceName: 'kb.Manual Mark'
+  [DatasetDataIndexTypeEnum.summary]: {
+    name: 'dataset.data.indexes.summary'
+  },
+  [DatasetDataIndexTypeEnum.hypothetical]: {
+    name: 'dataset.data.indexes.hypothetical'
+  },
+  [DatasetDataIndexTypeEnum.qa]: {
+    name: 'dataset.data.indexes.qa'
+  },
+  [DatasetDataIndexTypeEnum.custom]: {
+    name: 'dataset.data.indexes.custom'
+  }
};
-export const datasetSpecialIds: string[] = [DatasetSpecialIdEnum.manual, DatasetSpecialIdEnum.mark];

+export enum TrainingModeEnum {
+  'chunk' = 'chunk',
+  'qa' = 'qa'
+  // 'hypothetical' = 'hypothetical',
+  // 'summary' = 'summary',
+  // 'multipleIndex' = 'multipleIndex'
+}
+export const TrainingTypeMap = {
+  [TrainingModeEnum.chunk]: {
+    name: 'chunk'
+  },
+  [TrainingModeEnum.qa]: {
+    name: 'qa'
+  }
+  // [TrainingModeEnum.hypothetical]: {
+  //   name: 'hypothetical'
+  // },
+  // [TrainingModeEnum.summary]: {
+  //   name: 'summary'
+  // },
+  // [TrainingModeEnum.multipleIndex]: {
+  //   name: 'multipleIndex'
+  // }
};

export const FolderAvatarSrc = '/imgs/files/folder.svg';
packages/global/core/dataset/controller.d.ts (new file, vendored, 27 lines)
@@ -0,0 +1,27 @@
import type { DatasetDataIndexItemType, DatasetDataSchemaType } from './type';

export type CreateDatasetDataProps = {
  teamId: string;
  tmbId: string;
  datasetId: string;
  collectionId: string;
  q: string;
  a?: string;
  indexes?: Omit<DatasetDataIndexItemType, 'dataId'>[];
};

export type UpdateDatasetDataProps = {
  dataId: string;
  q?: string;
  a?: string;
  indexes?: (Omit<DatasetDataIndexItemType, 'dataId'> & {
    dataId?: string; // pg data id
  })[];
};

export type PatchIndexesProps = {
  type: 'create' | 'update' | 'delete';
  index: Omit<DatasetDataIndexItemType, 'dataId'> & {
    dataId?: string;
  };
};
packages/global/core/dataset/type.d.ts (vendored, 78 changes)
@@ -1,6 +1,14 @@
import type { VectorModelItemType } from '../../core/ai/model.d';
import { PermissionTypeEnum } from '../../support/permission/constant';
-import { DatasetCollectionTypeEnum, DatasetTypeEnum, TrainingModeEnum } from './constant';
+import { PushDatasetDataChunkProps } from './api';
+import {
+  DatasetCollectionTypeEnum,
+  DatasetDataIndexTypeEnum,
+  DatasetTypeEnum,
+  TrainingModeEnum
+} from './constant';

/* schema */
export type DatasetSchemaType = {
  _id: string;
  parentId: string;
@@ -33,13 +41,33 @@ export type DatasetCollectionSchemaType = {
  };
};

+export type DatasetDataIndexItemType = {
+  defaultIndex: boolean;
+  dataId: string; // pg data id
+  type: `${DatasetDataIndexTypeEnum}`;
+  text: string;
+};
+export type DatasetDataSchemaType = {
+  _id: string;
+  userId: string;
+  teamId: string;
+  tmbId: string;
+  datasetId: string;
+  collectionId: string;
+  q: string; // large chunks or question
+  a: string; // answer or custom content
+  indexes: DatasetDataIndexItemType[];
+};

export type DatasetTrainingSchemaType = {
  _id: string;
  userId: string;
  teamId: string;
  tmbId: string;
  datasetId: string;
-  datasetCollectionId: string;
+  collectionId: string;
  billId: string;
  expireAt: Date;
  lockTime: Date;
@@ -48,6 +76,7 @@ export type DatasetTrainingSchemaType = {
  prompt: string;
  q: string;
  a: string;
+  indexes: Omit<DatasetDataIndexItemType, 'dataId'>[];
};

export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datasetId'> & {
@@ -55,41 +84,31 @@ export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datas
};

/* ================= dataset ===================== */

-/* ================= collection ===================== */
-export type DatasetCollectionItemType = DatasetCollectionSchemaType & {
+export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel'> & {
+  vectorModel: VectorModelItemType;
  isOwner: boolean;
  canWrite: boolean;
};

+/* ================= collection ===================== */
+export type DatasetCollectionItemType = CollectionWithDatasetType & {
  canWrite: boolean;
  sourceName: string;
  sourceId?: string;
};

/* ================= data ===================== */
-export type PgRawDataItemType = {
-  id: string;
-  q: string;
-  a: string;
-  team_id: string;
-  tmb_id: string;
-  dataset_id: string;
-  collection_id: string;
-};
-export type PgDataItemType = {
-  id: string;
-  q: string;
-  a: string;
-  teamId: string;
-  tmbId: string;
-  datasetId: string;
-  collectionId: string;
-};
-export type DatasetChunkItemType = {
-  q: string;
-  a: string;
-};
-export type DatasetDataItemType = DatasetChunkItemType & {
+export type DatasetDataItemType = {
  id: string;
  datasetId: string;
  collectionId: string;
  sourceName: string;
  sourceId?: string;
+  q: string;
+  a: string;
+  indexes: DatasetDataIndexItemType[];
  isOwner: boolean;
  canWrite: boolean;
};

/* --------------- file ---------------------- */
@@ -109,9 +128,6 @@ export type DatasetFileSchema = {
};

/* ============= search =============== */
-export type SearchDataResultItemType = PgRawDataItemType & {
-  score: number;
-};
export type SearchDataResponseItemType = DatasetDataItemType & {
  score: number;
};
@@ -1,4 +1,4 @@
-import { DatasetCollectionTypeEnum } from './constant';
+import { DatasetCollectionTypeEnum, DatasetDataIndexTypeEnum } from './constant';
import { getFileIcon } from '../../common/file/icon';
import { strIsLink } from '../../common/string/tools';

@@ -44,3 +44,14 @@ export function getSourceNameIcon({
  }
  return '/imgs/files/collection.svg';
}
+
+export function getDefaultIndex(props?: { q?: string; a?: string; dataId?: string }) {
+  const { q = '', a, dataId } = props || {};
+  const qaStr = `${q}\n${a}`.trim();
+  return {
+    defaultIndex: true,
+    type: a ? DatasetDataIndexTypeEnum.qa : DatasetDataIndexTypeEnum.chunk,
+    text: a ? qaStr : q,
+    dataId
+  };
+}
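A sketch of getDefaultIndex, assuming this utils module is exported as @fastgpt/global/core/dataset/utils:

import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';

getDefaultIndex({ q: 'What is FastGPT?', a: 'An LLM application builder.' });
// => { defaultIndex: true, type: 'qa', text: 'What is FastGPT?\nAn LLM application builder.', dataId: undefined }

getDefaultIndex({ q: 'plain chunk text' });
// => { defaultIndex: true, type: 'chunk', text: 'plain chunk text', dataId: undefined }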
packages/global/core/module/api.d.ts (vendored, 3 changes)
@@ -0,0 +1,3 @@
+import { VectorModelItemType } from '../ai/model.d';
+
+export type SelectedDatasetType = { datasetId: string; vectorModel: VectorModelItemType }[];
@@ -6,7 +6,8 @@
    "timezones-list": "^3.0.2",
    "dayjs": "^1.11.7",
    "encoding": "^0.1.13",
-    "openai": "^4.16.1"
+    "openai": "^4.16.1",
+    "js-tiktoken": "^1.0.7"
  },
  "devDependencies": {
    "@types/node": "^20.8.5"
@@ -24,7 +24,8 @@ export const TeamMemberRoleMap = {
export enum TeamMemberStatusEnum {
  waiting = 'waiting',
  active = 'active',
-  reject = 'reject'
+  reject = 'reject',
+  leave = 'leave'
}
export const TeamMemberStatusMap = {
  [TeamMemberStatusEnum.waiting]: {
@@ -38,5 +39,10 @@ export const TeamMemberStatusMap = {
  [TeamMemberStatusEnum.reject]: {
    label: 'user.team.member.reject',
    color: 'red.600'
  },
+  [TeamMemberStatusEnum.leave]: {
+    label: 'user.team.member.leave',
+    color: 'red.600'
+  }
};
+export const leaveStatus = { $ne: TeamMemberStatusEnum.leave };
@@ -37,4 +37,7 @@ export type UpdateInviteProps = {
  tmbId: string;
  status: TeamMemberSchema['status'];
};
-export type InviteMemberResponse = Record<'invite' | 'inValid' | 'inTeam', string[]>;
+export type InviteMemberResponse = Record<
+  'invite' | 'inValid' | 'inTeam',
+  { username: string; userId: string }[]
+>;
packages/global/support/user/team/type.d.ts (vendored, 4 changes)
@@ -16,6 +16,7 @@ export type TeamMemberSchema = {
  teamId: string;
  userId: string;
  createTime: Date;
+  name: string;
  role: `${TeamMemberRoleEnum}`;
  status: `${TeamMemberStatusEnum}`;
  defaultTeam: boolean;
@@ -25,6 +26,7 @@ export type TeamItemType = {
  userId: string;
  teamId: string;
  teamName: string;
+  memberName: string;
  avatar: string;
  balance: number;
  tmbId: string;
@@ -39,7 +41,7 @@ export type TeamMemberItemType = {
  userId: string;
  tmbId: string;
  teamId: string;
-  memberUsername: string;
+  memberName: string;
  avatar: string;
  role: `${TeamMemberRoleEnum}`;
  status: `${TeamMemberStatusEnum}`;
@@ -15,7 +15,7 @@ export type BillSchema = CreateBillProps & {

export type BillItemType = {
  id: string;
-  username: string;
+  memberName: string;
  time: Date;
  appName: string;
  source: BillSchema['source'];
@@ -15,9 +15,6 @@ interface ResponseDataType {
 * request start
 */
function requestStart(config: InternalAxiosRequestConfig): InternalAxiosRequestConfig {
-  if (config.headers) {
-    config.headers.rootkey = process.env.ROOT_KEY;
-  }
  return config;
}

@@ -62,7 +59,8 @@ const instance = axios.create({
  timeout: 60000, // timeout
  headers: {
    'content-type': 'application/json',
-    'Cache-Control': 'no-cache'
+    'Cache-Control': 'no-cache',
+    rootkey: process.env.ROOT_KEY
  }
});
@@ -171,8 +171,7 @@ export async function initPg() {
      tmb_id VARCHAR(50) NOT NULL,
      dataset_id VARCHAR(50) NOT NULL,
      collection_id VARCHAR(50) NOT NULL,
-      q TEXT NOT NULL,
-      a TEXT
+      data_id VARCHAR(50) NOT NULL
    );
    CREATE INDEX IF NOT EXISTS vector_index ON ${PgDatasetTableName} USING hnsw (vector vector_ip_ops) WITH (m = 24, ef_construction = 64);
  `);
packages/service/core/chat/utils.ts (new file, 53 lines)
@@ -0,0 +1,53 @@
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';
import { ChatRoleEnum } from '@fastgpt/global/core/chat/constants';
import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken';
import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt';

/* slice chat context by tokens */
export function ChatContextFilter({
  messages = [],
  maxTokens
}: {
  messages: ChatItemType[];
  maxTokens: number;
}) {
  if (!Array.isArray(messages)) {
    return [];
  }
  const rawTextLen = messages.reduce((sum, item) => sum + item.value.length, 0);

  // If the text length is less than half of the maximum tokens, no calculation is required
  if (rawTextLen < maxTokens * 0.5) {
    return messages;
  }

  // split off the system prompts the list starts with
  const chatStartIndex = messages.findIndex((item) => item.obj !== ChatRoleEnum.System);
  const systemPrompts: ChatItemType[] = messages.slice(0, chatStartIndex);
  const chatPrompts: ChatItemType[] = messages.slice(chatStartIndex);

  // reduce the budget by the system prompt tokens
  maxTokens -= countMessagesTokens({
    messages: systemPrompts
  });

  // truncate the conversation by tokens
  const chats: ChatItemType[] = [];

  // walk the chat messages from newest to oldest
  for (let i = chatPrompts.length - 1; i >= 0; i--) {
    const item = chatPrompts[i];
    chats.unshift(item);

    const tokens = countPromptTokens(item.value, adaptRole_Chat2Message(item.obj));
    maxTokens -= tokens;

    /* total tokens out of budget; the system prompts must be kept */
    if (maxTokens <= 0) {
      chats.shift();
      break;
    }
  }

  return [...systemPrompts, ...chats];
}
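A usage sketch of ChatContextFilter; the @fastgpt/service alias and the message list are assumptions:

import { ChatContextFilter } from '@fastgpt/service/core/chat/utils';
import type { ChatItemType } from '@fastgpt/global/core/chat/type.d';

const messages: ChatItemType[] = []; // hypothetical conversation, system prompts first
// system prompts are always kept; the newest chat messages are kept until ~2000 tokens are used
const filtered = ChatContextFilter({ messages, maxTokens: 2000 });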
@@ -56,8 +56,7 @@ const DatasetCollectionSchema = new Schema({
    ref: 'dataset.files'
  },
  rawLink: {
-    type: String,
-    default: ''
+    type: String
  },
  // 4.5.1 initialization
  pgCollectionId: {
@@ -1,5 +1,25 @@
+import { CollectionWithDatasetType } from '@fastgpt/global/core/dataset/type';
import { MongoDatasetCollection } from './collection/schema';
import { MongoDataset } from './schema';

+/* ============= dataset ========== */
+/* find all datasetId by top datasetId */
+export async function findDatasetIdTreeByTopDatasetId(
+  id: string,
+  result: string[] = []
+): Promise<string[]> {
+  let allChildrenIds = [...result];
+
+  // find children
+  const children = await MongoDataset.find({ parentId: id });
+
+  for (const child of children) {
+    const grandChildrenIds = await findDatasetIdTreeByTopDatasetId(child._id, result);
+    allChildrenIds = allChildrenIds.concat(grandChildrenIds);
+  }
+
+  return [String(id), ...allChildrenIds];
+}
+
export async function getCollectionWithDataset(collectionId: string) {
  const data = (
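A sketch of the recursive id walk; the @fastgpt/service alias and the id are assumptions, and the call belongs inside an async function:

import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller';

const rootDatasetId = '650f1f00a1b2c3d4e5f6a7b8'; // hypothetical dataset _id
// returns the root id plus every descendant dataset id, walked depth first
const ids = await findDatasetIdTreeByTopDatasetId(rootDatasetId);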
packages/service/core/dataset/data/schema.ts (new file, 78 lines)
@@ -0,0 +1,78 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetDataSchemaType } from '@fastgpt/global/core/dataset/type.d';
import {
  TeamCollectionName,
  TeamMemberCollectionName
} from '@fastgpt/global/support/user/team/constant';
import { DatasetCollectionName } from '../schema';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetDataIndexTypeMap } from '@fastgpt/global/core/dataset/constant';

export const DatasetDataCollectionName = 'dataset.datas';

const DatasetDataSchema = new Schema({
  teamId: {
    type: Schema.Types.ObjectId,
    ref: TeamCollectionName,
    required: true
  },
  tmbId: {
    type: Schema.Types.ObjectId,
    ref: TeamMemberCollectionName,
    required: true
  },
  datasetId: {
    type: Schema.Types.ObjectId,
    ref: DatasetCollectionName,
    required: true
  },
  collectionId: {
    type: Schema.Types.ObjectId,
    ref: DatasetColCollectionName,
    required: true
  },
  q: {
    type: String,
    required: true
  },
  a: {
    type: String,
    default: ''
  },
  indexes: {
    type: [
      {
        defaultIndex: {
          type: Boolean,
          default: false
        },
        type: {
          type: String,
          enum: Object.keys(DatasetDataIndexTypeMap),
          required: true
        },
        dataId: {
          type: String,
          required: true
        },
        text: {
          type: String,
          required: true
        }
      }
    ],
    default: []
  }
});

try {
  DatasetDataSchema.index({ userId: 1 });
  DatasetDataSchema.index({ datasetId: 1 });
  DatasetDataSchema.index({ collectionId: 1 });
} catch (error) {
  console.log(error);
}

export const MongoDatasetData: Model<DatasetDataSchemaType> =
  models[DatasetDataCollectionName] || model(DatasetDataCollectionName, DatasetDataSchema);
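A sketch of creating one data record with a default index; the import aliases and ids are assumptions, and getDefaultIndex is the helper added to the dataset utils earlier in this commit:

import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';

// hypothetical ids
const teamId = '...', tmbId = '...', datasetId = '...', collectionId = '...', pgDataId = '...';

await MongoDatasetData.create({
  teamId,
  tmbId,
  datasetId,
  collectionId,
  q: 'What is FastGPT?',
  a: 'An LLM application builder.',
  indexes: [getDefaultIndex({ q: 'What is FastGPT?', a: 'An LLM application builder.', dataId: pgDataId })]
});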
@@ -2,7 +2,7 @@
import { connectionMongo, type Model } from '../../../common/mongo';
const { Schema, model, models } = connectionMongo;
import { DatasetTrainingSchemaType } from '@fastgpt/global/core/dataset/type';
-import { TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
+import { DatasetDataIndexTypeMap, TrainingTypeMap } from '@fastgpt/global/core/dataset/constant';
import { DatasetColCollectionName } from '../collection/schema';
import { DatasetCollectionName } from '../schema';
import {
@@ -33,12 +33,13 @@ const TrainingDataSchema = new Schema({
    ref: DatasetCollectionName,
    required: true
  },
-  datasetCollectionId: {
+  collectionId: {
    type: Schema.Types.ObjectId,
    ref: DatasetColCollectionName,
    required: true
  },
  billId: {
+    // concat bill
    type: String,
    default: ''
  },
@@ -48,6 +49,7 @@ const TrainingDataSchema = new Schema({
    required: true
  },
  expireAt: {
+    // It will be deleted after 7 days
    type: Date,
    default: () => new Date()
  },
@@ -56,6 +58,7 @@ const TrainingDataSchema = new Schema({
    default: () => new Date('2000/1/1')
  },
  model: {
+    // ai model
    type: String,
    required: true
  },
@@ -71,13 +74,29 @@ const TrainingDataSchema = new Schema({
  a: {
    type: String,
    default: ''
  },
+  indexes: {
+    type: [
+      {
+        type: {
+          type: String,
+          enum: Object.keys(DatasetDataIndexTypeMap),
+          required: true
+        },
+        text: {
+          type: String,
+          required: true
+        }
+      }
+    ],
+    default: []
+  }
});

try {
  TrainingDataSchema.index({ lockTime: 1 });
  TrainingDataSchema.index({ userId: 1 });
-  TrainingDataSchema.index({ datasetCollectionId: 1 });
+  TrainingDataSchema.index({ collectionId: 1 });
  TrainingDataSchema.index({ expireAt: 1 }, { expireAfterSeconds: 7 * 24 * 60 });
} catch (error) {
  console.log(error);
@@ -23,6 +23,7 @@ const PromotionRecordSchema = new Schema({
    enum: ['pay', 'register']
  },
  amount: {
+    // 1 * PRICE_SCALE
    type: Number,
    required: true
  }
@@ -30,7 +30,7 @@ export const pushResult2Remote = async ({
  shareId?: string;
  responseData?: any[];
}) => {
-  if (!shareId || !authToken) return;
+  if (!shareId || !authToken || !global.systemEnv.pluginBaseUrl) return;
  try {
    const outLink = await MongoOutLink.findOne({
      shareId
@@ -1,5 +1,7 @@
+import { AuthUserTypeEnum } from '@fastgpt/global/support/permission/constant';
import { parseHeaderCert } from '../controller';
import { AuthModeType } from '../type';
+import { authOutLinkValid } from './outLink';

export const authCert = async (props: AuthModeType) => {
  const result = await parseHeaderCert(props);
@@ -10,3 +12,22 @@ export const authCert = async (props: AuthModeType) => {
    canWrite: true
  };
};
+
+export async function authCertAndShareId({
+  shareId,
+  ...props
+}: AuthModeType & { shareId?: string }) {
+  if (!shareId) {
+    return authCert(props);
+  }
+
+  const { app } = await authOutLinkValid({ shareId });
+
+  return {
+    teamId: String(app.teamId),
+    tmbId: String(app.tmbId),
+    authType: AuthUserTypeEnum.outLink,
+    apikey: '',
+    isOwner: false,
+    canWrite: false
+  };
+}
@@ -27,11 +27,11 @@ export async function authDataset({
  }
> {
  const result = await parseHeaderCert(props);
-  const { userId, teamId, tmbId } = result;
+  const { teamId, tmbId } = result;
  const { role } = await getTeamInfoByTmbId({ tmbId });

  const { dataset, isOwner, canWrite } = await (async () => {
-    const dataset = (await MongoDataset.findOne({ _id: datasetId, teamId }))?.toJSON();
+    const dataset = (await MongoDataset.findOne({ _id: datasetId, teamId }))?.toObject();

    if (!dataset) {
      return Promise.reject(DatasetErrEnum.unAuthDataset);
@@ -6,7 +6,6 @@ import { getTeamInfoByTmbId } from '../../user/team/controller';
import { MongoOpenApi } from '../../openapi/schema';
import { OpenApiErrEnum } from '@fastgpt/global/common/error/code/openapi';
import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant';
-import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant';

export async function authOpenApiKeyCrud({
  id,
@@ -37,13 +37,11 @@ export async function authUserRole(props: AuthModeType): Promise<
    teamOwner: boolean;
  }
> {
-  const { userId, teamId, tmbId } = await parseHeaderCert(props);
-  const { role: userRole, canWrite } = await getTeamInfoByTmbId({ tmbId });
+  const result = await parseHeaderCert(props);
+  const { role: userRole, canWrite } = await getTeamInfoByTmbId({ tmbId: result.tmbId });

  return {
-    userId,
-    teamId,
-    tmbId,
+    ...result,
    isOwner: true,
    role: userRole,
    teamOwner: userRole === TeamMemberRoleEnum.owner,
@@ -4,57 +4,42 @@
  TeamMemberRoleEnum,
  TeamMemberStatusEnum,
  TeamCollectionName,
-  TeamMemberCollectionName
+  TeamMemberCollectionName,
+  leaveStatus
} from '@fastgpt/global/support/user/team/constant';

-export async function getTeamInfoByTmbId({
-  tmbId,
-  userId
-}: {
-  tmbId?: string;
-  userId?: string;
-}): Promise<TeamItemType> {
-  if (!tmbId && !userId) {
-    return Promise.reject('tmbId or userId is required');
-  }
-
+async function getTeam(match: Record<string, any>): Promise<TeamItemType> {
  const db = connectionMongo?.connection?.db;

  const TeamMember = db.collection(TeamMemberCollectionName);

  const results = await TeamMember.aggregate([
    {
-      $match: tmbId
-        ? {
-            _id: new Types.ObjectId(tmbId)
-          }
-        : {
-            userId: new Types.ObjectId(userId),
-            defaultTeam: true
-          }
+      $match: match
    },
    {
      $lookup: {
-        from: TeamCollectionName, // the collection to join
-        localField: 'teamId', // join key in the TeamMember collection
-        foreignField: '_id', // join key in the Team collection
-        as: 'team' // output field that holds the joined result
+        from: TeamCollectionName,
+        localField: 'teamId',
+        foreignField: '_id',
+        as: 'team'
      }
    },
    {
-      $unwind: '$team' // unwind the joined team array into a single object
+      $unwind: '$team'
    }
  ]).toArray();
  const tmb = results[0];

  if (!tmb) {
-    return Promise.reject('team not exist');
+    return Promise.reject('member not exist');
  }

  return {
    userId: String(tmb.userId),
    teamId: String(tmb.teamId),
    teamName: tmb.team.name,
    memberName: tmb.name,
    avatar: tmb.team.avatar,
    balance: tmb.team.balance,
    tmbId: String(tmb._id),
@@ -65,11 +50,31 @@ export async function getTeamInfoByTmbId
    maxSize: tmb.team.maxSize
  };
}

+export async function getTeamInfoByTmbId({ tmbId }: { tmbId: string }) {
+  if (!tmbId) {
+    return Promise.reject('tmbId or userId is required');
+  }
+  return getTeam({
+    _id: new Types.ObjectId(tmbId),
+    status: leaveStatus
+  });
+}
+
+export async function getUserDefaultTeam({ userId }: { userId: string }) {
+  if (!userId) {
+    return Promise.reject('tmbId or userId is required');
+  }
+  return getTeam({
+    userId: new Types.ObjectId(userId),
+    defaultTeam: true
+  });
+}
export async function createDefaultTeam({
  userId,
  teamName = 'My Team',
  avatar = '/icon/logo.svg',
-  balance = 0,
+  balance,
  maxSize = 5
}: {
  userId: string;
@@ -103,6 +108,7 @@ export async function createDefaultTeam({
  await TeamMember.insertOne({
    teamId: insertedId,
    userId,
+    name: 'Owner',
    role: TeamMemberRoleEnum.owner,
    status: TeamMemberStatusEnum.active,
    createTime: new Date(),
@@ -116,7 +122,7 @@ export async function createDefaultTeam({
  },
  {
    $set: {
-      balance,
+      ...(balance !== undefined && { balance }),
      maxSize
    }
  }
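A sketch of the two lookups that replace the old dual-purpose signature; the @fastgpt/service alias and ids are assumptions, inside an async function:

import { getTeamInfoByTmbId, getUserDefaultTeam } from '@fastgpt/service/support/user/team/controller';

const team = await getTeamInfoByTmbId({ tmbId: '650f1f00a1b2c3d4e5f6a7b8' }); // rejects for members whose status is 'leave'
const defaultTeam = await getUserDefaultTeam({ userId: '650f1f00a1b2c3d4e5f6a7b9' });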
@@ -36,6 +36,7 @@ const BillSchema = new Schema({
    default: () => new Date()
  },
  total: {
+    // 1 * PRICE_SCALE
    type: Number,
    required: true
  },