4.6.2-alpha (#517)

This commit is contained in:
Archer
2023-11-25 21:58:00 +08:00
committed by GitHub
parent 9cb4280a16
commit 3acbf1ab17
39 changed files with 617 additions and 183 deletions

View File

@@ -13,7 +13,8 @@ export const hashStr = (str: string) => {
}; };
/* simple text, remove chinese space and extra \n */ /* simple text, remove chinese space and extra \n */
export const simpleText = (text: string) => { export const simpleText = (text = '') => {
text = text.trim();
text = text.replace(/([\u4e00-\u9fa5])[\s&&[^\n]]+([\u4e00-\u9fa5])/g, '$1$2'); text = text.replace(/([\u4e00-\u9fa5])[\s&&[^\n]]+([\u4e00-\u9fa5])/g, '$1$2');
text = text.replace(/\r\n|\r/g, '\n'); text = text.replace(/\r\n|\r/g, '\n');
text = text.replace(/\n{3,}/g, '\n\n'); text = text.replace(/\n{3,}/g, '\n\n');

View File

@@ -59,6 +59,7 @@ export type DatasetDataSchemaType = {
collectionId: string; collectionId: string;
q: string; // large chunks or question q: string; // large chunks or question
a: string; // answer or custom content a: string; // answer or custom content
fullTextToken: string;
indexes: DatasetDataIndexItemType[]; indexes: DatasetDataIndexItemType[];
}; };
@@ -83,6 +84,9 @@ export type DatasetTrainingSchemaType = {
export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datasetId'> & { export type CollectionWithDatasetType = Omit<DatasetCollectionSchemaType, 'datasetId'> & {
datasetId: DatasetSchemaType; datasetId: DatasetSchemaType;
}; };
/**
 * A dataset data record in which `collectionId` holds the full collection
 * document (`DatasetCollectionSchemaType`) rather than the `string` id declared
 * on `DatasetDataSchemaType`.
 * NOTE(review): presumably the shape returned after populating `collectionId`
 * in a query — confirm against the callers.
 */
export type DatasetDataWithCollectionType = Omit<DatasetDataSchemaType, 'collectionId'> & {
collectionId: DatasetCollectionSchemaType;
};
/* ================= dataset ===================== */ /* ================= dataset ===================== */
export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel'> & { export type DatasetItemType = Omit<DatasetSchemaType, 'vectorModel' | 'agentModel'> & {
@@ -130,6 +134,6 @@ export type DatasetFileSchema = {
}; };
/* ============= search =============== */ /* ============= search =============== */
export type SearchDataResponseItemType = DatasetDataItemType & { export type SearchDataResponseItemType = Omit<DatasetDataItemType, 'isOwner' | 'canWrite'> & {
score: number; score: number;
}; };

View File

@@ -20,7 +20,7 @@ export type FlowNodeChangeProps = {
}; };
export type FlowNodeInputItemType = { export type FlowNodeInputItemType = {
key: `${ModuleInputKeyEnum}`; key: `${ModuleInputKeyEnum}` | string;
type: `${FlowNodeInputTypeEnum}`; // Decide on a render style type: `${FlowNodeInputTypeEnum}`; // Decide on a render style
value?: any; value?: any;
valueType?: `${ModuleDataTypeEnum}`; // data type valueType?: `${ModuleDataTypeEnum}`; // data type

View File

@@ -25,7 +25,7 @@ export type moduleTemplateListType = {
// store module type // store module type
export type ModuleItemType = { export type ModuleItemType = {
name: string; name: string;
logo?: string; avatar?: string;
intro?: string; intro?: string;
moduleId: string; moduleId: string;
position?: { position?: {

View File

@@ -43,13 +43,9 @@ const DatasetDataSchema = new Schema({
type: String, type: String,
default: '' default: ''
}, },
qToken: { fullTextToken: {
type: String, type: String,
default: '' required: true
},
aToken: {
type: String,
default: ''
}, },
indexes: { indexes: {
type: [ type: [
@@ -82,7 +78,7 @@ try {
DatasetDataSchema.index({ datasetId: 1 }); DatasetDataSchema.index({ datasetId: 1 });
DatasetDataSchema.index({ collectionId: 1 }); DatasetDataSchema.index({ collectionId: 1 });
// full text index // full text index
DatasetDataSchema.index({ qToken: 'text', aToken: 'text' }); DatasetDataSchema.index({ fullTextToken: 'text' });
} catch (error) { } catch (error) {
console.log(error); console.log(error);
} }

View File

@@ -4,23 +4,23 @@
"dependencies": { "dependencies": {
"@fastgpt/global": "workspace:*", "@fastgpt/global": "workspace:*",
"axios": "^1.5.1", "axios": "^1.5.1",
"nextjs-cors": "^2.1.2",
"next": "13.5.2",
"cookie": "^0.5.0", "cookie": "^0.5.0",
"encoding": "^0.1.13",
"jsonwebtoken": "^9.0.2", "jsonwebtoken": "^9.0.2",
"mongoose": "^7.0.2", "mongoose": "^7.0.2",
"winston": "^3.10.0", "nanoid": "^4.0.1",
"winston-mongodb": "^5.1.1", "next": "13.5.2",
"tunnel": "^0.0.6", "nextjs-cors": "^2.1.2",
"encoding": "^0.1.13",
"pg": "^8.10.0", "pg": "^8.10.0",
"nanoid": "^4.0.1" "tunnel": "^0.0.6",
"winston": "^3.10.0",
"winston-mongodb": "^5.1.1"
}, },
"devDependencies": { "devDependencies": {
"@types/tunnel": "^0.0.4",
"@types/pg": "^8.6.6",
"@types/node": "^20.8.5",
"@types/cookie": "^0.5.2", "@types/cookie": "^0.5.2",
"@types/jsonwebtoken": "^9.0.3" "@types/jsonwebtoken": "^9.0.3",
"@types/node": "^20.8.5",
"@types/pg": "^8.6.6",
"@types/tunnel": "^0.0.4"
} }
} }

171
pnpm-lock.yaml generated
View File

@@ -164,6 +164,9 @@ importers:
'@mozilla/readability': '@mozilla/readability':
specifier: ^0.4.4 specifier: ^0.4.4
version: registry.npmmirror.com/@mozilla/readability@0.4.4 version: registry.npmmirror.com/@mozilla/readability@0.4.4
'@node-rs/jieba':
specifier: ^1.7.2
version: registry.npmmirror.com/@node-rs/jieba@1.7.2
'@tanstack/react-query': '@tanstack/react-query':
specifier: ^4.24.10 specifier: ^4.24.10
version: registry.npmmirror.com/@tanstack/react-query@4.36.1(react-dom@18.2.0)(react@18.2.0) version: registry.npmmirror.com/@tanstack/react-query@4.36.1(react-dom@18.2.0)(react@18.2.0)
@@ -3686,6 +3689,174 @@ packages:
requiresBuild: true requiresBuild: true
optional: true optional: true
registry.npmmirror.com/@node-rs/jieba-android-arm-eabi@1.7.2:
resolution: {integrity: sha512-FyDHRNSRIHOQO7S6Q4RwuGffnnnuNwaXPH7K8WqSzifEY+zFIaSPcNqrZHrnqyeXc4JiYpBIHeP+0Mkf1kIGRA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-android-arm-eabi/-/jieba-android-arm-eabi-1.7.2.tgz}
name: '@node-rs/jieba-android-arm-eabi'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [arm]
os: [android]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-android-arm64@1.7.2:
resolution: {integrity: sha512-z0UEZCGrAX/IiarhuDMsEIDZBS77UZv4SQyL/J48yrsbWKbb2lJ1vCrYxXIWqwp6auXHEu4r1O/pMriDAcEnPg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-android-arm64/-/jieba-android-arm64-1.7.2.tgz}
name: '@node-rs/jieba-android-arm64'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [arm64]
os: [android]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-darwin-arm64@1.7.2:
resolution: {integrity: sha512-M2cHIWRaaOmXGKy446SH2+Y2PzREaI2oYznPbg55wYEdioUp01YS/2WRG8CaoCKEj0aUocA7MFM2vVcoIAsbQw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-darwin-arm64/-/jieba-darwin-arm64-1.7.2.tgz}
name: '@node-rs/jieba-darwin-arm64'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [arm64]
os: [darwin]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-darwin-x64@1.7.2:
resolution: {integrity: sha512-euDawBU2FxB0CGTR803BA6WABsiicIrqa61z2AFFDPkJCDrauEM0jbMg3GDKLAvbaLbZ1Etu3QNN5xyroqp4Qw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-darwin-x64/-/jieba-darwin-x64-1.7.2.tgz}
name: '@node-rs/jieba-darwin-x64'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [x64]
os: [darwin]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-freebsd-x64@1.7.2:
resolution: {integrity: sha512-vXCaYxPb90d/xTBVG+ZZXrFLXsO2719pZSyiZCL2tey+UY28U7MOoK6394Wwmf0FCB/eRTQMCKjVIUDi+IRMUg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-freebsd-x64/-/jieba-freebsd-x64-1.7.2.tgz}
name: '@node-rs/jieba-freebsd-x64'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [x64]
os: [freebsd]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-linux-arm-gnueabihf@1.7.2:
resolution: {integrity: sha512-HTep79XlJYO3KRYZ2kJChG9HnYr1DKSQTB+HEYWKLK0ifphqybcxGNLAdH0S4dViG2ciD0+iN/refgtqZEidpw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-linux-arm-gnueabihf/-/jieba-linux-arm-gnueabihf-1.7.2.tgz}
name: '@node-rs/jieba-linux-arm-gnueabihf'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [arm]
os: [linux]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-linux-arm64-gnu@1.7.2:
resolution: {integrity: sha512-P8QJdQydOVewL1MIqYiRpI7LOfrRQag+p4/hwExe+YXH8C7DOrR8rWJD/7XNRTbpOimlHq1UN/e+ZzhxQF/cLw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-linux-arm64-gnu/-/jieba-linux-arm64-gnu-1.7.2.tgz}
name: '@node-rs/jieba-linux-arm64-gnu'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
libc: [glibc]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-linux-arm64-musl@1.7.2:
resolution: {integrity: sha512-WjnN0hmDvTXb2h3hMW5VnUGkK1xaqhs+WHfMMilau55+YN+YOYALKZ0TeBY4BapClLuBx54wqwmBX+B4hAXunQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-linux-arm64-musl/-/jieba-linux-arm64-musl-1.7.2.tgz}
name: '@node-rs/jieba-linux-arm64-musl'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [arm64]
os: [linux]
libc: [musl]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-linux-x64-gnu@1.7.2:
resolution: {integrity: sha512-gBXds/DwNSA6lNUxJjL6WIaNT6pnlM5juUgV/krLLkBJ8vXpOrQ07p0rrK1tnigz9b20xhsHaFRSwED1Y8zeXw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-linux-x64-gnu/-/jieba-linux-x64-gnu-1.7.2.tgz}
name: '@node-rs/jieba-linux-x64-gnu'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
libc: [glibc]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-linux-x64-musl@1.7.2:
resolution: {integrity: sha512-tNVD3SMuG5zAj7+bLS2Enio3zR7BPxi3PhQtpQ+Hv83jajIcN46QQ0EdoMFz/aB+hkQ9PlLAstu+VREFegs5EA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-linux-x64-musl/-/jieba-linux-x64-musl-1.7.2.tgz}
name: '@node-rs/jieba-linux-x64-musl'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [x64]
os: [linux]
libc: [musl]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-win32-arm64-msvc@1.7.2:
resolution: {integrity: sha512-/e1iQ0Dh02lGPNCYTU/H3cfIsWydaGRzZ3TDj6GfWrxkWqXORL98x/VJ/C/uKLpc7GSLLd9ygyZG7SOAfKe2tA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-win32-arm64-msvc/-/jieba-win32-arm64-msvc-1.7.2.tgz}
name: '@node-rs/jieba-win32-arm64-msvc'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [arm64]
os: [win32]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-win32-ia32-msvc@1.7.2:
resolution: {integrity: sha512-cYjA6YUiOwtuEzWErvwMMt/RETNWQDLcmAaiHA8ohsa6c0eB0kRJlQCc683tlaczZxqroY/7C9mxgJNGvoGRbw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-win32-ia32-msvc/-/jieba-win32-ia32-msvc-1.7.2.tgz}
name: '@node-rs/jieba-win32-ia32-msvc'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [ia32]
os: [win32]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba-win32-x64-msvc@1.7.2:
resolution: {integrity: sha512-2M+Um3woFF17sa8VBYQQ6E5PNMe9Kf9fdzmeDh/GzuNHXlxW4LyK9VTV8zchIv/bDNAR5Z85kfW4wASULUxvFQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba-win32-x64-msvc/-/jieba-win32-x64-msvc-1.7.2.tgz}
name: '@node-rs/jieba-win32-x64-msvc'
version: 1.7.2
engines: {node: '>= 10'}
cpu: [x64]
os: [win32]
requiresBuild: true
dev: false
optional: true
registry.npmmirror.com/@node-rs/jieba@1.7.2:
resolution: {integrity: sha512-zGto08NDU+KWm670qVHYGTb0YTEJ0A97dwH3WCnnhyRYMqTbOXKC6OwTc/cjzfSJP1UDBSar9Ug9BlmWmEThWg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@node-rs/jieba/-/jieba-1.7.2.tgz}
name: '@node-rs/jieba'
version: 1.7.2
engines: {node: '>= 10'}
optionalDependencies:
'@node-rs/jieba-android-arm-eabi': registry.npmmirror.com/@node-rs/jieba-android-arm-eabi@1.7.2
'@node-rs/jieba-android-arm64': registry.npmmirror.com/@node-rs/jieba-android-arm64@1.7.2
'@node-rs/jieba-darwin-arm64': registry.npmmirror.com/@node-rs/jieba-darwin-arm64@1.7.2
'@node-rs/jieba-darwin-x64': registry.npmmirror.com/@node-rs/jieba-darwin-x64@1.7.2
'@node-rs/jieba-freebsd-x64': registry.npmmirror.com/@node-rs/jieba-freebsd-x64@1.7.2
'@node-rs/jieba-linux-arm-gnueabihf': registry.npmmirror.com/@node-rs/jieba-linux-arm-gnueabihf@1.7.2
'@node-rs/jieba-linux-arm64-gnu': registry.npmmirror.com/@node-rs/jieba-linux-arm64-gnu@1.7.2
'@node-rs/jieba-linux-arm64-musl': registry.npmmirror.com/@node-rs/jieba-linux-arm64-musl@1.7.2
'@node-rs/jieba-linux-x64-gnu': registry.npmmirror.com/@node-rs/jieba-linux-x64-gnu@1.7.2
'@node-rs/jieba-linux-x64-musl': registry.npmmirror.com/@node-rs/jieba-linux-x64-musl@1.7.2
'@node-rs/jieba-win32-arm64-msvc': registry.npmmirror.com/@node-rs/jieba-win32-arm64-msvc@1.7.2
'@node-rs/jieba-win32-ia32-msvc': registry.npmmirror.com/@node-rs/jieba-win32-ia32-msvc@1.7.2
'@node-rs/jieba-win32-x64-msvc': registry.npmmirror.com/@node-rs/jieba-win32-x64-msvc@1.7.2
dev: false
registry.npmmirror.com/@nodelib/fs.scandir@2.1.5: registry.npmmirror.com/@nodelib/fs.scandir@2.1.5:
resolution: {integrity: sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz} resolution: {integrity: sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz}
name: '@nodelib/fs.scandir' name: '@nodelib/fs.scandir'

View File

@@ -20,6 +20,7 @@
"@fastgpt/service": "workspace:*", "@fastgpt/service": "workspace:*",
"@fastgpt/web": "workspace:*", "@fastgpt/web": "workspace:*",
"@mozilla/readability": "^0.4.4", "@mozilla/readability": "^0.4.4",
"@node-rs/jieba": "^1.7.2",
"@tanstack/react-query": "^4.24.10", "@tanstack/react-query": "^4.24.10",
"@types/nprogress": "^0.2.0", "@types/nprogress": "^0.2.0",
"axios": "^1.5.1", "axios": "^1.5.1",

View File

@@ -261,6 +261,9 @@
"data": { "data": {
"Edit": "Edit Data", "Edit": "Edit Data",
"id": "Data ID" "id": "Data ID"
},
"test": {
"Test Result": "Results"
} }
}, },
"module": { "module": {

View File

@@ -261,6 +261,9 @@
"data": { "data": {
"Edit": "编辑数据", "Edit": "编辑数据",
"id": "数据ID" "id": "数据ID"
},
"test": {
"Test Result": "测试结果"
} }
}, },
"module": { "module": {

View File

@@ -342,7 +342,7 @@ ${images.map((img) => JSON.stringify({ src: img.src })).join('\n')}
const items = clipboardData.items; const items = clipboardData.items;
const files = Array.from(items) const files = Array.from(items)
.map((item) => (item.kind === 'file' ? item.getAsFile() : undefined)) .map((item) => (item.kind === 'file' ? item.getAsFile() : undefined))
.filter((item) => item) as File[]; .filter(Boolean) as File[];
onSelectFile(files); onSelectFile(files);
} }
}} }}

View File

@@ -40,7 +40,7 @@ const ResponseTags = ({ responseData = [] }: { responseData?: ChatHistoryItemRes
.filter((item) => item.moduleType === FlowNodeTypeEnum.chatNode) .filter((item) => item.moduleType === FlowNodeTypeEnum.chatNode)
.map((item) => item.quoteList) .map((item) => item.quoteList)
.flat() .flat()
.filter((item) => item) as SearchDataResponseItemType[]; .filter(Boolean) as SearchDataResponseItemType[];
const sourceList = quoteList.reduce( const sourceList = quoteList.reduce(
(acc: Record<string, SearchDataResponseItemType[]>, cur) => { (acc: Record<string, SearchDataResponseItemType[]>, cur) => {
if (!acc[cur.sourceName]) { if (!acc[cur.sourceName]) {

View File

@@ -18,13 +18,6 @@ export type DatasetUpdateParams = {
agentModel?: LLMModelItemType; agentModel?: LLMModelItemType;
}; };
export type SearchTestProps = {
datasetId: string;
text: string;
limit?: number;
rerank?: boolean;
};
/* ======= collections =========== */ /* ======= collections =========== */
export type GetDatasetCollectionsProps = RequestPaging & { export type GetDatasetCollectionsProps = RequestPaging & {
datasetId: string; datasetId: string;

View File

@@ -1,6 +1,9 @@
import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api'; import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant';
import { DatasetDataIndexItemType } from '@fastgpt/global/core/dataset/type'; import {
DatasetDataIndexItemType,
SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type';
/* ================= dataset ===================== */ /* ================= dataset ===================== */
export type CreateDatasetParams = { export type CreateDatasetParams = {
@@ -34,3 +37,15 @@ export type UpdateDatasetDataProps = {
dataId?: string; // pg data id dataId?: string; // pg data id
})[]; })[];
}; };
/* -------------- search ---------------- */
/**
 * Parameters of a dataset search test.
 * NOTE(review): presumably the request body of the search-test API handler — confirm.
 */
export type SearchTestProps = {
// id of the dataset the test runs against
datasetId: string;
// text to search for
text: string;
// presumably the maximum number of results to return — TODO confirm
limit?: number;
// presumably toggles re-ranking of the results — TODO confirm
rerank?: boolean;
};
/**
 * Result of a dataset search test: the matched items plus how long the request took.
 */
export type SearchTestResponse = {
// matched data items, each carrying a `score`
list: SearchDataResponseItemType[];
// elapsed time as preformatted text, not a number; the search-test handler
// emits seconds with three decimals and an "s" suffix (e.g. "0.123s")
duration: string;
};

View File

@@ -93,7 +93,7 @@ function App({ Component, pageProps }: AppProps) {
return ( return (
<> <>
<Head> <Head>
<title>{feConfigs?.systemTitle || process.env.SYSTEM_NAME || 'GPT'}</title> <title>{feConfigs?.systemTitle || process.env.SYSTEM_NAME || ''}</title>
<meta <meta
name="description" name="description"
content="FastGPT 是一个大模型应用编排系统,提供开箱即用的数据处理、模型调用等能力,可以快速的构建知识库并通过 Flow 可视化进行工作流编排,实现复杂的知识库场景!" content="FastGPT 是一个大模型应用编排系统,提供开箱即用的数据处理、模型调用等能力,可以快速的构建知识库并通过 Flow 可视化进行工作流编排,实现复杂的知识库场景!"

View File

@@ -295,7 +295,7 @@ async function initPgData() {
]); ]);
} catch (error) { } catch (error) {
console.log(error); console.log(error);
console.log('column exits'); console.log('column exists');
} }
const { rows } = await PgClient.query<{ user_id: string }>(` const { rows } = await PgClient.query<{ user_id: string }>(`

View File

@@ -2,18 +2,9 @@ import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response'; import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo'; import { connectToDatabase } from '@/service/mongo';
import { delay } from '@/utils/tools'; import { delay } from '@/utils/tools';
import { PgClient } from '@fastgpt/service/common/pg';
import {
DatasetDataIndexTypeEnum,
PgDatasetTableName
} from '@fastgpt/global/core/dataset/constant';
import { authCert } from '@fastgpt/service/support/permission/auth/common'; import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { getUserDefaultTeam } from '@fastgpt/service/support/user/team/controller'; import { jiebaSplit } from '@/service/core/dataset/utils';
import { MongoDataset } from '@fastgpt/service/core/dataset/schema';
import { defaultQAModels } from '@fastgpt/global/core/ai/model';
import { MongoApp } from '@fastgpt/service/core/app/schema';
let success = 0; let success = 0;
/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */ /* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */
@@ -22,6 +13,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const { limit = 50 } = req.body as { limit: number }; const { limit = 50 } = req.body as { limit: number };
await authCert({ req, authRoot: true }); await authCert({ req, authRoot: true });
await connectToDatabase(); await connectToDatabase();
success = 0;
console.log(
'total',
await MongoDatasetData.countDocuments({ fullTextToken: { $exists: false } })
);
await initFullTextToken(limit); await initFullTextToken(limit);
@@ -37,4 +34,31 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
}); });
} }
} }
/**
 * Backfill `fullTextToken` on every dataset data document that does not have the field yet.
 *
 * Documents are processed in batches of `limit`: the text of `q` and `a` is concatenated,
 * tokenized with `jiebaSplit`, and the result is stored with `$set`. The module-level
 * `success` counter is increased by the number of updates that were fulfilled.
 *
 * The function keeps going as long as batches make progress. A batch that throws, or in
 * which no single update succeeds, counts as a stalled round; after `maxStalledRounds`
 * consecutive stalled rounds the function logs and returns instead of retrying forever.
 * Because the query only selects documents without `fullTextToken`, the job can safely
 * be invoked again later to resume.
 *
 * @param limit number of documents fetched and updated per batch
 * @returns resolves (with no value) when nothing is left to process or the job gave up
 */
export async function initFullTextToken(limit = 50): Promise<any> {
  // Upper bound on consecutive rounds without progress (query failure or all updates rejected).
  const maxStalledRounds = 10;
  let stalledRounds = 0;

  // Iterative instead of tail-recursive so a large collection does not build an
  // ever-growing chain of pending promises.
  while (stalledRounds < maxStalledRounds) {
    try {
      const dataList = await MongoDatasetData.find({ fullTextToken: { $exists: false } }, '_id q a')
        .limit(limit)
        .lean();
      if (dataList.length === 0) return;

      const result = await Promise.allSettled(
        // async callback: a tokenizer exception rejects only this item, not the whole batch
        dataList.map(async (item) => {
          const text = (item.q || '') + (item.a || '');
          const tokens = jiebaSplit({ text });
          return await MongoDatasetData.findByIdAndUpdate(item._id, {
            $set: {
              fullTextToken: tokens
            }
          });
        })
      );

      const fulfilled = result.filter((item) => item.status === 'fulfilled').length;
      success += fulfilled;
      console.log(`success: ${success}`);

      if (fulfilled > 0) {
        stalledRounds = 0;
        continue;
      }

      // Every update was rejected: the same documents would be fetched again next round.
      stalledRounds++;
      console.log(
        `initFullTextToken: no document updated in this batch (${stalledRounds}/${maxStalledRounds})`
      );
    } catch (error) {
      stalledRounds++;
      console.log(error);
    }

    // Back off before retrying a stalled round.
    await delay(1000);
  }

  console.log(
    `initFullTextToken: stopped after ${maxStalledRounds} consecutive rounds without progress`
  );
}

View File

@@ -6,6 +6,7 @@ import type { CreateAppParams } from '@fastgpt/global/core/app/api.d';
import { AppTypeEnum } from '@fastgpt/global/core/app/constants'; import { AppTypeEnum } from '@fastgpt/global/core/app/constants';
import { MongoApp } from '@fastgpt/service/core/app/schema'; import { MongoApp } from '@fastgpt/service/core/app/schema';
import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user'; import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user';
import { SimpleModeTemplate_FastGPT_Universal } from '@/global/core/app/constants';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try { try {
@@ -39,7 +40,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse<
teamId, teamId,
tmbId, tmbId,
modules, modules,
type type,
simpleTemplateId: SimpleModeTemplate_FastGPT_Universal.id
}); });
jsonRes(res, { jsonRes(res, {

View File

@@ -41,7 +41,7 @@ function simpleChatTemplate({
{ {
moduleId: 'userChatInput', moduleId: 'userChatInput',
name: '用户问题(对话入口)', name: '用户问题(对话入口)',
logo: '/imgs/module/userChatInput.png', avatar: '/imgs/module/userChatInput.png',
flowType: 'questionInput', flowType: 'questionInput',
position: { position: {
x: 464.32198615344566, x: 464.32198615344566,
@@ -73,7 +73,7 @@ function simpleChatTemplate({
{ {
moduleId: 'history', moduleId: 'history',
name: '聊天记录', name: '聊天记录',
logo: '/imgs/module/history.png', avatar: '/imgs/module/history.png',
flowType: 'historyNode', flowType: 'historyNode',
position: { position: {
x: 452.5466249541586, x: 452.5466249541586,
@@ -114,7 +114,7 @@ function simpleChatTemplate({
{ {
moduleId: 'chatModule', moduleId: 'chatModule',
name: 'AI 对话', name: 'AI 对话',
logo: '/imgs/module/AI.png', avatar: '/imgs/module/AI.png',
flowType: 'chatNode', flowType: 'chatNode',
showStatus: true, showStatus: true,
position: { position: {
@@ -284,7 +284,7 @@ function datasetTemplate({
{ {
moduleId: 'userChatInput', moduleId: 'userChatInput',
name: '用户问题(对话入口)', name: '用户问题(对话入口)',
logo: '/imgs/module/userChatInput.png', avatar: '/imgs/module/userChatInput.png',
flowType: 'questionInput', flowType: 'questionInput',
position: { position: {
x: 464.32198615344566, x: 464.32198615344566,
@@ -320,7 +320,7 @@ function datasetTemplate({
{ {
moduleId: 'history', moduleId: 'history',
name: '聊天记录', name: '聊天记录',
logo: '/imgs/module/history.png', avatar: '/imgs/module/history.png',
flowType: 'historyNode', flowType: 'historyNode',
position: { position: {
x: 452.5466249541586, x: 452.5466249541586,
@@ -361,7 +361,7 @@ function datasetTemplate({
{ {
moduleId: 'datasetSearch', moduleId: 'datasetSearch',
name: '知识库搜索', name: '知识库搜索',
logo: '/imgs/module/db.png', avatar: '/imgs/module/db.png',
flowType: 'datasetSearchNode', flowType: 'datasetSearchNode',
showStatus: true, showStatus: true,
position: { position: {
@@ -454,7 +454,7 @@ function datasetTemplate({
{ {
moduleId: 'chatModule', moduleId: 'chatModule',
name: 'AI 对话', name: 'AI 对话',
logo: '/imgs/module/AI.png', avatar: '/imgs/module/AI.png',
flowType: 'chatNode', flowType: 'chatNode',
showStatus: true, showStatus: true,
position: { position: {

View File

@@ -15,6 +15,7 @@ import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controll
import { authTeamBalance } from '@/service/support/permission/auth/bill'; import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push'; import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';
import { InsertOneDatasetDataProps } from '@/global/core/dataset/api'; import { InsertOneDatasetDataProps } from '@/global/core/dataset/api';
import { simpleText } from '@fastgpt/global/common/string/tools';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try { try {
@@ -46,8 +47,12 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
] = await Promise.all([getCollectionWithDataset(collectionId), authTeamBalance(teamId)]); ] = await Promise.all([getCollectionWithDataset(collectionId), authTeamBalance(teamId)]);
// format data // format data
const formatQ = q.replace(/\\n/g, '\n').trim().replace(/'/g, '"'); const formatQ = simpleText(q);
const formatA = a?.replace(/\\n/g, '\n').trim().replace(/'/g, '"') || ''; const formatA = simpleText(a);
const formatIndexes = indexes?.map((item) => ({
...item,
text: simpleText(item.text)
}));
// token check // token check
const token = countPromptTokens(formatQ, 'system'); const token = countPromptTokens(formatQ, 'system');
@@ -72,7 +77,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
q: formatQ, q: formatQ,
a: formatA, a: formatA,
model: vectorModelData.model, model: vectorModelData.model,
indexes indexes: formatIndexes
}); });
pushGenerateVectorBill({ pushGenerateVectorBill({

View File

@@ -13,6 +13,7 @@ import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api';
import { getQAModel, getVectorModel } from '@/service/core/ai/model'; import { getQAModel, getVectorModel } from '@/service/core/ai/model';
import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset'; import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset';
import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller'; import { getCollectionWithDataset } from '@fastgpt/service/core/dataset/controller';
import { simpleText } from '@fastgpt/global/common/string/tools';
export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try { try {
@@ -72,6 +73,21 @@ export async function pushDataToDatasetCollection({
collectionId collectionId
}); });
// format q and a, remove empty char
data.forEach((item) => {
item.q = simpleText(item.q);
item.a = simpleText(item.a);
item.indexes = item.indexes
?.map((index) => {
return {
...index,
text: simpleText(index.text)
};
})
.filter(Boolean);
});
// filter repeat or equal content // filter repeat or equal content
const set = new Set(); const set = new Set();
const filterResult: Record<string, PushDatasetDataChunkProps[]> = { const filterResult: Record<string, PushDatasetDataChunkProps[]> = {

View File

@@ -1,9 +1,8 @@
import type { NextApiRequest, NextApiResponse } from 'next'; import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response'; import { jsonRes } from '@fastgpt/service/common/response';
import { withNextCors } from '@fastgpt/service/common/middle/cors'; import { withNextCors } from '@fastgpt/service/common/middle/cors';
import type { SearchTestProps } from '@/global/core/api/datasetReq.d'; import type { SearchTestProps, SearchTestResponse } from '@/global/core/dataset/api.d';
import { connectToDatabase } from '@/service/mongo'; import { connectToDatabase } from '@/service/mongo';
import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset';
import { authTeamBalance } from '@/service/support/permission/auth/bill'; import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push'; import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';
@@ -22,6 +21,8 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
throw new Error('缺少参数'); throw new Error('缺少参数');
} }
const start = Date.now();
// auth dataset role // auth dataset role
const { dataset, teamId, tmbId, apikey } = await authDataset({ const { dataset, teamId, tmbId, apikey } = await authDataset({
req, req,
@@ -61,8 +62,11 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
}); });
} }
jsonRes<SearchDataResponseItemType[]>(res, { jsonRes<SearchTestResponse>(res, {
data: searchRes data: {
list: searchRes,
duration: `${((Date.now() - start) / 1000).toFixed(3)}s`
}
}); });
} catch (err) { } catch (err) {
jsonRes(res, { jsonRes(res, {

View File

@@ -1,17 +1,16 @@
import type { NextApiRequest, NextApiResponse } from 'next'; import type { NextApiRequest, NextApiResponse } from 'next';
import { jsonRes } from '@fastgpt/service/common/response'; import { jsonRes } from '@fastgpt/service/common/response';
import { connectToDatabase } from '@/service/mongo'; import { connectToDatabase } from '@/service/mongo';
import { MongoPlugin } from '@fastgpt/service/core/plugin/schema';
import { authPluginCrud } from '@fastgpt/service/support/permission/auth/plugin'; import { authPluginCrud } from '@fastgpt/service/support/permission/auth/plugin';
export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) { export default async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
try { try {
const { id } = req.query as { id: string }; const { id } = req.query as { id: string };
await connectToDatabase(); await connectToDatabase();
await authPluginCrud({ req, authToken: true, id, per: 'r' }); const { plugin } = await authPluginCrud({ req, authToken: true, id, per: 'r' });
jsonRes(res, { jsonRes(res, {
data: await MongoPlugin.findOne({ id }) data: plugin
}); });
} catch (err) { } catch (err) {
jsonRes(res, { jsonRes(res, {

View File

@@ -230,16 +230,13 @@ const Info = (
placeholder={'标签,使用空格分割。'} placeholder={'标签,使用空格分割。'}
maxLength={30} maxLength={30}
onChange={(e) => { onChange={(e) => {
setValue( setValue('tags', e.target.value.split(' ').filter(Boolean));
'tags',
e.target.value.split(' ').filter((item) => item)
);
setRefresh(!refresh); setRefresh(!refresh);
}} }}
/> />
<Flex w={'100%'} pl={['90px', '160px']} mt={2}> <Flex w={'100%'} pl={['90px', '160px']} mt={2}>
{getValues('tags') {getValues('tags')
.filter((item) => item) .filter(Boolean)
.map((item, i) => ( .map((item, i) => (
<Tag mr={2} mb={2} key={i} whiteSpace={'nowrap'}> <Tag mr={2} mb={2} key={i} whiteSpace={'nowrap'}>
{item} {item}

View File

@@ -16,6 +16,7 @@ import { QuestionOutlineIcon } from '@chakra-ui/icons';
import { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type'; import { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import { feConfigs } from '@/web/common/system/staticData'; import { feConfigs } from '@/web/common/system/staticData';
import { SearchTestResponse } from '../../../../global/core/dataset/api';
const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12); const nanoid = customAlphabet('abcdefghijklmnopqrstuvwxyz1234567890', 12);
const Test = ({ datasetId }: { datasetId: string }) => { const Test = ({ datasetId }: { datasetId: string }) => {
@@ -37,20 +38,21 @@ const Test = ({ datasetId }: { datasetId: string }) => {
); );
const { mutate, isLoading } = useRequest({ const { mutate, isLoading } = useRequest({
mutationFn: () => postSearchText({ datasetId, text: inputText.trim(), rerank, limit: 20 }), mutationFn: () => postSearchText({ datasetId, text: inputText.trim(), rerank, limit: 30 }),
onSuccess(res: SearchDataResponseItemType[]) { onSuccess(res: SearchTestResponse) {
if (!res || res.length === 0) { if (!res || res.list.length === 0) {
return toast({ return toast({
status: 'warning', status: 'warning',
title: t('dataset.test.noResult') title: t('dataset.test.noResult')
}); });
} }
const testItem = { const testItem: SearchTestStoreItemType = {
id: nanoid(), id: nanoid(),
datasetId, datasetId,
text: inputText.trim(), text: inputText.trim(),
time: new Date(), time: new Date(),
results: res results: res.list,
duration: res.duration
}; };
pushDatasetTestItem(testItem); pushDatasetTestItem(testItem);
setDatasetTestItem(testItem); setDatasetTestItem(testItem);
@@ -176,7 +178,7 @@ const Test = ({ datasetId }: { datasetId: string }) => {
<> <>
<Flex alignItems={'center'}> <Flex alignItems={'center'}>
<Box fontSize={'3xl'} color={'myGray.600'}> <Box fontSize={'3xl'} color={'myGray.600'}>
{t('core.dataset.test.Test Result')}
</Box> </Box>
<MyTooltip <MyTooltip
label={ label={
@@ -185,12 +187,13 @@ const Test = ({ datasetId }: { datasetId: string }) => {
forceShow forceShow
> >
<QuestionOutlineIcon <QuestionOutlineIcon
ml={2} mx={2}
color={'myGray.600'} color={'myGray.600'}
cursor={'pointer'} cursor={'pointer'}
fontSize={'lg'} fontSize={'lg'}
/> />
</MyTooltip> </MyTooltip>
<Box>({datasetTestItem.duration})</Box>
</Flex> </Flex>
<Grid <Grid
mt={1} mt={1}

View File

@@ -178,7 +178,7 @@ const CreateModal = ({ onClose, parentId }: { onClose: () => void; parentId?: st
<Flex mt={2} flexWrap={'wrap'}> <Flex mt={2} flexWrap={'wrap'}>
{getValues('tags') {getValues('tags')
.split(' ') .split(' ')
.filter((item) => item) .filter(Boolean)
.map((item, i) => ( .map((item, i) => (
<Tag mr={2} mb={2} key={i} whiteSpace={'nowrap'}> <Tag mr={2} mb={2} key={i} whiteSpace={'nowrap'}>
{item} {item}

View File

@@ -390,13 +390,11 @@ const Kb = () => {
</Flex> </Flex>
<Box flex={'1 0 0'} overflow={'hidden'} pt={2}> <Box flex={'1 0 0'} overflow={'hidden'} pt={2}>
<Flex> <Flex>
{dataset.tags {dataset.tags.filter(Boolean).map((tag, i) => (
.filter((item) => item) <Tag key={i} mr={2} mb={2}>
.map((tag, i) => ( {tag}
<Tag key={i} mr={2} mb={2}> </Tag>
{tag} ))}
</Tag>
))}
</Flex> </Flex>
</Box> </Box>
<Flex alignItems={'center'} fontSize={'sm'}> <Flex alignItems={'center'} fontSize={'sm'}>

View File

@@ -34,7 +34,7 @@ const PreviewPlugin = ({
item: { item: {
moduleId: 'plugin', moduleId: 'plugin',
flowType: FlowNodeTypeEnum.pluginModule, flowType: FlowNodeTypeEnum.pluginModule,
logo: plugin.avatar, avatar: plugin.avatar,
name: plugin.name, name: plugin.name,
intro: plugin.intro, intro: plugin.intro,
...formatPluginToPreviewModule(plugin._id, modules) ...formatPluginToPreviewModule(plugin._id, modules)

View File

@@ -48,27 +48,32 @@ const Render = ({ pluginId }: Props) => {
return copyTemplates; return copyTemplates;
}, [nodes]); }, [nodes]);
const { data } = useQuery(['getOnePlugin', pluginId], () => getOnePlugin(pluginId), { const { data: pluginDetail } = useQuery(
onError: (error) => { ['getOnePlugin', pluginId],
toast({ () => getOnePlugin(pluginId),
status: 'warning', {
title: getErrText(error, t('plugin.Load Plugin Failed')) onError: (error) => {
}); toast({
router.replace('/plugin/list'); status: 'warning',
title: getErrText(error, t('plugin.Load Plugin Failed'))
});
router.replace('/plugin/list');
}
} }
}); );
console.log(pluginDetail);
useQuery(['getPlugTemplates'], () => loadPluginTemplates()); useQuery(['getPlugTemplates'], () => loadPluginTemplates());
const filterPlugins = useMemo(() => { const filterPlugins = useMemo(() => {
return pluginModuleTemplates.filter((item) => item.id !== pluginId); return pluginModuleTemplates.filter((item) => item.id !== pluginId);
}, [pluginId, pluginModuleTemplates]); }, [pluginId, pluginModuleTemplates]);
return data ? ( return pluginDetail ? (
<Flow <Flow
systemTemplates={filterTemplates} systemTemplates={filterTemplates}
pluginTemplates={filterPlugins} pluginTemplates={filterPlugins}
modules={data?.modules || []} modules={pluginDetail?.modules || []}
Header={<Header plugin={data} onClose={() => router.back()} />} Header={<Header plugin={pluginDetail} onClose={() => router.back()} />}
/> />
) : ( ) : (
<Loading /> <Loading />

View File

@@ -24,17 +24,81 @@ import MyModal from '@/components/MyModal';
import { useTranslation } from 'next-i18next'; import { useTranslation } from 'next-i18next';
import { useConfirm } from '@/web/common/hooks/useConfirm'; import { useConfirm } from '@/web/common/hooks/useConfirm';
import MyIcon from '@/components/Icon'; import MyIcon from '@/components/Icon';
import { CreateOnePluginParams } from '@fastgpt/global/core/plugin/controller';
export type FormType = { export type FormType = CreateOnePluginParams & {
id?: string; id?: string;
avatar: string;
name: string;
intro: string;
}; };
export const defaultForm = { export const defaultForm: FormType = {
avatar: '/icon/logo.svg', avatar: '/icon/logo.svg',
name: '', name: '',
intro: '' intro: '',
modules: [
{
moduleId: 'w90mfp',
name: '定义插件输入',
avatar: '/imgs/module/input.png',
flowType: 'pluginInput',
showStatus: false,
position: {
x: 616.4226348688949,
y: -165.05298493910115
},
inputs: [
{
key: 'question',
valueType: 'string',
type: 'target',
label: '用户问题',
required: true,
edit: true,
connected: false
}
],
outputs: [
{
key: 'question',
valueType: 'string',
label: '用户问题',
type: 'source',
edit: true,
targets: []
}
]
},
{
moduleId: 'tze1ju',
name: '定义插件输出',
avatar: '/imgs/module/output.png',
flowType: 'pluginOutput',
showStatus: false,
position: {
x: 1607.7142331269126,
y: -151.8669210746189
},
inputs: [
{
key: 'answer',
type: 'target',
valueType: 'string',
label: '答案',
required: true,
edit: true,
connected: true
}
],
outputs: [
{
key: 'answer',
valueType: 'string',
label: '答案',
type: 'source',
edit: true,
targets: []
}
]
}
]
}; };
const CreateModal = ({ const CreateModal = ({

View File

@@ -8,5 +8,5 @@ export const getChatModelNameListByModules = (modules: ModuleItemType[]): string
const model = item.inputs.find((input) => input.key === 'model')?.value; const model = item.inputs.find((input) => input.key === 'model')?.value;
return global.chatModels.find((item) => item.model === model)?.name || ''; return global.chatModels.find((item) => item.model === model)?.name || '';
}) })
.filter((item) => item); .filter(Boolean);
}; };

View File

@@ -8,6 +8,7 @@ import { deletePgDataById, insertData2Pg, updatePgDataById } from './pg';
import { Types } from 'mongoose'; import { Types } from 'mongoose';
import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/constant'; import { DatasetDataIndexTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils'; import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
import { jiebaSplit } from '../utils';
/* insert data. /* insert data.
* 1. create data id * 1. create data id
@@ -34,9 +35,6 @@ export async function insertData2Dataset({
return Promise.reject("teamId and tmbId can't be the same"); return Promise.reject("teamId and tmbId can't be the same");
} }
q = q.trim();
a = a.trim();
const id = new Types.ObjectId(); const id = new Types.ObjectId();
const qaStr = `${q}\n${a}`.trim(); const qaStr = `${q}\n${a}`.trim();
@@ -74,6 +72,7 @@ export async function insertData2Dataset({
collectionId, collectionId,
q, q,
a, a,
fullTextToken: jiebaSplit({ text: q + a }),
indexes: indexes.map((item, i) => ({ indexes: indexes.map((item, i) => ({
...item, ...item,
dataId: result[i].insertId dataId: result[i].insertId
@@ -203,6 +202,7 @@ export async function updateData2Dataset({
// update mongo // update mongo
mongoData.q = q || mongoData.q; mongoData.q = q || mongoData.q;
mongoData.a = a ?? mongoData.a; mongoData.a = a ?? mongoData.a;
mongoData.fullTextToken = jiebaSplit({ text: mongoData.q + mongoData.a });
// @ts-ignore // @ts-ignore
mongoData.indexes = indexes; mongoData.indexes = indexes;
await mongoData.save(); await mongoData.save();

View File

@@ -1,5 +1,8 @@
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant'; import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type.d'; import type {
DatasetDataWithCollectionType,
SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type.d';
import { PgClient } from '@fastgpt/service/common/pg'; import { PgClient } from '@fastgpt/service/common/pg';
import { getVectorsByText } from '@/service/core/ai/vector'; import { getVectorsByText } from '@/service/core/ai/vector';
import { delay } from '@/utils/tools'; import { delay } from '@/utils/tools';
@@ -8,6 +11,7 @@ import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { POST } from '@fastgpt/service/common/api/plusRequest'; import { POST } from '@fastgpt/service/common/api/plusRequest';
import { PostReRankResponse } from '@fastgpt/global/core/ai/api'; import { PostReRankResponse } from '@fastgpt/global/core/ai/api';
import { jiebaSplit } from '../utils';
export async function insertData2Pg({ export async function insertData2Pg({
mongoDataId, mongoDataId,
@@ -125,39 +129,100 @@ export async function deletePgDataById(
}; };
} }
// search // ------------------ search start ------------------
export async function searchDatasetData({ type SearchProps = {
text,
model,
similarity = 0,
limit,
datasetIds = [],
rerank = false
}: {
text: string; text: string;
model: string; model: string;
similarity?: number; // min distance similarity?: number; // min distance
limit: number; limit: number;
datasetIds: string[]; datasetIds: string[];
rerank?: boolean; rerank?: boolean;
}) { };
/**
 * Hybrid dataset search: runs the embedding recall and the full-text recall
 * in parallel, merges and de-duplicates their hits and, when `rerank` is
 * enabled, re-orders the merged hits with the re-rank service.
 *
 * @param props - Search options. `limit` is the maximum number of items
 *   returned; `similarity` (default 0) is the minimum score a re-ranked item
 *   must exceed; `rerank` (default false) enables the re-rank step.
 * @returns `searchRes` (at most `limit` items) and `tokenLen` as reported by
 *   the embedding recall.
 */
export async function searchDatasetData(props: SearchProps) {
  const { text, similarity = 0, limit, rerank = false } = props;

  const [{ tokenLen, embeddingRecallResults }, { fullTextRecallResults }] = await Promise.all([
    embeddingRecall({
      ...props,
      // Over-fetch so that enough candidates remain after de-duplication / re-ranking.
      limit: rerank ? Math.max(50, limit * 3) : limit * 2
    }),
    fullTextRecall({
      ...props,
      limit: 40
    })
  ]);

  // Merge both recalls, embedding hits first. The id set is seeded with the
  // embedding hits so a record found by both recalls is kept only once, and
  // the embedding result array is copied instead of being mutated in place.
  const recalledIds = new Set<string>(embeddingRecallResults.map((item) => item.id));
  const concatRecallResults = [...embeddingRecallResults];
  for (const item of fullTextRecallResults) {
    if (recalledIds.has(item.id)) continue;
    recalledIds.add(item.id);
    concatRecallResults.push(item);
  }

  // Drop records whose q + a content is identical to an earlier record.
  const seenContents = new Set<string>();
  const filterSameDataResults = concatRecallResults.filter((item) => {
    const str = `${item.q}${item.a}`.trim();
    if (seenContents.has(str)) return false;
    seenContents.add(str);
    return true;
  });

  if (!rerank) {
    return {
      searchRes: filterSameDataResults.slice(0, limit),
      tokenLen
    };
  }

  // ReRank result
  const reRankResults = await reRankSearchResult({
    query: text,
    data: filterSameDataResults
  });

  // Keep only re-ranked items whose score exceeds the similarity threshold.
  const filterReRankResults = reRankResults.filter((item) => item.score > similarity);

  // Re-ranked items come first; every remaining recalled item (including the
  // ones dropped by the threshold above) is appended after them in recall order.
  const rankedIds = new Set<string>(filterReRankResults.map((item) => item.id));
  const concatResult = filterReRankResults.concat(
    filterSameDataResults.filter((item) => {
      if (rankedIds.has(item.id)) return false;
      rankedIds.add(item.id);
      return true;
    })
  );

  return {
    searchRes: concatResult.slice(0, limit),
    tokenLen
  };
}
export async function embeddingRecall({
text,
model,
similarity = 0,
limit,
datasetIds = [],
rerank = false
}: SearchProps) {
const { vectors, tokenLen } = await getVectorsByText({ const { vectors, tokenLen } = await getVectorsByText({
model, model,
input: [text] input: [text]
}); });
const minLimit = global.systemEnv.pluginBaseUrl ? Math.max(50, limit * 4) : limit * 2;
const results: any = await PgClient.query( const results: any = await PgClient.query(
`BEGIN; `BEGIN;
SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 100}; SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 100};
select id, collection_id, data_id, (vector <#> '[${ select id, collection_id, data_id, (vector <#> '[${vectors[0]}]') * -1 AS score
vectors[0] from ${PgDatasetTableName}
}]') * -1 AS score from ${PgDatasetTableName} where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')}) AND vector <#> '[${ ${rerank ? '' : `AND vector <#> '[${vectors[0]}]' < -${similarity}`}
vectors[0] order by score desc limit ${limit};
}]' < -${similarity}
order by score desc limit ${minLimit};
COMMIT;` COMMIT;`
); );
@@ -212,47 +277,54 @@ export async function searchDatasetData({
}) })
.filter((item) => item !== null) as SearchDataResponseItemType[]; .filter((item) => item !== null) as SearchDataResponseItemType[];
// remove same q and a data
set = new Set<string>();
const filterData = formatResult.filter((item) => {
const str = `${item.q}${item.a}`.trim();
if (set.has(str)) return false;
set.add(str);
return true;
});
if (!rerank) {
return {
searchRes: filterData.slice(0, limit),
tokenLen
};
}
// ReRank result
const reRankResult = await reRankSearchResult({
query: text,
data: filterData
});
// similarity filter
const filterReRankResult = reRankResult.filter((item) => item.score > similarity);
// concat rerank and embedding data
set = new Set<string>(filterReRankResult.map((item) => item.id));
const concatResult = filterReRankResult.concat(
filterData.filter((item) => {
if (set.has(item.id)) return false;
set.add(item.id);
return true;
})
);
return { return {
searchRes: concatResult.slice(0, limit), embeddingRecallResults: formatResult,
tokenLen tokenLen
}; };
} }
/**
 * Full-text recall over the dataset data stored in MongoDB.
 *
 * Only runs when `rerank` is enabled; otherwise an empty result is returned.
 * The query text is tokenized with `jiebaSplit` and matched through a
 * `$text` search, ordered by MongoDB's text score.
 *
 * @param text - Raw query text.
 * @param limit - Maximum number of documents fetched.
 * @param datasetIds - Datasets to search in (default: none).
 * @param rerank - Whether re-ranking (and therefore this recall) is enabled.
 * @returns The recalled items, each with a fixed `score` of 1, and a
 *   `tokenLen` that is always 0.
 */
export async function fullTextRecall({
  text,
  limit,
  datasetIds = [],
  rerank = false
}: SearchProps): Promise<{
  fullTextRecallResults: SearchDataResponseItemType[];
  tokenLen: number;
}> {
  // Nothing to search: skip the database round trip.
  if (!rerank || datasetIds.length === 0) {
    return {
      fullTextRecallResults: [],
      tokenLen: 0
    };
  }

  const searchTokens = jiebaSplit({ text });
  // The tokenizer strips everything except CJK/alphanumeric characters, so a
  // query made of punctuation only leaves no search terms.
  if (!searchTokens) {
    return {
      fullTextRecallResults: [],
      tokenLen: 0
    };
  }

  const result = (await MongoDatasetData.find(
    {
      datasetId: { $in: datasetIds },
      $text: { $search: searchTokens }
    },
    { score: { $meta: 'textScore' } }
  )
    .sort({ score: { $meta: 'textScore' } })
    .limit(limit)
    .populate('collectionId')
    .lean()) as DatasetDataWithCollectionType[];

  return {
    fullTextRecallResults: result
      // populate() leaves `collectionId` null when the referenced collection
      // no longer exists; skip such orphaned records instead of throwing.
      .filter((item) => Boolean(item.collectionId))
      .map((item) => ({
        id: String(item._id),
        datasetId: String(item.datasetId),
        collectionId: String(item.collectionId._id),
        sourceName: item.collectionId.name || '',
        sourceId: item.collectionId.metadata?.fileId || item.collectionId.metadata?.rawLink,
        q: item.q,
        a: item.a,
        indexes: item.indexes,
        score: 1
      })),
    tokenLen: 0
  };
}
// plus reRank search result // plus reRank search result
export async function reRankSearchResult({ export async function reRankSearchResult({
data, data,
@@ -279,7 +351,7 @@ export async function reRankSearchResult({
score: item.score ?? target.score score: item.score ?? target.score
}; };
}) })
.filter((item) => item) as SearchDataResponseItemType[]; .filter(Boolean) as SearchDataResponseItemType[];
return mergeResult; return mergeResult;
} catch (error) { } catch (error) {
@@ -288,3 +360,4 @@ export async function reRankSearchResult({
return data; return data;
} }
} }
// ------------------ search end ------------------

View File

@@ -0,0 +1,34 @@
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { cut, extract } from '@node-rs/jieba';
/**
 * Duplicate check: rejects when the collection already contains a record
 * whose `q` and `a` are both exactly equal to the given values.
 *
 * @param collectionId - Collection to look in.
 * @param q - Question / chunk text to compare.
 * @param a - Answer text to compare (defaults to an empty string).
 * @returns A promise that resolves with no value when no identical record
 *   exists and rejects with a message string otherwise.
 */
export async function hasSameValue({
  collectionId,
  q,
  a = ''
}: {
  collectionId: string;
  q: string;
  a?: string;
}) {
  const duplicateCount = await MongoDatasetData.countDocuments({ q, a, collectionId });

  // No identical record stored yet: nothing to report.
  if (!(duplicateCount > 0)) return;

  return Promise.reject('已经存在完全一致的数据');
}
/**
 * Tokenize text with jieba and return the tokens as one space-separated string.
 *
 * Every token is stripped of characters other than CJK ideographs
 * (U+4E00–U+9FA5), ASCII letters, digits and whitespace, then trimmed;
 * tokens that end up empty are dropped.
 *
 * @param text - Text to tokenize.
 * @returns The cleaned tokens joined by single spaces ('' when none remain).
 */
export function jiebaSplit({ text }: { text: string }) {
  const words: string[] = [];

  // NOTE(review): the second argument presumably enables jieba's HMM mode —
  // confirm against the @node-rs/jieba documentation.
  for (const token of cut(text, true)) {
    const cleaned = token.replace(/[^\u4e00-\u9fa5a-zA-Z0-9\s]/g, '').trim();
    if (cleaned) {
      words.push(cleaned);
    }
  }

  return words.join(' ');
}

View File

@@ -13,8 +13,15 @@ import { getErrText } from '@fastgpt/global/common/error/utils';
import { authTeamBalance } from '../support/permission/auth/bill'; import { authTeamBalance } from '../support/permission/auth/bill';
import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api.d'; import type { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api.d';
/**
 * Release one slot of the QA generation queue.
 *
 * Decrements `global.qaQueueLen` (never below 0). When the queue becomes
 * empty and `retry` is true, another `generateQA` run is scheduled 60 seconds
 * later.
 *
 * @param retry - Schedule a delayed `generateQA` run once the queue is empty.
 * @returns `true` when the QA queue is empty after the decrement.
 */
const reduceQueue = (retry = false) => {
  global.qaQueueLen = global.qaQueueLen > 0 ? global.qaQueueLen - 1 : 0;

  if (global.qaQueueLen === 0 && retry) {
    setTimeout(() => {
      generateQA();
    }, 60000);
  }

  // Report the state of the QA queue itself; the previous code returned the
  // state of the vector queue (`global.vectorQueueLen`), a different counter.
  return global.qaQueueLen === 0;
};
export async function generateQA(): Promise<any> { export async function generateQA(): Promise<any> {
@@ -32,7 +39,7 @@ export async function generateQA(): Promise<any> {
const data = await MongoDatasetTraining.findOneAndUpdate( const data = await MongoDatasetTraining.findOneAndUpdate(
{ {
mode: TrainingModeEnum.qa, mode: TrainingModeEnum.qa,
lockTime: { $lte: new Date(Date.now() - 10 * 60 * 1000) } lockTime: { $lte: new Date(Date.now() - 6 * 60 * 1000) }
}, },
{ {
lockTime: new Date() lockTime: new Date()
@@ -70,12 +77,13 @@ export async function generateQA(): Promise<any> {
} }
})(); })();
if (done) { if (done || !data) {
reduceQueue(); if (reduceQueue()) {
global.vectorQueueLen <= 0 && console.log(`【QA】Task Done`); console.log(`【QA】Task Done`);
}
return; return;
} }
if (error || !data) { if (error) {
reduceQueue(); reduceQueue();
return generateQA(); return generateQA();
} }
@@ -171,7 +179,7 @@ export async function generateQA(): Promise<any> {
reduceQueue(); reduceQueue();
generateQA(); generateQA();
} catch (err: any) { } catch (err: any) {
reduceQueue(); reduceQueue(true);
// log // log
if (err?.response) { if (err?.response) {
addLog.info('openai error: 生成QA错误', { addLog.info('openai error: 生成QA错误', {

View File

@@ -7,8 +7,16 @@ import { getErrText } from '@fastgpt/global/common/error/utils';
import { authTeamBalance } from '@/service/support/permission/auth/bill'; import { authTeamBalance } from '@/service/support/permission/auth/bill';
import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push'; import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push';
const reduceQueue = () => { const reduceQueue = (retry = false) => {
global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0; global.vectorQueueLen = global.vectorQueueLen > 0 ? global.vectorQueueLen - 1 : 0;
if (global.vectorQueueLen === 0 && retry) {
setTimeout(() => {
generateVector();
}, 60000);
}
return global.vectorQueueLen === 0;
}; };
/* 索引生成队列。每导入一次,就是一个单独的线程 */ /* 索引生成队列。每导入一次,就是一个单独的线程 */
@@ -57,8 +65,8 @@ export async function generateVector(): Promise<any> {
return { return {
data, data,
dataItem: { dataItem: {
q: data.q.replace(/[\x00-\x08]/g, ' '), q: data.q,
a: data.a?.replace(/[\x00-\x08]/g, ' ') || '', a: data.a || '',
indexes: data.indexes indexes: data.indexes
} }
}; };
@@ -70,12 +78,13 @@ export async function generateVector(): Promise<any> {
} }
})(); })();
if (done) { if (done || !data) {
reduceQueue(); if (reduceQueue()) {
global.vectorQueueLen <= 0 && console.log(`【index】Task done`); console.log(`【index】Task done`);
}
return; return;
} }
if (error || !data) { if (error) {
reduceQueue(); reduceQueue();
return generateVector(); return generateVector();
} }
@@ -108,8 +117,15 @@ export async function generateVector(): Promise<any> {
} }
// create vector and insert // create vector and insert
try { try {
// invalid data
if (!data.q.trim()) {
await MongoDatasetTraining.findByIdAndDelete(data._id);
reduceQueue();
generateVector();
return;
}
// insert data to pg // insert data to pg
const { tokenLen } = await insertData2Dataset({ const { tokenLen } = await insertData2Dataset({
teamId: data.teamId, teamId: data.teamId,
@@ -135,7 +151,7 @@ export async function generateVector(): Promise<any> {
reduceQueue(); reduceQueue();
generateVector(); generateVector();
} catch (err: any) { } catch (err: any) {
reduceQueue(); reduceQueue(true);
// log // log
if (err?.response) { if (err?.response) {
addLog.info('openai error: 生成向量错误', { addLog.info('openai error: 生成向量错误', {

View File

@@ -2,7 +2,7 @@ const decoder = new TextDecoder();
export const parseStreamChunk = (value: BufferSource) => { export const parseStreamChunk = (value: BufferSource) => {
const chunk = decoder.decode(value); const chunk = decoder.decode(value);
const chunkLines = chunk.split('\n\n').filter((item) => item); const chunkLines = chunk.split('\n\n').filter(Boolean);
const chunkResponse = chunkLines.map((item) => { const chunkResponse = chunkLines.map((item) => {
const splitEvent = item.split('\n'); const splitEvent = item.split('\n');
if (splitEvent.length === 2) { if (splitEvent.length === 2) {

View File

@@ -3,12 +3,12 @@ import type { ParentTreePathItemType } from '@fastgpt/global/common/parentFolder
import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d'; import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d';
import type { import type {
DatasetUpdateParams, DatasetUpdateParams,
SearchTestProps,
GetDatasetCollectionsProps, GetDatasetCollectionsProps,
GetDatasetDataListProps, GetDatasetDataListProps,
CreateDatasetCollectionParams, CreateDatasetCollectionParams,
UpdateDatasetCollectionParams UpdateDatasetCollectionParams
} from '@/global/core/api/datasetReq.d'; } from '@/global/core/api/datasetReq.d';
import type { SearchTestProps, SearchTestResponse } from '@/global/core/dataset/api.d';
import type { import type {
PushDatasetDataProps, PushDatasetDataProps,
UpdateDatasetDataProps, UpdateDatasetDataProps,
@@ -21,8 +21,6 @@ import type {
SearchDataResponseItemType SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type'; } from '@fastgpt/global/core/dataset/type';
import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant';
import { getToken } from '@/web/support/user/auth';
import download from 'downloadjs';
import type { DatasetDataItemType } from '@fastgpt/global/core/dataset/type'; import type { DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d'; import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d';
import { PagingData } from '@/types'; import { PagingData } from '@/types';
@@ -53,7 +51,7 @@ export const getCheckExportLimit = (datasetId: string) =>
/* =========== search test ============ */ /* =========== search test ============ */
export const postSearchText = (data: SearchTestProps) => export const postSearchText = (data: SearchTestProps) =>
POST<SearchDataResponseItemType[]>(`/core/dataset/searchTest`, data); POST<SearchTestResponse>(`/core/dataset/searchTest`, data);
/* ============================= collections ==================================== */ /* ============================= collections ==================================== */
export const getDatasetCollections = (data: GetDatasetCollectionsProps) => export const getDatasetCollections = (data: GetDatasetCollectionsProps) =>

View File

@@ -8,6 +8,7 @@ export type SearchTestStoreItemType = {
datasetId: string; datasetId: string;
text: string; text: string;
time: Date; time: Date;
duration: string;
results: SearchDataResponseItemType[]; results: SearchDataResponseItemType[];
}; };