From 14895bbcfd0d9a1496380e08048cadb393477f2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AF=B8=E5=B2=B3?= Date: Thu, 27 Mar 2025 18:39:49 +0800 Subject: [PATCH] feat: vector store support oceanbase (#4356) * feat: vector store support oceanbase * chore(config): Rename pgHNSWEfSearch to hnswEfSearch to work for pg and oceanbase both --- .../fastgpt/templates/configmap-config.yaml | 2 +- .../zh-cn/docs/development/configuration.md | 2 +- .../content/zh-cn/docs/development/intro.md | 2 +- .../docs/development/modelConfig/intro.md | 2 +- .../global/common/system/types/index.d.ts | 2 +- .../service/common/vectorStore/constants.ts | 1 + .../service/common/vectorStore/controller.ts | 4 +- .../common/vectorStore/oceanbase/class.ts | 254 ++++++++++++++++++ .../common/vectorStore/oceanbase/index.ts | 173 ++++++++++++ .../service/common/vectorStore/pg/class.ts | 2 +- packages/service/common/vectorStore/type.d.ts | 2 + packages/service/package.json | 1 + pnpm-lock.yaml | 9 +- projects/app/.env.template | 4 +- projects/app/data/config.json | 2 +- projects/app/data/model.json | 2 +- 16 files changed, 451 insertions(+), 13 deletions(-) create mode 100644 packages/service/common/vectorStore/oceanbase/class.ts create mode 100644 packages/service/common/vectorStore/oceanbase/index.ts diff --git a/deploy/helm/fastgpt/templates/configmap-config.yaml b/deploy/helm/fastgpt/templates/configmap-config.yaml index 4a760d560..07e3d3668 100644 --- a/deploy/helm/fastgpt/templates/configmap-config.yaml +++ b/deploy/helm/fastgpt/templates/configmap-config.yaml @@ -7,7 +7,7 @@ data: "vectorMaxProcess": 15, "qaMaxProcess": 15, "vlmMaxProcess": 15, - "pgHNSWEfSearch": 100 + "hnswEfSearch": 100 }, "llmModels": [ { diff --git a/docSite/content/zh-cn/docs/development/configuration.md b/docSite/content/zh-cn/docs/development/configuration.md index af6cf5344..22da35cde 100644 --- a/docSite/content/zh-cn/docs/development/configuration.md +++ b/docSite/content/zh-cn/docs/development/configuration.md @@ 
-25,7 +25,7 @@ weight: 707 "qaMaxProcess": 15, // 问答拆分线程数量 "vlmMaxProcess": 15, // 图片理解模型最大处理进程 "tokenWorkers": 50, // Token 计算线程保持数,会持续占用内存,不能设置太大。 - "pgHNSWEfSearch": 100, // 向量搜索参数。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。 + "hnswEfSearch": 100, // 向量搜索参数,仅对 PG 和 OB 生效。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。 "customPdfParse": { // 4.9.0 新增配置 "url": "", // 自定义 PDF 解析服务地址 "key": "", // 自定义 PDF 解析服务密钥 diff --git a/docSite/content/zh-cn/docs/development/intro.md b/docSite/content/zh-cn/docs/development/intro.md index d1ec79ba0..1050f3d76 100644 --- a/docSite/content/zh-cn/docs/development/intro.md +++ b/docSite/content/zh-cn/docs/development/intro.md @@ -71,7 +71,7 @@ Mongo 数据库需要注意,需要注意在连接地址中增加 `directConnec - `vectorMaxProcess`: 向量生成最大进程,根据数据库和 key 的并发数来决定,通常单个 120 号,2c4g 服务器设置 10~15。 - `qaMaxProcess`: QA 生成最大进程 - `vlmMaxProcess`: 图片理解模型最大进程 -- `pgHNSWEfSearch`: PostgreSQL vector 索引参数,越大搜索精度越高但是速度越慢,具体可看 pgvector 官方说明。 +- `hnswEfSearch`: 向量搜索参数,仅对 PG 和 OB 生效,越大搜索精度越高但是速度越慢。 ### 5. 运行 diff --git a/docSite/content/zh-cn/docs/development/modelConfig/intro.md b/docSite/content/zh-cn/docs/development/modelConfig/intro.md index 29481a489..a86af3af8 100644 --- a/docSite/content/zh-cn/docs/development/modelConfig/intro.md +++ b/docSite/content/zh-cn/docs/development/modelConfig/intro.md @@ -302,7 +302,7 @@ OneAPI 的语言识别接口,无法正确的识别其他模型(会始终识 "vectorMaxProcess": 15, // 向量处理线程数量 "qaMaxProcess": 15, // 问答拆分线程数量 "tokenWorkers": 50, // Token 计算线程保持数,会持续占用内存,不能设置太大。 - "pgHNSWEfSearch": 100 // 向量搜索参数。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。 + "hnswEfSearch": 100 // 向量搜索参数,仅对 PG 和 OB 生效。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。 }, "llmModels": [ { diff --git a/packages/global/common/system/types/index.d.ts b/packages/global/common/system/types/index.d.ts index ac30d92e3..eb989da5d 100644 --- a/packages/global/common/system/types/index.d.ts +++ b/packages/global/common/system/types/index.d.ts @@ -112,7 +112,7 @@ export type SystemEnvType = { vectorMaxProcess: number; qaMaxProcess: number; vlmMaxProcess: number; - 
pgHNSWEfSearch: number; + hnswEfSearch: number; tokenWorkers: number; // token count max worker oneapiUrl?: string; diff --git a/packages/service/common/vectorStore/constants.ts b/packages/service/common/vectorStore/constants.ts index 5b9206eb5..8476cf6c1 100644 --- a/packages/service/common/vectorStore/constants.ts +++ b/packages/service/common/vectorStore/constants.ts @@ -2,5 +2,6 @@ export const DatasetVectorDbName = 'fastgpt'; export const DatasetVectorTableName = 'modeldata'; export const PG_ADDRESS = process.env.PG_URL; +export const OCEANBASE_ADDRESS = process.env.OCEANBASE_URL; export const MILVUS_ADDRESS = process.env.MILVUS_ADDRESS; export const MILVUS_TOKEN = process.env.MILVUS_TOKEN; diff --git a/packages/service/common/vectorStore/controller.ts b/packages/service/common/vectorStore/controller.ts index dae18b484..fd3b1521c 100644 --- a/packages/service/common/vectorStore/controller.ts +++ b/packages/service/common/vectorStore/controller.ts @@ -1,13 +1,15 @@ /* vector crud */ import { PgVectorCtrl } from './pg/class'; +import { ObVectorCtrl } from './oceanbase/class'; import { getVectorsByText } from '../../core/ai/embedding'; import { InsertVectorProps } from './controller.d'; import { EmbeddingModelItemType } from '@fastgpt/global/core/ai/model.d'; -import { MILVUS_ADDRESS, PG_ADDRESS } from './constants'; +import { MILVUS_ADDRESS, PG_ADDRESS, OCEANBASE_ADDRESS } from './constants'; import { MilvusCtrl } from './milvus/class'; const getVectorObj = () => { if (PG_ADDRESS) return new PgVectorCtrl(); + if (OCEANBASE_ADDRESS) return new ObVectorCtrl(); if (MILVUS_ADDRESS) return new MilvusCtrl(); return new PgVectorCtrl(); diff --git a/packages/service/common/vectorStore/oceanbase/class.ts b/packages/service/common/vectorStore/oceanbase/class.ts new file mode 100644 index 000000000..5d2e196a8 --- /dev/null +++ b/packages/service/common/vectorStore/oceanbase/class.ts @@ -0,0 +1,254 @@ +/* oceanbase vector crud */ +import { DatasetVectorTableName } from 
'../constants'; +import { delay } from '@fastgpt/global/common/system/utils'; +import { ObClient } from './index'; +import { RowDataPacket, ResultSetHeader } from 'mysql2/promise'; +import { + DelDatasetVectorCtrlProps, + EmbeddingRecallCtrlProps, + EmbeddingRecallResponse, + InsertVectorControllerProps +} from '../controller.d'; +import dayjs from 'dayjs'; +import { addLog } from '../../system/log'; + +export class ObVectorCtrl { + constructor() {} + init = async () => { + try { + await ObClient.query(` + CREATE TABLE IF NOT EXISTS ${DatasetVectorTableName} ( + id BIGINT AUTO_INCREMENT PRIMARY KEY, + vector VECTOR(1536) NOT NULL, + team_id VARCHAR(50) NOT NULL, + dataset_id VARCHAR(50) NOT NULL, + collection_id VARCHAR(50) NOT NULL, + createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ); + `); + + await ObClient.query( + `CREATE VECTOR INDEX IF NOT EXISTS vector_index ON ${DatasetVectorTableName}(vector) WITH (distance=inner_product, type=hnsw, m=32, ef_construction=128);` + ); + await ObClient.query( + `CREATE INDEX IF NOT EXISTS team_dataset_collection_index ON ${DatasetVectorTableName}(team_id, dataset_id, collection_id);` + ); + await ObClient.query( + `CREATE INDEX IF NOT EXISTS create_time_index ON ${DatasetVectorTableName}(createtime);` + ); + + addLog.info('init oceanbase successful'); + } catch (error) { + addLog.error('init oceanbase error', error); + } + }; + insert = async (props: InsertVectorControllerProps): Promise<{ insertId: string }> => { + const { teamId, datasetId, collectionId, vector, retry = 3 } = props; + + try { + const { rowCount, rows } = await ObClient.insert(DatasetVectorTableName, { + values: [ + [ + { key: 'vector', value: `[${vector}]` }, + { key: 'team_id', value: String(teamId) }, + { key: 'dataset_id', value: String(datasetId) }, + { key: 'collection_id', value: String(collectionId) } + ] + ] + }); + + if (rowCount === 0) { + return Promise.reject('insertDatasetData: no insert'); + } + + return { + insertId: rows[0].id + }; + } 
catch (error) { + if (retry <= 0) { + return Promise.reject(error); + } + await delay(500); + return this.insert({ + ...props, + retry: retry - 1 + }); + } + }; + delete = async (props: DelDatasetVectorCtrlProps): Promise<void> => { + const { teamId, retry = 2 } = props; + + const teamIdWhere = `team_id='${String(teamId)}' AND`; + + const where = await (() => { + if ('id' in props && props.id) return `${teamIdWhere} id=${props.id}`; + + if ('datasetIds' in props && props.datasetIds) { + const datasetIdWhere = `dataset_id IN (${props.datasetIds + .map((id) => `'${String(id)}'`) + .join(',')})`; + + if ('collectionIds' in props && props.collectionIds) { + return `${teamIdWhere} ${datasetIdWhere} AND collection_id IN (${props.collectionIds + .map((id) => `'${String(id)}'`) + .join(',')})`; + } + + return `${teamIdWhere} ${datasetIdWhere}`; + } + + if ('idList' in props && Array.isArray(props.idList)) { + if (props.idList.length === 0) return; + return `${teamIdWhere} id IN (${props.idList.map((id) => String(id)).join(',')})`; + } + return Promise.reject('deleteDatasetData: no where'); + })(); + + if (!where) return; + + try { + await ObClient.delete(DatasetVectorTableName, { + where: [where] + }); + } catch (error) { + if (retry <= 0) { + return Promise.reject(error); + } + await delay(500); + return this.delete({ + ...props, + retry: retry - 1 + }); + } + }; + embRecall = async (props: EmbeddingRecallCtrlProps): Promise<EmbeddingRecallResponse> => { + const { + teamId, + datasetIds, + vector, + limit, + forbidCollectionIdList, + filterCollectionIdList, + retry = 2 + } = props; + + // Get forbid collection + const formatForbidCollectionIdList = (() => { + if (!filterCollectionIdList) return forbidCollectionIdList; + const list = forbidCollectionIdList + .map((id) => String(id)) + .filter((id) => !filterCollectionIdList.includes(id)); + return list; + })(); + const forbidCollectionSql = + formatForbidCollectionIdList.length > 0 + ?
`AND collection_id NOT IN (${formatForbidCollectionIdList.map((id) => `'${id}'`).join(',')})` + : ''; + + // Filter by collectionId + const formatFilterCollectionId = (() => { + if (!filterCollectionIdList) return; + + return filterCollectionIdList + .map((id) => String(id)) + .filter((id) => !forbidCollectionIdList.includes(id)); + })(); + const filterCollectionIdSql = formatFilterCollectionId + ? `AND collection_id IN (${formatFilterCollectionId.map((id) => `'${id}'`).join(',')})` + : ''; + // Empty data + if (formatFilterCollectionId && formatFilterCollectionId.length === 0) { + return { results: [] }; + } + + try { + const rows = await ObClient.query< + ({ + id: string; + collection_id: string; + score: number; + } & RowDataPacket)[][] + >( + `BEGIN; + SET ob_hnsw_ef_search = ${global.systemEnv?.hnswEfSearch || 100}; + SELECT id, collection_id, inner_product(vector, [${vector}]) AS score + FROM ${DatasetVectorTableName} + WHERE team_id='${teamId}' + AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')}) + ${filterCollectionIdSql} + ${forbidCollectionSql} + ORDER BY score desc APPROXIMATE LIMIT ${limit}; + COMMIT;` + ).then(([rows]) => rows[2]); + + return { + results: rows.map((item) => ({ + id: String(item.id), + collectionId: item.collection_id, + score: item.score + })) + }; + } catch (error) { + if (retry <= 0) { + return Promise.reject(error); + } + return this.embRecall({ + ...props, + retry: retry - 1 + }); + } + }; + getVectorDataByTime = async (start: Date, end: Date) => { + const rows = await ObClient.query< + ({ + id: string; + team_id: string; + dataset_id: string; + } & RowDataPacket)[] + >( + `SELECT id, team_id, dataset_id + FROM ${DatasetVectorTableName} + WHERE createtime BETWEEN '${dayjs(start).format('YYYY-MM-DD HH:mm:ss')}' AND '${dayjs( + end + ).format('YYYY-MM-DD HH:mm:ss')}'; + ` + ).then(([rows]) => rows); + + return rows.map((item) => ({ + id: String(item.id), + teamId: item.team_id, + datasetId: item.dataset_id + 
})); + }; + getVectorCountByTeamId = async (teamId: string) => { + const total = await ObClient.count(DatasetVectorTableName, { + where: [['team_id', String(teamId)]] + }); + + return total; + }; + getVectorCountByDatasetId = async (teamId: string, datasetId: string) => { + const total = await ObClient.count(DatasetVectorTableName, { + where: [['team_id', String(teamId)], 'and', ['dataset_id', String(datasetId)]] + }); + + return total; + }; + getVectorCountByCollectionId = async ( + teamId: string, + datasetId: string, + collectionId: string + ) => { + const total = await ObClient.count(DatasetVectorTableName, { + where: [ + ['team_id', String(teamId)], + 'and', + ['dataset_id', String(datasetId)], + 'and', + ['collection_id', String(collectionId)] + ] + }); + + return total; + }; +} diff --git a/packages/service/common/vectorStore/oceanbase/index.ts b/packages/service/common/vectorStore/oceanbase/index.ts new file mode 100644 index 000000000..abc68c405 --- /dev/null +++ b/packages/service/common/vectorStore/oceanbase/index.ts @@ -0,0 +1,173 @@ +import mysql, { Pool, QueryResult, RowDataPacket, ResultSetHeader } from 'mysql2/promise'; +import { addLog } from '../../system/log'; +import { OCEANBASE_ADDRESS } from '../constants'; + +export const getClient = async (): Promise<Pool> => { + if (!OCEANBASE_ADDRESS) { + return Promise.reject('OCEANBASE_ADDRESS is not set'); + } + + if (global.obClient) { + return global.obClient; + } + + global.obClient = mysql.createPool({ + uri: OCEANBASE_ADDRESS, + waitForConnections: true, + connectionLimit: Number(process.env.DB_MAX_LINK || 20), + connectTimeout: 20000, + idleTimeout: 60000, + queueLimit: 0, + enableKeepAlive: true, + keepAliveInitialDelay: 0 + }); + + addLog.info(`oceanbase connected`); + + return global.obClient; +}; + +type WhereProps = (string | [string, string | number])[]; +type GetProps = { + fields?: string[]; + where?: WhereProps; + order?: { field: string; mode: 'DESC' | 'ASC' | string }[]; + limit?: number; + 
offset?: number; +}; + +type DeleteProps = { + where: WhereProps; +}; + +type ValuesProps = { key: string; value?: string | number }[]; +type UpdateProps = { + values: ValuesProps; + where: WhereProps; +}; +type InsertProps = { + values: ValuesProps[]; +}; + +class ObClass { + private getWhereStr(where?: WhereProps) { + return where + ? `WHERE ${where + .map((item) => { + if (typeof item === 'string') { + return item; + } + const val = typeof item[1] === 'number' ? item[1] : `'${String(item[1])}'`; + return `${item[0]}=${val}`; + }) + .join(' ')}` + : ''; + } + private getUpdateValStr(values: ValuesProps) { + return values + .map((item) => { + const val = + typeof item.value === 'number' + ? item.value + : `'${String(item.value).replace(/\'/g, '"')}'`; + + return `${item.key}=${val}`; + }) + .join(','); + } + private getInsertValStr(values: ValuesProps[]) { + return values + .map( + (items) => + `(${items + .map((item) => + typeof item.value === 'number' + ? item.value + : `'${String(item.value).replace(/\'/g, '"')}'` + ) + .join(',')})` + ) + .join(','); + } + async select(table: string, props: GetProps) { + const sql = `SELECT ${ + !props.fields || props.fields?.length === 0 ? '*' : props.fields?.join(',') + } + FROM ${table} + ${this.getWhereStr(props.where)} + ${ + props.order + ? 
`ORDER BY ${props.order.map((item) => `${item.field} ${item.mode}`).join(',')}` + : '' + } + LIMIT ${props.limit || 10} OFFSET ${props.offset || 0} + `; + + const client = await getClient(); + return client.query(sql); + } + async count(table: string, props: GetProps) { + const sql = `SELECT COUNT(${props?.fields?.[0] || '*'}) + FROM ${table} + ${this.getWhereStr(props.where)} + `; + + const client = await getClient(); + return client + .query<({ count: number } & RowDataPacket)[]>(sql) + .then(([rows]) => Number(rows[0]?.count || 0)); + } + async delete(table: string, props: DeleteProps) { + const sql = `DELETE FROM ${table} ${this.getWhereStr(props.where)}`; + const client = await getClient(); + return client.query(sql); + } + async update(table: string, props: UpdateProps) { + if (props.values.length === 0) { + return { + rowCount: 0 + }; + } + + const sql = `UPDATE ${table} SET ${this.getUpdateValStr(props.values)} ${this.getWhereStr( + props.where + )}`; + const client = await getClient(); + return client.query(sql); + } + async insert(table: string, props: InsertProps) { + if (props.values.length === 0) { + return { + rowCount: 0, + rows: [] + }; + } + + const fields = props.values[0].map((item) => item.key).join(','); + const sql = `INSERT INTO ${table} (${fields}) VALUES ${this.getInsertValStr(props.values)}`; + + const client = await getClient(); + return client.query<ResultSetHeader>(sql).then(([result]) => { + return { + rowCount: result.affectedRows, + rows: [{ id: String(result.insertId) }] + }; + }); + } + async query<T extends QueryResult>(sql: string) { + const client = await getClient(); + const start = Date.now(); + return client.query<T>(sql).then((res) => { + const time = Date.now() - start; + + if (time > 300) { + addLog.warn(`oceanbase query time: ${time}ms, sql: ${sql}`); + } + + return res; + }); + } +} + +export const ObClient = new ObClass(); +export const Oceanbase = global.obClient; diff --git a/packages/service/common/vectorStore/pg/class.ts 
b/packages/service/common/vectorStore/pg/class.ts index 26db89074..39287630a 100644 --- a/packages/service/common/vectorStore/pg/class.ts +++ b/packages/service/common/vectorStore/pg/class.ts @@ -187,7 +187,7 @@ export class PgVectorCtrl { try { const results: any = await PgClient.query( `BEGIN; - SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100}; + SET LOCAL hnsw.ef_search = ${global.systemEnv?.hnswEfSearch || 100}; SET LOCAL hnsw.iterative_scan = relaxed_order; WITH relaxed_results AS MATERIALIZED ( select id, collection_id, vector <#> '[${vector}]' AS score diff --git a/packages/service/common/vectorStore/type.d.ts b/packages/service/common/vectorStore/type.d.ts index 2ccc1f4a6..5fa853e3d 100644 --- a/packages/service/common/vectorStore/type.d.ts +++ b/packages/service/common/vectorStore/type.d.ts @@ -1,8 +1,10 @@ import type { Pool } from 'pg'; +import { Pool as MysqlPool } from 'mysql2/promise'; import { MilvusClient } from '@zilliz/milvus2-sdk-node'; declare global { var pgClient: Pool | null; + var obClient: MysqlPool | null; var milvusClient: MilvusClient | null; } diff --git a/packages/service/package.json b/packages/service/package.json index f6ce26326..8bf1784ed 100644 --- a/packages/service/package.json +++ b/packages/service/package.json @@ -26,6 +26,7 @@ "mammoth": "^1.6.0", "mongoose": "^8.10.1", "multer": "1.4.5-lts.1", + "mysql2": "^3.11.3", "next": "14.2.25", "nextjs-cors": "^2.2.0", "node-cron": "^3.0.3", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8a5e5898a..03cf11bbb 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -220,6 +220,9 @@ importers: multer: specifier: 1.4.5-lts.1 version: 1.4.5-lts.1 + mysql2: + specifier: ^3.11.3 + version: 3.13.0 next: specifier: 14.2.25 version: 14.2.25(@babel/core@7.26.10)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)(sass@1.85.1) @@ -14607,7 +14610,7 @@ snapshots: eslint: 8.56.0 eslint-import-resolver-node: 0.3.9 eslint-import-resolver-typescript: 
3.9.0(eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint@8.56.0))(eslint@8.56.0) - eslint-plugin-import: 2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-typescript@3.9.0(eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint@8.56.0))(eslint@8.56.0))(eslint@8.56.0) + eslint-plugin-import: 2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-typescript@3.9.0)(eslint@8.56.0) eslint-plugin-jsx-a11y: 6.10.2(eslint@8.56.0) eslint-plugin-react: 7.37.4(eslint@8.56.0) eslint-plugin-react-hooks: 5.0.0-canary-7118f5dd7-20230705(eslint@8.56.0) @@ -14637,7 +14640,7 @@ snapshots: stable-hash: 0.0.5 tinyglobby: 0.2.12 optionalDependencies: - eslint-plugin-import: 2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-typescript@3.9.0(eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint@8.56.0))(eslint@8.56.0))(eslint@8.56.0) + eslint-plugin-import: 2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-typescript@3.9.0)(eslint@8.56.0) transitivePeerDependencies: - supports-color @@ -14652,7 +14655,7 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-typescript@3.9.0(eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint@8.56.0))(eslint@8.56.0))(eslint@8.56.0): + eslint-plugin-import@2.31.0(@typescript-eslint/parser@6.21.0(eslint@8.56.0)(typescript@5.8.2))(eslint-import-resolver-typescript@3.9.0)(eslint@8.56.0): dependencies: '@rtsao/scc': 1.1.0 array-includes: 3.1.8 diff --git a/projects/app/.env.template b/projects/app/.env.template index d321bc86a..297b19b31 100644 --- 
a/projects/app/.env.template +++ b/projects/app/.env.template @@ -23,9 +23,11 @@ MULTIPLE_DATA_TO_BASE64=true # mongo 数据库连接参数,本地开发连接远程数据库时,可能需要增加 directConnection=true 参数,才能连接上。 MONGODB_URI=mongodb://username:password@0.0.0.0:27017/fastgpt?authSource=admin -# 向量库优先级: pg > milvus +# 向量库优先级: pg > oceanbase > milvus # PG 向量库连接参数 PG_URL=postgresql://username:password@host:port/postgres +# OceanBase 向量库连接参数 +OCEANBASE_URL= # milvus 向量库连接参数 MILVUS_ADDRESS= MILVUS_TOKEN= diff --git a/projects/app/data/config.json b/projects/app/data/config.json index 0b9e7a598..78c55c8f4 100644 --- a/projects/app/data/config.json +++ b/projects/app/data/config.json @@ -8,7 +8,7 @@ "qaMaxProcess": 10, // 问答拆分线程数量 "vlmMaxProcess": 10, // 图片理解模型最大处理进程 "tokenWorkers": 30, // Token 计算线程保持数,会持续占用内存,不能设置太大。 - "pgHNSWEfSearch": 100, // 向量搜索参数。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。 + "hnswEfSearch": 100, // 向量搜索参数,仅对 PG 和 OB 生效。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。 "customPdfParse": { "url": "", // 自定义 PDF 解析服务地址 "key": "", // 自定义 PDF 解析服务密钥 diff --git a/projects/app/data/model.json b/projects/app/data/model.json index 2bca347e8..774ba60c5 100644 --- a/projects/app/data/model.json +++ b/projects/app/data/model.json @@ -7,7 +7,7 @@ "vectorMaxProcess": 15, // 向量处理线程数量 "qaMaxProcess": 15, // 问答拆分线程数量 "tokenWorkers": 30, // Token 计算线程保持数,会持续占用内存,不能设置太大。 - "pgHNSWEfSearch": 100 // 向量搜索参数。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。 + "hnswEfSearch": 100 // 向量搜索参数,仅对 PG 和 OB 生效。越大,搜索越精确,但是速度越慢。设置为100,有99%+精度。 }, "llmModels": [ {