4.6.7-alpha commit (#743)

commit c031e6dcc9 (parent 8ee7407c4c)
Author: Archer
Date:   2024-01-19 11:17:28 +08:00 (committed by GitHub)
Co-authored-by: Archer <545436317@qq.com>
Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>

324 changed files with 8509 additions and 4757 deletions

View File

@@ -1,20 +1,20 @@
 import {
   initPg,
   insertDatasetDataVector,
-  updateDatasetDataVector,
   deleteDatasetDataVector,
   embeddingRecall,
   getVectorDataByTime,
-  getVectorCountByTeamId
+  getVectorCountByTeamId,
+  checkDataExist
 } from './controller';
 
 export class PgVector {
   constructor() {}
   init = initPg;
   insert = insertDatasetDataVector;
-  update = updateDatasetDataVector;
   delete = deleteDatasetDataVector;
   recall = embeddingRecall;
+  checkDataExist = checkDataExist;
   getVectorCountByTeamId = getVectorCountByTeamId;
   getVectorDataByTime = getVectorDataByTime;
 }
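Net effect of this file: the PgVector class gains checkDataExist and loses update, so a changed embedding is handled by deleting the old row and inserting a new one rather than updating in place. A minimal usage sketch, assuming the class is constructed directly; the id below is a placeholder, not a value from this commit:

const vectorStore = new PgVector();

await vectorStore.init(); // create the pgvector table if it does not exist

// update() is gone: replacing a vector now means delete + insert.
const exists = await vectorStore.checkDataExist('1234567890');
if (!exists) {
  // re-insert via vectorStore.insert(...) and keep the returned insertId
}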

View File

@@ -4,7 +4,7 @@ import { delay } from '@fastgpt/global/common/system/utils';
 import { PgClient, connectPg } from './index';
 import { PgSearchRawType } from '@fastgpt/global/core/dataset/api';
 import { EmbeddingRecallItemType } from '../type';
-import { DeleteDatasetVectorProps, EmbeddingRecallProps } from '../controller.d';
+import { DeleteDatasetVectorProps, EmbeddingRecallProps, InsertVectorProps } from '../controller.d';
 import dayjs from 'dayjs';
 
 export async function initPg() {
@@ -16,11 +16,9 @@ export async function initPg() {
         id BIGSERIAL PRIMARY KEY,
         vector VECTOR(1536) NOT NULL,
         team_id VARCHAR(50) NOT NULL,
-        tmb_id VARCHAR(50) NOT NULL,
         dataset_id VARCHAR(50) NOT NULL,
         collection_id VARCHAR(50) NOT NULL,
-        data_id VARCHAR(50) NOT NULL,
-        createTime TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP
     );
   `);
@@ -34,26 +32,21 @@ export async function initPg() {
   }
 }
 
-export const insertDatasetDataVector = async (props: {
-  teamId: string;
-  tmbId: string;
-  datasetId: string;
-  collectionId: string;
-  dataId: string;
-  vectors: number[][];
-  retry?: number;
-}): Promise<{ insertId: string }> => {
-  const { dataId, teamId, tmbId, datasetId, collectionId, vectors, retry = 3 } = props;
+export const insertDatasetDataVector = async (
+  props: InsertVectorProps & {
+    vectors: number[][];
+    retry?: number;
+  }
+): Promise<{ insertId: string }> => {
+  const { teamId, datasetId, collectionId, vectors, retry = 3 } = props;
   try {
     const { rows } = await PgClient.insert(PgDatasetTableName, {
       values: [
         [
           { key: 'vector', value: `[${vectors[0]}]` },
           { key: 'team_id', value: String(teamId) },
-          { key: 'tmb_id', value: String(tmbId) },
           { key: 'dataset_id', value: datasetId },
-          { key: 'collection_id', value: collectionId },
-          { key: 'data_id', value: String(dataId) }
+          { key: 'collection_id', value: collectionId }
         ]
       ]
     });
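The insert path stops writing tmb_id and data_id; its props are now typed by InsertVectorProps from ../controller.d (its exact shape is not shown in this hunk) plus vectors and retry. A hedged call sketch using only the fields the destructure above actually reads; InsertVectorProps may require more:

const embedding = new Array(1536).fill(0); // 1536-dim vector, matching the schema above

const { insertId } = await insertDatasetDataVector({
  teamId: 'team-id',
  datasetId: 'dataset-id',
  collectionId: 'collection-id',
  vectors: [embedding], // only vectors[0] is written per call
  retry: 3
});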
@@ -72,48 +65,33 @@ export const insertDatasetDataVector = async (props: {
   }
 };
 
-export const updateDatasetDataVector = async (props: {
-  id: string;
-  vectors: number[][];
-  retry?: number;
-}): Promise<void> => {
-  const { id, vectors, retry = 2 } = props;
-  try {
-    // update pg
-    await PgClient.update(PgDatasetTableName, {
-      where: [['id', id]],
-      values: [{ key: 'vector', value: `[${vectors[0]}]` }]
-    });
-  } catch (error) {
-    if (retry <= 0) {
-      return Promise.reject(error);
-    }
-    await delay(500);
-    return updateDatasetDataVector({
-      ...props,
-      retry: retry - 1
-    });
-  }
-};
-
 export const deleteDatasetDataVector = async (
   props: DeleteDatasetVectorProps & {
     retry?: number;
   }
 ): Promise<any> => {
-  const { id, datasetIds, collectionIds, collectionId, dataIds, retry = 2 } = props;
+  const { teamId, id, datasetIds, collectionIds, idList, retry = 2 } = props;
+
+  const teamIdWhere = `team_id='${String(teamId)}' AND`;
 
   const where = await (() => {
-    if (id) return `id=${id}`;
-    if (datasetIds) return `dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})`;
-    if (collectionIds) {
-      return `collection_id IN (${collectionIds.map((id) => `'${String(id)}'`).join(',')})`;
-    }
-    if (collectionId && dataIds) {
-      return `collection_id='${String(collectionId)}' and data_id IN (${dataIds
-        .map((id) => `'${String(id)}'`)
-        .join(',')})`;
-    }
+    if (id) return `${teamIdWhere} id=${id}`;
+    if (datasetIds) {
+      return `${teamIdWhere} dataset_id IN (${datasetIds
+        .map((id) => `'${String(id)}'`)
+        .join(',')})`;
+    }
+    if (collectionIds) {
+      return `${teamIdWhere} collection_id IN (${collectionIds
+        .map((id) => `'${String(id)}'`)
+        .join(',')})`;
+    }
+    if (idList) {
+      return `${teamIdWhere} id IN (${idList.map((id) => `'${String(id)}'`).join(',')})`;
+    }
     return Promise.reject('deleteDatasetData: no where');
   })();
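Every branch of the rebuilt where clause now starts with teamIdWhere, so a delete can never cross team boundaries, and the old collectionId + dataIds branch is replaced by an idList filter on the primary key. A sketch of the idList branch with placeholder values:

await deleteDatasetDataVector({
  teamId: 'team-id',
  idList: ['1', '2'],
  retry: 2
});
// resulting where fragment: team_id='team-id' AND id IN ('1','2')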
@@ -142,13 +120,13 @@ export const embeddingRecall = async (
 ): Promise<{
   results: EmbeddingRecallItemType[];
 }> => {
-  const { vectors, limit, similarity = 0, datasetIds, retry = 2 } = props;
+  const { datasetIds, vectors, limit, similarity = 0, retry = 2 } = props;
 
   try {
     const results: any = await PgClient.query(
       `BEGIN;
        SET LOCAL hnsw.ef_search = ${global.systemEnv.pgHNSWEfSearch || 100};
-       select id, collection_id, data_id, (vector <#> '[${vectors[0]}]') * -1 AS score
+       select id, collection_id, (vector <#> '[${vectors[0]}]') * -1 AS score
        from ${PgDatasetTableName}
        where dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
          AND vector <#> '[${vectors[0]}]' < -${similarity}
@@ -158,21 +136,10 @@ export const embeddingRecall = async (
     const rows = results?.[2]?.rows as PgSearchRawType[];
 
-    // concat same data_id
-    const filterRows: PgSearchRawType[] = [];
-    let set = new Set<string>();
-    for (const row of rows) {
-      if (!set.has(row.data_id)) {
-        filterRows.push(row);
-        set.add(row.data_id);
-      }
-    }
-
     return {
-      results: filterRows.map((item) => ({
+      results: rows.map((item) => ({
         id: item.id,
         collectionId: item.collection_id,
-        dataId: item.data_id,
         score: item.score
       }))
     };
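The recall result drops the per-data_id de-duplication and the dataId field; rows now map straight to { id, collectionId, score }. The score expression is unchanged: pgvector's <#> operator returns the negative inner product, so multiplying by -1 yields the inner product, and the where clause keeps rows whose inner product exceeds similarity. A small self-contained sketch of that arithmetic in plain TypeScript:

// What `(vector <#> query) * -1` and the similarity filter compute.
const innerProduct = (a: number[], b: number[]) =>
  a.reduce((sum, v, i) => sum + v * b[i], 0);

const query = [0.1, 0.2, 0.3];
const stored = [0.2, 0.1, 0.4];
const similarity = 0.1;

const negInner = -innerProduct(stored, query); // what <#> returns
const score = negInner * -1;                   // 0.16, the SELECT expression
const kept = negInner < -similarity;           // true: inner product 0.16 > 0.1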
@@ -184,7 +151,11 @@ export const embeddingRecall = async (
   }
 };
 
 // bill
+export const checkDataExist = async (id: string) => {
+  const { rows } = await PgClient.query(`SELECT id FROM ${PgDatasetTableName} WHERE id=${id};`);
+  return rows.length > 0;
+};
 export const getVectorCountByTeamId = async (teamId: string) => {
   const total = await PgClient.count(PgDatasetTableName, {
     where: [['team_id', String(teamId)]]
@@ -193,15 +164,20 @@ export const getVectorCountByTeamId = async (teamId: string) => {
   return total;
 };
 
 export const getVectorDataByTime = async (start: Date, end: Date) => {
-  const { rows } = await PgClient.query<{ id: string; data_id: string }>(`SELECT id, data_id
+  const { rows } = await PgClient.query<{
+    id: string;
+    team_id: string;
+    dataset_id: string;
+  }>(`SELECT id, team_id, dataset_id
   FROM ${PgDatasetTableName}
-  WHERE createTime BETWEEN '${dayjs(start).format('YYYY-MM-DD')}' AND '${dayjs(end).format(
-    'YYYY-MM-DD 23:59:59'
+  WHERE createtime BETWEEN '${dayjs(start).format('YYYY-MM-DD HH:mm:ss')}' AND '${dayjs(end).format(
+    'YYYY-MM-DD HH:mm:ss'
   )}';
   `);
 
   return rows.map((item) => ({
     id: item.id,
-    dataId: item.data_id
+    datasetId: item.dataset_id,
+    teamId: item.team_id
   }));
 };
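getVectorDataByTime now returns team_id and dataset_id instead of data_id, reads the renamed createtime column, and applies full 'YYYY-MM-DD HH:mm:ss' bounds on both ends rather than a date-only start and a hard-coded 23:59:59 end, so windows shorter than a day are respected. A hedged usage sketch; the 24-hour window is illustrative:

const end = new Date();
const start = new Date(end.getTime() - 24 * 60 * 60 * 1000); // last 24 hours

const rows = await getVectorDataByTime(start, end);
// rows: [{ id, teamId, datasetId }, ...]
for (const row of rows) {
  console.log(row.teamId, row.datasetId, row.id);
}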