Optimize the file storage structure of the knowledge base (#386)

This commit is contained in:
Archer
2023-10-10 22:41:05 +08:00
committed by GitHub
parent 29d152784f
commit d0041a98b4
41 changed files with 591 additions and 231 deletions

View File

@@ -179,8 +179,8 @@ export const insertData2Dataset = ({
values: data.map((item) => [
{ key: 'user_id', value: userId },
{ key: 'kb_id', value: kbId },
{ key: 'source', value: item.source?.slice(0, 60)?.trim() || '' },
{ key: 'file_id', value: item.file_id || '' },
{ key: 'source', value: item.source?.slice(0, 200)?.trim() || '' },
{ key: 'file_id', value: item.file_id?.slice(0, 200)?.trim() || '' },
{ key: 'q', value: item.q.replace(/'/g, '"') },
{ key: 'a', value: item.a.replace(/'/g, '"') },
{ key: 'vector', value: `[${item.vector}]` }
@@ -198,13 +198,14 @@ export async function initPg() {
vector VECTOR(1536) NOT NULL,
user_id VARCHAR(50) NOT NULL,
kb_id VARCHAR(50),
source VARCHAR(100),
file_id VARCHAR(100),
source VARCHAR(256),
file_id VARCHAR(256),
q TEXT NOT NULL,
a TEXT
);
CREATE INDEX IF NOT EXISTS modelData_userId_index ON ${PgDatasetTableName} USING HASH (user_id);
CREATE INDEX IF NOT EXISTS modelData_kbId_index ON ${PgDatasetTableName} USING HASH (kb_id);
CREATE INDEX IF NOT EXISTS modelData_kb_id_index ON ${PgDatasetTableName} (kb_id);
CREATE INDEX IF NOT EXISTS modelData_fileId_index ON ${PgDatasetTableName} (file_id);
CREATE INDEX IF NOT EXISTS idx_model_data_md5_q_a_user_id_kb_id ON ${PgDatasetTableName} (md5(q), md5(a), user_id, kb_id);
`);
console.log('init pg successful');