This commit is contained in:
Archer
2023-12-31 14:12:51 +08:00
committed by GitHub
parent ccca0468da
commit 9ccfda47b7
270 changed files with 8182 additions and 1295 deletions

View File

@@ -0,0 +1,73 @@
import { getAIApi } from '../config';
/** Arguments accepted by `getVectorsByText`. */
export type GetVectorProps = {
// Name of the embedding model forwarded to the embeddings API.
model: string;
// Text to embed: either a single string or a list of strings.
input: string | string[];
};
// text to vector
/**
 * Convert text into embedding vectors.
 *
 * Sends `input` to the embeddings endpoint of the client returned by `getAIApi()`
 * and passes every returned embedding through `unityDimensional` so that all
 * vectors share the same length.
 *
 * @param model - Embedding model name; defaults to `text-embedding-ada-002`.
 * @param input - A single text or a list of texts. An empty string, an empty
 *   array, or an array containing an empty item is rejected before any request
 *   is made.
 * @returns `tokens` (total token usage reported by the API, 0 when it reports
 *   none) and `vectors` (one normalised vector per returned embedding).
 * @throws Rejects with `{ code: 500, message }` for invalid input, with a string
 *   message when the response carries no usable embedding, or with the error
 *   raised by the API client. Request/response failures are logged first.
 */
export async function getVectorsByText({
  model = 'text-embedding-ada-002',
  input
}: GetVectorProps): Promise<{ tokens: number; vectors: number[][] }> {
  if (typeof input === 'string' && !input) {
    return Promise.reject({
      code: 500,
      message: 'input is empty'
    });
  }
  // An empty list has nothing to embed; reject it up front instead of sending
  // a request that can only fail remotely.
  if (Array.isArray(input) && (input.length === 0 || input.some((item: string) => !item))) {
    return Promise.reject({
      code: 500,
      message: 'input array is empty'
    });
  }

  try {
    // Get the AI API client
    const ai = getAIApi();

    // Turn the input text into vectors
    const res = await ai.embeddings.create({
      model,
      input
    });

    // The rejection values below are intentionally the same plain strings the
    // function has always produced, so existing callers keep working.
    if (!res.data) {
      throw 'Embedding API 404';
    }
    if (!res.data?.[0]?.embedding) {
      console.log(res?.data);
      // Some backends answer with an error payload in place of the embedding list.
      // @ts-ignore
      throw res.data?.err?.message || 'Embedding API Error';
    }

    return {
      // `usage` may be missing on some OpenAI-compatible backends.
      tokens: res.usage?.total_tokens || 0,
      // unityDimensional is synchronous, so a plain map is enough.
      vectors: res.data.map((item: { embedding: number[] }) => unityDimensional(item.embedding))
    };
  } catch (error) {
    console.log(`Embedding Error`, error);
    return Promise.reject(error);
  }
}
/**
 * Normalise a vector to exactly 1536 entries.
 *
 * Longer vectors are cut down to their first 1536 entries (a message is logged);
 * shorter ones are right-padded with zeros. A new array is always returned.
 */
function unityDimensional(vector: number[]) {
  const missing = 1536 - vector.length;

  if (missing < 0) {
    console.log(`当前向量维度为: ${vector.length}, 向量维度不能超过 1536, 已自动截取前 1536 维度`);
    return vector.slice(0, 1536);
  }

  // Zero padding for the dimensions the vector does not provide.
  const padding: number[] = Array.from({ length: missing }, () => 0);
  return vector.concat(padding);
}

View File

@@ -26,7 +26,8 @@ export async function createQuestionGuide({
});
const answer = data.choices?.[0]?.message?.content || '';
const totalTokens = data.usage?.total_tokens || 0;
const inputTokens = data.usage?.prompt_tokens || 0;
const outputTokens = data.usage?.completion_tokens || 0;
const start = answer.indexOf('[');
const end = answer.lastIndexOf(']');
@@ -34,7 +35,8 @@ export async function createQuestionGuide({
if (start === -1 || end === -1) {
return {
result: [],
tokens: totalTokens
inputTokens,
outputTokens
};
}
@@ -46,12 +48,14 @@ export async function createQuestionGuide({
try {
return {
result: JSON.parse(jsonStr),
tokens: totalTokens
inputTokens,
outputTokens
};
} catch (error) {
return {
result: [],
tokens: totalTokens
inputTokens,
outputTokens
};
}
}

View File

@@ -1,11 +1,11 @@
import { MongoDatasetData } from './schema';
import { deletePgDataById } from './pg';
import { MongoDatasetTraining } from '../training/schema';
import { delFileByFileIdList, delFileByMetadata } from '../../../common/file/gridfs/controller';
import { BucketNameEnum } from '@fastgpt/global/common/file/constants';
import { MongoDatasetCollection } from '../collection/schema';
import { delay } from '@fastgpt/global/common/system/utils';
import { delImgByFileIdList } from '../../../common/file/image/controller';
import { deleteDatasetDataVector } from '../../../common/vectorStore/controller';
/* delete all data by datasetIds */
export async function delDatasetRelevantData({ datasetIds }: { datasetIds: string[] }) {
@@ -21,7 +21,7 @@ export async function delDatasetRelevantData({ datasetIds }: { datasetIds: strin
// delete dataset.datas
await MongoDatasetData.deleteMany({ datasetId: { $in: datasetIds } });
// delete pg data
await deletePgDataById(`dataset_id IN ('${datasetIds.join("','")}')`);
await deleteDatasetDataVector({ datasetIds });
// delete collections
await MongoDatasetCollection.deleteMany({
@@ -56,7 +56,7 @@ export async function delCollectionRelevantData({
// delete dataset.datas
await MongoDatasetData.deleteMany({ collectionId: { $in: collectionIds } });
// delete pg data
await deletePgDataById(`collection_id IN ('${collectionIds.join("','")}')`);
await deleteDatasetDataVector({ collectionIds });
// delete collections
await MongoDatasetCollection.deleteMany({
@@ -76,6 +76,6 @@ export async function delCollectionRelevantData({
* delete one data by mongoDataId
*/
export async function delDatasetDataByDataId(mongoDataId: string) {
// NOTE(review): this view looks like a diff hunk with its +/- markers stripped —
// the deletePgDataById call appears to be the removed line and
// deleteDatasetDataVector its replacement; confirm only one of them remains.
await deletePgDataById(['data_id', mongoDataId]);
await deleteDatasetDataVector({ dataIds: [mongoDataId] });
// Remove the Mongo document after the vector-side deletion has completed.
await MongoDatasetData.findByIdAndDelete(mongoDataId);
}

View File

@@ -1,28 +0,0 @@
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import { delay } from '@fastgpt/global/common/system/utils';
import { PgClient } from '../../../common/pg';
/**
 * Delete rows from the dataset table in PG, retrying on failure.
 *
 * The delete is attempted up to three times (one initial try plus two retries),
 * with `delay(500)` awaited between attempts. If the last attempt fails, the
 * returned promise rejects with that attempt's error.
 *
 * @param where - Either a `[column, value]` pair or a raw where-clause string;
 *   it is passed to `PgClient.delete` as the single entry of its `where` list.
 * @returns `{ tokenLen: 0 }` once the delete succeeds.
 */
export async function deletePgDataById(
  where: ['id' | 'dataset_id' | 'collection_id' | 'data_id', string] | string
) {
  let retriesLeft = 2;

  for (;;) {
    try {
      await PgClient.delete(PgDatasetTableName, {
        where: [where]
      });
      break;
    } catch (error) {
      retriesLeft -= 1;
      if (retriesLeft < 0) {
        throw error;
      }
      await delay(500);
    }
  }

  return {
    tokenLen: 0
  };
}

View File

@@ -85,7 +85,6 @@ const DatasetDataSchema = new Schema({
});
try {
DatasetDataSchema.index({ teamId: 1 });
DatasetDataSchema.index({ datasetId: 1 });
DatasetDataSchema.index({ collectionId: 1 });
DatasetDataSchema.index({ updateTime: -1 });