Mirror of https://github.com/labring/FastGPT.git (synced 2025-08-01 11:58:38 +00:00)

Commit: perf: bill
@@ -8,6 +8,8 @@ import { type ChatCompletionRequestMessage } from 'openai';
 import { AppModuleItemType } from '@/types/app';
 import { dispatchModules } from '../openapi/v1/chat/completions';
 import { gptMessage2ChatType } from '@/utils/adapt';
+import { createTaskBill, delTaskBill, finishTaskBill } from '@/service/events/pushBill';
+import { BillSourceEnum } from '@/constants/user';

 export type MessageItemType = ChatCompletionRequestMessage & { _id?: string };
 export type Props = {
@@ -15,10 +17,8 @@ export type Props = {
   prompt: string;
   modules: AppModuleItemType[];
   variables: Record<string, any>;
-};
-export type ChatResponseType = {
-  newChatId: string;
-  quoteLen?: number;
+  appId: string;
+  appName: string;
 };

 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
@@ -30,8 +30,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) {
     res.end();
   });

-  let { modules = [], history = [], prompt, variables = {} } = req.body as Props;
-
+  let { modules = [], history = [], prompt, variables = {}, appName, appId } = req.body as Props;
+  let billId = '';
   try {
     if (!history || !modules || !prompt) {
       throw new Error('Prams Error');
@@ -45,6 +45,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) {
     /* user auth */
     const { userId } = await authUser({ req });

+    billId = await createTaskBill({
+      userId,
+      appName,
+      appId,
+      source: BillSourceEnum.fastgpt
+    });
+
     /* start process */
     const { responseData } = await dispatchModules({
       res,
@@ -54,7 +61,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) {
         history: gptMessage2ChatType(history),
         userChatInput: prompt
       },
-      stream: true
+      stream: true,
+      billId
     });

     sseResponse({
@@ -70,7 +78,11 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) {
     res.end();

+    // bill
+    finishTaskBill({
+      billId
+    });
   } catch (err: any) {
+    delTaskBill(billId);
     res.status(500);
     sseErrRes(res, err);
     res.end();
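Note: the handler changes above follow a create → attach → settle pattern: createTaskBill before dispatch, the billId threaded into dispatchModules, finishTaskBill on success, delTaskBill on error. A minimal sketch of that lifecycle, with hypothetical in-memory helpers standing in for the MongoDB-backed ones in @/service/events/pushBill (only the call pattern mirrors the diff):

```ts
// Hypothetical in-memory stand-ins; the real helpers persist to Mongo.
type BillListItem = { moduleName: string; amount: number; model: string; tokenLen: number };
type TaskBill = { id: string; appName: string; list: BillListItem[]; finished: boolean };

const bills = new Map<string, TaskBill>(); // stand-in for the Bill collection

async function createTaskBill({ appName }: { appName: string }): Promise<string> {
  const id = Math.random().toString(36).slice(2); // stand-in for a Mongo ObjectId
  bills.set(id, { id, appName, list: [], finished: false });
  return id; // the billId threaded through dispatchModules
}

async function pushTaskBillListItem(billId: string, item: BillListItem) {
  bills.get(billId)?.list.push(item); // each module appends its own usage line
}

async function finishTaskBill(billId: string) {
  const bill = bills.get(billId);
  if (bill) bill.finished = true; // success path: settle the bill
}

async function delTaskBill(billId: string) {
  bills.delete(billId); // error path: drop the half-built bill
}

// Usage, mirroring the handler: create before dispatch, settle or delete after.
async function run() {
  const billId = await createTaskBill({ appName: 'demo app' });
  try {
    await pushTaskBillListItem(billId, {
      moduleName: 'AI Chat',
      amount: 0.003,
      model: 'gpt-3.5-turbo',
      tokenLen: 1000
    });
    await finishTaskBill(billId);
  } catch {
    await delTaskBill(billId);
  }
}
```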
@@ -53,14 +53,6 @@ export async function saveChat({
   await connectToDatabase();
   const { app } = await authApp({ appId, userId, authOwner: false });

-  const content = prompts.map((item) => ({
-    _id: item._id,
-    obj: item.obj,
-    value: item.value,
-    systemPrompt: item.systemPrompt || '',
-    quote: item.quote || []
-  }));
-
   if (String(app.userId) === userId) {
     await App.findByIdAndUpdate(appId, {
       updateTime: new Date()
@@ -73,12 +65,11 @@ export async function saveChat({
       Chat.findByIdAndUpdate(historyId, {
         $push: {
           content: {
-            $each: content
+            $each: prompts
           }
         },
         variables,
-        title: content[0].value.slice(0, 20),
-        latestChat: content[1].value,
+        title: prompts[0].value.slice(0, 20),
         updateTime: new Date()
       }).then(() => ({
         newHistoryId: ''
@@ -90,9 +81,8 @@ export async function saveChat({
         userId,
         appId,
         variables,
-        content,
-        title: content[0].value.slice(0, 20),
-        latestChat: content[1].value
+        content: prompts,
+        title: prompts[0].value.slice(0, 20)
       }).then((res) => ({
         newHistoryId: String(res._id)
       }))
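Note: saveChat now pushes the raw prompts array instead of a remapped copy. The $each modifier is what makes this an element-wise append; a tiny plain-TypeScript illustration of the Mongo semantics:

```ts
// What $push: { content: { $each: prompts } } does: append every element of
// prompts, where a bare $push would append the whole array as one element.
const chat = { content: [{ value: 'earlier message' }] };
const prompts = [{ value: 'new question' }, { value: 'new answer' }];

chat.content.push(...prompts); // element-wise append, like $each
```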
@@ -1,186 +0,0 @@
-import type { NextApiRequest, NextApiResponse } from 'next';
-import { jsonRes } from '@/service/response';
-import { authUser } from '@/service/utils/auth';
-import { PgClient } from '@/service/pg';
-import { withNextCors } from '@/service/utils/tools';
-import type { ChatItemType } from '@/types/chat';
-import type { AppSchema } from '@/types/mongoSchema';
-import { authApp } from '@/service/utils/auth';
-import { ChatModelMap } from '@/constants/model';
-import { ChatRoleEnum } from '@/constants/chat';
-import { openaiEmbedding } from '../plugin/openaiEmbedding';
-import { modelToolMap } from '@/utils/plugin';
-
-export type QuoteItemType = {
-  id: string;
-  q: string;
-  a: string;
-  source?: string;
-};
-type Props = {
-  prompts: ChatItemType[];
-  similarity: number;
-  limit: number;
-  appId: string;
-};
-type Response = {
-  rawSearch: QuoteItemType[];
-  userSystemPrompt: {
-    obj: ChatRoleEnum;
-    value: string;
-  }[];
-  userLimitPrompt: {
-    obj: ChatRoleEnum;
-    value: string;
-  }[];
-  quotePrompt: {
-    obj: ChatRoleEnum;
-    value: string;
-  };
-};
-
-export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
-  try {
-    const { userId } = await authUser({ req });
-
-    if (!userId) {
-      throw new Error('userId is empty');
-    }
-
-    const { prompts, similarity, limit, appId } = req.body as Props;
-
-    if (!similarity || !Array.isArray(prompts) || !appId) {
-      throw new Error('params is error');
-    }
-
-    // auth app
-    const { app } = await authApp({
-      appId,
-      userId
-    });
-
-    const result = await appKbSearch({
-      app,
-      userId,
-      fixedQuote: [],
-      prompt: prompts[prompts.length - 1],
-      similarity,
-      limit
-    });
-
-    jsonRes<Response>(res, {
-      data: result
-    });
-  } catch (err) {
-    console.log(err);
-    jsonRes(res, {
-      code: 500,
-      error: err
-    });
-  }
-});
-
-export async function appKbSearch({
-  app,
-  userId,
-  fixedQuote = [],
-  prompt,
-  similarity = 0.8,
-  limit = 5
-}: {
-  app: AppSchema;
-  userId: string;
-  fixedQuote?: QuoteItemType[];
-  prompt: ChatItemType;
-  similarity: number;
-  limit: number;
-}): Promise<Response> {
-  const modelConstantsData = ChatModelMap[app.chat.chatModel];
-
-  // get vector
-  const promptVector = await openaiEmbedding({
-    userId,
-    input: [prompt.value]
-  });
-
-  // search kb
-  const res: any = await PgClient.query(
-    `BEGIN;
-    SET LOCAL ivfflat.probes = ${global.systemEnv.pgIvfflatProbe || 10};
-    select id,q,a,source from modelData where kb_id IN (${app.chat.relatedKbs
-      .map((item) => `'${item}'`)
-      .join(',')}) AND vector <#> '[${promptVector[0]}]' < -${similarity} order by vector <#> '[${
-      promptVector[0]
-    }]' limit ${limit};
-    COMMIT;`
-  );
-
-  const searchRes: QuoteItemType[] = res?.[2]?.rows || [];
-
-  // filter same search result
-  const idSet = new Set<string>();
-  const filterSearch = [
-    ...searchRes.slice(0, 3),
-    ...fixedQuote.slice(0, 2),
-    ...searchRes.slice(3),
-    ...fixedQuote.slice(2, Math.floor(fixedQuote.length * 0.4))
-  ].filter((item) => {
-    if (idSet.has(item.id)) {
-      return false;
-    }
-    idSet.add(item.id);
-    return true;
-  });
-
-  // count the tokens used by the fixed prompts
-  const userSystemPrompt = app.chat.systemPrompt // user system prompt
-    ? [
-        {
-          obj: ChatRoleEnum.System,
-          value: app.chat.systemPrompt
-        }
-      ]
-    : [];
-  const userLimitPrompt = [
-    {
-      obj: ChatRoleEnum.Human,
-      value: app.chat.limitPrompt
-        ? app.chat.limitPrompt
-        : `知识库是关于 ${app.name} 的内容,参考知识库回答问题。与 "${app.name}" 无关内容,直接回复: "我不知道"。`
-    }
-  ];
-
-  const fixedSystemTokens = modelToolMap.countTokens({
-    model: app.chat.chatModel,
-    messages: [...userSystemPrompt, ...userLimitPrompt]
-  });
-
-  // filter part quote by maxToken
-  const sliceResult = modelToolMap
-    .tokenSlice({
-      model: app.chat.chatModel,
-      maxToken: modelConstantsData.systemMaxToken - fixedSystemTokens,
-      messages: filterSearch.map((item, i) => ({
-        obj: ChatRoleEnum.System,
-        value: `${i + 1}: [${item.q}\n${item.a}]`
-      }))
-    })
-    .map((item) => item.value)
-    .join('\n')
-    .trim();
-
-  // slice filterSearch
-  const rawSearch = filterSearch.slice(0, sliceResult.length);
-
-  const quoteText = sliceResult ? `知识库:\n${sliceResult}` : '';
-
-  return {
-    rawSearch,
-    userSystemPrompt,
-    userLimitPrompt,
-    quotePrompt: {
-      obj: ChatRoleEnum.System,
-      value: quoteText
-    }
-  };
-}
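Note: the deleted endpoint's core budgeting idea survives in the module pipeline: count the fixed system and limit prompts first, then cut the retrieved quotes down to the remaining systemMaxToken budget. A rough sketch, with a crude ~4-chars-per-token counter standing in for the real modelToolMap implementation:

```ts
// Sketch of the quote budgeting from the deleted appKbSearch. countTokens is a
// ~4-chars-per-token stand-in, not the real tokenizer.
type Message = { value: string };

const countTokens = (messages: Message[]) =>
  Math.ceil(messages.reduce((sum, m) => sum + m.value.length, 0) / 4);

// keep quotes in order until the token budget runs out
function tokenSlice(messages: Message[], maxToken: number): Message[] {
  const kept: Message[] = [];
  for (const m of messages) {
    if (countTokens([...kept, m]) > maxToken) break;
    kept.push(m);
  }
  return kept;
}

const systemMaxToken = 2400; // per-model limit, as in ChatModelMap
const fixedTokens = countTokens([{ value: 'system prompt' }, { value: 'limit prompt' }]);
const quotes: Message[] = [{ value: 'q1\na1' }, { value: 'q2\na2' }];
const sliced = tokenSlice(quotes, systemMaxToken - fixedTokens); // quotes that fit
```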
@@ -15,6 +15,7 @@ type DateItemType = { a: string; q: string; source?: string };
 export type Props = {
   kbId: string;
   data: DateItemType[];
+  model: string;
   mode: `${TrainingModeEnum}`;
   prompt?: string;
 };
@@ -25,14 +26,14 @@ export type Response = {

 const modeMaxToken = {
   [TrainingModeEnum.index]: 6000,
-  [TrainingModeEnum.qa]: 10000
+  [TrainingModeEnum.qa]: 12000
 };

 export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   try {
-    const { kbId, data, mode, prompt } = req.body as Props;
+    const { kbId, data, mode, prompt, model } = req.body as Props;

-    if (!kbId || !Array.isArray(data)) {
+    if (!kbId || !Array.isArray(data) || !model) {
       throw new Error('缺少参数');
     }
     await connectToDatabase();
@@ -46,7 +47,8 @@ export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
       data,
       userId,
       mode,
-      prompt
+      prompt,
+      model
     })
   });
 } catch (err) {
@@ -62,7 +64,8 @@ export async function pushDataToKb({
   kbId,
   data,
   mode,
-  prompt
+  prompt,
+  model
 }: { userId: string } & Props): Promise<Response> {
   await authKb({
     userId,
@@ -79,7 +82,7 @@ export async function pushDataToKb({
     if (mode === TrainingModeEnum.qa) {
       // count token
       const token = modelToolMap.countTokens({
-        model: OpenAiChatEnum.GPT3516k,
+        model: 'gpt-3.5-turbo-16k',
         messages: [{ obj: 'System', value: item.q }]
       });
       if (token > modeMaxToken[TrainingModeEnum.qa]) {
@@ -144,6 +147,7 @@ export async function pushDataToKb({
     insertData.map((item) => ({
       q: item.q,
       a: item.a,
+      model,
       source: item.source,
       userId,
       kbId,
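Note: QA mode now gates each item against the raised 12000-token ceiling before queueing it for training. A sketch of that gate, with a rough stand-in for modelToolMap.countTokens:

```ts
// Items whose question text exceeds the per-mode token ceiling are rejected
// before training. countTokens is a ~4-chars-per-token stand-in.
const modeMaxToken = { index: 6000, qa: 12000 } as const;

const countTokens = (text: string) => Math.ceil(text.length / 4);

function fitsQaBudget(item: { q: string; a: string }): boolean {
  return countTokens(item.q) <= modeMaxToken.qa;
}

const accepted = [{ q: 'short question', a: 'answer' }].filter(fitsQaBudget);
```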
@@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response';
 import { authUser } from '@/service/utils/auth';
 import { PgClient } from '@/service/pg';
 import { withNextCors } from '@/service/utils/tools';
-import { openaiEmbedding } from '../plugin/openaiEmbedding';
+import { getVector } from '../plugin/vector';
 import type { KbTestItemType } from '@/types/plugin';

 export type Props = {
@@ -27,7 +27,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
     throw new Error('缺少用户ID');
   }

-  const vector = await openaiEmbedding({
+  const vector = await getVector({
     userId,
     input: [text]
   });
@@ -3,7 +3,7 @@ import { jsonRes } from '@/service/response';
 import { authUser } from '@/service/utils/auth';
 import { PgClient } from '@/service/pg';
 import { withNextCors } from '@/service/utils/tools';
-import { openaiEmbedding } from '../plugin/openaiEmbedding';
+import { getVector } from '../plugin/vector';

 export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   try {
@@ -19,7 +19,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
     // get vector
     const vector = await (async () => {
       if (q) {
-        return openaiEmbedding({
+        return getVector({
           userId,
           input: [q]
         });
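Note: this swap changes the resolved shape. openaiEmbedding resolved to a bare number[][], while getVector (the new plugin later in this diff) resolves to { vectors, tokenLen }. A sketch of adapting a call site that only needs the raw embedding; the getVector signature is restated here as a declaration:

```ts
// Declaration restating the new helper's shape for the sketch.
declare function getVector(args: {
  model?: string;
  userId?: string;
  input: string[];
}): Promise<{ vectors: number[][]; tokenLen: number }>;

async function embed(userId: string, text: string): Promise<number[]> {
  const { vectors } = await getVector({ userId, input: [text] });
  return vectors[0]; // same value the old openaiEmbedding(...)[0] provided
}
```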
@@ -6,12 +6,12 @@ import { ChatContextFilter } from '@/service/utils/chat/index';
 import type { ChatItemType } from '@/types/chat';
 import { ChatRoleEnum } from '@/constants/chat';
 import { getOpenAIApi, axiosConfig } from '@/service/ai/openai';
-import type { ClassifyQuestionAgentItemType } from '@/types/app';
+import type { RecognizeIntentionAgentItemType } from '@/types/app';

 export type Props = {
   history?: ChatItemType[];
   userChatInput: string;
-  agents: ClassifyQuestionAgentItemType[];
+  agents: RecognizeIntentionAgentItemType[];
   description: string;
 };
 export type Response = { history: ChatItemType[] };
@@ -6,29 +6,30 @@ import { ChatContextFilter } from '@/service/utils/chat/index';
 import type { ChatItemType } from '@/types/chat';
 import { ChatRoleEnum } from '@/constants/chat';
 import { getOpenAIApi, axiosConfig } from '@/service/ai/openai';
-import type { ClassifyQuestionAgentItemType } from '@/types/app';
+import type { RecognizeIntentionAgentItemType } from '@/types/app';
+import { countModelPrice, pushTaskBillListItem } from '@/service/events/pushBill';

 export type Props = {
   systemPrompt?: string;
   history?: ChatItemType[];
   userChatInput: string;
-  agents: ClassifyQuestionAgentItemType[];
+  agents: RecognizeIntentionAgentItemType[];
+  billId?: string;
 };
 export type Response = { history: ChatItemType[] };

-const agentModel = 'gpt-3.5-turbo-16k';
+const agentModel = 'gpt-3.5-turbo';
 const agentFunName = 'agent_user_question';

 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
   try {
-    let { systemPrompt, agents, history = [], userChatInput } = req.body as Props;
+    let { userChatInput } = req.body as Props;

-    const response = await classifyQuestion({
-      systemPrompt,
-      history,
-      userChatInput,
-      agents
-    });
+    if (!userChatInput) {
+      throw new Error('userChatInput is empty');
+    }
+
+    const response = await classifyQuestion(req.body);

     jsonRes(res, {
       data: response
@@ -46,7 +47,8 @@ export async function classifyQuestion({
   agents,
   systemPrompt,
   history = [],
-  userChatInput
+  userChatInput,
+  billId
 }: Props) {
   const messages: ChatItemType[] = [
     ...(systemPrompt
@@ -106,8 +108,19 @@ export async function classifyQuestion({
   if (!arg.type) {
     throw new Error('');
   }

+  const totalTokens = response.data.usage?.total_tokens || 0;
+
+  await pushTaskBillListItem({
+    billId,
+    moduleName: 'Recognize Intention',
+    amount: countModelPrice({ model: agentModel, tokens: totalTokens }),
+    model: agentModel,
+    tokenLen: totalTokens
+  });
+
   console.log(
-    '意图结果',
+    'CQ',
     agents.findIndex((item) => item.key === arg.type)
   );
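Note: non-streaming agent calls can bill straight from response.data.usage.total_tokens, as the added block above does. A sketch of the amount computation; the price table and its units inside countModelPrice are assumptions, only the call shape (model and total_tokens in, amount out) comes from the diff:

```ts
// Assumed per-token price table; the real values live in the billing service.
const pricePerToken: Record<string, number> = { 'gpt-3.5-turbo': 0.0000015 };

function countModelPrice({ model, tokens }: { model: string; tokens: number }) {
  return (pricePerToken[model] ?? 0) * tokens;
}

const usage = { total_tokens: 321 }; // shape of response.data.usage
const totalTokens = usage.total_tokens || 0;
const amount = countModelPrice({ model: 'gpt-3.5-turbo', tokens: totalTokens });
```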
@@ -1,9 +1,9 @@
 // Next.js API route support: https://nextjs.org/docs/api-routes/introduction
 import type { NextApiRequest, NextApiResponse } from 'next';
-import { jsonRes } from '@/service/response';
+import { jsonRes, sseErrRes } from '@/service/response';
 import { sseResponse } from '@/service/utils/tools';
-import { ChatModelMap, OpenAiChatEnum } from '@/constants/model';
-import { adaptChatItem_openAI } from '@/utils/plugin/openai';
+import { OpenAiChatEnum } from '@/constants/model';
+import { adaptChatItem_openAI, countOpenAIToken } from '@/utils/plugin/openai';
 import { modelToolMap } from '@/utils/plugin';
 import { ChatContextFilter } from '@/service/utils/chat/index';
 import type { ChatItemType } from '@/types/chat';
@@ -11,6 +11,8 @@ import { ChatRoleEnum, sseResponseEventEnum } from '@/constants/chat';
 import { parseStreamChunk, textAdaptGptResponse } from '@/utils/adapt';
 import { getOpenAIApi, axiosConfig } from '@/service/ai/openai';
 import { SpecificInputEnum } from '@/constants/app';
+import { getChatModel } from '@/service/utils/data';
+import { countModelPrice, pushTaskBillListItem } from '@/service/events/pushBill';

 export type Props = {
   model: `${OpenAiChatEnum}`;
@@ -22,39 +24,28 @@ export type Props = {
   quotePrompt?: string;
   systemPrompt?: string;
   limitPrompt?: string;
+  billId?: string;
 };
-export type Response = { [SpecificInputEnum.answerText]: string };
+export type Response = { [SpecificInputEnum.answerText]: string; totalTokens: number };

 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+  let { model, temperature = 0, stream } = req.body as Props;
   try {
-    let {
-      model,
-      stream = false,
-      temperature = 0,
-      maxToken = 4000,
-      history = [],
-      quotePrompt,
-      userChatInput,
-      systemPrompt,
-      limitPrompt
-    } = req.body as Props;
-
     // temperature adapt
-    const modelConstantsData = ChatModelMap[model];
+    const modelConstantsData = getChatModel(model);

     if (!modelConstantsData) {
       throw new Error('The chat model is undefined');
     }

     // FastGpt temperature range: 1~10
     temperature = +(modelConstantsData.maxTemperature * (temperature / 10)).toFixed(2);

     const response = await chatCompletion({
+      ...req.body,
       res,
       model,
-      temperature,
-      maxToken,
-      stream,
-      history,
-      userChatInput,
-      systemPrompt,
-      limitPrompt,
-      quotePrompt
+      temperature
     });

     if (stream) {
@@ -70,25 +61,32 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) {
       });
     }
   } catch (err) {
-    jsonRes(res, {
-      code: 500,
-      error: err
-    });
+    if (stream) {
+      res.status(500);
+      sseErrRes(res, err);
+      res.end();
+    } else {
+      jsonRes(res, {
+        code: 500,
+        error: err
+      });
+    }
   }
 }

 /* request openai chat */
 export async function chatCompletion({
   res,
-  model = OpenAiChatEnum.GPT35,
-  temperature,
+  model,
+  temperature = 0,
   maxToken = 4000,
-  stream,
+  stream = false,
   history = [],
-  quotePrompt,
+  quotePrompt = '',
   userChatInput,
-  systemPrompt,
-  limitPrompt
+  systemPrompt = '',
+  limitPrompt = '',
+  billId
 }: Props & { res: NextApiResponse }): Promise<Response> {
   const messages: ChatItemType[] = [
     ...(quotePrompt
@@ -121,7 +119,7 @@ export async function chatCompletion({
       value: userChatInput
     }
   ];
-  const modelTokenLimit = ChatModelMap[model]?.contextMaxToken || 4000;
+  const modelTokenLimit = getChatModel(model)?.contextMaxToken || 4000;

   const filterMessages = ChatContextFilter({
     model,
@@ -157,37 +155,47 @@ export async function chatCompletion({
     }
   );

-  const { answer } = await (async () => {
+  const { answer, totalTokens } = await (async () => {
     if (stream) {
       // sse response
       const { answer } = await streamResponse({ res, response });
       // count tokens
-      // const finishMessages = filterMessages.concat({
-      //   obj: ChatRoleEnum.AI,
-      //   value: answer
-      // });
+      const finishMessages = filterMessages.concat({
+        obj: ChatRoleEnum.AI,
+        value: answer
+      });

-      // const totalTokens = modelToolMap[model].countTokens({
-      //   messages: finishMessages
-      // });
+      const totalTokens = countOpenAIToken({
+        messages: finishMessages,
+        model: 'gpt-3.5-turbo-16k'
+      });

       return {
-        answer
-        // totalTokens
+        answer,
+        totalTokens
       };
     } else {
       const answer = stream ? '' : response.data.choices?.[0].message?.content || '';
-      // const totalTokens = stream ? 0 : response.data.usage?.total_tokens || 0;
+      const totalTokens = stream ? 0 : response.data.usage?.total_tokens || 0;

       return {
-        answer
-        // totalTokens
+        answer,
+        totalTokens
      };
     }
   })();

+  await pushTaskBillListItem({
+    billId,
+    moduleName: 'AI Chat',
+    amount: countModelPrice({ model, tokens: totalTokens }),
+    model,
+    tokenLen: totalTokens
+  });
+
   return {
-    answerText: answer
+    answerText: answer,
+    totalTokens
   };
 }
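Note: the stream branch cannot bill from response.data.usage because OpenAI's streaming responses carry no usage object, which is why the previously commented-out manual count is now live: rebuild the finished message list and count it. A sketch with a stand-in counter:

```ts
// The counter below is a ~4-chars-per-token stand-in for countOpenAIToken.
type ChatMsg = { obj: 'System' | 'Human' | 'AI'; value: string };

const countOpenAIToken = ({ messages }: { messages: ChatMsg[]; model: string }) =>
  Math.ceil(messages.reduce((sum, m) => sum + m.value.length, 0) / 4);

function tokensForStream(filterMessages: ChatMsg[], answer: string): number {
  // append the streamed answer, then count the whole finished conversation
  const finishMessages = filterMessages.concat({ obj: 'AI', value: answer });
  return countOpenAIToken({ messages: finishMessages, model: 'gpt-3.5-turbo-16k' });
}
```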
@@ -4,8 +4,9 @@ import { PgClient } from '@/service/pg';
 import { withNextCors } from '@/service/utils/tools';
 import type { ChatItemType } from '@/types/chat';
 import { ChatRoleEnum } from '@/constants/chat';
-import { openaiEmbedding_system } from '../../plugin/openaiEmbedding';
 import { modelToolMap } from '@/utils/plugin';
+import { getVector } from '../../plugin/vector';
+import { countModelPrice, pushTaskBillListItem } from '@/service/events/pushBill';

 export type QuoteItemType = {
   id: string;
@@ -21,6 +22,7 @@ type Props = {
   maxToken: number;
   userChatInput: string;
   stream?: boolean;
+  billId?: string;
 };
 type Response = {
   rawSearch: QuoteItemType[];
@@ -30,25 +32,15 @@ type Response = {

 export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
   try {
-    const {
-      kb_ids = [],
-      history = [],
-      similarity,
-      limit,
-      maxToken,
-      userChatInput
-    } = req.body as Props;
+    const { kb_ids = [], userChatInput } = req.body as Props;

-    if (!similarity || !Array.isArray(kb_ids)) {
+    if (!userChatInput || !Array.isArray(kb_ids)) {
       throw new Error('params is error');
     }

     const result = await kbSearch({
+      ...req.body,
       kb_ids,
-      history,
-      similarity,
-      limit,
-      maxToken,
       userChatInput
     });

@@ -70,7 +62,8 @@ export async function kbSearch({
   similarity = 0.8,
   limit = 5,
   maxToken = 2500,
-  userChatInput
+  userChatInput,
+  billId
 }: Props): Promise<Response> {
   if (kb_ids.length === 0)
     return {
@@ -78,22 +71,34 @@ export async function kbSearch({
       rawSearch: [],
       quotePrompt: undefined
     };

   // get vector
-  const promptVector = await openaiEmbedding_system({
+  const vectorModel = global.vectorModels[0].model;
+  const { vectors, tokenLen } = await getVector({
+    model: vectorModel,
     input: [userChatInput]
   });

   // search kb
-  const res: any = await PgClient.query(
-    `BEGIN;
+  const [res]: any = await Promise.all([
+    PgClient.query(
+      `BEGIN;
     SET LOCAL ivfflat.probes = ${global.systemEnv.pgIvfflatProbe || 10};
     select id,q,a,source from modelData where kb_id IN (${kb_ids
       .map((item) => `'${item}'`)
-      .join(',')}) AND vector <#> '[${promptVector[0]}]' < -${similarity} order by vector <#> '[${
-      promptVector[0]
-    }]' limit ${limit};
+      .join(',')}) AND vector <#> '[${vectors[0]}]' < -${similarity} order by vector <#> '[${
+      vectors[0]
+    }]' limit ${limit};
     COMMIT;`
-  );
+    ),
+    pushTaskBillListItem({
+      billId,
+      moduleName: 'Vector Generate',
+      amount: countModelPrice({ model: vectorModel, tokens: tokenLen }),
+      model: vectorModel,
+      tokenLen
+    })
+  ]);

   const searchRes: QuoteItemType[] = res?.[2]?.rows || [];
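Note on the query: <#> is pgvector's negative inner product, so `vector <#> e < -similarity` keeps rows whose inner product with the query embedding exceeds the similarity threshold, and ordering by the same expression ranks the closest rows first; wrapping the billing write in Promise.all overlaps it with the search. A parameterized sketch with an assumed pg-style client (the diff itself interpolates the values into the SQL string directly):

```ts
// Assumed pg-style client with a query(sql, params) method.
async function searchKb(
  client: { query: (sql: string, params?: unknown[]) => Promise<unknown> },
  embedding: number[],
  similarity: number,
  limit: number
) {
  const vec = `[${embedding.join(',')}]`; // pgvector text representation
  return client.query(
    `select id, q, a, source from modelData
       where vector <#> $1::vector < -$2
       order by vector <#> $1::vector
       limit $3;`,
    [vec, similarity, limit]
  );
}
```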
@@ -1,115 +0,0 @@
-import type { NextApiRequest, NextApiResponse } from 'next';
-import { jsonRes } from '@/service/response';
-import { authUser, getApiKey, getSystemOpenAiKey } from '@/service/utils/auth';
-import { withNextCors } from '@/service/utils/tools';
-import { getOpenAIApi } from '@/service/utils/chat/openai';
-import { embeddingModel } from '@/constants/model';
-import { axiosConfig } from '@/service/utils/tools';
-import { pushGenerateVectorBill } from '@/service/events/pushBill';
-import { OpenAiChatEnum } from '@/constants/model';
-
-type Props = {
-  input: string[];
-};
-type Response = number[][];
-
-export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
-  try {
-    const { userId } = await authUser({ req });
-    let { input } = req.query as Props;
-
-    if (!Array.isArray(input)) {
-      throw new Error('缺少参数');
-    }
-
-    jsonRes<Response>(res, {
-      data: await openaiEmbedding({ userId, input, mustPay: true })
-    });
-  } catch (err) {
-    console.log(err);
-    jsonRes(res, {
-      code: 500,
-      error: err
-    });
-  }
-});
-
-export async function openaiEmbedding({
-  userId,
-  input,
-  mustPay = false
-}: { userId: string; mustPay?: boolean } & Props) {
-  const { userOpenAiKey, systemAuthKey } = await getApiKey({
-    model: 'gpt-3.5-turbo',
-    userId,
-    mustPay
-  });
-  const apiKey = userOpenAiKey || systemAuthKey;
-
-  // get the chat API client
-  const chatAPI = getOpenAIApi(apiKey);
-
-  // convert the input into vectors
-  const result = await chatAPI
-    .createEmbedding(
-      {
-        model: embeddingModel,
-        input
-      },
-      {
-        timeout: 60000,
-        ...axiosConfig(apiKey)
-      }
-    )
-    .then((res) => {
-      if (!res.data?.usage?.total_tokens) {
-        // @ts-ignore
-        return Promise.reject(res.data?.error?.message || 'Embedding Error');
-      }
-      return {
-        tokenLen: res.data.usage.total_tokens || 0,
-        vectors: res.data.data.map((item) => item.embedding)
-      };
-    });
-
-  pushGenerateVectorBill({
-    isPay: !userOpenAiKey,
-    userId,
-    text: input.join(''),
-    tokenLen: result.tokenLen
-  });
-
-  return result.vectors;
-}
-
-export async function openaiEmbedding_system({ input }: Props) {
-  const apiKey = getSystemOpenAiKey();
-
-  // get the chat API client
-  const chatAPI = getOpenAIApi(apiKey);
-
-  // convert the input into vectors
-  const result = await chatAPI
-    .createEmbedding(
-      {
-        model: embeddingModel,
-        input
-      },
-      {
-        timeout: 20000,
-        ...axiosConfig(apiKey)
-      }
-    )
-    .then((res) => {
-      if (!res.data?.usage?.total_tokens) {
-        // @ts-ignore
-        return Promise.reject(res.data?.error?.message || 'Embedding Error');
-      }
-      return {
-        tokenLen: res.data.usage.total_tokens || 0,
-        vectors: res.data.data.map((item) => item.embedding)
-      };
-    });
-
-  return result.vectors;
-}
client/src/pages/api/openapi/plugin/vector.ts (new file, 79 lines)
@@ -0,0 +1,79 @@
+import type { NextApiRequest, NextApiResponse } from 'next';
+import { jsonRes } from '@/service/response';
+import { authBalanceByUid, authUser } from '@/service/utils/auth';
+import { withNextCors } from '@/service/utils/tools';
+import { getOpenAIApi, axiosConfig } from '@/service/ai/openai';
+import { pushGenerateVectorBill } from '@/service/events/pushBill';
+
+type Props = {
+  model: string;
+  input: string[];
+};
+type Response = {
+  tokenLen: number;
+  vectors: number[][];
+};
+
+export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
+  try {
+    const { userId } = await authUser({ req });
+    let { input, model } = req.query as Props;
+
+    if (!Array.isArray(input)) {
+      throw new Error('缺少参数');
+    }
+
+    jsonRes<Response>(res, {
+      data: await getVector({ userId, input, model })
+    });
+  } catch (err) {
+    console.log(err);
+    jsonRes(res, {
+      code: 500,
+      error: err
+    });
+  }
+});
+
+export async function getVector({
+  model = 'text-embedding-ada-002',
+  userId,
+  input
+}: { userId?: string } & Props) {
+  userId && (await authBalanceByUid(userId));
+
+  // get the chat API client
+  const chatAPI = getOpenAIApi();
+
+  // convert the input into vectors
+  const result = await chatAPI
+    .createEmbedding(
+      {
+        model,
+        input
+      },
+      {
+        timeout: 60000,
+        ...axiosConfig()
+      }
+    )
+    .then((res) => {
+      if (!res.data?.usage?.total_tokens) {
+        // @ts-ignore
+        return Promise.reject(res.data?.error?.message || 'Embedding Error');
+      }
+      return {
+        tokenLen: res.data.usage.total_tokens || 0,
+        vectors: res.data.data.map((item) => item.embedding)
+      };
+    });
+
+  userId &&
+    pushGenerateVectorBill({
+      userId,
+      tokenLen: result.tokenLen,
+      model
+    });
+
+  return result;
+}
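Note: a server-side usage sketch for the new helper. The import path is assumed from the file location above; the model name and input are illustrative.

```ts
import { getVector } from '@/pages/api/openapi/plugin/vector';

async function demo() {
  const { vectors, tokenLen } = await getVector({
    model: 'text-embedding-ada-002',
    input: ['hello world']
  });
  console.log(vectors[0].length, tokenLen); // embedding dimension, billed tokens
}
```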
@@ -15,8 +15,8 @@ import { Types } from 'mongoose';
 import { moduleFetch } from '@/service/api/request';
 import { AppModuleItemType, RunningModuleItemType } from '@/types/app';
 import { FlowInputItemTypeEnum } from '@/constants/flow';
-import { pushChatBill } from '@/service/events/pushBill';
-import { BillTypeEnum } from '@/constants/user';
+import { finishTaskBill, createTaskBill } from '@/service/events/pushBill';
+import { BillSourceEnum } from '@/constants/user';

 export type MessageItemType = ChatCompletionRequestMessage & { _id?: string };
 type FastGptWebChatProps = {
@@ -108,6 +108,13 @@ export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
       res.setHeader('newHistoryId', String(newHistoryId));
     }

+    const billId = await createTaskBill({
+      userId,
+      appName: app.name,
+      appId,
+      source: BillSourceEnum.fastgpt
+    });
+
     /* start process */
     const { responseData, answerText } = await dispatchModules({
       res,
@@ -117,7 +124,8 @@ export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
         history: prompts,
         userChatInput: prompt.value
       },
-      stream
+      stream,
+      billId: ''
     });

     // save chat
@@ -171,14 +179,9 @@ export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse<any>) {
       });
     }

-    pushChatBill({
-      isPay: true,
-      chatModel: 'gpt-3.5-turbo',
-      userId,
-      appId,
-      textLen: 1,
-      tokens: 100,
-      type: BillTypeEnum.chat
+    // bill
+    finishTaskBill({
+      billId
     });
   } catch (err: any) {
     if (stream) {
@@ -199,18 +202,21 @@ export async function dispatchModules({
   modules,
   params = {},
   variables = {},
-  stream = false
+  stream = false,
+  billId
 }: {
   res: NextApiResponse;
   modules: AppModuleItemType[];
   params?: Record<string, any>;
   variables?: Record<string, any>;
+  billId: string;
   stream?: boolean;
 }) {
   const runningModules = loadModules(modules, variables);
-  let storeData: Record<string, any> = {};
-  let responseData: Record<string, any> = {};
-  let answerText = '';

+  let storeData: Record<string, any> = {}; // after module used
+  let responseData: Record<string, any> = {}; // response request and save to database
+  let answerText = ''; // AI answer
+
   function pushStore({
     isResponse = false,
@@ -327,6 +333,7 @@ export async function dispatchModules({
     });
     const data = {
       stream,
+      billId,
       ...params
     };
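Note: dispatchModules folds billId into the payload sent with every moduleFetch call, which is how each module (AI Chat, Recognize Intention, Vector Generate) can append its own bill line. A sketch of that shape, mirroring the `const data = { stream, billId, ...params }` line above:

```ts
type ModuleParams = Record<string, any>;

function buildModuleData(stream: boolean, billId: string, params: ModuleParams) {
  return { stream, billId, ...params }; // billId rides along with module inputs
}

const data = buildModuleData(true, 'bill-id-from-createTaskBill', { userChatInput: 'hi' });
```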
@@ -1,19 +1,114 @@
 // Next.js API route support: https://nextjs.org/docs/api-routes/introduction
 import type { NextApiRequest, NextApiResponse } from 'next';
 import { jsonRes } from '@/service/response';
+import {
+  type QAModelItemType,
+  type ChatModelItemType,
+  type VectorModelItemType
+} from '@/types/model';
+import { readFileSync } from 'fs';

 export type InitDateResponse = {
   beianText: string;
   googleVerKey: string;
-  baiduTongji: boolean;
+  baiduTongji: string;
+  chatModels: ChatModelItemType[];
+  qaModels: QAModelItemType[];
+  vectorModels: VectorModelItemType[];
 };

+const defaultmodels = {
+  'Gpt35-4k': {
+    model: 'gpt-3.5-turbo',
+    name: 'Gpt35-4k',
+    contextMaxToken: 4000,
+    systemMaxToken: 2400,
+    maxTemperature: 1.2,
+    price: 1.5
+  },
+  'Gpt35-16k': {
+    model: 'gpt-3.5-turbo',
+    name: 'Gpt35-16k',
+    contextMaxToken: 16000,
+    systemMaxToken: 8000,
+    maxTemperature: 1.2,
+    price: 3
+  },
+  Gpt4: {
+    model: 'gpt-4',
+    name: 'Gpt4',
+    contextMaxToken: 8000,
+    systemMaxToken: 4000,
+    maxTemperature: 1.2,
+    price: 45
+  }
+};
+const defaultQaModels = {
+  'Gpt35-16k': {
+    model: 'gpt-3.5-turbo',
+    name: 'Gpt35-16k',
+    maxToken: 16000,
+    price: 3
+  }
+};
+const defaultVectorModels = {
+  'text-embedding-ada-002': {
+    model: 'text-embedding-ada-002',
+    name: 'Embedding-2',
+    price: 0.2
+  }
+};
+
 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+  const envs = {
+    beianText: process.env.SAFE_BEIAN_TEXT || '',
+    googleVerKey: process.env.CLIENT_GOOGLE_VER_TOKEN || '',
+    baiduTongji: process.env.BAIDU_TONGJI || ''
+  };
+
   jsonRes<InitDateResponse>(res, {
     data: {
-      beianText: process.env.SAFE_BEIAN_TEXT || '',
-      googleVerKey: process.env.CLIENT_GOOGLE_VER_TOKEN || '',
-      baiduTongji: process.env.BAIDU_TONGJI === '1'
+      ...envs,
+      ...initSystemModels()
     }
   });
 }
+
+export function initSystemModels() {
+  const { chatModels, qaModels, vectorModels } = (() => {
+    try {
+      const chatModels = Object.values(JSON.parse(readFileSync('data/ChatModels.json', 'utf-8')));
+      const qaModels = Object.values(JSON.parse(readFileSync('data/QAModels.json', 'utf-8')));
+      const vectorModels = Object.values(
+        JSON.parse(readFileSync('data/VectorModels.json', 'utf-8'))
+      );
+
+      return {
+        chatModels,
+        qaModels,
+        vectorModels
+      };
+    } catch (error) {
+      console.log(error);
+
+      return {
+        chatModels: Object.values(defaultmodels),
+        qaModels: Object.values(defaultQaModels),
+        vectorModels: Object.values(defaultVectorModels)
+      };
+    }
+  })() as {
+    chatModels: ChatModelItemType[];
+    qaModels: QAModelItemType[];
+    vectorModels: VectorModelItemType[];
+  };
+  global.chatModels = chatModels;
+  global.qaModels = qaModels;
+  global.vectorModels = vectorModels;
+
+  return {
+    chatModels,
+    qaModels,
+    vectorModels
+  };
+}
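Note: the data/*.json files are keyed objects whose values match the model item types; Object.values() flattens them into the arrays stored on global. A data/ChatModels.json entry mirroring the built-in default would look like this, expressed as a TypeScript literal (shape inferred from defaultmodels above):

```ts
const exampleChatModelsJson = {
  'Gpt35-4k': {
    model: 'gpt-3.5-turbo',
    name: 'Gpt35-4k',
    contextMaxToken: 4000,
    systemMaxToken: 2400,
    maxTemperature: 1.2,
    price: 1.5
  }
};

const chatModels = Object.values(exampleChatModelsJson); // what initSystemModels stores
```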
@@ -1,31 +1,22 @@
 import type { NextApiRequest, NextApiResponse } from 'next';
 import { jsonRes } from '@/service/response';
-import { System } from '@/service/models/system';
 import { authUser } from '@/service/utils/auth';
-
-export type InitDateResponse = {
-  beianText: string;
-  googleVerKey: string;
-};
+import { readFileSync } from 'fs';

 export default async function handler(req: NextApiRequest, res: NextApiResponse) {
   await authUser({ req, authRoot: true });
   updateSystemEnv();
-  jsonRes<InitDateResponse>(res);
+  jsonRes(res);
 }

 export async function updateSystemEnv() {
   try {
-    const mongoData = await System.findOne();
+    const res = JSON.parse(readFileSync('data/SystemParams.json', 'utf-8'));

-    if (mongoData) {
-      const obj = mongoData.toObject();
-      global.systemEnv = {
-        ...global.systemEnv,
-        ...obj
-      };
-    }
-    console.log('update env', global.systemEnv);
+    global.systemEnv = {
+      ...global.systemEnv,
+      ...res
+    };
   } catch (error) {
     console.log('update system env error');
   }
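Note: the env refresh is a shallow merge, so keys present in data/SystemParams.json override the running global.systemEnv and everything else is preserved. Illustrative sketch; vectorMaxProcess is a made-up key, only pgIvfflatProbe appears in this diff:

```ts
const systemEnv = { pgIvfflatProbe: 10, vectorMaxProcess: 15 };
const fileParams = { pgIvfflatProbe: 20 }; // contents of data/SystemParams.json

const merged = { ...systemEnv, ...fileParams };
// => { pgIvfflatProbe: 20, vectorMaxProcess: 15 }
```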