diff --git a/.npmrc b/.npmrc new file mode 100644 index 000000000..0c4bb1d5f --- /dev/null +++ b/.npmrc @@ -0,0 +1,2 @@ +public-hoist-pattern[]=*tiktoken* +public-hoist-pattern[]=*react* diff --git a/Dockerfile b/Dockerfile index 6a4be70a8..63dc48150 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ RUN apk add --no-cache libc6-compat && npm install -g pnpm@8.6.0 RUN [ -z "$proxy" ] || pnpm config set registry https://registry.npmmirror.com # copy packages and one project -COPY pnpm-lock.yaml pnpm-workspace.yaml ./ +COPY pnpm-lock.yaml pnpm-workspace.yaml .npmrc ./ COPY ./packages ./packages COPY ./projects/$name/package.json ./projects/$name/package.json @@ -27,7 +27,7 @@ ARG name ARG proxy # copy common node_modules and one project node_modules -COPY package.json pnpm-workspace.yaml ./ +COPY package.json pnpm-workspace.yaml .npmrc ./ COPY --from=mainDeps /app/node_modules ./node_modules COPY --from=mainDeps /app/packages ./packages COPY ./projects/$name ./projects/$name @@ -64,6 +64,11 @@ COPY --from=builder --chown=nextjs:nodejs /app/projects/$name/.next/static /app/ COPY --from=builder --chown=nextjs:nodejs /app/projects/$name/.next/server/chunks /app/projects/$name/.next/server/chunks # copy worker COPY --from=builder --chown=nextjs:nodejs /app/projects/$name/.next/server/worker /app/projects/$name/.next/server/worker + +# copy tiktoken but not copy ./node_modules/tiktoken/encoders +COPY --from=mainDeps /app/node_modules/tiktoken ./node_modules/tiktoken +RUN rm -rf ./node_modules/tiktoken/encoders + # copy package.json to version file COPY --from=builder /app/projects/$name/package.json ./package.json # copy config diff --git a/packages/service/common/string/tiktoken/index.ts b/packages/service/common/string/tiktoken/index.ts index 07137b0fd..9f3fd8e55 100644 --- a/packages/service/common/string/tiktoken/index.ts +++ b/packages/service/common/string/tiktoken/index.ts @@ -47,41 +47,45 @@ export const countGptMessagesTokens = ( tools?: ChatCompletionTool[], functionCall?: ChatCompletionCreateParams.Function[] ) => { - return new Promise((resolve) => { - const start = Date.now(); + return new Promise(async (resolve) => { + try { + const start = Date.now(); - const { worker, callbackMap } = getTiktokenWorker(); + const { worker, callbackMap } = getTiktokenWorker(); - const id = getNanoid(); + const id = getNanoid(); - const timer = setTimeout(() => { - console.log('Count token Time out'); - resolve( - messages.reduce((sum, item) => { - if (item.content) { - return sum + item.content.length * 0.5; - } - return sum; - }, 0) - ); - delete callbackMap[id]; - }, 60000); + const timer = setTimeout(() => { + console.log('Count token Time out'); + resolve( + messages.reduce((sum, item) => { + if (item.content) { + return sum + item.content.length * 0.5; + } + return sum; + }, 0) + ); + delete callbackMap[id]; + }, 60000); - callbackMap[id] = (data) => { - // 检测是否有内存泄漏 - addLog.info(`Count token time: ${Date.now() - start}, token: ${data}`); - // console.log(process.memoryUsage()); + callbackMap[id] = (data) => { + // 检测是否有内存泄漏 + addLog.info(`Count token time: ${Date.now() - start}, token: ${data}`); + // console.log(process.memoryUsage()); - resolve(data); - clearTimeout(timer); - }; + resolve(data); + clearTimeout(timer); + }; - worker.postMessage({ - id, - messages, - tools, - functionCall - }); + worker.postMessage({ + id, + messages, + tools, + functionCall + }); + } catch (error) { + resolve(0); + } }); }; diff --git a/packages/service/core/dataset/search/utils.ts b/packages/service/core/dataset/search/utils.ts index 4332423cf..e0c1abcab 100644 --- a/packages/service/core/dataset/search/utils.ts +++ b/packages/service/core/dataset/search/utils.ts @@ -2,6 +2,7 @@ import { LLMModelItemType } from '@fastgpt/global/core/ai/model.d'; import { queryExtension } from '../../ai/functions/queryExtension'; import { ChatItemType } from '@fastgpt/global/core/chat/type'; import { hashStr } from '@fastgpt/global/common/string/tools'; +import { chatValue2RuntimePrompt } from '@fastgpt/global/core/chat/adapt'; export const datasetSearchQueryExtension = async ({ query, @@ -33,11 +34,11 @@ export const datasetSearchQueryExtension = async ({ histories.length > 0 ? `${histories .map((item) => { - return `${item.obj}: ${item.value}`; + return `${item.obj}: ${chatValue2RuntimePrompt(item.value).text}`; }) .join('\n')} - Human: ${query} - ` +Human: ${query} +` : query; /* if query already extension, direct parse */ diff --git a/packages/service/package.json b/packages/service/package.json index 8f1f167ac..38010114a 100644 --- a/packages/service/package.json +++ b/packages/service/package.json @@ -13,7 +13,6 @@ "decompress": "^4.2.1", "domino-ext": "^2.1.4", "encoding": "^0.1.13", - "fastgpt-js-tiktoken": "^1.0.12", "file-type": "^19.0.0", "iconv-lite": "^0.6.3", "joplin-turndown-plugin-gfm": "^1.0.12", @@ -29,6 +28,7 @@ "papaparse": "5.4.1", "pdfjs-dist": "4.0.269", "pg": "^8.10.0", + "tiktoken": "^1.0.15", "tunnel": "^0.0.6", "turndown": "^7.1.2" }, diff --git a/packages/service/worker/tiktoken/countGptMessagesTokens.ts b/packages/service/worker/tiktoken/countGptMessagesTokens.ts index 1ef2b30e7..f6adb172e 100644 --- a/packages/service/worker/tiktoken/countGptMessagesTokens.ts +++ b/packages/service/worker/tiktoken/countGptMessagesTokens.ts @@ -1,5 +1,5 @@ /* Only the token of gpt-3.5-turbo is used */ -import { Tiktoken } from 'fastgpt-js-tiktoken/lite'; +import { Tiktoken } from 'tiktoken/lite'; import cl100k_base from './cl100k_base.json'; import { ChatCompletionMessageParam, @@ -10,7 +10,7 @@ import { import { ChatCompletionRequestMessageRoleEnum } from '@fastgpt/global/core/ai/constants'; import { parentPort } from 'worker_threads'; -const enc = new Tiktoken(cl100k_base); +const enc = new Tiktoken(cl100k_base.bpe_ranks, cl100k_base.special_tokens, cl100k_base.pat_str); /* count messages tokens */ parentPort?.on( diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4e78857b2..b0e2efd94 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -126,9 +126,6 @@ importers: encoding: specifier: ^0.1.13 version: 0.1.13 - fastgpt-js-tiktoken: - specifier: ^1.0.12 - version: registry.npmjs.org/fastgpt-js-tiktoken@1.0.12 file-type: specifier: ^19.0.0 version: 19.0.0 @@ -155,7 +152,7 @@ importers: version: 1.4.5-lts.1 next: specifier: 13.5.2 - version: 13.5.2(react-dom@18.2.0)(react@18.2.0) + version: 13.5.2(@babel/core@7.24.4)(react-dom@18.2.0)(react@18.2.0)(sass@1.58.3) nextjs-cors: specifier: ^2.1.2 version: 2.1.2(next@13.5.2) @@ -174,6 +171,9 @@ importers: pg: specifier: ^8.10.0 version: 8.10.0 + tiktoken: + specifier: ^1.0.15 + version: 1.0.15 tunnel: specifier: ^0.0.6 version: 0.0.6 @@ -9927,53 +9927,13 @@ packages: - '@babel/core' - babel-plugin-macros - /next@13.5.2(react-dom@18.2.0)(react@18.2.0): - resolution: {integrity: sha512-vog4UhUaMYAzeqfiAAmgB/QWLW7p01/sg+2vn6bqc/CxHFYizMzLv6gjxKzl31EVFkfl/F+GbxlKizlkTE9RdA==} - engines: {node: '>=16.14.0'} - hasBin: true - peerDependencies: - '@opentelemetry/api': ^1.1.0 - react: ^18.2.0 - react-dom: ^18.2.0 - sass: ^1.3.0 - peerDependenciesMeta: - '@opentelemetry/api': - optional: true - sass: - optional: true - dependencies: - '@next/env': 13.5.2 - '@swc/helpers': 0.5.2 - busboy: 1.6.0 - caniuse-lite: 1.0.30001603 - postcss: 8.4.14 - react: 18.2.0 - react-dom: 18.2.0(react@18.2.0) - styled-jsx: 5.1.1(react@18.2.0) - watchpack: 2.4.0 - zod: 3.21.4 - optionalDependencies: - '@next/swc-darwin-arm64': 13.5.2 - '@next/swc-darwin-x64': 13.5.2 - '@next/swc-linux-arm64-gnu': 13.5.2 - '@next/swc-linux-arm64-musl': 13.5.2 - '@next/swc-linux-x64-gnu': 13.5.2 - '@next/swc-linux-x64-musl': 13.5.2 - '@next/swc-win32-arm64-msvc': 13.5.2 - '@next/swc-win32-ia32-msvc': 13.5.2 - '@next/swc-win32-x64-msvc': 13.5.2 - transitivePeerDependencies: - - '@babel/core' - - babel-plugin-macros - dev: false - /nextjs-cors@2.1.2(next@13.5.2): resolution: {integrity: sha512-2yOVivaaf2ILe4f/qY32hnj3oC77VCOsUQJQfhVMGsXE/YMEWUY2zy78sH9FKUCM7eG42/l3pDofIzMD781XGA==} peerDependencies: next: ^8.1.1-canary.54 || ^9.0.0 || ^10.0.0-0 || ^11.0.0 || ^12.0.0 || ^13.0.0 dependencies: cors: 2.8.5 - next: 13.5.2(react-dom@18.2.0)(react@18.2.0) + next: 13.5.2(@babel/core@7.24.4)(react-dom@18.2.0)(react@18.2.0)(sass@1.58.3) dev: false /nextjs-node-loader@1.1.5(webpack@5.91.0): @@ -11759,23 +11719,6 @@ packages: client-only: 0.0.1 react: 18.2.0 - /styled-jsx@5.1.1(react@18.2.0): - resolution: {integrity: sha512-pW7uC1l4mBZ8ugbiZrcIsiIvVx1UmTfw7UkC3Um2tmfUq9Bhk8IiyEIPl6F8agHgjzku6j0xQEZbfA5uSgSaCw==} - engines: {node: '>= 12.0.0'} - peerDependencies: - '@babel/core': '*' - babel-plugin-macros: '*' - react: '>= 16.8.0 || 17.x.x || ^18.0.0-0' - peerDependenciesMeta: - '@babel/core': - optional: true - babel-plugin-macros: - optional: true - dependencies: - client-only: 0.0.1 - react: 18.2.0 - dev: false - /stylis@4.2.0: resolution: {integrity: sha512-Orov6g6BB1sDfYgzWfTHDOxamtX1bE/zo104Dh9e6fqJ3PooipYyfJ0pUmrZO2wAvO8YbEyeFrkV91XTsGMSrw==} dev: false @@ -11899,6 +11842,10 @@ packages: /through@2.3.8: resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==} + /tiktoken@1.0.15: + resolution: {integrity: sha512-sCsrq/vMWUSEW29CJLNmPvWxlVp7yh2tlkAjpJltIKqp5CKf98ZNpdeHRmAlPVFlGEbswDc6SmI8vz64W/qErw==} + dev: false + /timers-browserify@2.0.12: resolution: {integrity: sha512-9phl76Cqm6FhSX9Xe1ZUAMLtm1BLkKj2Qd5ApyWkXzsMRaA7dgr81kf4wJmQf/hAvg8EEyJxDo3du/0KlhPiKQ==} engines: {node: '>=0.6.0'} @@ -12850,11 +12797,3 @@ packages: engines: {node: '>=0.8'} hasBin: true dev: false - - registry.npmjs.org/fastgpt-js-tiktoken@1.0.12: - resolution: {integrity: sha512-93UQM9h267PFQqnaJjcc+tqbKRZuipRbi+ASxVcE1FBzXOVb4GKfOMlsxXKCsSDdP+Luv8Fgul7F3HXKITXjYQ==, registry: https://registry.npmmirror.com/, tarball: https://registry.npmjs.org/fastgpt-js-tiktoken/-/fastgpt-js-tiktoken-1.0.12.tgz} - name: fastgpt-js-tiktoken - version: 1.0.12 - dependencies: - base64-js: 1.5.1 - dev: false diff --git a/projects/app/next.config.js b/projects/app/next.config.js index 225d21ef9..1e2be4f7a 100644 --- a/projects/app/next.config.js +++ b/projects/app/next.config.js @@ -36,6 +36,10 @@ const nextConfig = { unknownContextCritical: false }; + if (!config.externals) { + config.externals = []; + } + if (isServer) { config.externals.push('worker_threads'); @@ -73,11 +77,13 @@ const nextConfig = { fs: false } }; - if (!config.externals) { - config.externals = []; - } } + config.experiments = { + asyncWebAssembly: true, + layers: true + }; + return config; }, transpilePackages: ['@fastgpt/*', 'ahooks', '@chakra-ui/*', 'react'],