From a9ae27033559bc2eb19b9c994ded3e9a20237858 Mon Sep 17 00:00:00 2001 From: Archer <545436317@qq.com> Date: Sun, 3 Dec 2023 20:45:57 +0800 Subject: [PATCH] 4.6.3-website dataset (#532) --- .../docs/custom-models/chatglm2-m3e.md | 2 +- .../docs/installation/upgrading/463.md | 32 + packages/global/common/file/api.d.ts | 8 + packages/global/common/file/tools.ts | 86 + .../global/common/plugin/types/pluginRes.d.ts | 4 - packages/global/common/string/jieba.ts | 1508 +++++++++++++++++ packages/global/common/string/markdown.ts | 97 ++ packages/global/common/string/textSplitter.ts | 72 +- packages/global/common/system/utils.ts | 6 + packages/global/core/chat/constants.ts | 2 - packages/global/core/dataset/api.d.ts | 31 +- packages/global/core/dataset/constant.ts | 32 +- packages/global/core/dataset/type.d.ts | 21 +- packages/global/package.json | 9 +- packages/global/support/wallet/bill/tools.ts | 15 +- packages/service/common/api/plusRequest.ts | 8 +- .../service/common/file/gridfs/controller.ts | 13 +- packages/service/core/chat/utils.ts | 20 +- .../core/dataset/collection/controller.ts | 73 + .../service/core/dataset/collection/schema.ts | 9 +- .../service/core/dataset/data/controller.ts | 75 + packages/service/core/dataset/data/pg.ts | 28 + packages/service/core/dataset/data/schema.ts | 5 +- packages/service/core/dataset/schema.ts | 49 +- pnpm-lock.yaml | 446 ++--- projects/app/package.json | 3 - projects/app/public/docs/versionIntro.md | 20 +- projects/app/public/locales/en/common.json | 74 +- projects/app/public/locales/zh/common.json | 76 +- .../src/components/ChatBox/MessageInput.tsx | 4 +- .../app/src/components/ChatBox/QuoteModal.tsx | 6 +- .../src/components/ChatBox/ResponseTags.tsx | 60 +- .../src/components/ChatBox/index.module.scss | 8 - .../app/src/components/EmptyTip/index.tsx | 6 +- .../Icon/icons/common/confirm/commonTip.svg | 1 + .../Icon/icons/common/confirm/deleteTip.svg | 1 + .../Icon/icons/common/routePushLight.svg | 11 + 
.../Icon/icons/common/viewLight.svg | 1 + .../Icon/icons/core/dataset/commonDataset.svg | 1 + .../Icon/icons/core/dataset/folderDataset.svg | 1 + .../icons/core/dataset/websiteDataset.svg | 14 + projects/app/src/components/Icon/index.tsx | 9 +- projects/app/src/components/Layout/index.tsx | 2 +- .../src/components/Markdown/chat/Image.tsx | 25 +- .../app/src/components/Markdown/img/Image.tsx | 1 - projects/app/src/components/MyModal/index.tsx | 11 +- .../src/components/common/MyRadio/index.tsx | 15 +- .../core/module/DatasetSelectModal.tsx | 21 +- .../user/team/TeamManageModal/EditModal.tsx | 4 +- projects/app/src/constants/dataset.ts | 6 +- .../app/src/global/core/api/datasetReq.d.ts | 20 +- projects/app/src/global/core/dataset/api.d.ts | 9 +- projects/app/src/global/core/prompt/AIChat.ts | 47 +- .../pages/account/components/BillDetail.tsx | 2 +- .../app/src/pages/account/components/Info.tsx | 4 +- projects/app/src/pages/api/admin/initv46-2.ts | 2 +- .../app/src/pages/api/admin/initv46-fix.ts | 7 +- projects/app/src/pages/api/admin/initv46.ts | 2 +- .../app/src/pages/api/admin/initv462-2.ts | 2 +- projects/app/src/pages/api/admin/initv462.ts | 20 +- .../app/src/pages/api/admin/initv463-2.ts | 62 + projects/app/src/pages/api/admin/initv463.ts | 70 +- .../timeTasks/checkUnValidDatasetFiles.ts | 92 + .../app/src/pages/api/common/file/read.ts | 33 +- .../app/src/pages/api/core/chat/chatTest.ts | 6 +- .../src/pages/api/core/dataset/allDataset.ts | 18 +- .../api/core/dataset/collection/create.ts | 74 +- .../collection/{delById.ts => delete.ts} | 31 +- .../pages/api/core/dataset/collection/list.ts | 11 +- .../app/src/pages/api/core/dataset/create.ts | 23 +- .../src/pages/api/core/dataset/data/delete.ts | 4 +- .../app/src/pages/api/core/dataset/delete.ts | 22 +- .../app/src/pages/api/core/dataset/list.ts | 21 +- .../src/pages/api/core/dataset/searchTest.ts | 8 +- .../app/src/pages/api/core/dataset/update.ts | 10 +- .../app/src/pages/api/plugins/urlFetch.ts | 73 - 
.../support/wallet/bill/createTrainingBill.ts | 4 +- projects/app/src/pages/api/tools/urlFetch.ts | 34 + .../app/src/pages/api/v1/chat/completions.ts | 8 +- projects/app/src/pages/api/v1/embeddings.ts | 35 +- projects/app/src/pages/api/v1/rerank.ts | 12 +- .../pages/app/detail/components/InfoModal.tsx | 4 +- .../pages/app/list/component/CreateModal.tsx | 4 +- .../detail/components/CollectionCard.tsx | 366 ++-- .../dataset/detail/components/DataCard.tsx | 20 +- .../detail/components/Import/FileSelect.tsx | 4 +- .../detail/components/Import/Provider.tsx | 3 + .../components/Import/UrlFetchModal.tsx | 65 +- .../components/Import/WebsiteConfig.tsx | 101 ++ .../pages/dataset/detail/components/Info.tsx | 93 +- .../detail/components/InputDataModal.tsx | 12 +- .../pages/dataset/detail/components/Test.tsx | 6 +- .../app/src/pages/dataset/detail/index.tsx | 26 +- .../dataset/list/component/CreateModal.tsx | 148 +- projects/app/src/pages/dataset/list/index.tsx | 95 +- .../pages/plugin/list/component/EditModal.tsx | 4 +- .../app/src/service/common/api/request.ts | 8 +- projects/app/src/service/core/ai/rerank.ts | 3 +- .../service/core/dataset/data/controller.ts | 29 +- .../app/src/service/core/dataset/data/pg.ts | 27 +- .../app/src/service/core/dataset/utils.ts | 3 +- .../src/service/moduleDispatch/chat/oneapi.ts | 12 +- .../app/src/service/moduleDispatch/index.ts | 23 +- .../service/support/permission/auth/user.ts | 8 +- .../src/service/support/wallet/bill/push.ts | 14 +- projects/app/src/types/core/chat/type.d.ts | 9 +- projects/app/src/utils/tools.ts | 7 - projects/app/src/web/common/api/request.ts | 8 +- .../app/src/web/common/file/controller.ts | 8 +- projects/app/src/web/common/file/utils.ts | 16 +- .../app/src/web/common/hooks/useConfirm.tsx | 117 +- .../app/src/web/common/hooks/useEditTitle.tsx | 42 +- projects/app/src/web/common/plugin/api.ts | 6 - .../app/src/web/common/system/staticData.ts | 2 +- .../src/web/common/system/useSystemStore.ts | 2 +- 
projects/app/src/web/common/tools/api.ts | 5 + projects/app/src/web/core/app/templates.ts | 2 +- projects/app/src/web/core/dataset/api.ts | 21 +- .../app/src/web/core/dataset/store/dataset.ts | 19 +- projects/app/src/web/core/dataset/utils.ts | 2 +- projects/app/src/web/styles/default.scss | 9 + .../app/src/web/support/wallet/pay/api.ts | 2 +- 122 files changed, 3793 insertions(+), 1360 deletions(-) create mode 100644 docSite/content/docs/installation/upgrading/463.md create mode 100644 packages/global/common/file/api.d.ts delete mode 100644 packages/global/common/plugin/types/pluginRes.d.ts create mode 100644 packages/global/common/string/jieba.ts create mode 100644 packages/global/common/string/markdown.ts create mode 100644 packages/global/common/system/utils.ts create mode 100644 packages/service/core/dataset/collection/controller.ts create mode 100644 packages/service/core/dataset/data/controller.ts create mode 100644 packages/service/core/dataset/data/pg.ts create mode 100644 projects/app/src/components/Icon/icons/common/confirm/commonTip.svg create mode 100644 projects/app/src/components/Icon/icons/common/confirm/deleteTip.svg create mode 100644 projects/app/src/components/Icon/icons/common/routePushLight.svg create mode 100644 projects/app/src/components/Icon/icons/common/viewLight.svg create mode 100644 projects/app/src/components/Icon/icons/core/dataset/commonDataset.svg create mode 100644 projects/app/src/components/Icon/icons/core/dataset/folderDataset.svg create mode 100644 projects/app/src/components/Icon/icons/core/dataset/websiteDataset.svg create mode 100644 projects/app/src/pages/api/admin/initv463-2.ts create mode 100644 projects/app/src/pages/api/admin/timeTasks/checkUnValidDatasetFiles.ts rename projects/app/src/pages/api/core/dataset/collection/{delById.ts => delete.ts} (58%) delete mode 100644 projects/app/src/pages/api/plugins/urlFetch.ts create mode 100644 projects/app/src/pages/api/tools/urlFetch.ts create mode 100644 
projects/app/src/pages/dataset/detail/components/Import/WebsiteConfig.tsx delete mode 100644 projects/app/src/web/common/plugin/api.ts create mode 100644 projects/app/src/web/common/tools/api.ts diff --git a/docSite/content/docs/custom-models/chatglm2-m3e.md b/docSite/content/docs/custom-models/chatglm2-m3e.md index daa328f50..1c8ab35c2 100644 --- a/docSite/content/docs/custom-models/chatglm2-m3e.md +++ b/docSite/content/docs/custom-models/chatglm2-m3e.md @@ -59,7 +59,7 @@ Authorization 为 sk-aaabbbcccdddeeefffggghhhiiijjjkkk。model 为刚刚在 One ## 接入 FastGPT -修改 config.json 配置文件,在 VectorModels 中加入 chatglm2 和 M3E 模型: +修改 config.json 配置文件,在 ChatModels 中加入 chatglm2,在 VectorModels 中加入 M3E 模型: ```json "ChatModels": [ diff --git a/docSite/content/docs/installation/upgrading/463.md b/docSite/content/docs/installation/upgrading/463.md new file mode 100644 index 000000000..d57967378 --- /dev/null +++ b/docSite/content/docs/installation/upgrading/463.md @@ -0,0 +1,32 @@ +--- +title: 'V4.6.3(需要初始化)' +description: 'FastGPT V4.6.3' +icon: 'upgrade' +draft: false +toc: true +weight: 833 +--- + +## 1. 执行初始化 API + +发起 1 个 HTTP 请求 ({{rootkey}} 替换成环境变量里的 `rootkey`,{{host}} 替换成自己域名) + +1. https://xxxxx/api/admin/initv463 + +```bash +curl --location --request POST 'https://{{host}}/api/admin/initv463' \ +--header 'rootkey: {{rootkey}}' \ +--header 'Content-Type: application/json' +``` + +初始化说明: +1. 初始化 Mongo 中 dataset、collection 和 data 的部分字段 + +## V4.6.3 功能介绍 + +1. 商业版新增 - web站点同步 +2. 新增 - 集合元数据记录 +3. 优化 - url 读取内容 +4. 优化 - 流读取文件,防止内存溢出 +5. 优化 - 4v模型自动将 url 转 base64,本地也可调试 +6. 
优化 - 图片压缩等级 diff --git a/packages/global/common/file/api.d.ts b/packages/global/common/file/api.d.ts new file mode 100644 index 000000000..40b2078e6 --- /dev/null +++ b/packages/global/common/file/api.d.ts @@ -0,0 +1,8 @@ +export type UrlFetchParams = { + urlList: string[]; + selector?: string; +}; +export type UrlFetchResponse = { + url: string; + content: string; +}[]; diff --git a/packages/global/common/file/tools.ts b/packages/global/common/file/tools.ts index 7944da451..b7c9e5d86 100644 --- a/packages/global/common/file/tools.ts +++ b/packages/global/common/file/tools.ts @@ -1,3 +1,8 @@ +import axios from 'axios'; +import { UrlFetchParams, UrlFetchResponse } from './api.d'; +import { htmlToMarkdown } from '../string/markdown'; +import * as cheerio from 'cheerio'; + export const formatFileSize = (bytes: number): string => { if (bytes === 0) return '0 B'; @@ -7,3 +12,84 @@ export const formatFileSize = (bytes: number): string => { return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; }; + +export const cheerioToHtml = ({ + fetchUrl, + $, + selector +}: { + fetchUrl: string; + $: cheerio.CheerioAPI; + selector?: string; +}) => { + // get origin url + const originUrl = new URL(fetchUrl).origin; + + // remove i element + $('i,script').remove(); + + // remove empty a element + $('a') + .filter((i, el) => { + return $(el).text().trim() === '' && $(el).children().length === 0; + }) + .remove(); + + // if link,img startWith /, add origin url + $('a').each((i, el) => { + const href = $(el).attr('href'); + if (href && href.startsWith('/')) { + $(el).attr('href', originUrl + href); + } + }); + $('img').each((i, el) => { + const src = $(el).attr('src'); + if (src && src.startsWith('/')) { + $(el).attr('src', originUrl + src); + } + }); + + return $(selector || 'body').html(); +}; +export const urlsFetch = async ({ + urlList, + selector +}: UrlFetchParams): Promise => { + urlList = urlList.filter((url) => /^(http|https):\/\/[^ "]+$/.test(url)); + + const 
response = ( + await Promise.all( + urlList.map(async (url) => { + try { + const fetchRes = await axios.get(url, { + timeout: 30000 + }); + + const $ = cheerio.load(fetchRes.data); + + const md = htmlToMarkdown( + cheerioToHtml({ + fetchUrl: url, + $, + selector + }) + ); + + return { + url, + content: md + }; + } catch (error) { + console.log(error, 'fetch error'); + + return { + url, + content: '' + }; + } + }) + ) + ).filter((item) => item.content); + + return response; +}; diff --git a/packages/global/common/plugin/types/pluginRes.d.ts b/packages/global/common/plugin/types/pluginRes.d.ts deleted file mode 100644 index e7504d4f0..000000000 --- a/packages/global/common/plugin/types/pluginRes.d.ts +++ /dev/null @@ -1,4 +0,0 @@ -export type FetchResultItem = { - url: string; - content: string; -}; diff --git a/packages/global/common/string/jieba.ts b/packages/global/common/string/jieba.ts new file mode 100644 index 000000000..89c3089f6 --- /dev/null +++ b/packages/global/common/string/jieba.ts @@ -0,0 +1,1508 @@ +export const stopWords = new Set([ + '--', + '?', + '“', + '”', + '》', + '--', + 'able', + 'about', + 'above', + 'according', + 'accordingly', + 'across', + 'actually', + 'after', + 'afterwards', + 'again', + 'against', + "ain't", + 'all', + 'allow', + 'allows', + 'almost', + 'alone', + 'along', + 'already', + 'also', + 'although', + 'always', + 'am', + 'among', + 'amongst', + 'an', + 'and', + 'another', + 'any', + 'anybody', + 'anyhow', + 'anyone', + 'anything', + 'anyway', + 'anyways', + 'anywhere', + 'apart', + 'appear', + 'appreciate', + 'appropriate', + 'are', + "aren't", + 'around', + 'as', + "a's", + 'aside', + 'ask', + 'asking', + 'associated', + 'at', + 'available', + 'away', + 'awfully', + 'be', + 'became', + 'because', + 'become', + 'becomes', + 'becoming', + 'been', + 'before', + 'beforehand', + 'behind', + 'being', + 'believe', + 'below', + 'beside', + 'besides', + 'best', + 'better', + 'between', + 'beyond', + 'both', + 'brief', + 'but', + 
'by', + 'came', + 'can', + 'cannot', + 'cant', + "can't", + 'cause', + 'causes', + 'certain', + 'certainly', + 'changes', + 'clearly', + "c'mon", + 'co', + 'com', + 'come', + 'comes', + 'concerning', + 'consequently', + 'consider', + 'considering', + 'contain', + 'containing', + 'contains', + 'corresponding', + 'could', + "couldn't", + 'course', + "c's", + 'currently', + 'definitely', + 'described', + 'despite', + 'did', + "didn't", + 'different', + 'do', + 'does', + "doesn't", + 'doing', + 'done', + "don't", + 'down', + 'downwards', + 'during', + 'each', + 'edu', + 'eg', + 'eight', + 'either', + 'else', + 'elsewhere', + 'enough', + 'entirely', + 'especially', + 'et', + 'etc', + 'even', + 'ever', + 'every', + 'everybody', + 'everyone', + 'everything', + 'everywhere', + 'ex', + 'exactly', + 'example', + 'except', + 'far', + 'few', + 'fifth', + 'first', + 'five', + 'followed', + 'following', + 'follows', + 'for', + 'former', + 'formerly', + 'forth', + 'four', + 'from', + 'further', + 'furthermore', + 'get', + 'gets', + 'getting', + 'given', + 'gives', + 'go', + 'goes', + 'going', + 'gone', + 'got', + 'gotten', + 'greetings', + 'had', + "hadn't", + 'happens', + 'hardly', + 'has', + "hasn't", + 'have', + "haven't", + 'having', + 'he', + 'hello', + 'help', + 'hence', + 'her', + 'here', + 'hereafter', + 'hereby', + 'herein', + "here's", + 'hereupon', + 'hers', + 'herself', + "he's", + 'hi', + 'him', + 'himself', + 'his', + 'hither', + 'hopefully', + 'how', + 'howbeit', + 'however', + "i'd", + 'ie', + 'if', + 'ignored', + "i'll", + "i'm", + 'immediate', + 'in', + 'inasmuch', + 'inc', + 'indeed', + 'indicate', + 'indicated', + 'indicates', + 'inner', + 'insofar', + 'instead', + 'into', + 'inward', + 'is', + "isn't", + 'it', + "it'd", + "it'll", + 'its', + "it's", + 'itself', + "i've", + 'just', + 'keep', + 'keeps', + 'kept', + 'know', + 'known', + 'knows', + 'last', + 'lately', + 'later', + 'latter', + 'latterly', + 'least', + 'less', + 'lest', + 'let', + "let's", + 
'like', + 'liked', + 'likely', + 'little', + 'look', + 'looking', + 'looks', + 'ltd', + 'mainly', + 'many', + 'may', + 'maybe', + 'me', + 'mean', + 'meanwhile', + 'merely', + 'might', + 'more', + 'moreover', + 'most', + 'mostly', + 'much', + 'must', + 'my', + 'myself', + 'name', + 'namely', + 'nd', + 'near', + 'nearly', + 'necessary', + 'need', + 'needs', + 'neither', + 'never', + 'nevertheless', + 'new', + 'next', + 'nine', + 'no', + 'nobody', + 'non', + 'none', + 'noone', + 'nor', + 'normally', + 'not', + 'nothing', + 'novel', + 'now', + 'nowhere', + 'obviously', + 'of', + 'off', + 'often', + 'oh', + 'ok', + 'okay', + 'old', + 'on', + 'once', + 'one', + 'ones', + 'only', + 'onto', + 'or', + 'other', + 'others', + 'otherwise', + 'ought', + 'our', + 'ours', + 'ourselves', + 'out', + 'outside', + 'over', + 'overall', + 'own', + 'particular', + 'particularly', + 'per', + 'perhaps', + 'placed', + 'please', + 'plus', + 'possible', + 'presumably', + 'probably', + 'provides', + 'que', + 'quite', + 'qv', + 'rather', + 'rd', + 're', + 'really', + 'reasonably', + 'regarding', + 'regardless', + 'regards', + 'relatively', + 'respectively', + 'right', + 'said', + 'same', + 'saw', + 'say', + 'saying', + 'says', + 'second', + 'secondly', + 'see', + 'seeing', + 'seem', + 'seemed', + 'seeming', + 'seems', + 'seen', + 'self', + 'selves', + 'sensible', + 'sent', + 'serious', + 'seriously', + 'seven', + 'several', + 'shall', + 'she', + 'should', + "shouldn't", + 'since', + 'six', + 'so', + 'some', + 'somebody', + 'somehow', + 'someone', + 'something', + 'sometime', + 'sometimes', + 'somewhat', + 'somewhere', + 'soon', + 'sorry', + 'specified', + 'specify', + 'specifying', + 'still', + 'sub', + 'such', + 'sup', + 'sure', + 'take', + 'taken', + 'tell', + 'tends', + 'th', + 'than', + 'thank', + 'thanks', + 'thanx', + 'that', + 'thats', + "that's", + 'the', + 'their', + 'theirs', + 'them', + 'themselves', + 'then', + 'thence', + 'there', + 'thereafter', + 'thereby', + 'therefore', + 
'therein', + 'theres', + "there's", + 'thereupon', + 'these', + 'they', + "they'd", + "they'll", + "they're", + "they've", + 'think', + 'third', + 'this', + 'thorough', + 'thoroughly', + 'those', + 'though', + 'three', + 'through', + 'throughout', + 'thru', + 'thus', + 'to', + 'together', + 'too', + 'took', + 'toward', + 'towards', + 'tried', + 'tries', + 'truly', + 'try', + 'trying', + "t's", + 'twice', + 'two', + 'un', + 'under', + 'unfortunately', + 'unless', + 'unlikely', + 'until', + 'unto', + 'up', + 'upon', + 'us', + 'use', + 'used', + 'useful', + 'uses', + 'using', + 'usually', + 'value', + 'various', + 'very', + 'via', + 'viz', + 'vs', + 'want', + 'wants', + 'was', + "wasn't", + 'way', + 'we', + "we'd", + 'welcome', + 'well', + "we'll", + 'went', + 'were', + "we're", + "weren't", + "we've", + 'what', + 'whatever', + "what's", + 'when', + 'whence', + 'whenever', + 'where', + 'whereafter', + 'whereas', + 'whereby', + 'wherein', + "where's", + 'whereupon', + 'wherever', + 'whether', + 'which', + 'while', + 'whither', + 'who', + 'whoever', + 'whole', + 'whom', + "who's", + 'whose', + 'why', + 'will', + 'willing', + 'wish', + 'with', + 'within', + 'without', + 'wonder', + "won't", + 'would', + "wouldn't", + 'yes', + 'yet', + 'you', + "you'd", + "you'll", + 'your', + "you're", + 'yours', + 'yourself', + 'yourselves', + "you've", + 'zero', + 'zt', + 'ZT', + 'zz', + 'ZZ', + '一', + '一下', + '一些', + '一切', + '一则', + '一天', + '一定', + '一方面', + '一旦', + '一时', + '一来', + '一样', + '一次', + '一片', + '一直', + '一致', + '一般', + '一起', + '一边', + '一面', + '万一', + '上下', + '上升', + '上去', + '上来', + '上述', + '上面', + '下列', + '下去', + '下来', + '下面', + '不一', + '不久', + '不仅', + '不会', + '不但', + '不光', + '不单', + '不变', + '不只', + '不可', + '不同', + '不够', + '不如', + '不得', + '不怕', + '不惟', + '不成', + '不拘', + '不敢', + '不断', + '不是', + '不比', + '不然', + '不特', + '不独', + '不管', + '不能', + '不要', + '不论', + '不足', + '不过', + '不问', + '与', + '与其', + '与否', + '与此同时', + '专门', + '且', + '两者', + '严格', + '严重', + '个', + '个人', + '个别', + 
'中小', + '中间', + '丰富', + '临', + '为', + '为主', + '为了', + '为什么', + '为什麽', + '为何', + '为着', + '主张', + '主要', + '举行', + '乃', + '乃至', + '么', + '之', + '之一', + '之前', + '之后', + '之後', + '之所以', + '之类', + '乌乎', + '乎', + '乘', + '也', + '也好', + '也是', + '也罢', + '了', + '了解', + '争取', + '于', + '于是', + '于是乎', + '云云', + '互相', + '产生', + '人们', + '人家', + '什么', + '什么样', + '什麽', + '今后', + '今天', + '今年', + '今後', + '仍然', + '从', + '从事', + '从而', + '他', + '他人', + '他们', + '他的', + '代替', + '以', + '以上', + '以下', + '以为', + '以便', + '以免', + '以前', + '以及', + '以后', + '以外', + '以後', + '以来', + '以至', + '以至于', + '以致', + '们', + '任', + '任何', + '任凭', + '任务', + '企图', + '伟大', + '似乎', + '似的', + '但', + '但是', + '何', + '何况', + '何处', + '何时', + '作为', + '你', + '你们', + '你的', + '使得', + '使用', + '例如', + '依', + '依照', + '依靠', + '促进', + '保持', + '俺', + '俺们', + '倘', + '倘使', + '倘或', + '倘然', + '倘若', + '假使', + '假如', + '假若', + '做到', + '像', + '允许', + '充分', + '先后', + '先後', + '先生', + '全部', + '全面', + '兮', + '共同', + '关于', + '其', + '其一', + '其中', + '其二', + '其他', + '其余', + '其它', + '其实', + '其次', + '具体', + '具体地说', + '具体说来', + '具有', + '再者', + '再说', + '冒', + '冲', + '决定', + '况且', + '准备', + '几', + '几乎', + '几时', + '凭', + '凭借', + '出去', + '出来', + '出现', + '分别', + '则', + '别', + '别的', + '别说', + '到', + '前后', + '前者', + '前进', + '前面', + '加之', + '加以', + '加入', + '加强', + '十分', + '即', + '即令', + '即使', + '即便', + '即或', + '即若', + '却不', + '原来', + '又', + '及', + '及其', + '及时', + '及至', + '双方', + '反之', + '反应', + '反映', + '反过来', + '反过来说', + '取得', + '受到', + '变成', + '另', + '另一方面', + '另外', + '只是', + '只有', + '只要', + '只限', + '叫', + '叫做', + '召开', + '叮咚', + '可', + '可以', + '可是', + '可能', + '可见', + '各', + '各个', + '各人', + '各位', + '各地', + '各种', + '各级', + '各自', + '合理', + '同', + '同一', + '同时', + '同样', + '后来', + '后面', + '向', + '向着', + '吓', + '吗', + '否则', + '吧', + '吧哒', + '吱', + '呀', + '呃', + '呕', + '呗', + '呜', + '呜呼', + '呢', + '周围', + '呵', + '呸', + '呼哧', + '咋', + '和', + '咚', + '咦', + '咱', + '咱们', + '咳', + '哇', + '哈', + '哈哈', + '哉', + '哎', + '哎呀', + '哎哟', + '哗', + '哟', + '哦', + '哩', + '哪', + 
'哪个', + '哪些', + '哪儿', + '哪天', + '哪年', + '哪怕', + '哪样', + '哪边', + '哪里', + '哼', + '哼唷', + '唉', + '啊', + '啐', + '啥', + '啦', + '啪达', + '喂', + '喏', + '喔唷', + '嗡嗡', + '嗬', + '嗯', + '嗳', + '嘎', + '嘎登', + '嘘', + '嘛', + '嘻', + '嘿', + '因', + '因为', + '因此', + '因而', + '固然', + '在', + '在下', + '地', + '坚决', + '坚持', + '基本', + '处理', + '复杂', + '多', + '多少', + '多数', + '多次', + '大力', + '大多数', + '大大', + '大家', + '大批', + '大约', + '大量', + '失去', + '她', + '她们', + '她的', + '好的', + '好象', + '如', + '如上所述', + '如下', + '如何', + '如其', + '如果', + '如此', + '如若', + '存在', + '宁', + '宁可', + '宁愿', + '宁肯', + '它', + '它们', + '它们的', + '它的', + '安全', + '完全', + '完成', + '实现', + '实际', + '宣布', + '容易', + '密切', + '对', + '对于', + '对应', + '将', + '少数', + '尔后', + '尚且', + '尤其', + '就', + '就是', + '就是说', + '尽', + '尽管', + '属于', + '岂但', + '左右', + '巨大', + '巩固', + '己', + '已经', + '帮助', + '常常', + '并', + '并不', + '并不是', + '并且', + '并没有', + '广大', + '广泛', + '应当', + '应用', + '应该', + '开外', + '开始', + '开展', + '引起', + '强烈', + '强调', + '归', + '当', + '当前', + '当时', + '当然', + '当着', + '形成', + '彻底', + '彼', + '彼此', + '往', + '往往', + '待', + '後来', + '後面', + '得', + '得出', + '得到', + '心里', + '必然', + '必要', + '必须', + '怎', + '怎么', + '怎么办', + '怎么样', + '怎样', + '怎麽', + '总之', + '总是', + '总的来看', + '总的来说', + '总的说来', + '总结', + '总而言之', + '恰恰相反', + '您', + '意思', + '愿意', + '慢说', + '成为', + '我', + '我们', + '我的', + '或', + '或是', + '或者', + '战斗', + '所', + '所以', + '所有', + '所谓', + '打', + '扩大', + '把', + '抑或', + '拿', + '按', + '按照', + '换句话说', + '换言之', + '据', + '掌握', + '接着', + '接著', + '故', + '故此', + '整个', + '方便', + '方面', + '旁人', + '无宁', + '无法', + '无论', + '既', + '既是', + '既然', + '时候', + '明显', + '明确', + '是', + '是否', + '是的', + '显然', + '显著', + '普通', + '普遍', + '更加', + '曾经', + '替', + '最后', + '最大', + '最好', + '最後', + '最近', + '最高', + '有', + '有些', + '有关', + '有利', + '有力', + '有所', + '有效', + '有时', + '有点', + '有的', + '有着', + '有著', + '望', + '朝', + '朝着', + '本', + '本着', + '来', + '来着', + '极了', + '构成', + '果然', + '果真', + '某', + '某个', + '某些', + '根据', + '根本', + '欢迎', + '正在', + '正如', + '正常', + '此', + '此外', + '此时', + 
'此间', + '毋宁', + '每', + '每个', + '每天', + '每年', + '每当', + '比', + '比如', + '比方', + '比较', + '毫不', + '没有', + '沿', + '沿着', + '注意', + '深入', + '清楚', + '满足', + '漫说', + '焉', + '然则', + '然后', + '然後', + '然而', + '照', + '照着', + '特别是', + '特殊', + '特点', + '现代', + '现在', + '甚么', + '甚而', + '甚至', + '用', + '由', + '由于', + '由此可见', + '的', + '的话', + '目前', + '直到', + '直接', + '相似', + '相信', + '相反', + '相同', + '相对', + '相对而言', + '相应', + '相当', + '相等', + '省得', + '看出', + '看到', + '看来', + '看看', + '看见', + '真是', + '真正', + '着', + '着呢', + '矣', + '知道', + '确定', + '离', + '积极', + '移动', + '突出', + '突然', + '立即', + '第', + '等', + '等等', + '管', + '紧接着', + '纵', + '纵令', + '纵使', + '纵然', + '练习', + '组成', + '经', + '经常', + '经过', + '结合', + '结果', + '给', + '绝对', + '继续', + '继而', + '维持', + '综上所述', + '罢了', + '考虑', + '者', + '而', + '而且', + '而况', + '而外', + '而已', + '而是', + '而言', + '联系', + '能', + '能否', + '能够', + '腾', + '自', + '自个儿', + '自从', + '自各儿', + '自家', + '自己', + '自身', + '至', + '至于', + '良好', + '若', + '若是', + '若非', + '范围', + '莫若', + '获得', + '虽', + '虽则', + '虽然', + '虽说', + '行为', + '行动', + '表明', + '表示', + '被', + '要', + '要不', + '要不是', + '要不然', + '要么', + '要是', + '要求', + '规定', + '觉得', + '认为', + '认真', + '认识', + '让', + '许多', + '论', + '设使', + '设若', + '该', + '说明', + '诸位', + '谁', + '谁知', + '赶', + '起', + '起来', + '起见', + '趁', + '趁着', + '越是', + '跟', + '转动', + '转变', + '转贴', + '较', + '较之', + '边', + '达到', + '迅速', + '过', + '过去', + '过来', + '运用', + '还是', + '还有', + '这', + '这个', + '这么', + '这么些', + '这么样', + '这么点儿', + '这些', + '这会儿', + '这儿', + '这就是说', + '这时', + '这样', + '这点', + '这种', + '这边', + '这里', + '这麽', + '进入', + '进步', + '进而', + '进行', + '连', + '连同', + '适应', + '适当', + '适用', + '逐步', + '逐渐', + '通常', + '通过', + '造成', + '遇到', + '遭到', + '避免', + '那', + '那个', + '那么', + '那么些', + '那么样', + '那些', + '那会儿', + '那儿', + '那时', + '那样', + '那边', + '那里', + '那麽', + '部分', + '鄙人', + '采取', + '里面', + '重大', + '重新', + '重要', + '鉴于', + '问题', + '防止', + '阿', + '附近', + '限制', + '除', + '除了', + '除此之外', + '除非', + '随', + '随着', + '随著', + '集中', + '需要', + '非但', + '非常', + '非徒', + '靠', + '顺', + '顺着', 
+ '首先', + '高兴', + '是不是', + '说说', + ' ', + [ + 'about', + 'after', + 'all', + 'also', + 'am', + 'an', + 'and', + 'another', + 'any', + 'are', + 'as', + 'at', + 'be', + 'because', + 'been', + 'before', + 'being', + 'between', + 'both', + 'but', + 'by', + 'came', + 'can', + 'come', + 'could', + 'did', + 'do', + 'each', + 'for', + 'from', + 'get', + 'got', + 'has', + 'had', + 'he', + 'have', + 'her', + 'here', + 'him', + 'himself', + 'his', + 'how', + 'if', + 'in', + 'into', + 'is', + 'it', + 'like', + 'make', + 'many', + 'me', + 'might', + 'more', + 'most', + 'much', + 'must', + 'my', + 'never', + 'now', + 'of', + 'on', + 'only', + 'or', + 'other', + 'our', + 'out', + 'over', + 'said', + 'same', + 'should', + 'since', + 'some', + 'still', + 'such', + 'take', + 'than', + 'that', + 'the', + 'their', + 'them', + 'then', + 'there', + 'these', + 'they', + 'this', + 'those', + 'through', + 'to', + 'too', + 'under', + 'up', + 'very', + 'was', + 'way', + 'we', + 'well', + 'were', + 'what', + 'where', + 'which', + 'while', + 'who', + 'with', + 'would', + 'you', + 'your', + 'a', + 'i' + ] +]); diff --git a/packages/global/common/string/markdown.ts b/packages/global/common/string/markdown.ts new file mode 100644 index 000000000..5eb3c5518 --- /dev/null +++ b/packages/global/common/string/markdown.ts @@ -0,0 +1,97 @@ +import { simpleText } from './tools'; +import { NodeHtmlMarkdown } from 'node-html-markdown'; + +/* Delete redundant text in markdown */ +export const simpleMarkdownText = (rawText: string) => { + rawText = simpleText(rawText); + + // Remove a line feed from a hyperlink or picture + rawText = rawText.replace(/\[([^\]]+)\]\((.+?)\)/g, (match, linkText, url) => { + const cleanedLinkText = linkText.replace(/\n/g, ' ').trim(); + + if (!url) { + return ''; + } + + return `[${cleanedLinkText}](${url})`; + }); + + // replace special \.* …… + const reg1 = /\\([-.!`_(){}\[\]])/g; + if (reg1.test(rawText)) { + rawText = rawText.replace(/\\([`!*()+-_\[\]{}\\.])/g, '$1'); + } + 
+ // replace \\n + rawText = rawText.replace(/\\\\n/g, '\\n'); + + // Remove headings and code blocks front spaces + ['####', '###', '##', '#', '```', '~~~'].forEach((item) => { + const reg = new RegExp(`\\n\\s*${item}`, 'g'); + if (reg.test(rawText)) { + rawText = rawText.replace(new RegExp(`\\n\\s*(${item})`, 'g'), '\n$1'); + } + }); + + return rawText.trim(); +}; + +/* html string to markdown */ +export const htmlToMarkdown = (html?: string | null) => { + if (!html) return ''; + + const surround = (source: string, surroundStr: string) => `${surroundStr}${source}${surroundStr}`; + + const nhm = new NodeHtmlMarkdown( + { + codeFence: '```', + codeBlockStyle: 'fenced', + ignore: ['i', 'script'] + }, + { + code: ({ node, parent, options: { codeFence, codeBlockStyle }, visitor }) => { + const isCodeBlock = ['PRE', 'WRAPPED-PRE'].includes(parent?.tagName!); + + if (!isCodeBlock) { + return { + spaceIfRepeatingChar: true, + noEscape: true, + postprocess: ({ content }) => { + // Find longest occurring sequence of running backticks and add one more (so content is escaped) + const delimiter = + '`' + (content.match(/`+/g)?.sort((a, b) => b.length - a.length)?.[0] || ''); + const padding = delimiter.length > 1 ? 
' ' : ''; + + return surround(surround(content, padding), delimiter); + } + }; + } + + /* Handle code block */ + if (codeBlockStyle === 'fenced') { + const language = + node.getAttribute('class')?.match(/language-(\S+)/)?.[1] || + parent?.getAttribute('class')?.match(/language-(\S+)/)?.[1] || + ''; + + return { + noEscape: true, + prefix: `${codeFence}${language}\n`, + postfix: `\n${codeFence}\n`, + childTranslators: visitor.instance.codeBlockTranslators + }; + } + + return { + noEscape: true, + postprocess: ({ content }) => content.replace(/^/gm, ' '), + childTranslators: visitor.instance.codeBlockTranslators + }; + } + } + ); + + const markdown = nhm.translate(html).trim(); + + return simpleMarkdownText(markdown); +}; diff --git a/packages/global/common/string/textSplitter.ts b/packages/global/common/string/textSplitter.ts index b3b3a1b4c..59e7b5f91 100644 --- a/packages/global/common/string/textSplitter.ts +++ b/packages/global/common/string/textSplitter.ts @@ -15,11 +15,18 @@ export const splitText2Chunks = (props: { }): { chunks: string[]; tokens: number; + overlapRatio?: number; } => { - const { text = '', chunkLen, overlapRatio = 0.2 } = props; + let { text = '', chunkLen, overlapRatio = 0.2 } = props; const splitMarker = 'SPLIT_HERE_SPLIT_HERE'; + const codeBlockMarker = 'CODE_BLOCK_LINE_MARKER'; const overlapLen = Math.round(chunkLen * overlapRatio); + // replace code block all \n to codeBlockMarker + text = text.replace(/(```[\s\S]*?```|~~~[\s\S]*?~~~)/g, function (match) { + return match.replace(/\n/g, codeBlockMarker); + }); + // The larger maxLen is, the next sentence is less likely to trigger splitting const stepReges: { reg: RegExp; maxLen: number }[] = [ { reg: /^(#\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, @@ -27,8 +34,8 @@ export const splitText2Chunks = (props: { { reg: /^(###\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, { reg: /^(####\s[^\n]+)\n/gm, maxLen: chunkLen * 1.4 }, - { reg: /([\n]{2})/g, maxLen: chunkLen * 1.4 }, - { reg: 
/([\n](?![\*\-|>`0-9]))/g, maxLen: chunkLen * 1.8 }, // (?![\*\-|>`0-9]): markdown special char + { reg: /([\n](`))/g, maxLen: chunkLen * 4 }, // code block + { reg: /([\n](?![\*\-|>0-9]))/g, maxLen: chunkLen * 1.8 }, // (?![\*\-|>`0-9]): markdown special char { reg: /([\n])/g, maxLen: chunkLen * 1.4 }, { reg: /([。]|([a-zA-Z])\.\s)/g, maxLen: chunkLen * 1.4 }, @@ -38,9 +45,15 @@ export const splitText2Chunks = (props: { { reg: /([,]|,\s)/g, maxLen: chunkLen * 2 } ]; + // if use markdown title split, Separate record title title const getSplitTexts = ({ text, step }: { text: string; step: number }) => { if (step >= stepReges.length) { - return [text]; + return [ + { + text, + title: '' + } + ]; } const isMarkdownSplit = step <= 3; const { reg } = stepReges[step]; @@ -49,7 +62,17 @@ export const splitText2Chunks = (props: { .replace(reg, isMarkdownSplit ? `${splitMarker}$1` : `$1${splitMarker}`) .split(`${splitMarker}`) .filter((part) => part.trim()); - return splitTexts; + + return splitTexts + .map((text) => { + const matchTitle = isMarkdownSplit ? text.match(reg)?.[0] || '' : ''; + + return { + text: isMarkdownSplit ? 
text.replace(matchTitle, '') : text, + title: matchTitle + }; + }) + .filter((item) => item.text.trim()); }; const getOneTextOverlapText = ({ text, step }: { text: string; step: number }): string => { @@ -63,7 +86,7 @@ export const splitText2Chunks = (props: { let overlayText = ''; for (let i = splitTexts.length - 1; i >= 0; i--) { - const currentText = splitTexts[i]; + const currentText = splitTexts[i].text; const newText = currentText + overlayText; const newTextLen = newText.length; @@ -83,12 +106,16 @@ export const splitText2Chunks = (props: { const splitTextRecursively = ({ text = '', step, - lastText + lastText, + mdTitle = '' }: { text: string; step: number; lastText: string; + mdTitle: string; }): string[] => { + const isMarkdownSplit = step <= 3; + // mini text if (text.length <= chunkLen) { return [text]; @@ -102,7 +129,7 @@ export const splitText2Chunks = (props: { // use slice-chunkLen to split text const chunks: string[] = []; for (let i = 0; i < text.length; i += chunkLen - overlapLen) { - chunks.push(text.slice(i, i + chunkLen)); + chunks.push(`${mdTitle}${text.slice(i, i + chunkLen)}`); } return chunks; } @@ -115,7 +142,10 @@ export const splitText2Chunks = (props: { const chunks: string[] = []; for (let i = 0; i < splitTexts.length; i++) { - const currentText = splitTexts[i]; + const item = splitTexts[i]; + const currentTitle = `${mdTitle}${item.title}`; + + const currentText = item.text; const currentTextLen = currentText.length; const lastTextLen = lastText.length; const newText = lastText + currentText; @@ -125,9 +155,10 @@ export const splitText2Chunks = (props: { if (newTextLen > maxLen) { // lastText greater minChunkLen, direct push it to chunks, not add to next chunk. 
(large lastText) if (lastTextLen > minChunkLen) { - chunks.push(lastText); + chunks.push(`${currentTitle}${lastText}`); lastText = getOneTextOverlapText({ text: lastText, step }); // next chunk will start with overlayText i--; + continue; } @@ -135,11 +166,12 @@ export const splitText2Chunks = (props: { const innerChunks = splitTextRecursively({ text: newText, step: step + 1, - lastText: '' + lastText: '', + mdTitle: currentTitle }); const lastChunk = innerChunks[innerChunks.length - 1]; // last chunk is too small, concat it to lastText - if (lastChunk.length < minChunkLen) { + if (!isMarkdownSplit && lastChunk.length < minChunkLen) { chunks.push(...innerChunks.slice(0, -1)); lastText = lastChunk; } else { @@ -156,10 +188,11 @@ export const splitText2Chunks = (props: { // size less than chunkLen, push text to last chunk. now, text definitely less than maxLen lastText = newText; - // If the chunk size reaches, add a chunk - if (newTextLen >= chunkLen) { - chunks.push(lastText); - lastText = getOneTextOverlapText({ text: lastText, step }); + // markdown paragraph block: Direct addition; If the chunk size reaches, add a chunk + if (isMarkdownSplit || newTextLen >= chunkLen) { + chunks.push(`${currentTitle}${lastText}`); + + lastText = isMarkdownSplit ? 
'' : getOneTextOverlapText({ text: lastText, step }); } } @@ -168,7 +201,7 @@ export const splitText2Chunks = (props: { if (lastText.length < chunkLen * 0.4) { chunks[chunks.length - 1] = chunks[chunks.length - 1] + lastText; } else { - chunks.push(lastText); + chunks.push(`${mdTitle}${lastText}`); } } @@ -179,8 +212,9 @@ export const splitText2Chunks = (props: { const chunks = splitTextRecursively({ text, step: 0, - lastText: '' - }); + lastText: '', + mdTitle: '' + }).map((chunk) => chunk.replaceAll(codeBlockMarker, '\n')); // restore code block const tokens = chunks.reduce((sum, chunk) => sum + countPromptTokens(chunk, 'system'), 0); diff --git a/packages/global/common/system/utils.ts b/packages/global/common/system/utils.ts new file mode 100644 index 000000000..a6ca74d1b --- /dev/null +++ b/packages/global/common/system/utils.ts @@ -0,0 +1,6 @@ +export const delay = (ms: number) => + new Promise((resolve) => { + setTimeout(() => { + resolve(''); + }, ms); + }); diff --git a/packages/global/core/chat/constants.ts b/packages/global/core/chat/constants.ts index 8a9213a4c..3e490abdc 100644 --- a/packages/global/core/chat/constants.ts +++ b/packages/global/core/chat/constants.ts @@ -5,7 +5,6 @@ export enum ChatRoleEnum { Function = 'Function', Tool = 'Tool' } - export const ChatRoleMap = { [ChatRoleEnum.System]: { name: '系统提示词' @@ -30,7 +29,6 @@ export enum ChatSourceEnum { share = 'share', api = 'api' } - export const ChatSourceMap = { [ChatSourceEnum.test]: { name: 'chat.logs.test' diff --git a/packages/global/core/dataset/api.d.ts b/packages/global/core/dataset/api.d.ts index 4d5049389..e1906b725 100644 --- a/packages/global/core/dataset/api.d.ts +++ b/packages/global/core/dataset/api.d.ts @@ -1,8 +1,32 @@ -import { DatasetDataIndexItemType } from './type'; +import { DatasetDataIndexItemType, DatasetSchemaType } from './type'; +import { DatasetCollectionTrainingModeEnum, DatasetCollectionTypeEnum } from './constant'; +import type { LLMModelItemType } from 
'../ai/model.d'; /* ================= dataset ===================== */ +export type DatasetUpdateBody = { + id: string; + parentId?: string; + tags?: string[]; + name?: string; + avatar?: string; + permission?: DatasetSchemaType['permission']; + agentModel?: LLMModelItemType; + websiteConfig?: DatasetSchemaType['websiteConfig']; + status?: DatasetSchemaType['status']; +}; /* ================= collection ===================== */ +export type CreateDatasetCollectionParams = { + datasetId: string; + parentId?: string; + name: string; + type: `${DatasetCollectionTypeEnum}`; + trainingType?: `${DatasetCollectionTrainingModeEnum}`; + chunkSize?: number; + fileId?: string; + rawLink?: string; + metadata?: Record; +}; /* ================= data ===================== */ export type PgSearchRawType = { @@ -18,3 +42,8 @@ export type PushDatasetDataChunkProps = { a?: string; // bonus content indexes?: Omit[]; }; + +export type PostWebsiteSyncParams = { + datasetId: string; + billId: string; +}; diff --git a/packages/global/core/dataset/constant.ts b/packages/global/core/dataset/constant.ts index 560c23b97..4dfb618d8 100644 --- a/packages/global/core/dataset/constant.ts +++ b/packages/global/core/dataset/constant.ts @@ -3,15 +3,37 @@ export const PgDatasetTableName = 'modeldata'; /* ------------ dataset -------------- */ export enum DatasetTypeEnum { folder = 'folder', - dataset = 'dataset' + dataset = 'dataset', + websiteDataset = 'websiteDataset' // depp link } - export const DatasetTypeMap = { [DatasetTypeEnum.folder]: { - name: 'folder' + icon: 'core/dataset/folderDataset', + label: 'core.dataset.Folder Dataset', + collectionLabel: 'common.Folder' }, [DatasetTypeEnum.dataset]: { - name: 'dataset' + icon: 'core/dataset/commonDataset', + label: 'core.dataset.Common Dataset', + collectionLabel: 'common.File' + }, + [DatasetTypeEnum.websiteDataset]: { + icon: 'core/dataset/websiteDataset', + label: 'core.dataset.Website Dataset', + collectionLabel: 'common.Website' + } +}; + 
+export enum DatasetStatusEnum { + active = 'active', + syncing = 'syncing' +} +export const DatasetStatusMap = { + [DatasetStatusEnum.active]: { + label: 'core.dataset.status.active' + }, + [DatasetStatusEnum.syncing]: { + label: 'core.dataset.status.syncing' } }; @@ -19,7 +41,7 @@ export const DatasetTypeMap = { export enum DatasetCollectionTypeEnum { folder = 'folder', file = 'file', - link = 'link', + link = 'link', // one link virtual = 'virtual' } export const DatasetCollectionTypeMap = { diff --git a/packages/global/core/dataset/type.d.ts b/packages/global/core/dataset/type.d.ts index be6b04d9e..2719bc5a9 100644 --- a/packages/global/core/dataset/type.d.ts +++ b/packages/global/core/dataset/type.d.ts @@ -4,6 +4,7 @@ import { PushDatasetDataChunkProps } from './api'; import { DatasetCollectionTypeEnum, DatasetDataIndexTypeEnum, + DatasetStatusEnum, DatasetTypeEnum, TrainingModeEnum } from './constant'; @@ -20,9 +21,14 @@ export type DatasetSchemaType = { name: string; vectorModel: string; agentModel: string; - tags: string[]; + intro: string; type: `${DatasetTypeEnum}`; + status: `${DatasetStatusEnum}`; permission: `${PermissionTypeEnum}`; + websiteConfig?: { + url: string; + selector: string; + }; }; export type DatasetCollectionSchemaType = { @@ -39,6 +45,7 @@ export type DatasetCollectionSchemaType = { chunkSize: number; fileId?: string; rawLink?: string; + metadata?: Record; }; export type DatasetDataIndexItemType = { @@ -91,6 +98,18 @@ export type DatasetDataWithCollectionType = Omit & { vectorModel: VectorModelItemType; agentModel: LLMModelItemType; diff --git a/packages/global/package.json b/packages/global/package.json index a4acf2986..a8b6dbbe8 100644 --- a/packages/global/package.json +++ b/packages/global/package.json @@ -3,13 +3,16 @@ "version": "1.0.0", "dependencies": { "axios": "^1.5.1", - "timezones-list": "^3.0.2", + "cheerio": "1.0.0-rc.12", "dayjs": "^1.11.7", "encoding": "^0.1.13", + "js-tiktoken": "^1.0.7", + "node-html-markdown": 
"^1.3.0", "openai": "^4.16.1", - "js-tiktoken": "^1.0.7" + "timezones-list": "^3.0.2" }, "devDependencies": { - "@types/node": "^20.8.5" + "@types/node": "^20.8.5", + "@types/turndown": "^5.0.4" } } diff --git a/packages/global/support/wallet/bill/tools.ts b/packages/global/support/wallet/bill/tools.ts index 50db285a7..50efa74ee 100644 --- a/packages/global/support/wallet/bill/tools.ts +++ b/packages/global/support/wallet/bill/tools.ts @@ -1,6 +1,7 @@ /* bill common */ import { PRICE_SCALE } from './constants'; -import { BillItemType, BillSchema } from './type'; +import { BillSourceEnum } from './constants'; +import { AuthUserTypeEnum } from '../../permission/constant'; /** * dataset price / PRICE_SCALE = real price @@ -8,3 +9,15 @@ import { BillItemType, BillSchema } from './type'; export const formatPrice = (val = 0, multiple = 1) => { return Number(((val / PRICE_SCALE) * multiple).toFixed(10)); }; + +export const getBillSourceByAuthType = ({ + shareId, + authType +}: { + shareId?: string; + authType?: `${AuthUserTypeEnum}`; +}) => { + if (shareId) return BillSourceEnum.shareLink; + if (authType === AuthUserTypeEnum.apikey) return BillSourceEnum.api; + return BillSourceEnum.fastgpt; +}; diff --git a/packages/service/common/api/plusRequest.ts b/packages/service/common/api/plusRequest.ts index 6aba9fe4e..1f66ee242 100644 --- a/packages/service/common/api/plusRequest.ts +++ b/packages/service/common/api/plusRequest.ts @@ -101,18 +101,18 @@ export function request(url: string, data: any, config: ConfigType, method: Meth * @param {Object} config * @returns */ -export function GET(url: string, params = {}, config: ConfigType = {}): Promise { +export function GET(url: string, params = {}, config: ConfigType = {}): Promise { return request(url, params, config, 'GET'); } -export function POST(url: string, data = {}, config: ConfigType = {}): Promise { +export function POST(url: string, data = {}, config: ConfigType = {}): Promise { return request(url, data, config, 
'POST'); } -export function PUT(url: string, data = {}, config: ConfigType = {}): Promise { +export function PUT(url: string, data = {}, config: ConfigType = {}): Promise { return request(url, data, config, 'PUT'); } -export function DELETE(url: string, data = {}, config: ConfigType = {}): Promise { +export function DELETE(url: string, data = {}, config: ConfigType = {}): Promise { return request(url, data, config, 'DELETE'); } diff --git a/packages/service/common/file/gridfs/controller.ts b/packages/service/common/file/gridfs/controller.ts index 17c6a627a..717491792 100644 --- a/packages/service/common/file/gridfs/controller.ts +++ b/packages/service/common/file/gridfs/controller.ts @@ -89,7 +89,7 @@ export async function delFileById({ return true; } -export async function getDownloadBuf({ +export async function getDownloadStream({ bucketName, fileId }: { @@ -98,14 +98,5 @@ export async function getDownloadBuf({ }) { const bucket = getGridBucket(bucketName); - const stream = bucket.openDownloadStream(new Types.ObjectId(fileId)); - - const buf: Buffer = await new Promise((resolve, reject) => { - const buffers: Buffer[] = []; - stream.on('data', (data) => buffers.push(data)); - stream.on('error', reject); - stream.on('end', () => resolve(Buffer.concat(buffers))); - }); - - return buf; + return bucket.openDownloadStream(new Types.ObjectId(fileId)); } diff --git a/packages/service/core/chat/utils.ts b/packages/service/core/chat/utils.ts index 3a5056dc7..32ca4a5bf 100644 --- a/packages/service/core/chat/utils.ts +++ b/packages/service/core/chat/utils.ts @@ -3,6 +3,7 @@ import { ChatRoleEnum, IMG_BLOCK_KEY } from '@fastgpt/global/core/chat/constants import { countMessagesTokens, countPromptTokens } from '@fastgpt/global/common/string/tiktoken'; import { adaptRole_Chat2Message } from '@fastgpt/global/core/chat/adapt'; import type { ChatCompletionContentPart } from '@fastgpt/global/core/ai/type.d'; +import axios from 'axios'; /* slice chat context by tokens */ export 
function ChatContextFilter({ @@ -81,11 +82,13 @@ export function ChatContextFilter({ } ] */ -export function formatStr2ChatContent(str: string) { +export async function formatStr2ChatContent(str: string) { const content: ChatCompletionContentPart[] = []; let lastIndex = 0; const regex = new RegExp(`\`\`\`(${IMG_BLOCK_KEY})\\n([\\s\\S]*?)\`\`\``, 'g'); + const imgKey: 'image_url' = 'image_url'; + let match; while ((match = regex.exec(str)) !== null) { @@ -115,7 +118,7 @@ export function formatStr2ChatContent(str: string) { content.push( ...jsonLines.map((item) => ({ - type: 'image_url' as any, + type: imgKey, image_url: { url: item.src } @@ -148,5 +151,18 @@ export function formatStr2ChatContent(str: string) { if (content.length === 1 && content[0].type === 'text') { return content[0].text; } + + if (!content) return null; + // load img to base64 + for await (const item of content) { + if (item.type === imgKey && item[imgKey]?.url) { + const response = await axios.get(item[imgKey].url, { + responseType: 'arraybuffer' + }); + const base64 = Buffer.from(response.data).toString('base64'); + item[imgKey].url = `data:${response.headers['content-type']};base64,${base64}`; + } + } + return content ? 
content : null; } diff --git a/packages/service/core/dataset/collection/controller.ts b/packages/service/core/dataset/collection/controller.ts new file mode 100644 index 000000000..3ea431e7d --- /dev/null +++ b/packages/service/core/dataset/collection/controller.ts @@ -0,0 +1,73 @@ +import { + DatasetCollectionTrainingModeEnum, + DatasetCollectionTypeEnum +} from '@fastgpt/global/core/dataset/constant'; +import type { CreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d'; +import { MongoDatasetCollection } from './schema'; + +export async function createOneCollection({ + name, + parentId, + datasetId, + type, + trainingType = DatasetCollectionTrainingModeEnum.manual, + chunkSize = 0, + fileId, + rawLink, + teamId, + tmbId, + metadata = {} +}: CreateDatasetCollectionParams & { teamId: string; tmbId: string }) { + const { _id } = await MongoDatasetCollection.create({ + name, + teamId, + tmbId, + datasetId, + parentId: parentId || null, + type, + trainingType, + chunkSize, + fileId, + rawLink, + metadata + }); + + // create default collection + if (type === DatasetCollectionTypeEnum.folder) { + await createDefaultCollection({ + datasetId, + parentId: _id, + teamId, + tmbId + }); + } + + return _id; +} + +// create default collection +export function createDefaultCollection({ + name = '手动录入', + datasetId, + parentId, + teamId, + tmbId +}: { + name?: '手动录入' | '手动标注'; + datasetId: string; + parentId?: string; + teamId: string; + tmbId: string; +}) { + return MongoDatasetCollection.create({ + name, + teamId, + tmbId, + datasetId, + parentId, + type: DatasetCollectionTypeEnum.virtual, + trainingType: DatasetCollectionTrainingModeEnum.manual, + chunkSize: 0, + updateTime: new Date('2099') + }); +} diff --git a/packages/service/core/dataset/collection/schema.ts b/packages/service/core/dataset/collection/schema.ts index 0eb48aa16..3f19e0ab5 100644 --- a/packages/service/core/dataset/collection/schema.ts +++ 
b/packages/service/core/dataset/collection/schema.ts @@ -39,15 +39,16 @@ const DatasetCollectionSchema = new Schema({ ref: DatasetCollectionName, required: true }, - name: { - type: String, - required: true - }, type: { type: String, enum: Object.keys(DatasetCollectionTypeMap), required: true }, + + name: { + type: String, + required: true + }, createTime: { type: Date, default: () => new Date() diff --git a/packages/service/core/dataset/data/controller.ts b/packages/service/core/dataset/data/controller.ts new file mode 100644 index 000000000..b3517ba6f --- /dev/null +++ b/packages/service/core/dataset/data/controller.ts @@ -0,0 +1,75 @@ +import { MongoDatasetData } from './schema'; +import { deletePgDataById } from './pg'; +import { MongoDatasetTraining } from '../training/schema'; +import { delFileById } from '../../../common/file/gridfs/controller'; +import { BucketNameEnum } from '@fastgpt/global/common/file/constants'; +import { MongoDatasetCollection } from '../collection/schema'; +import { delDatasetFiles } from '../file/controller'; +import { delay } from '@fastgpt/global/common/system/utils'; + +/* delete all data by datasetIds */ +export async function delDatasetRelevantData({ datasetIds }: { datasetIds: string[] }) { + datasetIds = datasetIds.map((item) => String(item)); + + // delete training data(There could be a training mission) + await MongoDatasetTraining.deleteMany({ + datasetId: { $in: datasetIds } + }); + + // delete related files + await Promise.all(datasetIds.map((id) => delDatasetFiles({ datasetId: id }))); + + await delay(1000); + + // delete pg data + await deletePgDataById(`dataset_id IN ('${datasetIds.join("','")}')`); + // delete dataset.datas + await MongoDatasetData.deleteMany({ datasetId: { $in: datasetIds } }); + + // delete collections + await MongoDatasetCollection.deleteMany({ + datasetId: { $in: datasetIds } + }); +} +/** + * delete all data by collectionIds + */ +export async function delCollectionRelevantData({ + collectionIds, 
+ fileIds +}: { + collectionIds: string[]; + fileIds: string[]; +}) { + collectionIds = collectionIds.map((item) => String(item)); + const filterFileIds = fileIds.filter(Boolean); + + // delete training data + await MongoDatasetTraining.deleteMany({ + collectionId: { $in: collectionIds } + }); + + // delete file + await Promise.all( + filterFileIds.map((fileId) => { + return delFileById({ + bucketName: BucketNameEnum.dataset, + fileId + }); + }) + ); + + await delay(1000); + + // delete pg data + await deletePgDataById(`collection_id IN ('${collectionIds.join("','")}')`); + // delete dataset.datas + await MongoDatasetData.deleteMany({ collectionId: { $in: collectionIds } }); +} +/** + * delete one data by mongoDataId + */ +export async function delDatasetDataByDataId(mongoDataId: string) { + await deletePgDataById(['data_id', mongoDataId]); + await MongoDatasetData.findByIdAndDelete(mongoDataId); +} diff --git a/packages/service/core/dataset/data/pg.ts b/packages/service/core/dataset/data/pg.ts new file mode 100644 index 000000000..2379be3f9 --- /dev/null +++ b/packages/service/core/dataset/data/pg.ts @@ -0,0 +1,28 @@ +import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant'; +import { delay } from '@fastgpt/global/common/system/utils'; +import { PgClient } from '../../../common/pg'; + +export async function deletePgDataById( + where: ['id' | 'dataset_id' | 'collection_id' | 'data_id', string] | string +) { + let retry = 2; + async function deleteData(): Promise { + try { + await PgClient.delete(PgDatasetTableName, { + where: [where] + }); + } catch (error) { + if (--retry < 0) { + return Promise.reject(error); + } + await delay(500); + return deleteData(); + } + } + + await deleteData(); + + return { + tokenLen: 0 + }; +} diff --git a/packages/service/core/dataset/data/schema.ts b/packages/service/core/dataset/data/schema.ts index e11d1634c..d05c0b3d4 100644 --- a/packages/service/core/dataset/data/schema.ts +++ 
b/packages/service/core/dataset/data/schema.ts @@ -79,6 +79,9 @@ const DatasetDataSchema = new Schema({ chunkIndex: { type: Number, default: 0 + }, + inited: { + type: Boolean } }); @@ -88,7 +91,7 @@ try { DatasetDataSchema.index({ collectionId: 1 }); // full text index DatasetDataSchema.index({ datasetId: 1, fullTextToken: 'text' }); - DatasetDataSchema.index({ fullTextToken: 1 }); + DatasetDataSchema.index({ inited: 1 }); } catch (error) { console.log(error); } diff --git a/packages/service/core/dataset/schema.ts b/packages/service/core/dataset/schema.ts index 5e024c800..039ead527 100644 --- a/packages/service/core/dataset/schema.ts +++ b/packages/service/core/dataset/schema.ts @@ -1,7 +1,11 @@ import { connectionMongo, type Model } from '../../common/mongo'; const { Schema, model, models } = connectionMongo; import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type.d'; -import { DatasetTypeMap } from '@fastgpt/global/core/dataset/constant'; +import { + DatasetStatusEnum, + DatasetStatusMap, + DatasetTypeMap +} from '@fastgpt/global/core/dataset/constant'; import { TeamCollectionName, TeamMemberCollectionName @@ -31,9 +35,16 @@ const DatasetSchema = new Schema({ ref: TeamMemberCollectionName, required: true }, - updateTime: { - type: Date, - default: () => new Date() + type: { + type: String, + enum: Object.keys(DatasetTypeMap), + required: true, + default: 'dataset' + }, + status: { + type: String, + enum: Object.keys(DatasetStatusMap), + default: DatasetStatusEnum.active }, avatar: { type: String, @@ -43,6 +54,10 @@ const DatasetSchema = new Schema({ type: String, required: true }, + updateTime: { + type: Date, + default: () => new Date() + }, vectorModel: { type: String, required: true, @@ -53,24 +68,26 @@ const DatasetSchema = new Schema({ required: true, default: 'gpt-3.5-turbo-16k' }, - type: { + intro: { type: String, - enum: Object.keys(DatasetTypeMap), - required: true, - default: 'dataset' - }, - tags: { - type: [String], - default: [], - 
set(val: string | string[]) { - if (Array.isArray(val)) return val; - return val.split(' ').filter((item) => item); - } + default: '' }, permission: { type: String, enum: Object.keys(PermissionTypeMap), default: PermissionTypeEnum.private + }, + websiteConfig: { + type: { + url: { + type: String, + required: true + }, + selector: { + type: String, + default: 'body' + } + } } }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ca89bb5c5..fd74d4edd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1,4 +1,4 @@ -lockfileVersion: '6.0' +lockfileVersion: '6.1' settings: autoInstallPeers: true @@ -45,6 +45,9 @@ importers: axios: specifier: ^1.5.1 version: registry.npmmirror.com/axios@1.5.1 + cheerio: + specifier: 1.0.0-rc.12 + version: registry.npmmirror.com/cheerio@1.0.0-rc.12 dayjs: specifier: ^1.11.7 version: registry.npmmirror.com/dayjs@1.11.10 @@ -54,6 +57,9 @@ importers: js-tiktoken: specifier: ^1.0.7 version: registry.npmmirror.com/js-tiktoken@1.0.7 + node-html-markdown: + specifier: ^1.3.0 + version: registry.npmmirror.com/node-html-markdown@1.3.0 openai: specifier: ^4.16.1 version: registry.npmmirror.com/openai@4.16.1(encoding@0.1.13) @@ -64,6 +70,9 @@ importers: '@types/node': specifier: ^20.8.5 version: registry.npmmirror.com/@types/node@20.8.7 + '@types/turndown': + specifier: ^5.0.4 + version: registry.npmmirror.com/@types/turndown@5.0.4 packages/service: dependencies: @@ -161,9 +170,6 @@ importers: '@fastgpt/web': specifier: workspace:* version: link:../../packages/web - '@mozilla/readability': - specifier: ^0.4.4 - version: registry.npmmirror.com/@mozilla/readability@0.4.4 '@node-rs/jieba': specifier: ^1.7.2 version: registry.npmmirror.com/@node-rs/jieba@1.7.2 @@ -209,9 +215,6 @@ importers: jschardet: specifier: ^3.0.0 version: registry.npmmirror.com/jschardet@3.0.0 - jsdom: - specifier: ^22.1.0 - version: registry.npmmirror.com/jsdom@22.1.0 jsonwebtoken: specifier: ^9.0.2 version: registry.npmmirror.com/jsonwebtoken@9.0.2 @@ -300,9 +303,6 @@ 
importers: '@types/js-cookie': specifier: ^3.0.3 version: registry.npmmirror.com/@types/js-cookie@3.0.5 - '@types/jsdom': - specifier: ^21.1.1 - version: registry.npmmirror.com/@types/jsdom@21.1.4 '@types/jsonwebtoken': specifier: ^9.0.3 version: registry.npmmirror.com/@types/jsonwebtoken@9.0.4 @@ -3575,13 +3575,6 @@ packages: dev: false optional: true - registry.npmmirror.com/@mozilla/readability@0.4.4: - resolution: {integrity: sha512-MCgZyANpJ6msfvVMi6+A0UAsvZj//4OHREYUB9f2087uXHVoU+H+SWhuihvb1beKpM323bReQPRio0WNk2+V6g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@mozilla/readability/-/readability-0.4.4.tgz} - name: '@mozilla/readability' - version: 0.4.4 - engines: {node: '>=14.0.0'} - dev: false - registry.npmmirror.com/@next/env@13.5.2: resolution: {integrity: sha512-dUseBIQVax+XtdJPzhwww4GetTjlkRSsXeQnisIJWBaHsnxYcN2RGzsPHi58D6qnkATjnhuAtQTJmR1hKYQQPg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@next/env/-/env-13.5.2.tgz} name: '@next/env' @@ -4248,13 +4241,6 @@ packages: use-sync-external-store: registry.npmmirror.com/use-sync-external-store@1.2.0(react@18.2.0) dev: false - registry.npmmirror.com/@tootallnate/once@2.0.0: - resolution: {integrity: sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@tootallnate/once/-/once-2.0.0.tgz} - name: '@tootallnate/once' - version: 2.0.0 - engines: {node: '>= 10'} - dev: false - registry.npmmirror.com/@trysound/sax@0.2.0: resolution: {integrity: sha512-L7z9BgrNEcYyUYtF+HaEfiS5ebkh9jXqbszz7pC0hRBPaatV0XjSD3+eHrpqFemQfgwiFF0QPIarnIihIDn7OA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@trysound/sax/-/sax-0.2.0.tgz} name: '@trysound/sax' @@ -4618,16 +4604,6 @@ packages: version: 3.0.5 dev: true - registry.npmmirror.com/@types/jsdom@21.1.4: - resolution: {integrity: 
sha512-NzAMLEV0KQ4cBaDx3Ls8VfJUElyDUm1xrtYRmcMK0gF8L5xYbujFVaQlJ50yinQ/d47j2rEP1XUzkiYrw4YRFA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/jsdom/-/jsdom-21.1.4.tgz} - name: '@types/jsdom' - version: 21.1.4 - dependencies: - '@types/node': registry.npmmirror.com/@types/node@20.8.7 - '@types/tough-cookie': registry.npmmirror.com/@types/tough-cookie@4.0.4 - parse5: registry.npmmirror.com/parse5@7.1.2 - dev: true - registry.npmmirror.com/@types/json5@0.0.29: resolution: {integrity: sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/json5/-/json5-0.0.29.tgz} name: '@types/json5' @@ -4826,12 +4802,6 @@ packages: '@types/node': registry.npmmirror.com/@types/node@20.8.7 dev: true - registry.npmmirror.com/@types/tough-cookie@4.0.4: - resolution: {integrity: sha512-95Sfz4nvMAb0Nl9DTxN3j64adfwfbBPEYq14VN7zT5J5O2M9V6iZMIIQU1U+pJyl9agHYHNCqhCXgyEtIRRa5A==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/tough-cookie/-/tough-cookie-4.0.4.tgz} - name: '@types/tough-cookie' - version: 4.0.4 - dev: true - registry.npmmirror.com/@types/triple-beam@1.3.4: resolution: {integrity: sha512-HlJjF3wxV4R2VQkFpKe0YqJLilYNgtRtsqqZtby7RkVsSs+i+vbyzjtUwpFEdUCKcrGzCiEJE7F/0mKjh0sunA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/triple-beam/-/triple-beam-1.3.4.tgz} name: '@types/triple-beam' @@ -4846,6 +4816,12 @@ packages: '@types/node': registry.npmmirror.com/@types/node@20.8.7 dev: true + registry.npmmirror.com/@types/turndown@5.0.4: + resolution: {integrity: sha512-28GI33lCCkU4SGH1GvjDhFgOVr+Tym4PXGBIU1buJUa6xQolniPArtUT+kv42RR2N9MsMLInkr904Aq+ESHBJg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/turndown/-/turndown-5.0.4.tgz} + name: '@types/turndown' + version: 5.0.4 + 
dev: true + registry.npmmirror.com/@types/unist@2.0.9: resolution: {integrity: sha512-zC0iXxAv1C1ERURduJueYzkzZ2zaGyc+P2c95hgkikHPr3z8EdUZOlgEQ5X0DRmwDZn+hekycQnoeiiRVrmilQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/@types/unist/-/unist-2.0.9.tgz} name: '@types/unist' @@ -4967,12 +4943,6 @@ packages: '@zag-js/dom-query': registry.npmmirror.com/@zag-js/dom-query@0.16.0 dev: false - registry.npmmirror.com/abab@2.0.6: - resolution: {integrity: sha512-j2afSsaIENvHZN2B8GOpF566vZ5WVk5opAiMTvWgaQT8DkbOqsTfvNAvHoRGU2zzP8cPoqys+xHTRDWW8L+/BA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/abab/-/abab-2.0.6.tgz} - name: abab - version: 2.0.6 - dev: false - registry.npmmirror.com/abort-controller@3.0.0: resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/abort-controller/-/abort-controller-3.0.0.tgz} name: abort-controller @@ -5001,17 +4971,6 @@ packages: hasBin: true dev: true - registry.npmmirror.com/agent-base@6.0.2: - resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/agent-base/-/agent-base-6.0.2.tgz} - name: agent-base - version: 6.0.2 - engines: {node: '>= 6.0.0'} - dependencies: - debug: registry.npmmirror.com/debug@4.3.4 - transitivePeerDependencies: - - supports-color - dev: false - registry.npmmirror.com/agentkeepalive@4.5.0: resolution: {integrity: sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/agentkeepalive/-/agentkeepalive-4.5.0.tgz} name: agentkeepalive @@ -5435,7 +5394,6 @@ packages: resolution: {integrity: 
sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/boolbase/-/boolbase-1.0.0.tgz} name: boolbase version: 1.0.0 - dev: true registry.npmmirror.com/brace-expansion@1.1.11: resolution: {integrity: sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/brace-expansion/-/brace-expansion-1.1.11.tgz} @@ -5720,6 +5678,34 @@ packages: get-func-name: registry.npmmirror.com/get-func-name@2.0.2 dev: true + registry.npmmirror.com/cheerio-select@2.1.0: + resolution: {integrity: sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/cheerio-select/-/cheerio-select-2.1.0.tgz} + name: cheerio-select + version: 2.1.0 + dependencies: + boolbase: registry.npmmirror.com/boolbase@1.0.0 + css-select: registry.npmmirror.com/css-select@5.1.0 + css-what: registry.npmmirror.com/css-what@6.1.0 + domelementtype: registry.npmmirror.com/domelementtype@2.3.0 + domhandler: registry.npmmirror.com/domhandler@5.0.3 + domutils: registry.npmmirror.com/domutils@3.1.0 + dev: false + + registry.npmmirror.com/cheerio@1.0.0-rc.12: + resolution: {integrity: sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/cheerio/-/cheerio-1.0.0-rc.12.tgz} + name: cheerio + version: 1.0.0-rc.12 + engines: {node: '>= 6'} + dependencies: + cheerio-select: registry.npmmirror.com/cheerio-select@2.1.0 + dom-serializer: registry.npmmirror.com/dom-serializer@2.0.0 + domhandler: registry.npmmirror.com/domhandler@5.0.3 + domutils: registry.npmmirror.com/domutils@3.1.0 + htmlparser2: registry.npmmirror.com/htmlparser2@8.0.2 + 
parse5: registry.npmmirror.com/parse5@7.1.2 + parse5-htmlparser2-tree-adapter: registry.npmmirror.com/parse5-htmlparser2-tree-adapter@7.0.0 + dev: false + registry.npmmirror.com/chokidar@3.5.3: resolution: {integrity: sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/chokidar/-/chokidar-3.5.3.tgz} name: chokidar @@ -6108,6 +6094,18 @@ packages: nth-check: registry.npmmirror.com/nth-check@2.1.1 dev: true + registry.npmmirror.com/css-select@5.1.0: + resolution: {integrity: sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/css-select/-/css-select-5.1.0.tgz} + name: css-select + version: 5.1.0 + dependencies: + boolbase: registry.npmmirror.com/boolbase@1.0.0 + css-what: registry.npmmirror.com/css-what@6.1.0 + domhandler: registry.npmmirror.com/domhandler@5.0.3 + domutils: registry.npmmirror.com/domutils@3.1.0 + nth-check: registry.npmmirror.com/nth-check@2.1.1 + dev: false + registry.npmmirror.com/css-tree@1.1.3: resolution: {integrity: sha512-tRpdppF7TRazZrjJ6v3stzv93qxRcSsFmW6cX0Zm2NVKpxE1WV1HblnghVv9TreireHkqI/VDEsfolRF1p6y7Q==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/css-tree/-/css-tree-1.1.3.tgz} name: css-tree @@ -6123,7 +6121,6 @@ packages: name: css-what version: 6.1.0 engines: {node: '>= 6'} - dev: true registry.npmmirror.com/csso@4.2.0: resolution: {integrity: sha512-wvlcdIbf6pwKEk7vHj8/Bkc0B4ylXZruLvOgs9doS5eOsOpuodOV2zJChSpkp+pRpYQLQMeF04nr3Z68Sta9jA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/csso/-/csso-4.2.0.tgz} @@ -6134,15 +6131,6 @@ packages: css-tree: registry.npmmirror.com/css-tree@1.1.3 dev: true - registry.npmmirror.com/cssstyle@3.0.0: - resolution: {integrity: 
sha512-N4u2ABATi3Qplzf0hWbVCdjenim8F3ojEXpBDF5hBpjzW182MjNGLqfmQ0SkSPeQ+V86ZXgeH8aXj6kayd4jgg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/cssstyle/-/cssstyle-3.0.0.tgz} - name: cssstyle - version: 3.0.0 - engines: {node: '>=14'} - dependencies: - rrweb-cssom: registry.npmmirror.com/rrweb-cssom@0.6.0 - dev: false - registry.npmmirror.com/csstype@3.1.2: resolution: {integrity: sha512-I7K1Uu0MBPzaFKg4nI5Q7Vs2t+3gWWW648spaF+Rg7pI9ds18Ugn+lvg4SHczUdKlHI5LWBXyqfS8+DufyBsgQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/csstype/-/csstype-3.1.2.tgz} name: csstype @@ -6539,17 +6527,6 @@ packages: version: 1.0.8 dev: true - registry.npmmirror.com/data-urls@4.0.0: - resolution: {integrity: sha512-/mMTei/JXPqvFqQtfyTowxmJVwr2PVAeCcDxyFf6LhoOu/09TX2OX3kb2wzi4DMXcfj4OItwDOnhl5oziPnT6g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/data-urls/-/data-urls-4.0.0.tgz} - name: data-urls - version: 4.0.0 - engines: {node: '>=14'} - dependencies: - abab: registry.npmmirror.com/abab@2.0.6 - whatwg-mimetype: registry.npmmirror.com/whatwg-mimetype@3.0.0 - whatwg-url: registry.npmmirror.com/whatwg-url@12.0.1 - dev: false - registry.npmmirror.com/date-fns@2.30.0: resolution: {integrity: sha512-fnULvOpxnC5/Vg3NCiWelDsLiUc9bRwAPs/+LfTLNvetFCtCTN+yQz15C/fs4AwX1R9K5GLtLfn8QW+dWisaAw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/date-fns/-/date-fns-2.30.0.tgz} name: date-fns @@ -6591,12 +6568,6 @@ packages: dependencies: ms: registry.npmmirror.com/ms@2.1.2 - registry.npmmirror.com/decimal.js@10.4.3: - resolution: {integrity: sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/decimal.js/-/decimal.js-10.4.3.tgz} - name: decimal.js - version: 10.4.3 - dev: false - 
registry.npmmirror.com/decode-named-character-reference@1.0.2: resolution: {integrity: sha512-O8x12RzrUF8xyVcY0KJowWsmaJxQbmy0/EtnNtHRpsOcT7dFk5W598coHqBVpmWo1oQQfsCqfCmkZN5DJrZVdg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/decode-named-character-reference/-/decode-named-character-reference-1.0.2.tgz} name: decode-named-character-reference @@ -6769,6 +6740,16 @@ packages: entities: registry.npmmirror.com/entities@2.2.0 dev: true + registry.npmmirror.com/dom-serializer@2.0.0: + resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/dom-serializer/-/dom-serializer-2.0.0.tgz} + name: dom-serializer + version: 2.0.0 + dependencies: + domelementtype: registry.npmmirror.com/domelementtype@2.3.0 + domhandler: registry.npmmirror.com/domhandler@5.0.3 + entities: registry.npmmirror.com/entities@4.5.0 + dev: false + registry.npmmirror.com/domain-browser@4.23.0: resolution: {integrity: sha512-ArzcM/II1wCCujdCNyQjXrAFwS4mrLh4C7DZWlaI8mdh7h3BfKdNd3bKXITfl2PT9FtfQqaGvhi1vPRQPimjGA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/domain-browser/-/domain-browser-4.23.0.tgz} name: domain-browser @@ -6780,16 +6761,6 @@ packages: resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/domelementtype/-/domelementtype-2.3.0.tgz} name: domelementtype version: 2.3.0 - dev: true - - registry.npmmirror.com/domexception@4.0.0: - resolution: {integrity: sha512-A2is4PLG+eeSfoTMA95/s4pvAoSo2mKtiM5jlHkAVewmiO8ISFTFKZjH7UAM1Atli/OT/7JHOrJRJiMKUZKYBw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/domexception/-/domexception-4.0.0.tgz} - name: domexception - version: 4.0.0 - engines: 
{node: '>=12'} - dependencies: - webidl-conversions: registry.npmmirror.com/webidl-conversions@7.0.0 - dev: false registry.npmmirror.com/domhandler@4.3.1: resolution: {integrity: sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/domhandler/-/domhandler-4.3.1.tgz} @@ -6800,6 +6771,15 @@ packages: domelementtype: registry.npmmirror.com/domelementtype@2.3.0 dev: true + registry.npmmirror.com/domhandler@5.0.3: + resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/domhandler/-/domhandler-5.0.3.tgz} + name: domhandler + version: 5.0.3 + engines: {node: '>= 4'} + dependencies: + domelementtype: registry.npmmirror.com/domelementtype@2.3.0 + dev: false + registry.npmmirror.com/dompurify@3.0.6: resolution: {integrity: sha512-ilkD8YEnnGh1zJ240uJsW7AzE+2qpbOUYjacomn3AvJ6J4JhKGSZ2nh4wUIXPZrEPppaCLx5jFe8T89Rk8tQ7w==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/dompurify/-/dompurify-3.0.6.tgz} name: dompurify @@ -6816,6 +6796,16 @@ packages: domhandler: registry.npmmirror.com/domhandler@4.3.1 dev: true + registry.npmmirror.com/domutils@3.1.0: + resolution: {integrity: sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/domutils/-/domutils-3.1.0.tgz} + name: domutils + version: 3.1.0 + dependencies: + dom-serializer: registry.npmmirror.com/dom-serializer@2.0.0 + domelementtype: registry.npmmirror.com/domelementtype@2.3.0 + domhandler: registry.npmmirror.com/domhandler@5.0.3 + dev: false + registry.npmmirror.com/downloadjs@1.4.7: resolution: {integrity: 
sha512-LN1gO7+u9xjU5oEScGFKvXhYf7Y/empUIIEAGBs1LzUq/rg5duiDrkuH5A2lQGd5jfMOb9X9usDa2oVXwJ0U/Q==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/downloadjs/-/downloadjs-1.4.7.tgz} name: downloadjs @@ -8285,6 +8275,13 @@ packages: space-separated-tokens: registry.npmmirror.com/space-separated-tokens@2.0.2 dev: false + registry.npmmirror.com/he@1.2.0: + resolution: {integrity: sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/he/-/he-1.2.0.tgz} + name: he + version: 1.2.0 + hasBin: true + dev: false + registry.npmmirror.com/heap@0.2.7: resolution: {integrity: sha512-2bsegYkkHO+h/9MGbn6KWcE45cHZgPANo5LXF7EvWdT0yT2EguSVO1nDgU5c8+ZOPwp2vMNa7YFsJhVcDR9Sdg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/heap/-/heap-0.2.7.tgz} name: heap @@ -8321,15 +8318,6 @@ packages: dependencies: react-is: registry.npmmirror.com/react-is@16.13.1 - registry.npmmirror.com/html-encoding-sniffer@3.0.0: - resolution: {integrity: sha512-oWv4T4yJ52iKrufjnyZPkrN0CH3QnrUqdB6In1g5Fe1mia8GmF36gnfNySxoZtxD5+NmYw1EElVXiBk93UeskA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/html-encoding-sniffer/-/html-encoding-sniffer-3.0.0.tgz} - name: html-encoding-sniffer - version: 3.0.0 - engines: {node: '>=12'} - dependencies: - whatwg-encoding: registry.npmmirror.com/whatwg-encoding@2.0.0 - dev: false - registry.npmmirror.com/html-parse-stringify@3.0.1: resolution: {integrity: sha512-KknJ50kTInJ7qIScF3jeaFRpMpE8/lfiTdzf/twXyPBLAGrLRTmkz3AdTnKeh40X8k9L2fdYwEp/42WGXIRGcg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/html-parse-stringify/-/html-parse-stringify-3.0.1.tgz} name: html-parse-stringify @@ -8337,17 +8325,15 @@ packages: dependencies: void-elements: registry.npmmirror.com/void-elements@3.1.0 - 
registry.npmmirror.com/http-proxy-agent@5.0.0: - resolution: {integrity: sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/http-proxy-agent/-/http-proxy-agent-5.0.0.tgz} - name: http-proxy-agent - version: 5.0.0 - engines: {node: '>= 6'} + registry.npmmirror.com/htmlparser2@8.0.2: + resolution: {integrity: sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/htmlparser2/-/htmlparser2-8.0.2.tgz} + name: htmlparser2 + version: 8.0.2 dependencies: - '@tootallnate/once': registry.npmmirror.com/@tootallnate/once@2.0.0 - agent-base: registry.npmmirror.com/agent-base@6.0.2 - debug: registry.npmmirror.com/debug@4.3.4 - transitivePeerDependencies: - - supports-color + domelementtype: registry.npmmirror.com/domelementtype@2.3.0 + domhandler: registry.npmmirror.com/domhandler@5.0.3 + domutils: registry.npmmirror.com/domutils@3.1.0 + entities: registry.npmmirror.com/entities@4.5.0 dev: false registry.npmmirror.com/https-browserify@1.0.0: @@ -8356,18 +8342,6 @@ packages: version: 1.0.0 dev: true - registry.npmmirror.com/https-proxy-agent@5.0.1: - resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz} - name: https-proxy-agent - version: 5.0.1 - engines: {node: '>= 6'} - dependencies: - agent-base: registry.npmmirror.com/agent-base@6.0.2 - debug: registry.npmmirror.com/debug@4.3.4 - transitivePeerDependencies: - - supports-color - dev: false - registry.npmmirror.com/human-signals@4.3.1: resolution: {integrity: sha512-nZXjEF2nbo7lIw3mgYjItAfgQXog3OjJogSbKa2CQIIvSGWcKgeJnQlNXip6NglNzYH45nSRiEVimMvYL8DDqQ==, registry: 
https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/human-signals/-/human-signals-4.3.1.tgz} name: human-signals @@ -8743,12 +8717,6 @@ packages: engines: {node: '>=12'} dev: false - registry.npmmirror.com/is-potential-custom-element-name@1.0.1: - resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz} - name: is-potential-custom-element-name - version: 1.0.1 - dev: false - registry.npmmirror.com/is-regex@1.1.4: resolution: {integrity: sha512-kvRdxDsxZjhzUX07ZnLydzS1TU/TJlTUHHY4YLL87e37oUA49DfkLqgy+VjFocowy29cKvcSiu+kIv728jTTVg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/is-regex/-/is-regex-1.1.4.tgz} name: is-regex @@ -8921,46 +8889,6 @@ packages: engines: {node: '>=0.1.90'} dev: false - registry.npmmirror.com/jsdom@22.1.0: - resolution: {integrity: sha512-/9AVW7xNbsBv6GfWho4TTNjEo9fe6Zhf9O7s0Fhhr3u+awPwAJMKwAMXnkk5vBxflqLW9hTHX/0cs+P3gW+cQw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/jsdom/-/jsdom-22.1.0.tgz} - name: jsdom - version: 22.1.0 - engines: {node: '>=16'} - peerDependencies: - canvas: ^2.5.0 - peerDependenciesMeta: - canvas: - optional: true - dependencies: - abab: registry.npmmirror.com/abab@2.0.6 - cssstyle: registry.npmmirror.com/cssstyle@3.0.0 - data-urls: registry.npmmirror.com/data-urls@4.0.0 - decimal.js: registry.npmmirror.com/decimal.js@10.4.3 - domexception: registry.npmmirror.com/domexception@4.0.0 - form-data: registry.npmmirror.com/form-data@4.0.0 - html-encoding-sniffer: registry.npmmirror.com/html-encoding-sniffer@3.0.0 - http-proxy-agent: registry.npmmirror.com/http-proxy-agent@5.0.0 - https-proxy-agent: registry.npmmirror.com/https-proxy-agent@5.0.1 - is-potential-custom-element-name: 
registry.npmmirror.com/is-potential-custom-element-name@1.0.1 - nwsapi: registry.npmmirror.com/nwsapi@2.2.7 - parse5: registry.npmmirror.com/parse5@7.1.2 - rrweb-cssom: registry.npmmirror.com/rrweb-cssom@0.6.0 - saxes: registry.npmmirror.com/saxes@6.0.0 - symbol-tree: registry.npmmirror.com/symbol-tree@3.2.4 - tough-cookie: registry.npmmirror.com/tough-cookie@4.1.3 - w3c-xmlserializer: registry.npmmirror.com/w3c-xmlserializer@4.0.0 - webidl-conversions: registry.npmmirror.com/webidl-conversions@7.0.0 - whatwg-encoding: registry.npmmirror.com/whatwg-encoding@2.0.0 - whatwg-mimetype: registry.npmmirror.com/whatwg-mimetype@3.0.0 - whatwg-url: registry.npmmirror.com/whatwg-url@12.0.1 - ws: registry.npmmirror.com/ws@8.14.2 - xml-name-validator: registry.npmmirror.com/xml-name-validator@4.0.0 - transitivePeerDependencies: - - bufferutil - - supports-color - - utf-8-validate - dev: false - registry.npmmirror.com/jsesc@0.5.0: resolution: {integrity: sha512-uZz5UnB7u4T9LvwmFqXii7pZSouaRPorGs5who1Ip7VO0wxanFvBL7GkM6dTHlgX+jhBApRetaWpnDabOeTcnA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/jsesc/-/jsesc-0.5.0.tgz} name: jsesc @@ -10343,6 +10271,24 @@ packages: whatwg-url: registry.npmmirror.com/whatwg-url@5.0.0 dev: false + registry.npmmirror.com/node-html-markdown@1.3.0: + resolution: {integrity: sha512-OeFi3QwC/cPjvVKZ114tzzu+YoR+v9UXW5RwSXGUqGb0qCl0DvP406tzdL7SFn8pZrMyzXoisfG2zcuF9+zw4g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/node-html-markdown/-/node-html-markdown-1.3.0.tgz} + name: node-html-markdown + version: 1.3.0 + engines: {node: '>=10.0.0'} + dependencies: + node-html-parser: registry.npmmirror.com/node-html-parser@6.1.11 + dev: false + + registry.npmmirror.com/node-html-parser@6.1.11: + resolution: {integrity: sha512-FAgwwZ6h0DSDWxfD0Iq1tsDcBCxdJB1nXpLPPxX8YyVWzbfCjKWEzaynF4gZZ/8hziUmp7ZSaKylcn0iKhufUQ==, registry: https://registry.npm.taobao.org/, tarball: 
https://registry.npmmirror.com/node-html-parser/-/node-html-parser-6.1.11.tgz} + name: node-html-parser + version: 6.1.11 + dependencies: + css-select: registry.npmmirror.com/css-select@5.1.0 + he: registry.npmmirror.com/he@1.2.0 + dev: false + registry.npmmirror.com/node-releases@2.0.13: resolution: {integrity: sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/node-releases/-/node-releases-2.0.13.tgz} name: node-releases @@ -10416,13 +10362,6 @@ packages: version: 2.1.1 dependencies: boolbase: registry.npmmirror.com/boolbase@1.0.0 - dev: true - - registry.npmmirror.com/nwsapi@2.2.7: - resolution: {integrity: sha512-ub5E4+FBPKwAZx0UwIQOjYWGHTEq5sPqHQNRN8Z9e4A7u3Tj1weLJsL59yH9vmvqEtBHaOmT6cYQKIZOxp35FQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/nwsapi/-/nwsapi-2.2.7.tgz} - name: nwsapi - version: 2.2.7 - dev: false registry.npmmirror.com/object-assign@4.1.1: resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/object-assign/-/object-assign-4.1.1.tgz} @@ -10703,12 +10642,22 @@ packages: json-parse-even-better-errors: registry.npmmirror.com/json-parse-even-better-errors@2.3.1 lines-and-columns: registry.npmmirror.com/lines-and-columns@1.2.4 + registry.npmmirror.com/parse5-htmlparser2-tree-adapter@7.0.0: + resolution: {integrity: sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz} + name: parse5-htmlparser2-tree-adapter + version: 7.0.0 + dependencies: + domhandler: registry.npmmirror.com/domhandler@5.0.3 + parse5: 
registry.npmmirror.com/parse5@7.1.2 + dev: false + registry.npmmirror.com/parse5@7.1.2: resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/parse5/-/parse5-7.1.2.tgz} name: parse5 version: 7.1.2 dependencies: entities: registry.npmmirror.com/entities@4.5.0 + dev: false registry.npmmirror.com/path-browserify@1.0.1: resolution: {integrity: sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/path-browserify/-/path-browserify-1.0.1.tgz} @@ -11060,12 +11009,6 @@ packages: version: 1.1.0 dev: false - registry.npmmirror.com/psl@1.9.0: - resolution: {integrity: sha512-E/ZsdU4HLs/68gYzgGTkMicWTLPdAftJLfJFlLUAAKZGkStNU72sZjT66SnMDVOfOWY/YAoiD7Jxa9iHvngcag==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/psl/-/psl-1.9.0.tgz} - name: psl - version: 1.9.0 - dev: false - registry.npmmirror.com/public-encrypt@4.0.3: resolution: {integrity: sha512-zVpa8oKZSz5bTMTFClc1fQOnyyEzpl5ozpi1B5YcvBrdohMjH2rfsBtyXcuNuwjsDIXmBYlF2N5FlJYhR29t8Q==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/public-encrypt/-/public-encrypt-4.0.3.tgz} name: public-encrypt @@ -11106,12 +11049,6 @@ packages: engines: {node: '>=0.4.x'} dev: true - registry.npmmirror.com/querystringify@2.2.0: - resolution: {integrity: sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/querystringify/-/querystringify-2.2.0.tgz} - name: querystringify - version: 2.2.0 - dev: false - registry.npmmirror.com/queue-microtask@1.2.3: resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==, 
registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/queue-microtask/-/queue-microtask-1.2.3.tgz} name: queue-microtask @@ -11628,12 +11565,6 @@ packages: engines: {node: '>=4'} dev: false - registry.npmmirror.com/requires-port@1.0.0: - resolution: {integrity: sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/requires-port/-/requires-port-1.0.0.tgz} - name: requires-port - version: 1.0.0 - dev: false - registry.npmmirror.com/resolve-from@4.0.0: resolution: {integrity: sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/resolve-from/-/resolve-from-4.0.0.tgz} name: resolve-from @@ -11724,12 +11655,6 @@ packages: fsevents: registry.npmmirror.com/fsevents@2.3.3 dev: true - registry.npmmirror.com/rrweb-cssom@0.6.0: - resolution: {integrity: sha512-APM0Gt1KoXBz0iIkkdB/kfvGOwC4UuJFeG/c+yV7wSc7q96cG/kJ0HiYCnzivD9SB53cLV1MlHFNfOuPaadYSw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/rrweb-cssom/-/rrweb-cssom-0.6.0.tgz} - name: rrweb-cssom - version: 0.6.0 - dev: false - registry.npmmirror.com/run-parallel@1.2.0: resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/run-parallel/-/run-parallel-1.2.0.tgz} name: run-parallel @@ -11820,15 +11745,6 @@ packages: immutable: registry.npmmirror.com/immutable@4.3.4 source-map-js: registry.npmmirror.com/source-map-js@1.0.2 - registry.npmmirror.com/saxes@6.0.0: - resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==, registry: https://registry.npm.taobao.org/, tarball: 
https://registry.npmmirror.com/saxes/-/saxes-6.0.0.tgz} - name: saxes - version: 6.0.0 - engines: {node: '>=v12.22.7'} - dependencies: - xmlchars: registry.npmmirror.com/xmlchars@2.2.0 - dev: false - registry.npmmirror.com/scheduler@0.23.0: resolution: {integrity: sha512-CtuThmgHNg7zIZWAXi3AsyIzA3n4xx7aNyjwC2VJldO2LMVDhFK+63xGqq6CsJH4rTAt6/M+N4GhZiDYPx9eUw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/scheduler/-/scheduler-0.23.0.tgz} name: scheduler @@ -12271,12 +12187,6 @@ packages: stable: registry.npmmirror.com/stable@0.1.8 dev: true - registry.npmmirror.com/symbol-tree@3.2.4: - resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/symbol-tree/-/symbol-tree-3.2.4.tgz} - name: symbol-tree - version: 3.2.4 - dev: false - registry.npmmirror.com/tapable@2.2.1: resolution: {integrity: sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/tapable/-/tapable-2.2.1.tgz} name: tapable @@ -12351,18 +12261,6 @@ packages: version: 1.0.6 dev: false - registry.npmmirror.com/tough-cookie@4.1.3: - resolution: {integrity: sha512-aX/y5pVRkfRnfmuX+OdbSdXvPe6ieKX/G2s7e98f4poJHnqH3281gDPm/metm6E/WRamfx7WC4HUqkWHfQHprw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/tough-cookie/-/tough-cookie-4.1.3.tgz} - name: tough-cookie - version: 4.1.3 - engines: {node: '>=6'} - dependencies: - psl: registry.npmmirror.com/psl@1.9.0 - punycode: registry.npmmirror.com/punycode@2.3.0 - universalify: registry.npmmirror.com/universalify@0.2.0 - url-parse: registry.npmmirror.com/url-parse@1.5.10 - dev: false - registry.npmmirror.com/tr46@0.0.3: resolution: {integrity: 
sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/tr46/-/tr46-0.0.3.tgz} name: tr46 @@ -12378,15 +12276,6 @@ packages: punycode: registry.npmmirror.com/punycode@2.3.0 dev: false - registry.npmmirror.com/tr46@4.1.1: - resolution: {integrity: sha512-2lv/66T7e5yNyhAAC4NaKe5nVavzuGJQVVtRYLyQ2OI8tsJ61PMLlelehb0wi2Hx6+hT/OJUWZcw8MjlSRnxvw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/tr46/-/tr46-4.1.1.tgz} - name: tr46 - version: 4.1.1 - engines: {node: '>=14'} - dependencies: - punycode: registry.npmmirror.com/punycode@2.3.0 - dev: false - registry.npmmirror.com/trim-lines@3.0.1: resolution: {integrity: sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/trim-lines/-/trim-lines-3.0.1.tgz} name: trim-lines @@ -12786,13 +12675,6 @@ packages: unist-util-visit-parents: registry.npmmirror.com/unist-util-visit-parents@5.1.3 dev: false - registry.npmmirror.com/universalify@0.2.0: - resolution: {integrity: sha512-CJ1QgKmNg3CwvAv/kOFmtnEN05f0D/cn9QntgNOQlQF9dgvVTHj3t+8JPdjqawCHk7V/KA+fbUqzZ9XWhcqPUg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/universalify/-/universalify-0.2.0.tgz} - name: universalify - version: 0.2.0 - engines: {node: '>= 4.0.0'} - dev: false - registry.npmmirror.com/update-browserslist-db@1.0.13(browserslist@4.22.1): resolution: {integrity: sha512-xebP81SNcPuNpPP3uzeW1NYXxI3rxyJzF3pD6sH4jE7o/IX+WtSpwnVU+qIsDPyk0d3hmFQ7mjqc6AtV604hbg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz} id: registry.npmmirror.com/update-browserslist-db/1.0.13 @@ -12814,15 +12696,6 @@ packages: punycode: registry.npmmirror.com/punycode@2.3.0 dev: 
true - registry.npmmirror.com/url-parse@1.5.10: - resolution: {integrity: sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/url-parse/-/url-parse-1.5.10.tgz} - name: url-parse - version: 1.5.10 - dependencies: - querystringify: registry.npmmirror.com/querystringify@2.2.0 - requires-port: registry.npmmirror.com/requires-port@1.0.0 - dev: false - registry.npmmirror.com/url@0.11.3: resolution: {integrity: sha512-6hxOLGfZASQK/cijlZnZJTq8OXAkt/3YGfQX45vvMYXpZoo8NdWZcY73K108Jf759lS1Bv/8wXnHDTSz17dSRw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/url/-/url-0.11.3.tgz} name: url @@ -13073,15 +12946,6 @@ packages: version: 3.1.0 engines: {node: '>=0.10.0'} - registry.npmmirror.com/w3c-xmlserializer@4.0.0: - resolution: {integrity: sha512-d+BFHzbiCx6zGfz0HyQ6Rg69w9k19nviJspaj4yNscGjrHu94sVP+aRm75yEbCh+r2/yR+7q6hux9LVtbuTGBw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/w3c-xmlserializer/-/w3c-xmlserializer-4.0.0.tgz} - name: w3c-xmlserializer - version: 4.0.0 - engines: {node: '>=14'} - dependencies: - xml-name-validator: registry.npmmirror.com/xml-name-validator@4.0.0 - dev: false - registry.npmmirror.com/watchpack@2.4.0: resolution: {integrity: sha512-Lcvm7MGST/4fup+ifyKi2hjyIAwcdI4HRgtvTpIUxBRhB+RFtUh8XtDOxUfctVCnhVi+QQj49i91OyvzkJl6cg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/watchpack/-/watchpack-2.4.0.tgz} name: watchpack @@ -13130,22 +12994,6 @@ packages: engines: {node: '>=12'} dev: false - registry.npmmirror.com/whatwg-encoding@2.0.0: - resolution: {integrity: sha512-p41ogyeMUrw3jWclHWTQg1k05DSVXPLcVxRTYsXUk+ZooOCZLcoYgPZ/HL/D/N+uQPOtcp1me1WhBEaX02mhWg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/whatwg-encoding/-/whatwg-encoding-2.0.0.tgz} - name: whatwg-encoding - 
version: 2.0.0 - engines: {node: '>=12'} - dependencies: - iconv-lite: registry.npmmirror.com/iconv-lite@0.6.3 - dev: false - - registry.npmmirror.com/whatwg-mimetype@3.0.0: - resolution: {integrity: sha512-nt+N2dzIutVRxARx1nghPKGv1xHikU7HKdfafKkLNLindmPU/ch3U31NOCGGA/dmPcmb1VlofO0vnKAcsm0o/Q==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/whatwg-mimetype/-/whatwg-mimetype-3.0.0.tgz} - name: whatwg-mimetype - version: 3.0.0 - engines: {node: '>=12'} - dev: false - registry.npmmirror.com/whatwg-url@11.0.0: resolution: {integrity: sha512-RKT8HExMpoYx4igMiVMY83lN6UeITKJlBQ+vR/8ZJ8OCdSiN3RwCq+9gH0+Xzj0+5IrM6i4j/6LuvzbZIQgEcQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/whatwg-url/-/whatwg-url-11.0.0.tgz} name: whatwg-url @@ -13156,16 +13004,6 @@ packages: webidl-conversions: registry.npmmirror.com/webidl-conversions@7.0.0 dev: false - registry.npmmirror.com/whatwg-url@12.0.1: - resolution: {integrity: sha512-Ed/LrqB8EPlGxjS+TrsXcpUond1mhccS3pchLhzSgPCnTimUCKj3IZE75pAs5m6heB2U2TMerKFUXheyHY+VDQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/whatwg-url/-/whatwg-url-12.0.1.tgz} - name: whatwg-url - version: 12.0.1 - engines: {node: '>=14'} - dependencies: - tr46: registry.npmmirror.com/tr46@4.1.1 - webidl-conversions: registry.npmmirror.com/webidl-conversions@7.0.0 - dev: false - registry.npmmirror.com/whatwg-url@5.0.0: resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/whatwg-url/-/whatwg-url-5.0.0.tgz} name: whatwg-url @@ -13308,28 +13146,6 @@ packages: name: wrappy version: 1.0.2 - registry.npmmirror.com/ws@8.14.2: - resolution: {integrity: sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==, registry: https://registry.npm.taobao.org/, tarball: 
https://registry.npmmirror.com/ws/-/ws-8.14.2.tgz} - name: ws - version: 8.14.2 - engines: {node: '>=10.0.0'} - peerDependencies: - bufferutil: ^4.0.1 - utf-8-validate: '>=5.0.2' - peerDependenciesMeta: - bufferutil: - optional: true - utf-8-validate: - optional: true - dev: false - - registry.npmmirror.com/xml-name-validator@4.0.0: - resolution: {integrity: sha512-ICP2e+jsHvAj2E2lIHxa5tjXRlKDJo4IdvPvCXbXQGdzSfmSpNVyIKMvoZHjDY9DP0zV17iI85o90vRFXNccRw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/xml-name-validator/-/xml-name-validator-4.0.0.tgz} - name: xml-name-validator - version: 4.0.0 - engines: {node: '>=12'} - dev: false - registry.npmmirror.com/xmlbuilder@10.1.1: resolution: {integrity: sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/xmlbuilder/-/xmlbuilder-10.1.1.tgz} name: xmlbuilder @@ -13337,12 +13153,6 @@ packages: engines: {node: '>=4.0'} dev: false - registry.npmmirror.com/xmlchars@2.2.0: - resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/xmlchars/-/xmlchars-2.2.0.tgz} - name: xmlchars - version: 2.2.0 - dev: false - registry.npmmirror.com/xtend@4.0.2: resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==, registry: https://registry.npm.taobao.org/, tarball: https://registry.npmmirror.com/xtend/-/xtend-4.0.2.tgz} name: xtend diff --git a/projects/app/package.json b/projects/app/package.json index 97b0e4176..fbf5627ee 100644 --- a/projects/app/package.json +++ b/projects/app/package.json @@ -19,7 +19,6 @@ "@fastgpt/global": "workspace:*", "@fastgpt/service": "workspace:*", "@fastgpt/web": "workspace:*", - "@mozilla/readability": "^0.4.4", "@node-rs/jieba": 
"^1.7.2", "@tanstack/react-query": "^4.24.10", "@types/nprogress": "^0.2.0", @@ -35,7 +34,6 @@ "i18next": "^22.5.1", "immer": "^9.0.19", "jschardet": "^3.0.0", - "jsdom": "^22.1.0", "jsonwebtoken": "^9.0.2", "lodash": "^4.17.21", "mammoth": "^1.6.0", @@ -67,7 +65,6 @@ "@types/downloadjs": "^1.4.3", "@types/formidable": "^2.0.5", "@types/js-cookie": "^3.0.3", - "@types/jsdom": "^21.1.1", "@types/jsonwebtoken": "^9.0.3", "@types/lodash": "^4.14.191", "@types/multer": "^1.4.10", diff --git a/projects/app/public/docs/versionIntro.md b/projects/app/public/docs/versionIntro.md index b467ede7e..8ab736d6d 100644 --- a/projects/app/public/docs/versionIntro.md +++ b/projects/app/public/docs/versionIntro.md @@ -1,13 +1,11 @@ ### Fast GPT V4.6.2 -1. 新增 - 团队空间 -2. 新增 - 多路向量(多个向量映射一组数据) -3. 新增 - tts语音 -4. 新增 - 语音输入 -5. 新增 - 增强召回方式,提高召回精度 -6. 优化 - 知识库导出,可直接触发流下载,无需等待转圈圈 -7. [知识库结构详解](https://doc.fastgpt.in/docs/use-cases/datasetengine/) -8. [知识库提示词详解](https://doc.fastgpt.in/docs/use-cases/ai_settings/#引用模板--引用提示词) -9. [使用文档](https://doc.fastgpt.in/docs/intro/) -10. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow) -11. [点击查看商业版](https://doc.fastgpt.in/docs/commercial/) +1. 商业版新增 - web站点同步 +2. 新增 - 集合元数据记录 +3. 优化 - url 读取内容 +4. 优化 - 流读取文件,防止内存溢出 +5. [知识库结构详解](https://doc.fastgpt.in/docs/use-cases/datasetengine/) +6. [知识库提示词详解](https://doc.fastgpt.in/docs/use-cases/ai_settings/#引用模板--引用提示词) +7. [使用文档](https://doc.fastgpt.in/docs/intro/) +8. [点击查看高级编排介绍文档](https://doc.fastgpt.in/docs/workflow) +9. 
[点击查看商业版](https://doc.fastgpt.in/docs/commercial/) diff --git a/projects/app/public/locales/en/common.json b/projects/app/public/locales/en/common.json index 880f52a29..205b2cc51 100644 --- a/projects/app/public/locales/en/common.json +++ b/projects/app/public/locales/en/common.json @@ -3,7 +3,6 @@ "Cancel": "No", "Confirm": "Yes", "Create New": "Create", - "Dataset": "Dataset", "Export": "Export", "Folder": "Folder", "Move": "Move", @@ -129,6 +128,7 @@ "Choose": "Choose", "Close": "Close", "Collect": "Collect", + "Config": "Config", "Confirm": "Confirm", "Confirm Create": "Create", "Confirm Move": "Move here", @@ -138,7 +138,6 @@ "Course": "", "Create Failed": "Create Failed", "Create Success": "Create Success", - "Create Virtual File Failed": "Create Virtual File Failed", "Custom Title": "Custom Title", "Delete": "Delete", "Delete Failed": "Delete Failed", @@ -152,6 +151,7 @@ "Filed is repeat": "Filed is repeated", "Filed is repeated": "", "Input": "Input", + "Intro": "Intro", "Last Step": "Last", "Loading": "Loading", "Max credit": "Credit", @@ -190,6 +190,14 @@ "Update Time": "Update Time", "Upload File Failed": "Upload File Failed", "Username": "UserName", + "Website": "Website", + "choosable": "choosable", + "confirm": { + "Common Tip": "Operational Confirm" + }, + "empty": { + "Common Tip": "No data" + }, "error": { "unKnow": "There was an accident" }, @@ -207,11 +215,15 @@ "input": { "Repeat Value": "Repeat Value" }, + "link": { + "UnValid": "UnValid Link" + }, "speech": { "error tip": "Speech Failed" } }, "core": { + "Max Token": "MaxTokens", "ai": { "Model": "Model", "Prompt": "Prompt" @@ -238,12 +250,13 @@ "Speech model": "Speech model", "Speech speed": "Speed", "Test Listen": "Test", - "Test Listen Text": "Hello, this is FastGPT, how can I help you?", + "Test Listen Text": "Hello, this is a voice test, if you can hear this sentence, it means that the voice playback function is normal", "Web": "Browser (free)" } }, "chat": { "Audio Speech Error": "Audio 
Speech Error", + "Quote Amount": "Dataset Quote:{{amount}}", "Record": "Speech", "Restart": "Restart", "Select File": "Select file", @@ -252,23 +265,46 @@ "Speaking": "I'm listening...", "Stop Speak": "Stop Speak", "Type a message": "Input problem", + "quote": { + "Quote Tip": "Only the actual reference content is displayed here. If the data is updated, it will not be updated in real time", + "Read Quote": "Read Quote", + "Read Source": "Read Source" + }, "tts": { "Stop Speech": "Stop" } }, "dataset": { + "All Dataset": "All Dataset", "Choose Dataset": "Choose Dataset", + "Common Dataset": "Common Dataset", + "Common Dataset Desc": "Knowledge bases can be built by importing files, web links, or manual entry", "Create dataset": "Create Dataset", "Dataset": "Dataset", + "Dataset Type": "Dataset Type", + "Delete Website Tips": "Confirm to delete the website", + "Empty Dataset": "", + "Empty Dataset Tips": "There is no knowledge base yet, go create one!", + "Folder Dataset": "Folder", "Go Dataset": "To Dataset", + "Intro Placeholder": "This dataset has not yet been introduced~", "Quote Length": "Quote Length", "Read Dataset": "Read Dataset", "Search Top K": "Top K", "Set Empty Result Tip": ",Response empty text", + "Set Website Config": "Configuring Website", "Similarity": "Similarity", "Sync Time": "Update Time", "Virtual File": "Virtual File", + "Website Dataset": "Website Sync", + "Website Dataset Desc": "Web site synchronization allows you to build a knowledge base directly from a web link", "collection": { + "Click top config website": "Config", + "Empty Tip": "The collection is empty", + "Website Create Success": "Created successfully, data is being synchronized", + "Website Empty Tip": "No associated website yet,", + "Website Link": "Website Link", + "Website Sync": "Website", "metadata": { "Chunk Size": "Chunk Size", "Createtime": "Create Time", @@ -281,6 +317,10 @@ "source name": "Source Name", "source size": "Source Size" }, + "status": { + "active": "Ready", 
+ "syncing": "Syncing" + }, "training": { "type chunk": "Chunk", "type manual": "Manual", @@ -295,6 +335,10 @@ "file": "File", "folder": "Folder", "import": { + "Fetch Error": "Get link failed", + "Fetch Url": "Url", + "Fetch url placeholder": "Up to 10 links, one per line.", + "Fetch url tip": "Only static links can be read, please check the results", "Ideal chunk length": "Ideal chunk length", "Ideal chunk length Tips": "Segment by end symbol. We recommend that your document should be properly punctuated to ensure that each complete sentence length does not exceed this value \n Chinese document recommended 400~1000\n English document recommended 600~1200" }, @@ -316,6 +360,10 @@ }, "search mode": "Search Mode" }, + "status": { + "active": "Ready", + "syncing": "Syncing" + }, "test": { "Test": "Start", "Test Result": "Results", @@ -327,8 +375,18 @@ "test result tip": "The contents of the knowledge base are sorted according to their similarity to the test text, and you can adjust the corresponding text according to the test results. Note: The data in the test record may have been modified, clicking on a test data will show the latest data." }, "training": { + "Website Sync": "Website Sync", "type chunk": "Chunk", "type qa": "QA" + }, + "website": { + "Base Url": "BaseUrl", + "Config": "Website Configuring", + "Config Description": "The Web site synchronization function allows you to fill in the root address of a website, and the system will automatically crawl the relevant pages deeply for knowledge base training. Only crawls static websites, mainly project documents and blogs.", + "Confirm Create Tips": "Confirm to synchronize the site, the synchronization task will start later, please confirm!", + "Confirm Update Tips": "Are you sure to update the site configuration? The synchronization starts immediately with the new configuration. 
Please confirm", + "Selector": "Selector", + "Start Sync": "Start Sync" } }, "module": { @@ -434,7 +492,6 @@ "Create Training Data": "Training-{{filename}}", "Create Virtual File Success": "Create Virtual File Success", "Data Amount": "Data Amount", - "Ready": "Ready", "Select Collection": "Select Collection", "Select One Collection To Store": "Select the collection to store" }, @@ -466,6 +523,10 @@ "import csv tip": "Ensure that the CSV is in UTF-8 format; otherwise, garbled characters will be displayed", "test": { "noResult": "Search results are empty" + }, + "website": { + "Base Url": "BaseUrl", + "Selector": "Selector" } }, "error": { @@ -482,7 +543,6 @@ "Fetch Url": "Fetch Url", "If the imported file is garbled, please convert CSV to UTF-8 encoding format": "If the imported file is garbled, please convert CSV to UTF-8 encoding format", "Parse": "{{name}} Parsing...", - "Ready": "Ready", "Release the mouse to upload the file": "Release the mouse to upload the file", "Select a maximum of 10 files": "Select a maximum of 10 files", "Uploading": "Uploading: {{name}}, Progress: {{percent}}%", @@ -725,6 +785,10 @@ "ReRank": "ReRank", "Whisper": "Whisper", "bill username": "User" + }, + "moduleName": { + "index": "Index Generation", + "qa": "QA Generation" } } } diff --git a/projects/app/public/locales/zh/common.json b/projects/app/public/locales/zh/common.json index 2a8692851..ef1ac6a62 100644 --- a/projects/app/public/locales/zh/common.json +++ b/projects/app/public/locales/zh/common.json @@ -3,7 +3,6 @@ "Cancel": "取消", "Confirm": "确认", "Create New": "新建", - "Dataset": "知识库", "Export": "导出", "Folder": "文件夹", "Move": "移动", @@ -129,6 +128,7 @@ "Choose": "选择", "Close": "关闭", "Collect": "收藏", + "Config": "配置", "Confirm": "确认", "Confirm Create": "确认创建", "Confirm Move": "移动到这", @@ -138,7 +138,6 @@ "Course": "", "Create Failed": "创建异常", "Create Success": "创建成功", - "Create Virtual File Failed": "创建虚拟文件失败", "Custom Title": "自定义标题", "Delete": "删除", "Delete Failed": "删除失败", @@ 
-152,6 +151,7 @@ "Filed is repeat": "", "Filed is repeated": "字段重复了", "Input": "输入", + "Intro": "介绍", "Last Step": "上一步", "Loading": "加载中", "Max credit": "最大金额", @@ -190,6 +190,14 @@ "Update Time": "更新时间", "Upload File Failed": "上传文件失败", "Username": "用户名", + "Website": "网站", + "choosable": "可选", + "confirm": { + "Common Tip": "操作确认" + }, + "empty": { + "Common Tip": "没有什么数据噢~" + }, "error": { "unKnow": "出现了点意外~" }, @@ -207,11 +215,15 @@ "input": { "Repeat Value": "有重复的值" }, + "link": { + "UnValid": "无效的链接" + }, "speech": { "error tip": "语音转文字失败" } }, "core": { + "Max Token": "单条数据上限", "ai": { "Model": "AI 模型", "Prompt": "提示词" @@ -238,12 +250,13 @@ "Speech model": "语音模型", "Speech speed": "语速", "Test Listen": "试听", - "Test Listen Text": "你好,我是 FastGPT,有什么可以帮助你么?", + "Test Listen Text": "你好,这是语音测试,如果你能听到这句话,说明语音播放功能正常", "Web": "浏览器自带(免费)" } }, "chat": { "Audio Speech Error": "语音播报异常", + "Quote Amount": "知识库引用({{amount}}条)", "Record": "语音输入", "Restart": "重开对话", "Select File": "选择文件", @@ -252,23 +265,46 @@ "Speaking": "我在听,请说...", "Stop Speak": "停止录音", "Type a message": "输入问题", + "quote": { + "Quote Tip": "此处仅显示实际引用内容,若数据有更新,此处不会实时更新", + "Read Quote": "查看引用", + "Read Source": "查看来源" + }, "tts": { "Stop Speech": "停止" } }, "dataset": { + "All Dataset": "全部知识库", "Choose Dataset": "关联知识库", + "Common Dataset": "通用知识库", + "Common Dataset Desc": "可通过导入文件、网页链接或手动录入形式构建知识库", "Create dataset": "创建一个知识库", "Dataset": "知识库", + "Dataset Type": "知识库类型", + "Delete Website Tips": "确认删除该站点?", + "Empty Dataset": "", + "Empty Dataset Tips": "还没有知识库,快去创建一个吧!", + "Folder Dataset": "文件夹", "Go Dataset": "前往知识库", + "Intro Placeholder": "这个知识库还没有介绍~", "Quote Length": "引用内容长度", "Read Dataset": "查看知识库详情", "Search Top K": "单次搜索数量", "Set Empty Result Tip": ",未搜索到内容时回复指定内容", + "Set Website Config": "开始配置网站信息", "Similarity": "相似度", "Sync Time": "最后更新时间", "Virtual File": "虚拟文件", + "Website Dataset": "Web 站点同步", + "Website Dataset Desc": "Web 站点同步允许你直接使用一个网页链接构建知识库", "collection": { + "Click top config 
website": "点击配置网站", + "Empty Tip": "数据集空空如也", + "Website Create Success": "创建成功,正在同步数据", + "Website Empty Tip": "还没有关联网站,", + "Website Link": "Web 站点地址", + "Website Sync": "Web 站点同步", "metadata": { "Chunk Size": "分割大小", "Createtime": "创建时间", @@ -281,9 +317,13 @@ "source name": "来源名", "source size": "来源大小" }, + "status": { + "active": "已就绪", + "syncing": "同步中" + }, "training": { - "type manual": "手动", "type chunk": "直接分段", + "type manual": "手动", "type qa": "问答拆分" } }, @@ -295,6 +335,10 @@ "file": "文件", "folder": "目录", "import": { + "Fetch Error": "获取链接失败", + "Fetch Url": "网络链接", + "Fetch url placeholder": "最多10个链接,每行一个。", + "Fetch url tip": "仅支持读取静态链接,请注意检查结果", "Ideal chunk length": "理想分块长度", "Ideal chunk length Tips": "按结束符号进行分段。我们建议您的文档应合理的使用标点符号,以确保每个完整的句子长度不要超过该值\n中文文档建议400~1000\n英文文档建议600~1200" }, @@ -316,6 +360,10 @@ }, "search mode": "检索模式" }, + "status": { + "active": "已就绪", + "syncing": "同步中" + }, "test": { "Test": "测试", "Test Result": "测试结果", @@ -327,8 +375,18 @@ "test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意:测试记录中的数据可能已经被修改过,点击某条测试数据后将展示最新的数据。" }, "training": { + "Website Sync": "Web 站点同步", "type chunk": "直接分段", "type qa": "问答拆分" + }, + "website": { + "Base Url": "根地址", + "Config": "Web站点配置", + "Config Description": "Web 站点同步功能允许你填写一个网站的根地址,系统会自动深度抓取相关的网页进行知识库训练。仅会抓取静态的网站,以项目文档、博客为主。", + "Confirm Create Tips": "确认同步该站点,同步任务将随后开启,请确认!", + "Confirm Update Tips": "确认更新站点配置?会立即按新的配置开始同步,请确认!", + "Selector": "选择器", + "Start Sync": "开始同步" } }, "module": { @@ -434,7 +492,6 @@ "Create Training Data": "文件训练-{{filename}}", "Create Virtual File Success": "创建虚拟文件成功", "Data Amount": "数据总量", - "Ready": "已就绪", "Select Collection": "选择文件", "Select One Collection To Store": "选择一个文件进行存储" }, @@ -466,6 +523,10 @@ "import csv tip": "请确保CSV为UTF-8格式,否则会乱码", "test": { "noResult": "搜索结果为空" + }, + "website": { + "Base Url": "", + "Selector": "" } }, "error": { @@ -482,7 +543,6 @@ "Fetch Url": "链接读取", "If the imported file is garbled, please convert CSV to UTF-8 
encoding format": "如果导入文件乱码,请将 CSV 转成 UTF-8 编码格式", "Parse": "{{name}} 解析中...", - "Ready": "", "Release the mouse to upload the file": "松开鼠标上传文件", "Select a maximum of 10 files": "最多选择10个文件", "Uploading": "正在上传 {{name}},进度: {{percent}}%", @@ -725,6 +785,10 @@ "ReRank": "结果重排", "Whisper": "语音输入", "bill username": "用户" + }, + "moduleName": { + "index": "索引生成", + "qa": "QA 拆分" } } } diff --git a/projects/app/src/components/ChatBox/MessageInput.tsx b/projects/app/src/components/ChatBox/MessageInput.tsx index 8de13741a..22da4b3dd 100644 --- a/projects/app/src/components/ChatBox/MessageInput.tsx +++ b/projects/app/src/components/ChatBox/MessageInput.tsx @@ -74,8 +74,8 @@ const MessageInput = ({ try { const src = await compressImgFileAndUpload({ file: file.rawFile, - maxW: 1000, - maxH: 1000, + maxW: 4329, + maxH: 4329, maxSize: 1024 * 1024 * 5, // 30 day expired. expiredTime: addDays(new Date(), 30) diff --git a/projects/app/src/components/ChatBox/QuoteModal.tsx b/projects/app/src/components/ChatBox/QuoteModal.tsx index 95b363430..9b6c5a9b9 100644 --- a/projects/app/src/components/ChatBox/QuoteModal.tsx +++ b/projects/app/src/components/ChatBox/QuoteModal.tsx @@ -71,9 +71,9 @@ const QuoteModal = ({ iconSrc="/imgs/modal/quote.svg" title={ - 知识库引用({rawSearch.length}条) - - 注意: 修改知识库内容成功后,此处不会显示变更情况。点击编辑后,会显示知识库最新的内容。 + {t('core.chat.Quote Amount', { amount: rawSearch.length })} + + {t('core.chat.quote.Quote Tip')} } diff --git a/projects/app/src/components/ChatBox/ResponseTags.tsx b/projects/app/src/components/ChatBox/ResponseTags.tsx index 75938f12b..6116b7a6b 100644 --- a/projects/app/src/components/ChatBox/ResponseTags.tsx +++ b/projects/app/src/components/ChatBox/ResponseTags.tsx @@ -1,7 +1,7 @@ import React, { useMemo, useState } from 'react'; import type { ChatHistoryItemResType } from '@fastgpt/global/core/chat/api.d'; import type { ChatItemType } from '@fastgpt/global/core/chat/type'; -import { Flex, BoxProps, useDisclosure, Image, useTheme } from '@chakra-ui/react'; 
+import { Flex, BoxProps, useDisclosure, Image, useTheme, Box } from '@chakra-ui/react'; import { useTranslation } from 'next-i18next'; import { useSystemStore } from '@/web/common/system/useSystemStore'; import type { SearchDataResponseItemType } from '@fastgpt/global/core/dataset/type'; @@ -11,6 +11,8 @@ import MyTooltip from '../MyTooltip'; import { FlowNodeTypeEnum } from '@fastgpt/global/core/module/node/constant'; import { getSourceNameIcon } from '@fastgpt/global/core/dataset/utils'; import ChatBoxDivider from '@/components/core/chat/Divider'; +import MyIcon from '../Icon'; +import { getFileAndOpen } from '@/web/core/dataset/utils'; const QuoteModal = dynamic(() => import('./QuoteModal'), { ssr: false }); const ContextModal = dynamic(() => import('./ContextModal'), { ssr: false }); @@ -59,6 +61,7 @@ const ResponseTags = ({ responseData = [] }: { responseData?: ChatHistoryItemRes .flat() .map((item) => ({ sourceName: item.sourceName, + sourceId: item.sourceId, icon: getSourceNameIcon({ sourceId: item.sourceId, sourceName: item.sourceName }) })), historyPreview: chatData?.historyPreview, @@ -83,18 +86,67 @@ const ResponseTags = ({ responseData = [] }: { responseData?: ChatHistoryItemRes alignItems={'center'} flexWrap={'wrap'} fontSize={'sm'} - cursor={'pointer'} border={theme.borders.sm} py={1} px={2} borderRadius={'md'} _hover={{ - bg: 'myBlue.100' + '.controller': { + display: 'flex' + } }} + overflow={'hidden'} + position={'relative'} onClick={() => setQuoteModalData(quoteList)} > {''} - {item.sourceName} + + {item.sourceName} + + + + + + + {item.sourceId && ( + + { + e.stopPropagation(); + + if (!item.sourceId) return; + await getFileAndOpen(item.sourceId); + }} + /> + + )} + ))} diff --git a/projects/app/src/components/ChatBox/index.module.scss b/projects/app/src/components/ChatBox/index.module.scss index c88f6c8f9..740ad9b85 100644 --- a/projects/app/src/components/ChatBox/index.module.scss +++ b/projects/app/src/components/ChatBox/index.module.scss @@ 
-1,14 +1,6 @@ .stopIcon { animation: zoomStopIcon 0.4s infinite alternate; } -@keyframes zoomStopIcon { - 0% { - transform: scale(0.8); - } - 100% { - transform: scale(1.2); - } -} .statusAnimation { animation: statusBox 0.8s linear infinite alternate; diff --git a/projects/app/src/components/EmptyTip/index.tsx b/projects/app/src/components/EmptyTip/index.tsx index 3d5e00dd5..d36a66e8b 100644 --- a/projects/app/src/components/EmptyTip/index.tsx +++ b/projects/app/src/components/EmptyTip/index.tsx @@ -1,17 +1,19 @@ import React from 'react'; import { Flex, Box, FlexProps } from '@chakra-ui/react'; import MyIcon from '../Icon'; +import { useTranslation } from 'next-i18next'; type Props = FlexProps & { - text?: string | null; + text?: string | React.ReactNode; }; const EmptyTip = ({ text, ...props }: Props) => { + const { t } = useTranslation(); return ( - {text || '没有什么数据噢~'} + {text || t('common.empty.Common Tip')} ); diff --git a/projects/app/src/components/Icon/icons/common/confirm/commonTip.svg b/projects/app/src/components/Icon/icons/common/confirm/commonTip.svg new file mode 100644 index 000000000..49b931072 --- /dev/null +++ b/projects/app/src/components/Icon/icons/common/confirm/commonTip.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/projects/app/src/components/Icon/icons/common/confirm/deleteTip.svg b/projects/app/src/components/Icon/icons/common/confirm/deleteTip.svg new file mode 100644 index 000000000..f11aca957 --- /dev/null +++ b/projects/app/src/components/Icon/icons/common/confirm/deleteTip.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/projects/app/src/components/Icon/icons/common/routePushLight.svg b/projects/app/src/components/Icon/icons/common/routePushLight.svg new file mode 100644 index 000000000..e20ef16e6 --- /dev/null +++ b/projects/app/src/components/Icon/icons/common/routePushLight.svg @@ -0,0 +1,11 @@ + + + + + \ No newline at end of file diff --git a/projects/app/src/components/Icon/icons/common/viewLight.svg 
b/projects/app/src/components/Icon/icons/common/viewLight.svg new file mode 100644 index 000000000..29f97bbc0 --- /dev/null +++ b/projects/app/src/components/Icon/icons/common/viewLight.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/projects/app/src/components/Icon/icons/core/dataset/commonDataset.svg b/projects/app/src/components/Icon/icons/core/dataset/commonDataset.svg new file mode 100644 index 000000000..c49518384 --- /dev/null +++ b/projects/app/src/components/Icon/icons/core/dataset/commonDataset.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/projects/app/src/components/Icon/icons/core/dataset/folderDataset.svg b/projects/app/src/components/Icon/icons/core/dataset/folderDataset.svg new file mode 100644 index 000000000..602393396 --- /dev/null +++ b/projects/app/src/components/Icon/icons/core/dataset/folderDataset.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/projects/app/src/components/Icon/icons/core/dataset/websiteDataset.svg b/projects/app/src/components/Icon/icons/core/dataset/websiteDataset.svg new file mode 100644 index 000000000..cd6efe529 --- /dev/null +++ b/projects/app/src/components/Icon/icons/core/dataset/websiteDataset.svg @@ -0,0 +1,14 @@ + + + + + + + \ No newline at end of file diff --git a/projects/app/src/components/Icon/index.tsx b/projects/app/src/components/Icon/index.tsx index 465e75cc4..aa6b75850 100644 --- a/projects/app/src/components/Icon/index.tsx +++ b/projects/app/src/components/Icon/index.tsx @@ -114,7 +114,14 @@ const iconPaths = { 'core/dataset/modeEmbFTRerank': () => import('./icons/core/dataset/modeEmbFTRerank.svg'), 'core/app/variable/input': () => import('./icons/core/app/variable/input.svg'), 'core/app/variable/textarea': () => import('./icons/core/app/variable/textarea.svg'), - 'core/app/variable/select': () => import('./icons/core/app/variable/select.svg') + 'core/app/variable/select': () => import('./icons/core/app/variable/select.svg'), + 'core/dataset/websiteDataset': () => 
import('./icons/core/dataset/websiteDataset.svg'), + 'core/dataset/commonDataset': () => import('./icons/core/dataset/commonDataset.svg'), + 'core/dataset/folderDataset': () => import('./icons/core/dataset/folderDataset.svg'), + 'common/confirm/deleteTip': () => import('./icons/common/confirm/deleteTip.svg'), + 'common/confirm/commonTip': () => import('./icons/common/confirm/commonTip.svg'), + 'common/routePushLight': () => import('./icons/common/routePushLight.svg'), + 'common/viewLight': () => import('./icons/common/viewLight.svg') }; export type IconName = keyof typeof iconPaths; diff --git a/projects/app/src/components/Layout/index.tsx b/projects/app/src/components/Layout/index.tsx index b1d262153..c5ebb79c2 100644 --- a/projects/app/src/components/Layout/index.tsx +++ b/projects/app/src/components/Layout/index.tsx @@ -111,7 +111,7 @@ const Layout = ({ children }: { children: JSX.Element }) => { )} - + {!!userInfo && } ); diff --git a/projects/app/src/components/Markdown/chat/Image.tsx b/projects/app/src/components/Markdown/chat/Image.tsx index 7fccb2b6a..87a6da286 100644 --- a/projects/app/src/components/Markdown/chat/Image.tsx +++ b/projects/app/src/components/Markdown/chat/Image.tsx @@ -1,30 +1,33 @@ -import { Box, Flex } from '@chakra-ui/react'; +import { Box, Flex, Grid } from '@chakra-ui/react'; import MdImage from '../img/Image'; import { useMemo } from 'react'; const ImageBlock = ({ images }: { images: string }) => { const formatData = useMemo( () => - images.split('\n').map((item) => { - try { - return JSON.parse(item) as { src: string }; - } catch (error) { - return { src: '' }; - } - }), + images + .split('\n') + .filter((item) => item) + .map((item) => { + try { + return JSON.parse(item) as { src: string }; + } catch (error) { + return { src: '' }; + } + }), [images] ); return ( - + {formatData.map(({ src }) => { return ( - + ); })} - + ); }; diff --git a/projects/app/src/components/Markdown/img/Image.tsx 
b/projects/app/src/components/Markdown/img/Image.tsx index 7dfaa6407..0c607f6cd 100644 --- a/projects/app/src/components/Markdown/img/Image.tsx +++ b/projects/app/src/components/Markdown/img/Image.tsx @@ -29,7 +29,6 @@ const MdImage = ({ src }: { src?: string }) => { borderRadius={'md'} src={src} alt={''} - maxH={'150px'} fallbackSrc={'/imgs/errImg.png'} fallbackStrategy={'onError'} cursor={succeed ? 'pointer' : 'default'} diff --git a/projects/app/src/components/MyModal/index.tsx b/projects/app/src/components/MyModal/index.tsx index 4c1c83f9f..fd0766e9b 100644 --- a/projects/app/src/components/MyModal/index.tsx +++ b/projects/app/src/components/MyModal/index.tsx @@ -9,6 +9,7 @@ import { Box, Image } from '@chakra-ui/react'; +import MyIcon from '../Icon'; export interface MyModalProps extends ModalContentProps { iconSrc?: string; @@ -56,7 +57,15 @@ const MyModal = ({ roundedTop={'lg'} py={'10px'} > - {iconSrc && } + {iconSrc && ( + <> + {iconSrc.startsWith('/') ? ( + + ) : ( + + )} + + )} {title} {onClose && } diff --git a/projects/app/src/components/common/MyRadio/index.tsx b/projects/app/src/components/common/MyRadio/index.tsx index ff83f3413..77153acef 100644 --- a/projects/app/src/components/common/MyRadio/index.tsx +++ b/projects/app/src/components/common/MyRadio/index.tsx @@ -1,5 +1,5 @@ import React from 'react'; -import { Box, Flex, useTheme, Grid, type GridProps, theme } from '@chakra-ui/react'; +import { Box, Flex, useTheme, Grid, type GridProps, theme, Image } from '@chakra-ui/react'; import MyIcon from '@/components/Icon'; import { useTranslation } from 'next-i18next'; @@ -45,8 +45,7 @@ const MyRadio = ({ : { bg: 'myWhite.300', _hover: { - bg: '#f5f8ff', - borderColor: '#b2ccff' + borderColor: 'myBlue.500' } })} _after={{ @@ -71,7 +70,15 @@ const MyRadio = ({ }} onClick={() => onChange(item.value)} > - {!!item.icon && } + {!!item.icon && ( + <> + {item.icon.startsWith('/') ? 
( + {''} + ) : ( + + )} + + )} {t(item.title)} {!!item.desc && ( diff --git a/projects/app/src/components/core/module/DatasetSelectModal.tsx b/projects/app/src/components/core/module/DatasetSelectModal.tsx index 9ed61857c..f5293d223 100644 --- a/projects/app/src/components/core/module/DatasetSelectModal.tsx +++ b/projects/app/src/components/core/module/DatasetSelectModal.tsx @@ -7,29 +7,20 @@ import { ModalBody, ModalFooter, useTheme, - Textarea, Grid, - Divider, - Switch, - Image + Divider } from '@chakra-ui/react'; import Avatar from '@/components/Avatar'; -import { useForm } from 'react-hook-form'; -import { QuestionOutlineIcon } from '@chakra-ui/icons'; import type { SelectedDatasetType } from '@fastgpt/global/core/module/api.d'; import { useToast } from '@/web/common/hooks/useToast'; -import MySlider from '@/components/Slider'; import MyTooltip from '@/components/MyTooltip'; -import MyModal from '@/components/MyModal'; import MyIcon from '@/components/Icon'; -import { DatasetSearchModeEnum, DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; +import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; import { useTranslation } from 'next-i18next'; import { useDatasetStore } from '@/web/core/dataset/store/dataset'; -import { feConfigs } from '@/web/common/system/staticData'; import DatasetSelectContainer, { useDatasetSelect } from '@/components/core/dataset/SelectModal'; import { useLoading } from '@/web/common/hooks/useLoading'; import EmptyTip from '@/components/EmptyTip'; -import { AppSimpleEditFormType } from '@fastgpt/global/core/app/type'; export const DatasetSelectModal = ({ isOpen, @@ -132,9 +123,9 @@ export const DatasetSelectModal = ({ { if (item.type === DatasetTypeEnum.folder) { setParentId(item._id); - } else if (item.type === DatasetTypeEnum.dataset) { + } else { const vectorModel = selectedDatasets[0]?.vectorModel?.model; if (vectorModel && vectorModel !== item.vectorModel.model) { diff --git 
a/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx b/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx index 1bd82767f..32128132b 100644 --- a/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx +++ b/projects/app/src/components/support/user/team/TeamManageModal/EditModal.tsx @@ -51,8 +51,8 @@ function EditModal({ try { const src = await compressImgFileAndUpload({ file, - maxW: 100, - maxH: 100 + maxW: 300, + maxH: 300 }); setValue('avatar', src); setRefresh((state) => !state); diff --git a/projects/app/src/constants/dataset.ts b/projects/app/src/constants/dataset.ts index 3f05385b8..46ba6bf98 100644 --- a/projects/app/src/constants/dataset.ts +++ b/projects/app/src/constants/dataset.ts @@ -14,7 +14,8 @@ export const defaultDatasetDetail: DatasetItemType = { type: 'dataset', avatar: '/icon/logo.svg', name: '', - tags: [], + intro: '', + status: 'active', permission: 'private', isOwner: false, canWrite: false, @@ -36,7 +37,8 @@ export const defaultCollectionDetail: DatasetCollectionItemType = { type: 'dataset', avatar: '/icon/logo.svg', name: '', - tags: [], + intro: '', + status: 'active', permission: 'private', vectorModel: defaultVectorModels[0].model, agentModel: defaultQAModels[0].model diff --git a/projects/app/src/global/core/api/datasetReq.d.ts b/projects/app/src/global/core/api/datasetReq.d.ts index f8e156d0b..ed6c41052 100644 --- a/projects/app/src/global/core/api/datasetReq.d.ts +++ b/projects/app/src/global/core/api/datasetReq.d.ts @@ -12,15 +12,6 @@ import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant' import type { LLMModelItemType } from '@fastgpt/global/core/ai/model.d'; /* ===== dataset ===== */ -export type DatasetUpdateParams = { - id: string; - parentId?: string; - tags?: string[]; - name?: string; - avatar?: string; - permission?: `${PermissionTypeEnum}`; - agentModel?: LLMModelItemType; -}; /* ======= collections =========== */ export type 
GetDatasetCollectionsProps = RequestPaging & { @@ -30,16 +21,7 @@ export type GetDatasetCollectionsProps = RequestPaging & { simple?: boolean; selectFolder?: boolean; }; -export type CreateDatasetCollectionParams = { - datasetId: string; - parentId?: string; - name: string; - type: `${DatasetCollectionTypeEnum}`; - trainingType?: `${DatasetCollectionTrainingModeEnum}`; - chunkSize?: number; - fileId?: string; - rawLink?: string; -}; + export type UpdateDatasetCollectionParams = { id: string; parentId?: string; diff --git a/projects/app/src/global/core/dataset/api.d.ts b/projects/app/src/global/core/dataset/api.d.ts index 1c9c0d9be..1e5727ea5 100644 --- a/projects/app/src/global/core/dataset/api.d.ts +++ b/projects/app/src/global/core/dataset/api.d.ts @@ -1,5 +1,9 @@ import { PushDatasetDataChunkProps } from '@fastgpt/global/core/dataset/api'; -import { DatasetSearchModeEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; +import { + DatasetSearchModeEnum, + DatasetTypeEnum, + TrainingModeEnum +} from '@fastgpt/global/core/dataset/constant'; import { DatasetDataIndexItemType, SearchDataResponseItemType @@ -8,8 +12,9 @@ import { /* ================= dataset ===================== */ export type CreateDatasetParams = { parentId?: string; + type: `${DatasetTypeEnum}`; name: string; - tags: string; + intro: string; avatar: string; vectorModel?: string; agentModel?: string; diff --git a/projects/app/src/global/core/prompt/AIChat.ts b/projects/app/src/global/core/prompt/AIChat.ts index ebfbd3c1d..980617f16 100644 --- a/projects/app/src/global/core/prompt/AIChat.ts +++ b/projects/app/src/global/core/prompt/AIChat.ts @@ -27,57 +27,56 @@ export const Prompt_QuotePromptList: PromptTemplateItem[] = [ { title: '标准模板', desc: '', - value: `你的背景知识: + value: `你的知识库: """ {{quote}} """ -对话要求: -1. 背景知识是最新的实时的信息,使用背景知识回答问题。 -2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。 -3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。 -4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。 +回答要求: +1. 
优先使用知识库内容回答问题。 +2. 你可以回答我不知道。 +3. 不要提及你是从知识库获取的知识。 +4. 知识库包含 markdown 内容时,按 markdown 格式返回。 我的问题是:"{{question}}"` }, { title: '问答模板', desc: '', - value: `你的背景知识: + value: `你的知识库: """ {{quote}} """ -对话要求: -1. 背景知识是最新的实时的信息,使用背景知识回答问题,其中 instruction 是相关介绍,output 是预期回答或补充。 -2. 优先使用背景知识的内容回答我的问题,答案应与背景知识严格一致。 -3. 背景知识无法回答我的问题时,可以忽略背景知识,根据你的知识来自由回答。 -4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。 +回答要求: +1. 优先使用知识库内容回答问题,其中 instruction 是相关介绍,output 是预期回答或补充。 +2. 你可以回答我不知道。 +3. 不要提及你是从知识库获取的知识。 +4. 知识库包含 markdown 内容时,按 markdown 格式返回。 我的问题是:"{{question}}"` }, { title: '标准严格模板', desc: '', - value: `你的背景知识: + value: `你的知识库: """ {{quote}} """ -对话要求: -1. 背景知识是最新的实时的信息,是你的唯一信息来源,使用背景知识回答问题。 -2. 优先使用背景知识回答我的问题,答案与背景知识完全一致,无需做其他回答。 -3. 背景知识与问题无关,或背景知识无法回答本次问题时,则拒绝回答本次问题:“我不太清除xxx”。 -4. 使用对话的风格,自然的回答问题。包含markdown内容,需按markdown格式返回。 +回答要求: +1. 仅使用知识库内容回答问题。 +2. 与知识库无关的问题,你直接回答我不知道。 +3. 不要提及你是从知识库获取的知识。 +4. 知识库包含 markdown 内容时,按 markdown 格式返回。 我的问题是:"{{question}}"` }, { title: '严格问答模板', desc: '', - value: `你的背景知识: + value: `你的知识库: """ {{quote}} """ -对话要求: -1. 背景知识是最新的实时的信息,是你的唯一信息来源,使用背景知识回答问题。 -2. 在背景知识的 JSON 中,question 是相关问题,answer 是已知答案。 -3. 选择 answer 中的内容作为答案,要求答案与 answer 完全一致,无需做其他回答。 -4. answer 中的答案无法满足问题,直接回复:“我不太清除xxx”。 +回答要求: +1. 从知识库中选择一个合适的答案进行回答,其中 instruction 是相关问题,answer 是已知答案。 +2. 与知识库无关的问题,你直接回答我不知道。 +3. 
不要提及你是从知识库获取的知识。 我的问题是:"{{question}}"` } ]; diff --git a/projects/app/src/pages/account/components/BillDetail.tsx b/projects/app/src/pages/account/components/BillDetail.tsx index 4357556c7..58759dcfc 100644 --- a/projects/app/src/pages/account/components/BillDetail.tsx +++ b/projects/app/src/pages/account/components/BillDetail.tsx @@ -35,7 +35,7 @@ const BillDetail = ({ bill, onClose }: { bill: BillItemType; onClose: () => void 用户: - {bill.memberName} + {t(bill.memberName)} 订单号: diff --git a/projects/app/src/pages/account/components/Info.tsx b/projects/app/src/pages/account/components/Info.tsx index 47cab4af6..42d838781 100644 --- a/projects/app/src/pages/account/components/Info.tsx +++ b/projects/app/src/pages/account/components/Info.tsx @@ -96,8 +96,8 @@ const UserInfo = () => { try { const src = await compressImgFileAndUpload({ file, - maxW: 100, - maxH: 100 + maxW: 300, + maxH: 300 }); onclickSave({ diff --git a/projects/app/src/pages/api/admin/initv46-2.ts b/projects/app/src/pages/api/admin/initv46-2.ts index cbb1f83ab..f0831cdb3 100644 --- a/projects/app/src/pages/api/admin/initv46-2.ts +++ b/projects/app/src/pages/api/admin/initv46-2.ts @@ -1,7 +1,7 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; -import { delay } from '@/utils/tools'; +import { delay } from '@fastgpt/global/common/system/utils'; import { PgClient } from '@fastgpt/service/common/pg'; import { DatasetDataIndexTypeEnum, diff --git a/projects/app/src/pages/api/admin/initv46-fix.ts b/projects/app/src/pages/api/admin/initv46-fix.ts index 1aeb05f2a..6703eee3a 100644 --- a/projects/app/src/pages/api/admin/initv46-fix.ts +++ b/projects/app/src/pages/api/admin/initv46-fix.ts @@ -1,12 +1,9 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; -import { 
delay } from '@/utils/tools'; +import { delay } from '@fastgpt/global/common/system/utils'; import { PgClient } from '@fastgpt/service/common/pg'; -import { - DatasetDataIndexTypeEnum, - PgDatasetTableName -} from '@fastgpt/global/core/dataset/constant'; +import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant'; import { authCert } from '@fastgpt/service/support/permission/auth/common'; import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; diff --git a/projects/app/src/pages/api/admin/initv46.ts b/projects/app/src/pages/api/admin/initv46.ts index e9f3dd822..9ed582bde 100644 --- a/projects/app/src/pages/api/admin/initv46.ts +++ b/projects/app/src/pages/api/admin/initv46.ts @@ -8,7 +8,7 @@ import { } from '@fastgpt/service/support/user/team/controller'; import { MongoUser } from '@fastgpt/service/support/user/schema'; import { UserModelSchema } from '@fastgpt/global/support/user/type'; -import { delay } from '@/utils/tools'; +import { delay } from '@fastgpt/global/common/system/utils'; import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; import { PermissionTypeEnum } from '@fastgpt/global/support/permission/constant'; import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; diff --git a/projects/app/src/pages/api/admin/initv462-2.ts b/projects/app/src/pages/api/admin/initv462-2.ts index 8212c0d04..cdebbb709 100644 --- a/projects/app/src/pages/api/admin/initv462-2.ts +++ b/projects/app/src/pages/api/admin/initv462-2.ts @@ -1,7 +1,7 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; -import { delay } from '@/utils/tools'; +import { delay } from '@fastgpt/global/common/system/utils'; import { authCert } from '@fastgpt/service/support/permission/auth/common'; import { MongoApp } from '@fastgpt/service/core/app/schema'; import { FlowNodeInputTypeEnum, 
FlowNodeTypeEnum } from '@fastgpt/global/core/module/node/constant'; diff --git a/projects/app/src/pages/api/admin/initv462.ts b/projects/app/src/pages/api/admin/initv462.ts index d734588ca..f9616ca42 100644 --- a/projects/app/src/pages/api/admin/initv462.ts +++ b/projects/app/src/pages/api/admin/initv462.ts @@ -1,7 +1,7 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; -import { delay } from '@/utils/tools'; +import { delay } from '@fastgpt/global/common/system/utils'; import { authCert } from '@fastgpt/service/support/permission/auth/common'; import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; import { jiebaSplit } from '@/service/core/dataset/utils'; @@ -17,10 +17,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) console.log( 'total', - await MongoDatasetData.countDocuments({ fullTextToken: { $exists: false } }) + await MongoDatasetData.countDocuments({ + fullTextToken: { $exists: false }, + updateTime: { $lt: new Date() } + }) ); - await initFullTextToken(limit); + await initFullTextToken(limit, new Date()); jsonRes(res, { message: 'success' @@ -34,9 +37,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) }); } } -export async function initFullTextToken(limit = 50): Promise { +export async function initFullTextToken(limit = 50, endDate: Date): Promise { try { - const dataList = await MongoDatasetData.find({ fullTextToken: { $exists: false } }, '_id q a') + const dataList = await MongoDatasetData.find( + { fullTextToken: { $exists: false }, updateTime: { $lt: endDate } }, + '_id q a' + ) .limit(limit) .lean(); if (dataList.length === 0) return; @@ -56,9 +62,9 @@ export async function initFullTextToken(limit = 50): Promise { success += result.filter((item) => item.status === 'fulfilled').length; console.log(`success: ${success}`); - return 
initFullTextToken(limit); + return initFullTextToken(limit, endDate); } catch (error) { await delay(1000); - return initFullTextToken(limit); + return initFullTextToken(limit, endDate); } } diff --git a/projects/app/src/pages/api/admin/initv463-2.ts b/projects/app/src/pages/api/admin/initv463-2.ts new file mode 100644 index 000000000..1681f98f5 --- /dev/null +++ b/projects/app/src/pages/api/admin/initv463-2.ts @@ -0,0 +1,62 @@ +import type { NextApiRequest, NextApiResponse } from 'next'; +import { jsonRes } from '@fastgpt/service/common/response'; +import { connectToDatabase } from '@/service/mongo'; +import { delay } from '@fastgpt/global/common/system/utils'; +import { authCert } from '@fastgpt/service/support/permission/auth/common'; +import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; +import { jiebaSplit } from '@/service/core/dataset/utils'; + +let success = 0; +/* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */ +export default async function handler(req: NextApiRequest, res: NextApiResponse) { + try { + const { limit = 50 } = req.body as { limit: number }; + await authCert({ req, authRoot: true }); + await connectToDatabase(); + success = 0; + + console.log('total', await MongoDatasetData.countDocuments({ inited: { $exists: false } })); + + await initFullTextToken(limit); + + jsonRes(res, { + message: 'success' + }); + } catch (error) { + console.log(error); + + jsonRes(res, { + code: 500, + error + }); + } +} +export async function initFullTextToken(limit = 50): Promise { + try { + const dataList = await MongoDatasetData.find({ inited: { $exists: false } }, '_id q a') + .limit(limit) + .lean(); + if (dataList.length === 0) return; + + const result = await Promise.allSettled( + dataList.map((item) => { + const text = item.q + (item.a || ''); + const tokens = jiebaSplit({ text }); + + return MongoDatasetData.findByIdAndUpdate(item._id, { + $set: { + inited: true, + fullTextToken: tokens + } + }); + }) + ); + + success += result.filter((item) 
=> item.status === 'fulfilled').length; + console.log(`success: ${success}`); + return initFullTextToken(limit); + } catch (error) { + await delay(1000); + return initFullTextToken(limit); + } +} diff --git a/projects/app/src/pages/api/admin/initv463.ts b/projects/app/src/pages/api/admin/initv463.ts index b64423c49..446d4f69f 100644 --- a/projects/app/src/pages/api/admin/initv463.ts +++ b/projects/app/src/pages/api/admin/initv463.ts @@ -4,7 +4,8 @@ import { connectToDatabase } from '@/service/mongo'; import { authCert } from '@fastgpt/service/support/permission/auth/common'; import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema'; import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; -import { TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; +import { DatasetStatusEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constant'; +import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; let success = 0; /* pg 中的数据搬到 mongo dataset.datas 中,并做映射 */ @@ -15,32 +16,85 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) await connectToDatabase(); success = 0; - await MongoDatasetCollection.updateMany({}, [ + await MongoDatasetCollection.updateMany({ createTime: { $exists: false } }, [ + { + $set: { + createTime: '$updateTime' + } + } + ]); + await MongoDatasetCollection.updateMany({ trainingType: { $exists: false } }, [ { $set: { - createTime: '$updateTime', trainingType: { $cond: { if: { $ifNull: ['$a', false] }, then: TrainingModeEnum.qa, else: TrainingModeEnum.chunk } - }, - chunkSize: 0, - fileId: '$metadata.fileId', + } + } + } + ]); + await MongoDatasetCollection.updateMany({ chunkSize: { $exists: false } }, [ + { + $set: { + chunkSize: 0 + } + } + ]); + await MongoDatasetCollection.updateMany({ fileId: { $exists: false } }, [ + { + $set: { + fileId: '$metadata.fileId' + } + } + ]); + await MongoDatasetCollection.updateMany({ rawLink: { $exists: 
false } }, [ + { + $set: { rawLink: '$metadata.rawLink' } } ]); await MongoDatasetData.updateMany( - {}, + { chunkIndex: { $exists: false } }, + { + chunkIndex: 0 + } + ); + await MongoDatasetData.updateMany( + { updateTime: { $exists: false } }, { - chunkIndex: 0, updateTime: new Date() } ); + await MongoDataset.updateMany( + { status: { $exists: false } }, + { + $set: { + status: DatasetStatusEnum.active + } + } + ); + + // dataset tags to intro + await MongoDataset.updateMany({ tags: { $exists: true } }, [ + { + $set: { + intro: { + $reduce: { + input: '$tags', + initialValue: '', + in: { $concat: ['$$value', ' ', '$$this'] } + } + } + } + } + ]); + jsonRes(res, { message: 'success' }); diff --git a/projects/app/src/pages/api/admin/timeTasks/checkUnValidDatasetFiles.ts b/projects/app/src/pages/api/admin/timeTasks/checkUnValidDatasetFiles.ts new file mode 100644 index 000000000..ed256c154 --- /dev/null +++ b/projects/app/src/pages/api/admin/timeTasks/checkUnValidDatasetFiles.ts @@ -0,0 +1,92 @@ +import type { NextApiRequest, NextApiResponse } from 'next'; +import { jsonRes } from '@fastgpt/service/common/response'; +import { connectToDatabase } from '@/service/mongo'; +import { authCert } from '@fastgpt/service/support/permission/auth/common'; +import { delFileById, getGFSCollection } from '@fastgpt/service/common/file/gridfs/controller'; +import { addLog } from '@fastgpt/service/common/mongo/controller'; +import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; +import { delay } from '@fastgpt/global/common/system/utils'; + +/* + check dataset.files data. 
If there is no match in dataset.collections, delete it +*/ +let deleteFileAmount = 0; + +export default async function handler(req: NextApiRequest, res: NextApiResponse) { + try { + const { + startDay = 10, + endDay = 3, + limit = 30 + } = req.body as { startDay?: number; endDay?: number; limit?: number }; + await authCert({ req, authRoot: true }); + await connectToDatabase(); + + // start: now - maxDay, end: now - 3 day + const start = new Date(Date.now() - startDay * 24 * 60 * 60 * 1000); + const end = new Date(Date.now() - endDay * 24 * 60 * 60 * 1000); + deleteFileAmount = 0; + + checkFiles(start, end, limit); + + jsonRes(res, { + message: 'success' + }); + } catch (error) { + addLog.error(`check valid dataset files error`, error); + + jsonRes(res, { + code: 500, + error + }); + } +} + +export async function checkFiles(start: Date, end: Date, limit: number) { + const collection = getGFSCollection('dataset'); + const where = { + uploadDate: { $gte: start, $lte: end } + }; + + // 1. get all _id + const ids = await collection + .find(where, { + projection: { + _id: 1 + } + }) + .toArray(); + console.log('total files', ids.length); + + for (let i = 0; i < limit; i++) { + check(i); + } + + async function check(index: number): Promise { + const id = ids[index]; + if (!id) { + console.log(`检测完成,共删除 ${deleteFileAmount} 个无效文件`); + + return; + } + try { + const { _id } = id; + + // 2. find fileId in dataset.collections + const hasCollection = await MongoDatasetCollection.countDocuments({ fileId: _id }); + + // 3. 
if not found, delete file + if (hasCollection === 0) { + await delFileById({ bucketName: 'dataset', fileId: String(_id) }); + console.log('delete file', _id); + deleteFileAmount++; + } + index % 100 === 0 && console.log(index); + return check(index + limit); + } catch (error) { + console.log(error); + await delay(2000); + return check(index); + } + } +} diff --git a/projects/app/src/pages/api/common/file/read.ts b/projects/app/src/pages/api/common/file/read.ts index 57d3457b7..576e3ead8 100644 --- a/projects/app/src/pages/api/common/file/read.ts +++ b/projects/app/src/pages/api/common/file/read.ts @@ -2,8 +2,8 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; import { authFileToken } from '@fastgpt/service/support/permission/controller'; -import jschardet from 'jschardet'; -import { getDownloadBuf, getFileById } from '@fastgpt/service/common/file/gridfs/controller'; +import { detect } from 'jschardet'; +import { getDownloadStream, getFileById } from '@fastgpt/service/common/file/gridfs/controller'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -11,24 +11,43 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const { token } = req.query as { token: string }; - const { fileId, teamId, tmbId, bucketName } = await authFileToken(token); + const { fileId, bucketName } = await authFileToken(token); if (!fileId) { throw new Error('fileId is empty'); } - const [file, buffer] = await Promise.all([ + const [file, encodeStream] = await Promise.all([ getFileById({ bucketName, fileId }), - getDownloadBuf({ bucketName, fileId }) + getDownloadStream({ bucketName, fileId }) ]); - const encoding = jschardet.detect(buffer)?.encoding; + // get encoding + let buffers: Buffer = Buffer.from([]); + for await (const chunk of encodeStream) { + buffers = Buffer.concat([buffers, chunk]); + 
if (buffers.length > 10) { + encodeStream.abort(); + break; + } + } + + const encoding = detect(buffers)?.encoding || 'utf-8'; res.setHeader('Content-Type', `${file.contentType}; charset=${encoding}`); res.setHeader('Cache-Control', 'public, max-age=3600'); res.setHeader('Content-Disposition', `inline; filename="${encodeURIComponent(file.filename)}"`); - res.end(buffer); + const fileStream = await getDownloadStream({ bucketName, fileId }); + + fileStream.pipe(res); + + fileStream.on('error', () => { + res.status(500).end(); + }); + fileStream.on('end', () => { + res.end(); + }); } catch (error) { jsonRes(res, { code: 500, diff --git a/projects/app/src/pages/api/core/chat/chatTest.ts b/projects/app/src/pages/api/core/chat/chatTest.ts index 4c85519d2..0f9169c2a 100644 --- a/projects/app/src/pages/api/core/chat/chatTest.ts +++ b/projects/app/src/pages/api/core/chat/chatTest.ts @@ -52,12 +52,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) /* start process */ const { responseData } = await dispatchModules({ res, - appId, - modules, - variables, teamId, tmbId, user, + appId, + modules, + variables, params: { history, userChatInput: prompt diff --git a/projects/app/src/pages/api/core/dataset/allDataset.ts b/projects/app/src/pages/api/core/dataset/allDataset.ts index fd7b0deaa..e6dc77c32 100644 --- a/projects/app/src/pages/api/core/dataset/allDataset.ts +++ b/projects/app/src/pages/api/core/dataset/allDataset.ts @@ -2,10 +2,11 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; -import { getQAModel, getVectorModel } from '@/service/core/ai/model'; -import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d'; +import { getVectorModel } from '@/service/core/ai/model'; +import type { DatasetListItemType } from 
'@fastgpt/global/core/dataset/type.d'; import { mongoRPermission } from '@fastgpt/global/support/permission/utils'; import { authUserRole } from '@fastgpt/service/support/permission/auth/user'; +import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; /* get all dataset by teamId or tmbId */ export default async function handler(req: NextApiRequest, res: NextApiResponse) { @@ -16,18 +17,23 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const datasets = await MongoDataset.find({ ...mongoRPermission({ teamId, tmbId, role }), - type: 'dataset' + type: { $ne: DatasetTypeEnum.folder } }).lean(); const data = datasets.map((item) => ({ - ...item, + _id: item._id, + parentId: item.parentId, + avatar: item.avatar, + name: item.name, + intro: item.intro, + type: item.type, + permission: item.permission, vectorModel: getVectorModel(item.vectorModel), - agentModel: getQAModel(item.agentModel), canWrite: String(item.tmbId) === tmbId, isOwner: teamOwner || String(item.tmbId) === tmbId })); - jsonRes(res, { + jsonRes(res, { data }); } catch (err) { diff --git a/projects/app/src/pages/api/core/dataset/collection/create.ts b/projects/app/src/pages/api/core/dataset/collection/create.ts index e0933413d..140d34bb8 100644 --- a/projects/app/src/pages/api/core/dataset/collection/create.ts +++ b/projects/app/src/pages/api/core/dataset/collection/create.ts @@ -4,15 +4,10 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; -import type { CreateDatasetCollectionParams } from '@/global/core/api/datasetReq.d'; -import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; -import { - TrainingModeEnum, - DatasetCollectionTypeEnum, - DatasetCollectionTrainingModeEnum -} from '@fastgpt/global/core/dataset/constant'; +import type { CreateDatasetCollectionParams } from 
'@fastgpt/global/core/dataset/api.d'; import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; +import { createOneCollection } from '@fastgpt/service/core/dataset/collection/controller'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -42,68 +37,3 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< }); } } - -export async function createOneCollection({ - name, - parentId, - datasetId, - type, - trainingType = DatasetCollectionTrainingModeEnum.manual, - chunkSize = 0, - fileId, - rawLink, - teamId, - tmbId -}: CreateDatasetCollectionParams & { teamId: string; tmbId: string }) { - const { _id } = await MongoDatasetCollection.create({ - name, - teamId, - tmbId, - datasetId, - parentId: parentId || null, - type, - trainingType, - chunkSize, - fileId, - rawLink - }); - - // create default collection - if (type === DatasetCollectionTypeEnum.folder) { - await createDefaultCollection({ - datasetId, - parentId: _id, - teamId, - tmbId - }); - } - - return _id; -} - -// create default collection -export function createDefaultCollection({ - name = '手动录入', - datasetId, - parentId, - teamId, - tmbId -}: { - name?: '手动录入' | '手动标注'; - datasetId: string; - parentId?: string; - teamId: string; - tmbId: string; -}) { - return MongoDatasetCollection.create({ - name, - teamId, - tmbId, - datasetId, - parentId, - type: DatasetCollectionTypeEnum.virtual, - trainingType: DatasetCollectionTrainingModeEnum.manual, - chunkSize: 0, - updateTime: new Date('2099') - }); -} diff --git a/projects/app/src/pages/api/core/dataset/collection/delById.ts b/projects/app/src/pages/api/core/dataset/collection/delete.ts similarity index 58% rename from projects/app/src/pages/api/core/dataset/collection/delById.ts rename to projects/app/src/pages/api/core/dataset/collection/delete.ts index 660c13020..0a00a6350 100644 --- 
a/projects/app/src/pages/api/core/dataset/collection/delById.ts +++ b/projects/app/src/pages/api/core/dataset/collection/delete.ts @@ -1,13 +1,10 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; -import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema'; import { findCollectionAndChild } from '@fastgpt/service/core/dataset/collection/utils'; -import { delDataByCollectionId } from '@/service/core/dataset/data/controller'; -import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; +import { delCollectionRelevantData } from '@fastgpt/service/core/dataset/data/controller'; import { authDatasetCollection } from '@fastgpt/service/support/permission/auth/dataset'; -import { delFileById } from '@fastgpt/service/common/file/gridfs/controller'; -import { BucketNameEnum } from '@fastgpt/global/common/file/constants'; +import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -19,7 +16,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< throw new Error('CollectionIdId is required'); } - const { teamId } = await authDatasetCollection({ + await authDatasetCollection({ req, authToken: true, collectionId, @@ -30,26 +27,12 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const collections = await findCollectionAndChild(collectionId, '_id metadata'); const delIdList = collections.map((item) => item._id); - // delete training data - await MongoDatasetTraining.deleteMany({ - collectionId: { $in: delIdList }, - teamId + // delete + await delCollectionRelevantData({ + collectionIds: delIdList, + fileIds: collections.map((item) => String(item.metadata?.fileId)).filter(Boolean) }); - // delete pg data - await 
delDataByCollectionId({ collectionIds: delIdList }); - - // delete file - await Promise.all( - collections.map((collection) => { - if (!collection?.fileId) return; - return delFileById({ - bucketName: BucketNameEnum.dataset, - fileId: collection.fileId - }); - }) - ); - // delete collection await MongoDatasetCollection.deleteMany({ _id: { $in: delIdList } diff --git a/projects/app/src/pages/api/core/dataset/collection/list.ts b/projects/app/src/pages/api/core/dataset/collection/list.ts index 91cb0878d..e1c0f0024 100644 --- a/projects/app/src/pages/api/core/dataset/collection/list.ts +++ b/projects/app/src/pages/api/core/dataset/collection/list.ts @@ -6,7 +6,10 @@ import { Types } from '@fastgpt/service/common/mongo'; import type { DatasetCollectionsListItemType } from '@/global/core/dataset/type.d'; import type { GetDatasetCollectionsProps } from '@/global/core/api/datasetReq'; import { PagingData } from '@/types'; -import { MongoDatasetCollection } from '@fastgpt/service/core/dataset/collection/schema'; +import { + DatasetColCollectionName, + MongoDatasetCollection +} from '@fastgpt/service/core/dataset/collection/schema'; import { DatasetCollectionTypeEnum } from '@fastgpt/global/core/dataset/constant'; import { startQueue } from '@/service/utils/tools'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; @@ -45,7 +48,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< // not count data amount if (simple) { - const collections = await MongoDatasetCollection.find(match, '_id name type parentId') + const collections = await MongoDatasetCollection.find(match, '_id parentId type name') .sort({ updateTime: -1 }) @@ -72,6 +75,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< { $match: match }, + // count training data { $lookup: { from: DatasetTrainingCollectionName, @@ -89,6 +93,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< as: 
'trainings' } }, + // count collection total data { $lookup: { from: DatasetDataCollectionName, @@ -106,7 +111,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< as: 'datas' } }, - // 统计子集合的数量和子训练的数量 { $project: { _id: 1, @@ -114,6 +118,7 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< tmbId: 1, name: 1, type: 1, + status: 1, updateTime: 1, dataAmount: { $size: '$datas' }, trainingAmount: { $size: '$trainings' }, diff --git a/projects/app/src/pages/api/core/dataset/create.ts b/projects/app/src/pages/api/core/dataset/create.ts index a9305488d..cd37214ce 100644 --- a/projects/app/src/pages/api/core/dataset/create.ts +++ b/projects/app/src/pages/api/core/dataset/create.ts @@ -3,20 +3,20 @@ import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; import type { CreateDatasetParams } from '@/global/core/dataset/api.d'; -import { createDefaultCollection } from './collection/create'; +import { createDefaultCollection } from '@fastgpt/service/core/dataset/collection/controller'; import { authUserNotVisitor } from '@fastgpt/service/support/permission/auth/user'; +import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { await connectToDatabase(); const { + parentId, name, - tags, + type, avatar, vectorModel = global.vectorModels[0].model, - agentModel, - parentId, - type + agentModel } = req.body as CreateDatasetParams; // 凭证校验 @@ -26,7 +26,6 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< name, teamId, tmbId, - tags, vectorModel, agentModel, avatar, @@ -34,11 +33,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< type }); - await createDefaultCollection({ - datasetId: _id, - teamId, - tmbId - }); + if (type 
=== DatasetTypeEnum.dataset) { + await createDefaultCollection({ + datasetId: _id, + teamId, + tmbId + }); + } jsonRes(res, { data: _id }); } catch (err) { diff --git a/projects/app/src/pages/api/core/dataset/data/delete.ts b/projects/app/src/pages/api/core/dataset/data/delete.ts index 8b84afc81..ab4a7d75a 100644 --- a/projects/app/src/pages/api/core/dataset/data/delete.ts +++ b/projects/app/src/pages/api/core/dataset/data/delete.ts @@ -3,7 +3,7 @@ import { jsonRes } from '@fastgpt/service/common/response'; import { withNextCors } from '@fastgpt/service/common/middle/cors'; import { connectToDatabase } from '@/service/mongo'; import { authDatasetData } from '@/service/support/permission/auth/dataset'; -import { deleteDataByDataId } from '@/service/core/dataset/data/controller'; +import { delDatasetDataByDataId } from '@fastgpt/service/core/dataset/data/controller'; export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -19,7 +19,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex // 凭证校验 await authDatasetData({ req, authToken: true, dataId, per: 'w' }); - await deleteDataByDataId(dataId); + await delDatasetDataByDataId(dataId); jsonRes(res, { data: 'success' diff --git a/projects/app/src/pages/api/core/dataset/delete.ts b/projects/app/src/pages/api/core/dataset/delete.ts index ad3035942..6b8f796c6 100644 --- a/projects/app/src/pages/api/core/dataset/delete.ts +++ b/projects/app/src/pages/api/core/dataset/delete.ts @@ -1,13 +1,10 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; -import { MongoDatasetTraining } from '@fastgpt/service/core/dataset/training/schema'; -import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; -import { delDatasetFiles } from '@fastgpt/service/core/dataset/file/controller'; -import { MongoDatasetCollection } from 
'@fastgpt/service/core/dataset/collection/schema'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; -import { delDataByDatasetId } from '@/service/core/dataset/data/controller'; +import { delDatasetRelevantData } from '@fastgpt/service/core/dataset/data/controller'; import { findDatasetIdTreeByTopDatasetId } from '@fastgpt/service/core/dataset/controller'; +import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -25,21 +22,8 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< const deletedIds = await findDatasetIdTreeByTopDatasetId(id); - // delete training data(There could be a training mission) - await MongoDatasetTraining.deleteMany({ - datasetId: { $in: deletedIds } - }); - // delete all dataset.data and pg data - await delDataByDatasetId({ datasetIds: deletedIds }); - - // delete related files - await delDatasetFiles({ datasetId: id }); - - // delete collections - await MongoDatasetCollection.deleteMany({ - datasetId: { $in: deletedIds } - }); + await delDatasetRelevantData({ datasetIds: deletedIds }); // delete dataset data await MongoDataset.deleteMany({ diff --git a/projects/app/src/pages/api/core/dataset/list.ts b/projects/app/src/pages/api/core/dataset/list.ts index 6392fd9a3..d5a411f32 100644 --- a/projects/app/src/pages/api/core/dataset/list.ts +++ b/projects/app/src/pages/api/core/dataset/list.ts @@ -1,12 +1,12 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; -import { getQAModel, getVectorModel } from '@/service/core/ai/model'; -import type { DatasetItemType } from '@fastgpt/global/core/dataset/type.d'; +import type { DatasetListItemType } from '@fastgpt/global/core/dataset/type.d'; import { DatasetTypeEnum } from '@fastgpt/global/core/dataset/constant'; import 
{ MongoDataset } from '@fastgpt/service/core/dataset/schema'; import { mongoRPermission } from '@fastgpt/global/support/permission/utils'; import { authUserRole } from '@fastgpt/service/support/permission/auth/user'; +import { getVectorModel } from '@/service/core/ai/model'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { @@ -27,16 +27,21 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< .lean(); const data = await Promise.all( - datasets.map(async (item) => ({ - ...item, - vectorModel: getVectorModel(item.vectorModel), - agentModel: getQAModel(item.agentModel), + datasets.map((item) => ({ + _id: item._id, + parentId: item.parentId, + avatar: item.avatar, + name: item.name, + intro: item.intro, + type: item.type, + permission: item.permission, canWrite, - isOwner: teamOwner || String(item.tmbId) === tmbId + isOwner: teamOwner || String(item.tmbId) === tmbId, + vectorModel: getVectorModel(item.vectorModel) })) ); - jsonRes(res, { + jsonRes(res, { data }); } catch (err) { diff --git a/projects/app/src/pages/api/core/dataset/searchTest.ts b/projects/app/src/pages/api/core/dataset/searchTest.ts index 1d4322c09..f420e03f0 100644 --- a/projects/app/src/pages/api/core/dataset/searchTest.ts +++ b/projects/app/src/pages/api/core/dataset/searchTest.ts @@ -44,7 +44,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex }); // push bill - pushGenerateVectorBill({ + const { total } = pushGenerateVectorBill({ teamId, tmbId, tokenLen: tokenLen, @@ -54,11 +54,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex if (apikey) { updateApiKeyUsage({ apikey, - usage: countModelPrice({ - model: dataset.vectorModel, - tokens: tokenLen, - type: ModelTypeEnum.vector - }) + usage: total }); } diff --git a/projects/app/src/pages/api/core/dataset/update.ts b/projects/app/src/pages/api/core/dataset/update.ts index 42bbefe7a..fa2f02714 100644 --- 
a/projects/app/src/pages/api/core/dataset/update.ts +++ b/projects/app/src/pages/api/core/dataset/update.ts @@ -2,14 +2,14 @@ import type { NextApiRequest, NextApiResponse } from 'next'; import { jsonRes } from '@fastgpt/service/common/response'; import { connectToDatabase } from '@/service/mongo'; import { MongoDataset } from '@fastgpt/service/core/dataset/schema'; -import type { DatasetUpdateParams } from '@/global/core/api/datasetReq.d'; +import type { DatasetUpdateBody } from '@fastgpt/global/core/dataset/api.d'; import { authDataset } from '@fastgpt/service/support/permission/auth/dataset'; export default async function handler(req: NextApiRequest, res: NextApiResponse) { try { await connectToDatabase(); - const { id, parentId, name, avatar, tags, permission, agentModel } = - req.body as DatasetUpdateParams; + const { id, parentId, name, avatar, tags, permission, agentModel, websiteConfig, status } = + req.body as DatasetUpdateBody; if (!id) { throw new Error('缺少参数'); @@ -28,7 +28,9 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse< ...(avatar && { avatar }), ...(tags && { tags }), ...(permission && { permission }), - ...(agentModel && { agentModel: agentModel.model }) + ...(agentModel && { agentModel: agentModel.model }), + ...(websiteConfig && { websiteConfig }), + ...(status && { status }) } ); diff --git a/projects/app/src/pages/api/plugins/urlFetch.ts b/projects/app/src/pages/api/plugins/urlFetch.ts deleted file mode 100644 index 05ec958a3..000000000 --- a/projects/app/src/pages/api/plugins/urlFetch.ts +++ /dev/null @@ -1,73 +0,0 @@ -// pages/api/fetchContent.ts -import { NextApiRequest, NextApiResponse } from 'next'; -import axios from 'axios'; -import { JSDOM } from 'jsdom'; -import { Readability } from '@mozilla/readability'; -import { jsonRes } from '@fastgpt/service/common/response'; -import { authCert } from '@fastgpt/service/support/permission/auth/common'; -import type { FetchResultItem } from 
'@fastgpt/global/common/plugin/types/pluginRes.d'; -import { simpleText } from '@fastgpt/global/common/string/tools'; -import { connectToDatabase } from '@/service/mongo'; - -export type UrlFetchResponse = FetchResultItem[]; - -const fetchContent = async (req: NextApiRequest, res: NextApiResponse) => { - try { - await connectToDatabase(); - let { urlList = [] } = req.body as { urlList: string[] }; - - if (!urlList || urlList.length === 0) { - throw new Error('urlList is empty'); - } - - await authCert({ req, authToken: true }); - - urlList = urlList.filter((url) => /^(http|https):\/\/[^ "]+$/.test(url)); - - const response = ( - await Promise.allSettled( - urlList.map(async (url) => { - try { - const fetchRes = await axios.get(url, { - timeout: 30000 - }); - - const dom = new JSDOM(fetchRes.data, { - url, - contentType: 'text/html' - }); - - const reader = new Readability(dom.window.document); - const article = reader.parse(); - - const content = article?.textContent || ''; - - return { - url, - content: simpleText(`${article?.title}\n${content}`) - }; - } catch (error) { - return { - url, - content: '' - }; - } - }) - ) - ) - .filter((item) => item.status === 'fulfilled') - .map((item: any) => item.value) - .filter((item) => item.content); - - jsonRes(res, { - data: response - }); - } catch (error: any) { - jsonRes(res, { - code: 500, - error: error - }); - } -}; - -export default fetchContent; diff --git a/projects/app/src/pages/api/support/wallet/bill/createTrainingBill.ts b/projects/app/src/pages/api/support/wallet/bill/createTrainingBill.ts index 9660a50d9..95dffead3 100644 --- a/projects/app/src/pages/api/support/wallet/bill/createTrainingBill.ts +++ b/projects/app/src/pages/api/support/wallet/bill/createTrainingBill.ts @@ -24,13 +24,13 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse) source: BillSourceEnum.training, list: [ { - moduleName: '索引生成', + moduleName: 'wallet.moduleName.index', model: vectorModelData.name, amount: 
0, tokenLen: 0 }, { - moduleName: 'QA 拆分', + moduleName: 'wallet.moduleName.qa', model: agentModelData.name, amount: 0, tokenLen: 0 diff --git a/projects/app/src/pages/api/tools/urlFetch.ts b/projects/app/src/pages/api/tools/urlFetch.ts new file mode 100644 index 000000000..50db4c346 --- /dev/null +++ b/projects/app/src/pages/api/tools/urlFetch.ts @@ -0,0 +1,34 @@ +// pages/api/fetchContent.ts +import { NextApiRequest, NextApiResponse } from 'next'; +import { jsonRes } from '@fastgpt/service/common/response'; +import { authCert } from '@fastgpt/service/support/permission/auth/common'; +import { connectToDatabase } from '@/service/mongo'; +import { UrlFetchParams, UrlFetchResponse } from '@fastgpt/global/common/file/api.d'; +import { urlsFetch } from '@fastgpt/global/common/file/tools'; + +const fetchContent = async (req: NextApiRequest, res: NextApiResponse) => { + try { + await connectToDatabase(); + let { urlList = [], selector } = req.body as UrlFetchParams; + + if (!urlList || urlList.length === 0) { + throw new Error('urlList is empty'); + } + + await authCert({ req, authToken: true }); + + jsonRes(res, { + data: await urlsFetch({ + urlList, + selector + }) + }); + } catch (error: any) { + jsonRes(res, { + code: 500, + error: error + }); + } +}; + +export default fetchContent; diff --git a/projects/app/src/pages/api/v1/chat/completions.ts b/projects/app/src/pages/api/v1/chat/completions.ts index 2704ece78..a73d4657a 100644 --- a/projects/app/src/pages/api/v1/chat/completions.ts +++ b/projects/app/src/pages/api/v1/chat/completions.ts @@ -14,10 +14,10 @@ import { getChatHistory } from './getHistory'; import { saveChat } from '@/service/utils/chat/saveChat'; import { responseWrite } from '@fastgpt/service/common/response'; import { pushChatBill } from '@/service/support/wallet/bill/push'; -import { BillSourceEnum } from '@fastgpt/global/support/wallet/bill/constants'; import { authOutLinkChat } from '@/service/support/permission/auth/outLink'; import { 
pushResult2Remote, updateOutLinkUsage } from '@fastgpt/service/support/outLink/tools'; import requestIp from 'request-ip'; +import { getBillSourceByAuthType } from '@fastgpt/global/support/wallet/bill/tools'; import { selectShareResponse } from '@/utils/service/core/chat'; import { updateApiKeyUsage } from '@fastgpt/service/support/openapi/tools'; @@ -276,11 +276,7 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex appId: app._id, teamId: user.team.teamId, tmbId: user.team.tmbId, - source: (() => { - if (authType === 'apikey') return BillSourceEnum.api; - if (shareId) return BillSourceEnum.shareLink; - return BillSourceEnum.fastgpt; - })(), + source: getBillSourceByAuthType({ shareId, authType }), response: responseData }); diff --git a/projects/app/src/pages/api/v1/embeddings.ts b/projects/app/src/pages/api/v1/embeddings.ts index 8ddec5e3d..d3a0b2338 100644 --- a/projects/app/src/pages/api/v1/embeddings.ts +++ b/projects/app/src/pages/api/v1/embeddings.ts @@ -6,6 +6,8 @@ import { pushGenerateVectorBill } from '@/service/support/wallet/bill/push'; import { connectToDatabase } from '@/service/mongo'; import { authTeamBalance } from '@/service/support/permission/auth/bill'; import { getVectorsByText, GetVectorProps } from '@/service/core/ai/vector'; +import { updateApiKeyUsage } from '@fastgpt/service/support/openapi/tools'; +import { getBillSourceByAuthType } from '@fastgpt/global/support/wallet/bill/tools'; type Props = GetVectorProps & { billId?: string; @@ -15,24 +17,21 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex try { let { input, model, billId } = req.body as Props; await connectToDatabase(); - const { teamId, tmbId } = await authCert({ req, authToken: true, authApiKey: true }); - if (!Array.isArray(input) || typeof input !== 'string') { + if (!Array.isArray(input) && typeof input !== 'string') { throw new Error('input is nor array or string'); } + const { teamId, tmbId, apikey, authType } 
= await authCert({ + req, + authToken: true, + authApiKey: true + }); + await authTeamBalance(teamId); const { tokenLen, vectors } = await getVectorsByText({ input, model }); - pushGenerateVectorBill({ - teamId, - tmbId, - tokenLen: tokenLen, - model, - billId - }); - jsonRes(res, { data: { object: 'list', @@ -48,6 +47,22 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex } } }); + + const { total } = pushGenerateVectorBill({ + teamId, + tmbId, + tokenLen, + model, + billId, + source: getBillSourceByAuthType({ authType }) + }); + + if (apikey) { + updateApiKeyUsage({ + apikey, + usage: total + }); + } } catch (err) { console.log(err); jsonRes(res, { diff --git a/projects/app/src/pages/api/v1/rerank.ts b/projects/app/src/pages/api/v1/rerank.ts index 43a3b65c2..e31e965b1 100644 --- a/projects/app/src/pages/api/v1/rerank.ts +++ b/projects/app/src/pages/api/v1/rerank.ts @@ -7,12 +7,13 @@ import { connectToDatabase } from '@/service/mongo'; import { authTeamBalance } from '@/service/support/permission/auth/bill'; import { PostReRankProps, PostReRankResponse } from '@fastgpt/global/core/ai/api'; import { reRankRecall } from '@/service/core/ai/rerank'; +import { updateApiKeyUsage } from '@fastgpt/service/support/openapi/tools'; export default withNextCors(async function handler(req: NextApiRequest, res: NextApiResponse) { let { query, inputs } = req.body as PostReRankProps; try { await connectToDatabase(); - const { teamId, tmbId } = await authCert({ + const { teamId, tmbId, apikey } = await authCert({ req, authApiKey: true }); @@ -23,12 +24,19 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex const result = await reRankRecall({ query, inputs }); - pushReRankBill({ + const { total } = pushReRankBill({ teamId, tmbId, source: 'api' }); + if (apikey) { + updateApiKeyUsage({ + apikey, + usage: total + }); + } + jsonRes(res, { data: result }); diff --git 
a/projects/app/src/pages/app/detail/components/InfoModal.tsx b/projects/app/src/pages/app/detail/components/InfoModal.tsx index 913b365fb..e62301083 100644 --- a/projects/app/src/pages/app/detail/components/InfoModal.tsx +++ b/projects/app/src/pages/app/detail/components/InfoModal.tsx @@ -103,8 +103,8 @@ const InfoModal = ({ try { const src = await compressImgFileAndUpload({ file, - maxW: 100, - maxH: 100 + maxW: 300, + maxH: 300 }); setValue('avatar', src); setRefresh((state) => !state); diff --git a/projects/app/src/pages/app/list/component/CreateModal.tsx b/projects/app/src/pages/app/list/component/CreateModal.tsx index 684c43114..2af403850 100644 --- a/projects/app/src/pages/app/list/component/CreateModal.tsx +++ b/projects/app/src/pages/app/list/component/CreateModal.tsx @@ -60,8 +60,8 @@ const CreateModal = ({ onClose, onSuccess }: { onClose: () => void; onSuccess: ( try { const src = await compressImgFileAndUpload({ file, - maxW: 100, - maxH: 100 + maxW: 300, + maxH: 300 }); setValue('avatar', src); setRefresh((state) => !state); diff --git a/projects/app/src/pages/dataset/detail/components/CollectionCard.tsx b/projects/app/src/pages/dataset/detail/components/CollectionCard.tsx index aa22f2288..da1b1854f 100644 --- a/projects/app/src/pages/dataset/detail/components/CollectionCard.tsx +++ b/projects/app/src/pages/dataset/detail/components/CollectionCard.tsx @@ -11,14 +11,18 @@ import { Tbody, Image, MenuButton, - useDisclosure + useDisclosure, + Button, + Link, + useTheme } from '@chakra-ui/react'; import { getDatasetCollections, delDatasetCollectionById, putDatasetCollectionById, postDatasetCollection, - getDatasetCollectionPathById + getDatasetCollectionPathById, + postWebsiteSync } from '@/web/core/dataset/api'; import { useQuery } from '@tanstack/react-query'; import { debounce } from 'lodash'; @@ -39,7 +43,10 @@ import EmptyTip from '@/components/EmptyTip'; import { FolderAvatarSrc, DatasetCollectionTypeEnum, - TrainingModeEnum + 
DatasetCollectionTrainingModeEnum, + DatasetTypeEnum, + DatasetTypeMap, + DatasetStatusEnum } from '@fastgpt/global/core/dataset/constant'; import { getCollectionIcon } from '@fastgpt/global/core/dataset/utils'; import EditFolderModal, { useEditFolder } from '../../component/EditFolderModal'; @@ -52,13 +59,18 @@ import { useToast } from '@/web/common/hooks/useToast'; import MyTooltip from '@/components/MyTooltip'; import { useUserStore } from '@/web/support/user/useUserStore'; import { TeamMemberRoleEnum } from '@fastgpt/global/support/user/team/constant'; +import { useDatasetStore } from '@/web/core/dataset/store/dataset'; +import { DatasetSchemaType } from '@fastgpt/global/core/dataset/type'; +import { postCreateTrainingBill } from '@/web/support/wallet/bill/api'; const FileImportModal = dynamic(() => import('./Import/ImportModal'), {}); +const WebSiteConfigModal = dynamic(() => import('./Import/WebsiteConfig'), {}); const CollectionCard = () => { const BoxRef = useRef(null); const lastSearch = useRef(''); const router = useRouter(); + const theme = useTheme(); const { toast } = useToast(); const { parentId = '', datasetId } = router.query as { parentId: string; datasetId: string }; const { t } = useTranslation(); @@ -66,7 +78,7 @@ const CollectionCard = () => { const { isPc } = useSystemStore(); const { userInfo } = useUserStore(); const [searchText, setSearchText] = useState(''); - const { setLoading } = useSystemStore(); + const { datasetDetail, updateDataset, loadDatasetDetail } = useDatasetStore(); const { openConfirm, ConfirmModal } = useConfirm({ content: t('dataset.Confirm to delete the file') @@ -76,11 +88,18 @@ const CollectionCard = () => { onOpen: onOpenFileImportModal, onClose: onCloseFileImportModal } = useDisclosure(); + const { + isOpen: isOpenWebsiteModal, + onOpen: onOpenWebsiteModal, + onClose: onCloseWebsiteModal + } = useDisclosure(); const { onOpenModal: onOpenCreateVirtualFileModal, EditModal: EditCreateVirtualFileModal } = useEditTitle({ 
title: t('dataset.Create Virtual File'), - tip: t('dataset.Virtual File Tip') + tip: t('dataset.Virtual File Tip'), + canEmpty: false }); + const { onOpenModal: onOpenEditTitleModal, EditModal: EditTitleModal } = useEditTitle({ title: t('Rename') }); @@ -128,48 +147,60 @@ const CollectionCard = () => { () => collections.map((collection) => { const icon = getCollectionIcon(collection.type, collection.name); + const status = (() => { + if (collection.trainingAmount > 0) { + return { + statusText: t('dataset.collections.Collection Embedding', { + total: collection.trainingAmount + }), + color: 'myGray.500' + }; + } + return { + statusText: t('core.dataset.collection.status.active'), + color: 'green.500' + }; + })(); return { ...collection, icon, - ...(collection.trainingAmount > 0 - ? { - statusText: t('dataset.collections.Collection Embedding', { - total: collection.trainingAmount - }), - color: 'myGray.500' - } - : { - statusText: t('dataset.collections.Ready'), - color: 'green.500' - }) + ...status }; }), [collections, t] ); - const hasTrainingData = useMemo( - () => !!formatCollections.find((item) => item.trainingAmount > 0), - [formatCollections] - ); - const { mutate: onCreateVirtualFile } = useRequest({ - mutationFn: ({ name }: { name: string }) => { - setLoading(true); - return postDatasetCollection({ + const { mutate: onCreateCollection, isLoading: isCreating } = useRequest({ + mutationFn: async ({ + name, + type, + callback, + ...props + }: { + name: string; + type: `${DatasetCollectionTypeEnum}`; + callback?: (id: string) => void; + trainingType?: `${DatasetCollectionTrainingModeEnum}`; + rawLink?: string; + chunkSize?: number; + }) => { + const id = await postDatasetCollection({ parentId, datasetId, name, - type: DatasetCollectionTypeEnum.virtual + type, + ...props }); + callback?.(id); + return id; }, onSuccess() { getData(pageNum); }, - onSettled() { - setLoading(false); - }, - successToast: t('dataset.collections.Create Virtual File Success'), - 
errorToast: t('common.Create Virtual File Failed') + + successToast: t('common.Create Success'), + errorToast: t('common.Create Failed') }); const { mutate: onUpdateCollectionName } = useRequest({ mutationFn: ({ collectionId, name }: { collectionId: string; name: string }) => { @@ -185,9 +216,8 @@ const CollectionCard = () => { successToast: t('common.Rename Success'), errorToast: t('common.Rename Failed') }); - const { mutate: onDelCollection } = useRequest({ + const { mutate: onDelCollection, isLoading: isDeleting } = useRequest({ mutationFn: (collectionId: string) => { - setLoading(true); return delDatasetCollectionById({ collectionId }); @@ -195,26 +225,54 @@ const CollectionCard = () => { onSuccess() { getData(pageNum); }, - onSettled() { - setLoading(false); - }, successToast: t('common.Delete Success'), errorToast: t('common.Delete Failed') }); + const { mutate: onUpdateDatasetWebsiteConfig, isLoading: isUpdating } = useRequest({ + mutationFn: async (websiteConfig: DatasetSchemaType['websiteConfig']) => { + onCloseWebsiteModal(); + const [_, billId] = await Promise.all([ + updateDataset({ + id: datasetDetail._id, + websiteConfig, + status: DatasetStatusEnum.syncing + }), + postCreateTrainingBill({ + name: 'core.dataset.training.Website Sync', + vectorModel: datasetDetail.vectorModel.model, + agentModel: datasetDetail.agentModel.model + }) + ]); + return billId; + }, + onSuccess(billId: string) { + try { + postWebsiteSync({ datasetId: datasetDetail._id, billId }); + } catch (error) {} + }, + errorToast: t('common.Update Failed') + }); const { data: paths = [] } = useQuery(['getDatasetCollectionPathById', parentId], () => getDatasetCollectionPathById(parentId) ); + const hasTrainingData = useMemo( + () => !!formatCollections.find((item) => item.trainingAmount > 0), + [formatCollections] + ); useQuery( ['refreshCollection'], () => { getData(1); + if (datasetDetail.status === DatasetStatusEnum.syncing) { + loadDatasetDetail(datasetId, true); + } return null; }, 
{ refetchInterval: 6000, - enabled: hasTrainingData + enabled: hasTrainingData || datasetDetail.status === DatasetStatusEnum.syncing } ); @@ -224,17 +282,33 @@ const CollectionCard = () => { return ( - + ({ parentId: path.parentId, - parentName: i === paths.length - 1 ? `${path.parentName}(${total})` : path.parentName + parentName: i === paths.length - 1 ? `${path.parentName}` : path.parentName }))} FirstPathDom={ - - {t('common.File')}({total}) - + <> + + {t(DatasetTypeMap[datasetDetail?.type]?.collectionLabel)}({total}) + + {datasetDetail?.websiteConfig?.url && ( + + {t('core.dataset.website.Base Url')}: + + {datasetDetail.websiteConfig.url} + + + )} + } onClick={(e) => { router.replace({ @@ -279,68 +353,109 @@ const CollectionCard = () => { /> )} - {userInfo?.team?.role !== TeamMemberRoleEnum.visitor && ( - - - - {t('dataset.collections.Create And Import')} - - - } - menuList={[ - { - child: ( - - {''} - {t('Folder')} - - ), - onClick: () => setEditFolderData({}) - }, - { - child: ( - - {''} - {t('dataset.Create Virtual File')} - - ), - onClick: () => { - onOpenCreateVirtualFileModal({ - defaultVal: '', - onSuccess: (name) => onCreateVirtualFile({ name }) - }); + {datasetDetail?.type === DatasetTypeEnum.dataset && ( + <> + {userInfo?.team?.role !== TeamMemberRoleEnum.visitor && ( + + + + {t('dataset.collections.Create And Import')} + + } - }, - { - child: ( - - {''} - {t('dataset.File Input')} + menuList={[ + { + child: ( + + {''} + {t('Folder')} + + ), + onClick: () => setEditFolderData({}) + }, + { + child: ( + + {''} + {t('dataset.Create Virtual File')} + + ), + onClick: () => { + onOpenCreateVirtualFileModal({ + defaultVal: '', + onSuccess: (name) => { + onCreateCollection({ name, type: DatasetCollectionTypeEnum.virtual }); + } + }); + } + }, + { + child: ( + + {''} + {t('dataset.File Input')} + + ), + onClick: onOpenFileImportModal + } + ]} + /> + )} + + )} + {datasetDetail?.type === DatasetTypeEnum.websiteDataset && ( + <> + 
{datasetDetail?.websiteConfig?.url ? ( + + {datasetDetail.status === DatasetStatusEnum.active && ( + + )} + {datasetDetail.status === DatasetStatusEnum.syncing && ( + + + + {t('core.dataset.status.syncing')} + - ), - onClick: onOpenFileImportModal - } - ]} - /> + )} + + ) : ( + + )} + )} @@ -428,11 +543,7 @@ const CollectionCard = () => { - - {collection.type === DatasetCollectionTypeEnum.folder - ? '-' - : collection.dataAmount} - + {collection.dataAmount || '-'} {dayjs(collection.updateTime).format('YYYY/MM/DD HH:mm')} { h: '10px', mr: 2, borderRadius: 'lg', - bg: collection?.color + bg: collection.color }} > - {collection?.statusText} + {t(collection.statusText)} e.stopPropagation()}> @@ -536,14 +647,31 @@ const CollectionCard = () => { ))} - {total > pageSize && ( )} - {total === 0 && } + {total === 0 && ( + + {t('core.dataset.collection.Website Empty Tip')} + + {t('core.dataset.collection.Click top config website')} + + + ) + } + /> + )} + @@ -559,7 +687,6 @@ const CollectionCard = () => { onClose={onCloseFileImportModal} /> )} - {!!editFolderData && ( setEditFolderData(undefined)} @@ -570,15 +697,13 @@ const CollectionCard = () => { id: editFolderData.id, name }); + getData(pageNum); } else { - await postDatasetCollection({ - parentId, - datasetId, + onCreateCollection({ name, type: DatasetCollectionTypeEnum.folder }); } - getData(pageNum); } catch (error) { return Promise.reject(error); } @@ -587,7 +712,6 @@ const CollectionCard = () => { name={editFolderData.name} /> )} - {!!moveCollectionData && ( { }} /> )} + {isOpenWebsiteModal && ( + + )} ); }; diff --git a/projects/app/src/pages/dataset/detail/components/DataCard.tsx b/projects/app/src/pages/dataset/detail/components/DataCard.tsx index 7c5ac3968..740c2756e 100644 --- a/projects/app/src/pages/dataset/detail/components/DataCard.tsx +++ b/projects/app/src/pages/dataset/detail/components/DataCard.tsx @@ -55,7 +55,10 @@ const DataCard = () => { const router = useRouter(); const { userInfo } = 
useUserStore(); const { isPc } = useSystemStore(); - const { collectionId = '' } = router.query as { collectionId: string }; + const { collectionId = '', datasetId } = router.query as { + collectionId: string; + datasetId: string; + }; const { Loading, setIsLoading } = useLoading({ defaultLoading: true }); const { t } = useTranslation(); const [searchText, setSearchText] = useState(''); @@ -99,8 +102,18 @@ const DataCard = () => { ); // get file info - const { data: collection } = useQuery(['getDatasetCollectionById', collectionId], () => - getDatasetCollectionById(collectionId) + const { data: collection } = useQuery( + ['getDatasetCollectionById', collectionId], + () => getDatasetCollectionById(collectionId), + { + onError: () => { + router.replace({ + query: { + datasetId + } + }); + } + } ); const canWrite = useMemo( @@ -290,6 +303,7 @@ const DataCard = () => { import('./UrlFetchModal')); const CreateFileModal = dynamic(() => import('./CreateFileModal')); @@ -215,7 +215,7 @@ const FileSelect = ({ ); // link fetch const onUrlFetch = useCallback( - (e: FetchResultItem[]) => { + (e: UrlFetchResponse) => { const result: FileItemType[] = e.map(({ url, content }) => { const splitRes = splitText2Chunks({ text: content, diff --git a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx index 8eac380cd..1eef313a1 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/Provider.tsx @@ -188,6 +188,8 @@ const Provider = ({ const onReSplitChunks = useCallback(async () => { try { + setPreviewFile(undefined); + setFiles((state) => state.map((file) => { const splitRes = splitText2Chunks({ @@ -490,6 +492,7 @@ export const SelectorContainer = ({ display={['block', 'none']} onClick={(e) => { e.stopPropagation(); + setPreviewFile(undefined); setFiles((state) => state.filter((file) => file.id !== item.id)); }} 
/> diff --git a/projects/app/src/pages/dataset/detail/components/Import/UrlFetchModal.tsx b/projects/app/src/pages/dataset/detail/components/Import/UrlFetchModal.tsx index ce33ff735..ea111489f 100644 --- a/projects/app/src/pages/dataset/detail/components/Import/UrlFetchModal.tsx +++ b/projects/app/src/pages/dataset/detail/components/Import/UrlFetchModal.tsx @@ -1,31 +1,39 @@ -import React, { useRef } from 'react'; +import React from 'react'; import { useTranslation } from 'next-i18next'; import MyModal from '@/components/MyModal'; -import { Box, Button, ModalBody, ModalFooter, Textarea } from '@chakra-ui/react'; -import type { FetchResultItem } from '@fastgpt/global/common/plugin/types/pluginRes.d'; +import { Box, Button, Input, ModalBody, ModalFooter, Textarea } from '@chakra-ui/react'; import { useRequest } from '@/web/common/hooks/useRequest'; -import { postFetchUrls } from '@/web/common/plugin/api'; +import { postFetchUrls } from '@/web/common/tools/api'; +import { useForm } from 'react-hook-form'; +import { UrlFetchResponse } from '@fastgpt/global/common/file/api.d'; const UrlFetchModal = ({ onClose, onSuccess }: { onClose: () => void; - onSuccess: (e: FetchResultItem[]) => void; + onSuccess: (e: UrlFetchResponse) => void; }) => { const { t } = useTranslation(); - const Dom = useRef(null); + const { register, handleSubmit } = useForm({ + defaultValues: { + urls: '', + selector: '' + } + }); const { mutate, isLoading } = useRequest({ - mutationFn: async () => { - const val = Dom.current?.value || ''; - const urls = val.split('\n').filter((e) => e); - const res = await postFetchUrls(urls); + mutationFn: async ({ urls, selector }: { urls: string; selector: string }) => { + const urlList = urls.split('\n').filter((e) => e); + const res = await postFetchUrls({ + urlList, + selector + }); onSuccess(res); onClose(); }, - errorToast: '获取链接失败' + errorToast: t('core.dataset.import.Fetch Error') }); return ( @@ -34,8 +42,8 @@ const UrlFetchModal = ({ title={ 
{t('file.Fetch Url')} - - 目前仅支持读取静态链接,请注意检查结果 + + {t('core.dataset.import.Fetch url tip')} } @@ -45,20 +53,31 @@ const UrlFetchModal = ({ w={'600px'} > -