mirror of
https://github.com/labring/FastGPT.git
synced 2025-07-27 00:17:31 +00:00
perf: password special chars;feat: llm paragraph;perf: chunk setting params;perf: text splitter worker (#4984)
* perf: password special chars * feat: llm paragraph;perf: chunk setting params * perf: text splitter worker * perf: get rawtext buffer * fix: test * fix: test * doc * min chunk size
This commit is contained in:
@@ -151,8 +151,7 @@ const MySelect = <T = any,>(
|
||||
? {
|
||||
ref: SelectedItemRef,
|
||||
color: 'primary.700',
|
||||
bg: 'myGray.100',
|
||||
fontWeight: '600'
|
||||
bg: 'myGray.100'
|
||||
}
|
||||
: {
|
||||
color: 'myGray.900'
|
||||
@@ -167,7 +166,7 @@ const MySelect = <T = any,>(
|
||||
display={'block'}
|
||||
mb={0.5}
|
||||
>
|
||||
<Flex alignItems={'center'}>
|
||||
<Flex alignItems={'center'} fontWeight={value === item.value ? '600' : 'normal'}>
|
||||
{item.icon && (
|
||||
<Avatar mr={2} src={item.icon as any} w={item.iconSize ?? '1rem'} />
|
||||
)}
|
||||
|
@@ -20,8 +20,10 @@
|
||||
"export_title": "Time,Members,Type,Project name,AI points",
|
||||
"feishu": "Feishu",
|
||||
"generation_time": "Generation time",
|
||||
"image_index": "Image index",
|
||||
"image_parse": "Image tagging",
|
||||
"input_token_length": "input tokens",
|
||||
"llm_paragraph": "LLM segmentation",
|
||||
"mcp": "MCP call",
|
||||
"member": "member",
|
||||
"member_name": "Member name",
|
||||
|
@@ -45,6 +45,7 @@
|
||||
"core.dataset.import.Adjust parameters": "Adjust parameters",
|
||||
"custom_data_process_params": "Custom",
|
||||
"custom_data_process_params_desc": "Customize data processing rules",
|
||||
"custom_split_char": "Char",
|
||||
"custom_split_sign_tip": "Allows you to chunk according to custom delimiters. \nUsually used for processed data, using specific separators for precise chunking. \nYou can use the | symbol to represent multiple delimiters, such as: \"。|.\" to represent both the Chinese and English periods.\n\nTry to avoid using regular-expression special characters, such as: * () [] {}, etc.",
|
||||
"data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
|
||||
"data_error_amount": "{{errorAmount}} Group training exception",
|
||||
@@ -117,6 +118,11 @@
|
||||
"insert_images_success": "The new picture is successfully added, and you need to wait for the training to be completed before it will be displayed.",
|
||||
"is_open_schedule": "Enable scheduled synchronization",
|
||||
"keep_image": "Keep the picture",
|
||||
"llm_paragraph_mode": "LLM paragraph recognition (Beta)",
|
||||
"llm_paragraph_mode_auto": "Automatic",
|
||||
"llm_paragraph_mode_auto_desc": "When the file content does not contain Markdown headings, let the model recognize headings automatically.",
|
||||
"llm_paragraph_mode_forbid": "Disabled",
|
||||
"llm_paragraph_mode_forbid_desc": "Always disable the model's automatic paragraph recognition",
|
||||
"loading": "Loading...",
|
||||
"max_chunk_size": "Maximum chunk size",
|
||||
"move.hint": "After moving, the selected knowledge base/folder will inherit the permission settings of the new folder, and the original permission settings will become invalid.",
|
||||
|
@@ -20,8 +20,10 @@
|
||||
"export_title": "时间,成员,类型,项目名,AI 积分消耗",
|
||||
"feishu": "飞书",
|
||||
"generation_time": "生成时间",
|
||||
"image_index": "图片索引",
|
||||
"image_parse": "图片标注",
|
||||
"input_token_length": "输入 tokens",
|
||||
"llm_paragraph": "模型分段",
|
||||
"mcp": "MCP 调用",
|
||||
"member": "成员",
|
||||
"member_name": "成员名",
|
||||
|
@@ -45,6 +45,7 @@
|
||||
"core.dataset.import.Adjust parameters": "调整参数",
|
||||
"custom_data_process_params": "自定义",
|
||||
"custom_data_process_params_desc": "自定义设置数据处理规则",
|
||||
"custom_split_char": "分隔符",
|
||||
"custom_split_sign_tip": "允许你根据自定义的分隔符进行分块。通常用于已处理好的数据,使用特定的分隔符来精确分块。可以使用 | 符号表示多个分割符,例如:“。|.” 表示中英文句号。\n尽量避免使用正则相关特殊符号,例如: * () [] {} 等。",
|
||||
"data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
|
||||
"data_error_amount": "{{errorAmount}} 组训练异常",
|
||||
@@ -117,6 +118,11 @@
|
||||
"insert_images_success": "新增图片成功,需等待训练完成才会展示",
|
||||
"is_open_schedule": "启用定时同步",
|
||||
"keep_image": "保留图片",
|
||||
"llm_paragraph_mode": "模型识别段落(Beta)",
|
||||
"llm_paragraph_mode_auto": "自动",
|
||||
"llm_paragraph_mode_auto_desc": "当文件内容不包含 Markdown 标题时,启用模型自动识别标题。",
|
||||
"llm_paragraph_mode_forbid": "禁用",
|
||||
"llm_paragraph_mode_forbid_desc": "强制禁用模型自动识别段落",
|
||||
"loading": "加载中...",
|
||||
"max_chunk_size": "最大分块大小",
|
||||
"move.hint": "移动后,所选知识库/文件夹将继承新文件夹的权限设置,原先的权限设置失效。",
|
||||
|
@@ -20,8 +20,10 @@
|
||||
"export_title": "時間,成員,類型,項目名,AI 積分消耗",
|
||||
"feishu": "飛書",
|
||||
"generation_time": "生成時間",
|
||||
"image_index": "圖片索引",
|
||||
"image_parse": "圖片標註",
|
||||
"input_token_length": "輸入 tokens",
|
||||
"llm_paragraph": "模型分段",
|
||||
"mcp": "MCP 調用",
|
||||
"member": "成員",
|
||||
"member_name": "成員名",
|
||||
|
@@ -44,6 +44,7 @@
|
||||
"core.dataset.import.Adjust parameters": "調整參數",
|
||||
"custom_data_process_params": "自訂",
|
||||
"custom_data_process_params_desc": "自訂資料處理規則",
|
||||
"custom_split_char": "分隔符",
|
||||
"custom_split_sign_tip": "允許你根據自定義的分隔符進行分塊。\n通常用於已處理好的資料,使用特定的分隔符來精確分塊。\n可以使用 | 符號表示多個分割符,例如:“。|.”表示中英文句號。\n\n盡量避免使用正則相關特殊符號,例如:* () [] {} 等。",
|
||||
"data_amount": "{{dataAmount}} 組資料,{{indexAmount}} 組索引",
|
||||
"data_error_amount": "{{errorAmount}} 組訓練異常",
|
||||
@@ -116,6 +117,11 @@
|
||||
"insert_images_success": "新增圖片成功,需等待訓練完成才會展示",
|
||||
"is_open_schedule": "啟用定時同步",
|
||||
"keep_image": "保留圖片",
|
||||
"llm_paragraph_mode": "模型識別段落(Beta)",
|
||||
"llm_paragraph_mode_auto": "自動",
|
||||
"llm_paragraph_mode_auto_desc": "當文件內容不包含 Markdown 標題時,啟用模型自動識別標題。",
|
||||
"llm_paragraph_mode_forbid": "禁用",
|
||||
"llm_paragraph_mode_forbid_desc": "強制禁用模型自動識別段落",
|
||||
"loading": "加載中...",
|
||||
"max_chunk_size": "最大分塊大小",
|
||||
"move.hint": "移動後,所選資料集/資料夾將繼承新資料夾的權限設定,原先的權限設定將失效。",
|
||||
|
Reference in New Issue
Block a user