perf: password special chars;feat: llm paragraph;perf: chunk setting params;perf: text splitter worker (#4984)

* perf: password special chars

* feat: llm paragraph;perf: chunk setting params

* perf: text splitter worker

* perf: get rawtext buffer

* fix: test

* fix: test

* doc

* min chunk size
This commit is contained in:
Archer
2025-06-10 00:05:54 +08:00
committed by GitHub
parent 068918a9ee
commit 01ff56b42b
41 changed files with 546 additions and 448 deletions

View File

@@ -20,8 +20,10 @@
"export_title": "時間,成員,類型,項目名,AI 積分消耗",
"feishu": "飛書",
"generation_time": "生成時間",
"image_index": "圖片索引",
"image_parse": "圖片標註",
"input_token_length": "輸入 tokens",
"llm_paragraph": "模型分段",
"mcp": "MCP 調用",
"member": "成員",
"member_name": "成員名",

View File

@@ -44,6 +44,7 @@
"core.dataset.import.Adjust parameters": "調整參數",
"custom_data_process_params": "自訂",
"custom_data_process_params_desc": "自訂資料處理規則",
"custom_split_char": "分隔符",
"custom_split_sign_tip": "允許你根據自定義的分隔符進行分塊。\n通常用於已處理好的資料,使用特定的分隔符來精確分塊。\n可以使用 | 符號表示多個分隔符,例如:“。|.”表示中英文句號。\n\n盡量避免使用正則相關特殊符號,例如:* () [] {} 等。",
"data_amount": "{{dataAmount}} 組資料,{{indexAmount}} 組索引",
"data_error_amount": "{{errorAmount}} 組訓練異常",
@@ -116,6 +117,11 @@
"insert_images_success": "新增圖片成功,需等待訓練完成才會展示",
"is_open_schedule": "啟用定時同步",
"keep_image": "保留圖片",
"llm_paragraph_mode": "模型識別段落(Beta)",
"llm_paragraph_mode_auto": "自動",
"llm_paragraph_mode_auto_desc": "當文件內容不包含 Markdown 標題時,啟用模型自動識別標題。",
"llm_paragraph_mode_forbid": "禁用",
"llm_paragraph_mode_forbid_desc": "強制禁用模型自動識別段落",
"loading": "載入中...",
"max_chunk_size": "最大分塊大小",
"move.hint": "移動後,所選資料集/資料夾將繼承新資料夾的權限設定,原先的權限設定將失效。",