feat: chunk index independent config (#4271)

* sync collection

* remove lock

* feat: chunk index independent config

* feat: add max chunksize to split chunk function

* remove log

* update doc

* remove

* remove log
This commit is contained in:
Archer
2025-03-21 16:44:25 +08:00
committed by archer
parent 222ff0d49a
commit e812ad6e84
47 changed files with 784 additions and 443 deletions

View File

@@ -27,7 +27,6 @@
"custom_data_process_params": "自訂",
"custom_data_process_params_desc": "自訂資料處理規則",
"custom_split_sign_tip": "允許你根據自訂的分隔符進行分塊。\n通常用於已處理好的資料,透過特定的分隔符來精確分塊。\n可以使用 | 符號表示多個分隔符,例如:“。|.” 表示中英文句號。\n\n盡量避免使用正則相關的特殊符號,例如:* () [] {} 等。",
"data.ideal_chunk_length": "理想分塊長度",
"data_amount": "{{dataAmount}} 組數據, {{indexAmount}} 組索引",
"data_index_num": "索引 {{index}}",
"data_process_params": "處理參數",
@@ -53,8 +52,6 @@
"file_model_function_tip": "用於增強索引和問答生成",
"filename": "檔案名稱",
"folder_dataset": "資料夾",
"ideal_chunk_length": "理想分塊長度",
"ideal_chunk_length_tips": "依結束符號進行分段,並將多個分段組成一個分塊,此值決定了分塊的預估大小,可能會有上下浮動。",
"image_auto_parse": "圖片自動索引",
"image_auto_parse_tips": "調用 VLM 自動標註文檔裡的圖片,並生成額外的檢索索引",
"image_training_queue": "圖片處理排隊",
@@ -68,6 +65,8 @@
"import_param_setting": "參數設定",
"import_select_file": "選擇檔案",
"import_select_link": "輸入連結",
"index_size": "索引大小",
"index_size_tips": "向量化時內容的長度,系統會自動按該大小對分塊進行進一步的分割。",
"is_open_schedule": "啟用定時同步",
"keep_image": "保留圖片",
"move.hint": "移動後,所選資料集/資料夾將繼承新資料夾的權限設定,原先的權限設定將失效。",
@@ -89,6 +88,8 @@
"retain_collection": "調整訓練參數",
"retrain_task_submitted": "重新訓練任務已提交",
"same_api_collection": "存在相同的 API 集合",
"split_chunk_char": "按指定分割符分塊",
"split_chunk_size": "按長度分塊",
"split_sign_break": "1 個換行符",
"split_sign_break2": "2 個換行符",
"split_sign_custom": "自定義",