feat: chunk index independent config (#4271)

* sync collection

* remove lock

* feat: chunk index independent config

* feat: add max chunksize to split chunk function

* remove log

* update doc

* remove

* remove log
This commit is contained in:
Archer
2025-03-21 16:44:25 +08:00
committed by archer
parent 222ff0d49a
commit e812ad6e84
47 changed files with 784 additions and 443 deletions

View File

@@ -569,7 +569,6 @@
"core.dataset.import.Custom process": "Custom Rules",
"core.dataset.import.Custom process desc": "Customize segmentation and preprocessing rules",
"core.dataset.import.Custom prompt": "Custom Prompt",
"core.dataset.import.Custom split char": "Custom Separator",
"core.dataset.import.Custom text": "Custom Text",
"core.dataset.import.Custom text desc": "Manually enter a piece of text as a dataset",
"core.dataset.import.Data process params": "Data Processing Parameters",

View File

@@ -27,7 +27,6 @@
"custom_data_process_params": "Custom",
"custom_data_process_params_desc": "Customize data processing rules",
"custom_split_sign_tip": "Allows you to chunk according to custom delimiters. \nUsually used for processed data, using specific separators for precise chunking. \nYou can use the | symbol to represent multiple splitters, such as: \".|.\" to represent a period in Chinese and English.\n\nTry to avoid using special symbols related to regular, such as: * () [] {}, etc.",
"data.ideal_chunk_length": "ideal block length",
"data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
"data_index_num": "Index {{index}}",
"data_process_params": "Params",
@@ -53,8 +52,6 @@
"file_model_function_tip": "Enhances indexing and QA generation",
"filename": "Filename",
"folder_dataset": "Folder",
"ideal_chunk_length": "ideal block length",
"ideal_chunk_length_tips": "Segment according to the end symbol and combine multiple segments into one block. This value determines the estimated size of the block, if there is any fluctuation.",
"image_auto_parse": "Automatic image indexing",
"image_auto_parse_tips": "Call VLM to automatically label the pictures in the document and generate additional search indexes",
"image_training_queue": "Queue of image processing",
@@ -68,6 +65,8 @@
"import_param_setting": "Parameter settings",
"import_select_file": "Select a file",
"import_select_link": "Enter link",
"index_size": "Index size",
"index_size_tips": "When vectorized, the system will automatically further segment the blocks according to this size.",
"is_open_schedule": "Enable scheduled synchronization",
"keep_image": "Keep the picture",
"move.hint": "After moving, the selected knowledge base/folder will inherit the permission settings of the new folder, and the original permission settings will become invalid.",
@@ -89,6 +88,8 @@
"retain_collection": "Adjust Training Parameters",
"retrain_task_submitted": "The retraining task has been submitted",
"same_api_collection": "The same API set exists",
"split_chunk_char": "Block by specified splitter",
"split_chunk_size": "Block by length",
"split_sign_break": "1 newline character",
"split_sign_break2": "2 newline characters",
"split_sign_custom": "Customize",