perf: password special chars;feat: llm paragraph;perf: chunk setting params;perf: text splitter worker (#4984)

* perf: password special chars

* feat: llm paragraph;perf: chunk setting params

* perf: text splitter worker

* perf: get rawtext buffer

* fix: test

* fix: test

* doc

* min chunk size
This commit is contained in:
Archer
2025-06-10 00:05:54 +08:00
committed by GitHub
parent 068918a9ee
commit 01ff56b42b
41 changed files with 546 additions and 448 deletions

View File

@@ -45,6 +45,7 @@
"core.dataset.import.Adjust parameters": "Adjust parameters",
"custom_data_process_params": "Custom",
"custom_data_process_params_desc": "Customize data processing rules",
"custom_split_char": "Char",
"custom_split_sign_tip": "Allows you to chunk according to custom delimiters. \nUsually used for processed data, using specific separators for precise chunking. \nYou can use the | symbol to represent multiple splitters, such as: \".|.\" to represent a period in Chinese and English.\n\nTry to avoid using special symbols related to regular, such as: * () [] {}, etc.",
"data_amount": "{{dataAmount}} Datas, {{indexAmount}} Indexes",
"data_error_amount": "{{errorAmount}} Group training exception",
@@ -117,6 +118,11 @@
"insert_images_success": "The new picture is successfully added, and you need to wait for the training to be completed before it will be displayed.",
"is_open_schedule": "Enable scheduled synchronization",
"keep_image": "Keep the picture",
"llm_paragraph_mode": "LLM recognition paragraph(Beta)",
"llm_paragraph_mode_auto": "automatic",
"llm_paragraph_mode_auto_desc": "Enable the model to automatically recognize the title when the file content does not contain a Markdown title.",
"llm_paragraph_mode_forbid": "Disabled",
"llm_paragraph_mode_forbid_desc": "Force the disabling of the model's automatic paragraph recognition",
"loading": "Loading...",
"max_chunk_size": "Maximum chunk size",
"move.hint": "After moving, the selected knowledge base/folder will inherit the permission settings of the new folder, and the original permission settings will become invalid.",