Mirror of https://github.com/labring/FastGPT.git, synced 2025-07-23 21:13:50 +00:00
Add image index and pdf parse (#3956)
* feat: think tag parse
* feat: parse think tag test
* feat: pdf parse ux
* feat: doc2x parse
* perf: rewrite training mode setting
* feat: image parse queue
* perf: image index
* feat: image parse process
* feat: add init sh
* fix: ts
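Among the items above, "think tag parse" refers to separating a reasoning model's `<think>...</think>` block from the visible answer. A minimal TypeScript sketch of that idea; the function name, tag format, and non-streaming handling are illustrative assumptions, not FastGPT's actual implementation:

```typescript
// Minimal sketch: split a model response into the <think> reasoning
// block and the visible answer. Tag format and the lack of streaming
// support are assumptions; the real parser may differ.
function parseThinkTag(text: string): { think: string; answer: string } {
  const match = text.match(/^\s*<think>([\s\S]*?)<\/think>/);
  if (!match) {
    return { think: '', answer: text };
  }
  return {
    think: match[1].trim(),
    answer: text.slice(match.index! + match[0].length).trimStart()
  };
}

// Example: the reasoning is separated out so, e.g., a UI can render it
// as a collapsible trace while only the answer is shown as the reply.
const { think, answer } = parseThinkTag(
  '<think>The user wants a summary.</think>Here is the summary.'
);
console.log(think); // "The user wants a summary."
console.log(answer); // "Here is the summary."
```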
@@ -3,11 +3,16 @@
"add_file": "添加文件",
"api_file": "API 文件库",
"api_url": "接口地址",
"auto_indexes": "自动生成补充索引",
"auto_indexes_tips": "通过大模型进行额外索引生成,提高语义丰富度,提高检索的精度。",
"chunk_max_tokens": "分块上限",
"close_auto_sync": "确认关闭自动同步功能?",
"collection.Create update time": "创建/更新时间",
"collection.Training type": "训练模式",
"collection.training_type": "处理模式",
"collection_data_count": "数据量",
"collection_metadata_custom_pdf_parse": "PDF增强解析",
"collection_metadata_image_parse": "图片标注",
"collection_not_support_retraining": "该集合类型不支持重新调整参数",
"collection_not_support_sync": "该集合不支持同步",
"collection_sync": "立即同步",
@@ -22,12 +27,21 @@
"custom_data_process_params_desc": "自定义设置数据处理规则",
"data.ideal_chunk_length": "理想分块长度",
"data_amount": "{{dataAmount}} 组数据, {{indexAmount}} 组索引",
"data_index_custom": "自定义索引",
"data_index_default": "默认索引",
"data_index_image": "图片索引",
"data_index_num": "索引 {{index}}",
"data_index_question": "推测问题索引",
"data_index_summary": "摘要索引",
"data_process_params": "处理参数",
"data_process_setting": "数据处理配置",
"dataset.Unsupported operation": "操作不支持",
"dataset.no_collections": "暂无数据集",
"dataset.no_tags": "暂无标签",
"default_params": "默认",
"default_params_desc": "使用系统默认的参数和规则",
"edit_dataset_config": "编辑知识库配置",
"enhanced_indexes": "索引增强",
"error.collectionNotFound": "集合找不到了~",
"external_file": "外部文件库",
"external_file_dataset_desc": "可以从外部文件库导入文件构建知识库,文件不会进行二次存储",
@@ -38,19 +52,38 @@
"feishu_dataset": "飞书知识库",
"feishu_dataset_config": "配置飞书知识库",
"feishu_dataset_desc": "可通过配置飞书文档权限,使用飞书文档构建知识库,文档不会进行二次存储",
"file_list": "文件列表",
"file_model_function_tip": "用于增强索引和 QA 生成",
"filename": "文件名",
"folder_dataset": "文件夹",
"ideal_chunk_length": "理想分块长度",
"ideal_chunk_length_tips": "按结束符号进行分段,并将多个分段组成一个分块,该值决定了分块的预估大小,如果会有上下浮动。",
|
||||
"image_auto_parse": "图片自动索引",
|
||||
"image_auto_parse_tips": "调用 VLM 自动标注文档里的图片,并生成额外的检索索引",
|
||||
"import.Auto mode Estimated Price Tips": "需调用文本理解模型,需要消耗较多AI 积分:{{price}} 积分/1K tokens",
|
||||
"import.Embedding Estimated Price Tips": "仅使用索引模型,消耗少量 AI 积分:{{price}} 积分/1K tokens",
|
||||
"import_confirm": "确认上传",
|
||||
"import_data_preview": "数据预览",
|
||||
"import_data_process_setting": "数据处理方式设置",
|
||||
"import_file_parse_setting": "文件解析设置",
|
||||
"import_model_config": "模型选择",
|
||||
"import_param_setting": "参数设置",
|
||||
"import_select_file": "选择文件",
|
||||
"is_open_schedule": "启用定时同步",
|
||||
"keep_image": "保留图片",
|
||||
"move.hint": "移动后,所选知识库/文件夹将继承新文件夹的权限设置,原先的权限设置失效。",
|
||||
"open_auto_sync": "开启定时同步后,系统将会每天不定时尝试同步集合,集合同步期间,会出现无法搜索到该集合数据现象。",
|
||||
"params_setting": "参数设置",
|
||||
"pdf_enhance_parse": "PDF增强解析",
|
||||
"pdf_enhance_parse_price": "{{price}}积分/页",
|
||||
"pdf_enhance_parse_tips": "解析 PDF 文件时,调用 PDF 识别模型进行识别,可以将其转换成 Markdown 并保留文档中的图片,同时也可以对扫描件进行识别。",
|
||||
"permission.des.manage": "可管理整个知识库数据和信息",
|
||||
"permission.des.read": "可查看知识库内容",
|
||||
"permission.des.write": "可增加和变更知识库内容",
|
||||
"preview_chunk": "分块预览",
|
||||
"preview_chunk_empty": "无法读取该文件内容",
|
||||
"preview_chunk_intro": "最多展示 10 个分块",
|
||||
"preview_chunk_not_selected": "点击左侧文件后进行预览",
|
||||
"rebuild_embedding_start_tip": "切换索引模型任务已开始",
|
||||
"rebuilding_index_count": "重建中索引数量:{{count}}",
|
||||
"request_headers": "请求头参数,会自动补充 Bearer",
|
||||
@@ -72,8 +105,10 @@
|
||||
"tag.tags": "标签",
|
||||
"tag.total_tags": "共{{total}}个标签",
|
||||
"the_knowledge_base_has_indexes_that_are_being_trained_or_being_rebuilt": "知识库有训练中或正在重建的索引",
|
||||
"total_num_files": "共 {{total}} 个文件",
|
||||
"training_mode": "处理方式",
|
||||
"vector_model_max_tokens_tip": "每个分块数据,最大长度为 3000 tokens",
|
||||
"vllm_model": "图片理解模型",
|
||||
"website_dataset": "Web 站点同步",
|
||||
"website_dataset_desc": "Web 站点同步允许你直接使用一个网页链接构建知识库",
|
||||
"yuque_dataset": "语雀知识库",
|
||||
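The locale values above use i18next-style `{{...}}` interpolation, which is how placeholders such as `{{price}}` and `{{dataAmount}}` get filled at render time. A minimal sketch of consuming two of these keys; the inline `dataset` namespace wiring here is an assumption for illustration, while the real project loads these files through its next-i18next setup:

```typescript
import i18next from 'i18next';

// Minimal sketch: register two of the zh-CN strings above under a
// hypothetical "dataset" namespace and render them with interpolation.
i18next
  .init({
    lng: 'zh-CN',
    resources: {
      'zh-CN': {
        dataset: {
          data_amount: '{{dataAmount}} 组数据, {{indexAmount}} 组索引',
          pdf_enhance_parse_price: '{{price}}积分/页'
        }
      }
    }
  })
  .then((t) => {
    // The {{...}} placeholders are filled from the options object.
    console.log(t('dataset:data_amount', { dataAmount: 120, indexAmount: 360 }));
    // -> 120 组数据, 360 组索引
    console.log(t('dataset:pdf_enhance_parse_price', { price: 2 }));
    // -> 2积分/页
  });
```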