Add image index and pdf parse (#3956)

* feat: think tag parse

* feat: parse think tag test

* feat: pdf parse ux

* feat: doc2x parse

* perf: rewrite training mode setting

* feat: image parse queue

* perf: image index

* feat: image parse process

* feat: add init sh

* fix: ts
This commit is contained in:
Archer
2025-03-03 23:08:29 +08:00
committed by archer
parent 08b6f594df
commit adf5377ebe
106 changed files with 2337 additions and 1454 deletions

View File

@@ -565,10 +565,7 @@
"core.dataset.file": "文件",
"core.dataset.folder": "目录",
"core.dataset.import.Auto mode Estimated Price Tips": "需调用文本理解模型需要消耗较多AI 积分:{{price}} 积分/1K tokens",
"core.dataset.import.Auto process": "自动",
"core.dataset.import.Auto process desc": "自动设置分割和预处理规则",
"core.dataset.import.Chunk Range": "范围:{{min}}~{{max}}",
"core.dataset.import.Chunk Split": "直接分段",
"core.dataset.import.Chunk Split Tip": "将文本按一定的规则进行分段处理后,转成可进行语义搜索的格式,适合绝大多数场景。不需要调用模型额外处理,成本低。",
"core.dataset.import.Continue upload": "继续上传",
"core.dataset.import.Custom process": "自定义规则",
@@ -578,7 +575,6 @@
"core.dataset.import.Custom split char Tips": "允许你根据自定义的分隔符进行分块。通常用于已处理好的数据,使用特定的分隔符来精确分块。",
"core.dataset.import.Custom text": "自定义文本",
"core.dataset.import.Custom text desc": "手动输入一段文本作为数据集",
"core.dataset.import.Data Preprocessing": "数据处理",
"core.dataset.import.Data process params": "数据处理参数",
"core.dataset.import.Down load csv template": "点击下载 CSV 模板",
"core.dataset.import.Embedding Estimated Price Tips": "仅使用索引模型,消耗少量 AI 积分:{{price}} 积分/1K tokens",
@@ -600,7 +596,6 @@
"core.dataset.import.Source name": "来源名",
"core.dataset.import.Sources list": "来源列表",
"core.dataset.import.Start upload": "开始上传",
"core.dataset.import.Total files": "共 {{total}} 个文件",
"core.dataset.import.Upload complete": "完成上传",
"core.dataset.import.Upload data": "确认上传",
"core.dataset.import.Upload file progress": "文件上传进度",
@@ -650,12 +645,12 @@
"core.dataset.test.test result placeholder": "测试结果将在这里展示",
"core.dataset.test.test result tip": "根据知识库内容与测试文本的相似度进行排序,你可以根据测试结果调整对应的文本。\n注意测试记录中的数据可能已经被修改过点击某条测试数据后将展示最新的数据。",
"core.dataset.training.Agent queue": "QA 训练排队",
"core.dataset.training.Auto mode": "增强处理",
"core.dataset.training.Auto mode": "补充索引",
"core.dataset.training.Auto mode Tip": "通过子索引以及调用模型生成相关问题与摘要,来增加数据块的语义丰富度,更利于检索。需要消耗更多的存储空间和增加 AI 调用次数。",
"core.dataset.training.Chunk mode": "直接分",
"core.dataset.training.Chunk mode": "直接分",
"core.dataset.training.Full": "预计 5 分钟以上",
"core.dataset.training.Leisure": "空闲",
"core.dataset.training.QA mode": "问答拆分",
"core.dataset.training.QA mode": "问答对提取",
"core.dataset.training.Vector queue": "索引排队",
"core.dataset.training.Waiting": "预计 5 分钟",
"core.dataset.training.Website Sync": "Web 站点同步",
@@ -864,7 +859,6 @@
"dataset.collections.Select Collection": "选择文件",
"dataset.collections.Select One Collection To Store": "选择一个文件进行存储",
"dataset.data.Can not edit": "无编辑权限",
"dataset.data.Custom Index Number": "自定义索引{{number}}",
"dataset.data.Default Index": "默认索引",
"dataset.data.Delete Tip": "确认删除该条数据?",
"dataset.data.Index Placeholder": "输入索引文本内容",
@@ -959,6 +953,7 @@
"new_create": "新建",
"no": "否",
"no_laf_env": "系统未配置Laf环境",
"not_model_config": "未配置相关模型",
"not_yet_introduced": "暂无介绍",
"option": "选项",
"pay.amount": "金额",
@@ -1124,7 +1119,6 @@
"support.wallet.invoice_detail": "发票详情",
"support.wallet.invoice_info": "发票将在 3-7 个工作日内发送至邮箱,请耐心等待",
"support.wallet.invoicing": "开票",
"support.wallet.moduleName.index": "索引生成",
"support.wallet.moduleName.qa": "QA 拆分",
"support.wallet.noBill": "无账单记录~",
"support.wallet.no_invoice": "暂无开票记录",