Use format detector feature for TextViewer
Some checks are pending
MSBuild / build (push) Waiting to run
MSBuild / publish (push) Blocked by required conditions

This commit is contained in:
ema
2025-06-19 16:38:27 +08:00
parent a580926059
commit efc83b0669
10 changed files with 496 additions and 218 deletions

View File

@@ -0,0 +1,89 @@
// Copyright © 2017-2025 QL-Win Contributors
//
// This file is part of QuickLook program.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
using System.ComponentModel.Composition;
using System.Linq;
using System.Text;
using UtfUnknown;
namespace QuickLook.Plugin.TextViewer.Detectors;
/// <summary>
/// Entry point for guessing the text encoding of a raw byte buffer.
/// </summary>
[Export]
public class EncodingDetector
{
    /// <summary>
    /// Runs the UTF.Unknown charset detector over <paramref name="bytes"/> and then
    /// post-processes the detection result with a second pass that corrects
    /// known mis-detections (mixed encodings, very short inputs).
    /// </summary>
    /// <param name="bytes">Raw content to analyse.</param>
    /// <returns>The encoding chosen after the second pass.</returns>
    public static Encoding DetectFromBytes(byte[] bytes) =>
        CharsetDetector.DetectFromBytes(bytes).DoubleDetectFromResult(bytes);
}
/// <summary>
/// File-local helpers that refine a <see cref="DetectionResult"/> produced by the charset detector.
/// </summary>
file static class DetectionExtensions
{
    /// <summary>
    /// Picks the final encoding for <paramref name="buffer"/> from a detection result,
    /// steering the choice towards UTF8 / UTF32 / ANSI in the cases where the raw
    /// detection is known to be unreliable.
    /// </summary>
    /// <param name="result">Detection result returned by the charset detector.</param>
    /// <param name="buffer">The bytes the detection was run on.</param>
    /// <returns>The selected encoding.</returns>
    public static Encoding DoubleDetectFromResult(this DetectionResult result, byte[] buffer)
    {
        // Candidates in priority order: UTF8, then UTF32, then ANSI.
        Encoding[] fallbacks = { Encoding.UTF8, Encoding.UTF32, Encoding.Default };

        // Start from the highest-confidence encoding; use ANSI when nothing was detected.
        var chosen = result.Detected?.Encoding ?? Encoding.Default;

        // With mixed encodings an unrelated encoding may win on confidence.
        // If one of the trusted encodings also appears among the details, prefer it.
        // https://github.com/QL-Win/QuickLook/issues/769
        if (!IsTrusted(chosen))
        {
            foreach (var candidate in fallbacks)
            {
                if (result.Details.Any(detail => detail.Encoding == candidate))
                {
                    chosen = candidate;
                    break;
                }
            }
        }

        // Short text without a BOM is hard to classify. When the choice is still not
        // a trusted encoding, take the first candidate that decodes the buffer
        // without producing a replacement character.
        // https://github.com/QL-Win/QuickLook/issues/471
        // https://github.com/QL-Win/QuickLook/issues/600
        // https://github.com/QL-Win/QuickLook/issues/954
        if (buffer.Length > 50 || IsTrusted(chosen))
        {
            return chosen;
        }

        foreach (var candidate in fallbacks)
        {
            if (!candidate.GetString(buffer).Contains("\uFFFD"))
            {
                return candidate;
            }
        }

        return chosen;
    }

    // True when the encoding is the very same instance as UTF8, UTF32 or the ANSI default.
    private static bool IsTrusted(Encoding encoding) =>
        encoding == Encoding.UTF8 || encoding == Encoding.UTF32 || encoding == Encoding.Default;
}

View File

@@ -0,0 +1,48 @@
// Copyright © 2017-2025 QL-Win Contributors
//
// This file is part of QuickLook program.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
using System.Linq;
namespace QuickLook.Plugin.TextViewer.Detectors;
/// <summary>
/// Guesses the format of a piece of text by asking each registered
/// <see cref="IFormatDetector"/> in turn.
/// </summary>
public class FormatDetector
{
    /// <summary>Shared instance used by <see cref="Detect"/>.</summary>
    public static FormatDetector Instance { get; } = new();

    // Detectors are consulted in this order; the first one that matches wins.
    internal IFormatDetector[] TextDetectors =
    [
        new XMLDetector(),
        new JSONDetector(),
    ];

    /// <summary>
    /// Finds the first registered detector that recognizes <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Text content to classify.</param>
    /// <returns>
    /// The matching detector, or <c>null</c> when the text is null/blank or no detector matches.
    /// </returns>
    public static IFormatDetector Detect(string text)
    {
        return string.IsNullOrWhiteSpace(text)
            ? null
            : Instance.TextDetectors.FirstOrDefault(candidate => candidate.Detect(text));
    }
}
/// <summary>
/// Contract for a detector that decides whether a piece of text is in one specific format.
/// </summary>
public interface IFormatDetector
{
    /// <summary>Name of the format this detector recognizes (for example "JSON").</summary>
    string Name { get; }

    /// <summary>File extension associated with the format, including the leading dot (for example ".json").</summary>
    string Extension { get; }

    /// <summary>Checks whether <paramref name="text"/> appears to be in this detector's format.</summary>
    /// <param name="text">Text content to inspect.</param>
    /// <returns><c>true</c> when the text is recognized; otherwise <c>false</c>.</returns>
    bool Detect(string text);
}

View File

@@ -0,0 +1,52 @@
// Copyright © 2017-2025 QL-Win Contributors
//
// This file is part of QuickLook program.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
using System.Text.RegularExpressions;
namespace QuickLook.Plugin.TextViewer.Detectors;
/// <summary>
/// Heuristic detector for text that looks like a JSON document.
/// </summary>
public class JSONDetector : IFormatDetector
{
    // A quoted, non-empty key followed by a colon: the minimum evidence of a JSON member.
    private static readonly Regex KeyPattern = new(@"""[^""]+""\s*:");

    /// <summary>Name of the detected format.</summary>
    public string Name => "JSON";

    /// <summary>File extension associated with the detected format.</summary>
    public string Extension => ".json";

    /// <summary>
    /// Reports whether <paramref name="text"/> looks like JSON: ignoring surrounding
    /// whitespace it must be wrapped in a matching <c>{ }</c> or <c>[ ]</c> pair and
    /// contain at least one <c>"key":</c> member.
    /// </summary>
    /// <param name="text">Text content to inspect; null or blank yields <c>false</c>.</param>
    /// <returns><c>true</c> when the text passes the heuristic; otherwise <c>false</c>.</returns>
    public bool Detect(string text)
    {
        if (string.IsNullOrWhiteSpace(text)) return false;

        // Locate the first and last significant characters by index instead of
        // allocating trimmed copies of a potentially large text.
        // The guard above guarantees at least one non-whitespace character exists.
        var start = 0;
        while (start < text.Length && (char.IsWhiteSpace(text[start]) || text[start] == '\uFEFF'))
        {
            // U+FEFF (a byte order mark left in decoded text) is skipped explicitly so the
            // result does not depend on culture-sensitive comparison rules.
            start++;
        }

        // Only a BOM and whitespace: nothing to classify.
        if (start == text.Length) return false;

        var end = text.Length - 1;
        while (end > start && char.IsWhiteSpace(text[end]))
        {
            end--;
        }

        var first = text[start];
        var last = text[end];

        // Ordinal character comparison; the opening and closing delimiters must pair up.
        var wrapped = (first == '{' && last == '}') || (first == '[' && last == ']');
        if (!wrapped) return false;

        // At least one key exists
        return KeyPattern.IsMatch(text);
    }
}

View File

@@ -0,0 +1,34 @@
// Copyright © 2017-2025 QL-Win Contributors
//
// This file is part of QuickLook program.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
using System.Text.RegularExpressions;
namespace QuickLook.Plugin.TextViewer.Detectors;
/// <summary>
/// Detector for text that contains an XML declaration (<c>&lt;?xml version="..." ?&gt;</c>).
/// </summary>
public class XMLDetector : IFormatDetector
{
    // Case-insensitive match of an XML declaration carrying a quoted version attribute.
    // The pattern is not anchored, so the declaration may appear anywhere in the text.
    internal Regex Signature { get; } = new(@"<\?xml\b[^>]*\bversion\s*=\s*""[^""]*""[^\?>]*\?>", RegexOptions.IgnoreCase);

    /// <summary>Name of the detected format.</summary>
    public string Name => "XML";

    /// <summary>File extension associated with the detected format.</summary>
    public string Extension => ".xml";

    /// <summary>
    /// Reports whether <paramref name="text"/> contains an XML declaration.
    /// </summary>
    /// <param name="text">Text content to inspect; null or blank yields <c>false</c>.</param>
    /// <returns><c>true</c> when the declaration is found; otherwise <c>false</c>.</returns>
    public bool Detect(string text)
    {
        // Regex.IsMatch throws on null input; return false instead, as JSONDetector does.
        if (string.IsNullOrWhiteSpace(text)) return false;

        return Signature.IsMatch(text);
    }
}