diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.cs b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.cs index 38b4514..317ba70 100644 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.cs +++ b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.cs @@ -24,6 +24,7 @@ using QuickLook.Common.Helpers; using QuickLook.Common.Plugin; using System; using System.IO; +using System.Linq; using System.Reflection; using System.Text; using System.Threading.Tasks; @@ -177,8 +178,8 @@ public class TextViewerPanel : TextEditor, IDisposable var bufferCopy = buffer.ToArray(); buffer.Dispose(); - var encoding = CharsetDetector.DetectFromBytes(bufferCopy).Detected?.Encoding ?? - Encoding.Default; + var result = CharsetDetector.DetectFromBytes(bufferCopy); + var encoding = result.DoubleDetectFromResult(bufferCopy); // Fix issues var doc = new TextDocument(encoding.GetString(bufferCopy)); doc.SetOwnerThread(Dispatcher.Thread); @@ -199,3 +200,57 @@ public class TextViewerPanel : TextEditor, IDisposable }); } } + +file static class DetectionExtensions +{ + public static Encoding DoubleDetectFromResult(this DetectionResult result, byte[] buffer) + { + // Determine the highest confidence encoding, or fallback to ANSI + var encoding = result.Detected?.Encoding ?? Encoding.Default; + + // When mixing encodings, one of the encodings may gain higher confidence + // In this case, we should return to encodings UTF8 / UTF32 / ANSI + // https://github.com/QL-Win/QuickLook/issues/769 + if (encoding != Encoding.UTF8 && encoding != Encoding.UTF32 && encoding != Encoding.Default) + { + if (result.Details.Any(detail => detail.Encoding == Encoding.UTF8)) + { + encoding = Encoding.UTF8; + } + else if (result.Details.Any(detail => detail.Encoding == Encoding.UTF32)) + { + encoding = Encoding.UTF32; + } + else if (result.Details.Any(detail => detail.Encoding == Encoding.Default)) + { + encoding = Encoding.Default; + } + } + + // When the text is too short and lacks a BOM + // In this case, we should fallback to an encoding if it is not recognized as UTF8 / UTF32 / ANSI + // https://github.com/QL-Win/QuickLook/issues/471 + // https://github.com/QL-Win/QuickLook/issues/600 + // https://github.com/QL-Win/QuickLook/issues/954 + if (buffer.Length <= 50) + { + if (encoding != Encoding.UTF8 && encoding != Encoding.UTF32 && encoding != Encoding.Default) + { + if (!Encoding.UTF8.GetString(buffer).Contains("\uFFFD")) + { + encoding = Encoding.UTF8; + } + else if (!Encoding.UTF32.GetString(buffer).Contains("\uFFFD")) + { + encoding = Encoding.UTF32; + } + else if (!Encoding.Default.GetString(buffer).Contains("\uFFFD")) + { + encoding = Encoding.Default; + } + } + } + + return encoding; + } +}