From 78fe8d2558f3410fccb1a5210f5515bbe93e5bce Mon Sep 17 00:00:00 2001 From: Paddy Xu Date: Sun, 23 Jul 2017 20:18:27 +0300 Subject: [PATCH] replace old Ude lib with new library --- .../FileEncoding.cs | 362 ------------------ .../QuickLook.Plugin.TextViewer/Plugin.cs | 28 +- .../QuickLook.Plugin.TextViewer.csproj | 5 +- .../TextViewerPanel.xaml.cs | 7 +- .../packages.config | 2 +- 5 files changed, 21 insertions(+), 383 deletions(-) delete mode 100644 QuickLook.Plugin/QuickLook.Plugin.TextViewer/FileEncoding.cs diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/FileEncoding.cs b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/FileEncoding.cs deleted file mode 100644 index de5e5a7..0000000 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/FileEncoding.cs +++ /dev/null @@ -1,362 +0,0 @@ -#region * License * - -/* - SimpleHelpers - FileEncoding - - Copyright © 2014 Khalid Salomão - - Permission is hereby granted, free of charge, to any person - obtaining a copy of this software and associated documentation - files (the “Software”), to deal in the Software without - restriction, including without limitation the rights to use, - copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the - Software is furnished to do so, subject to the following - conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. - - License: http://www.opensource.org/licenses/mit-license.php - Website: https://github.com/khalidsalomao/SimpleHelpers.Net - */ - -#endregion - -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Text; -using Ude; - -namespace QuickLook.Plugin.TextViewer -{ - public class FileEncoding - { - private const int DefaultBufferSize = 128 * 1024; - - private readonly Dictionary _encodingFrequency = - new Dictionary(StringComparer.Ordinal); - - private readonly CharsetDetector _ude = new CharsetDetector(); - private bool _started; - - - /// - /// If the detection has reached a decision. - /// - /// The done. - public bool Done { get; set; } - - /// - /// Detected encoding name. - /// - public string EncodingName { get; set; } - - /// - /// If the data contains textual data. - /// - public bool IsText { get; set; } - - /// - /// If the file or data has any mark indicating encoding information (byte order mark). - /// - public bool HasByteOrderMark { get; set; } - - /// - /// Tries to detect the file encoding. - /// - /// The input filename. - /// The default encoding if none was detected. - /// - public static Encoding DetectFileEncoding(string inputFilename, Encoding defaultIfNotDetected = null) - { - using (var stream = new FileStream(inputFilename, FileMode.Open, - FileAccess.Read, FileShare.ReadWrite | FileShare.Delete, - DefaultBufferSize)) - { - return DetectFileEncoding(stream) ?? defaultIfNotDetected; - } - } - - /// - /// Tries to detect the file encoding. - /// - /// The input stream. - /// The default encoding if none was detected. - /// - public static Encoding DetectFileEncoding(Stream inputStream, Encoding defaultIfNotDetected = null) - { - var det = new FileEncoding(); - det.Detect(inputStream); - return det.Complete() ?? defaultIfNotDetected; - } - - /// - /// Tries to detect the file encoding. - /// - /// The input data. - /// The start. - /// The count. - /// The default encoding if none was detected. - /// - public static Encoding DetectFileEncoding(byte[] inputData, int start, int count, - Encoding defaultIfNotDetected = null) - { - var det = new FileEncoding(); - det.Detect(inputData, start, count); - return det.Complete() ?? defaultIfNotDetected; - } - - /// - /// Tries to load file content with the correct encoding. - /// - /// The filename. - /// The default value if unable to load file content. - /// File content - public static string TryLoadFile(string filename, string defaultValue = "") - { - try - { - if (File.Exists(filename)) - { - // enable file encoding detection - var encoding = DetectFileEncoding(filename); - // Load data based on parameters - return File.ReadAllText(filename, encoding); - } - } - catch - { - // ignored - } - return defaultValue; - } - - /// - /// Detects if contains textual data. - /// - /// The raw data. - public static bool CheckForTextualData(byte[] rawData) - { - return CheckForTextualData(rawData, 0, rawData.Length); - } - - /// - /// Detects if contains textual data. - /// - /// The raw data. - /// The start. - /// The count. - public static bool CheckForTextualData(byte[] rawData, int start, int count) - { - if (rawData.Length < count || count < 4 || start + 1 >= count) - return true; - - if (CheckForByteOrderMark(rawData, start)) - return true; - - // http://stackoverflow.com/questions/910873/how-can-i-determine-if-a-file-is-binary-or-text-in-c - // http://www.gnu.org/software/diffutils/manual/html_node/Binary.html - // count the number od null bytes sequences - // considering only sequeces of 2 0s: "\0\0" or control characters below 10 - var nullSequences = 0; - var controlSequences = 0; - for (var i = start + 1; i < count; i++) - if (rawData[i - 1] == 0 && rawData[i] == 0) - { - if (++nullSequences > 1) - break; - } - else if (rawData[i - 1] == 0 && rawData[i] < 10) - { - ++controlSequences; - } - - // is text if there is no null byte sequences or less than 10% of the buffer has control caracteres - return nullSequences == 0 && controlSequences <= rawData.Length / 10; - } - - /// - /// Detects if data has bytes order mark to indicate its encoding for textual data. - /// - /// The raw data. - /// The start. - /// - private static bool CheckForByteOrderMark(byte[] rawData, int start = 0) - { - if (rawData.Length - start < 4) - return false; - // Detect encoding correctly (from Rick Strahl's blog) - // http://www.west-wind.com/weblog/posts/2007/Nov/28/Detecting-Text-Encoding-for-StreamReader - if (rawData[start] == 0xef && rawData[start + 1] == 0xbb && rawData[start + 2] == 0xbf) - return true; - if (rawData[start] == 0xfe && rawData[start + 1] == 0xff) - return true; - if (rawData[start] == 0 && rawData[start + 1] == 0 && rawData[start + 2] == 0xfe && - rawData[start + 3] == 0xff) - return true; - if (rawData[start] == 0x2b && rawData[start + 1] == 0x2f && rawData[start + 2] == 0x76) - return true; - return false; - } - - /// - /// Resets this instance. - /// - public void Reset() - { - _started = false; - Done = false; - HasByteOrderMark = false; - _encodingFrequency.Clear(); - _ude.Reset(); - EncodingName = null; - } - - /// - /// Detects the encoding of textual data of the specified input data. - /// - /// Only the stream first 1Mb will be analysed. - /// - /// The input data. - /// Detected encoding name - public string Detect(Stream inputData) - { - return Detect(inputData, 1 * 1024 * 1024); - } - - /// - /// Detects the encoding of textual data of the specified input data. - /// - /// The input data. - /// - /// Size in byte of analysed data, if you want to analysed only a sample. Use 0 to read all stream - /// data. - /// - /// Size of the buffer for the stream read. - /// Detected encoding name - /// bufferSize parameter cannot be 0 or less. - public string Detect(Stream inputData, int maxSize, int bufferSize = 16 * 1024) - { - if (bufferSize <= 0) - throw new ArgumentOutOfRangeException(nameof(bufferSize), @"Buffer size cannot be 0 or less."); - var maxIterations = maxSize <= 0 ? int.MaxValue : maxSize / bufferSize; - var i = 0; - var buffer = new byte[bufferSize]; - while (i++ < maxIterations) - { - var sz = inputData.Read(buffer, 0, buffer.Length); - if (sz <= 0) - break; - Detect(buffer, 0, sz); - if (Done) - break; - } - Complete(); - return EncodingName; - } - - /// - /// Detects the encoding of textual data of the specified input data. - /// - /// The input data. - /// The start. - /// The count. - /// Detected encoding name - public string Detect(byte[] inputData, int start, int count) - { - if (Done) - return EncodingName; - if (!_started) - { - Reset(); - _started = true; - if (!CheckForTextualData(inputData, start, count)) - { - IsText = false; - Done = true; - return EncodingName; - } - HasByteOrderMark = CheckForByteOrderMark(inputData, start); - IsText = true; - } - - // execute charset detector - _ude.Feed(inputData, start, count); - _ude.DataEnd(); - if (_ude.IsDone() && !string.IsNullOrEmpty(_ude.Charset)) - { - IncrementFrequency(_ude.Charset); - Done = true; - return EncodingName; - } - - // singular buffer detection - var singleUde = new CharsetDetector(); - const int udeFeedSize = 4 * 1024; - var step = count - start < udeFeedSize ? count - start : udeFeedSize; - for (var pos = start; pos < count; pos += step) - { - singleUde.Reset(); - if (pos + step > count) - singleUde.Feed(inputData, pos, count - pos); - else - singleUde.Feed(inputData, pos, step); - singleUde.DataEnd(); - // update encoding frequency - if (singleUde.Confidence > 0.3 && !string.IsNullOrEmpty(singleUde.Charset)) - IncrementFrequency(singleUde.Charset); - } - // vote for best encoding - EncodingName = GetCurrentEncoding(); - // update current encoding name - return EncodingName; - } - - /// - /// Finalize detection phase and gets detected encoding name. - /// - /// - public Encoding Complete() - { - Done = true; - _ude.DataEnd(); - if (_ude.IsDone() && !string.IsNullOrEmpty(_ude.Charset)) - EncodingName = _ude.Charset; - // vote for best encoding - EncodingName = GetCurrentEncoding(); - // check result - if (!string.IsNullOrEmpty(EncodingName)) - return Encoding.GetEncoding(EncodingName); - return null; - } - - private void IncrementFrequency(string charset) - { - int currentCount; - _encodingFrequency.TryGetValue(charset, out currentCount); - _encodingFrequency[charset] = ++currentCount; - } - - private string GetCurrentEncoding() - { - if (_encodingFrequency.Count == 0) - return null; - // ASCII should be the last option, since other encodings often has ASCII included... - return _encodingFrequency - .OrderByDescending(i => i.Value * (i.Key != "ASCII" ? 1 : 0)) - .FirstOrDefault().Key; - } - } -} \ No newline at end of file diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/Plugin.cs b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/Plugin.cs index ab2f4f7..77e112e 100644 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/Plugin.cs +++ b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/Plugin.cs @@ -18,6 +18,7 @@ using System.IO; using System.Windows; using ICSharpCode.AvalonEdit.Highlighting; +using UtfUnknown; namespace QuickLook.Plugin.TextViewer { @@ -39,26 +40,21 @@ namespace QuickLook.Plugin.TextViewer const long MAX_SIZE = 20 * 1024 * 1024; - // if there is a possible highlighting scheme (by file extension), treat it as a plain text file + if (Path.GetExtension(path).ToLower() == ".txt") + return new FileInfo(path).Length <= MAX_SIZE; + + // if there is a matched highlighting scheme (by file extension), treat it as a plain text file if (HighlightingManager.Instance.GetDefinitionByExtension(Path.GetExtension(path)) != null) return new FileInfo(path).Length <= MAX_SIZE; - // otherwise, read the first 512 bytes as string (StreamReader handles encoding automatically), - // check whether they are all printable chars. - using (var sr = new StreamReader(new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))) + // otherwise, read the first 10KB, check if we can get something. + using (var s = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) { - var buffer = new char[512]; - var len = sr.Read(buffer, 0, 512); + const int bufferLength = 10 * 1024; + var buffer = new byte[bufferLength]; + s.Read(buffer, 0, bufferLength); - for (var i = 0; i < len; i++) - { - if (!char.IsControl(buffer[i])) continue; - - if (buffer[i] != '\r' && buffer[i] != '\n' && buffer[i] != '\t') - return false; - } - - return new FileInfo(path).Length <= MAX_SIZE; + return CharsetDetector.DetectFromBytes(buffer).Detected != null && s.Length <= MAX_SIZE; } } @@ -80,7 +76,7 @@ namespace QuickLook.Plugin.TextViewer public void Cleanup() { - _tvp = null; + _tvp.viewer = null; } } } \ No newline at end of file diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/QuickLook.Plugin.TextViewer.csproj b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/QuickLook.Plugin.TextViewer.csproj index dacea63..21dde50 100644 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/QuickLook.Plugin.TextViewer.csproj +++ b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/QuickLook.Plugin.TextViewer.csproj @@ -58,8 +58,8 @@ - - ..\..\packages\Ude.Signed.0.1.1\lib\net40\Ude.dll + + ..\..\packages\UTF.Unknown.1.0.0-beta1\lib\net40\UtfUnknown.dll @@ -69,7 +69,6 @@ - TextViewerPanel.xaml diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.xaml.cs b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.xaml.cs index 2145f0e..ccc7a6d 100644 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.xaml.cs +++ b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.xaml.cs @@ -21,6 +21,7 @@ using System.Windows.Controls; using System.Windows.Input; using System.Windows.Media; using ICSharpCode.AvalonEdit.Highlighting; +using UtfUnknown; namespace QuickLook.Plugin.TextViewer { @@ -79,7 +80,11 @@ namespace QuickLook.Plugin.TextViewer { using (var s = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)) { - viewer.Encoding = FileEncoding.DetectFileEncoding(s, Encoding.Default); + const int bufferLength = 1 * 1024 * 1024; + var buffer = new byte[bufferLength]; + s.Read(buffer, 0, bufferLength); + + viewer.Encoding = CharsetDetector.DetectFromBytes(buffer).Detected?.Encoding ?? Encoding.Default; } viewer.Load(path); diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/packages.config b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/packages.config index d1c3028..cd97932 100644 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/packages.config +++ b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/packages.config @@ -1,5 +1,5 @@  - + \ No newline at end of file