diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/SimpleHelpers/FileEncoding.cs b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/FileEncoding.cs similarity index 55% rename from QuickLook.Plugin/QuickLook.Plugin.TextViewer/SimpleHelpers/FileEncoding.cs rename to QuickLook.Plugin/QuickLook.Plugin.TextViewer/FileEncoding.cs index 0111964..de5e5a7 100644 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/SimpleHelpers/FileEncoding.cs +++ b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/FileEncoding.cs @@ -1,4 +1,5 @@ -#region * License * +#region * License * + /* SimpleHelpers - FileEncoding @@ -28,117 +29,150 @@ License: http://www.opensource.org/licenses/mit-license.php Website: https://github.com/khalidsalomao/SimpleHelpers.Net */ + #endregion using System; -using System.IO; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Text; +using Ude; -namespace QuickLook.Plugin.TextViewer.SimpleHelpers +namespace QuickLook.Plugin.TextViewer { public class FileEncoding { - const int DEFAULT_BUFFER_SIZE = 128 * 1024; + private const int DefaultBufferSize = 128 * 1024; + + private readonly Dictionary _encodingFrequency = + new Dictionary(StringComparer.Ordinal); + + private readonly CharsetDetector _ude = new CharsetDetector(); + private bool _started; + /// - /// Tries to detect the file encoding. + /// If the detection has reached a decision. + /// + /// The done. + public bool Done { get; set; } + + /// + /// Detected encoding name. + /// + public string EncodingName { get; set; } + + /// + /// If the data contains textual data. + /// + public bool IsText { get; set; } + + /// + /// If the file or data has any mark indicating encoding information (byte order mark). + /// + public bool HasByteOrderMark { get; set; } + + /// + /// Tries to detect the file encoding. /// /// The input filename. /// The default encoding if none was detected. /// - public static Encoding DetectFileEncoding (string inputFilename, Encoding defaultIfNotDetected = null) + public static Encoding DetectFileEncoding(string inputFilename, Encoding defaultIfNotDetected = null) { - using (var stream = new System.IO.FileStream (inputFilename, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.ReadWrite | System.IO.FileShare.Delete, DEFAULT_BUFFER_SIZE)) + using (var stream = new FileStream(inputFilename, FileMode.Open, + FileAccess.Read, FileShare.ReadWrite | FileShare.Delete, + DefaultBufferSize)) { - return DetectFileEncoding (stream) ?? defaultIfNotDetected; + return DetectFileEncoding(stream) ?? defaultIfNotDetected; } } /// - /// Tries to detect the file encoding. + /// Tries to detect the file encoding. /// /// The input stream. /// The default encoding if none was detected. /// - public static Encoding DetectFileEncoding (Stream inputStream, Encoding defaultIfNotDetected = null) + public static Encoding DetectFileEncoding(Stream inputStream, Encoding defaultIfNotDetected = null) { - var det = new FileEncoding (); - det.Detect (inputStream); - return det.Complete () ?? defaultIfNotDetected; + var det = new FileEncoding(); + det.Detect(inputStream); + return det.Complete() ?? defaultIfNotDetected; } /// - /// Tries to detect the file encoding. + /// Tries to detect the file encoding. /// /// The input data. /// The start. /// The count. /// The default encoding if none was detected. /// - public static Encoding DetectFileEncoding (byte[] inputData, int start, int count, Encoding defaultIfNotDetected = null) + public static Encoding DetectFileEncoding(byte[] inputData, int start, int count, + Encoding defaultIfNotDetected = null) { - var det = new FileEncoding (); - det.Detect (inputData, start, count); - return det.Complete () ?? defaultIfNotDetected; + var det = new FileEncoding(); + det.Detect(inputData, start, count); + return det.Complete() ?? defaultIfNotDetected; } /// - /// Tries to load file content with the correct encoding. + /// Tries to load file content with the correct encoding. /// /// The filename. /// The default value if unable to load file content. /// File content - public static string TryLoadFile (string filename, string defaultValue = "") + public static string TryLoadFile(string filename, string defaultValue = "") { try { - if (System.IO.File.Exists (filename)) + if (File.Exists(filename)) { // enable file encoding detection - var encoding = SimpleHelpers.FileEncoding.DetectFileEncoding (filename); + var encoding = DetectFileEncoding(filename); // Load data based on parameters - return System.IO.File.ReadAllText (filename, encoding); + return File.ReadAllText(filename, encoding); } } - catch { } + catch + { + // ignored + } return defaultValue; } /// - /// Detects if contains textual data. + /// Detects if contains textual data. /// /// The raw data. - public static bool CheckForTextualData (byte[] rawData) + public static bool CheckForTextualData(byte[] rawData) { - return CheckForTextualData (rawData, 0, rawData.Length); + return CheckForTextualData(rawData, 0, rawData.Length); } /// - /// Detects if contains textual data. + /// Detects if contains textual data. /// /// The raw data. /// The start. /// The count. - public static bool CheckForTextualData (byte[] rawData, int start, int count) + public static bool CheckForTextualData(byte[] rawData, int start, int count) { if (rawData.Length < count || count < 4 || start + 1 >= count) return true; - - if (CheckForByteOrderMark (rawData, start)) - { + + if (CheckForByteOrderMark(rawData, start)) return true; - } // http://stackoverflow.com/questions/910873/how-can-i-determine-if-a-file-is-binary-or-text-in-c // http://www.gnu.org/software/diffutils/manual/html_node/Binary.html // count the number od null bytes sequences // considering only sequeces of 2 0s: "\0\0" or control characters below 10 - int nullSequences = 0; - int controlSequences = 0; + var nullSequences = 0; + var controlSequences = 0; for (var i = start + 1; i < count; i++) - { if (rawData[i - 1] == 0 && rawData[i] == 0) { if (++nullSequences > 1) @@ -148,220 +182,181 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers { ++controlSequences; } - } // is text if there is no null byte sequences or less than 10% of the buffer has control caracteres - return nullSequences == 0 && (controlSequences <= (rawData.Length / 10)); + return nullSequences == 0 && controlSequences <= rawData.Length / 10; } - + /// - /// Detects if data has bytes order mark to indicate its encoding for textual data. + /// Detects if data has bytes order mark to indicate its encoding for textual data. /// /// The raw data. /// The start. /// - private static bool CheckForByteOrderMark (byte[] rawData, int start = 0) + private static bool CheckForByteOrderMark(byte[] rawData, int start = 0) { if (rawData.Length - start < 4) return false; // Detect encoding correctly (from Rick Strahl's blog) // http://www.west-wind.com/weblog/posts/2007/Nov/28/Detecting-Text-Encoding-for-StreamReader if (rawData[start] == 0xef && rawData[start + 1] == 0xbb && rawData[start + 2] == 0xbf) - { - // Encoding.UTF8; return true; - } - else if (rawData[start] == 0xfe && rawData[start + 1] == 0xff) - { - // Encoding.Unicode; + if (rawData[start] == 0xfe && rawData[start + 1] == 0xff) return true; - } - else if (rawData[start] == 0 && rawData[start + 1] == 0 && rawData[start + 2] == 0xfe && rawData[start + 3] == 0xff) - { - // Encoding.UTF32; + if (rawData[start] == 0 && rawData[start + 1] == 0 && rawData[start + 2] == 0xfe && + rawData[start + 3] == 0xff) return true; - } - else if (rawData[start] == 0x2b && rawData[start + 1] == 0x2f && rawData[start + 2] == 0x76) - { - // Encoding.UTF7; + if (rawData[start] == 0x2b && rawData[start + 1] == 0x2f && rawData[start + 2] == 0x76) return true; - } return false; } - Ude.CharsetDetector ude = new Ude.CharsetDetector (); - bool _started = false; - - /// - /// If the detection has reached a decision. + /// Resets this instance. /// - /// The done. - public bool Done { get; set; } - - /// - /// Detected encoding name. - /// - public string EncodingName { get; set; } - - /// - /// If the data contains textual data. - /// - public bool IsText { get; set; } - - /// - /// If the file or data has any mark indicating encoding information (byte order mark). - /// - public bool HasByteOrderMark { get; set; } - - Dictionary encodingFrequency = new Dictionary (StringComparer.Ordinal); - - /// - /// Resets this instance. - /// - public void Reset () + public void Reset() { _started = false; Done = false; HasByteOrderMark = false; - encodingFrequency.Clear (); - ude.Reset (); + _encodingFrequency.Clear(); + _ude.Reset(); EncodingName = null; } /// - /// Detects the encoding of textual data of the specified input data. - /// Only the stream first 20Mb will be analysed. + /// Detects the encoding of textual data of the specified input data. + /// + /// Only the stream first 1Mb will be analysed. /// /// The input data. /// Detected encoding name - public string Detect (Stream inputData) + public string Detect(Stream inputData) { - return Detect (inputData, 20 * 1024 * 1024); + return Detect(inputData, 1 * 1024 * 1024); } /// - /// Detects the encoding of textual data of the specified input data. + /// Detects the encoding of textual data of the specified input data. /// /// The input data. - /// Size in byte of analysed data, if you want to analysed only a sample. Use 0 to read all stream data. + /// + /// Size in byte of analysed data, if you want to analysed only a sample. Use 0 to read all stream + /// data. + /// /// Size of the buffer for the stream read. /// Detected encoding name /// bufferSize parameter cannot be 0 or less. - public string Detect (Stream inputData, int maxSize, int bufferSize = 16 * 1024) + public string Detect(Stream inputData, int maxSize, int bufferSize = 16 * 1024) { if (bufferSize <= 0) - throw new ArgumentOutOfRangeException ("bufferSize", "Buffer size cannot be 0 or less."); - int maxIterations = maxSize <= 0 ? Int32.MaxValue : maxSize / bufferSize; - int i = 0; - byte[] buffer = new byte[bufferSize]; + throw new ArgumentOutOfRangeException(nameof(bufferSize), @"Buffer size cannot be 0 or less."); + var maxIterations = maxSize <= 0 ? int.MaxValue : maxSize / bufferSize; + var i = 0; + var buffer = new byte[bufferSize]; while (i++ < maxIterations) { - int sz = inputData.Read (buffer, 0, (int)buffer.Length); + var sz = inputData.Read(buffer, 0, buffer.Length); if (sz <= 0) - { break; - } - Detect (buffer, 0, sz); + Detect(buffer, 0, sz); if (Done) break; } - Complete (); + Complete(); return EncodingName; } /// - /// Detects the encoding of textual data of the specified input data. + /// Detects the encoding of textual data of the specified input data. /// /// The input data. /// The start. /// The count. /// Detected encoding name - public string Detect (byte[] inputData, int start, int count) + public string Detect(byte[] inputData, int start, int count) { if (Done) return EncodingName; if (!_started) { - Reset (); + Reset(); _started = true; - if (!CheckForTextualData (inputData, start, count)) + if (!CheckForTextualData(inputData, start, count)) { IsText = false; Done = true; return EncodingName; } - HasByteOrderMark = CheckForByteOrderMark (inputData, start); + HasByteOrderMark = CheckForByteOrderMark(inputData, start); IsText = true; } // execute charset detector - ude.Feed (inputData, start, count); - ude.DataEnd (); - if (ude.IsDone () && !String.IsNullOrEmpty (ude.Charset)) + _ude.Feed(inputData, start, count); + _ude.DataEnd(); + if (_ude.IsDone() && !string.IsNullOrEmpty(_ude.Charset)) { - IncrementFrequency (ude.Charset); + IncrementFrequency(_ude.Charset); Done = true; return EncodingName; } // singular buffer detection - var singleUde = new Ude.CharsetDetector (); + var singleUde = new CharsetDetector(); const int udeFeedSize = 4 * 1024; - int step = (count - start) < udeFeedSize ? (count - start) : udeFeedSize; + var step = count - start < udeFeedSize ? count - start : udeFeedSize; for (var pos = start; pos < count; pos += step) { - singleUde.Reset (); + singleUde.Reset(); if (pos + step > count) - singleUde.Feed (inputData, pos, count - pos); + singleUde.Feed(inputData, pos, count - pos); else - singleUde.Feed (inputData, pos, step); - singleUde.DataEnd (); + singleUde.Feed(inputData, pos, step); + singleUde.DataEnd(); // update encoding frequency - if (singleUde.Confidence > 0.3 && !String.IsNullOrEmpty (singleUde.Charset)) - IncrementFrequency (singleUde.Charset); + if (singleUde.Confidence > 0.3 && !string.IsNullOrEmpty(singleUde.Charset)) + IncrementFrequency(singleUde.Charset); } // vote for best encoding - EncodingName = GetCurrentEncoding (); + EncodingName = GetCurrentEncoding(); // update current encoding name return EncodingName; } /// - /// Finalize detection phase and gets detected encoding name. + /// Finalize detection phase and gets detected encoding name. /// /// - public Encoding Complete () + public Encoding Complete() { Done = true; - ude.DataEnd (); - if (ude.IsDone () && !String.IsNullOrEmpty (ude.Charset)) - { - EncodingName = ude.Charset; - } + _ude.DataEnd(); + if (_ude.IsDone() && !string.IsNullOrEmpty(_ude.Charset)) + EncodingName = _ude.Charset; // vote for best encoding - EncodingName = GetCurrentEncoding (); + EncodingName = GetCurrentEncoding(); // check result - if (!String.IsNullOrEmpty (EncodingName)) - return Encoding.GetEncoding (EncodingName); + if (!string.IsNullOrEmpty(EncodingName)) + return Encoding.GetEncoding(EncodingName); return null; } - private void IncrementFrequency (string charset) + private void IncrementFrequency(string charset) { int currentCount; - encodingFrequency.TryGetValue (charset, out currentCount); - encodingFrequency[charset] = ++currentCount; + _encodingFrequency.TryGetValue(charset, out currentCount); + _encodingFrequency[charset] = ++currentCount; } - private string GetCurrentEncoding () + private string GetCurrentEncoding() { - if (encodingFrequency.Count == 0) + if (_encodingFrequency.Count == 0) return null; // ASCII should be the last option, since other encodings often has ASCII included... - return encodingFrequency - .OrderByDescending (i => i.Value * (i.Key != ("ASCII") ? 1 : 0)) - .FirstOrDefault ().Key; + return _encodingFrequency + .OrderByDescending(i => i.Value * (i.Key != "ASCII" ? 1 : 0)) + .FirstOrDefault().Key; } } -} +} \ No newline at end of file diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/QuickLook.Plugin.TextViewer.csproj b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/QuickLook.Plugin.TextViewer.csproj index 996cb1d..dacea63 100644 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/QuickLook.Plugin.TextViewer.csproj +++ b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/QuickLook.Plugin.TextViewer.csproj @@ -58,8 +58,8 @@ - - ..\..\packages\UDE.CSharp.1.1.0\lib\Ude.dll + + ..\..\packages\Ude.Signed.0.1.1\lib\net40\Ude.dll @@ -69,7 +69,7 @@ - + TextViewerPanel.xaml diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.xaml.cs b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.xaml.cs index 1c1607d..8bdb826 100644 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.xaml.cs +++ b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/TextViewerPanel.xaml.cs @@ -20,7 +20,6 @@ using System.Text; using System.Windows.Controls; using System.Windows.Media; using ICSharpCode.AvalonEdit.Highlighting; -using QuickLook.Plugin.TextViewer.SimpleHelpers; namespace QuickLook.Plugin.TextViewer { @@ -43,18 +42,11 @@ namespace QuickLook.Plugin.TextViewer { using (var s = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)) { - viewer.Encoding = DetectEncoding(s); - viewer.Load(path); + viewer.Encoding = FileEncoding.DetectFileEncoding(s, Encoding.Default); } + viewer.Load(path); viewer.SyntaxHighlighting = HighlightingManager.Instance.GetDefinitionByExtension(Path.GetExtension(path)); } - - private static Encoding DetectEncoding(Stream s) - { - var det = new FileEncoding(); - det.Detect(s, 1 * 1024 * 1024); - return det.Complete() ?? Encoding.Default; - } } } \ No newline at end of file diff --git a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/packages.config b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/packages.config index 8252bf6..d1c3028 100644 --- a/QuickLook.Plugin/QuickLook.Plugin.TextViewer/packages.config +++ b/QuickLook.Plugin/QuickLook.Plugin.TextViewer/packages.config @@ -1,7 +1,5 @@  - - - + \ No newline at end of file