Use signed Ude lib

This commit is contained in:
Paddy Xu
2017-07-15 23:03:47 +03:00
parent 61074fea2c
commit 9ca358bd5a
4 changed files with 141 additions and 156 deletions

View File

@@ -1,4 +1,5 @@
#region * License * #region * License *
/* /*
SimpleHelpers - FileEncoding SimpleHelpers - FileEncoding
@@ -28,19 +29,49 @@
License: http://www.opensource.org/licenses/mit-license.php License: http://www.opensource.org/licenses/mit-license.php
Website: https://github.com/khalidsalomao/SimpleHelpers.Net Website: https://github.com/khalidsalomao/SimpleHelpers.Net
*/ */
#endregion #endregion
using System; using System;
using System.IO;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO;
using System.Linq; using System.Linq;
using System.Text; using System.Text;
using Ude;
namespace QuickLook.Plugin.TextViewer.SimpleHelpers namespace QuickLook.Plugin.TextViewer
{ {
public class FileEncoding public class FileEncoding
{ {
const int DEFAULT_BUFFER_SIZE = 128 * 1024; private const int DefaultBufferSize = 128 * 1024;
private readonly Dictionary<string, int> _encodingFrequency =
new Dictionary<string, int>(StringComparer.Ordinal);
private readonly CharsetDetector _ude = new CharsetDetector();
private bool _started;
/// <summary>
/// If the detection has reached a decision.
/// </summary>
/// <value>The done.</value>
public bool Done { get; set; }
/// <summary>
/// Detected encoding name.
/// </summary>
public string EncodingName { get; set; }
/// <summary>
/// If the data contains textual data.
/// </summary>
public bool IsText { get; set; }
/// <summary>
/// If the file or data has any mark indicating encoding information (byte order mark).
/// </summary>
public bool HasByteOrderMark { get; set; }
/// <summary> /// <summary>
/// Tries to detect the file encoding. /// Tries to detect the file encoding.
@@ -50,7 +81,9 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
/// <returns></returns> /// <returns></returns>
public static Encoding DetectFileEncoding(string inputFilename, Encoding defaultIfNotDetected = null) public static Encoding DetectFileEncoding(string inputFilename, Encoding defaultIfNotDetected = null)
{ {
using (var stream = new System.IO.FileStream (inputFilename, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.ReadWrite | System.IO.FileShare.Delete, DEFAULT_BUFFER_SIZE)) using (var stream = new FileStream(inputFilename, FileMode.Open,
FileAccess.Read, FileShare.ReadWrite | FileShare.Delete,
DefaultBufferSize))
{ {
return DetectFileEncoding(stream) ?? defaultIfNotDetected; return DetectFileEncoding(stream) ?? defaultIfNotDetected;
} }
@@ -77,7 +110,8 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
/// <param name="count">The count.</param> /// <param name="count">The count.</param>
/// <param name="defaultIfNotDetected">The default encoding if none was detected.</param> /// <param name="defaultIfNotDetected">The default encoding if none was detected.</param>
/// <returns></returns> /// <returns></returns>
public static Encoding DetectFileEncoding (byte[] inputData, int start, int count, Encoding defaultIfNotDetected = null) public static Encoding DetectFileEncoding(byte[] inputData, int start, int count,
Encoding defaultIfNotDetected = null)
{ {
var det = new FileEncoding(); var det = new FileEncoding();
det.Detect(inputData, start, count); det.Detect(inputData, start, count);
@@ -94,15 +128,18 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
{ {
try try
{ {
if (System.IO.File.Exists (filename)) if (File.Exists(filename))
{ {
// enable file encoding detection // enable file encoding detection
var encoding = SimpleHelpers.FileEncoding.DetectFileEncoding (filename); var encoding = DetectFileEncoding(filename);
// Load data based on parameters // Load data based on parameters
return System.IO.File.ReadAllText (filename, encoding); return File.ReadAllText(filename, encoding);
} }
} }
catch { } catch
{
// ignored
}
return defaultValue; return defaultValue;
} }
@@ -127,18 +164,15 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
return true; return true;
if (CheckForByteOrderMark(rawData, start)) if (CheckForByteOrderMark(rawData, start))
{
return true; return true;
}
// http://stackoverflow.com/questions/910873/how-can-i-determine-if-a-file-is-binary-or-text-in-c // http://stackoverflow.com/questions/910873/how-can-i-determine-if-a-file-is-binary-or-text-in-c
// http://www.gnu.org/software/diffutils/manual/html_node/Binary.html // http://www.gnu.org/software/diffutils/manual/html_node/Binary.html
// count the number of null byte sequences // count the number of null byte sequences
// considering only sequences of 2 0s: "\0\0" or control characters below 10 // considering only sequences of 2 0s: "\0\0" or control characters below 10
int nullSequences = 0; var nullSequences = 0;
int controlSequences = 0; var controlSequences = 0;
for (var i = start + 1; i < count; i++) for (var i = start + 1; i < count; i++)
{
if (rawData[i - 1] == 0 && rawData[i] == 0) if (rawData[i - 1] == 0 && rawData[i] == 0)
{ {
if (++nullSequences > 1) if (++nullSequences > 1)
@@ -148,10 +182,9 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
{ {
++controlSequences; ++controlSequences;
} }
}
// is text if there are no null byte sequences and no more than 10% of the buffer is control characters // is text if there are no null byte sequences and no more than 10% of the buffer is control characters
return nullSequences == 0 && (controlSequences <= (rawData.Length / 10)); return nullSequences == 0 && controlSequences <= rawData.Length / 10;
} }
/// <summary> /// <summary>
@@ -167,55 +200,17 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
// Detect encoding correctly (from Rick Strahl's blog) // Detect encoding correctly (from Rick Strahl's blog)
// http://www.west-wind.com/weblog/posts/2007/Nov/28/Detecting-Text-Encoding-for-StreamReader // http://www.west-wind.com/weblog/posts/2007/Nov/28/Detecting-Text-Encoding-for-StreamReader
if (rawData[start] == 0xef && rawData[start + 1] == 0xbb && rawData[start + 2] == 0xbf) if (rawData[start] == 0xef && rawData[start + 1] == 0xbb && rawData[start + 2] == 0xbf)
{
// Encoding.UTF8;
return true; return true;
} if (rawData[start] == 0xfe && rawData[start + 1] == 0xff)
else if (rawData[start] == 0xfe && rawData[start + 1] == 0xff)
{
// Encoding.Unicode;
return true; return true;
} if (rawData[start] == 0 && rawData[start + 1] == 0 && rawData[start + 2] == 0xfe &&
else if (rawData[start] == 0 && rawData[start + 1] == 0 && rawData[start + 2] == 0xfe && rawData[start + 3] == 0xff) rawData[start + 3] == 0xff)
{
// Encoding.UTF32;
return true; return true;
} if (rawData[start] == 0x2b && rawData[start + 1] == 0x2f && rawData[start + 2] == 0x76)
else if (rawData[start] == 0x2b && rawData[start + 1] == 0x2f && rawData[start + 2] == 0x76)
{
// Encoding.UTF7;
return true; return true;
}
return false; return false;
} }
Ude.CharsetDetector ude = new Ude.CharsetDetector ();
bool _started = false;
/// <summary>
/// If the detection has reached a decision.
/// </summary>
/// <value>The done.</value>
public bool Done { get; set; }
/// <summary>
/// Detected encoding name.
/// </summary>
public string EncodingName { get; set; }
/// <summary>
/// If the data contains textual data.
/// </summary>
public bool IsText { get; set; }
/// <summary>
/// If the file or data has any mark indicating encoding information (byte order mark).
/// </summary>
public bool HasByteOrderMark { get; set; }
Dictionary<string, int> encodingFrequency = new Dictionary<string, int> (StringComparer.Ordinal);
/// <summary> /// <summary>
/// Resets this instance. /// Resets this instance.
/// </summary> /// </summary>
@@ -224,44 +219,46 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
_started = false; _started = false;
Done = false; Done = false;
HasByteOrderMark = false; HasByteOrderMark = false;
encodingFrequency.Clear (); _encodingFrequency.Clear();
ude.Reset (); _ude.Reset();
EncodingName = null; EncodingName = null;
} }
/// <summary> /// <summary>
/// Detects the encoding of textual data of the specified input data.<para/> /// Detects the encoding of textual data of the specified input data.
/// Only the first 20 MB of the stream will be analysed. /// <para />
/// Only the first 1 MB of the stream will be analysed.
/// </summary> /// </summary>
/// <param name="inputData">The input data.</param> /// <param name="inputData">The input data.</param>
/// <returns>Detected encoding name</returns> /// <returns>Detected encoding name</returns>
public string Detect(Stream inputData) public string Detect(Stream inputData)
{ {
return Detect (inputData, 20 * 1024 * 1024); return Detect(inputData, 1 * 1024 * 1024);
} }
/// <summary> /// <summary>
/// Detects the encoding of textual data of the specified input data. /// Detects the encoding of textual data of the specified input data.
/// </summary> /// </summary>
/// <param name="inputData">The input data.</param> /// <param name="inputData">The input data.</param>
/// <param name="maxSize">Size in bytes of the analysed data, if you want to analyse only a sample. Use 0 to read all stream data.</param> /// <param name="maxSize">
/// Size in bytes of the analysed data, if you want to analyse only a sample. Use 0 to read all stream
/// data.
/// </param>
/// <param name="bufferSize">Size of the buffer for the stream read.</param> /// <param name="bufferSize">Size of the buffer for the stream read.</param>
/// <returns>Detected encoding name</returns> /// <returns>Detected encoding name</returns>
/// <exception cref="ArgumentOutOfRangeException">bufferSize parameter cannot be 0 or less.</exception> /// <exception cref="ArgumentOutOfRangeException">bufferSize parameter cannot be 0 or less.</exception>
public string Detect(Stream inputData, int maxSize, int bufferSize = 16 * 1024) public string Detect(Stream inputData, int maxSize, int bufferSize = 16 * 1024)
{ {
if (bufferSize <= 0) if (bufferSize <= 0)
throw new ArgumentOutOfRangeException ("bufferSize", "Buffer size cannot be 0 or less."); throw new ArgumentOutOfRangeException(nameof(bufferSize), @"Buffer size cannot be 0 or less.");
int maxIterations = maxSize <= 0 ? Int32.MaxValue : maxSize / bufferSize; var maxIterations = maxSize <= 0 ? int.MaxValue : maxSize / bufferSize;
int i = 0; var i = 0;
byte[] buffer = new byte[bufferSize]; var buffer = new byte[bufferSize];
while (i++ < maxIterations) while (i++ < maxIterations)
{ {
int sz = inputData.Read (buffer, 0, (int)buffer.Length); var sz = inputData.Read(buffer, 0, buffer.Length);
if (sz <= 0) if (sz <= 0)
{
break; break;
}
Detect(buffer, 0, sz); Detect(buffer, 0, sz);
if (Done) if (Done)
break; break;
@@ -296,19 +293,19 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
} }
// execute charset detector // execute charset detector
ude.Feed (inputData, start, count); _ude.Feed(inputData, start, count);
ude.DataEnd (); _ude.DataEnd();
if (ude.IsDone () && !String.IsNullOrEmpty (ude.Charset)) if (_ude.IsDone() && !string.IsNullOrEmpty(_ude.Charset))
{ {
IncrementFrequency (ude.Charset); IncrementFrequency(_ude.Charset);
Done = true; Done = true;
return EncodingName; return EncodingName;
} }
// singular buffer detection // singular buffer detection
var singleUde = new Ude.CharsetDetector (); var singleUde = new CharsetDetector();
const int udeFeedSize = 4 * 1024; const int udeFeedSize = 4 * 1024;
int step = (count - start) < udeFeedSize ? (count - start) : udeFeedSize; var step = count - start < udeFeedSize ? count - start : udeFeedSize;
for (var pos = start; pos < count; pos += step) for (var pos = start; pos < count; pos += step)
{ {
singleUde.Reset(); singleUde.Reset();
@@ -318,7 +315,7 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
singleUde.Feed(inputData, pos, step); singleUde.Feed(inputData, pos, step);
singleUde.DataEnd(); singleUde.DataEnd();
// update encoding frequency // update encoding frequency
if (singleUde.Confidence > 0.3 && !String.IsNullOrEmpty (singleUde.Charset)) if (singleUde.Confidence > 0.3 && !string.IsNullOrEmpty(singleUde.Charset))
IncrementFrequency(singleUde.Charset); IncrementFrequency(singleUde.Charset);
} }
// vote for best encoding // vote for best encoding
@@ -334,15 +331,13 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
public Encoding Complete() public Encoding Complete()
{ {
Done = true; Done = true;
ude.DataEnd (); _ude.DataEnd();
if (ude.IsDone () && !String.IsNullOrEmpty (ude.Charset)) if (_ude.IsDone() && !string.IsNullOrEmpty(_ude.Charset))
{ EncodingName = _ude.Charset;
EncodingName = ude.Charset;
}
// vote for best encoding // vote for best encoding
EncodingName = GetCurrentEncoding(); EncodingName = GetCurrentEncoding();
// check result // check result
if (!String.IsNullOrEmpty (EncodingName)) if (!string.IsNullOrEmpty(EncodingName))
return Encoding.GetEncoding(EncodingName); return Encoding.GetEncoding(EncodingName);
return null; return null;
} }
@@ -350,17 +345,17 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
private void IncrementFrequency(string charset) private void IncrementFrequency(string charset)
{ {
int currentCount; int currentCount;
encodingFrequency.TryGetValue (charset, out currentCount); _encodingFrequency.TryGetValue(charset, out currentCount);
encodingFrequency[charset] = ++currentCount; _encodingFrequency[charset] = ++currentCount;
} }
private string GetCurrentEncoding() private string GetCurrentEncoding()
{ {
if (encodingFrequency.Count == 0) if (_encodingFrequency.Count == 0)
return null; return null;
// ASCII should be the last option, since other encodings often have ASCII included... // ASCII should be the last option, since other encodings often have ASCII included...
return encodingFrequency return _encodingFrequency
.OrderByDescending (i => i.Value * (i.Key != ("ASCII") ? 1 : 0)) .OrderByDescending(i => i.Value * (i.Key != "ASCII" ? 1 : 0))
.FirstOrDefault().Key; .FirstOrDefault().Key;
} }
} }

View File

@@ -58,8 +58,8 @@
<Reference Include="System" /> <Reference Include="System" />
<Reference Include="System.Core" /> <Reference Include="System.Core" />
<Reference Include="System.Xaml" /> <Reference Include="System.Xaml" />
<Reference Include="Ude, Version=0.1.0.0, Culture=neutral, processorArchitecture=MSIL"> <Reference Include="Ude, Version=0.1.1.0, Culture=neutral, PublicKeyToken=dd537652db4726a9, processorArchitecture=MSIL">
<HintPath>..\..\packages\UDE.CSharp.1.1.0\lib\Ude.dll</HintPath> <HintPath>..\..\packages\Ude.Signed.0.1.1\lib\net40\Ude.dll</HintPath>
</Reference> </Reference>
<Reference Include="WindowsBase" /> <Reference Include="WindowsBase" />
</ItemGroup> </ItemGroup>
@@ -69,7 +69,7 @@
</Compile> </Compile>
<Compile Include="Plugin.cs" /> <Compile Include="Plugin.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="SimpleHelpers\FileEncoding.cs" /> <Compile Include="FileEncoding.cs" />
<Compile Include="TextViewerPanel.xaml.cs"> <Compile Include="TextViewerPanel.xaml.cs">
<DependentUpon>TextViewerPanel.xaml</DependentUpon> <DependentUpon>TextViewerPanel.xaml</DependentUpon>
</Compile> </Compile>

View File

@@ -20,7 +20,6 @@ using System.Text;
using System.Windows.Controls; using System.Windows.Controls;
using System.Windows.Media; using System.Windows.Media;
using ICSharpCode.AvalonEdit.Highlighting; using ICSharpCode.AvalonEdit.Highlighting;
using QuickLook.Plugin.TextViewer.SimpleHelpers;
namespace QuickLook.Plugin.TextViewer namespace QuickLook.Plugin.TextViewer
{ {
@@ -43,18 +42,11 @@ namespace QuickLook.Plugin.TextViewer
{ {
using (var s = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)) using (var s = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
{ {
viewer.Encoding = DetectEncoding(s); viewer.Encoding = FileEncoding.DetectFileEncoding(s, Encoding.Default);
viewer.Load(path);
} }
viewer.Load(path);
viewer.SyntaxHighlighting = HighlightingManager.Instance.GetDefinitionByExtension(Path.GetExtension(path)); viewer.SyntaxHighlighting = HighlightingManager.Instance.GetDefinitionByExtension(Path.GetExtension(path));
} }
private static Encoding DetectEncoding(Stream s)
{
var det = new FileEncoding();
det.Detect(s, 1 * 1024 * 1024);
return det.Complete() ?? Encoding.Default;
}
} }
} }

View File

@@ -1,7 +1,5 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<packages> <packages>
<package id="AvalonEdit" version="5.0.3" targetFramework="net452" /> <package id="AvalonEdit" version="5.0.3" targetFramework="net452" />
<package id="SimpleHelpers.FileEncoding" version="1.4.0" targetFramework="net462" /> <package id="Ude.Signed" version="0.1.1" targetFramework="net462" />
<package id="UDE.CSharp" version="1.1.0" targetFramework="net462" />
</packages> </packages>