mirror of
https://github.com/QL-Win/QuickLook.git
synced 2025-09-11 17:59:17 +00:00
Use signed Ude lib
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
#region * License *
|
||||
#region * License *
|
||||
|
||||
/*
|
||||
SimpleHelpers - FileEncoding
|
||||
|
||||
@@ -28,19 +29,49 @@
|
||||
License: http://www.opensource.org/licenses/mit-license.php
|
||||
Website: https://github.com/khalidsalomao/SimpleHelpers.Net
|
||||
*/
|
||||
|
||||
#endregion
|
||||
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using Ude;
|
||||
|
||||
namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
namespace QuickLook.Plugin.TextViewer
|
||||
{
|
||||
public class FileEncoding
|
||||
{
|
||||
const int DEFAULT_BUFFER_SIZE = 128 * 1024;
|
||||
private const int DefaultBufferSize = 128 * 1024;
|
||||
|
||||
private readonly Dictionary<string, int> _encodingFrequency =
|
||||
new Dictionary<string, int>(StringComparer.Ordinal);
|
||||
|
||||
private readonly CharsetDetector _ude = new CharsetDetector();
|
||||
private bool _started;
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// If the detection has reached a decision.
|
||||
/// </summary>
|
||||
/// <value>The done.</value>
|
||||
public bool Done { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Detected encoding name.
|
||||
/// </summary>
|
||||
public string EncodingName { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// If the data contains textual data.
|
||||
/// </summary>
|
||||
public bool IsText { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// If the file or data has any mark indicating encoding information (byte order mark).
|
||||
/// </summary>
|
||||
public bool HasByteOrderMark { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Tries to detect the file encoding.
|
||||
@@ -50,7 +81,9 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
/// <returns></returns>
|
||||
public static Encoding DetectFileEncoding(string inputFilename, Encoding defaultIfNotDetected = null)
|
||||
{
|
||||
using (var stream = new System.IO.FileStream (inputFilename, System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.ReadWrite | System.IO.FileShare.Delete, DEFAULT_BUFFER_SIZE))
|
||||
using (var stream = new FileStream(inputFilename, FileMode.Open,
|
||||
FileAccess.Read, FileShare.ReadWrite | FileShare.Delete,
|
||||
DefaultBufferSize))
|
||||
{
|
||||
return DetectFileEncoding(stream) ?? defaultIfNotDetected;
|
||||
}
|
||||
@@ -77,7 +110,8 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
/// <param name="count">The count.</param>
|
||||
/// <param name="defaultIfNotDetected">The default encoding if none was detected.</param>
|
||||
/// <returns></returns>
|
||||
public static Encoding DetectFileEncoding (byte[] inputData, int start, int count, Encoding defaultIfNotDetected = null)
|
||||
public static Encoding DetectFileEncoding(byte[] inputData, int start, int count,
|
||||
Encoding defaultIfNotDetected = null)
|
||||
{
|
||||
var det = new FileEncoding();
|
||||
det.Detect(inputData, start, count);
|
||||
@@ -94,15 +128,18 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
{
|
||||
try
|
||||
{
|
||||
if (System.IO.File.Exists (filename))
|
||||
if (File.Exists(filename))
|
||||
{
|
||||
// enable file encoding detection
|
||||
var encoding = SimpleHelpers.FileEncoding.DetectFileEncoding (filename);
|
||||
var encoding = DetectFileEncoding(filename);
|
||||
// Load data based on parameters
|
||||
return System.IO.File.ReadAllText (filename, encoding);
|
||||
return File.ReadAllText(filename, encoding);
|
||||
}
|
||||
}
|
||||
catch { }
|
||||
catch
|
||||
{
|
||||
// ignored
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
@@ -127,18 +164,15 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
return true;
|
||||
|
||||
if (CheckForByteOrderMark(rawData, start))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
// http://stackoverflow.com/questions/910873/how-can-i-determine-if-a-file-is-binary-or-text-in-c
|
||||
// http://www.gnu.org/software/diffutils/manual/html_node/Binary.html
|
||||
// count the number of null byte sequences
|
||||
// considering only sequences of 2 0s: "\0\0" or control characters below 10
|
||||
int nullSequences = 0;
|
||||
int controlSequences = 0;
|
||||
var nullSequences = 0;
|
||||
var controlSequences = 0;
|
||||
for (var i = start + 1; i < count; i++)
|
||||
{
|
||||
if (rawData[i - 1] == 0 && rawData[i] == 0)
|
||||
{
|
||||
if (++nullSequences > 1)
|
||||
@@ -148,10 +182,9 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
{
|
||||
++controlSequences;
|
||||
}
|
||||
}
|
||||
|
||||
// is text if there are no null byte sequences and no more than 10% of the buffer is control characters
|
||||
return nullSequences == 0 && (controlSequences <= (rawData.Length / 10));
|
||||
return nullSequences == 0 && controlSequences <= rawData.Length / 10;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -167,55 +200,17 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
// Detect encoding correctly (from Rick Strahl's blog)
|
||||
// http://www.west-wind.com/weblog/posts/2007/Nov/28/Detecting-Text-Encoding-for-StreamReader
|
||||
if (rawData[start] == 0xef && rawData[start + 1] == 0xbb && rawData[start + 2] == 0xbf)
|
||||
{
|
||||
// Encoding.UTF8;
|
||||
return true;
|
||||
}
|
||||
else if (rawData[start] == 0xfe && rawData[start + 1] == 0xff)
|
||||
{
|
||||
// Encoding.Unicode;
|
||||
if (rawData[start] == 0xfe && rawData[start + 1] == 0xff)
|
||||
return true;
|
||||
}
|
||||
else if (rawData[start] == 0 && rawData[start + 1] == 0 && rawData[start + 2] == 0xfe && rawData[start + 3] == 0xff)
|
||||
{
|
||||
// Encoding.UTF32;
|
||||
if (rawData[start] == 0 && rawData[start + 1] == 0 && rawData[start + 2] == 0xfe &&
|
||||
rawData[start + 3] == 0xff)
|
||||
return true;
|
||||
}
|
||||
else if (rawData[start] == 0x2b && rawData[start + 1] == 0x2f && rawData[start + 2] == 0x76)
|
||||
{
|
||||
// Encoding.UTF7;
|
||||
if (rawData[start] == 0x2b && rawData[start + 1] == 0x2f && rawData[start + 2] == 0x76)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
Ude.CharsetDetector ude = new Ude.CharsetDetector ();
|
||||
bool _started = false;
|
||||
|
||||
|
||||
/// <summary>
|
||||
/// If the detection has reached a decision.
|
||||
/// </summary>
|
||||
/// <value>The done.</value>
|
||||
public bool Done { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Detected encoding name.
|
||||
/// </summary>
|
||||
public string EncodingName { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// If the data contains textual data.
|
||||
/// </summary>
|
||||
public bool IsText { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// If the file or data has any mark indicating encoding information (byte order mark).
|
||||
/// </summary>
|
||||
public bool HasByteOrderMark { get; set; }
|
||||
|
||||
Dictionary<string, int> encodingFrequency = new Dictionary<string, int> (StringComparer.Ordinal);
|
||||
|
||||
/// <summary>
|
||||
/// Resets this instance.
|
||||
/// </summary>
|
||||
@@ -224,44 +219,46 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
_started = false;
|
||||
Done = false;
|
||||
HasByteOrderMark = false;
|
||||
encodingFrequency.Clear ();
|
||||
ude.Reset ();
|
||||
_encodingFrequency.Clear();
|
||||
_ude.Reset();
|
||||
EncodingName = null;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Detects the encoding of textual data of the specified input data.<para/>
|
||||
/// Only the stream first 20Mb will be analysed.
|
||||
/// Detects the encoding of textual data of the specified input data.
|
||||
/// <para />
|
||||
/// Only the first 1 MB of the stream will be analysed.
|
||||
/// </summary>
|
||||
/// <param name="inputData">The input data.</param>
|
||||
/// <returns>Detected encoding name</returns>
|
||||
public string Detect(Stream inputData)
|
||||
{
|
||||
return Detect (inputData, 20 * 1024 * 1024);
|
||||
return Detect(inputData, 1 * 1024 * 1024);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Detects the encoding of textual data of the specified input data.
|
||||
/// </summary>
|
||||
/// <param name="inputData">The input data.</param>
|
||||
/// <param name="maxSize">Size in byte of analysed data, if you want to analysed only a sample. Use 0 to read all stream data.</param>
|
||||
/// <param name="maxSize">
|
||||
/// Size in bytes of the analysed data, if you want to analyse only a sample. Use 0 to read all stream
|
||||
/// data.
|
||||
/// </param>
|
||||
/// <param name="bufferSize">Size of the buffer for the stream read.</param>
|
||||
/// <returns>Detected encoding name</returns>
|
||||
/// <exception cref="ArgumentOutOfRangeException">bufferSize parameter cannot be 0 or less.</exception>
|
||||
public string Detect(Stream inputData, int maxSize, int bufferSize = 16 * 1024)
|
||||
{
|
||||
if (bufferSize <= 0)
|
||||
throw new ArgumentOutOfRangeException ("bufferSize", "Buffer size cannot be 0 or less.");
|
||||
int maxIterations = maxSize <= 0 ? Int32.MaxValue : maxSize / bufferSize;
|
||||
int i = 0;
|
||||
byte[] buffer = new byte[bufferSize];
|
||||
throw new ArgumentOutOfRangeException(nameof(bufferSize), @"Buffer size cannot be 0 or less.");
|
||||
var maxIterations = maxSize <= 0 ? int.MaxValue : maxSize / bufferSize;
|
||||
var i = 0;
|
||||
var buffer = new byte[bufferSize];
|
||||
while (i++ < maxIterations)
|
||||
{
|
||||
int sz = inputData.Read (buffer, 0, (int)buffer.Length);
|
||||
var sz = inputData.Read(buffer, 0, buffer.Length);
|
||||
if (sz <= 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
Detect(buffer, 0, sz);
|
||||
if (Done)
|
||||
break;
|
||||
@@ -296,19 +293,19 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
}
|
||||
|
||||
// execute charset detector
|
||||
ude.Feed (inputData, start, count);
|
||||
ude.DataEnd ();
|
||||
if (ude.IsDone () && !String.IsNullOrEmpty (ude.Charset))
|
||||
_ude.Feed(inputData, start, count);
|
||||
_ude.DataEnd();
|
||||
if (_ude.IsDone() && !string.IsNullOrEmpty(_ude.Charset))
|
||||
{
|
||||
IncrementFrequency (ude.Charset);
|
||||
IncrementFrequency(_ude.Charset);
|
||||
Done = true;
|
||||
return EncodingName;
|
||||
}
|
||||
|
||||
// singular buffer detection
|
||||
var singleUde = new Ude.CharsetDetector ();
|
||||
var singleUde = new CharsetDetector();
|
||||
const int udeFeedSize = 4 * 1024;
|
||||
int step = (count - start) < udeFeedSize ? (count - start) : udeFeedSize;
|
||||
var step = count - start < udeFeedSize ? count - start : udeFeedSize;
|
||||
for (var pos = start; pos < count; pos += step)
|
||||
{
|
||||
singleUde.Reset();
|
||||
@@ -318,7 +315,7 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
singleUde.Feed(inputData, pos, step);
|
||||
singleUde.DataEnd();
|
||||
// update encoding frequency
|
||||
if (singleUde.Confidence > 0.3 && !String.IsNullOrEmpty (singleUde.Charset))
|
||||
if (singleUde.Confidence > 0.3 && !string.IsNullOrEmpty(singleUde.Charset))
|
||||
IncrementFrequency(singleUde.Charset);
|
||||
}
|
||||
// vote for best encoding
|
||||
@@ -334,15 +331,13 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
public Encoding Complete()
|
||||
{
|
||||
Done = true;
|
||||
ude.DataEnd ();
|
||||
if (ude.IsDone () && !String.IsNullOrEmpty (ude.Charset))
|
||||
{
|
||||
EncodingName = ude.Charset;
|
||||
}
|
||||
_ude.DataEnd();
|
||||
if (_ude.IsDone() && !string.IsNullOrEmpty(_ude.Charset))
|
||||
EncodingName = _ude.Charset;
|
||||
// vote for best encoding
|
||||
EncodingName = GetCurrentEncoding();
|
||||
// check result
|
||||
if (!String.IsNullOrEmpty (EncodingName))
|
||||
if (!string.IsNullOrEmpty(EncodingName))
|
||||
return Encoding.GetEncoding(EncodingName);
|
||||
return null;
|
||||
}
|
||||
@@ -350,17 +345,17 @@ namespace QuickLook.Plugin.TextViewer.SimpleHelpers
|
||||
private void IncrementFrequency(string charset)
|
||||
{
|
||||
int currentCount;
|
||||
encodingFrequency.TryGetValue (charset, out currentCount);
|
||||
encodingFrequency[charset] = ++currentCount;
|
||||
_encodingFrequency.TryGetValue(charset, out currentCount);
|
||||
_encodingFrequency[charset] = ++currentCount;
|
||||
}
|
||||
|
||||
private string GetCurrentEncoding()
|
||||
{
|
||||
if (encodingFrequency.Count == 0)
|
||||
if (_encodingFrequency.Count == 0)
|
||||
return null;
|
||||
// ASCII should be the last option, since other encodings often has ASCII included...
|
||||
return encodingFrequency
|
||||
.OrderByDescending (i => i.Value * (i.Key != ("ASCII") ? 1 : 0))
|
||||
return _encodingFrequency
|
||||
.OrderByDescending(i => i.Value * (i.Key != "ASCII" ? 1 : 0))
|
||||
.FirstOrDefault().Key;
|
||||
}
|
||||
}
|
@@ -58,8 +58,8 @@
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Xaml" />
|
||||
<Reference Include="Ude, Version=0.1.0.0, Culture=neutral, processorArchitecture=MSIL">
|
||||
<HintPath>..\..\packages\UDE.CSharp.1.1.0\lib\Ude.dll</HintPath>
|
||||
<Reference Include="Ude, Version=0.1.1.0, Culture=neutral, PublicKeyToken=dd537652db4726a9, processorArchitecture=MSIL">
|
||||
<HintPath>..\..\packages\Ude.Signed.0.1.1\lib\net40\Ude.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="WindowsBase" />
|
||||
</ItemGroup>
|
||||
@@ -69,7 +69,7 @@
|
||||
</Compile>
|
||||
<Compile Include="Plugin.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs" />
|
||||
<Compile Include="SimpleHelpers\FileEncoding.cs" />
|
||||
<Compile Include="FileEncoding.cs" />
|
||||
<Compile Include="TextViewerPanel.xaml.cs">
|
||||
<DependentUpon>TextViewerPanel.xaml</DependentUpon>
|
||||
</Compile>
|
||||
|
@@ -20,7 +20,6 @@ using System.Text;
|
||||
using System.Windows.Controls;
|
||||
using System.Windows.Media;
|
||||
using ICSharpCode.AvalonEdit.Highlighting;
|
||||
using QuickLook.Plugin.TextViewer.SimpleHelpers;
|
||||
|
||||
namespace QuickLook.Plugin.TextViewer
|
||||
{
|
||||
@@ -43,18 +42,11 @@ namespace QuickLook.Plugin.TextViewer
|
||||
{
|
||||
using (var s = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
|
||||
{
|
||||
viewer.Encoding = DetectEncoding(s);
|
||||
viewer.Load(path);
|
||||
viewer.Encoding = FileEncoding.DetectFileEncoding(s, Encoding.Default);
|
||||
}
|
||||
|
||||
viewer.Load(path);
|
||||
viewer.SyntaxHighlighting = HighlightingManager.Instance.GetDefinitionByExtension(Path.GetExtension(path));
|
||||
}
|
||||
|
||||
private static Encoding DetectEncoding(Stream s)
|
||||
{
|
||||
var det = new FileEncoding();
|
||||
det.Detect(s, 1 * 1024 * 1024);
|
||||
return det.Complete() ?? Encoding.Default;
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,7 +1,5 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<packages>
|
||||
<package id="AvalonEdit" version="5.0.3" targetFramework="net452" />
|
||||
<package id="SimpleHelpers.FileEncoding" version="1.4.0" targetFramework="net462" />
|
||||
<package id="UDE.CSharp" version="1.1.0" targetFramework="net462" />
|
||||
<package id="Ude.Signed" version="0.1.1" targetFramework="net462" />
|
||||
</packages>
|
Reference in New Issue
Block a user