mirror of
https://github.com/QL-Win/QuickLook.git
synced 2025-09-14 20:29:07 +00:00
418 lines
18 KiB
C#
418 lines
18 KiB
C#
//Apache2, 2017-present, WinterDev, Sam Hocevar
|
||
//Apache2, 2014-2016, Samuel Carlsson, WinterDev
|
||
|
||
using System;
|
||
using System.Collections.Generic;
|
||
using System.IO;
|
||
|
||
namespace Typography.OpenFont.Tables
|
||
{
|
||
|
||
static class CharacterMapExtension
|
||
{
|
||
public static void CollectUnicodeChars(this CharacterMap cmap, List<uint> unicodes, List<ushort> glyphIndexList)
|
||
{
|
||
//temp fixed
|
||
int count1 = unicodes.Count;
|
||
cmap.CollectUnicodeChars(unicodes);
|
||
int count2 = unicodes.Count;
|
||
for (int i = count1; i < count2; ++i)
|
||
{
|
||
glyphIndexList.Add(cmap.GetGlyphIndex((int)unicodes[i]));
|
||
}
|
||
}
|
||
}
|
||
|
||
class CharMapFormat4 : CharacterMap
|
||
{
|
||
public override ushort Format => 4;
|
||
|
||
internal readonly ushort[] _startCode; //Starting character code for each segment
|
||
internal readonly ushort[] _endCode;//Ending character code for each segment, last = 0xFFFF.
|
||
internal readonly ushort[] _idDelta; //Delta for all character codes in segment
|
||
internal readonly ushort[] _idRangeOffset; //Offset in bytes to glyph indexArray, or 0 (not offset in bytes unit)
|
||
internal readonly ushort[] _glyphIdArray;
|
||
public CharMapFormat4(ushort[] startCode, ushort[] endCode, ushort[] idDelta, ushort[] idRangeOffset, ushort[] glyphIdArray)
|
||
{
|
||
_startCode = startCode;
|
||
_endCode = endCode;
|
||
_idDelta = idDelta;
|
||
_idRangeOffset = idRangeOffset;
|
||
_glyphIdArray = glyphIdArray;
|
||
}
|
||
|
||
public override ushort GetGlyphIndex(int codepoint)
|
||
{
|
||
// This lookup table only supports 16-bit codepoints
|
||
if (codepoint > ushort.MaxValue)
|
||
{
|
||
return 0;
|
||
}
|
||
|
||
// https://www.microsoft.com/typography/otspec/cmap.htm#format4
|
||
// "You search for the first endCode that is greater than or equal to the character code you want to map"
|
||
// "The segments are sorted in order of increasing endCode values"
|
||
// -> binary search is valid here
|
||
int i = Array.BinarySearch(_endCode, (ushort)codepoint);
|
||
i = i < 0 ? ~i : i;
|
||
|
||
// https://www.microsoft.com/typography/otspec/cmap.htm#format4
|
||
// "If the corresponding startCode is [not] less than or equal to the character code,
|
||
// then [...] the missingGlyph is returned"
|
||
// Index i should never be out of range, because the list ends with a
|
||
// 0xFFFF value. However, we also use this charmap for format 0, which
|
||
// does not have that final endcode, so there is a chance to overflow.
|
||
if (i >= _endCode.Length || _startCode[i] > codepoint)
|
||
{
|
||
return 0;
|
||
}
|
||
|
||
if (_idRangeOffset[i] == 0)
|
||
{
|
||
//TODO: review 65536 => use bitflags
|
||
return (ushort)((codepoint + _idDelta[i]) % 65536);
|
||
}
|
||
else
|
||
{
|
||
//If the idRangeOffset value for the segment is not 0,
|
||
//the mapping of character codes relies on glyphIdArray.
|
||
//The character code offset from startCode is added to the idRangeOffset value.
|
||
//This sum is used as an offset from the current location within idRangeOffset itself to index out the correct glyphIdArray value.
|
||
//This obscure indexing trick works because glyphIdArray immediately follows idRangeOffset in the font file.
|
||
//The C expression that yields the glyph index is:
|
||
|
||
//*(idRangeOffset[i]/2
|
||
//+ (c - startCount[i])
|
||
//+ &idRangeOffset[i])
|
||
|
||
int offset = _idRangeOffset[i] / 2 + (codepoint - _startCode[i]);
|
||
// I want to thank Microsoft for this clever pointer trick
|
||
// TODO: What if the value fetched is inside the _idRangeOffset table?
|
||
// TODO: e.g. (offset - _idRangeOffset.Length + i < 0)
|
||
return _glyphIdArray[offset - _idRangeOffset.Length + i];
|
||
}
|
||
}
|
||
public override void CollectUnicodeChars(List<uint> unicodes)
|
||
{
|
||
for (int i = 0; i < _startCode.Length; ++i)
|
||
{
|
||
uint start = _startCode[i];
|
||
uint stop = _endCode[i];
|
||
for (uint u = start; u <= stop; ++u)
|
||
{
|
||
unicodes.Add(u);
|
||
}
|
||
}
|
||
}
|
||
|
||
}
|
||
|
||
class CharMapFormat12 : CharacterMap
|
||
{
|
||
public override ushort Format => 12;
|
||
|
||
uint[] _startCharCodes, _endCharCodes, _startGlyphIds;
|
||
internal CharMapFormat12(uint[] startCharCodes, uint[] endCharCodes, uint[] startGlyphIds)
|
||
{
|
||
_startCharCodes = startCharCodes;
|
||
_endCharCodes = endCharCodes;
|
||
_startGlyphIds = startGlyphIds;
|
||
}
|
||
|
||
public override ushort GetGlyphIndex(int codepoint)
|
||
{
|
||
// https://www.microsoft.com/typography/otspec/cmap.htm#format12
|
||
// "Groups must be sorted by increasing startCharCode."
|
||
// -> binary search is valid here
|
||
int i = Array.BinarySearch(_startCharCodes, (uint)codepoint);
|
||
i = i < 0 ? ~i - 1 : i;
|
||
|
||
if (i >= 0 && codepoint <= _endCharCodes[i])
|
||
{
|
||
return (ushort)(_startGlyphIds[i] + codepoint - _startCharCodes[i]);
|
||
}
|
||
return 0;
|
||
}
|
||
public override void CollectUnicodeChars(List<uint> unicodes)
|
||
{
|
||
for (int i = 0; i < _startCharCodes.Length; ++i)
|
||
{
|
||
uint start = _startCharCodes[i];
|
||
uint stop = _endCharCodes[i];
|
||
for (uint u = start; u <= stop; ++u)
|
||
{
|
||
unicodes.Add(u);
|
||
}
|
||
}
|
||
}
|
||
|
||
}
|
||
|
||
class CharMapFormat6 : CharacterMap
|
||
{
|
||
public override ushort Format => 6;
|
||
|
||
internal CharMapFormat6(ushort startCode, ushort[] glyphIdArray)
|
||
{
|
||
_glyphIdArray = glyphIdArray;
|
||
_startCode = startCode;
|
||
}
|
||
|
||
public override ushort GetGlyphIndex(int codepoint)
|
||
{
|
||
// The firstCode and entryCount values specify a subrange (beginning at firstCode,
|
||
// length = entryCount) within the range of possible character codes.
|
||
// Codes outside of this subrange are mapped to glyph index 0.
|
||
// The offset of the code (from the first code) within this subrange is used as
|
||
// index to the glyphIdArray, which provides the glyph index value.
|
||
int i = codepoint - _startCode;
|
||
return i >= 0 && i < _glyphIdArray.Length ? _glyphIdArray[i] : (ushort)0;
|
||
}
|
||
|
||
|
||
internal readonly ushort _startCode;
|
||
internal readonly ushort[] _glyphIdArray;
|
||
public override void CollectUnicodeChars(List<uint> unicodes)
|
||
{
|
||
ushort u = _startCode;
|
||
for (uint i = 0; i < _glyphIdArray.Length; ++i)
|
||
{
|
||
unicodes.Add(u + i);
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
//https://www.microsoft.com/typography/otspec/cmap.htm#format14
|
||
// Subtable format 14 specifies the Unicode Variation Sequences(UVSes) supported by the font.
|
||
// A Variation Sequence, according to the Unicode Standard, comprises a base character followed
|
||
// by a variation selector; e.g. <U+82A6, U+E0101>.
|
||
//
|
||
// The subtable partitions the UVSes supported by the font into two categories: “default” and
|
||
// “non-default” UVSes.Given a UVS, if the glyph obtained by looking up the base character of
|
||
// that sequence in the Unicode cmap subtable(i.e.the UCS-4 or the BMP cmap subtable) is the
|
||
// glyph to use for that sequence, then the sequence is a “default” UVS; otherwise it is a
|
||
// “non-default” UVS, and the glyph to use for that sequence is specified in the format 14
|
||
// subtable itself.
|
||
class CharMapFormat14 : CharacterMap
|
||
{
|
||
public override ushort Format => 14;
|
||
public override ushort GetGlyphIndex(int character) => 0;
|
||
public ushort CharacterPairToGlyphIndex(int codepoint, ushort defaultGlyphIndex, int nextCodepoint)
|
||
{
|
||
// Only check codepoint if nextCodepoint is a variation selector
|
||
|
||
if (_variationSelectors.TryGetValue(nextCodepoint, out VariationSelector sel))
|
||
{
|
||
|
||
// If the sequence is a non-default UVS, return the mapped glyph
|
||
|
||
if (sel.UVSMappings.TryGetValue(codepoint, out ushort ret))
|
||
{
|
||
return ret;
|
||
}
|
||
|
||
// If the sequence is a default UVS, return the default glyph
|
||
for (int i = 0; i < sel.DefaultStartCodes.Count; ++i)
|
||
{
|
||
if (codepoint >= sel.DefaultStartCodes[i] && codepoint < sel.DefaultEndCodes[i])
|
||
{
|
||
return defaultGlyphIndex;
|
||
}
|
||
}
|
||
|
||
// At this point we are neither a non-default UVS nor a default UVS,
|
||
// but we know the nextCodepoint is a variation selector. Unicode says
|
||
// this glyph should be invisible: “no visible rendering for the VS”
|
||
// (http://unicode.org/faq/unsup_char.html#4)
|
||
return defaultGlyphIndex;
|
||
}
|
||
|
||
// In all other cases, return 0
|
||
return 0;
|
||
}
|
||
|
||
public override void CollectUnicodeChars(List<uint> unicodes)
|
||
{
|
||
//TODO: review here
|
||
#if DEBUG
|
||
System.Diagnostics.Debug.WriteLine("not implemented");
|
||
#endif
|
||
}
|
||
|
||
|
||
public static CharMapFormat14 Create(BinaryReader reader)
|
||
{
|
||
// 'cmap' Subtable Format 14:
|
||
// Type Name Description
|
||
// uint16 format Subtable format.Set to 14.
|
||
// uint32 length Byte length of this subtable (including this header)
|
||
// uint32 numVarSelectorRecords Number of variation Selector Records
|
||
// VariationSelector varSelector[numVarSelectorRecords] Array of VariationSelector records.
|
||
// ---
|
||
//
|
||
// Each variation selector records specifies a variation selector character, and
|
||
// offsets to “default” and “non-default” tables used to map variation sequences using
|
||
// that variation selector.
|
||
//
|
||
// VariationSelector Record:
|
||
// Type Name Description
|
||
// uint24 varSelector Variation selector
|
||
// Offset32 defaultUVSOffset Offset from the start of the format 14 subtable to
|
||
// Default UVS Table.May be 0.
|
||
// Offset32 nonDefaultUVSOffset Offset from the start of the format 14 subtable to
|
||
// Non-Default UVS Table. May be 0.
|
||
//
|
||
// The Variation Selector Records are sorted in increasing order of ‘varSelector’. No
|
||
// two records may have the same ‘varSelector’.
|
||
// A Variation Selector Record and the data its offsets point to specify those UVSes
|
||
// supported by the font for which the variation selector is the ‘varSelector’ value
|
||
// of the record. The base characters of the UVSes are stored in the tables pointed
|
||
// to by the offsets.The UVSes are partitioned by whether they are default or
|
||
// non-default UVSes.
|
||
// Glyph IDs to be used for non-default UVSes are specified in the Non-Default UVS table.
|
||
|
||
long beginAt = reader.BaseStream.Position - 2; // account for header format entry
|
||
uint length = reader.ReadUInt32(); // Byte length of this subtable (including the header)
|
||
uint numVarSelectorRecords = reader.ReadUInt32();
|
||
|
||
var variationSelectors = new Dictionary<int, VariationSelector>();
|
||
int[] varSelectors = new int[numVarSelectorRecords];
|
||
uint[] defaultUVSOffsets = new uint[numVarSelectorRecords];
|
||
uint[] nonDefaultUVSOffsets = new uint[numVarSelectorRecords];
|
||
for (int i = 0; i < numVarSelectorRecords; ++i)
|
||
{
|
||
varSelectors[i] = Utils.ReadUInt24(reader);
|
||
defaultUVSOffsets[i] = reader.ReadUInt32();
|
||
nonDefaultUVSOffsets[i] = reader.ReadUInt32();
|
||
}
|
||
|
||
|
||
for (int i = 0; i < numVarSelectorRecords; ++i)
|
||
{
|
||
var sel = new VariationSelector();
|
||
|
||
if (defaultUVSOffsets[i] != 0)
|
||
{
|
||
// Default UVS table
|
||
//
|
||
// A Default UVS Table is simply a range-compressed list of Unicode scalar
|
||
// values, representing the base characters of the default UVSes which use
|
||
// the ‘varSelector’ of the associated Variation Selector Record.
|
||
//
|
||
// DefaultUVS Table:
|
||
// Type Name Description
|
||
// uint32 numUnicodeValueRanges Number of Unicode character ranges.
|
||
// UnicodeRange ranges[numUnicodeValueRanges] Array of UnicodeRange records.
|
||
//
|
||
// Each Unicode range record specifies a contiguous range of Unicode values.
|
||
//
|
||
// UnicodeRange Record:
|
||
// Type Name Description
|
||
// uint24 startUnicodeValue First value in this range
|
||
// uint8 additionalCount Number of additional values in this range
|
||
//
|
||
// For example, the range U+4E4D&endash; U+4E4F (3 values) will set
|
||
// ‘startUnicodeValue’ to 0x004E4D and ‘additionalCount’ to 2. A singleton
|
||
// range will set ‘additionalCount’ to 0.
|
||
// (‘startUnicodeValue’ + ‘additionalCount’) must not exceed 0xFFFFFF.
|
||
// The Unicode Value Ranges are sorted in increasing order of
|
||
// ‘startUnicodeValue’. The ranges must not overlap; i.e.,
|
||
// (‘startUnicodeValue’ + ‘additionalCount’) must be less than the
|
||
// ‘startUnicodeValue’ of the following range (if any).
|
||
|
||
reader.BaseStream.Seek(beginAt + defaultUVSOffsets[i], SeekOrigin.Begin);
|
||
uint numUnicodeValueRanges = reader.ReadUInt32();
|
||
for (int n = 0; n < numUnicodeValueRanges; ++n)
|
||
{
|
||
int startCode = (int)Utils.ReadUInt24(reader);
|
||
sel.DefaultStartCodes.Add(startCode);
|
||
sel.DefaultEndCodes.Add(startCode + reader.ReadByte());
|
||
}
|
||
}
|
||
|
||
if (nonDefaultUVSOffsets[i] != 0)
|
||
{
|
||
// Non-Default UVS table
|
||
//
|
||
// A Non-Default UVS Table is a list of pairs of Unicode scalar values and
|
||
// glyph IDs.The Unicode values represent the base characters of all
|
||
// non -default UVSes which use the ‘varSelector’ of the associated Variation
|
||
// Selector Record, and the glyph IDs specify the glyph IDs to use for the
|
||
// UVSes.
|
||
//
|
||
// NonDefaultUVS Table:
|
||
// Type Name Description
|
||
// uint32 numUVSMappings Number of UVS Mappings that follow
|
||
// UVSMapping uvsMappings[numUVSMappings] Array of UVSMapping records.
|
||
//
|
||
// Each UVSMapping record provides a glyph ID mapping for one base Unicode
|
||
// character, when that base character is used in a variation sequence with
|
||
// the current variation selector.
|
||
//
|
||
// UVSMapping Record:
|
||
// Type Name Description
|
||
// uint24 unicodeValue Base Unicode value of the UVS
|
||
// uint16 glyphID Glyph ID of the UVS
|
||
//
|
||
// The UVS Mappings are sorted in increasing order of ‘unicodeValue’. No two
|
||
// mappings in this table may have the same ‘unicodeValue’ values.
|
||
|
||
reader.BaseStream.Seek(beginAt + nonDefaultUVSOffsets[i], SeekOrigin.Begin);
|
||
uint numUVSMappings = reader.ReadUInt32();
|
||
for (int n = 0; n < numUVSMappings; ++n)
|
||
{
|
||
int unicodeValue = (int)Utils.ReadUInt24(reader);
|
||
ushort glyphID = reader.ReadUInt16();
|
||
sel.UVSMappings.Add(unicodeValue, glyphID);
|
||
}
|
||
}
|
||
|
||
variationSelectors.Add(varSelectors[i], sel);
|
||
}
|
||
|
||
return new CharMapFormat14 { _variationSelectors = variationSelectors };
|
||
}
|
||
|
||
class VariationSelector
|
||
{
|
||
public List<int> DefaultStartCodes = new List<int>();
|
||
public List<int> DefaultEndCodes = new List<int>();
|
||
public Dictionary<int, ushort> UVSMappings = new Dictionary<int, ushort>();
|
||
}
|
||
|
||
private Dictionary<int, VariationSelector> _variationSelectors;
|
||
}
|
||
|
||
/// <summary>
|
||
/// An empty character map that maps all characters to glyph 0
|
||
/// </summary>
|
||
class NullCharMap : CharacterMap
|
||
{
|
||
public override ushort Format => 0;
|
||
public override ushort GetGlyphIndex(int character) => 0;
|
||
public override void CollectUnicodeChars(List<uint> unicodes) { /*nothing*/}
|
||
|
||
}
|
||
|
||
abstract class CharacterMap
|
||
{
|
||
//https://www.microsoft.com/typography/otspec/cmap.htm
|
||
public abstract ushort Format { get; }
|
||
public ushort PlatformId { get; set; }
|
||
public ushort EncodingId { get; set; }
|
||
|
||
public ushort CharacterToGlyphIndex(int codepoint)
|
||
{
|
||
return GetGlyphIndex(codepoint);
|
||
}
|
||
public abstract ushort GetGlyphIndex(int codepoint);
|
||
public abstract void CollectUnicodeChars(List<uint> unicodes);
|
||
|
||
public override string ToString()
|
||
{
|
||
return $"fmt:{ Format }, plat:{ PlatformId }, enc:{ EncodingId }";
|
||
}
|
||
}
|
||
}
|