Files
QuickLook/QuickLook.Plugin/QuickLook.Plugin.FontViewer/Typography.OpenFont/Tables/CharacterMap.cs
2024-12-30 04:21:24 +08:00

418 lines
18 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//Apache2, 2017-present, WinterDev, Sam Hocevar
//Apache2, 2014-2016, Samuel Carlsson, WinterDev
using System;
using System.Collections.Generic;
using System.IO;
namespace Typography.OpenFont.Tables
{
/// <summary>
/// Convenience helpers layered on top of <see cref="CharacterMap"/>.
/// </summary>
static class CharacterMapExtension
{
    /// <summary>
    /// Appends the code points reported by <paramref name="cmap"/> to <paramref name="unicodes"/> and,
    /// for every code point appended by this call, appends its glyph index to
    /// <paramref name="glyphIndexList"/>.
    /// </summary>
    /// <param name="cmap">Character map to enumerate.</param>
    /// <param name="unicodes">Receives the code points; entries already present are left untouched.</param>
    /// <param name="glyphIndexList">Receives one glyph index per newly appended code point.</param>
    public static void CollectUnicodeChars(this CharacterMap cmap, List<uint> unicodes, List<ushort> glyphIndexList)
    {
        // Remember where the freshly collected code points start so that
        // only those are resolved to glyph indices.
        int position = unicodes.Count;
        cmap.CollectUnicodeChars(unicodes);
        int end = unicodes.Count;

        while (position < end)
        {
            uint codepoint = unicodes[position];
            glyphIndexList.Add(cmap.GetGlyphIndex((int)codepoint));
            position++;
        }
    }
}
/// <summary>
/// 'cmap' subtable format 4 (segment mapping to delta values). Only 16-bit code points are supported.
/// </summary>
class CharMapFormat4 : CharacterMap
{
    public override ushort Format => 4;

    internal readonly ushort[] _startCode; // Starting character code for each segment
    internal readonly ushort[] _endCode; // Ending character code for each segment, last = 0xFFFF
    internal readonly ushort[] _idDelta; // Delta for all character codes in segment (applied modulo 65536)
    internal readonly ushort[] _idRangeOffset; // Per-segment idRangeOffset as stored in the font (halved below to index glyphIdArray), or 0
    internal readonly ushort[] _glyphIdArray;

    /// <summary>
    /// Stores the parallel per-segment arrays and the glyph id array; the arrays are kept by reference.
    /// </summary>
    public CharMapFormat4(ushort[] startCode, ushort[] endCode, ushort[] idDelta, ushort[] idRangeOffset, ushort[] glyphIdArray)
    {
        _startCode = startCode;
        _endCode = endCode;
        _idDelta = idDelta;
        _idRangeOffset = idRangeOffset;
        _glyphIdArray = glyphIdArray;
    }

    /// <summary>
    /// Maps a code point to a glyph index.
    /// </summary>
    /// <param name="codepoint">Code point to look up.</param>
    /// <returns>
    /// The glyph index, or 0 (missing glyph) when the code point is negative, above 0xFFFF,
    /// not covered by any segment, or resolves outside the glyph id array.
    /// </returns>
    public override ushort GetGlyphIndex(int codepoint)
    {
        // This lookup table only supports 16-bit codepoints
        if (codepoint < 0 || codepoint > ushort.MaxValue)
        {
            return 0;
        }

        // https://www.microsoft.com/typography/otspec/cmap.htm#format4
        // "You search for the first endCode that is greater than or equal to the character code you want to map"
        // "The segments are sorted in order of increasing endCode values"
        // -> binary search is valid here
        int i = Array.BinarySearch(_endCode, (ushort)codepoint);
        if (i < 0)
        {
            i = ~i;
        }

        // "If the corresponding startCode is [not] less than or equal to the character code,
        // then [...] the missingGlyph is returned"
        // Index i should never be out of range, because the list ends with a
        // 0xFFFF value. However, we also use this charmap for format 0, which
        // does not have that final endcode, so there is a chance to overflow.
        if (i >= _endCode.Length || _startCode[i] > codepoint)
        {
            return 0;
        }

        if (_idRangeOffset[i] == 0)
        {
            // Direct mapping: glyph = (code + idDelta) modulo 65536.
            return (ushort)((codepoint + _idDelta[i]) % 65536);
        }

        // If the idRangeOffset value for the segment is not 0, the mapping relies on glyphIdArray.
        // The C expression from the spec that yields the glyph index is:
        //   *(idRangeOffset[i]/2 + (c - startCode[i]) + &idRangeOffset[i])
        // This works in the font file because glyphIdArray immediately follows idRangeOffset;
        // here the two are separate arrays, so the position is rebased onto _glyphIdArray.
        int offset = _idRangeOffset[i] / 2 + (codepoint - _startCode[i]);
        int glyphArrayIndex = offset - _idRangeOffset.Length + i;

        // A malformed font can point before or past glyphIdArray; treat that as the missing glyph
        // instead of throwing.
        if (glyphArrayIndex < 0 || glyphArrayIndex >= _glyphIdArray.Length)
        {
            return 0;
        }

        ushort glyph = _glyphIdArray[glyphArrayIndex];
        if (glyph == 0)
        {
            // 0 in glyphIdArray means missing glyph; idDelta must not be applied to it.
            return 0;
        }

        // Spec: "If the value obtained from the indexing operation is not 0 [...],
        // idDelta[i] is added to it to get the glyph index" (modulo 65536).
        return (ushort)((glyph + _idDelta[i]) % 65536);
    }

    /// <summary>
    /// Appends every code point of every segment (startCode..endCode inclusive) to <paramref name="unicodes"/>.
    /// </summary>
    public override void CollectUnicodeChars(List<uint> unicodes)
    {
        for (int i = 0; i < _startCode.Length; ++i)
        {
            uint start = _startCode[i];
            uint stop = _endCode[i];
            // Both bounds come from ushort values, so the uint counter cannot wrap here.
            for (uint u = start; u <= stop; ++u)
            {
                unicodes.Add(u);
            }
        }
    }
}
/// <summary>
/// 'cmap' subtable format 12 (segmented coverage): groups of consecutive 32-bit character codes
/// mapped to consecutive glyph ids.
/// </summary>
class CharMapFormat12 : CharacterMap
{
    public override ushort Format => 12;

    // Parallel arrays, one entry per group.
    readonly uint[] _startCharCodes, _endCharCodes, _startGlyphIds;

    /// <summary>
    /// Stores the per-group arrays by reference. The lookup relies on
    /// <paramref name="startCharCodes"/> being sorted in increasing order.
    /// </summary>
    internal CharMapFormat12(uint[] startCharCodes, uint[] endCharCodes, uint[] startGlyphIds)
    {
        _startCharCodes = startCharCodes;
        _endCharCodes = endCharCodes;
        _startGlyphIds = startGlyphIds;
    }

    /// <summary>
    /// Maps a code point to a glyph index.
    /// </summary>
    /// <param name="codepoint">Code point to look up.</param>
    /// <returns>The glyph index (truncated to 16 bits), or 0 when no group contains the code point.</returns>
    public override ushort GetGlyphIndex(int codepoint)
    {
        // A negative value would be reinterpreted as a huge uint, land on the last group and
        // then pass the (signed) end check, producing a bogus glyph id.
        if (codepoint < 0)
        {
            return 0;
        }

        // https://www.microsoft.com/typography/otspec/cmap.htm#format12
        // "Groups must be sorted by increasing startCharCode."
        // -> binary search is valid here
        int i = Array.BinarySearch(_startCharCodes, (uint)codepoint);
        if (i < 0)
        {
            // Not an exact hit: step back to the last group starting before the code point
            // (yields -1 when there is none).
            i = ~i - 1;
        }

        if (i >= 0 && codepoint <= _endCharCodes[i])
        {
            return (ushort)(_startGlyphIds[i] + codepoint - _startCharCodes[i]);
        }
        return 0;
    }

    /// <summary>
    /// Appends every code point of every group (start..end inclusive) to <paramref name="unicodes"/>.
    /// </summary>
    public override void CollectUnicodeChars(List<uint> unicodes)
    {
        for (int i = 0; i < _startCharCodes.Length; ++i)
        {
            // Iterate with a 64-bit counter: with a uint counter an end code of 0xFFFFFFFF
            // would wrap around and the loop would never terminate.
            long start = _startCharCodes[i];
            long stop = _endCharCodes[i];
            for (long u = start; u <= stop; ++u)
            {
                unicodes.Add((uint)u);
            }
        }
    }
}
/// <summary>
/// 'cmap' subtable format 6 (trimmed table mapping): one contiguous run of character codes,
/// each mapped through a glyph id array.
/// </summary>
class CharMapFormat6 : CharacterMap
{
    internal readonly ushort _startCode;
    internal readonly ushort[] _glyphIdArray;

    /// <summary>
    /// Stores the first covered character code and the glyph id array (kept by reference).
    /// </summary>
    internal CharMapFormat6(ushort startCode, ushort[] glyphIdArray)
    {
        _startCode = startCode;
        _glyphIdArray = glyphIdArray;
    }

    public override ushort Format
    {
        get { return 6; }
    }

    /// <summary>
    /// Returns the glyph id stored for <paramref name="codepoint"/>, or 0 when the code point
    /// lies outside the covered run.
    /// </summary>
    public override ushort GetGlyphIndex(int codepoint)
    {
        // Position of the code inside the run that begins at _startCode;
        // anything outside the run maps to glyph index 0.
        int slot = codepoint - _startCode;
        if (slot < 0 || slot >= _glyphIdArray.Length)
        {
            return 0;
        }
        return _glyphIdArray[slot];
    }

    /// <summary>
    /// Appends one code point per glyph id array entry, counting up from the start code.
    /// </summary>
    public override void CollectUnicodeChars(List<uint> unicodes)
    {
        uint first = _startCode;
        uint entryCount = (uint)_glyphIdArray.Length;
        for (uint n = 0; n < entryCount; n++)
        {
            unicodes.Add(first + n);
        }
    }
}
//https://www.microsoft.com/typography/otspec/cmap.htm#format14
// Subtable format 14 specifies the Unicode Variation Sequences (UVSes) supported by the font.
// A Variation Sequence, according to the Unicode Standard, comprises a base character followed
// by a variation selector; e.g. <U+82A6, U+E0101>.
//
// The subtable partitions the UVSes supported by the font into two categories: "default" and
// "non-default" UVSes. Given a UVS, if the glyph obtained by looking up the base character of
// that sequence in the Unicode cmap subtable (i.e. the UCS-4 or the BMP cmap subtable) is the
// glyph to use for that sequence, then the sequence is a "default" UVS; otherwise it is a
// "non-default" UVS, and the glyph to use for that sequence is specified in the format 14
// subtable itself.
/// <summary>
/// 'cmap' subtable format 14: Unicode Variation Sequences (a base character followed by a
/// variation selector) supported by the font.
/// </summary>
class CharMapFormat14 : CharacterMap
{
    public override ushort Format => 14;

    /// <summary>
    /// Always returns 0; this class resolves glyphs only for (base, selector) pairs through
    /// <see cref="CharacterPairToGlyphIndex"/>.
    /// </summary>
    public override ushort GetGlyphIndex(int character) => 0;

    /// <summary>
    /// Resolves the glyph for <paramref name="codepoint"/> when it is followed by <paramref name="nextCodepoint"/>.
    /// </summary>
    /// <param name="codepoint">Base character of the candidate variation sequence.</param>
    /// <param name="defaultGlyphIndex">Glyph the caller already resolved for the base character.</param>
    /// <param name="nextCodepoint">Code point following the base character.</param>
    /// <returns>
    /// 0 when <paramref name="nextCodepoint"/> is not a variation selector known to this table;
    /// the mapped glyph for a non-default UVS; otherwise <paramref name="defaultGlyphIndex"/>.
    /// </returns>
    public ushort CharacterPairToGlyphIndex(int codepoint, ushort defaultGlyphIndex, int nextCodepoint)
    {
        // Only check codepoint if nextCodepoint is a variation selector
        if (!_variationSelectors.TryGetValue(nextCodepoint, out VariationSelector sel))
        {
            // In all other cases, return 0
            return 0;
        }

        // If the sequence is a non-default UVS, return the mapped glyph
        if (sel.UVSMappings.TryGetValue(codepoint, out ushort ret))
        {
            return ret;
        }

        // If the sequence is a default UVS, return the default glyph.
        // DefaultEndCodes holds startUnicodeValue + additionalCount, i.e. an inclusive end,
        // so the upper bound must be tested with <=.
        for (int i = 0; i < sel.DefaultStartCodes.Count; ++i)
        {
            if (codepoint >= sel.DefaultStartCodes[i] && codepoint <= sel.DefaultEndCodes[i])
            {
                return defaultGlyphIndex;
            }
        }

        // At this point we are neither a non-default UVS nor a default UVS,
        // but we know the nextCodepoint is a variation selector. Unicode says
        // this glyph should be invisible: "no visible rendering for the VS"
        // (http://unicode.org/faq/unsup_char.html#4)
        return defaultGlyphIndex;
    }

    /// <summary>
    /// Not implemented: adds nothing to <paramref name="unicodes"/> (writes a debug message in DEBUG builds).
    /// </summary>
    public override void CollectUnicodeChars(List<uint> unicodes)
    {
        //TODO: review here
#if DEBUG
        System.Diagnostics.Debug.WriteLine("not implemented");
#endif
    }

    /// <summary>
    /// Reads a format 14 subtable. The reader must be positioned immediately after the 2-byte
    /// format field; the reader's position is moved by seeks into the UVS tables.
    /// </summary>
    /// <param name="reader">Reader over the font data.</param>
    /// <returns>The populated character map.</returns>
    public static CharMapFormat14 Create(BinaryReader reader)
    {
        // 'cmap' Subtable Format 14:
        // Type               Name                                  Description
        // uint16             format                                Subtable format. Set to 14.
        // uint32             length                                Byte length of this subtable (including this header)
        // uint32             numVarSelectorRecords                 Number of variation selector records
        // VariationSelector  varSelector[numVarSelectorRecords]    Array of VariationSelector records.
        //
        // VariationSelector Record:
        // Type      Name                 Description
        // uint24    varSelector          Variation selector
        // Offset32  defaultUVSOffset     Offset from the start of the format 14 subtable to
        //                                the Default UVS Table. May be 0.
        // Offset32  nonDefaultUVSOffset  Offset from the start of the format 14 subtable to
        //                                the Non-Default UVS Table. May be 0.
        //
        // The records are sorted in increasing order of varSelector, and no two records may
        // have the same varSelector. Base characters of the UVSes are stored in the tables the
        // offsets point to; glyph IDs for non-default UVSes are in the Non-Default UVS table.

        long beginAt = reader.BaseStream.Position - 2; // account for header format entry
        reader.ReadUInt32(); // length: byte length of this subtable; not needed, read only to advance
        uint numVarSelectorRecords = reader.ReadUInt32();

        var variationSelectors = new Dictionary<int, VariationSelector>();
        int[] varSelectors = new int[numVarSelectorRecords];
        uint[] defaultUVSOffsets = new uint[numVarSelectorRecords];
        uint[] nonDefaultUVSOffsets = new uint[numVarSelectorRecords];

        // First pass: read all selector records sequentially, before any seeking.
        for (int i = 0; i < numVarSelectorRecords; ++i)
        {
            varSelectors[i] = Utils.ReadUInt24(reader);
            defaultUVSOffsets[i] = reader.ReadUInt32();
            nonDefaultUVSOffsets[i] = reader.ReadUInt32();
        }

        // Second pass: follow the offsets of each record.
        for (int i = 0; i < numVarSelectorRecords; ++i)
        {
            var sel = new VariationSelector();

            if (defaultUVSOffsets[i] != 0)
            {
                // DefaultUVS Table: a range-compressed list of base characters.
                // Type          Name                           Description
                // uint32        numUnicodeValueRanges          Number of Unicode character ranges.
                // UnicodeRange  ranges[numUnicodeValueRanges]  Array of UnicodeRange records.
                //
                // UnicodeRange Record:
                // uint24  startUnicodeValue  First value in this range
                // uint8   additionalCount    Number of additional values in this range
                //
                // For example, the range U+4E4D..U+4E4F (3 values) sets startUnicodeValue to
                // 0x004E4D and additionalCount to 2; a singleton range sets additionalCount to 0.
                // Ranges are sorted by startUnicodeValue and must not overlap.
                reader.BaseStream.Seek(beginAt + defaultUVSOffsets[i], SeekOrigin.Begin);
                uint numUnicodeValueRanges = reader.ReadUInt32();
                for (int n = 0; n < numUnicodeValueRanges; ++n)
                {
                    int startCode = (int)Utils.ReadUInt24(reader);
                    sel.DefaultStartCodes.Add(startCode);
                    // Inclusive end of the range.
                    sel.DefaultEndCodes.Add(startCode + reader.ReadByte());
                }
            }

            if (nonDefaultUVSOffsets[i] != 0)
            {
                // NonDefaultUVS Table: pairs of base character and glyph ID.
                // Type        Name                         Description
                // uint32      numUVSMappings               Number of UVS mappings that follow
                // UVSMapping  uvsMappings[numUVSMappings]  Array of UVSMapping records.
                //
                // UVSMapping Record:
                // uint24  unicodeValue  Base Unicode value of the UVS
                // uint16  glyphID       Glyph ID of the UVS
                //
                // Mappings are sorted by unicodeValue; no two may share the same unicodeValue.
                reader.BaseStream.Seek(beginAt + nonDefaultUVSOffsets[i], SeekOrigin.Begin);
                uint numUVSMappings = reader.ReadUInt32();
                for (int n = 0; n < numUVSMappings; ++n)
                {
                    int unicodeValue = (int)Utils.ReadUInt24(reader);
                    ushort glyphID = reader.ReadUInt16();
                    sel.UVSMappings.Add(unicodeValue, glyphID);
                }
            }

            variationSelectors.Add(varSelectors[i], sel);
        }

        return new CharMapFormat14 { _variationSelectors = variationSelectors };
    }

    /// <summary>
    /// Data read for one variation selector: default ranges (parallel start/end lists, end inclusive)
    /// and the non-default base-character-to-glyph mappings.
    /// </summary>
    class VariationSelector
    {
        public List<int> DefaultStartCodes = new List<int>();
        public List<int> DefaultEndCodes = new List<int>();
        public Dictionary<int, ushort> UVSMappings = new Dictionary<int, ushort>();
    }

    // Keyed by variation selector code point. Starts empty so that an instance not built by
    // Create() answers lookups with 0 instead of throwing NullReferenceException.
    private Dictionary<int, VariationSelector> _variationSelectors = new Dictionary<int, VariationSelector>();
}
/// <summary>
/// Placeholder character map with no entries: every lookup yields glyph 0
/// and no code points are ever reported.
/// </summary>
class NullCharMap : CharacterMap
{
    public override ushort Format
    {
        get { return 0; }
    }

    /// <summary>Returns 0 for any input.</summary>
    public override ushort GetGlyphIndex(int character)
    {
        return 0;
    }

    /// <summary>Leaves <paramref name="unicodes"/> untouched.</summary>
    public override void CollectUnicodeChars(List<uint> unicodes)
    {
        // intentionally empty
    }
}
/// <summary>
/// Base class of the 'cmap' subtable implementations: maps code points to glyph indices.
/// See https://www.microsoft.com/typography/otspec/cmap.htm
/// </summary>
abstract class CharacterMap
{
    /// <summary>Platform id associated with this subtable.</summary>
    public ushort PlatformId { get; set; }

    /// <summary>Encoding id associated with this subtable.</summary>
    public ushort EncodingId { get; set; }

    /// <summary>Subtable format number reported by the concrete implementation.</summary>
    public abstract ushort Format { get; }

    /// <summary>Returns the glyph index for <paramref name="codepoint"/>.</summary>
    public abstract ushort GetGlyphIndex(int codepoint);

    /// <summary>Appends the code points covered by this map to <paramref name="unicodes"/>.</summary>
    public abstract void CollectUnicodeChars(List<uint> unicodes);

    /// <summary>Forwards to <see cref="GetGlyphIndex"/>.</summary>
    public ushort CharacterToGlyphIndex(int codepoint) => GetGlyphIndex(codepoint);

    /// <summary>Short diagnostic text listing format, platform id and encoding id.</summary>
    public override string ToString()
    {
        return "fmt:" + Format + ", plat:" + PlatformId + ", enc:" + EncodingId;
    }
}
}