Home About Units Download Documents Links Contact SourceForge
Units: UnicodeChar: Source

{                                                                              }
{                      Unicode character functions v3.04                       }
{                                                                              }
{             This unit is copyright © 2002-2004 by David J Butler             }
{                                                                              }
{                  This unit is part of Delphi Fundamentals.                   }
{                 Its original file name is cUnicodeChar.pas                   }
{       The latest version is available from the Fundamentals home page        }
{                     http://fundementals.sourceforge.net/                     }
{                                                                              }
{                I invite you to use this unit, free of charge.                }
{        I invite you to distribute this unit, but it must be for free.        }
{             I also invite you to contribute to its development,              }
{             but do not distribute a modified copy of this file.              }
{                                                                              }
{          A forum is available on SourceForge for general discussion          }
{             http://sourceforge.net/forum/forum.php?forum_id=2117             }
{                                                                              }
{                                                                              }
{ Description:                                                                 }
{   Unicode character constants.                                               }
{   Functions for checking unicode character properties.                       }
{   Functions to interpret unicode characters.                                 }
{   Unicode character case functions.                                          }
{                                                                              }
{                                                                              }
{ Notes:                                                                       }
{   Most functions in this unit work from tables in source code form.          }
{   All tables were generated from the Unicode 3.2 data.                       }
{                                                                              }
{   The source code is deceptively big, for example, the upper-lower case      }
{   table is about 128K in the source code, but only 7K when compiled.         }
{                                                                              }
{   This unit has no dependencies on any other unit.                           }
{                                                                              }
{ Revision history:                                                            }
{   19/04/2002  0.01  Initial version                                          }
{   21/04/2002  0.02  Added case and decomposition functions                   }
{   28/10/2002  3.03  Refactored for Fundamentals 3.                           }
{   10/01/2004  3.04  Changes to allow smart-linking by the compiler.          }
{                     Typically this saves 100-200K on the executable size.    }
{                                                                              }

{$INCLUDE ..\cDefines.inc}
unit cUnicodeChar;

interface

{ Unit identification: name, version string and copyright notice.              }
const
  UnitName      = 'cUnicodeChar';
  UnitVersion   = '3.04';
  UnitCopyright = 'Copyright (c) 2002-2004 David J Butler';


  
{                                                                              }
{ Unicode character constants                                                  }
{                                                                              }
const
  // C0 control characters and SPACE, named by their ASCII mnemonics.
  WideNULL = WideChar(#0);
  WideSOH  = WideChar(#1);
  WideSTX  = WideChar(#2);
  WideETX  = WideChar(#3);
  WideEOT  = WideChar(#4);
  WideENQ  = WideChar(#5);
  WideACK  = WideChar(#6);
  WideBEL  = WideChar(#7);
  WideBS   = WideChar(#8);
  WideHT   = WideChar(#9);
  WideLF   = WideChar(#10);
  WideVT   = WideChar(#11);
  WideFF   = WideChar(#12);
  WideCR   = WideChar(#13);
  WideNAK  = WideChar(#21);
  WideSYN  = WideChar(#22);
  WideCAN  = WideChar(#24);
  WideEOF  = WideChar(#26);
  WideESC  = WideChar(#27);
  WideSP   = WideChar(#32);

  // Carriage return + line feed pair as a typed WideString constant.
  WideCRLF : WideString = #13#10;

  // ASCII apostrophe (U+0027) and quotation mark (U+0022).
  WideSingleQuote        = WideChar('''');
  WideDoubleQuote        = WideChar('"');

  WideNoBreakSpace       = WideChar(#$00A0);
  WideLineSeparator      = WideChar(#$2028);
  WideParagraphSeparator = WideChar(#$2029);

  // Byte order mark values.
  // NOTE(review): presumably $FFFE is how U+FEFF appears when the stream's
  // byte order is the opposite of the reader's - confirm against callers.
  WideBOM_MSB_First      = WideChar(#$FFFE);
  WideBOM_LSB_First      = WideChar(#$FEFF);

  // Replacement characters and the U+FFFF value used here to mean "invalid".
  WideObjectReplacement  = WideChar(#$FFFC);
  WideCharReplacement    = WideChar(#$FFFD);
  WideInvalid            = WideChar(#$FFFF);

  WideCopyrightSign      = WideChar(#$00A9);
  WideRegisteredSign     = WideChar(#$00AE);

  // Surrogate code unit ranges. The high surrogate range is split in two:
  // $D800..$DB7F and the "private" high surrogates $DB80..$DBFF.
  WideHighSurrogateFirst        = WideChar(#$D800);
  WideHighSurrogateLast         = WideChar(#$DB7F);
  WideLowSurrogateFirst         = WideChar(#$DC00);
  WideLowSurrogateLast          = WideChar(#$DFFF);
  WidePrivateHighSurrogateFirst = WideChar(#$DB80);
  WidePrivateHighSurrogateLast  = WideChar(#$DBFF);



{                                                                              }
{ Unicode character functions                                                  }
{                                                                              }
// UCS4Char is declared here only when DELPHI5 is defined.
// NOTE(review): presumably other compiler versions already provide the type
// - confirm against cDefines.inc.
{$IFDEF DELPHI5}
type
  UCS4Char = LongWord;
{$ENDIF}

// Signature of a predicate that tests a single WideChar; the Is... functions
// declared below that take a WideChar have this shape.
type
  WideCharMatchFunction = function (const Ch: WideChar): Boolean;

// General character classes.
function  IsASCIIChar(const Ch: WideChar): Boolean;
function  IsWhiteSpace(const Ch: WideChar): Boolean;
function  IsControl(const Ch: WideChar): Boolean;
function  IsControlOrWhiteSpace(const Ch: WideChar): Boolean;
function  IsIgnorable(const Ch: UCS4Char): Boolean;

// Individual punctuation classes.
function  IsDash(const Ch: WideChar): Boolean;
function  IsHyphen(const Ch: WideChar): Boolean;
function  IsFullStop(const Ch: WideChar): Boolean;
function  IsComma(const Ch: WideChar): Boolean;
function  IsExclamationMark(const Ch: WideChar): Boolean;
function  IsQuestionMark(const Ch: WideChar): Boolean;

// Paired parentheses and brackets. The Get... functions return #$0000 when
// the argument is not a recognised left-hand character.
function  IsLeftParenthesis(const Ch: WideChar): Boolean;
function  IsLeftBracket(const Ch: WideChar): Boolean;
function  GetRightParenthesis(const LeftParenthesis: WideChar): WideChar;
function  GetRightBracket(const LeftBracket: WideChar): WideChar;

// Quotation marks. The Get... functions return #$0000 when the argument has
// no counterpart in their table.
function  IsSingularQuotationMark(const Ch: WideChar): Boolean;
function  IsOpeningQuotationMark(const Ch: WideChar): Boolean;
function  IsClosingQuotationMark(const Ch: WideChar): Boolean;
function  GetClosingQuotationMark(const OpeningQuote: WideChar): WideChar;
function  GetOpeningQuotationMark(const ClosingQuote: WideChar): WideChar;

function  IsPunctuation(const Ch: WideChar): Boolean;

// Numeric characters. DecimalDigitValue returns -1 for a non-digit.
function  IsDecimalDigit(const Ch: UCS4Char): Boolean; overload;
function  IsDecimalDigit(const Ch: WideChar): Boolean; overload;
function  DecimalDigitValue(const Ch: UCS4Char): Integer; overload;
function  DecimalDigitValue(const Ch: WideChar): Integer; overload;
function  FractionCharacterValue(const Ch: WideChar; var A, B: Integer): Boolean;
function  RomanNumeralValue(const Ch: WideChar): Integer;

function  IsHexDigit(const Ch: UCS4Char): Boolean; overload;
function  IsHexDigit(const Ch: WideChar): Boolean; overload;
function  HexDigitValue(const Ch: UCS4Char): Integer; overload;
function  HexDigitValue(const Ch: WideChar): Integer; overload;

// Letter case and alphabetic tests / conversions.
function  IsUpperCase(const Ch: WideChar): Boolean;
function  IsLowerCase(const Ch: WideChar): Boolean;
function  IsTitleCase(const Ch: WideChar): Boolean;
function  WideUpCase(const Ch: WideChar): WideChar;
function  WideLowCase(const Ch: WideChar): WideChar;
function  WideUpCaseFolding(const Ch: WideChar): WideString;
function  WideLowCaseFolding(const Ch: WideChar): WideString;
function  WideTitleCaseFolding(const Ch: WideChar): WideString;
function  WideIsEqualNoCase(const A, B: WideChar): Boolean;
function  IsLetter(const Ch: WideChar): Boolean;
function  IsAlphabetic(const Ch: WideChar): Boolean;

// Combining class and decomposition.
function  GetCombiningClass(const Ch: WideChar): Byte;
function  GetCharacterDecomposition(const Ch: UCS4Char): WideString; overload;
function  GetCharacterDecomposition(const Ch: WideChar): WideString; overload;



implementation



{                                                                              }
{ Character functions                                                          }
{                                                                              }
{ Returns True when Ch is a 7-bit ASCII character (code $0000..$007F).         }
function IsASCIIChar(const Ch: WideChar): Boolean;
begin
  Result := Ord(Ch) < $80;
end;

{ Returns True when Ch is one of the white space characters listed below:      }
{ the ASCII white space controls, NEL (U+0085), the listed space characters    }
{ and the line / paragraph separators.                                         }
{ U+205F MEDIUM MATHEMATICAL SPACE is included: it carries the White_Space     }
{ property since Unicode 3.2 and was missing from the original table.          }
function IsWhiteSpace(const Ch: WideChar): Boolean;
begin
  Case Ch of
    #$0009..#$000D,    // ASCII CONTROL
    #$0020,            // SPACE
    #$0085,            // <control>
    #$00A0,            // NO-BREAK SPACE
    #$1680,            // OGHAM SPACE MARK
    #$2000..#$200A,    // EN QUAD..HAIR SPACE
    #$2028,            // LINE SEPARATOR
    #$2029,            // PARAGRAPH SEPARATOR
    #$202F,            // NARROW NO-BREAK SPACE
    #$205F,            // MEDIUM MATHEMATICAL SPACE
    #$3000 :           // IDEOGRAPHIC SPACE
      Result := True;
  else
    Result := False;
  end;
end;

{ Returns True when Ch is a control character: $0000..$001F or $007F..$009F.   }
function IsControl(const Ch: WideChar): Boolean;
var Code : Word;
begin
  Code := Ord(Ch);
  Result := (Code <= $001F) or
            ((Code >= $007F) and (Code <= $009F));
end;

{ Returns True when Ch satisfies IsControl or IsWhiteSpace.                    }
function IsControlOrWhiteSpace(const Ch: WideChar): Boolean;
begin
  if IsControl(Ch) then
    Result := True
  else
    Result := IsWhiteSpace(Ch);
end;

{ Returns True when the code point Ch falls in one of the "ignorable" ranges   }
{ below. The table was derived from 'Cf' + 'Cc' + 'Cs' - White_Space; it also  }
{ carries some Mn (variation selector) and Cn ranges. The general categories   }
{ covered by each range are given in the trailing comments.                    }
function IsIgnorable(const Ch: UCS4Char): Boolean;
begin
  Result := False;
  case Ch of
    $0000..$0008,     // Cc     <control>
    $000E..$001F,     // Cc     <control>
    $007F..$0084,     // Cc     <control>
    $0086..$009F,     // Cc     <control>
    $06DD,            // Cf     ARABIC END OF AYAH
    $070F,            // Cf     SYRIAC ABBREVIATION MARK
    $180B..$180E,     // Mn, Cf MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN VOWEL SEPARATOR
    $200C..$200F,     // Cf     ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
    $202A..$202E,     // Cf     LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
    $2060..$206F,     // Cf, Cn WORD JOINER..NOMINAL DIGIT SHAPES
    $D800..$DFFF,     // Cs     surrogates
    $FE00..$FE0F,     // Mn     VARIATION SELECTOR-1..VARIATION SELECTOR-16
    $FEFF,            // Cf     ZERO WIDTH NO-BREAK SPACE
    $FFF0..$FFFB,     // Cn, Cf ..INTERLINEAR ANNOTATION TERMINATOR
    $1D173..$1D17A,   // Cf     MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
    $E0000..$E0FFF :  // Cn, Cf LANGUAGE TAG, TAG SPACE..CANCEL TAG and unassigned
      Result := True;
  end;
end;

{ Returns True when Ch is one of the dash characters listed below.             }
function IsDash(const Ch: WideChar): Boolean;
begin
  Result := False;
  case Ord(Ch) of
    $002D,           // HYPHEN-MINUS
    $00AD,           // SOFT HYPHEN
    $058A,           // ARMENIAN HYPHEN
    $1806,           // MONGOLIAN TODO SOFT HYPHEN
    $2010..$2015,    // HYPHEN..HORIZONTAL BAR
    $207B,           // SUPERSCRIPT MINUS
    $208B,           // SUBSCRIPT MINUS
    $2212,           // MINUS SIGN
    $301C,           // WAVE DASH
    $3030,           // WAVY DASH
    $FE31..$FE32,    // PRESENTATION FORM FOR VERTICAL EM DASH..EN DASH
    $FE58,           // SMALL EM DASH
    $FE63,           // SMALL HYPHEN-MINUS
    $FF0D :          // FULLWIDTH HYPHEN-MINUS
      Result := True;
  end;
end;

{ Returns True when Ch is one of the hyphen characters listed below.           }
function IsHyphen(const Ch: WideChar): Boolean;
begin
  Result := False;
  case Ord(Ch) of
    $002D,           // HYPHEN-MINUS
    $00AD,           // SOFT HYPHEN
    $058A,           // ARMENIAN HYPHEN
    $1806,           // MONGOLIAN TODO SOFT HYPHEN
    $2010..$2011,    // HYPHEN..NON-BREAKING HYPHEN
    $30FB,           // KATAKANA MIDDLE DOT
    $FE63,           // SMALL HYPHEN-MINUS
    $FF0D,           // FULLWIDTH HYPHEN-MINUS
    $FF65 :          // HALFWIDTH KATAKANA MIDDLE DOT
      Result := True;
  end;
end;

{ Returns True when Ch is one of the full stop characters listed below.        }
function IsFullStop(const Ch: WideChar): Boolean;
begin
  Result := False;
  case Ord(Ch) of
    $002E,          // FULL STOP
    $0589,          // ARMENIAN FULL STOP
    $06D4,          // ARABIC FULL STOP
    $0701..$0702,   // SYRIAC SUPRALINEAR FULL STOP..SYRIAC SUBLINEAR FULL STOP
    $1362,          // ETHIOPIC FULL STOP
    $166E,          // CANADIAN SYLLABICS FULL STOP
    $1803,          // MONGOLIAN FULL STOP
    $1809,          // MONGOLIAN MANCHU FULL STOP
    $3002,          // IDEOGRAPHIC FULL STOP
    $FE52,          // SMALL FULL STOP
    $FF0E,          // FULLWIDTH FULL STOP
    $FF61 :         // HALFWIDTH IDEOGRAPHIC FULL STOP
      Result := True;
  end;
end;

{ Returns True when Ch is one of the comma characters listed below.            }
function IsComma(const Ch: WideChar): Boolean;
begin
  Result := False;
  case Ord(Ch) of
    $002C,          // COMMA
    $055D,          // ARMENIAN COMMA
    $060C,          // ARABIC COMMA
    $0F14,          // TIBETAN MARK GTER TSHEG
    $1363,          // ETHIOPIC COMMA
    $1802,          // MONGOLIAN COMMA
    $1808,          // MONGOLIAN MANCHU COMMA
    $3001,          // IDEOGRAPHIC COMMA
    $FE50..$FE51,   // SMALL COMMA..SMALL IDEOGRAPHIC COMMA
    $FF0C,          // FULLWIDTH COMMA
    $FF64 :         // HALFWIDTH IDEOGRAPHIC COMMA
      Result := True;
  end;
end;

{ Returns True when Ch is one of the exclamation mark characters listed below, }
{ including the combined question / exclamation forms.                         }
function IsExclamationMark(const Ch: WideChar): Boolean;
begin
  Result := False;
  case Ord(Ch) of
    $0021,          // EXCLAMATION MARK
    $00A1,          // INVERTED EXCLAMATION MARK
    $055C,          // ARMENIAN EXCLAMATION MARK
    $203C..$203D,   // DOUBLE EXCLAMATION MARK..INTERROBANG
    $2048..$2049,   // QUESTION EXCLAMATION MARK..EXCLAMATION QUESTION MARK
    $FE57,          // SMALL EXCLAMATION MARK
    $FF01 :         // FULLWIDTH EXCLAMATION MARK
      Result := True;
  end;
end;

{ Returns True when Ch is one of the question mark characters listed below.    }
{ The combined question / exclamation forms U+203D, U+2048 and U+2049 are all  }
{ accepted, mirroring IsExclamationMark. The original table accepted only      }
{ U+2049, so U+2048 QUESTION EXCLAMATION MARK and U+203D INTERROBANG were      }
{ treated as exclamation marks but not as question marks.                      }
function IsQuestionMark(const Ch: WideChar): Boolean;
begin
  Case Ord(Ch) of
    $003F,    // QUESTION MARK
    $00BF,    // INVERTED QUESTION MARK
    $037E,    // GREEK QUESTION MARK
    $055E,    // ARMENIAN QUESTION MARK
    $061F,    // ARABIC QUESTION MARK
    $1367,    // ETHIOPIC QUESTION MARK
    $203D,    // INTERROBANG
    $2048,    // QUESTION EXCLAMATION MARK
    $2049,    // EXCLAMATION QUESTION MARK
    $FE56,    // SMALL QUESTION MARK
    $FF1F :   // FULLWIDTH QUESTION MARK
      Result := True;
  else
    Result := False;
  end;
end;

{ Returns the right parenthesis that pairs with LeftParenthesis, or #$0000     }
{ when LeftParenthesis is not one of the left parentheses listed below.        }
{ Every right form in this table is the code point directly after its left     }
{ form.                                                                        }
function GetRightParenthesis(const LeftParenthesis: WideChar): WideChar;
var Code : Word;
begin
  Code := Ord(LeftParenthesis);
  case Code of
    $0028,    // PARENTHESIS
    $207D,    // SUPERSCRIPT PARENTHESIS
    $208D,    // SUBSCRIPT PARENTHESIS
    $FD3E,    // ORNATE PARENTHESIS
    $FE35,    // PRESENTATION FORM FOR VERTICAL PARENTHESIS
    $FE59,    // SMALL PARENTHESIS
    $FF08 :   // FULLWIDTH PARENTHESIS
      Result := WideChar(Code + 1);
  else
    Result := WideChar(0);
  end;
end;

{ Returns True when GetRightParenthesis has a right-hand partner for Ch.       }
function IsLeftParenthesis(const Ch: WideChar): Boolean;
begin
  Result := Ord(GetRightParenthesis(Ch)) <> 0;
end;

{ Returns the right bracket that pairs with LeftBracket, or #$0000 when        }
{ LeftBracket is not one of the left brackets listed below.                    }
{ The ASCII and fullwidth square / curly brackets sit two code points before   }
{ their right form; every other pair in the table is adjacent.                 }
function GetRightBracket(const LeftBracket: WideChar): WideChar;
var Code : Word;
begin
  Code := Ord(LeftBracket);
  case Code of
    $005B,    // SQUARE BRACKET
    $007B,    // CURLY BRACKET
    $FF3B,    // FULLWIDTH SQUARE BRACKET
    $FF5B :   // FULLWIDTH CURLY BRACKET
      Result := WideChar(Code + 2);
    $2045,    // SQUARE BRACKET WITH QUILL
    $2329,    // POINTING ANGLE BRACKET
    $3008,    // ANGLE BRACKET
    $300A,    // DOUBLE ANGLE BRACKET
    $300C,    // CORNER BRACKET
    $300E,    // WHITE CORNER BRACKET
    $3010,    // BLACK LENTICULAR BRACKET
    $3014,    // TORTOISE SHELL BRACKET
    $3016,    // WHITE LENTICULAR BRACKET
    $3018,    // WHITE TORTOISE SHELL BRACKET
    $301A,    // WHITE SQUARE BRACKET
    $FE37,    // PRESENTATION FORM FOR VERTICAL CURLY BRACKET
    $FE39,    // PRESENTATION FORM FOR VERTICAL TORTOISE SHELL BRACKET
    $FE3B,    // PRESENTATION FORM FOR VERTICAL BLACK LENTICULAR BRACKET
    $FE3D,    // PRESENTATION FORM FOR VERTICAL DOUBLE ANGLE BRACKET
    $FE3F,    // PRESENTATION FORM FOR VERTICAL ANGLE BRACKET
    $FE41,    // PRESENTATION FORM FOR VERTICAL CORNER BRACKET
    $FE43,    // PRESENTATION FORM FOR VERTICAL WHITE CORNER BRACKET
    $FE5B,    // SMALL CURLY BRACKET
    $FE5D,    // SMALL TORTOISE SHELL BRACKET
    $FF62 :   // HALFWIDTH CORNER BRACKET
      Result := WideChar(Code + 1);
  else
    Result := WideChar(0);
  end;
end;

{ Returns True when GetRightBracket has a right-hand partner for Ch.           }
function IsLeftBracket(const Ch: WideChar): Boolean;
begin
  Result := Ord(GetRightBracket(Ch)) <> 0;
end;

{ Returns True when Ch is a quotation mark with no separate opening / closing  }
{ form: QUOTATION MARK, APOSTROPHE or their fullwidth variants.                }
function IsSingularQuotationMark(const Ch: WideChar): Boolean;
var Code : Word;
begin
  Code := Ord(Ch);
  Result := (Code = $0022) or    // QUOTATION MARK
            (Code = $0027) or    // APOSTROPHE
            (Code = $FF02) or    // FULLWIDTH QUOTATION MARK
            (Code = $FF07);      // FULLWIDTH APOSTROPHE
end;

{ Returns the closing quotation mark that pairs with OpeningQuote, or #$0000   }
{ when OpeningQuote is not one of the opening marks listed below.              }
{ U+301D maps to U+301E; U+301F is a second closing form for the same opener   }
{ and is only handled by GetOpeningQuotationMark.                              }
function GetClosingQuotationMark(const OpeningQuote: WideChar): WideChar;
var Code : Word;
begin
  Code := Ord(OpeningQuote);
  case Code of
    $00AB :   // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK -> RIGHT-POINTING
      Result := WideChar($00BB);
    $2018,    // LEFT SINGLE QUOTATION MARK -> RIGHT SINGLE QUOTATION MARK
    $201A,    // SINGLE LOW-9 QUOTATION MARK -> SINGLE HIGH-REVERSED-9 QUOTATION MARK
    $201C,    // LEFT DOUBLE QUOTATION MARK -> RIGHT DOUBLE QUOTATION MARK
    $201E,    // DOUBLE LOW-9 QUOTATION MARK -> DOUBLE HIGH-REVERSED-9 QUOTATION MARK
    $2039,    // SINGLE LEFT-POINTING ANGLE QUOTATION MARK -> RIGHT-POINTING
    $301D :   // REVERSED DOUBLE PRIME QUOTATION MARK -> DOUBLE PRIME QUOTATION MARK
      Result := WideChar(Code + 1);
  else
    Result := WideChar(0);
  end;
end;

{ Returns True when GetClosingQuotationMark has a closing partner for Ch.      }
function IsOpeningQuotationMark(const Ch: WideChar): Boolean;
begin
  Result := Ord(GetClosingQuotationMark(Ch)) <> 0;
end;

{ Returns the opening quotation mark that pairs with ClosingQuote, or #$0000   }
{ when ClosingQuote is not one of the closing marks listed below.              }
{ Both U+301E and U+301F map back to U+301D.                                   }
function GetOpeningQuotationMark(const ClosingQuote: WideChar): WideChar;
var Code : Word;
begin
  Code := Ord(ClosingQuote);
  case Code of
    $00BB :   // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -> LEFT-POINTING
      Result := WideChar($00AB);
    $2019,    // RIGHT SINGLE QUOTATION MARK -> LEFT SINGLE QUOTATION MARK
    $201B,    // SINGLE HIGH-REVERSED-9 QUOTATION MARK -> SINGLE LOW-9 QUOTATION MARK
    $201D,    // RIGHT DOUBLE QUOTATION MARK -> LEFT DOUBLE QUOTATION MARK
    $201F,    // DOUBLE HIGH-REVERSED-9 QUOTATION MARK -> DOUBLE LOW-9 QUOTATION MARK
    $203A,    // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -> LEFT-POINTING
    $301E :   // DOUBLE PRIME QUOTATION MARK -> REVERSED DOUBLE PRIME QUOTATION MARK
      Result := WideChar(Code - 1);
    $301F :   // LOW DOUBLE PRIME QUOTATION MARK -> REVERSED DOUBLE PRIME QUOTATION MARK
      Result := WideChar($301D);
  else
    Result := WideChar(0);
  end;
end;

{ Returns True when GetOpeningQuotationMark has an opening partner for Ch.     }
function IsClosingQuotationMark(const Ch: WideChar): Boolean;
begin
  Result := Ord(GetOpeningQuotationMark(Ch)) <> 0;
end;

{ Returns True when Ch is one of the punctuation characters in the table       }
{ below. Runs of consecutive code points are written as ranges; each comment   }
{ names the first and last character of the run.                               }
function IsPunctuation(const Ch: WideChar): Boolean;
begin
  Result := False;
  case Ord(Ch) of
    $0021..$0023,   // EXCLAMATION MARK..NUMBER SIGN
    $0025..$002A,   // PERCENT SIGN..ASTERISK
    $002C..$002F,   // COMMA..SOLIDUS
    $003A..$003B,   // COLON..SEMICOLON
    $003F..$0040,   // QUESTION MARK..COMMERCIAL AT
    $005B..$005D,   // LEFT SQUARE BRACKET..RIGHT SQUARE BRACKET
    $005F,          // LOW LINE
    $007B,          // LEFT CURLY BRACKET
    $007D,          // RIGHT CURLY BRACKET
    $00A1,          // INVERTED EXCLAMATION MARK
    $00AB,          // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    $00AD,          // SOFT HYPHEN
    $00B7,          // MIDDLE DOT
    $00BB,          // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    $00BF,          // INVERTED QUESTION MARK
    $037E,          // GREEK QUESTION MARK
    $0387,          // GREEK ANO TELEIA
    $055A..$055F,   // ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
    $0589..$058A,   // ARMENIAN FULL STOP..ARMENIAN HYPHEN
    $05BE,          // HEBREW PUNCTUATION MAQAF
    $05C0,          // HEBREW PUNCTUATION PASEQ
    $05C3,          // HEBREW PUNCTUATION SOF PASUQ
    $05F3..$05F4,   // HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
    $060C,          // ARABIC COMMA
    $061B,          // ARABIC SEMICOLON
    $061F,          // ARABIC QUESTION MARK
    $066A..$066D,   // ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
    $06D4,          // ARABIC FULL STOP
    $0700..$070D,   // SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
    $0964..$0965,   // DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
    $0970,          // DEVANAGARI ABBREVIATION SIGN
    $0DF4,          // SINHALA PUNCTUATION KUNDDALIYA
    $0E4F,          // THAI CHARACTER FONGMAN
    $0E5A..$0E5B,   // THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
    $0F04..$0F12,   // TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD
    $0F3A..$0F3D,   // TIBETAN MARK GUG RTAGS GYON..TIBETAN MARK ANG KHANG GYAS
    $0F85,          // TIBETAN MARK PALUTA
    $104A..$104F,   // MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
    $10FB,          // GEORGIAN PARAGRAPH SEPARATOR
    $1361..$1368,   // ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
    $166D..$166E,   // CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
    $169B..$169C,   // OGHAM FEATHER MARK..OGHAM REVERSED FEATHER MARK
    $16EB..$16ED,   // RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
    $17D4..$17DA,   // KHMER SIGN KHAN..KHMER SIGN KOOMUUT
    $17DC,          // KHMER SIGN AVAKRAHASANYA
    $1800..$180A,   // MONGOLIAN BIRGA..MONGOLIAN NIRUGU
    $2010..$2027,   // HYPHEN..HYPHENATION POINT
    $2030..$2043,   // PER MILLE SIGN..HYPHEN BULLET
    $2045..$2046,   // LEFT SQUARE BRACKET WITH QUILL..RIGHT SQUARE BRACKET WITH QUILL
    $2048..$204D,   // QUESTION EXCLAMATION MARK..BLACK RIGHTWARDS BULLET
    $207D..$207E,   // SUPERSCRIPT LEFT PARENTHESIS..SUPERSCRIPT RIGHT PARENTHESIS
    $208D..$208E,   // SUBSCRIPT LEFT PARENTHESIS..SUBSCRIPT RIGHT PARENTHESIS
    $2329..$232A,   // LEFT-POINTING ANGLE BRACKET..RIGHT-POINTING ANGLE BRACKET
    $3001..$3003,   // IDEOGRAPHIC COMMA..DITTO MARK
    $3008..$3011,   // LEFT ANGLE BRACKET..RIGHT BLACK LENTICULAR BRACKET
    $3014..$301F,   // LEFT TORTOISE SHELL BRACKET..LOW DOUBLE PRIME QUOTATION MARK
    $3030,          // WAVY DASH
    $30FB,          // KATAKANA MIDDLE DOT
    $FD3E..$FD3F,   // ORNATE LEFT PARENTHESIS..ORNATE RIGHT PARENTHESIS
    $FE30..$FE44,   // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER..VERTICAL RIGHT WHITE CORNER BRACKET
    $FE49..$FE52,   // DASHED OVERLINE..SMALL FULL STOP
    $FE54..$FE61,   // SMALL SEMICOLON..SMALL ASTERISK
    $FE63,          // SMALL HYPHEN-MINUS
    $FE68,          // SMALL REVERSE SOLIDUS
    $FE6A..$FE6B,   // SMALL PERCENT SIGN..SMALL COMMERCIAL AT
    $FF01..$FF03,   // FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
    $FF05..$FF0A,   // FULLWIDTH PERCENT SIGN..FULLWIDTH ASTERISK
    $FF0C..$FF0F,   // FULLWIDTH COMMA..FULLWIDTH SOLIDUS
    $FF1A..$FF1B,   // FULLWIDTH COLON..FULLWIDTH SEMICOLON
    $FF1F..$FF20,   // FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
    $FF3B..$FF3D,   // FULLWIDTH LEFT SQUARE BRACKET..FULLWIDTH RIGHT SQUARE BRACKET
    $FF3F,          // FULLWIDTH LOW LINE
    $FF5B,          // FULLWIDTH LEFT CURLY BRACKET
    $FF5D,          // FULLWIDTH RIGHT CURLY BRACKET
    $FF61..$FF65 :  // HALFWIDTH IDEOGRAPHIC FULL STOP..HALFWIDTH KATAKANA MIDDLE DOT
      Result := True;
  end;
end;

{ Returns the code point of the zero digit of the digit run that Ch belongs    }
{ to, or 0 if Ch is not in any of the runs listed below.                       }
{ For a recognised digit, Ch - Result is its numeric value (0..9).             }
function DecimalDigitBase(const Ch: UCS4Char): UCS4Char;
begin
  Case Ch of
    $0030..$0039   : Result := $0030;  // DIGIT
    $0660..$0669   : Result := $0660;  // ARABIC-INDIC DIGIT
    $06F0..$06F9   : Result := $06F0;  // EXTENDED ARABIC-INDIC DIGIT
    $0966..$096F   : Result := $0966;  // DEVANAGARI DIGIT
    $09E6..$09EF   : Result := $09E6;  // BENGALI DIGIT
    $0A66..$0A6F   : Result := $0A66;  // GURMUKHI DIGIT
    $0AE6..$0AEF   : Result := $0AE6;  // GUJARATI DIGIT
    $0B66..$0B6F   : Result := $0B66;  // ORIYA DIGIT
    $0C66..$0C6F   : Result := $0C66;  // TELUGU DIGIT
    $0CE6..$0CEF   : Result := $0CE6;  // KANNADA DIGIT
    $0D66..$0D6F   : Result := $0D66;  // MALAYALAM DIGIT
    $0E50..$0E59   : Result := $0E50;  // THAI DIGIT
    $0ED0..$0ED9   : Result := $0ED0;  // LAO DIGIT
    $0F20..$0F29   : Result := $0F20;  // TIBETAN DIGIT
    $1040..$1049   : Result := $1040;  // MYANMAR DIGIT
    $17E0..$17E9   : Result := $17E0;  // KHMER DIGIT
    $1810..$1819   : Result := $1810;  // MONGOLIAN DIGIT
    // Only U+2070 and U+2074..U+2079 are superscript digits in this block:
    // U+2071 is SUPERSCRIPT LATIN SMALL LETTER I and U+2072..U+2073 are
    // unassigned (superscript one/two/three live at U+00B9/U+00B2/U+00B3 and
    // cannot be expressed with a single base offset, so they are not listed).
    $2070,
    $2074..$2079   : Result := $2070;  // SUPERSCRIPT DIGIT
    $2080..$2089   : Result := $2080;  // SUBSCRIPT DIGIT
    $FF10..$FF19   : Result := $FF10;  // FULLWIDTH DIGIT
    $1D7CE..$1D7D7 : Result := $1D7CE; // MATHEMATICAL BOLD DIGIT
    $1D7D8..$1D7E1 : Result := $1D7D8; // MATHEMATICAL DOUBLE-STRUCK DIGIT
    $1D7E2..$1D7EB : Result := $1D7E2; // MATHEMATICAL SANS-SERIF DIGIT
    $1D7EC..$1D7F5 : Result := $1D7EC; // MATHEMATICAL SANS-SERIF BOLD DIGIT
    $1D7F6..$1D7FF : Result := $1D7F6; // MATHEMATICAL MONOSPACE DIGIT
  else
    Result := 0;                       // not a recognised digit
  end;
end;

{ Numeric value of a digit character: its offset from the base returned by     }
{ DecimalDigitBase, or -1 when DecimalDigitBase reports 0 (not a digit).       }
function DecimalDigitValue(const Ch: UCS4Char): Integer;
var ZeroCode : LongWord;
begin
  ZeroCode := DecimalDigitBase(Ch);
  if ZeroCode <> 0 then
    Result := Ch - ZeroCode
  else
    Result := -1;
end;

{ WideChar convenience overload: forwards the character's ordinal to the       }
{ UCS4Char version of DecimalDigitValue.                                       }
function DecimalDigitValue(const Ch: WideChar): Integer;
var Code : UCS4Char;
begin
  Code := Ord(Ch);
  Result := DecimalDigitValue(Code);
end;

{ True when DecimalDigitBase recognises Ch, i.e. returns a non-zero base.      }
function IsDecimalDigit(const Ch: UCS4Char): Boolean;
var ZeroCode : UCS4Char;
begin
  ZeroCode := DecimalDigitBase(Ch);
  Result := not (ZeroCode = 0);
end;

{ WideChar overload: True when DecimalDigitBase returns a non-zero base for    }
{ the character's ordinal.                                                     }
function IsDecimalDigit(const Ch: WideChar): Boolean;
var ZeroCode : UCS4Char;
begin
  ZeroCode := DecimalDigitBase(Ord(Ch));
  Result := not (ZeroCode = 0);
end;

{ Decodes a single-character fraction into numerator A and denominator B.      }
{ Returns True when Ch is one of the fraction characters handled below;        }
{ otherwise A and B are both set to 0 and the result is False.                 }
function FractionCharacterValue(const Ch: WideChar; var A, B : Integer): Boolean;
var Numer, Denom : Integer;
begin
  // Defaults cover every character that is not matched below.
  Numer := 0;
  Denom := 0;
  Case Ord(Ch) of
    $00BC        : begin Numer := 1; Denom := 4; end;   // VULGAR FRACTION ONE QUARTER
    $00BD, $0F2A : begin Numer := 1; Denom := 2; end;   // VULGAR FRACTION ONE HALF, TIBETAN DIGIT HALF ONE
    $00BE        : begin Numer := 3; Denom := 4; end;   // VULGAR FRACTION THREE QUARTERS
    $2153..$2154 : begin                                // VULGAR FRACTION ONE THIRD..TWO THIRDS
                     Numer := Ord(Ch) - $2152;
                     Denom := 3;
                   end;
    $2155..$2158 : begin                                // VULGAR FRACTION ONE FIFTH..FOUR FIFTHS
                     Numer := Ord(Ch) - $2154;
                     Denom := 5;
                   end;
    $2159        : begin Numer := 1; Denom := 6; end;   // VULGAR FRACTION ONE SIXTH
    $215A        : begin Numer := 5; Denom := 6; end;   // VULGAR FRACTION FIVE SIXTHS
    $215B..$215E : begin                                // VULGAR FRACTION ONE/THREE/FIVE/SEVEN EIGHTHS
                     Numer := 2 * (Ord(Ch) - $215B) + 1;
                     Denom := 8;
                   end;
  end;
  A := Numer;
  B := Denom;
  Result := Denom <> 0;
end;

{ Integer value of a Roman numeral character in U+2160..U+2182, covering both  }
{ the capital and the small forms. Returns 0 for any other character.          }
function RomanNumeralValue(const Ch: WideChar): Integer;
var Code : Integer;
begin
  Code := Ord(Ch);
  Case Code of
    // ONE..TWELVE are laid out consecutively, so the value is the offset + 1.
    $2160..$216B        : Result := Code - $2160 + 1;  // ROMAN NUMERAL ONE..TWELVE
    $2170..$217B        : Result := Code - $2170 + 1;  // SMALL ROMAN NUMERAL ONE..TWELVE
    $216C, $217C        : Result := 50;                // (SMALL) ROMAN NUMERAL FIFTY
    $216D, $217D        : Result := 100;               // (SMALL) ROMAN NUMERAL ONE HUNDRED
    $216E, $217E        : Result := 500;               // (SMALL) ROMAN NUMERAL FIVE HUNDRED
    $216F, $217F, $2180 : Result := 1000;              // (SMALL) ROMAN NUMERAL ONE THOUSAND, ROMAN NUMERAL ONE THOUSAND C D
    $2181               : Result := 5000;              // ROMAN NUMERAL FIVE THOUSAND
    $2182               : Result := 10000;             // ROMAN NUMERAL TEN THOUSAND
  else
    Result := 0;
  end;
end;

{ Returns the code point of the letter "A"/"a" of the basic or fullwidth Latin }
{ alphabet run that Ch belongs to, or 0 if Ch is in none of the four runs.     }
function LatinAlphaCharBase(const Ch: WideChar): UCS4Char;
var Code : Word;
begin
  Code := Ord(Ch);
  if (Code >= $0041) and (Code <= $005A) then
    Result := $0041   // LATIN CAPITAL LETTER
  else if (Code >= $0061) and (Code <= $007A) then
    Result := $0061   // LATIN SMALL LETTER
  else if (Code >= $FF21) and (Code <= $FF3A) then
    Result := $FF21   // FULLWIDTH LATIN CAPITAL LETTER
  else if (Code >= $FF41) and (Code <= $FF5A) then
    Result := $FF41   // FULLWIDTH LATIN SMALL LETTER
  else
    Result := 0;
end;

{ Returns the Latin alphabet base (as given by LatinAlphaCharBase) when Ch is  }
{ one of the first six letters of that run (A..F / a..f), otherwise 0.         }
function HexAlphaDigitBase(const Ch: WideChar): UCS4Char; overload;
var AlphaBase : UCS4Char;
begin
  AlphaBase := LatinAlphaCharBase(Ch);
  // Letters beyond the sixth of the run (offset > 5) are not hex digits.
  if (AlphaBase <> 0) and (Ord(Ch) - AlphaBase > 5) then
    AlphaBase := 0;
  Result := AlphaBase;
end;

{ Returns the code point of the letter "A"/"a" of the alphabet run that Ch     }
{ belongs to when Ch is one of that run's letters A..F / a..f, otherwise 0.    }
{ For a recognised character, Ch - Result + 10 is its hexadecimal value.       }
{ Code points up to $FFFF are delegated to the WideChar overload; above that   }
{ the mathematical alphanumeric and tag capital letters are handled here.      }
function HexAlphaDigitBase(const Ch: UCS4Char): UCS4Char; overload;
begin
  if Ch <= $FFFF then
    Result := HexAlphaDigitBase(WideChar(Ch))
  else
    Case Ch of
      $1D400..$1D405 : Result := $1D400;  // MATHEMATICAL BOLD CAPITAL
      $1D41A..$1D41F : Result := $1D41A;  // MATHEMATICAL BOLD SMALL
      $1D434..$1D439 : Result := $1D434;  // MATHEMATICAL ITALIC CAPITAL
      $1D44E..$1D453 : Result := $1D44E;  // MATHEMATICAL ITALIC SMALL
      $1D468..$1D46D : Result := $1D468;  // MATHEMATICAL BOLD ITALIC CAPITAL
      $1D482..$1D487 : Result := $1D482;  // MATHEMATICAL BOLD ITALIC SMALL
      // Some positions in the following alphabets are reserved code points
      // (the letters were encoded earlier in the Letterlike Symbols block),
      // so they are excluded rather than reported as hex digits.
      $1D49C,
      $1D49E..$1D49F : Result := $1D49C;  // MATHEMATICAL SCRIPT CAPITAL (B, E, F reserved)
      $1D4B6..$1D4B9,
      $1D4BB         : Result := $1D4B6;  // MATHEMATICAL SCRIPT SMALL (e reserved)
      $1D4D0..$1D4D5 : Result := $1D4D0;  // MATHEMATICAL BOLD SCRIPT CAPITAL
      $1D4EA..$1D4EF : Result := $1D4EA;  // MATHEMATICAL BOLD SCRIPT SMALL
      $1D504..$1D505,
      $1D507..$1D509 : Result := $1D504;  // MATHEMATICAL FRAKTUR CAPITAL (C reserved)
      $1D51E..$1D523 : Result := $1D51E;  // MATHEMATICAL FRAKTUR SMALL
      $1D538..$1D539,
      $1D53B..$1D53D : Result := $1D538;  // MATHEMATICAL DOUBLE-STRUCK CAPITAL (C reserved)
      $1D552..$1D557 : Result := $1D552;  // MATHEMATICAL DOUBLE-STRUCK SMALL
      $1D56C..$1D571 : Result := $1D56C;  // MATHEMATICAL BOLD FRAKTUR CAPITAL
      $1D586..$1D58B : Result := $1D586;  // MATHEMATICAL BOLD FRAKTUR SMALL
      $1D5A0..$1D5A5 : Result := $1D5A0;  // MATHEMATICAL SANS-SERIF CAPITAL
      $1D5BA..$1D5BF : Result := $1D5BA;  // MATHEMATICAL SANS-SERIF SMALL
      $1D5D4..$1D5D9 : Result := $1D5D4;  // MATHEMATICAL SANS-SERIF BOLD CAPITAL
      $1D5EE..$1D5F3 : Result := $1D5EE;  // MATHEMATICAL SANS-SERIF BOLD SMALL
      $1D608..$1D60D : Result := $1D608;  // MATHEMATICAL SANS-SERIF ITALIC CAPITAL
      $1D622..$1D627 : Result := $1D622;  // MATHEMATICAL SANS-SERIF ITALIC SMALL
      $1D63C..$1D641 : Result := $1D63C;  // MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL
      $1D656..$1D65B : Result := $1D656;  // MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL
      $1D670..$1D675 : Result := $1D670;  // MATHEMATICAL MONOSPACE CAPITAL
      $1D68A..$1D68F : Result := $1D68A;  // MATHEMATICAL MONOSPACE SMALL
      $E0041..$E0046 : Result := $E0041;  // TAG LATIN CAPITAL LETTER
    else
      Result := 0;                        // not a hex letter
    end;
end;

{ Hexadecimal value (0..15) of Ch, or -1 when Ch is neither a decimal digit    }
{ (per DecimalDigitValue) nor a hex letter (per HexAlphaDigitBase).            }
function HexDigitValue(const Ch: UCS4Char): Integer;
var AlphaBase : UCS4Char;
begin
  Result := DecimalDigitValue(Ch);
  if Result < 0 then
    begin
      // Not a decimal digit: try the letters A..F, which count from 10.
      AlphaBase := HexAlphaDigitBase(Ch);
      if AlphaBase <> 0 then
        Result := Ch - AlphaBase + 10;
    end;
end;

{ WideChar overload: hexadecimal value (0..15) of Ch, or -1 when Ch is neither }
{ a decimal digit nor a hex letter.                                            }
function HexDigitValue(const Ch: WideChar): Integer;
var AlphaBase : UCS4Char;
begin
  Result := DecimalDigitValue(Ch);
  if Result < 0 then
    begin
      // Not a decimal digit: try the letters A..F, which count from 10.
      AlphaBase := HexAlphaDigitBase(Ch);
      if AlphaBase <> 0 then
        Result := Ord(Ch) - AlphaBase + 10;
    end;
end;

{ True when HexDigitValue yields a non-negative value for Ch.                  }
function IsHexDigit(const Ch: UCS4Char): Boolean;
var Value : Integer;
begin
  Value := HexDigitValue(Ch);
  Result := not (Value < 0);
end;

{ WideChar overload: True when HexDigitValue yields a non-negative value.      }
function IsHexDigit(const Ch: WideChar): Boolean;
var Value : Integer;
begin
  Value := HexDigitValue(Ch);
  Result := not (Value < 0);
end;

{ Unicode letter table                                                         }
type
  // Case classification stored with each letter table entry.
  TUnicodeLetterAttr = (laUpper, laLower);
  // One entry of the letter table. Declared packed, so the fields are laid
  // out without padding between them.
  TUnicodeLetterInfo = packed record
    Unicode  : WideChar;            // the letter this entry describes
    Attr     : TUnicodeLetterAttr;  // upper or lower case classification
    // NOTE(review): in the table entries CaseCode looks like the opposite-case
    // counterpart of Unicode, with #$FFFF where there is none -- confirm
    // against the code that reads the table.
    CaseCode : WideChar;
  end;
  // Pointer to a letter table entry.
  PUnicodeLetterInfo = ^TUnicodeLetterInfo;

const
  // Derived from 'Lu' and 'Ll' class
  UnicodeLetterEntries = 1492; // ~7K table
  UnicodeLetterInfo : Array[0..UnicodeLetterEntries - 1] of TUnicodeLetterInfo = (
    (Unicode:#$0041; Attr:laUpper; CaseCode:#$0061),   // LATIN CAPITAL LETTER A
    (Unicode:#$0042; Attr:laUpper; CaseCode:#$0062),   // LATIN CAPITAL LETTER B
    (Unicode:#$0043; Attr:laUpper; CaseCode:#$0063),   // LATIN CAPITAL LETTER C
    (Unicode:#$0044; Attr:laUpper; CaseCode:#$0064),   // LATIN CAPITAL LETTER D
    (Unicode:#$0045; Attr:laUpper; CaseCode:#$0065),   // LATIN CAPITAL LETTER E
    (Unicode:#$0046; Attr:laUpper; CaseCode:#$0066),   // LATIN CAPITAL LETTER F
    (Unicode:#$0047; Attr:laUpper; CaseCode:#$0067),   // LATIN CAPITAL LETTER G
    (Unicode:#$0048; Attr:laUpper; CaseCode:#$0068),   // LATIN CAPITAL LETTER H
    (Unicode:#$0049; Attr:laUpper; CaseCode:#$0069),   // LATIN CAPITAL LETTER I
    (Unicode:#$004A; Attr:laUpper; CaseCode:#$006A),   // LATIN CAPITAL LETTER J
    (Unicode:#$004B; Attr:laUpper; CaseCode:#$006B),   // LATIN CAPITAL LETTER K
    (Unicode:#$004C; Attr:laUpper; CaseCode:#$006C),   // LATIN CAPITAL LETTER L
    (Unicode:#$004D; Attr:laUpper; CaseCode:#$006D),   // LATIN CAPITAL LETTER M
    (Unicode:#$004E; Attr:laUpper; CaseCode:#$006E),   // LATIN CAPITAL LETTER N
    (Unicode:#$004F; Attr:laUpper; CaseCode:#$006F),   // LATIN CAPITAL LETTER O
    (Unicode:#$0050; Attr:laUpper; CaseCode:#$0070),   // LATIN CAPITAL LETTER P
    (Unicode:#$0051; Attr:laUpper; CaseCode:#$0071),   // LATIN CAPITAL LETTER Q
    (Unicode:#$0052; Attr:laUpper; CaseCode:#$0072),   // LATIN CAPITAL LETTER R
    (Unicode:#$0053; Attr:laUpper; CaseCode:#$0073),   // LATIN CAPITAL LETTER S
    (Unicode:#$0054; Attr:laUpper; CaseCode:#$0074),   // LATIN CAPITAL LETTER T
    (Unicode:#$0055; Attr:laUpper; CaseCode:#$0075),   // LATIN CAPITAL LETTER U
    (Unicode:#$0056; Attr:laUpper; CaseCode:#$0076),   // LATIN CAPITAL LETTER V
    (Unicode:#$0057; Attr:laUpper; CaseCode:#$0077),   // LATIN CAPITAL LETTER W
    (Unicode:#$0058; Attr:laUpper; CaseCode:#$0078),   // LATIN CAPITAL LETTER X
    (Unicode:#$0059; Attr:laUpper; CaseCode:#$0079),   // LATIN CAPITAL LETTER Y
    (Unicode:#$005A; Attr:laUpper; CaseCode:#$007A),   // LATIN CAPITAL LETTER Z
    (Unicode:#$0061; Attr:laLower; CaseCode:#$0041),   // LATIN SMALL LETTER A
    (Unicode:#$0062; Attr:laLower; CaseCode:#$0042),   // LATIN SMALL LETTER B
    (Unicode:#$0063; Attr:laLower; CaseCode:#$0043),   // LATIN SMALL LETTER C
    (Unicode:#$0064; Attr:laLower; CaseCode:#$0044),   // LATIN SMALL LETTER D
    (Unicode:#$0065; Attr:laLower; CaseCode:#$0045),   // LATIN SMALL LETTER E
    (Unicode:#$0066; Attr:laLower; CaseCode:#$0046),   // LATIN SMALL LETTER F
    (Unicode:#$0067; Attr:laLower; CaseCode:#$0047),   // LATIN SMALL LETTER G
    (Unicode:#$0068; Attr:laLower; CaseCode:#$0048),   // LATIN SMALL LETTER H
    (Unicode:#$0069; Attr:laLower; CaseCode:#$0049),   // LATIN SMALL LETTER I
    (Unicode:#$006A; Attr:laLower; CaseCode:#$004A),   // LATIN SMALL LETTER J
    (Unicode:#$006B; Attr:laLower; CaseCode:#$004B),   // LATIN SMALL LETTER K
    (Unicode:#$006C; Attr:laLower; CaseCode:#$004C),   // LATIN SMALL LETTER L
    (Unicode:#$006D; Attr:laLower; CaseCode:#$004D),   // LATIN SMALL LETTER M
    (Unicode:#$006E; Attr:laLower; CaseCode:#$004E),   // LATIN SMALL LETTER N
    (Unicode:#$006F; Attr:laLower; CaseCode:#$004F),   // LATIN SMALL LETTER O
    (Unicode:#$0070; Attr:laLower; CaseCode:#$0050),   // LATIN SMALL LETTER P
    (Unicode:#$0071; Attr:laLower; CaseCode:#$0051),   // LATIN SMALL LETTER Q
    (Unicode:#$0072; Attr:laLower; CaseCode:#$0052),   // LATIN SMALL LETTER R
    (Unicode:#$0073; Attr:laLower; CaseCode:#$0053),   // LATIN SMALL LETTER S
    (Unicode:#$0074; Attr:laLower; CaseCode:#$0054),   // LATIN SMALL LETTER T
    (Unicode:#$0075; Attr:laLower; CaseCode:#$0055),   // LATIN SMALL LETTER U
    (Unicode:#$0076; Attr:laLower; CaseCode:#$0056),   // LATIN SMALL LETTER V
    (Unicode:#$0077; Attr:laLower; CaseCode:#$0057),   // LATIN SMALL LETTER W
    (Unicode:#$0078; Attr:laLower; CaseCode:#$0058),   // LATIN SMALL LETTER X
    (Unicode:#$0079; Attr:laLower; CaseCode:#$0059),   // LATIN SMALL LETTER Y
    (Unicode:#$007A; Attr:laLower; CaseCode:#$005A),   // LATIN SMALL LETTER Z
    (Unicode:#$00AA; Attr:laLower; CaseCode:#$FFFF),   // FEMININE ORDINAL INDICATOR
    (Unicode:#$00B5; Attr:laLower; CaseCode:#$039C),   // MICRO SIGN
    (Unicode:#$00BA; Attr:laLower; CaseCode:#$FFFF),   // MASCULINE ORDINAL INDICATOR
    (Unicode:#$00C0; Attr:laUpper; CaseCode:#$00E0),   // LATIN CAPITAL LETTER A WITH GRAVE
    (Unicode:#$00C1; Attr:laUpper; CaseCode:#$00E1),   // LATIN CAPITAL LETTER A WITH ACUTE
    (Unicode:#$00C2; Attr:laUpper; CaseCode:#$00E2),   // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    (Unicode:#$00C3; Attr:laUpper; CaseCode:#$00E3),   // LATIN CAPITAL LETTER A WITH TILDE
    (Unicode:#$00C4; Attr:laUpper; CaseCode:#$00E4),   // LATIN CAPITAL LETTER A WITH DIAERESIS
    (Unicode:#$00C5; Attr:laUpper; CaseCode:#$00E5),   // LATIN CAPITAL LETTER A WITH RING ABOVE
    (Unicode:#$00C6; Attr:laUpper; CaseCode:#$00E6),   // LATIN CAPITAL LETTER AE
    (Unicode:#$00C7; Attr:laUpper; CaseCode:#$00E7),   // LATIN CAPITAL LETTER C WITH CEDILLA
    (Unicode:#$00C8; Attr:laUpper; CaseCode:#$00E8),   // LATIN CAPITAL LETTER E WITH GRAVE
    (Unicode:#$00C9; Attr:laUpper; CaseCode:#$00E9),   // LATIN CAPITAL LETTER E WITH ACUTE
    (Unicode:#$00CA; Attr:laUpper; CaseCode:#$00EA),   // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    (Unicode:#$00CB; Attr:laUpper; CaseCode:#$00EB),   // LATIN CAPITAL LETTER E WITH DIAERESIS
    (Unicode:#$00CC; Attr:laUpper; CaseCode:#$00EC),   // LATIN CAPITAL LETTER I WITH GRAVE
    (Unicode:#$00CD; Attr:laUpper; CaseCode:#$00ED),   // LATIN CAPITAL LETTER I WITH ACUTE
    (Unicode:#$00CE; Attr:laUpper; CaseCode:#$00EE),   // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    (Unicode:#$00CF; Attr:laUpper; CaseCode:#$00EF),   // LATIN CAPITAL LETTER I WITH DIAERESIS
    (Unicode:#$00D0; Attr:laUpper; CaseCode:#$00F0),   // LATIN CAPITAL LETTER ETH
    (Unicode:#$00D1; Attr:laUpper; CaseCode:#$00F1),   // LATIN CAPITAL LETTER N WITH TILDE
    (Unicode:#$00D2; Attr:laUpper; CaseCode:#$00F2),   // LATIN CAPITAL LETTER O WITH GRAVE
    (Unicode:#$00D3; Attr:laUpper; CaseCode:#$00F3),   // LATIN CAPITAL LETTER O WITH ACUTE
    (Unicode:#$00D4; Attr:laUpper; CaseCode:#$00F4),   // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    (Unicode:#$00D5; Attr:laUpper; CaseCode:#$00F5),   // LATIN CAPITAL LETTER O WITH TILDE
    (Unicode:#$00D6; Attr:laUpper; CaseCode:#$00F6),   // LATIN CAPITAL LETTER O WITH DIAERESIS
    (Unicode:#$00D8; Attr:laUpper; CaseCode:#$00F8),   // LATIN CAPITAL LETTER O WITH STROKE
    (Unicode:#$00D9; Attr:laUpper; CaseCode:#$00F9),   // LATIN CAPITAL LETTER U WITH GRAVE
    (Unicode:#$00DA; Attr:laUpper; CaseCode:#$00FA),   // LATIN CAPITAL LETTER U WITH ACUTE
    (Unicode:#$00DB; Attr:laUpper; CaseCode:#$00FB),   // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    (Unicode:#$00DC; Attr:laUpper; CaseCode:#$00FC),   // LATIN CAPITAL LETTER U WITH DIAERESIS
    (Unicode:#$00DD; Attr:laUpper; CaseCode:#$00FD),   // LATIN CAPITAL LETTER Y WITH ACUTE
    (Unicode:#$00DE; Attr:laUpper; CaseCode:#$00FE),   // LATIN CAPITAL LETTER THORN
    (Unicode:#$00DF; Attr:laLower; CaseCode:#$FFFF),   // LATIN SMALL LETTER SHARP S
    (Unicode:#$00E0; Attr:laLower; CaseCode:#$00C0),   // LATIN SMALL LETTER A WITH GRAVE
    (Unicode:#$00E1; Attr:laLower; CaseCode:#$00C1),   // LATIN SMALL LETTER A WITH ACUTE
    (Unicode:#$00E2; Attr:laLower; CaseCode:#$00C2),   // LATIN SMALL LETTER A WITH CIRCUMFLEX
    (Unicode:#$00E3; Attr:laLower; CaseCode:#$00C3),   // LATIN SMALL LETTER A WITH TILDE
    (Unicode:#$00E4; Attr:laLower; CaseCode:#$00C4),   // LATIN SMALL LETTER A WITH DIAERESIS
    (Unicode:#$00E5; Attr:laLower; CaseCode:#$00C5),   // LATIN SMALL LETTER A WITH RING ABOVE
    (Unicode:#$00E6; Attr:laLower; CaseCode:#$00C6),   // LATIN SMALL LETTER AE
    (Unicode:#$00E7; Attr:laLower; CaseCode:#$00C7),   // LATIN SMALL LETTER C WITH CEDILLA
    (Unicode:#$00E8; Attr:laLower; CaseCode:#$00C8),   // LATIN SMALL LETTER E WITH GRAVE
    (Unicode:#$00E9; Attr:laLower; CaseCode:#$00C9),   // LATIN SMALL LETTER E WITH ACUTE
    (Unicode:#$00EA; Attr:laLower; CaseCode:#$00CA),   // LATIN SMALL LETTER E WITH CIRCUMFLEX
    (Unicode:#$00EB; Attr:laLower; CaseCode:#$00CB),   // LATIN SMALL LETTER E WITH DIAERESIS
    (Unicode:#$00EC; Attr:laLower; CaseCode:#$00CC),   // LATIN SMALL LETTER I WITH GRAVE
    (Unicode:#$00ED; Attr:laLower; CaseCode:#$00CD),   // LATIN SMALL LETTER I WITH ACUTE
    (Unicode:#$00EE; Attr:laLower; CaseCode:#$00CE),   // LATIN SMALL LETTER I WITH CIRCUMFLEX
    (Unicode:#$00EF; Attr:laLower; CaseCode:#$00CF),   // LATIN SMALL LETTER I WITH DIAERESIS
    (Unicode:#$00F0; Attr:laLower; CaseCode:#$00D0),   // LATIN SMALL LETTER ETH
    (Unicode:#$00F1; Attr:laLower; CaseCode:#$00D1),   // LATIN SMALL LETTER N WITH TILDE
    (Unicode:#$00F2; Attr:laLower; CaseCode:#$00D2),   // LATIN SMALL LETTER O WITH GRAVE
    (Unicode:#$00F3; Attr:laLower; CaseCode:#$00D3),   // LATIN SMALL LETTER O WITH ACUTE
    (Unicode:#$00F4; Attr:laLower; CaseCode:#$00D4),   // LATIN SMALL LETTER O WITH CIRCUMFLEX
    (Unicode:#$00F5; Attr:laLower; CaseCode:#$00D5),   // LATIN SMALL LETTER O WITH TILDE
    (Unicode:#$00F6; Attr:laLower; CaseCode:#$00D6),   // LATIN SMALL LETTER O WITH DIAERESIS
    (Unicode:#$00F8; Attr:laLower; CaseCode:#$00D8),   // LATIN SMALL LETTER O WITH STROKE
    (Unicode:#$00F9; Attr:laLower; CaseCode:#$00D9),   // LATIN SMALL LETTER U WITH GRAVE
    (Unicode:#$00FA; Attr:laLower; CaseCode:#$00DA),   // LATIN SMALL LETTER U WITH ACUTE
    (Unicode:#$00FB; Attr:laLower; CaseCode:#$00DB),   // LATIN SMALL LETTER U WITH CIRCUMFLEX
    (Unicode:#$00FC; Attr:laLower; CaseCode:#$00DC),   // LATIN SMALL LETTER U WITH DIAERESIS
    (Unicode:#$00FD; Attr:laLower; CaseCode:#$00DD),   // LATIN SMALL LETTER Y WITH ACUTE
    (Unicode:#$00FE; Attr:laLower; CaseCode:#$00DE),   // LATIN SMALL LETTER THORN
    (Unicode:#$00FF; Attr:laLower; CaseCode:#$0178),   // LATIN SMALL LETTER Y WITH DIAERESIS
    (Unicode:#$0100; Attr:laUpper; CaseCode:#$0101),   // LATIN CAPITAL LETTER A WITH MACRON
    (Unicode:#$0101; Attr:laLower; CaseCode:#$0100),   // LATIN SMALL LETTER A WITH MACRON
    (Unicode:#$0102; Attr:laUpper; CaseCode:#$0103),   // LATIN CAPITAL LETTER A WITH BREVE
    (Unicode:#$0103; Attr:laLower; CaseCode:#$0102),   // LATIN SMALL LETTER A WITH BREVE
    (Unicode:#$0104; Attr:laUpper; CaseCode:#$0105),   // LATIN CAPITAL LETTER A WITH OGONEK
    (Unicode:#$0105; Attr:laLower; CaseCode:#$0104),   // LATIN SMALL LETTER A WITH OGONEK
    (Unicode:#$0106; Attr:laUpper; CaseCode:#$0107),   // LATIN CAPITAL LETTER C WITH ACUTE
    (Unicode:#$0107; Attr:laLower; CaseCode:#$0106),   // LATIN SMALL LETTER C WITH ACUTE
    (Unicode:#$0108; Attr:laUpper; CaseCode:#$0109),   // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
    (Unicode:#$0109; Attr:laLower; CaseCode:#$0108),   // LATIN SMALL LETTER C WITH CIRCUMFLEX
    (Unicode:#$010A; Attr:laUpper; CaseCode:#$010B),   // LATIN CAPITAL LETTER C WITH DOT ABOVE
    (Unicode:#$010B; Attr:laLower; CaseCode:#$010A),   // LATIN SMALL LETTER C WITH DOT ABOVE
    (Unicode:#$010C; Attr:laUpper; CaseCode:#$010D),   // LATIN CAPITAL LETTER C WITH CARON
    (Unicode:#$010D; Attr:laLower; CaseCode:#$010C),   // LATIN SMALL LETTER C WITH CARON
    (Unicode:#$010E; Attr:laUpper; CaseCode:#$010F),   // LATIN CAPITAL LETTER D WITH CARON
    (Unicode:#$010F; Attr:laLower; CaseCode:#$010E),   // LATIN SMALL LETTER D WITH CARON
    (Unicode:#$0110; Attr:laUpper; CaseCode:#$0111),   // LATIN CAPITAL LETTER D WITH STROKE
    (Unicode:#$0111; Attr:laLower; CaseCode:#$0110),   // LATIN SMALL LETTER D WITH STROKE
    (Unicode:#$0112; Attr:laUpper; CaseCode:#$0113),   // LATIN CAPITAL LETTER E WITH MACRON
    (Unicode:#$0113; Attr:laLower; CaseCode:#$0112),   // LATIN SMALL LETTER E WITH MACRON
    (Unicode:#$0114; Attr:laUpper; CaseCode:#$0115),   // LATIN CAPITAL LETTER E WITH BREVE
    (Unicode:#$0115; Attr:laLower; CaseCode:#$0114),   // LATIN SMALL LETTER E WITH BREVE
    (Unicode:#$0116; Attr:laUpper; CaseCode:#$0117),   // LATIN CAPITAL LETTER E WITH DOT ABOVE
    (Unicode:#$0117; Attr:laLower; CaseCode:#$0116),   // LATIN SMALL LETTER E WITH DOT ABOVE
    (Unicode:#$0118; Attr:laUpper; CaseCode:#$0119),   // LATIN CAPITAL LETTER E WITH OGONEK
    (Unicode:#$0119; Attr:laLower; CaseCode:#$0118),   // LATIN SMALL LETTER E WITH OGONEK
    (Unicode:#$011A; Attr:laUpper; CaseCode:#$011B),   // LATIN CAPITAL LETTER E WITH CARON
    (Unicode:#$011B; Attr:laLower; CaseCode:#$011A),   // LATIN SMALL LETTER E WITH CARON
    (Unicode:#$011C; Attr:laUpper; CaseCode:#$011D),   // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
    (Unicode:#$011D; Attr:laLower; CaseCode:#$011C),   // LATIN SMALL LETTER G WITH CIRCUMFLEX
    (Unicode:#$011E; Attr:laUpper; CaseCode:#$011F),   // LATIN CAPITAL LETTER G WITH BREVE
    (Unicode:#$011F; Attr:laLower; CaseCode:#$011E),   // LATIN SMALL LETTER G WITH BREVE
    (Unicode:#$0120; Attr:laUpper; CaseCode:#$0121),   // LATIN CAPITAL LETTER G WITH DOT ABOVE
    (Unicode:#$0121; Attr:laLower; CaseCode:#$0120),   // LATIN SMALL LETTER G WITH DOT ABOVE
    (Unicode:#$0122; Attr:laUpper; CaseCode:#$0123),   // LATIN CAPITAL LETTER G WITH CEDILLA
    (Unicode:#$0123; Attr:laLower; CaseCode:#$0122),   // LATIN SMALL LETTER G WITH CEDILLA
    (Unicode:#$0124; Attr:laUpper; CaseCode:#$0125),   // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
    (Unicode:#$0125; Attr:laLower; CaseCode:#$0124),   // LATIN SMALL LETTER H WITH CIRCUMFLEX
    (Unicode:#$0126; Attr:laUpper; CaseCode:#$0127),   // LATIN CAPITAL LETTER H WITH STROKE
    (Unicode:#$0127; Attr:laLower; CaseCode:#$0126),   // LATIN SMALL LETTER H WITH STROKE
    (Unicode:#$0128; Attr:laUpper; CaseCode:#$0129),   // LATIN CAPITAL LETTER I WITH TILDE
    (Unicode:#$0129; Attr:laLower; CaseCode:#$0128),   // LATIN SMALL LETTER I WITH TILDE
    (Unicode:#$012A; Attr:laUpper; CaseCode:#$012B),   // LATIN CAPITAL LETTER I WITH MACRON
    (Unicode:#$012B; Attr:laLower; CaseCode:#$012A),   // LATIN SMALL LETTER I WITH MACRON
    (Unicode:#$012C; Attr:laUpper; CaseCode:#$012D),   // LATIN CAPITAL LETTER I WITH BREVE
    (Unicode:#$012D; Attr:laLower; CaseCode:#$012C),   // LATIN SMALL LETTER I WITH BREVE
    (Unicode:#$012E; Attr:laUpper; CaseCode:#$012F),   // LATIN CAPITAL LETTER I WITH OGONEK
    (Unicode:#$012F; Attr:laLower; CaseCode:#$012E),   // LATIN SMALL LETTER I WITH OGONEK
    (Unicode:#$0130; Attr:laUpper; CaseCode:#$0069),   // LATIN CAPITAL LETTER I WITH DOT ABOVE
    (Unicode:#$0131; Attr:laLower; CaseCode:#$0049),   // LATIN SMALL LETTER DOTLESS I
    (Unicode:#$0132; Attr:laUpper; CaseCode:#$0133),   // LATIN CAPITAL LIGATURE IJ
    (Unicode:#$0133; Attr:laLower; CaseCode:#$0132),   // LATIN SMALL LIGATURE IJ
    (Unicode:#$0134; Attr:laUpper; CaseCode:#$0135),   // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
    (Unicode:#$0135; Attr:laLower; CaseCode:#$0134),   // LATIN SMALL LETTER J WITH CIRCUMFLEX
    (Unicode:#$0136; Attr:laUpper; CaseCode:#$0137),   // LATIN CAPITAL LETTER K WITH CEDILLA
    (Unicode:#$0137; Attr:laLower; CaseCode:#$0136),   // LATIN SMALL LETTER K WITH CEDILLA
    (Unicode:#$0138; Attr:laLower; CaseCode:#$FFFF),   // LATIN SMALL LETTER KRA
    (Unicode:#$0139; Attr:laUpper; CaseCode:#$013A),   // LATIN CAPITAL LETTER L WITH ACUTE
    (Unicode:#$013A; Attr:laLower; CaseCode:#$0139),   // LATIN SMALL LETTER L WITH ACUTE
    (Unicode:#$013B; Attr:laUpper; CaseCode:#$013C),   // LATIN CAPITAL LETTER L WITH CEDILLA
    (Unicode:#$013C; Attr:laLower; CaseCode:#$013B),   // LATIN SMALL LETTER L WITH CEDILLA
    (Unicode:#$013D; Attr:laUpper; CaseCode:#$013E),   // LATIN CAPITAL LETTER L WITH CARON
    (Unicode:#$013E; Attr:laLower; CaseCode:#$013D),   // LATIN SMALL LETTER L WITH CARON
    (Unicode:#$013F; Attr:laUpper; CaseCode:#$0140),   // LATIN CAPITAL LETTER L WITH MIDDLE DOT
    (Unicode:#$0140; Attr:laLower; CaseCode:#$013F),   // LATIN SMALL LETTER L WITH MIDDLE DOT
    (Unicode:#$0141; Attr:laUpper; CaseCode:#$0142),   // LATIN CAPITAL LETTER L WITH STROKE
    (Unicode:#$0142; Attr:laLower; CaseCode:#$0141),   // LATIN SMALL LETTER L WITH STROKE
    (Unicode:#$0143; Attr:laUpper; CaseCode:#$0144),   // LATIN CAPITAL LETTER N WITH ACUTE
    (Unicode:#$0144; Attr:laLower; CaseCode:#$0143),   // LATIN SMALL LETTER N WITH ACUTE
    (Unicode:#$0145; Attr:laUpper; CaseCode:#$0146),   // LATIN CAPITAL LETTER N WITH CEDILLA
    (Unicode:#$0146; Attr:laLower; CaseCode:#$0145),   // LATIN SMALL LETTER N WITH CEDILLA
    (Unicode:#$0147; Attr:laUpper; CaseCode:#$0148),   // LATIN CAPITAL LETTER N WITH CARON
    (Unicode:#$0148; Attr:laLower; CaseCode:#$0147),   // LATIN SMALL LETTER N WITH CARON
    (Unicode:#$0149; Attr:laLower; CaseCode:#$FFFF),   // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
    (Unicode:#$014A; Attr:laUpper; CaseCode:#$014B),   // LATIN CAPITAL LETTER ENG
    (Unicode:#$014B; Attr:laLower; CaseCode:#$014A),   // LATIN SMALL LETTER ENG
    (Unicode:#$014C; Attr:laUpper; CaseCode:#$014D),   // LATIN CAPITAL LETTER O WITH MACRON
    (Unicode:#$014D; Attr:laLower; CaseCode:#$014C),   // LATIN SMALL LETTER O WITH MACRON
    (Unicode:#$014E; Attr:laUpper; CaseCode:#$014F),   // LATIN CAPITAL LETTER O WITH BREVE
    (Unicode:#$014F; Attr:laLower; CaseCode:#$014E),   // LATIN SMALL LETTER O WITH BREVE
    (Unicode:#$0150; Attr:laUpper; CaseCode:#$0151),   // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
    (Unicode:#$0151; Attr:laLower; CaseCode:#$0150),   // LATIN SMALL LETTER O WITH DOUBLE ACUTE
    (Unicode:#$0152; Attr:laUpper; CaseCode:#$0153),   // LATIN CAPITAL LIGATURE OE
    (Unicode:#$0153; Attr:laLower; CaseCode:#$0152),   // LATIN SMALL LIGATURE OE
    (Unicode:#$0154; Attr:laUpper; CaseCode:#$0155),   // LATIN CAPITAL LETTER R WITH ACUTE
    (Unicode:#$0155; Attr:laLower; CaseCode:#$0154),   // LATIN SMALL LETTER R WITH ACUTE
    (Unicode:#$0156; Attr:laUpper; CaseCode:#$0157),   // LATIN CAPITAL LETTER R WITH CEDILLA
    (Unicode:#$0157; Attr:laLower; CaseCode:#$0156),   // LATIN SMALL LETTER R WITH CEDILLA
    (Unicode:#$0158; Attr:laUpper; CaseCode:#$0159),   // LATIN CAPITAL LETTER R WITH CARON
    (Unicode:#$0159; Attr:laLower; CaseCode:#$0158),   // LATIN SMALL LETTER R WITH CARON
    (Unicode:#$015A; Attr:laUpper; CaseCode:#$015B),   // LATIN CAPITAL LETTER S WITH ACUTE
    (Unicode:#$015B; Attr:laLower; CaseCode:#$015A),   // LATIN SMALL LETTER S WITH ACUTE
    (Unicode:#$015C; Attr:laUpper; CaseCode:#$015D),   // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
    (Unicode:#$015D; Attr:laLower; CaseCode:#$015C),   // LATIN SMALL LETTER S WITH CIRCUMFLEX
    (Unicode:#$015E; Attr:laUpper; CaseCode:#$015F),   // LATIN CAPITAL LETTER S WITH CEDILLA
    (Unicode:#$015F; Attr:laLower; CaseCode:#$015E),   // LATIN SMALL LETTER S WITH CEDILLA
    (Unicode:#$0160; Attr:laUpper; CaseCode:#$0161),   // LATIN CAPITAL LETTER S WITH CARON
    (Unicode:#$0161; Attr:laLower; CaseCode:#$0160),   // LATIN SMALL LETTER S WITH CARON
    (Unicode:#$0162; Attr:laUpper; CaseCode:#$0163),   // LATIN CAPITAL LETTER T WITH CEDILLA
    (Unicode:#$0163; Attr:laLower; CaseCode:#$0162),   // LATIN SMALL LETTER T WITH CEDILLA
    (Unicode:#$0164; Attr:laUpper; CaseCode:#$0165),   // LATIN CAPITAL LETTER T WITH CARON
    (Unicode:#$0165; Attr:laLower; CaseCode:#$0164),   // LATIN SMALL LETTER T WITH CARON
    (Unicode:#$0166; Attr:laUpper; CaseCode:#$0167),   // LATIN CAPITAL LETTER T WITH STROKE
    (Unicode:#$0167; Attr:laLower; CaseCode:#$0166),   // LATIN SMALL LETTER T WITH STROKE
    (Unicode:#$0168; Attr:laUpper; CaseCode:#$0169),   // LATIN CAPITAL LETTER U WITH TILDE
    (Unicode:#$0169; Attr:laLower; CaseCode:#$0168),   // LATIN SMALL LETTER U WITH TILDE
    (Unicode:#$016A; Attr:laUpper; CaseCode:#$016B),   // LATIN CAPITAL LETTER U WITH MACRON
    (Unicode:#$016B; Attr:laLower; CaseCode:#$016A),   // LATIN SMALL LETTER U WITH MACRON
    (Unicode:#$016C; Attr:laUpper; CaseCode:#$016D),   // LATIN CAPITAL LETTER U WITH BREVE
    (Unicode:#$016D; Attr:laLower; CaseCode:#$016C),   // LATIN SMALL LETTER U WITH BREVE
    (Unicode:#$016E; Attr:laUpper; CaseCode:#$016F),   // LATIN CAPITAL LETTER U WITH RING ABOVE
    (Unicode:#$016F; Attr:laLower; CaseCode:#$016E),   // LATIN SMALL LETTER U WITH RING ABOVE
    (Unicode:#$0170; Attr:laUpper; CaseCode:#$0171),   // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
    (Unicode:#$0171; Attr:laLower; CaseCode:#$0170),   // LATIN SMALL LETTER U WITH DOUBLE ACUTE
    (Unicode:#$0172; Attr:laUpper; CaseCode:#$0173),   // LATIN CAPITAL LETTER U WITH OGONEK
    (Unicode:#$0173; Attr:laLower; CaseCode:#$0172),   // LATIN SMALL LETTER U WITH OGONEK
    (Unicode:#$0174; Attr:laUpper; CaseCode:#$0175),   // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
    (Unicode:#$0175; Attr:laLower; CaseCode:#$0174),   // LATIN SMALL LETTER W WITH CIRCUMFLEX
    (Unicode:#$0176; Attr:laUpper; CaseCode:#$0177),   // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
    (Unicode:#$0177; Attr:laLower; CaseCode:#$0176),   // LATIN SMALL LETTER Y WITH CIRCUMFLEX
    (Unicode:#$0178; Attr:laUpper; CaseCode:#$00FF),   // LATIN CAPITAL LETTER Y WITH DIAERESIS
    (Unicode:#$0179; Attr:laUpper; CaseCode:#$017A),   // LATIN CAPITAL LETTER Z WITH ACUTE
    (Unicode:#$017A; Attr:laLower; CaseCode:#$0179),   // LATIN SMALL LETTER Z WITH ACUTE
    (Unicode:#$017B; Attr:laUpper; CaseCode:#$017C),   // LATIN CAPITAL LETTER Z WITH DOT ABOVE
    (Unicode:#$017C; Attr:laLower; CaseCode:#$017B),   // LATIN SMALL LETTER Z WITH DOT ABOVE
    (Unicode:#$017D; Attr:laUpper; CaseCode:#$017E),   // LATIN CAPITAL LETTER Z WITH CARON
    (Unicode:#$017E; Attr:laLower; CaseCode:#$017D),   // LATIN SMALL LETTER Z WITH CARON
    (Unicode:#$017F; Attr:laLower; CaseCode:#$0053),   // LATIN SMALL LETTER LONG S
    (Unicode:#$0180; Attr:laLower; CaseCode:#$FFFF),   // LATIN SMALL LETTER B WITH STROKE
    (Unicode:#$0181; Attr:laUpper; CaseCode:#$0253),   // LATIN CAPITAL LETTER B WITH HOOK
    (Unicode:#$0182; Attr:laUpper; CaseCode:#$0183),   // LATIN CAPITAL LETTER B WITH TOPBAR
    (Unicode:#$0183; Attr:laLower; CaseCode:#$0182),   // LATIN SMALL LETTER B WITH TOPBAR
    (Unicode:#$0184; Attr:laUpper; CaseCode:#$0185),   // LATIN CAPITAL LETTER TONE SIX
    (Unicode:#$0185; Attr:laLower; CaseCode:#$0184),   // LATIN SMALL LETTER TONE SIX
    (Unicode:#$0186; Attr:laUpper; CaseCode:#$0254),   // LATIN CAPITAL LETTER OPEN O
    (Unicode:#$0187; Attr:laUpper; CaseCode:#$0188),   // LATIN CAPITAL LETTER C WITH HOOK
    (Unicode:#$0188; Attr:laLower; CaseCode:#$0187),   // LATIN SMALL LETTER C WITH HOOK
    (Unicode:#$0189; Attr:laUpper; CaseCode:#$0256),   // LATIN CAPITAL LETTER AFRICAN D
    (Unicode:#$018A; Attr:laUpper; CaseCode:#$0257),   // LATIN CAPITAL LETTER D WITH HOOK
    (Unicode:#$018B; Attr:laUpper; CaseCode:#$018C),   // LATIN CAPITAL LETTER D WITH TOPBAR
    (Unicode:#$018C; Attr:laLower; CaseCode:#$018B),   // LATIN SMALL LETTER D WITH TOPBAR
    (Unicode:#$018D; Attr:laLower; CaseCode:#$FFFF),   // LATIN SMALL LETTER TURNED DELTA
    (Unicode:#$018E; Attr:laUpper; CaseCode:#$01DD),   // LATIN CAPITAL LETTER REVERSED E
    (Unicode:#$018F; Attr:laUpper; CaseCode:#$0259),   // LATIN CAPITAL LETTER SCHWA
    (Unicode:#$0190; Attr:laUpper; CaseCode:#$025B),   // LATIN CAPITAL LETTER OPEN E
    (Unicode:#$0191; Attr:laUpper; CaseCode:#$0192),   // LATIN CAPITAL LETTER F WITH HOOK
    (Unicode:#$0192; Attr:laLower; CaseCode:#$0191),   // LATIN SMALL LETTER F WITH HOOK
    (Unicode:#$0193; Attr:laUpper; CaseCode:#$0260),   // LATIN CAPITAL LETTER G WITH HOOK
    (Unicode:#$0194; Attr:laUpper; CaseCode:#$0263),   // LATIN CAPITAL LETTER GAMMA
    (Unicode:#$0195; Attr:laLower; CaseCode:#$01F6),   // LATIN SMALL LETTER HV
    (Unicode:#$0196; Attr:laUpper; CaseCode:#$0269),   // LATIN CAPITAL LETTER IOTA
    (Unicode:#$0197; Attr:laUpper; CaseCode:#$0268),   // LATIN CAPITAL LETTER I WITH STROKE
    (Unicode:#$0198; Attr:laUpper; CaseCode:#$0199),   // LATIN CAPITAL LETTER K WITH HOOK
    (Unicode:#$0199; Attr:laLower; CaseCode:#$0198),   // LATIN SMALL LETTER K WITH HOOK
    (Unicode:#$019A; Attr:laLower; CaseCode:#$FFFF),   // LATIN SMALL LETTER L WITH BAR
    (Unicode:#