 | 
|
|
|
{ }
{ Unicode character functions v3.04 }
{ }
{ This unit is copyright © 2002-2004 by David J Butler }
{ }
{ This unit is part of Delphi Fundamentals. }
{ Its original file name is cUnicodeChar.pas }
{ The latest version is available from the Fundamentals home page }
{ http://fundementals.sourceforge.net/ }
{ }
{ I invite you to use this unit, free of charge. }
{ I invite you to distribute this unit, but it must be for free. }
{ I also invite you to contribute to its development, }
{ but do not distribute a modified copy of this file. }
{ }
{ A forum is available on SourceForge for general discussion }
{ http://sourceforge.net/forum/forum.php?forum_id=2117 }
{ }
{ }
{ Description: }
{ Unicode character constants. }
{ Functions for checking unicode character properties. }
{ Functions to interpret unicode characters. }
{ Unicode character case functions. }
{ }
{ }
{ Notes: }
{ Most functions in this unit work from tables in source code form. }
{ All tables were generated from the Unicode 3.2 data. }
{ }
{ The source code is deceptively big, for example, the upper-lower case }
{ table is about 128K in the source code, but only 7K when compiled. }
{ }
{ This unit has no dependencies on any other unit. }
{ }
{ Revision history: }
{ 19/04/2002 0.01 Initial version }
{ 21/04/2002 0.02 Added case and decomposition functions }
{ 28/10/2002 3.03 Refactored for Fundamentals 3. }
{ 10/01/2004 3.04 Changes to allow smart-linking by the compiler. }
{ Typically this saves 100-200K on the executable size. }
{ }
{$INCLUDE ..\cDefines.inc}
unit cUnicodeChar;
interface
{ Unit identification: name, version string and copyright notice exposed }
{ as public constants. }
const
UnitName = 'cUnicodeChar';
UnitVersion = '3.04';
UnitCopyright = 'Copyright (c) 2002-2004 David J Butler';
{ }
{ Unicode character constants }
{ }
const
// Control characters in the range U+0000..U+001B, named by their ASCII mnemonics.
WideNULL = WideChar(#0);
WideSOH = WideChar(#1);
WideSTX = WideChar(#2);
WideETX = WideChar(#3);
WideEOT = WideChar(#4);
WideENQ = WideChar(#5);
WideACK = WideChar(#6);
WideBEL = WideChar(#7);
WideBS = WideChar(#8);
WideHT = WideChar(#9);
WideLF = WideChar(#10);
WideVT = WideChar(#11);
WideFF = WideChar(#12);
WideCR = WideChar(#13);
WideNAK = WideChar(#21);
WideSYN = WideChar(#22);
WideCAN = WideChar(#24);
WideEOF = WideChar(#26);
WideESC = WideChar(#27);
WideSP = WideChar(#32);
// Unlike its neighbours this is a typed constant holding a two-character WideString (CR, LF).
WideCRLF : WideString = #13#10;
WideSingleQuote = WideChar('''');
WideDoubleQuote = WideChar('"');
// Spaces and separators outside the ASCII range.
WideNoBreakSpace = WideChar(#$00A0);
WideLineSeparator = WideChar(#$2028);
WideParagraphSeparator = WideChar(#$2029);
// NOTE(review): U+FEFF is the Unicode byte order mark and U+FFFE is the value it
// reads as when the two bytes are swapped; confirm the MSB/LSB naming against callers.
WideBOM_MSB_First = WideChar(#$FFFE);
WideBOM_LSB_First = WideChar(#$FEFF);
// Replacement and sentinel code points.
WideObjectReplacement = WideChar(#$FFFC);
WideCharReplacement = WideChar(#$FFFD);
WideInvalid = WideChar(#$FFFF);
WideCopyrightSign = WideChar(#$00A9);
WideRegisteredSign = WideChar(#$00AE);
// Surrogate boundaries: high surrogates U+D800..U+DB7F, private-use high
// surrogates U+DB80..U+DBFF and low surrogates U+DC00..U+DFFF.
WideHighSurrogateFirst = WideChar(#$D800);
WideHighSurrogateLast = WideChar(#$DB7F);
WideLowSurrogateFirst = WideChar(#$DC00);
WideLowSurrogateLast = WideChar(#$DFFF);
WidePrivateHighSurrogateFirst = WideChar(#$DB80);
WidePrivateHighSurrogateLast = WideChar(#$DBFF);
{ }
{ Unicode character functions }
{ }
// Under DELPHI5 only, UCS4Char is declared here as a LongWord
// (presumably because that compiler's RTL does not provide the type - confirm).
{$IFDEF DELPHI5}
type
UCS4Char = LongWord;
{$ENDIF}
type
// Signature of a predicate that tests a single WideChar.
WideCharMatchFunction = function (const Ch: WideChar): Boolean;
// White space, control and ignorable characters.
function IsASCIIChar(const Ch: WideChar): Boolean;
function IsWhiteSpace(const Ch: WideChar): Boolean;
function IsControl(const Ch: WideChar): Boolean;
function IsControlOrWhiteSpace(const Ch: WideChar): Boolean;
function IsIgnorable(const Ch: UCS4Char): Boolean;
// Punctuation classes.
function IsDash(const Ch: WideChar): Boolean;
function IsHyphen(const Ch: WideChar): Boolean;
function IsFullStop(const Ch: WideChar): Boolean;
function IsComma(const Ch: WideChar): Boolean;
function IsExclamationMark(const Ch: WideChar): Boolean;
function IsQuestionMark(const Ch: WideChar): Boolean;
// Paired delimiters. The Get... functions return #$0000 when the argument
// has no counterpart in their table.
function IsLeftParenthesis(const Ch: WideChar): Boolean;
function IsLeftBracket(const Ch: WideChar): Boolean;
function GetRightParenthesis(const LeftParenthesis: WideChar): WideChar;
function GetRightBracket(const LeftBracket: WideChar): WideChar;
function IsSingularQuotationMark(const Ch: WideChar): Boolean;
function IsOpeningQuotationMark(const Ch: WideChar): Boolean;
function IsClosingQuotationMark(const Ch: WideChar): Boolean;
function GetClosingQuotationMark(const OpeningQuote: WideChar): WideChar;
function GetOpeningQuotationMark(const ClosingQuote: WideChar): WideChar;
function IsPunctuation(const Ch: WideChar): Boolean;
// Numeric characters. DecimalDigitValue returns -1 for a non-digit.
// FractionCharacterValue returns the fraction as A/B, or False with A = B = 0.
// RomanNumeralValue returns 0 when Ch is not a Roman numeral character.
function IsDecimalDigit(const Ch: UCS4Char): Boolean; overload;
function IsDecimalDigit(const Ch: WideChar): Boolean; overload;
function DecimalDigitValue(const Ch: UCS4Char): Integer; overload;
function DecimalDigitValue(const Ch: WideChar): Integer; overload;
function FractionCharacterValue(const Ch: WideChar; var A, B: Integer): Boolean;
function RomanNumeralValue(const Ch: WideChar): Integer;
// Hexadecimal digits.
function IsHexDigit(const Ch: UCS4Char): Boolean; overload;
function IsHexDigit(const Ch: WideChar): Boolean; overload;
function HexDigitValue(const Ch: UCS4Char): Integer; overload;
function HexDigitValue(const Ch: WideChar): Integer; overload;
// Character case tests and conversions.
function IsUpperCase(const Ch: WideChar): Boolean;
function IsLowerCase(const Ch: WideChar): Boolean;
function IsTitleCase(const Ch: WideChar): Boolean;
function WideUpCase(const Ch: WideChar): WideChar;
function WideLowCase(const Ch: WideChar): WideChar;
function WideUpCaseFolding(const Ch: WideChar): WideString;
function WideLowCaseFolding(const Ch: WideChar): WideString;
function WideTitleCaseFolding(const Ch: WideChar): WideString;
function WideIsEqualNoCase(const A, B: WideChar): Boolean;
// Letters, combining class and decomposition.
function IsLetter(const Ch: WideChar): Boolean;
function IsAlphabetic(const Ch: WideChar): Boolean;
function GetCombiningClass(const Ch: WideChar): Byte;
function GetCharacterDecomposition(const Ch: UCS4Char): WideString; overload;
function GetCharacterDecomposition(const Ch: WideChar): WideString; overload;
implementation
{ }
{ Character functions }
{ }
{ Returns True when Ch lies in the 7-bit range U+0000..U+007F. }
function IsASCIIChar(const Ch: WideChar): Boolean;
begin
  Case Ch of
    #$0000..#$007F : Result := True;
  else
    Result := False;
  end;
end;
{ Returns True when Ch is one of the white space characters listed below. }
{ U+205F MEDIUM MATHEMATICAL SPACE (category Zs, introduced in Unicode 3.2) }
{ was absent from the original table and is now recognised. }
function IsWhiteSpace(const Ch: WideChar): Boolean;
begin
  Case Ch of
    #$0009..#$000D,    // ASCII CONTROL
    #$0020,            // SPACE
    #$0085,            // <control>
    #$00A0,            // NO-BREAK SPACE
    #$1680,            // OGHAM SPACE MARK
    #$2000..#$200A,    // EN QUAD..HAIR SPACE
    #$2028,            // LINE SEPARATOR
    #$2029,            // PARAGRAPH SEPARATOR
    #$202F,            // NARROW NO-BREAK SPACE
    #$205F,            // MEDIUM MATHEMATICAL SPACE
    #$3000 :           // IDEOGRAPHIC SPACE
      Result := True;
  else
    Result := False;
  end;
end;
{ Returns True when Ch is in U+0000..U+001F or in U+007F..U+009F. }
function IsControl(const Ch: WideChar): Boolean;
var Code : Word;
begin
  Code := Ord(Ch);
  Result := (Code <= $001F) or
            ((Code >= $007F) and (Code <= $009F));
end;
{ Returns True when Ch satisfies IsControl or IsWhiteSpace. }
function IsControlOrWhiteSpace(const Ch: WideChar): Boolean;
begin
  if IsControl(Ch) then
    Result := True
  else
    Result := IsWhiteSpace(Ch);
end;
{ Returns True when Ch belongs to the ignorable set below. }
{ The table is derived from 'Cf' + 'Cc' + 'Cs' - White_Space; runs that are }
{ adjacent in the source data are written here as single ranges. }
function IsIgnorable(const Ch: UCS4Char): Boolean;
begin
  Result := False;
  Case Ch of
    $0000..$0008,      // Cc <control>
    $000E..$001F,      // Cc <control>
    $007F..$0084,      // Cc <control>
    $0086..$009F,      // Cc <control>
    $06DD,             // Cf ARABIC END OF AYAH
    $070F,             // Cf SYRIAC ABBREVIATION MARK
    $180B..$180E,      // Mn MONGOLIAN FREE VARIATION SELECTOR ONE..THREE, Cf MONGOLIAN VOWEL SEPARATOR
    $200C..$200F,      // Cf ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
    $202A..$202E,      // Cf LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
    $2060..$206F,      // Cf WORD JOINER..INVISIBLE SEPARATOR, Cn [6], Cf INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
    $D800..$DFFF,      // Cs [2048]
    $FE00..$FE0F,      // Mn VARIATION SELECTOR-1..VARIATION SELECTOR-16
    $FEFF,             // Cf ZERO WIDTH NO-BREAK SPACE
    $FFF0..$FFFB,      // Cn [9], Cf INTERLINEAR ANNOTATION ANCHOR..TERMINATOR
    $1D173..$1D17A,    // Cf MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
    $E0000..$E0FFF :   // Cn, Cf LANGUAGE TAG, Cn [30], Cf TAG SPACE..CANCEL TAG, Cn [3968]
      Result := True;
  end;
end;
{ Returns True when Ch is one of the dash characters listed below. }
function IsDash(const Ch: WideChar): Boolean;
begin
  Result := False;
  Case Ord(Ch) of
    $002D,             // HYPHEN-MINUS
    $00AD,             // SOFT HYPHEN
    $058A,             // ARMENIAN HYPHEN
    $1806,             // MONGOLIAN TODO SOFT HYPHEN
    $2010..$2015,      // HYPHEN..HORIZONTAL BAR
    $207B,             // SUPERSCRIPT MINUS
    $208B,             // SUBSCRIPT MINUS
    $2212,             // MINUS SIGN
    $301C,             // WAVE DASH
    $3030,             // WAVY DASH
    $FE31..$FE32,      // PRESENTATION FORM FOR VERTICAL EM DASH..EN DASH
    $FE58,             // SMALL EM DASH
    $FE63,             // SMALL HYPHEN-MINUS
    $FF0D :            // FULLWIDTH HYPHEN-MINUS
      Result := True;
  end;
end;
{ Returns True when Ch is one of the hyphen characters listed below. }
function IsHyphen(const Ch: WideChar): Boolean;
begin
  Result := False;
  Case Ord(Ch) of
    $002D,             // HYPHEN-MINUS
    $00AD,             // SOFT HYPHEN
    $058A,             // ARMENIAN HYPHEN
    $1806,             // MONGOLIAN TODO SOFT HYPHEN
    $2010..$2011,      // HYPHEN..NON-BREAKING HYPHEN
    $30FB,             // KATAKANA MIDDLE DOT
    $FE63,             // SMALL HYPHEN-MINUS
    $FF0D,             // FULLWIDTH HYPHEN-MINUS
    $FF65 :            // HALFWIDTH KATAKANA MIDDLE DOT
      Result := True;
  end;
end;
{ Returns True when Ch is one of the full stop characters listed below. }
function IsFullStop(const Ch: WideChar): Boolean;
begin
  Result := False;
  Case Ch of
    #$002E,            // FULL STOP
    #$0589,            // ARMENIAN FULL STOP
    #$06D4,            // ARABIC FULL STOP
    #$0701..#$0702,    // SYRIAC SUPRALINEAR FULL STOP, SYRIAC SUBLINEAR FULL STOP
    #$1362,            // ETHIOPIC FULL STOP
    #$166E,            // CANADIAN SYLLABICS FULL STOP
    #$1803,            // MONGOLIAN FULL STOP
    #$1809,            // MONGOLIAN MANCHU FULL STOP
    #$3002,            // IDEOGRAPHIC FULL STOP
    #$FE52,            // SMALL FULL STOP
    #$FF0E,            // FULLWIDTH FULL STOP
    #$FF61 :           // HALFWIDTH IDEOGRAPHIC FULL STOP
      Result := True;
  end;
end;
{ Returns True when Ch is one of the comma characters listed below. }
function IsComma(const Ch: WideChar): Boolean;
begin
  Result := False;
  Case Ch of
    #$002C,            // COMMA
    #$055D,            // ARMENIAN COMMA
    #$060C,            // ARABIC COMMA
    #$0F14,            // TIBETAN MARK GTER TSHEG
    #$1363,            // ETHIOPIC COMMA
    #$1802,            // MONGOLIAN COMMA
    #$1808,            // MONGOLIAN MANCHU COMMA
    #$3001,            // IDEOGRAPHIC COMMA
    #$FE50..#$FE51,    // SMALL COMMA, SMALL IDEOGRAPHIC COMMA
    #$FF0C,            // FULLWIDTH COMMA
    #$FF64 :           // HALFWIDTH IDEOGRAPHIC COMMA
      Result := True;
  end;
end;
{ Returns True when Ch is one of the exclamation mark characters listed below, }
{ including the combined question/exclamation forms. }
function IsExclamationMark(const Ch: WideChar): Boolean;
begin
  Result := False;
  Case Ch of
    #$0021,            // EXCLAMATION MARK
    #$00A1,            // INVERTED EXCLAMATION MARK
    #$055C,            // ARMENIAN EXCLAMATION MARK
    #$203C..#$203D,    // DOUBLE EXCLAMATION MARK, INTERROBANG
    #$2048..#$2049,    // QUESTION EXCLAMATION MARK, EXCLAMATION QUESTION MARK
    #$FE57,            // SMALL EXCLAMATION MARK
    #$FF01 :           // FULLWIDTH EXCLAMATION MARK
      Result := True;
  end;
end;
{ Returns True when Ch is one of the question mark characters listed below. }
{ The combined forms INTERROBANG and QUESTION EXCLAMATION MARK, which }
{ IsExclamationMark already accepts, and DOUBLE QUESTION MARK (the counterpart }
{ of DOUBLE EXCLAMATION MARK) were missing from the original table and are }
{ now recognised. }
function IsQuestionMark(const Ch: WideChar): Boolean;
begin
  Case Ord(Ch) of
    $003F,    // QUESTION MARK
    $00BF,    // INVERTED QUESTION MARK
    $037E,    // GREEK QUESTION MARK
    $055E,    // ARMENIAN QUESTION MARK
    $061F,    // ARABIC QUESTION MARK
    $1367,    // ETHIOPIC QUESTION MARK
    $203D,    // INTERROBANG
    $2047,    // DOUBLE QUESTION MARK
    $2048,    // QUESTION EXCLAMATION MARK
    $2049,    // EXCLAMATION QUESTION MARK
    $FE56,    // SMALL QUESTION MARK
    $FF1F :   // FULLWIDTH QUESTION MARK
      Result := True;
  else
    Result := False;
  end;
end;
{ Returns the right parenthesis that pairs with LeftParenthesis, or #$0000 }
{ when LeftParenthesis is not one of the left parentheses listed below. }
{ Every listed left parenthesis is immediately followed by its partner. }
function GetRightParenthesis(const LeftParenthesis: WideChar): WideChar;
begin
  Case LeftParenthesis of
    #$0028,    // PARENTHESIS
    #$207D,    // SUPERSCRIPT PARENTHESIS
    #$208D,    // SUBSCRIPT PARENTHESIS
    #$FD3E,    // ORNATE PARENTHESIS
    #$FE35,    // PRESENTATION FORM FOR VERTICAL PARENTHESIS
    #$FE59,    // SMALL PARENTHESIS
    #$FF08 :   // FULLWIDTH PARENTHESIS
      Result := WideChar(Ord(LeftParenthesis) + 1);
  else
    Result := #$0000;
  end;
end;
{ Returns True when GetRightParenthesis knows a partner for Ch. }
function IsLeftParenthesis(const Ch: WideChar): Boolean;
begin
  Result := Ord(GetRightParenthesis(Ch)) <> 0;
end;
{ Returns the right bracket that pairs with LeftBracket, or #$0000 when }
{ LeftBracket is not one of the left brackets listed below. }
function GetRightBracket(const LeftBracket: WideChar): WideChar;
begin
  Case LeftBracket of
    // Pairs whose right bracket sits two code points after the left one
    #$005B,    // SQUARE BRACKET
    #$007B,    // CURLY BRACKET
    #$FF3B,    // FULLWIDTH SQUARE BRACKET
    #$FF5B :   // FULLWIDTH CURLY BRACKET
      Result := WideChar(Ord(LeftBracket) + 2);
    // Pairs whose right bracket immediately follows the left one
    #$2045,    // SQUARE BRACKET WITH QUILL
    #$2329,    // POINTING ANGLE BRACKET
    #$3008,    // ANGLE BRACKET
    #$300A,    // DOUBLE ANGLE BRACKET
    #$300C,    // CORNER BRACKET
    #$300E,    // WHITE CORNER BRACKET
    #$3010,    // BLACK LENTICULAR BRACKET
    #$3014,    // TORTOISE SHELL BRACKET
    #$3016,    // WHITE LENTICULAR BRACKET
    #$3018,    // WHITE TORTOISE SHELL BRACKET
    #$301A,    // WHITE SQUARE BRACKET
    #$FE37,    // PRESENTATION FORM FOR VERTICAL CURLY BRACKET
    #$FE39,    // PRESENTATION FORM FOR VERTICAL TORTOISE SHELL BRACKET
    #$FE3B,    // PRESENTATION FORM FOR VERTICAL BLACK LENTICULAR BRACKET
    #$FE3D,    // PRESENTATION FORM FOR VERTICAL DOUBLE ANGLE BRACKET
    #$FE3F,    // PRESENTATION FORM FOR VERTICAL ANGLE BRACKET
    #$FE41,    // PRESENTATION FORM FOR VERTICAL CORNER BRACKET
    #$FE43,    // PRESENTATION FORM FOR VERTICAL WHITE CORNER BRACKET
    #$FE5B,    // SMALL CURLY BRACKET
    #$FE5D,    // SMALL TORTOISE SHELL BRACKET
    #$FF62 :   // HALFWIDTH CORNER BRACKET
      Result := WideChar(Ord(LeftBracket) + 1);
  else
    Result := #$0000;
  end;
end;
{ Returns True when GetRightBracket knows a partner for Ch. }
function IsLeftBracket(const Ch: WideChar): Boolean;
begin
  Result := Ord(GetRightBracket(Ch)) <> 0;
end;
{ Returns True for the quotation marks that have no distinct opening and }
{ closing form: QUOTATION MARK, APOSTROPHE and their fullwidth variants. }
function IsSingularQuotationMark(const Ch: WideChar): Boolean;
var Code : Word;
begin
  Code := Ord(Ch);
  Result := (Code = $0022) or    // QUOTATION MARK
            (Code = $0027) or    // APOSTROPHE
            (Code = $FF02) or    // FULLWIDTH QUOTATION MARK
            (Code = $FF07);      // FULLWIDTH APOSTROPHE
end;
{ Returns the closing quotation mark that pairs with OpeningQuote, or #$0000 }
{ when OpeningQuote is not one of the opening marks listed below. }
function GetClosingQuotationMark(const OpeningQuote: WideChar): WideChar;
begin
  Case OpeningQuote of
    #$00AB :   // LEFT/RIGHT -POINTING DOUBLE ANGLE QUOTATION MARK
      Result := #$00BB;
    // The closing mark of each of these immediately follows the opening mark
    #$2018,    // LEFT/RIGHT SINGLE QUOTATION MARK
    #$201A,    // SINGLE LOW-9 QUOTATION MARK / SINGLE HIGH-REVERSED-9 QUOTATION MARK
    #$201C,    // LEFT/RIGHT DOUBLE QUOTATION MARK
    #$201E,    // DOUBLE LOW-9 QUOTATION MARK / DOUBLE HIGH-REVERSED-9 QUOTATION MARK
    #$2039,    // SINGLE LEFT/RIGHT -POINTING ANGLE QUOTATION MARK
    #$301D :   // REVERSED DOUBLE PRIME QUOTATION MARK / DOUBLE PRIME QUOTATION MARK (also $301F)
      Result := WideChar(Ord(OpeningQuote) + 1);
  else
    Result := #$0000;
  end;
end;
{ Returns True when GetClosingQuotationMark knows a partner for Ch. }
function IsOpeningQuotationMark(const Ch: WideChar): Boolean;
begin
  Result := Ord(GetClosingQuotationMark(Ch)) <> 0;
end;
{ Returns the opening quotation mark that pairs with ClosingQuote, or #$0000 }
{ when ClosingQuote is not one of the closing marks listed below. }
function GetOpeningQuotationMark(const ClosingQuote: WideChar): WideChar;
begin
  Case ClosingQuote of
    #$00BB :   // LEFT/RIGHT -POINTING DOUBLE ANGLE QUOTATION MARK
      Result := #$00AB;
    // The opening mark of each of these immediately precedes the closing mark
    #$2019,    // LEFT/RIGHT SINGLE QUOTATION MARK
    #$201B,    // SINGLE LOW-9 QUOTATION MARK / SINGLE HIGH-REVERSED-9 QUOTATION MARK
    #$201D,    // LEFT/RIGHT DOUBLE QUOTATION MARK
    #$201F,    // DOUBLE LOW-9 QUOTATION MARK / DOUBLE HIGH-REVERSED-9 QUOTATION MARK
    #$203A,    // SINGLE LEFT/RIGHT -POINTING ANGLE QUOTATION MARK
    #$301E :   // REVERSED DOUBLE PRIME QUOTATION MARK / DOUBLE PRIME QUOTATION MARK
      Result := WideChar(Ord(ClosingQuote) - 1);
    #$301F :   // REVERSED DOUBLE PRIME QUOTATION MARK / LOW DOUBLE PRIME QUOTATION MARK
      Result := #$301D;
  else
    Result := #$0000;
  end;
end;
{ Returns True when GetOpeningQuotationMark knows a partner for Ch. }
function IsClosingQuotationMark(const Ch: WideChar): Boolean;
begin
  Result := Ord(GetOpeningQuotationMark(Ch)) <> 0;
end;
{ Returns True when Ch is one of the punctuation characters in the table }
{ below. Consecutive code points are written as ranges; the comment on each }
{ range names its first and last member. }
function IsPunctuation(const Ch: WideChar): Boolean;
begin
  Result := False;
  Case Ord(Ch) of
    $0021..$0023,    // EXCLAMATION MARK..NUMBER SIGN
    $0025..$002A,    // PERCENT SIGN..ASTERISK
    $002C..$002F,    // COMMA..SOLIDUS
    $003A..$003B,    // COLON, SEMICOLON
    $003F..$0040,    // QUESTION MARK, COMMERCIAL AT
    $005B..$005D,    // LEFT SQUARE BRACKET..RIGHT SQUARE BRACKET
    $005F,           // LOW LINE
    $007B,           // LEFT CURLY BRACKET
    $007D,           // RIGHT CURLY BRACKET
    $00A1,           // INVERTED EXCLAMATION MARK
    $00AB,           // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    $00AD,           // SOFT HYPHEN
    $00B7,           // MIDDLE DOT
    $00BB,           // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    $00BF,           // INVERTED QUESTION MARK
    $037E,           // GREEK QUESTION MARK
    $0387,           // GREEK ANO TELEIA
    $055A..$055F,    // ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
    $0589..$058A,    // ARMENIAN FULL STOP, ARMENIAN HYPHEN
    $05BE,           // HEBREW PUNCTUATION MAQAF
    $05C0,           // HEBREW PUNCTUATION PASEQ
    $05C3,           // HEBREW PUNCTUATION SOF PASUQ
    $05F3..$05F4,    // HEBREW PUNCTUATION GERESH, HEBREW PUNCTUATION GERSHAYIM
    $060C,           // ARABIC COMMA
    $061B,           // ARABIC SEMICOLON
    $061F,           // ARABIC QUESTION MARK
    $066A..$066D,    // ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
    $06D4,           // ARABIC FULL STOP
    $0700..$070D,    // SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
    $0964..$0965,    // DEVANAGARI DANDA, DEVANAGARI DOUBLE DANDA
    $0970,           // DEVANAGARI ABBREVIATION SIGN
    $0DF4,           // SINHALA PUNCTUATION KUNDDALIYA
    $0E4F,           // THAI CHARACTER FONGMAN
    $0E5A..$0E5B,    // THAI CHARACTER ANGKHANKHU, THAI CHARACTER KHOMUT
    $0F04..$0F12,    // TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD
    $0F3A..$0F3D,    // TIBETAN MARK GUG RTAGS GYON..TIBETAN MARK ANG KHANG GYAS
    $0F85,           // TIBETAN MARK PALUTA
    $104A..$104F,    // MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
    $10FB,           // GEORGIAN PARAGRAPH SEPARATOR
    $1361..$1368,    // ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
    $166D..$166E,    // CANADIAN SYLLABICS CHI SIGN, CANADIAN SYLLABICS FULL STOP
    $169B..$169C,    // OGHAM FEATHER MARK, OGHAM REVERSED FEATHER MARK
    $16EB..$16ED,    // RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
    $17D4..$17DA,    // KHMER SIGN KHAN..KHMER SIGN KOOMUUT
    $17DC,           // KHMER SIGN AVAKRAHASANYA
    $1800..$180A,    // MONGOLIAN BIRGA..MONGOLIAN NIRUGU
    $2010..$2027,    // HYPHEN..HYPHENATION POINT
    $2030..$2043,    // PER MILLE SIGN..HYPHEN BULLET
    $2045..$2046,    // LEFT SQUARE BRACKET WITH QUILL, RIGHT SQUARE BRACKET WITH QUILL
    $2048..$204D,    // QUESTION EXCLAMATION MARK..BLACK RIGHTWARDS BULLET
    $207D..$207E,    // SUPERSCRIPT LEFT PARENTHESIS, SUPERSCRIPT RIGHT PARENTHESIS
    $208D..$208E,    // SUBSCRIPT LEFT PARENTHESIS, SUBSCRIPT RIGHT PARENTHESIS
    $2329..$232A,    // LEFT-POINTING ANGLE BRACKET, RIGHT-POINTING ANGLE BRACKET
    $3001..$3003,    // IDEOGRAPHIC COMMA..DITTO MARK
    $3008..$3011,    // LEFT ANGLE BRACKET..RIGHT BLACK LENTICULAR BRACKET
    $3014..$301F,    // LEFT TORTOISE SHELL BRACKET..LOW DOUBLE PRIME QUOTATION MARK
    $3030,           // WAVY DASH
    $30FB,           // KATAKANA MIDDLE DOT
    $FD3E..$FD3F,    // ORNATE LEFT PARENTHESIS, ORNATE RIGHT PARENTHESIS
    $FE30..$FE44,    // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER..VERTICAL RIGHT WHITE CORNER BRACKET
    $FE49..$FE52,    // DASHED OVERLINE..SMALL FULL STOP
    $FE54..$FE61,    // SMALL SEMICOLON..SMALL ASTERISK
    $FE63,           // SMALL HYPHEN-MINUS
    $FE68,           // SMALL REVERSE SOLIDUS
    $FE6A..$FE6B,    // SMALL PERCENT SIGN, SMALL COMMERCIAL AT
    $FF01..$FF03,    // FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
    $FF05..$FF0A,    // FULLWIDTH PERCENT SIGN..FULLWIDTH ASTERISK
    $FF0C..$FF0F,    // FULLWIDTH COMMA..FULLWIDTH SOLIDUS
    $FF1A..$FF1B,    // FULLWIDTH COLON, FULLWIDTH SEMICOLON
    $FF1F..$FF20,    // FULLWIDTH QUESTION MARK, FULLWIDTH COMMERCIAL AT
    $FF3B..$FF3D,    // FULLWIDTH LEFT SQUARE BRACKET..FULLWIDTH RIGHT SQUARE BRACKET
    $FF3F,           // FULLWIDTH LOW LINE
    $FF5B,           // FULLWIDTH LEFT CURLY BRACKET
    $FF5D,           // FULLWIDTH RIGHT CURLY BRACKET
    $FF61..$FF65 :   // HALFWIDTH IDEOGRAPHIC FULL STOP..HALFWIDTH KATAKANA MIDDLE DOT
      Result := True;
  end;
end;
{ Returns the code point of the digit zero for the run of decimal digits }
{ that contains Ch, or 0 when Ch is not in any of the runs listed below. }
{ Callers obtain the digit value as Ch minus the returned base. }
{ Only U+2070 and U+2074..U+2079 are superscript digits in that block: }
{ U+2071 is SUPERSCRIPT LATIN SMALL LETTER I and U+2072..U+2073 are not }
{ digits, so they are excluded here rather than being reported as the }
{ digits 1 to 3 as the original $2070..$2079 range did. }
function DecimalDigitBase(const Ch: UCS4Char): UCS4Char;
begin
  Case Ch of
    $0030..$0039 : Result := $0030; // DIGIT
    $0660..$0669 : Result := $0660; // ARABIC-INDIC DIGIT
    $06F0..$06F9 : Result := $06F0; // EXTENDED ARABIC-INDIC DIGIT
    $0966..$096F : Result := $0966; // DEVANAGARI DIGIT
    $09E6..$09EF : Result := $09E6; // BENGALI DIGIT
    $0A66..$0A6F : Result := $0A66; // GURMUKHI DIGIT
    $0AE6..$0AEF : Result := $0AE6; // GUJARATI DIGIT
    $0B66..$0B6F : Result := $0B66; // ORIYA DIGIT
    $0C66..$0C6F : Result := $0C66; // TELUGU DIGIT
    $0CE6..$0CEF : Result := $0CE6; // KANNADA DIGIT
    $0D66..$0D6F : Result := $0D66; // MALAYALAM DIGIT
    $0E50..$0E59 : Result := $0E50; // THAI DIGIT
    $0ED0..$0ED9 : Result := $0ED0; // LAO DIGIT
    $0F20..$0F29 : Result := $0F20; // TIBETAN DIGIT
    $1040..$1049 : Result := $1040; // MYANMAR DIGIT
    $17E0..$17E9 : Result := $17E0; // KHMER DIGIT
    $1810..$1819 : Result := $1810; // MONGOLIAN DIGIT
    $2070,
    $2074..$2079 : Result := $2070; // SUPERSCRIPT ZERO, SUPERSCRIPT FOUR..NINE
    $2080..$2089 : Result := $2080; // SUBSCRIPT DIGIT
    $FF10..$FF19 : Result := $FF10; // FULLWIDTH DIGIT
    $1D7CE..$1D7D7 : Result := $1D7CE; // MATHEMATICAL BOLD DIGIT
    $1D7D8..$1D7E1 : Result := $1D7D8; // MATHEMATICAL DOUBLE-STRUCK DIGIT
    $1D7E2..$1D7EB : Result := $1D7E2; // MATHEMATICAL SANS-SERIF DIGIT
    $1D7EC..$1D7F5 : Result := $1D7EC; // MATHEMATICAL SANS-SERIF BOLD DIGIT
    $1D7F6..$1D7FF : Result := $1D7F6; // MATHEMATICAL MONOSPACE DIGIT
  else
    Result := 0;
  end;
end;
{ Returns the value 0..9 of the decimal digit Ch, computed as the offset of }
{ Ch from the base returned by DecimalDigitBase, or -1 when that base is 0. }
function DecimalDigitValue(const Ch: UCS4Char): Integer;
var Base : LongWord;
begin
  Base := DecimalDigitBase(Ch);
  if Base <> 0 then
    Result := Ch - Base
  else
    Result := -1;
end;
{ WideChar overload: passes the ordinal value of Ch to the code point }
{ overload of DecimalDigitValue and returns its result. }
function DecimalDigitValue(const Ch: WideChar): Integer;
var Value : Integer;
begin
  Value := DecimalDigitValue(Ord(Ch));
  Result := Value;
end;
{ Returns True when DecimalDigitBase reports a non-zero base for Ch. }
function IsDecimalDigit(const Ch: UCS4Char): Boolean;
begin
  if DecimalDigitBase(Ch) = 0 then
    Result := False
  else
    Result := True;
end;
{ WideChar overload: returns True when DecimalDigitBase reports a non-zero }
{ base for the ordinal value of Ch. }
function IsDecimalDigit(const Ch: WideChar): Boolean;
begin
  if DecimalDigitBase(Ord(Ch)) = 0 then
    Result := False
  else
    Result := True;
end;
{ Decodes a fraction character into numerator A and denominator B. }
{ Returns True when Ch is one of the fraction characters listed below. }
{ Otherwise returns False with A and B both set to 0. }
function FractionCharacterValue(const Ch: WideChar; var A, B : Integer): Boolean;
begin
  // Start from the "not a fraction" answer; a matching label overrides it
  A := 0;
  B := 0;
  Case Ch of
    #$00BC : begin A := 1; B := 4; end;                // VULGAR FRACTION ONE QUARTER
    #$00BD : begin A := 1; B := 2; end;                // VULGAR FRACTION ONE HALF
    #$00BE : begin A := 3; B := 4; end;                // VULGAR FRACTION THREE QUARTERS
    #$0F2A : begin A := 1; B := 2; end;                // TIBETAN DIGIT HALF ONE
    #$2153 : begin A := 1; B := 3; end;                // VULGAR FRACTION ONE THIRD
    #$2154 : begin A := 2; B := 3; end;                // VULGAR FRACTION TWO THIRDS
    #$2155..#$2158 :                                   // VULGAR FRACTION ONE FIFTH..FOUR FIFTHS
      begin
        A := Ord(Ch) - $2154;
        B := 5;
      end;
    #$2159 : begin A := 1; B := 6; end;                // VULGAR FRACTION ONE SIXTH
    #$215A : begin A := 5; B := 6; end;                // VULGAR FRACTION FIVE SIXTHS
    #$215B..#$215E :                                   // VULGAR FRACTION ONE EIGHTH..SEVEN EIGHTHS
      begin
        A := 2 * (Ord(Ch) - $215B) + 1;                // odd numerators 1, 3, 5, 7
        B := 8;
      end;
  end;
  Result := B <> 0;
end;
{ Returns the numeric value of a Roman numeral character in U+2160..U+2182, }
{ or 0 when Ch is outside that block. Capital and small numerals give the }
{ same values. }
function RomanNumeralValue(const Ch: WideChar): Integer;
begin
  Case Ch of
    #$2160..#$216B : Result := Ord(Ch) - $215F;    // ROMAN NUMERAL ONE..TWELVE
    #$2170..#$217B : Result := Ord(Ch) - $216F;    // SMALL ROMAN NUMERAL ONE..TWELVE
    #$216C, #$217C : Result := 50;                 // (SMALL) ROMAN NUMERAL FIFTY
    #$216D, #$217D : Result := 100;                // (SMALL) ROMAN NUMERAL ONE HUNDRED
    #$216E, #$217E : Result := 500;                // (SMALL) ROMAN NUMERAL FIVE HUNDRED
    #$216F,                                        // ROMAN NUMERAL ONE THOUSAND
    #$217F,                                        // SMALL ROMAN NUMERAL ONE THOUSAND
    #$2180         : Result := 1000;               // ROMAN NUMERAL ONE THOUSAND C D
    #$2181         : Result := 5000;               // ROMAN NUMERAL FIVE THOUSAND
    #$2182         : Result := 10000;              // ROMAN NUMERAL TEN THOUSAND
  else
    Result := 0;
  end;
end;
{ Returns the code point of the first letter ('A' or 'a' form) of the Latin }
{ alphabet run that contains Ch, or 0 when Ch is not in one of the four runs: }
{ ASCII capital, ASCII small, fullwidth capital, fullwidth small. }
function LatinAlphaCharBase(const Ch: WideChar): UCS4Char;
var Code : Word;
begin
  Code := Ord(Ch);
  if (Code >= $0041) and (Code <= $005A) then
    Result := $0041                              // LATIN CAPITAL LETTER
  else if (Code >= $0061) and (Code <= $007A) then
    Result := $0061                              // LATIN SMALL LETTER
  else if (Code >= $FF21) and (Code <= $FF3A) then
    Result := $FF21                              // FULLWIDTH LATIN CAPITAL LETTER
  else if (Code >= $FF41) and (Code <= $FF5A) then
    Result := $FF41                              // FULLWIDTH LATIN SMALL LETTER
  else
    Result := 0;
end;
{ Returns the base reported by LatinAlphaCharBase when Ch is one of the first }
{ six letters (offsets 0..5, i.e. A..F) of its Latin alphabet run; otherwise }
{ returns 0. }
function HexAlphaDigitBase(const Ch: WideChar): UCS4Char; overload;
var Base : UCS4Char;
begin
  Base := LatinAlphaCharBase(Ch);
  if (Base <> 0) and (Ord(Ch) - Base <= 5) then
    Result := Base
  else
    Result := 0;
end;
{ UCS4 variant of HexAlphaDigitBase.                                          }
{ Code points up to $FFFF are delegated to the WideChar overload.  Above      }
{ that, the first six letters (A..F / a..f) of each of the following          }
{ alphabets are accepted and the code point of that alphabet's first letter   }
{ is returned; all other code points yield 0.                                 }
{                                                                             }
{ MATHEMATICAL alphabets, 26 code points each, laid out back to back from     }
{ $1D400 (capital, then small, for each style):                               }
{   BOLD, ITALIC, BOLD ITALIC, SCRIPT, BOLD SCRIPT, FRAKTUR, DOUBLE-STRUCK,   }
{   BOLD FRAKTUR, SANS-SERIF, SANS-SERIF BOLD, SANS-SERIF ITALIC,             }
{   SANS-SERIF BOLD ITALIC, MONOSPACE                                         }
{ plus TAG LATIN CAPITAL LETTER A..F at $E0041..$E0046.                       }
{                                                                             }
{ NOTE(review): some code points inside the accepted mathematical ranges      }
{ appear to be reserved (unassigned) in Unicode, e.g. U+1D49D - confirm       }
{ against UnicodeData.txt whether these should be rejected.                   }
function HexAlphaDigitBase(const Ch: UCS4Char): UCS4Char; overload;
const
  MathAlphaFirst = UCS4Char($1D400); // first letter of the first alphabet
  MathAlphaLast  = UCS4Char($1D6A3); // last letter of the 26th alphabet
  AlphabetLength = 26;
  TagCapitalA    = UCS4Char($E0041);
  TagCapitalF    = UCS4Char($E0046);
var Offset : UCS4Char;
begin
  Result := 0;
  if Ch <= $FFFF then
    Result := HexAlphaDigitBase(WideChar(Ch))
  else
  if (Ch >= MathAlphaFirst) and (Ch <= MathAlphaLast) then
    begin
      // Position of Ch within its own 26-letter alphabet (0 = A/a).
      Offset := (Ch - MathAlphaFirst) mod AlphabetLength;
      if Offset <= 5 then
        Result := Ch - Offset;
    end
  else
  if (Ch >= TagCapitalA) and (Ch <= TagCapitalF) then
    Result := TagCapitalA;
end;
{ Returns the numeric value of Ch interpreted as a hexadecimal digit.         }
{ A non-negative result from DecimalDigitValue is returned as is; otherwise   }
{ a hex letter recognised by HexAlphaDigitBase maps to 10..15.  When neither  }
{ applies the (negative) result of DecimalDigitValue is returned unchanged.   }
function HexDigitValue(const Ch: UCS4Char): Integer;
var Base : UCS4Char;
begin
  Result := DecimalDigitValue(Ch);
  if Result < 0 then
    begin
      Base := HexAlphaDigitBase(Ch);
      if Base <> 0 then
        // Distance from "A"/"a" plus ten gives the digit value.
        Result := Ch - Base + 10;
    end;
end;
{ WideChar variant of HexDigitValue.                                          }
{ A non-negative result from DecimalDigitValue is returned as is; otherwise   }
{ a hex letter recognised by HexAlphaDigitBase maps to 10..15.  When neither  }
{ applies the (negative) result of DecimalDigitValue is returned unchanged.   }
function HexDigitValue(const Ch: WideChar): Integer;
var Base : UCS4Char;
begin
  Result := DecimalDigitValue(Ch);
  if Result < 0 then
    begin
      Base := HexAlphaDigitBase(Ch);
      if Base <> 0 then
        // Distance from "A"/"a" plus ten gives the digit value.
        Result := Ord(Ch) - Base + 10;
    end;
end;
{ True when HexDigitValue yields a non-negative value for Ch.                 }
function IsHexDigit(const Ch: UCS4Char): Boolean;
var Value : Integer;
begin
  Value := HexDigitValue(Ch);
  Result := Value >= 0;
end;
{ True when HexDigitValue yields a non-negative value for Ch.                 }
function IsHexDigit(const Ch: WideChar): Boolean;
var Value : Integer;
begin
  Value := HexDigitValue(Ch);
  Result := Value >= 0;
end;
{ Unicode letter table }
type
// Case classification of a letter table entry.
TUnicodeLetterAttr = (laUpper, laLower);
// One entry of the UnicodeLetterInfo table: a letter, its case, and the
// code point it maps to in the opposite case.
TUnicodeLetterInfo = packed record
Unicode : WideChar; // the letter this entry describes
Attr : TUnicodeLetterAttr; // whether Unicode is an upper or a lower case letter
CaseCode : WideChar; // opposite-case counterpart; table entries use #$FFFF where none is listed (e.g. U+00DF)
end;
// Pointer to a single letter table entry.
PUnicodeLetterInfo = ^TUnicodeLetterInfo;
const
// Derived from 'Lu' and 'Ll' class
UnicodeLetterEntries = 1492; // ~7K table
UnicodeLetterInfo : Array[0..UnicodeLetterEntries - 1] of TUnicodeLetterInfo = (
(Unicode:#$0041; Attr:laUpper; CaseCode:#$0061), // LATIN CAPITAL LETTER A
(Unicode:#$0042; Attr:laUpper; CaseCode:#$0062), // LATIN CAPITAL LETTER B
(Unicode:#$0043; Attr:laUpper; CaseCode:#$0063), // LATIN CAPITAL LETTER C
(Unicode:#$0044; Attr:laUpper; CaseCode:#$0064), // LATIN CAPITAL LETTER D
(Unicode:#$0045; Attr:laUpper; CaseCode:#$0065), // LATIN CAPITAL LETTER E
(Unicode:#$0046; Attr:laUpper; CaseCode:#$0066), // LATIN CAPITAL LETTER F
(Unicode:#$0047; Attr:laUpper; CaseCode:#$0067), // LATIN CAPITAL LETTER G
(Unicode:#$0048; Attr:laUpper; CaseCode:#$0068), // LATIN CAPITAL LETTER H
(Unicode:#$0049; Attr:laUpper; CaseCode:#$0069), // LATIN CAPITAL LETTER I
(Unicode:#$004A; Attr:laUpper; CaseCode:#$006A), // LATIN CAPITAL LETTER J
(Unicode:#$004B; Attr:laUpper; CaseCode:#$006B), // LATIN CAPITAL LETTER K
(Unicode:#$004C; Attr:laUpper; CaseCode:#$006C), // LATIN CAPITAL LETTER L
(Unicode:#$004D; Attr:laUpper; CaseCode:#$006D), // LATIN CAPITAL LETTER M
(Unicode:#$004E; Attr:laUpper; CaseCode:#$006E), // LATIN CAPITAL LETTER N
(Unicode:#$004F; Attr:laUpper; CaseCode:#$006F), // LATIN CAPITAL LETTER O
(Unicode:#$0050; Attr:laUpper; CaseCode:#$0070), // LATIN CAPITAL LETTER P
(Unicode:#$0051; Attr:laUpper; CaseCode:#$0071), // LATIN CAPITAL LETTER Q
(Unicode:#$0052; Attr:laUpper; CaseCode:#$0072), // LATIN CAPITAL LETTER R
(Unicode:#$0053; Attr:laUpper; CaseCode:#$0073), // LATIN CAPITAL LETTER S
(Unicode:#$0054; Attr:laUpper; CaseCode:#$0074), // LATIN CAPITAL LETTER T
(Unicode:#$0055; Attr:laUpper; CaseCode:#$0075), // LATIN CAPITAL LETTER U
(Unicode:#$0056; Attr:laUpper; CaseCode:#$0076), // LATIN CAPITAL LETTER V
(Unicode:#$0057; Attr:laUpper; CaseCode:#$0077), // LATIN CAPITAL LETTER W
(Unicode:#$0058; Attr:laUpper; CaseCode:#$0078), // LATIN CAPITAL LETTER X
(Unicode:#$0059; Attr:laUpper; CaseCode:#$0079), // LATIN CAPITAL LETTER Y
(Unicode:#$005A; Attr:laUpper; CaseCode:#$007A), // LATIN CAPITAL LETTER Z
(Unicode:#$0061; Attr:laLower; CaseCode:#$0041), // LATIN SMALL LETTER A
(Unicode:#$0062; Attr:laLower; CaseCode:#$0042), // LATIN SMALL LETTER B
(Unicode:#$0063; Attr:laLower; CaseCode:#$0043), // LATIN SMALL LETTER C
(Unicode:#$0064; Attr:laLower; CaseCode:#$0044), // LATIN SMALL LETTER D
(Unicode:#$0065; Attr:laLower; CaseCode:#$0045), // LATIN SMALL LETTER E
(Unicode:#$0066; Attr:laLower; CaseCode:#$0046), // LATIN SMALL LETTER F
(Unicode:#$0067; Attr:laLower; CaseCode:#$0047), // LATIN SMALL LETTER G
(Unicode:#$0068; Attr:laLower; CaseCode:#$0048), // LATIN SMALL LETTER H
(Unicode:#$0069; Attr:laLower; CaseCode:#$0049), // LATIN SMALL LETTER I
(Unicode:#$006A; Attr:laLower; CaseCode:#$004A), // LATIN SMALL LETTER J
(Unicode:#$006B; Attr:laLower; CaseCode:#$004B), // LATIN SMALL LETTER K
(Unicode:#$006C; Attr:laLower; CaseCode:#$004C), // LATIN SMALL LETTER L
(Unicode:#$006D; Attr:laLower; CaseCode:#$004D), // LATIN SMALL LETTER M
(Unicode:#$006E; Attr:laLower; CaseCode:#$004E), // LATIN SMALL LETTER N
(Unicode:#$006F; Attr:laLower; CaseCode:#$004F), // LATIN SMALL LETTER O
(Unicode:#$0070; Attr:laLower; CaseCode:#$0050), // LATIN SMALL LETTER P
(Unicode:#$0071; Attr:laLower; CaseCode:#$0051), // LATIN SMALL LETTER Q
(Unicode:#$0072; Attr:laLower; CaseCode:#$0052), // LATIN SMALL LETTER R
(Unicode:#$0073; Attr:laLower; CaseCode:#$0053), // LATIN SMALL LETTER S
(Unicode:#$0074; Attr:laLower; CaseCode:#$0054), // LATIN SMALL LETTER T
(Unicode:#$0075; Attr:laLower; CaseCode:#$0055), // LATIN SMALL LETTER U
(Unicode:#$0076; Attr:laLower; CaseCode:#$0056), // LATIN SMALL LETTER V
(Unicode:#$0077; Attr:laLower; CaseCode:#$0057), // LATIN SMALL LETTER W
(Unicode:#$0078; Attr:laLower; CaseCode:#$0058), // LATIN SMALL LETTER X
(Unicode:#$0079; Attr:laLower; CaseCode:#$0059), // LATIN SMALL LETTER Y
(Unicode:#$007A; Attr:laLower; CaseCode:#$005A), // LATIN SMALL LETTER Z
(Unicode:#$00AA; Attr:laLower; CaseCode:#$FFFF), // FEMININE ORDINAL INDICATOR
(Unicode:#$00B5; Attr:laLower; CaseCode:#$039C), // MICRO SIGN
(Unicode:#$00BA; Attr:laLower; CaseCode:#$FFFF), // MASCULINE ORDINAL INDICATOR
(Unicode:#$00C0; Attr:laUpper; CaseCode:#$00E0), // LATIN CAPITAL LETTER A WITH GRAVE
(Unicode:#$00C1; Attr:laUpper; CaseCode:#$00E1), // LATIN CAPITAL LETTER A WITH ACUTE
(Unicode:#$00C2; Attr:laUpper; CaseCode:#$00E2), // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
(Unicode:#$00C3; Attr:laUpper; CaseCode:#$00E3), // LATIN CAPITAL LETTER A WITH TILDE
(Unicode:#$00C4; Attr:laUpper; CaseCode:#$00E4), // LATIN CAPITAL LETTER A WITH DIAERESIS
(Unicode:#$00C5; Attr:laUpper; CaseCode:#$00E5), // LATIN CAPITAL LETTER A WITH RING ABOVE
(Unicode:#$00C6; Attr:laUpper; CaseCode:#$00E6), // LATIN CAPITAL LETTER AE
(Unicode:#$00C7; Attr:laUpper; CaseCode:#$00E7), // LATIN CAPITAL LETTER C WITH CEDILLA
(Unicode:#$00C8; Attr:laUpper; CaseCode:#$00E8), // LATIN CAPITAL LETTER E WITH GRAVE
(Unicode:#$00C9; Attr:laUpper; CaseCode:#$00E9), // LATIN CAPITAL LETTER E WITH ACUTE
(Unicode:#$00CA; Attr:laUpper; CaseCode:#$00EA), // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
(Unicode:#$00CB; Attr:laUpper; CaseCode:#$00EB), // LATIN CAPITAL LETTER E WITH DIAERESIS
(Unicode:#$00CC; Attr:laUpper; CaseCode:#$00EC), // LATIN CAPITAL LETTER I WITH GRAVE
(Unicode:#$00CD; Attr:laUpper; CaseCode:#$00ED), // LATIN CAPITAL LETTER I WITH ACUTE
(Unicode:#$00CE; Attr:laUpper; CaseCode:#$00EE), // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
(Unicode:#$00CF; Attr:laUpper; CaseCode:#$00EF), // LATIN CAPITAL LETTER I WITH DIAERESIS
(Unicode:#$00D0; Attr:laUpper; CaseCode:#$00F0), // LATIN CAPITAL LETTER ETH
(Unicode:#$00D1; Attr:laUpper; CaseCode:#$00F1), // LATIN CAPITAL LETTER N WITH TILDE
(Unicode:#$00D2; Attr:laUpper; CaseCode:#$00F2), // LATIN CAPITAL LETTER O WITH GRAVE
(Unicode:#$00D3; Attr:laUpper; CaseCode:#$00F3), // LATIN CAPITAL LETTER O WITH ACUTE
(Unicode:#$00D4; Attr:laUpper; CaseCode:#$00F4), // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
(Unicode:#$00D5; Attr:laUpper; CaseCode:#$00F5), // LATIN CAPITAL LETTER O WITH TILDE
(Unicode:#$00D6; Attr:laUpper; CaseCode:#$00F6), // LATIN CAPITAL LETTER O WITH DIAERESIS
(Unicode:#$00D8; Attr:laUpper; CaseCode:#$00F8), // LATIN CAPITAL LETTER O WITH STROKE
(Unicode:#$00D9; Attr:laUpper; CaseCode:#$00F9), // LATIN CAPITAL LETTER U WITH GRAVE
(Unicode:#$00DA; Attr:laUpper; CaseCode:#$00FA), // LATIN CAPITAL LETTER U WITH ACUTE
(Unicode:#$00DB; Attr:laUpper; CaseCode:#$00FB), // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
(Unicode:#$00DC; Attr:laUpper; CaseCode:#$00FC), // LATIN CAPITAL LETTER U WITH DIAERESIS
(Unicode:#$00DD; Attr:laUpper; CaseCode:#$00FD), // LATIN CAPITAL LETTER Y WITH ACUTE
(Unicode:#$00DE; Attr:laUpper; CaseCode:#$00FE), // LATIN CAPITAL LETTER THORN
(Unicode:#$00DF; Attr:laLower; CaseCode:#$FFFF), // LATIN SMALL LETTER SHARP S
(Unicode:#$00E0; Attr:laLower; CaseCode:#$00C0), // LATIN SMALL LETTER A WITH GRAVE
(Unicode:#$00E1; Attr:laLower; CaseCode:#$00C1), // LATIN SMALL LETTER A WITH ACUTE
(Unicode:#$00E2; Attr:laLower; CaseCode:#$00C2), // LATIN SMALL LETTER A WITH CIRCUMFLEX
(Unicode:#$00E3; Attr:laLower; CaseCode:#$00C3), // LATIN SMALL LETTER A WITH TILDE
(Unicode:#$00E4; Attr:laLower; CaseCode:#$00C4), // LATIN SMALL LETTER A WITH DIAERESIS
(Unicode:#$00E5; Attr:laLower; CaseCode:#$00C5), // LATIN SMALL LETTER A WITH RING ABOVE
(Unicode:#$00E6; Attr:laLower; CaseCode:#$00C6), // LATIN SMALL LETTER AE
(Unicode:#$00E7; Attr:laLower; CaseCode:#$00C7), // LATIN SMALL LETTER C WITH CEDILLA
(Unicode:#$00E8; Attr:laLower; CaseCode:#$00C8), // LATIN SMALL LETTER E WITH GRAVE
(Unicode:#$00E9; Attr:laLower; CaseCode:#$00C9), // LATIN SMALL LETTER E WITH ACUTE
(Unicode:#$00EA; Attr:laLower; CaseCode:#$00CA), // LATIN SMALL LETTER E WITH CIRCUMFLEX
(Unicode:#$00EB; Attr:laLower; CaseCode:#$00CB), // LATIN SMALL LETTER E WITH DIAERESIS
(Unicode:#$00EC; Attr:laLower; CaseCode:#$00CC), // LATIN SMALL LETTER I WITH GRAVE
(Unicode:#$00ED; Attr:laLower; CaseCode:#$00CD), // LATIN SMALL LETTER I WITH ACUTE
(Unicode:#$00EE; Attr:laLower; CaseCode:#$00CE), // LATIN SMALL LETTER I WITH CIRCUMFLEX
(Unicode:#$00EF; Attr:laLower; CaseCode:#$00CF), // LATIN SMALL LETTER I WITH DIAERESIS
(Unicode:#$00F0; Attr:laLower; CaseCode:#$00D0), // LATIN SMALL LETTER ETH
(Unicode:#$00F1; Attr:laLower; CaseCode:#$00D1), // LATIN SMALL LETTER N WITH TILDE
(Unicode:#$00F2; Attr:laLower; CaseCode:#$00D2), // LATIN SMALL LETTER O WITH GRAVE
(Unicode:#$00F3; Attr:laLower; CaseCode:#$00D3), // LATIN SMALL LETTER O WITH ACUTE
(Unicode:#$00F4; Attr:laLower; CaseCode:#$00D4), // LATIN SMALL LETTER O WITH CIRCUMFLEX
(Unicode:#$00F5; Attr:laLower; CaseCode:#$00D5), // LATIN SMALL LETTER O WITH TILDE
(Unicode:#$00F6; Attr:laLower; CaseCode:#$00D6), // LATIN SMALL LETTER O WITH DIAERESIS
(Unicode:#$00F8; Attr:laLower; CaseCode:#$00D8), // LATIN SMALL LETTER O WITH STROKE
(Unicode:#$00F9; Attr:laLower; CaseCode:#$00D9), // LATIN SMALL LETTER U WITH GRAVE
(Unicode:#$00FA; Attr:laLower; CaseCode:#$00DA), // LATIN SMALL LETTER U WITH ACUTE
(Unicode:#$00FB; Attr:laLower; CaseCode:#$00DB), // LATIN SMALL LETTER U WITH CIRCUMFLEX
(Unicode:#$00FC; Attr:laLower; CaseCode:#$00DC), // LATIN SMALL LETTER U WITH DIAERESIS
(Unicode:#$00FD; Attr:laLower; CaseCode:#$00DD), // LATIN SMALL LETTER Y WITH ACUTE
(Unicode:#$00FE; Attr:laLower; CaseCode:#$00DE), // LATIN SMALL LETTER THORN
(Unicode:#$00FF; Attr:laLower; CaseCode:#$0178), // LATIN SMALL LETTER Y WITH DIAERESIS
(Unicode:#$0100; Attr:laUpper; CaseCode:#$0101), // LATIN CAPITAL LETTER A WITH MACRON
(Unicode:#$0101; Attr:laLower; CaseCode:#$0100), // LATIN SMALL LETTER A WITH MACRON
(Unicode:#$0102; Attr:laUpper; CaseCode:#$0103), // LATIN CAPITAL LETTER A WITH BREVE
(Unicode:#$0103; Attr:laLower; CaseCode:#$0102), // LATIN SMALL LETTER A WITH BREVE
(Unicode:#$0104; Attr:laUpper; CaseCode:#$0105), // LATIN CAPITAL LETTER A WITH OGONEK
(Unicode:#$0105; Attr:laLower; CaseCode:#$0104), // LATIN SMALL LETTER A WITH OGONEK
(Unicode:#$0106; Attr:laUpper; CaseCode:#$0107), // LATIN CAPITAL LETTER C WITH ACUTE
(Unicode:#$0107; Attr:laLower; CaseCode:#$0106), // LATIN SMALL LETTER C WITH ACUTE
(Unicode:#$0108; Attr:laUpper; CaseCode:#$0109), // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
(Unicode:#$0109; Attr:laLower; CaseCode:#$0108), // LATIN SMALL LETTER C WITH CIRCUMFLEX
(Unicode:#$010A; Attr:laUpper; CaseCode:#$010B), // LATIN CAPITAL LETTER C WITH DOT ABOVE
(Unicode:#$010B; Attr:laLower; CaseCode:#$010A), // LATIN SMALL LETTER C WITH DOT ABOVE
(Unicode:#$010C; Attr:laUpper; CaseCode:#$010D), // LATIN CAPITAL LETTER C WITH CARON
(Unicode:#$010D; Attr:laLower; CaseCode:#$010C), // LATIN SMALL LETTER C WITH CARON
(Unicode:#$010E; Attr:laUpper; CaseCode:#$010F), // LATIN CAPITAL LETTER D WITH CARON
(Unicode:#$010F; Attr:laLower; CaseCode:#$010E), // LATIN SMALL LETTER D WITH CARON
(Unicode:#$0110; Attr:laUpper; CaseCode:#$0111), // LATIN CAPITAL LETTER D WITH STROKE
(Unicode:#$0111; Attr:laLower; CaseCode:#$0110), // LATIN SMALL LETTER D WITH STROKE
(Unicode:#$0112; Attr:laUpper; CaseCode:#$0113), // LATIN CAPITAL LETTER E WITH MACRON
(Unicode:#$0113; Attr:laLower; CaseCode:#$0112), // LATIN SMALL LETTER E WITH MACRON
(Unicode:#$0114; Attr:laUpper; CaseCode:#$0115), // LATIN CAPITAL LETTER E WITH BREVE
(Unicode:#$0115; Attr:laLower; CaseCode:#$0114), // LATIN SMALL LETTER E WITH BREVE
(Unicode:#$0116; Attr:laUpper; CaseCode:#$0117), // LATIN CAPITAL LETTER E WITH DOT ABOVE
(Unicode:#$0117; Attr:laLower; CaseCode:#$0116), // LATIN SMALL LETTER E WITH DOT ABOVE
(Unicode:#$0118; Attr:laUpper; CaseCode:#$0119), // LATIN CAPITAL LETTER E WITH OGONEK
(Unicode:#$0119; Attr:laLower; CaseCode:#$0118), // LATIN SMALL LETTER E WITH OGONEK
(Unicode:#$011A; Attr:laUpper; CaseCode:#$011B), // LATIN CAPITAL LETTER E WITH CARON
(Unicode:#$011B; Attr:laLower; CaseCode:#$011A), // LATIN SMALL LETTER E WITH CARON
(Unicode:#$011C; Attr:laUpper; CaseCode:#$011D), // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
(Unicode:#$011D; Attr:laLower; CaseCode:#$011C), // LATIN SMALL LETTER G WITH CIRCUMFLEX
(Unicode:#$011E; Attr:laUpper; CaseCode:#$011F), // LATIN CAPITAL LETTER G WITH BREVE
(Unicode:#$011F; Attr:laLower; CaseCode:#$011E), // LATIN SMALL LETTER G WITH BREVE
(Unicode:#$0120; Attr:laUpper; CaseCode:#$0121), // LATIN CAPITAL LETTER G WITH DOT ABOVE
(Unicode:#$0121; Attr:laLower; CaseCode:#$0120), // LATIN SMALL LETTER G WITH DOT ABOVE
(Unicode:#$0122; Attr:laUpper; CaseCode:#$0123), // LATIN CAPITAL LETTER G WITH CEDILLA
(Unicode:#$0123; Attr:laLower; CaseCode:#$0122), // LATIN SMALL LETTER G WITH CEDILLA
(Unicode:#$0124; Attr:laUpper; CaseCode:#$0125), // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
(Unicode:#$0125; Attr:laLower; CaseCode:#$0124), // LATIN SMALL LETTER H WITH CIRCUMFLEX
(Unicode:#$0126; Attr:laUpper; CaseCode:#$0127), // LATIN CAPITAL LETTER H WITH STROKE
(Unicode:#$0127; Attr:laLower; CaseCode:#$0126), // LATIN SMALL LETTER H WITH STROKE
(Unicode:#$0128; Attr:laUpper; CaseCode:#$0129), // LATIN CAPITAL LETTER I WITH TILDE
(Unicode:#$0129; Attr:laLower; CaseCode:#$0128), // LATIN SMALL LETTER I WITH TILDE
(Unicode:#$012A; Attr:laUpper; CaseCode:#$012B), // LATIN CAPITAL LETTER I WITH MACRON
(Unicode:#$012B; Attr:laLower; CaseCode:#$012A), // LATIN SMALL LETTER I WITH MACRON
(Unicode:#$012C; Attr:laUpper; CaseCode:#$012D), // LATIN CAPITAL LETTER I WITH BREVE
(Unicode:#$012D; Attr:laLower; CaseCode:#$012C), // LATIN SMALL LETTER I WITH BREVE
(Unicode:#$012E; Attr:laUpper; CaseCode:#$012F), // LATIN CAPITAL LETTER I WITH OGONEK
(Unicode:#$012F; Attr:laLower; CaseCode:#$012E), // LATIN SMALL LETTER I WITH OGONEK
(Unicode:#$0130; Attr:laUpper; CaseCode:#$0069), // LATIN CAPITAL LETTER I WITH DOT ABOVE
(Unicode:#$0131; Attr:laLower; CaseCode:#$0049), // LATIN SMALL LETTER DOTLESS I
(Unicode:#$0132; Attr:laUpper; CaseCode:#$0133), // LATIN CAPITAL LIGATURE IJ
(Unicode:#$0133; Attr:laLower; CaseCode:#$0132), // LATIN SMALL LIGATURE IJ
(Unicode:#$0134; Attr:laUpper; CaseCode:#$0135), // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
(Unicode:#$0135; Attr:laLower; CaseCode:#$0134), // LATIN SMALL LETTER J WITH CIRCUMFLEX
(Unicode:#$0136; Attr:laUpper; CaseCode:#$0137), // LATIN CAPITAL LETTER K WITH CEDILLA
(Unicode:#$0137; Attr:laLower; CaseCode:#$0136), // LATIN SMALL LETTER K WITH CEDILLA
(Unicode:#$0138; Attr:laLower; CaseCode:#$FFFF), // LATIN SMALL LETTER KRA
(Unicode:#$0139; Attr:laUpper; CaseCode:#$013A), // LATIN CAPITAL LETTER L WITH ACUTE
(Unicode:#$013A; Attr:laLower; CaseCode:#$0139), // LATIN SMALL LETTER L WITH ACUTE
(Unicode:#$013B; Attr:laUpper; CaseCode:#$013C), // LATIN CAPITAL LETTER L WITH CEDILLA
(Unicode:#$013C; Attr:laLower; CaseCode:#$013B), // LATIN SMALL LETTER L WITH CEDILLA
(Unicode:#$013D; Attr:laUpper; CaseCode:#$013E), // LATIN CAPITAL LETTER L WITH CARON
(Unicode:#$013E; Attr:laLower; CaseCode:#$013D), // LATIN SMALL LETTER L WITH CARON
(Unicode:#$013F; Attr:laUpper; CaseCode:#$0140), // LATIN CAPITAL LETTER L WITH MIDDLE DOT
(Unicode:#$0140; Attr:laLower; CaseCode:#$013F), // LATIN SMALL LETTER L WITH MIDDLE DOT
(Unicode:#$0141; Attr:laUpper; CaseCode:#$0142), // LATIN CAPITAL LETTER L WITH STROKE
(Unicode:#$0142; Attr:laLower; CaseCode:#$0141), // LATIN SMALL LETTER L WITH STROKE
(Unicode:#$0143; Attr:laUpper; CaseCode:#$0144), // LATIN CAPITAL LETTER N WITH ACUTE
(Unicode:#$0144; Attr:laLower; CaseCode:#$0143), // LATIN SMALL LETTER N WITH ACUTE
(Unicode:#$0145; Attr:laUpper; CaseCode:#$0146), // LATIN CAPITAL LETTER N WITH CEDILLA
(Unicode:#$0146; Attr:laLower; CaseCode:#$0145), // LATIN SMALL LETTER N WITH CEDILLA
(Unicode:#$0147; Attr:laUpper; CaseCode:#$0148), // LATIN CAPITAL LETTER N WITH CARON
(Unicode:#$0148; Attr:laLower; CaseCode:#$0147), // LATIN SMALL LETTER N WITH CARON
(Unicode:#$0149; Attr:laLower; CaseCode:#$FFFF), // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
(Unicode:#$014A; Attr:laUpper; CaseCode:#$014B), // LATIN CAPITAL LETTER ENG
(Unicode:#$014B; Attr:laLower; CaseCode:#$014A), // LATIN SMALL LETTER ENG
(Unicode:#$014C; Attr:laUpper; CaseCode:#$014D), // LATIN CAPITAL LETTER O WITH MACRON
(Unicode:#$014D; Attr:laLower; CaseCode:#$014C), // LATIN SMALL LETTER O WITH MACRON
(Unicode:#$014E; Attr:laUpper; CaseCode:#$014F), // LATIN CAPITAL LETTER O WITH BREVE
(Unicode:#$014F; Attr:laLower; CaseCode:#$014E), // LATIN SMALL LETTER O WITH BREVE
(Unicode:#$0150; Attr:laUpper; CaseCode:#$0151), // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
(Unicode:#$0151; Attr:laLower; CaseCode:#$0150), // LATIN SMALL LETTER O WITH DOUBLE ACUTE
(Unicode:#$0152; Attr:laUpper; CaseCode:#$0153), // LATIN CAPITAL LIGATURE OE
(Unicode:#$0153; Attr:laLower; CaseCode:#$0152), // LATIN SMALL LIGATURE OE
(Unicode:#$0154; Attr:laUpper; CaseCode:#$0155), // LATIN CAPITAL LETTER R WITH ACUTE
(Unicode:#$0155; Attr:laLower; CaseCode:#$0154), // LATIN SMALL LETTER R WITH ACUTE
(Unicode:#$0156; Attr:laUpper; CaseCode:#$0157), // LATIN CAPITAL LETTER R WITH CEDILLA
(Unicode:#$0157; Attr:laLower; CaseCode:#$0156), // LATIN SMALL LETTER R WITH CEDILLA
(Unicode:#$0158; Attr:laUpper; CaseCode:#$0159), // LATIN CAPITAL LETTER R WITH CARON
(Unicode:#$0159; Attr:laLower; CaseCode:#$0158), // LATIN SMALL LETTER R WITH CARON
(Unicode:#$015A; Attr:laUpper; CaseCode:#$015B), // LATIN CAPITAL LETTER S WITH ACUTE
(Unicode:#$015B; Attr:laLower; CaseCode:#$015A), // LATIN SMALL LETTER S WITH ACUTE
(Unicode:#$015C; Attr:laUpper; CaseCode:#$015D), // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
(Unicode:#$015D; Attr:laLower; CaseCode:#$015C), // LATIN SMALL LETTER S WITH CIRCUMFLEX
(Unicode:#$015E; Attr:laUpper; CaseCode:#$015F), // LATIN CAPITAL LETTER S WITH CEDILLA
(Unicode:#$015F; Attr:laLower; CaseCode:#$015E), // LATIN SMALL LETTER S WITH CEDILLA
(Unicode:#$0160; Attr:laUpper; CaseCode:#$0161), // LATIN CAPITAL LETTER S WITH CARON
(Unicode:#$0161; Attr:laLower; CaseCode:#$0160), // LATIN SMALL LETTER S WITH CARON
(Unicode:#$0162; Attr:laUpper; CaseCode:#$0163), // LATIN CAPITAL LETTER T WITH CEDILLA
(Unicode:#$0163; Attr:laLower; CaseCode:#$0162), // LATIN SMALL LETTER T WITH CEDILLA
(Unicode:#$0164; Attr:laUpper; CaseCode:#$0165), // LATIN CAPITAL LETTER T WITH CARON
(Unicode:#$0165; Attr:laLower; CaseCode:#$0164), // LATIN SMALL LETTER T WITH CARON
(Unicode:#$0166; Attr:laUpper; CaseCode:#$0167), // LATIN CAPITAL LETTER T WITH STROKE
(Unicode:#$0167; Attr:laLower; CaseCode:#$0166), // LATIN SMALL LETTER T WITH STROKE
(Unicode:#$0168; Attr:laUpper; CaseCode:#$0169), // LATIN CAPITAL LETTER U WITH TILDE
(Unicode:#$0169; Attr:laLower; CaseCode:#$0168), // LATIN SMALL LETTER U WITH TILDE
(Unicode:#$016A; Attr:laUpper; CaseCode:#$016B), // LATIN CAPITAL LETTER U WITH MACRON
(Unicode:#$016B; Attr:laLower; CaseCode:#$016A), // LATIN SMALL LETTER U WITH MACRON
(Unicode:#$016C; Attr:laUpper; CaseCode:#$016D), // LATIN CAPITAL LETTER U WITH BREVE
(Unicode:#$016D; Attr:laLower; CaseCode:#$016C), // LATIN SMALL LETTER U WITH BREVE
(Unicode:#$016E; Attr:laUpper; CaseCode:#$016F), // LATIN CAPITAL LETTER U WITH RING ABOVE
(Unicode:#$016F; Attr:laLower; CaseCode:#$016E), // LATIN SMALL LETTER U WITH RING ABOVE
(Unicode:#$0170; Attr:laUpper; CaseCode:#$0171), // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
(Unicode:#$0171; Attr:laLower; CaseCode:#$0170), // LATIN SMALL LETTER U WITH DOUBLE ACUTE
(Unicode:#$0172; Attr:laUpper; CaseCode:#$0173), // LATIN CAPITAL LETTER U WITH OGONEK
(Unicode:#$0173; Attr:laLower; CaseCode:#$0172), // LATIN SMALL LETTER U WITH OGONEK
(Unicode:#$0174; Attr:laUpper; CaseCode:#$0175), // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
(Unicode:#$0175; Attr:laLower; CaseCode:#$0174), // LATIN SMALL LETTER W WITH CIRCUMFLEX
(Unicode:#$0176; Attr:laUpper; CaseCode:#$0177), // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
(Unicode:#$0177; Attr:laLower; CaseCode:#$0176), // LATIN SMALL LETTER Y WITH CIRCUMFLEX
(Unicode:#$0178; Attr:laUpper; CaseCode:#$00FF), // LATIN CAPITAL LETTER Y WITH DIAERESIS
(Unicode:#$0179; Attr:laUpper; CaseCode:#$017A), // LATIN CAPITAL LETTER Z WITH ACUTE
(Unicode:#$017A; Attr:laLower; CaseCode:#$0179), // LATIN SMALL LETTER Z WITH ACUTE
(Unicode:#$017B; Attr:laUpper; CaseCode:#$017C), // LATIN CAPITAL LETTER Z WITH DOT ABOVE
(Unicode:#$017C; Attr:laLower; CaseCode:#$017B), // LATIN SMALL LETTER Z WITH DOT ABOVE
(Unicode:#$017D; Attr:laUpper; CaseCode:#$017E), // LATIN CAPITAL LETTER Z WITH CARON
(Unicode:#$017E; Attr:laLower; CaseCode:#$017D), // LATIN SMALL LETTER Z WITH CARON
(Unicode:#$017F; Attr:laLower; CaseCode:#$0053), // LATIN SMALL LETTER LONG S
(Unicode:#$0180; Attr:laLower; CaseCode:#$FFFF), // LATIN SMALL LETTER B WITH STROKE
(Unicode:#$0181; Attr:laUpper; CaseCode:#$0253), // LATIN CAPITAL LETTER B WITH HOOK
(Unicode:#$0182; Attr:laUpper; CaseCode:#$0183), // LATIN CAPITAL LETTER B WITH TOPBAR
(Unicode:#$0183; Attr:laLower; CaseCode:#$0182), // LATIN SMALL LETTER B WITH TOPBAR
(Unicode:#$0184; Attr:laUpper; CaseCode:#$0185), // LATIN CAPITAL LETTER TONE SIX
(Unicode:#$0185; Attr:laLower; CaseCode:#$0184), // LATIN SMALL LETTER TONE SIX
(Unicode:#$0186; Attr:laUpper; CaseCode:#$0254), // LATIN CAPITAL LETTER OPEN O
(Unicode:#$0187; Attr:laUpper; CaseCode:#$0188), // LATIN CAPITAL LETTER C WITH HOOK
(Unicode:#$0188; Attr:laLower; CaseCode:#$0187), // LATIN SMALL LETTER C WITH HOOK
(Unicode:#$0189; Attr:laUpper; CaseCode:#$0256), // LATIN CAPITAL LETTER AFRICAN D
(Unicode:#$018A; Attr:laUpper; CaseCode:#$0257), // LATIN CAPITAL LETTER D WITH HOOK
(Unicode:#$018B; Attr:laUpper; CaseCode:#$018C), // LATIN CAPITAL LETTER D WITH TOPBAR
(Unicode:#$018C; Attr:laLower; CaseCode:#$018B), // LATIN SMALL LETTER D WITH TOPBAR
(Unicode:#$018D; Attr:laLower; CaseCode:#$FFFF), // LATIN SMALL LETTER TURNED DELTA
(Unicode:#$018E; Attr:laUpper; CaseCode:#$01DD), // LATIN CAPITAL LETTER REVERSED E
(Unicode:#$018F; Attr:laUpper; CaseCode:#$0259), // LATIN CAPITAL LETTER SCHWA
(Unicode:#$0190; Attr:laUpper; CaseCode:#$025B), // LATIN CAPITAL LETTER OPEN E
(Unicode:#$0191; Attr:laUpper; CaseCode:#$0192), // LATIN CAPITAL LETTER F WITH HOOK
(Unicode:#$0192; Attr:laLower; CaseCode:#$0191), // LATIN SMALL LETTER F WITH HOOK
(Unicode:#$0193; Attr:laUpper; CaseCode:#$0260), // LATIN CAPITAL LETTER G WITH HOOK
(Unicode:#$0194; Attr:laUpper; CaseCode:#$0263), // LATIN CAPITAL LETTER GAMMA
(Unicode:#$0195; Attr:laLower; CaseCode:#$01F6), // LATIN SMALL LETTER HV
(Unicode:#$0196; Attr:laUpper; CaseCode:#$0269), // LATIN CAPITAL LETTER IOTA
(Unicode:#$0197; Attr:laUpper; CaseCode:#$0268), // LATIN CAPITAL LETTER I WITH STROKE
(Unicode:#$0198; Attr:laUpper; CaseCode:#$0199), // LATIN CAPITAL LETTER K WITH HOOK
(Unicode:#$0199; Attr:laLower; CaseCode:#$0198), // LATIN SMALL LETTER K WITH HOOK
(Unicode:#$019A; Attr:laLower; CaseCode:#$FFFF), // LATIN SMALL LETTER L WITH BAR
(Unicode:#