From 6b28381aa189f8958edab1951e8ac20e496a1f50 Mon Sep 17 00:00:00 2001 From: Colin O'Dell Date: Fri, 26 Dec 2014 19:28:52 -0500 Subject: [PATCH] Case folding support for environments without mbstring Per the spec, reference link matching is done by normalizing the label with a Unicode case fold, which mb_strtoupper approximates by upper-casing the label. But since not all systems have the mbstring extension installed, we need to implement this logic manually by converting characters according to this table: http://www.unicode.org/Public/UNIDATA/CaseFolding.txt --- src/Reference/Reference.php | 8 + src/Util/UnicodeCaseFolder.php | 1181 ++++++++++++++++++++++++++ tests/Util/UnicodeCaseFolderTest.php | 26 + 3 files changed, 1215 insertions(+) create mode 100644 src/Util/UnicodeCaseFolder.php create mode 100644 tests/Util/UnicodeCaseFolderTest.php diff --git a/src/Reference/Reference.php b/src/Reference/Reference.php index 71db549322..6182890961 100644 --- a/src/Reference/Reference.php +++ b/src/Reference/Reference.php @@ -14,6 +14,8 @@ namespace League\CommonMark\Reference; +use League\CommonMark\Util\UnicodeCaseFolder; + /** * Link reference */ @@ -87,6 +89,12 @@ public static function normalizeReference($string) // leading/trailing whitespace $string = preg_replace('/\s+/', '', trim($string)); + // Convert to upper-case using Unicode case folding + // Use an alternate method if mb_strtoupper isn't available + if (!function_exists('mb_strtoupper')) { + return UnicodeCaseFolder::toUpperCase($string); + } + return mb_strtoupper($string, 'UTF-8'); } } diff --git a/src/Util/UnicodeCaseFolder.php b/src/Util/UnicodeCaseFolder.php new file mode 100644 index 0000000000..7325c76ba3 --- /dev/null +++ b/src/Util/UnicodeCaseFolder.php @@ -0,0 +1,1181 @@ + + * + * Original code based on the CommonMark JS reference parser (http://bitly.com/commonmarkjs) + * - (c) John MacFarlane + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace League\CommonMark\Util; + +/** + * Provides an alternate case-folding mechanism for users without the mbstring library + */ +class UnicodeCaseFolder +{ + /** + * @var array + * + * Manually generated from http://www.unicode.org/Public/UNIDATA/CaseFolding.txt + */ + private static $map = array( + 0x0041 => 0x0061, // LATIN CAPITAL LETTER A + 0x0042 => 0x0062, // LATIN CAPITAL LETTER B + 0x0043 => 0x0063, // LATIN CAPITAL LETTER C + 0x0044 => 0x0064, // LATIN CAPITAL LETTER D + 0x0045 => 0x0065, // LATIN CAPITAL LETTER E + 0x0046 => 0x0066, // LATIN CAPITAL LETTER F + 0x0047 => 0x0067, // LATIN CAPITAL LETTER G + 0x0048 => 0x0068, // LATIN CAPITAL LETTER H + 0x0049 => 0x0069, // LATIN CAPITAL LETTER I + 0x004A => 0x006A, // LATIN CAPITAL LETTER J + 0x004B => 0x006B, // LATIN CAPITAL LETTER K + 0x004C => 0x006C, // LATIN CAPITAL LETTER L + 0x004D => 0x006D, // LATIN CAPITAL LETTER M + 0x004E => 0x006E, // LATIN CAPITAL LETTER N + 0x004F => 0x006F, // LATIN CAPITAL LETTER O + 0x0050 => 0x0070, // LATIN CAPITAL LETTER P + 0x0051 => 0x0071, // LATIN CAPITAL LETTER Q + 0x0052 => 0x0072, // LATIN CAPITAL LETTER R + 0x0053 => 0x0073, // LATIN CAPITAL LETTER S + 0x0054 => 0x0074, // LATIN CAPITAL LETTER T + 0x0055 => 0x0075, // LATIN CAPITAL LETTER U + 0x0056 => 0x0076, // LATIN CAPITAL LETTER V + 0x0057 => 0x0077, // LATIN CAPITAL LETTER W + 0x0058 => 0x0078, // LATIN CAPITAL LETTER X + 0x0059 => 0x0079, // LATIN CAPITAL LETTER Y + 0x005A => 0x007A, // LATIN CAPITAL LETTER Z + 0x00B5 => 0x03BC, // MICRO SIGN + 0x00C0 => 0x00E0, // LATIN CAPITAL LETTER A WITH GRAVE + 0x00C1 => 0x00E1, // LATIN CAPITAL LETTER A WITH ACUTE + 0x00C2 => 0x00E2, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX + 0x00C3 => 0x00E3, // LATIN CAPITAL LETTER A WITH TILDE + 0x00C4 => 0x00E4, // LATIN CAPITAL LETTER A WITH DIAERESIS + 0x00C5 => 0x00E5, // LATIN CAPITAL LETTER A WITH RING ABOVE + 0x00C6 => 0x00E6, // LATIN CAPITAL LETTER AE + 0x00C7 => 0x00E7, // LATIN CAPITAL LETTER C WITH 
CEDILLA + 0x00C8 => 0x00E8, // LATIN CAPITAL LETTER E WITH GRAVE + 0x00C9 => 0x00E9, // LATIN CAPITAL LETTER E WITH ACUTE + 0x00CA => 0x00EA, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX + 0x00CB => 0x00EB, // LATIN CAPITAL LETTER E WITH DIAERESIS + 0x00CC => 0x00EC, // LATIN CAPITAL LETTER I WITH GRAVE + 0x00CD => 0x00ED, // LATIN CAPITAL LETTER I WITH ACUTE + 0x00CE => 0x00EE, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX + 0x00CF => 0x00EF, // LATIN CAPITAL LETTER I WITH DIAERESIS + 0x00D0 => 0x00F0, // LATIN CAPITAL LETTER ETH + 0x00D1 => 0x00F1, // LATIN CAPITAL LETTER N WITH TILDE + 0x00D2 => 0x00F2, // LATIN CAPITAL LETTER O WITH GRAVE + 0x00D3 => 0x00F3, // LATIN CAPITAL LETTER O WITH ACUTE + 0x00D4 => 0x00F4, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX + 0x00D5 => 0x00F5, // LATIN CAPITAL LETTER O WITH TILDE + 0x00D6 => 0x00F6, // LATIN CAPITAL LETTER O WITH DIAERESIS + 0x00D8 => 0x00F8, // LATIN CAPITAL LETTER O WITH STROKE + 0x00D9 => 0x00F9, // LATIN CAPITAL LETTER U WITH GRAVE + 0x00DA => 0x00FA, // LATIN CAPITAL LETTER U WITH ACUTE + 0x00DB => 0x00FB, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX + 0x00DC => 0x00FC, // LATIN CAPITAL LETTER U WITH DIAERESIS + 0x00DD => 0x00FD, // LATIN CAPITAL LETTER Y WITH ACUTE + 0x00DE => 0x00FE, // LATIN CAPITAL LETTER THORN + 0x0100 => 0x0101, // LATIN CAPITAL LETTER A WITH MACRON + 0x0102 => 0x0103, // LATIN CAPITAL LETTER A WITH BREVE + 0x0104 => 0x0105, // LATIN CAPITAL LETTER A WITH OGONEK + 0x0106 => 0x0107, // LATIN CAPITAL LETTER C WITH ACUTE + 0x0108 => 0x0109, // LATIN CAPITAL LETTER C WITH CIRCUMFLEX + 0x010A => 0x010B, // LATIN CAPITAL LETTER C WITH DOT ABOVE + 0x010C => 0x010D, // LATIN CAPITAL LETTER C WITH CARON + 0x010E => 0x010F, // LATIN CAPITAL LETTER D WITH CARON + 0x0110 => 0x0111, // LATIN CAPITAL LETTER D WITH STROKE + 0x0112 => 0x0113, // LATIN CAPITAL LETTER E WITH MACRON + 0x0114 => 0x0115, // LATIN CAPITAL LETTER E WITH BREVE + 0x0116 => 0x0117, // LATIN CAPITAL LETTER E WITH DOT ABOVE + 0x0118 => 
0x0119, // LATIN CAPITAL LETTER E WITH OGONEK + 0x011A => 0x011B, // LATIN CAPITAL LETTER E WITH CARON + 0x011C => 0x011D, // LATIN CAPITAL LETTER G WITH CIRCUMFLEX + 0x011E => 0x011F, // LATIN CAPITAL LETTER G WITH BREVE + 0x0120 => 0x0121, // LATIN CAPITAL LETTER G WITH DOT ABOVE + 0x0122 => 0x0123, // LATIN CAPITAL LETTER G WITH CEDILLA + 0x0124 => 0x0125, // LATIN CAPITAL LETTER H WITH CIRCUMFLEX + 0x0126 => 0x0127, // LATIN CAPITAL LETTER H WITH STROKE + 0x0128 => 0x0129, // LATIN CAPITAL LETTER I WITH TILDE + 0x012A => 0x012B, // LATIN CAPITAL LETTER I WITH MACRON + 0x012C => 0x012D, // LATIN CAPITAL LETTER I WITH BREVE + 0x012E => 0x012F, // LATIN CAPITAL LETTER I WITH OGONEK + 0x0132 => 0x0133, // LATIN CAPITAL LIGATURE IJ + 0x0134 => 0x0135, // LATIN CAPITAL LETTER J WITH CIRCUMFLEX + 0x0136 => 0x0137, // LATIN CAPITAL LETTER K WITH CEDILLA + 0x0139 => 0x013A, // LATIN CAPITAL LETTER L WITH ACUTE + 0x013B => 0x013C, // LATIN CAPITAL LETTER L WITH CEDILLA + 0x013D => 0x013E, // LATIN CAPITAL LETTER L WITH CARON + 0x013F => 0x0140, // LATIN CAPITAL LETTER L WITH MIDDLE DOT + 0x0141 => 0x0142, // LATIN CAPITAL LETTER L WITH STROKE + 0x0143 => 0x0144, // LATIN CAPITAL LETTER N WITH ACUTE + 0x0145 => 0x0146, // LATIN CAPITAL LETTER N WITH CEDILLA + 0x0147 => 0x0148, // LATIN CAPITAL LETTER N WITH CARON + 0x014A => 0x014B, // LATIN CAPITAL LETTER ENG + 0x014C => 0x014D, // LATIN CAPITAL LETTER O WITH MACRON + 0x014E => 0x014F, // LATIN CAPITAL LETTER O WITH BREVE + 0x0150 => 0x0151, // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE + 0x0152 => 0x0153, // LATIN CAPITAL LIGATURE OE + 0x0154 => 0x0155, // LATIN CAPITAL LETTER R WITH ACUTE + 0x0156 => 0x0157, // LATIN CAPITAL LETTER R WITH CEDILLA + 0x0158 => 0x0159, // LATIN CAPITAL LETTER R WITH CARON + 0x015A => 0x015B, // LATIN CAPITAL LETTER S WITH ACUTE + 0x015C => 0x015D, // LATIN CAPITAL LETTER S WITH CIRCUMFLEX + 0x015E => 0x015F, // LATIN CAPITAL LETTER S WITH CEDILLA + 0x0160 => 0x0161, // LATIN CAPITAL LETTER 
S WITH CARON + 0x0162 => 0x0163, // LATIN CAPITAL LETTER T WITH CEDILLA + 0x0164 => 0x0165, // LATIN CAPITAL LETTER T WITH CARON + 0x0166 => 0x0167, // LATIN CAPITAL LETTER T WITH STROKE + 0x0168 => 0x0169, // LATIN CAPITAL LETTER U WITH TILDE + 0x016A => 0x016B, // LATIN CAPITAL LETTER U WITH MACRON + 0x016C => 0x016D, // LATIN CAPITAL LETTER U WITH BREVE + 0x016E => 0x016F, // LATIN CAPITAL LETTER U WITH RING ABOVE + 0x0170 => 0x0171, // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE + 0x0172 => 0x0173, // LATIN CAPITAL LETTER U WITH OGONEK + 0x0174 => 0x0175, // LATIN CAPITAL LETTER W WITH CIRCUMFLEX + 0x0176 => 0x0177, // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX + 0x0178 => 0x00FF, // LATIN CAPITAL LETTER Y WITH DIAERESIS + 0x0179 => 0x017A, // LATIN CAPITAL LETTER Z WITH ACUTE + 0x017B => 0x017C, // LATIN CAPITAL LETTER Z WITH DOT ABOVE + 0x017D => 0x017E, // LATIN CAPITAL LETTER Z WITH CARON + 0x017F => 0x0073, // LATIN SMALL LETTER LONG S + 0x0181 => 0x0253, // LATIN CAPITAL LETTER B WITH HOOK + 0x0182 => 0x0183, // LATIN CAPITAL LETTER B WITH TOPBAR + 0x0184 => 0x0185, // LATIN CAPITAL LETTER TONE SIX + 0x0186 => 0x0254, // LATIN CAPITAL LETTER OPEN O + 0x0187 => 0x0188, // LATIN CAPITAL LETTER C WITH HOOK + 0x0189 => 0x0256, // LATIN CAPITAL LETTER AFRICAN D + 0x018A => 0x0257, // LATIN CAPITAL LETTER D WITH HOOK + 0x018B => 0x018C, // LATIN CAPITAL LETTER D WITH TOPBAR + 0x018E => 0x01DD, // LATIN CAPITAL LETTER REVERSED E + 0x018F => 0x0259, // LATIN CAPITAL LETTER SCHWA + 0x0190 => 0x025B, // LATIN CAPITAL LETTER OPEN E + 0x0191 => 0x0192, // LATIN CAPITAL LETTER F WITH HOOK + 0x0193 => 0x0260, // LATIN CAPITAL LETTER G WITH HOOK + 0x0194 => 0x0263, // LATIN CAPITAL LETTER GAMMA + 0x0196 => 0x0269, // LATIN CAPITAL LETTER IOTA + 0x0197 => 0x0268, // LATIN CAPITAL LETTER I WITH STROKE + 0x0198 => 0x0199, // LATIN CAPITAL LETTER K WITH HOOK + 0x019C => 0x026F, // LATIN CAPITAL LETTER TURNED M + 0x019D => 0x0272, // LATIN CAPITAL LETTER N WITH LEFT HOOK + 0x019F 
=> 0x0275, // LATIN CAPITAL LETTER O WITH MIDDLE TILDE + 0x01A0 => 0x01A1, // LATIN CAPITAL LETTER O WITH HORN + 0x01A2 => 0x01A3, // LATIN CAPITAL LETTER OI + 0x01A4 => 0x01A5, // LATIN CAPITAL LETTER P WITH HOOK + 0x01A6 => 0x0280, // LATIN LETTER YR + 0x01A7 => 0x01A8, // LATIN CAPITAL LETTER TONE TWO + 0x01A9 => 0x0283, // LATIN CAPITAL LETTER ESH + 0x01AC => 0x01AD, // LATIN CAPITAL LETTER T WITH HOOK + 0x01AE => 0x0288, // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK + 0x01AF => 0x01B0, // LATIN CAPITAL LETTER U WITH HORN + 0x01B1 => 0x028A, // LATIN CAPITAL LETTER UPSILON + 0x01B2 => 0x028B, // LATIN CAPITAL LETTER V WITH HOOK + 0x01B3 => 0x01B4, // LATIN CAPITAL LETTER Y WITH HOOK + 0x01B5 => 0x01B6, // LATIN CAPITAL LETTER Z WITH STROKE + 0x01B7 => 0x0292, // LATIN CAPITAL LETTER EZH + 0x01B8 => 0x01B9, // LATIN CAPITAL LETTER EZH REVERSED + 0x01BC => 0x01BD, // LATIN CAPITAL LETTER TONE FIVE + 0x01C4 => 0x01C6, // LATIN CAPITAL LETTER DZ WITH CARON + 0x01C5 => 0x01C6, // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON + 0x01C7 => 0x01C9, // LATIN CAPITAL LETTER LJ + 0x01C8 => 0x01C9, // LATIN CAPITAL LETTER L WITH SMALL LETTER J + 0x01CA => 0x01CC, // LATIN CAPITAL LETTER NJ + 0x01CB => 0x01CC, // LATIN CAPITAL LETTER N WITH SMALL LETTER J + 0x01CD => 0x01CE, // LATIN CAPITAL LETTER A WITH CARON + 0x01CF => 0x01D0, // LATIN CAPITAL LETTER I WITH CARON + 0x01D1 => 0x01D2, // LATIN CAPITAL LETTER O WITH CARON + 0x01D3 => 0x01D4, // LATIN CAPITAL LETTER U WITH CARON + 0x01D5 => 0x01D6, // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON + 0x01D7 => 0x01D8, // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE + 0x01D9 => 0x01DA, // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON + 0x01DB => 0x01DC, // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE + 0x01DE => 0x01DF, // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON + 0x01E0 => 0x01E1, // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON + 0x01E2 => 0x01E3, // LATIN CAPITAL LETTER AE WITH MACRON + 0x01E4 
=> 0x01E5, // LATIN CAPITAL LETTER G WITH STROKE + 0x01E6 => 0x01E7, // LATIN CAPITAL LETTER G WITH CARON + 0x01E8 => 0x01E9, // LATIN CAPITAL LETTER K WITH CARON + 0x01EA => 0x01EB, // LATIN CAPITAL LETTER O WITH OGONEK + 0x01EC => 0x01ED, // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON + 0x01EE => 0x01EF, // LATIN CAPITAL LETTER EZH WITH CARON + 0x01F1 => 0x01F3, // LATIN CAPITAL LETTER DZ + 0x01F2 => 0x01F3, // LATIN CAPITAL LETTER D WITH SMALL LETTER Z + 0x01F4 => 0x01F5, // LATIN CAPITAL LETTER G WITH ACUTE + 0x01F6 => 0x0195, // LATIN CAPITAL LETTER HWAIR + 0x01F7 => 0x01BF, // LATIN CAPITAL LETTER WYNN + 0x01F8 => 0x01F9, // LATIN CAPITAL LETTER N WITH GRAVE + 0x01FA => 0x01FB, // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE + 0x01FC => 0x01FD, // LATIN CAPITAL LETTER AE WITH ACUTE + 0x01FE => 0x01FF, // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE + 0x0200 => 0x0201, // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE + 0x0202 => 0x0203, // LATIN CAPITAL LETTER A WITH INVERTED BREVE + 0x0204 => 0x0205, // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE + 0x0206 => 0x0207, // LATIN CAPITAL LETTER E WITH INVERTED BREVE + 0x0208 => 0x0209, // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE + 0x020A => 0x020B, // LATIN CAPITAL LETTER I WITH INVERTED BREVE + 0x020C => 0x020D, // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE + 0x020E => 0x020F, // LATIN CAPITAL LETTER O WITH INVERTED BREVE + 0x0210 => 0x0211, // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE + 0x0212 => 0x0213, // LATIN CAPITAL LETTER R WITH INVERTED BREVE + 0x0214 => 0x0215, // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE + 0x0216 => 0x0217, // LATIN CAPITAL LETTER U WITH INVERTED BREVE + 0x0218 => 0x0219, // LATIN CAPITAL LETTER S WITH COMMA BELOW + 0x021A => 0x021B, // LATIN CAPITAL LETTER T WITH COMMA BELOW + 0x021C => 0x021D, // LATIN CAPITAL LETTER YOGH + 0x021E => 0x021F, // LATIN CAPITAL LETTER H WITH CARON + 0x0220 => 0x019E, // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG + 0x0222 => 0x0223, // LATIN CAPITAL LETTER OU + 
0x0224 => 0x0225, // LATIN CAPITAL LETTER Z WITH HOOK + 0x0226 => 0x0227, // LATIN CAPITAL LETTER A WITH DOT ABOVE + 0x0228 => 0x0229, // LATIN CAPITAL LETTER E WITH CEDILLA + 0x022A => 0x022B, // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON + 0x022C => 0x022D, // LATIN CAPITAL LETTER O WITH TILDE AND MACRON + 0x022E => 0x022F, // LATIN CAPITAL LETTER O WITH DOT ABOVE + 0x0230 => 0x0231, // LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON + 0x0232 => 0x0233, // LATIN CAPITAL LETTER Y WITH MACRON + 0x023A => 0x2C65, // LATIN CAPITAL LETTER A WITH STROKE + 0x023B => 0x023C, // LATIN CAPITAL LETTER C WITH STROKE + 0x023D => 0x019A, // LATIN CAPITAL LETTER L WITH BAR + 0x023E => 0x2C66, // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE + 0x0241 => 0x0242, // LATIN CAPITAL LETTER GLOTTAL STOP + 0x0243 => 0x0180, // LATIN CAPITAL LETTER B WITH STROKE + 0x0244 => 0x0289, // LATIN CAPITAL LETTER U BAR + 0x0245 => 0x028C, // LATIN CAPITAL LETTER TURNED V + 0x0246 => 0x0247, // LATIN CAPITAL LETTER E WITH STROKE + 0x0248 => 0x0249, // LATIN CAPITAL LETTER J WITH STROKE + 0x024A => 0x024B, // LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL + 0x024C => 0x024D, // LATIN CAPITAL LETTER R WITH STROKE + 0x024E => 0x024F, // LATIN CAPITAL LETTER Y WITH STROKE + 0x0345 => 0x03B9, // COMBINING GREEK YPOGEGRAMMENI + 0x0370 => 0x0371, // GREEK CAPITAL LETTER HETA + 0x0372 => 0x0373, // GREEK CAPITAL LETTER ARCHAIC SAMPI + 0x0376 => 0x0377, // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA + 0x037F => 0x03F3, // GREEK CAPITAL LETTER YOT + 0x0386 => 0x03AC, // GREEK CAPITAL LETTER ALPHA WITH TONOS + 0x0388 => 0x03AD, // GREEK CAPITAL LETTER EPSILON WITH TONOS + 0x0389 => 0x03AE, // GREEK CAPITAL LETTER ETA WITH TONOS + 0x038A => 0x03AF, // GREEK CAPITAL LETTER IOTA WITH TONOS + 0x038C => 0x03CC, // GREEK CAPITAL LETTER OMICRON WITH TONOS + 0x038E => 0x03CD, // GREEK CAPITAL LETTER UPSILON WITH TONOS + 0x038F => 0x03CE, // GREEK CAPITAL LETTER OMEGA WITH TONOS + 0x0391 => 0x03B1, // GREEK CAPITAL 
LETTER ALPHA + 0x0392 => 0x03B2, // GREEK CAPITAL LETTER BETA + 0x0393 => 0x03B3, // GREEK CAPITAL LETTER GAMMA + 0x0394 => 0x03B4, // GREEK CAPITAL LETTER DELTA + 0x0395 => 0x03B5, // GREEK CAPITAL LETTER EPSILON + 0x0396 => 0x03B6, // GREEK CAPITAL LETTER ZETA + 0x0397 => 0x03B7, // GREEK CAPITAL LETTER ETA + 0x0398 => 0x03B8, // GREEK CAPITAL LETTER THETA + 0x0399 => 0x03B9, // GREEK CAPITAL LETTER IOTA + 0x039A => 0x03BA, // GREEK CAPITAL LETTER KAPPA + 0x039B => 0x03BB, // GREEK CAPITAL LETTER LAMDA + 0x039C => 0x03BC, // GREEK CAPITAL LETTER MU + 0x039D => 0x03BD, // GREEK CAPITAL LETTER NU + 0x039E => 0x03BE, // GREEK CAPITAL LETTER XI + 0x039F => 0x03BF, // GREEK CAPITAL LETTER OMICRON + 0x03A0 => 0x03C0, // GREEK CAPITAL LETTER PI + 0x03A1 => 0x03C1, // GREEK CAPITAL LETTER RHO + 0x03A3 => 0x03C3, // GREEK CAPITAL LETTER SIGMA + 0x03A4 => 0x03C4, // GREEK CAPITAL LETTER TAU + 0x03A5 => 0x03C5, // GREEK CAPITAL LETTER UPSILON + 0x03A6 => 0x03C6, // GREEK CAPITAL LETTER PHI + 0x03A7 => 0x03C7, // GREEK CAPITAL LETTER CHI + 0x03A8 => 0x03C8, // GREEK CAPITAL LETTER PSI + 0x03A9 => 0x03C9, // GREEK CAPITAL LETTER OMEGA + 0x03AA => 0x03CA, // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + 0x03AB => 0x03CB, // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + 0x03C2 => 0x03C3, // GREEK SMALL LETTER FINAL SIGMA + 0x03CF => 0x03D7, // GREEK CAPITAL KAI SYMBOL + 0x03D0 => 0x03B2, // GREEK BETA SYMBOL + 0x03D1 => 0x03B8, // GREEK THETA SYMBOL + 0x03D5 => 0x03C6, // GREEK PHI SYMBOL + 0x03D6 => 0x03C0, // GREEK PI SYMBOL + 0x03D8 => 0x03D9, // GREEK LETTER ARCHAIC KOPPA + 0x03DA => 0x03DB, // GREEK LETTER STIGMA + 0x03DC => 0x03DD, // GREEK LETTER DIGAMMA + 0x03DE => 0x03DF, // GREEK LETTER KOPPA + 0x03E0 => 0x03E1, // GREEK LETTER SAMPI + 0x03E2 => 0x03E3, // COPTIC CAPITAL LETTER SHEI + 0x03E4 => 0x03E5, // COPTIC CAPITAL LETTER FEI + 0x03E6 => 0x03E7, // COPTIC CAPITAL LETTER KHEI + 0x03E8 => 0x03E9, // COPTIC CAPITAL LETTER HORI + 0x03EA => 0x03EB, // COPTIC CAPITAL 
LETTER GANGIA + 0x03EC => 0x03ED, // COPTIC CAPITAL LETTER SHIMA + 0x03EE => 0x03EF, // COPTIC CAPITAL LETTER DEI + 0x03F0 => 0x03BA, // GREEK KAPPA SYMBOL + 0x03F1 => 0x03C1, // GREEK RHO SYMBOL + 0x03F4 => 0x03B8, // GREEK CAPITAL THETA SYMBOL + 0x03F5 => 0x03B5, // GREEK LUNATE EPSILON SYMBOL + 0x03F7 => 0x03F8, // GREEK CAPITAL LETTER SHO + 0x03F9 => 0x03F2, // GREEK CAPITAL LUNATE SIGMA SYMBOL + 0x03FA => 0x03FB, // GREEK CAPITAL LETTER SAN + 0x03FD => 0x037B, // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL + 0x03FE => 0x037C, // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL + 0x03FF => 0x037D, // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL + 0x0400 => 0x0450, // CYRILLIC CAPITAL LETTER IE WITH GRAVE + 0x0401 => 0x0451, // CYRILLIC CAPITAL LETTER IO + 0x0402 => 0x0452, // CYRILLIC CAPITAL LETTER DJE + 0x0403 => 0x0453, // CYRILLIC CAPITAL LETTER GJE + 0x0404 => 0x0454, // CYRILLIC CAPITAL LETTER UKRAINIAN IE + 0x0405 => 0x0455, // CYRILLIC CAPITAL LETTER DZE + 0x0406 => 0x0456, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + 0x0407 => 0x0457, // CYRILLIC CAPITAL LETTER YI + 0x0408 => 0x0458, // CYRILLIC CAPITAL LETTER JE + 0x0409 => 0x0459, // CYRILLIC CAPITAL LETTER LJE + 0x040A => 0x045A, // CYRILLIC CAPITAL LETTER NJE + 0x040B => 0x045B, // CYRILLIC CAPITAL LETTER TSHE + 0x040C => 0x045C, // CYRILLIC CAPITAL LETTER KJE + 0x040D => 0x045D, // CYRILLIC CAPITAL LETTER I WITH GRAVE + 0x040E => 0x045E, // CYRILLIC CAPITAL LETTER SHORT U + 0x040F => 0x045F, // CYRILLIC CAPITAL LETTER DZHE + 0x0410 => 0x0430, // CYRILLIC CAPITAL LETTER A + 0x0411 => 0x0431, // CYRILLIC CAPITAL LETTER BE + 0x0412 => 0x0432, // CYRILLIC CAPITAL LETTER VE + 0x0413 => 0x0433, // CYRILLIC CAPITAL LETTER GHE + 0x0414 => 0x0434, // CYRILLIC CAPITAL LETTER DE + 0x0415 => 0x0435, // CYRILLIC CAPITAL LETTER IE + 0x0416 => 0x0436, // CYRILLIC CAPITAL LETTER ZHE + 0x0417 => 0x0437, // CYRILLIC CAPITAL LETTER ZE + 0x0418 => 0x0438, // CYRILLIC CAPITAL LETTER I + 0x0419 => 0x0439, // 
CYRILLIC CAPITAL LETTER SHORT I + 0x041A => 0x043A, // CYRILLIC CAPITAL LETTER KA + 0x041B => 0x043B, // CYRILLIC CAPITAL LETTER EL + 0x041C => 0x043C, // CYRILLIC CAPITAL LETTER EM + 0x041D => 0x043D, // CYRILLIC CAPITAL LETTER EN + 0x041E => 0x043E, // CYRILLIC CAPITAL LETTER O + 0x041F => 0x043F, // CYRILLIC CAPITAL LETTER PE + 0x0420 => 0x0440, // CYRILLIC CAPITAL LETTER ER + 0x0421 => 0x0441, // CYRILLIC CAPITAL LETTER ES + 0x0422 => 0x0442, // CYRILLIC CAPITAL LETTER TE + 0x0423 => 0x0443, // CYRILLIC CAPITAL LETTER U + 0x0424 => 0x0444, // CYRILLIC CAPITAL LETTER EF + 0x0425 => 0x0445, // CYRILLIC CAPITAL LETTER HA + 0x0426 => 0x0446, // CYRILLIC CAPITAL LETTER TSE + 0x0427 => 0x0447, // CYRILLIC CAPITAL LETTER CHE + 0x0428 => 0x0448, // CYRILLIC CAPITAL LETTER SHA + 0x0429 => 0x0449, // CYRILLIC CAPITAL LETTER SHCHA + 0x042A => 0x044A, // CYRILLIC CAPITAL LETTER HARD SIGN + 0x042B => 0x044B, // CYRILLIC CAPITAL LETTER YERU + 0x042C => 0x044C, // CYRILLIC CAPITAL LETTER SOFT SIGN + 0x042D => 0x044D, // CYRILLIC CAPITAL LETTER E + 0x042E => 0x044E, // CYRILLIC CAPITAL LETTER YU + 0x042F => 0x044F, // CYRILLIC CAPITAL LETTER YA + 0x0460 => 0x0461, // CYRILLIC CAPITAL LETTER OMEGA + 0x0462 => 0x0463, // CYRILLIC CAPITAL LETTER YAT + 0x0464 => 0x0465, // CYRILLIC CAPITAL LETTER IOTIFIED E + 0x0466 => 0x0467, // CYRILLIC CAPITAL LETTER LITTLE YUS + 0x0468 => 0x0469, // CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS + 0x046A => 0x046B, // CYRILLIC CAPITAL LETTER BIG YUS + 0x046C => 0x046D, // CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS + 0x046E => 0x046F, // CYRILLIC CAPITAL LETTER KSI + 0x0470 => 0x0471, // CYRILLIC CAPITAL LETTER PSI + 0x0472 => 0x0473, // CYRILLIC CAPITAL LETTER FITA + 0x0474 => 0x0475, // CYRILLIC CAPITAL LETTER IZHITSA + 0x0476 => 0x0477, // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT + 0x0478 => 0x0479, // CYRILLIC CAPITAL LETTER UK + 0x047A => 0x047B, // CYRILLIC CAPITAL LETTER ROUND OMEGA + 0x047C => 0x047D, // CYRILLIC CAPITAL 
LETTER OMEGA WITH TITLO + 0x047E => 0x047F, // CYRILLIC CAPITAL LETTER OT + 0x0480 => 0x0481, // CYRILLIC CAPITAL LETTER KOPPA + 0x048A => 0x048B, // CYRILLIC CAPITAL LETTER SHORT I WITH TAIL + 0x048C => 0x048D, // CYRILLIC CAPITAL LETTER SEMISOFT SIGN + 0x048E => 0x048F, // CYRILLIC CAPITAL LETTER ER WITH TICK + 0x0490 => 0x0491, // CYRILLIC CAPITAL LETTER GHE WITH UPTURN + 0x0492 => 0x0493, // CYRILLIC CAPITAL LETTER GHE WITH STROKE + 0x0494 => 0x0495, // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK + 0x0496 => 0x0497, // CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER + 0x0498 => 0x0499, // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER + 0x049A => 0x049B, // CYRILLIC CAPITAL LETTER KA WITH DESCENDER + 0x049C => 0x049D, // CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE + 0x049E => 0x049F, // CYRILLIC CAPITAL LETTER KA WITH STROKE + 0x04A0 => 0x04A1, // CYRILLIC CAPITAL LETTER BASHKIR KA + 0x04A2 => 0x04A3, // CYRILLIC CAPITAL LETTER EN WITH DESCENDER + 0x04A4 => 0x04A5, // CYRILLIC CAPITAL LIGATURE EN GHE + 0x04A6 => 0x04A7, // CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK + 0x04A8 => 0x04A9, // CYRILLIC CAPITAL LETTER ABKHASIAN HA + 0x04AA => 0x04AB, // CYRILLIC CAPITAL LETTER ES WITH DESCENDER + 0x04AC => 0x04AD, // CYRILLIC CAPITAL LETTER TE WITH DESCENDER + 0x04AE => 0x04AF, // CYRILLIC CAPITAL LETTER STRAIGHT U + 0x04B0 => 0x04B1, // CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE + 0x04B2 => 0x04B3, // CYRILLIC CAPITAL LETTER HA WITH DESCENDER + 0x04B4 => 0x04B5, // CYRILLIC CAPITAL LIGATURE TE TSE + 0x04B6 => 0x04B7, // CYRILLIC CAPITAL LETTER CHE WITH DESCENDER + 0x04B8 => 0x04B9, // CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE + 0x04BA => 0x04BB, // CYRILLIC CAPITAL LETTER SHHA + 0x04BC => 0x04BD, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE + 0x04BE => 0x04BF, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER + 0x04C0 => 0x04CF, // CYRILLIC LETTER PALOCHKA + 0x04C1 => 0x04C2, // CYRILLIC CAPITAL LETTER ZHE WITH BREVE + 0x04C3 => 0x04C4, // CYRILLIC CAPITAL 
LETTER KA WITH HOOK + 0x04C5 => 0x04C6, // CYRILLIC CAPITAL LETTER EL WITH TAIL + 0x04C7 => 0x04C8, // CYRILLIC CAPITAL LETTER EN WITH HOOK + 0x04C9 => 0x04CA, // CYRILLIC CAPITAL LETTER EN WITH TAIL + 0x04CB => 0x04CC, // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE + 0x04CD => 0x04CE, // CYRILLIC CAPITAL LETTER EM WITH TAIL + 0x04D0 => 0x04D1, // CYRILLIC CAPITAL LETTER A WITH BREVE + 0x04D2 => 0x04D3, // CYRILLIC CAPITAL LETTER A WITH DIAERESIS + 0x04D4 => 0x04D5, // CYRILLIC CAPITAL LIGATURE A IE + 0x04D6 => 0x04D7, // CYRILLIC CAPITAL LETTER IE WITH BREVE + 0x04D8 => 0x04D9, // CYRILLIC CAPITAL LETTER SCHWA + 0x04DA => 0x04DB, // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS + 0x04DC => 0x04DD, // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS + 0x04DE => 0x04DF, // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS + 0x04E0 => 0x04E1, // CYRILLIC CAPITAL LETTER ABKHASIAN DZE + 0x04E2 => 0x04E3, // CYRILLIC CAPITAL LETTER I WITH MACRON + 0x04E4 => 0x04E5, // CYRILLIC CAPITAL LETTER I WITH DIAERESIS + 0x04E6 => 0x04E7, // CYRILLIC CAPITAL LETTER O WITH DIAERESIS + 0x04E8 => 0x04E9, // CYRILLIC CAPITAL LETTER BARRED O + 0x04EA => 0x04EB, // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS + 0x04EC => 0x04ED, // CYRILLIC CAPITAL LETTER E WITH DIAERESIS + 0x04EE => 0x04EF, // CYRILLIC CAPITAL LETTER U WITH MACRON + 0x04F0 => 0x04F1, // CYRILLIC CAPITAL LETTER U WITH DIAERESIS + 0x04F2 => 0x04F3, // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE + 0x04F4 => 0x04F5, // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS + 0x04F6 => 0x04F7, // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER + 0x04F8 => 0x04F9, // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS + 0x04FA => 0x04FB, // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK + 0x04FC => 0x04FD, // CYRILLIC CAPITAL LETTER HA WITH HOOK + 0x04FE => 0x04FF, // CYRILLIC CAPITAL LETTER HA WITH STROKE + 0x0500 => 0x0501, // CYRILLIC CAPITAL LETTER KOMI DE + 0x0502 => 0x0503, // CYRILLIC CAPITAL LETTER KOMI DJE + 0x0504 => 0x0505, // CYRILLIC CAPITAL LETTER 
KOMI ZJE + 0x0506 => 0x0507, // CYRILLIC CAPITAL LETTER KOMI DZJE + 0x0508 => 0x0509, // CYRILLIC CAPITAL LETTER KOMI LJE + 0x050A => 0x050B, // CYRILLIC CAPITAL LETTER KOMI NJE + 0x050C => 0x050D, // CYRILLIC CAPITAL LETTER KOMI SJE + 0x050E => 0x050F, // CYRILLIC CAPITAL LETTER KOMI TJE + 0x0510 => 0x0511, // CYRILLIC CAPITAL LETTER REVERSED ZE + 0x0512 => 0x0513, // CYRILLIC CAPITAL LETTER EL WITH HOOK + 0x0514 => 0x0515, // CYRILLIC CAPITAL LETTER LHA + 0x0516 => 0x0517, // CYRILLIC CAPITAL LETTER RHA + 0x0518 => 0x0519, // CYRILLIC CAPITAL LETTER YAE + 0x051A => 0x051B, // CYRILLIC CAPITAL LETTER QA + 0x051C => 0x051D, // CYRILLIC CAPITAL LETTER WE + 0x051E => 0x051F, // CYRILLIC CAPITAL LETTER ALEUT KA + 0x0520 => 0x0521, // CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK + 0x0522 => 0x0523, // CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK + 0x0524 => 0x0525, // CYRILLIC CAPITAL LETTER PE WITH DESCENDER + 0x0526 => 0x0527, // CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER + 0x0528 => 0x0529, // CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK + 0x052A => 0x052B, // CYRILLIC CAPITAL LETTER DZZHE + 0x052C => 0x052D, // CYRILLIC CAPITAL LETTER DCHE + 0x052E => 0x052F, // CYRILLIC CAPITAL LETTER EL WITH DESCENDER + 0x0531 => 0x0561, // ARMENIAN CAPITAL LETTER AYB + 0x0532 => 0x0562, // ARMENIAN CAPITAL LETTER BEN + 0x0533 => 0x0563, // ARMENIAN CAPITAL LETTER GIM + 0x0534 => 0x0564, // ARMENIAN CAPITAL LETTER DA + 0x0535 => 0x0565, // ARMENIAN CAPITAL LETTER ECH + 0x0536 => 0x0566, // ARMENIAN CAPITAL LETTER ZA + 0x0537 => 0x0567, // ARMENIAN CAPITAL LETTER EH + 0x0538 => 0x0568, // ARMENIAN CAPITAL LETTER ET + 0x0539 => 0x0569, // ARMENIAN CAPITAL LETTER TO + 0x053A => 0x056A, // ARMENIAN CAPITAL LETTER ZHE + 0x053B => 0x056B, // ARMENIAN CAPITAL LETTER INI + 0x053C => 0x056C, // ARMENIAN CAPITAL LETTER LIWN + 0x053D => 0x056D, // ARMENIAN CAPITAL LETTER XEH + 0x053E => 0x056E, // ARMENIAN CAPITAL LETTER CA + 0x053F => 0x056F, // ARMENIAN CAPITAL LETTER KEN + 0x0540 => 0x0570, 
// ARMENIAN CAPITAL LETTER HO + 0x0541 => 0x0571, // ARMENIAN CAPITAL LETTER JA + 0x0542 => 0x0572, // ARMENIAN CAPITAL LETTER GHAD + 0x0543 => 0x0573, // ARMENIAN CAPITAL LETTER CHEH + 0x0544 => 0x0574, // ARMENIAN CAPITAL LETTER MEN + 0x0545 => 0x0575, // ARMENIAN CAPITAL LETTER YI + 0x0546 => 0x0576, // ARMENIAN CAPITAL LETTER NOW + 0x0547 => 0x0577, // ARMENIAN CAPITAL LETTER SHA + 0x0548 => 0x0578, // ARMENIAN CAPITAL LETTER VO + 0x0549 => 0x0579, // ARMENIAN CAPITAL LETTER CHA + 0x054A => 0x057A, // ARMENIAN CAPITAL LETTER PEH + 0x054B => 0x057B, // ARMENIAN CAPITAL LETTER JHEH + 0x054C => 0x057C, // ARMENIAN CAPITAL LETTER RA + 0x054D => 0x057D, // ARMENIAN CAPITAL LETTER SEH + 0x054E => 0x057E, // ARMENIAN CAPITAL LETTER VEW + 0x054F => 0x057F, // ARMENIAN CAPITAL LETTER TIWN + 0x0550 => 0x0580, // ARMENIAN CAPITAL LETTER REH + 0x0551 => 0x0581, // ARMENIAN CAPITAL LETTER CO + 0x0552 => 0x0582, // ARMENIAN CAPITAL LETTER YIWN + 0x0553 => 0x0583, // ARMENIAN CAPITAL LETTER PIWR + 0x0554 => 0x0584, // ARMENIAN CAPITAL LETTER KEH + 0x0555 => 0x0585, // ARMENIAN CAPITAL LETTER OH + 0x0556 => 0x0586, // ARMENIAN CAPITAL LETTER FEH + 0x10A0 => 0x2D00, // GEORGIAN CAPITAL LETTER AN + 0x10A1 => 0x2D01, // GEORGIAN CAPITAL LETTER BAN + 0x10A2 => 0x2D02, // GEORGIAN CAPITAL LETTER GAN + 0x10A3 => 0x2D03, // GEORGIAN CAPITAL LETTER DON + 0x10A4 => 0x2D04, // GEORGIAN CAPITAL LETTER EN + 0x10A5 => 0x2D05, // GEORGIAN CAPITAL LETTER VIN + 0x10A6 => 0x2D06, // GEORGIAN CAPITAL LETTER ZEN + 0x10A7 => 0x2D07, // GEORGIAN CAPITAL LETTER TAN + 0x10A8 => 0x2D08, // GEORGIAN CAPITAL LETTER IN + 0x10A9 => 0x2D09, // GEORGIAN CAPITAL LETTER KAN + 0x10AA => 0x2D0A, // GEORGIAN CAPITAL LETTER LAS + 0x10AB => 0x2D0B, // GEORGIAN CAPITAL LETTER MAN + 0x10AC => 0x2D0C, // GEORGIAN CAPITAL LETTER NAR + 0x10AD => 0x2D0D, // GEORGIAN CAPITAL LETTER ON + 0x10AE => 0x2D0E, // GEORGIAN CAPITAL LETTER PAR + 0x10AF => 0x2D0F, // GEORGIAN CAPITAL LETTER ZHAR + 0x10B0 => 0x2D10, // GEORGIAN 
CAPITAL LETTER RAE + 0x10B1 => 0x2D11, // GEORGIAN CAPITAL LETTER SAN + 0x10B2 => 0x2D12, // GEORGIAN CAPITAL LETTER TAR + 0x10B3 => 0x2D13, // GEORGIAN CAPITAL LETTER UN + 0x10B4 => 0x2D14, // GEORGIAN CAPITAL LETTER PHAR + 0x10B5 => 0x2D15, // GEORGIAN CAPITAL LETTER KHAR + 0x10B6 => 0x2D16, // GEORGIAN CAPITAL LETTER GHAN + 0x10B7 => 0x2D17, // GEORGIAN CAPITAL LETTER QAR + 0x10B8 => 0x2D18, // GEORGIAN CAPITAL LETTER SHIN + 0x10B9 => 0x2D19, // GEORGIAN CAPITAL LETTER CHIN + 0x10BA => 0x2D1A, // GEORGIAN CAPITAL LETTER CAN + 0x10BB => 0x2D1B, // GEORGIAN CAPITAL LETTER JIL + 0x10BC => 0x2D1C, // GEORGIAN CAPITAL LETTER CIL + 0x10BD => 0x2D1D, // GEORGIAN CAPITAL LETTER CHAR + 0x10BE => 0x2D1E, // GEORGIAN CAPITAL LETTER XAN + 0x10BF => 0x2D1F, // GEORGIAN CAPITAL LETTER JHAN + 0x10C0 => 0x2D20, // GEORGIAN CAPITAL LETTER HAE + 0x10C1 => 0x2D21, // GEORGIAN CAPITAL LETTER HE + 0x10C2 => 0x2D22, // GEORGIAN CAPITAL LETTER HIE + 0x10C3 => 0x2D23, // GEORGIAN CAPITAL LETTER WE + 0x10C4 => 0x2D24, // GEORGIAN CAPITAL LETTER HAR + 0x10C5 => 0x2D25, // GEORGIAN CAPITAL LETTER HOE + 0x10C7 => 0x2D27, // GEORGIAN CAPITAL LETTER YN + 0x10CD => 0x2D2D, // GEORGIAN CAPITAL LETTER AEN + 0x1E00 => 0x1E01, // LATIN CAPITAL LETTER A WITH RING BELOW + 0x1E02 => 0x1E03, // LATIN CAPITAL LETTER B WITH DOT ABOVE + 0x1E04 => 0x1E05, // LATIN CAPITAL LETTER B WITH DOT BELOW + 0x1E06 => 0x1E07, // LATIN CAPITAL LETTER B WITH LINE BELOW + 0x1E08 => 0x1E09, // LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE + 0x1E0A => 0x1E0B, // LATIN CAPITAL LETTER D WITH DOT ABOVE + 0x1E0C => 0x1E0D, // LATIN CAPITAL LETTER D WITH DOT BELOW + 0x1E0E => 0x1E0F, // LATIN CAPITAL LETTER D WITH LINE BELOW + 0x1E10 => 0x1E11, // LATIN CAPITAL LETTER D WITH CEDILLA + 0x1E12 => 0x1E13, // LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW + 0x1E14 => 0x1E15, // LATIN CAPITAL LETTER E WITH MACRON AND GRAVE + 0x1E16 => 0x1E17, // LATIN CAPITAL LETTER E WITH MACRON AND ACUTE + 0x1E18 => 0x1E19, // LATIN CAPITAL 
LETTER E WITH CIRCUMFLEX BELOW + 0x1E1A => 0x1E1B, // LATIN CAPITAL LETTER E WITH TILDE BELOW + 0x1E1C => 0x1E1D, // LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE + 0x1E1E => 0x1E1F, // LATIN CAPITAL LETTER F WITH DOT ABOVE + 0x1E20 => 0x1E21, // LATIN CAPITAL LETTER G WITH MACRON + 0x1E22 => 0x1E23, // LATIN CAPITAL LETTER H WITH DOT ABOVE + 0x1E24 => 0x1E25, // LATIN CAPITAL LETTER H WITH DOT BELOW + 0x1E26 => 0x1E27, // LATIN CAPITAL LETTER H WITH DIAERESIS + 0x1E28 => 0x1E29, // LATIN CAPITAL LETTER H WITH CEDILLA + 0x1E2A => 0x1E2B, // LATIN CAPITAL LETTER H WITH BREVE BELOW + 0x1E2C => 0x1E2D, // LATIN CAPITAL LETTER I WITH TILDE BELOW + 0x1E2E => 0x1E2F, // LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE + 0x1E30 => 0x1E31, // LATIN CAPITAL LETTER K WITH ACUTE + 0x1E32 => 0x1E33, // LATIN CAPITAL LETTER K WITH DOT BELOW + 0x1E34 => 0x1E35, // LATIN CAPITAL LETTER K WITH LINE BELOW + 0x1E36 => 0x1E37, // LATIN CAPITAL LETTER L WITH DOT BELOW + 0x1E38 => 0x1E39, // LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON + 0x1E3A => 0x1E3B, // LATIN CAPITAL LETTER L WITH LINE BELOW + 0x1E3C => 0x1E3D, // LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW + 0x1E3E => 0x1E3F, // LATIN CAPITAL LETTER M WITH ACUTE + 0x1E40 => 0x1E41, // LATIN CAPITAL LETTER M WITH DOT ABOVE + 0x1E42 => 0x1E43, // LATIN CAPITAL LETTER M WITH DOT BELOW + 0x1E44 => 0x1E45, // LATIN CAPITAL LETTER N WITH DOT ABOVE + 0x1E46 => 0x1E47, // LATIN CAPITAL LETTER N WITH DOT BELOW + 0x1E48 => 0x1E49, // LATIN CAPITAL LETTER N WITH LINE BELOW + 0x1E4A => 0x1E4B, // LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW + 0x1E4C => 0x1E4D, // LATIN CAPITAL LETTER O WITH TILDE AND ACUTE + 0x1E4E => 0x1E4F, // LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS + 0x1E50 => 0x1E51, // LATIN CAPITAL LETTER O WITH MACRON AND GRAVE + 0x1E52 => 0x1E53, // LATIN CAPITAL LETTER O WITH MACRON AND ACUTE + 0x1E54 => 0x1E55, // LATIN CAPITAL LETTER P WITH ACUTE + 0x1E56 => 0x1E57, // LATIN CAPITAL LETTER P WITH DOT ABOVE + 0x1E58 => 
0x1E59, // LATIN CAPITAL LETTER R WITH DOT ABOVE + 0x1E5A => 0x1E5B, // LATIN CAPITAL LETTER R WITH DOT BELOW + 0x1E5C => 0x1E5D, // LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON + 0x1E5E => 0x1E5F, // LATIN CAPITAL LETTER R WITH LINE BELOW + 0x1E60 => 0x1E61, // LATIN CAPITAL LETTER S WITH DOT ABOVE + 0x1E62 => 0x1E63, // LATIN CAPITAL LETTER S WITH DOT BELOW + 0x1E64 => 0x1E65, // LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE + 0x1E66 => 0x1E67, // LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE + 0x1E68 => 0x1E69, // LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE + 0x1E6A => 0x1E6B, // LATIN CAPITAL LETTER T WITH DOT ABOVE + 0x1E6C => 0x1E6D, // LATIN CAPITAL LETTER T WITH DOT BELOW + 0x1E6E => 0x1E6F, // LATIN CAPITAL LETTER T WITH LINE BELOW + 0x1E70 => 0x1E71, // LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW + 0x1E72 => 0x1E73, // LATIN CAPITAL LETTER U WITH DIAERESIS BELOW + 0x1E74 => 0x1E75, // LATIN CAPITAL LETTER U WITH TILDE BELOW + 0x1E76 => 0x1E77, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW + 0x1E78 => 0x1E79, // LATIN CAPITAL LETTER U WITH TILDE AND ACUTE + 0x1E7A => 0x1E7B, // LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS + 0x1E7C => 0x1E7D, // LATIN CAPITAL LETTER V WITH TILDE + 0x1E7E => 0x1E7F, // LATIN CAPITAL LETTER V WITH DOT BELOW + 0x1E80 => 0x1E81, // LATIN CAPITAL LETTER W WITH GRAVE + 0x1E82 => 0x1E83, // LATIN CAPITAL LETTER W WITH ACUTE + 0x1E84 => 0x1E85, // LATIN CAPITAL LETTER W WITH DIAERESIS + 0x1E86 => 0x1E87, // LATIN CAPITAL LETTER W WITH DOT ABOVE + 0x1E88 => 0x1E89, // LATIN CAPITAL LETTER W WITH DOT BELOW + 0x1E8A => 0x1E8B, // LATIN CAPITAL LETTER X WITH DOT ABOVE + 0x1E8C => 0x1E8D, // LATIN CAPITAL LETTER X WITH DIAERESIS + 0x1E8E => 0x1E8F, // LATIN CAPITAL LETTER Y WITH DOT ABOVE + 0x1E90 => 0x1E91, // LATIN CAPITAL LETTER Z WITH CIRCUMFLEX + 0x1E92 => 0x1E93, // LATIN CAPITAL LETTER Z WITH DOT BELOW + 0x1E94 => 0x1E95, // LATIN CAPITAL LETTER Z WITH LINE BELOW + 0x1E9B => 0x1E61, // LATIN SMALL LETTER 
LONG S WITH DOT ABOVE + 0x1E9E => 0x00DF, // LATIN CAPITAL LETTER SHARP S + 0x1EA0 => 0x1EA1, // LATIN CAPITAL LETTER A WITH DOT BELOW + 0x1EA2 => 0x1EA3, // LATIN CAPITAL LETTER A WITH HOOK ABOVE + 0x1EA4 => 0x1EA5, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + 0x1EA6 => 0x1EA7, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + 0x1EA8 => 0x1EA9, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + 0x1EAA => 0x1EAB, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + 0x1EAC => 0x1EAD, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + 0x1EAE => 0x1EAF, // LATIN CAPITAL LETTER A WITH BREVE AND ACUTE + 0x1EB0 => 0x1EB1, // LATIN CAPITAL LETTER A WITH BREVE AND GRAVE + 0x1EB2 => 0x1EB3, // LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + 0x1EB4 => 0x1EB5, // LATIN CAPITAL LETTER A WITH BREVE AND TILDE + 0x1EB6 => 0x1EB7, // LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW + 0x1EB8 => 0x1EB9, // LATIN CAPITAL LETTER E WITH DOT BELOW + 0x1EBA => 0x1EBB, // LATIN CAPITAL LETTER E WITH HOOK ABOVE + 0x1EBC => 0x1EBD, // LATIN CAPITAL LETTER E WITH TILDE + 0x1EBE => 0x1EBF, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + 0x1EC0 => 0x1EC1, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + 0x1EC2 => 0x1EC3, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + 0x1EC4 => 0x1EC5, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + 0x1EC6 => 0x1EC7, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + 0x1EC8 => 0x1EC9, // LATIN CAPITAL LETTER I WITH HOOK ABOVE + 0x1ECA => 0x1ECB, // LATIN CAPITAL LETTER I WITH DOT BELOW + 0x1ECC => 0x1ECD, // LATIN CAPITAL LETTER O WITH DOT BELOW + 0x1ECE => 0x1ECF, // LATIN CAPITAL LETTER O WITH HOOK ABOVE + 0x1ED0 => 0x1ED1, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + 0x1ED2 => 0x1ED3, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + 0x1ED4 => 0x1ED5, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + 0x1ED6 => 0x1ED7, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + 
0x1ED8 => 0x1ED9, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + 0x1EDA => 0x1EDB, // LATIN CAPITAL LETTER O WITH HORN AND ACUTE + 0x1EDC => 0x1EDD, // LATIN CAPITAL LETTER O WITH HORN AND GRAVE + 0x1EDE => 0x1EDF, // LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE + 0x1EE0 => 0x1EE1, // LATIN CAPITAL LETTER O WITH HORN AND TILDE + 0x1EE2 => 0x1EE3, // LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW + 0x1EE4 => 0x1EE5, // LATIN CAPITAL LETTER U WITH DOT BELOW + 0x1EE6 => 0x1EE7, // LATIN CAPITAL LETTER U WITH HOOK ABOVE + 0x1EE8 => 0x1EE9, // LATIN CAPITAL LETTER U WITH HORN AND ACUTE + 0x1EEA => 0x1EEB, // LATIN CAPITAL LETTER U WITH HORN AND GRAVE + 0x1EEC => 0x1EED, // LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE + 0x1EEE => 0x1EEF, // LATIN CAPITAL LETTER U WITH HORN AND TILDE + 0x1EF0 => 0x1EF1, // LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW + 0x1EF2 => 0x1EF3, // LATIN CAPITAL LETTER Y WITH GRAVE + 0x1EF4 => 0x1EF5, // LATIN CAPITAL LETTER Y WITH DOT BELOW + 0x1EF6 => 0x1EF7, // LATIN CAPITAL LETTER Y WITH HOOK ABOVE + 0x1EF8 => 0x1EF9, // LATIN CAPITAL LETTER Y WITH TILDE + 0x1EFA => 0x1EFB, // LATIN CAPITAL LETTER MIDDLE-WELSH LL + 0x1EFC => 0x1EFD, // LATIN CAPITAL LETTER MIDDLE-WELSH V + 0x1EFE => 0x1EFF, // LATIN CAPITAL LETTER Y WITH LOOP + 0x1F08 => 0x1F00, // GREEK CAPITAL LETTER ALPHA WITH PSILI + 0x1F09 => 0x1F01, // GREEK CAPITAL LETTER ALPHA WITH DASIA + 0x1F0A => 0x1F02, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA + 0x1F0B => 0x1F03, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA + 0x1F0C => 0x1F04, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA + 0x1F0D => 0x1F05, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA + 0x1F0E => 0x1F06, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI + 0x1F0F => 0x1F07, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI + 0x1F18 => 0x1F10, // GREEK CAPITAL LETTER EPSILON WITH PSILI + 0x1F19 => 0x1F11, // GREEK CAPITAL LETTER EPSILON WITH DASIA + 0x1F1A => 0x1F12, // GREEK 
CAPITAL LETTER EPSILON WITH PSILI AND VARIA + 0x1F1B => 0x1F13, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA + 0x1F1C => 0x1F14, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA + 0x1F1D => 0x1F15, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA + 0x1F28 => 0x1F20, // GREEK CAPITAL LETTER ETA WITH PSILI + 0x1F29 => 0x1F21, // GREEK CAPITAL LETTER ETA WITH DASIA + 0x1F2A => 0x1F22, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA + 0x1F2B => 0x1F23, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA + 0x1F2C => 0x1F24, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA + 0x1F2D => 0x1F25, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA + 0x1F2E => 0x1F26, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI + 0x1F2F => 0x1F27, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI + 0x1F38 => 0x1F30, // GREEK CAPITAL LETTER IOTA WITH PSILI + 0x1F39 => 0x1F31, // GREEK CAPITAL LETTER IOTA WITH DASIA + 0x1F3A => 0x1F32, // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA + 0x1F3B => 0x1F33, // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA + 0x1F3C => 0x1F34, // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA + 0x1F3D => 0x1F35, // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA + 0x1F3E => 0x1F36, // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI + 0x1F3F => 0x1F37, // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI + 0x1F48 => 0x1F40, // GREEK CAPITAL LETTER OMICRON WITH PSILI + 0x1F49 => 0x1F41, // GREEK CAPITAL LETTER OMICRON WITH DASIA + 0x1F4A => 0x1F42, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA + 0x1F4B => 0x1F43, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA + 0x1F4C => 0x1F44, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA + 0x1F4D => 0x1F45, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA + 0x1F59 => 0x1F51, // GREEK CAPITAL LETTER UPSILON WITH DASIA + 0x1F5B => 0x1F53, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA + 0x1F5D => 0x1F55, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA + 0x1F5F => 0x1F57, // 
GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI + 0x1F68 => 0x1F60, // GREEK CAPITAL LETTER OMEGA WITH PSILI + 0x1F69 => 0x1F61, // GREEK CAPITAL LETTER OMEGA WITH DASIA + 0x1F6A => 0x1F62, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA + 0x1F6B => 0x1F63, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA + 0x1F6C => 0x1F64, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA + 0x1F6D => 0x1F65, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA + 0x1F6E => 0x1F66, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI + 0x1F6F => 0x1F67, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI + 0x1F88 => 0x1F80, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI + 0x1F89 => 0x1F81, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + 0x1F8A => 0x1F82, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + 0x1F8B => 0x1F83, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + 0x1F8C => 0x1F84, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + 0x1F8D => 0x1F85, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + 0x1F8E => 0x1F86, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + 0x1F8F => 0x1F87, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + 0x1F98 => 0x1F90, // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + 0x1F99 => 0x1F91, // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + 0x1F9A => 0x1F92, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + 0x1F9B => 0x1F93, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + 0x1F9C => 0x1F94, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + 0x1F9D => 0x1F95, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + 0x1F9E => 0x1F96, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + 0x1F9F => 0x1F97, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI 
AND PROSGEGRAMMENI + 0x1FA8 => 0x1FA0, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + 0x1FA9 => 0x1FA1, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + 0x1FAA => 0x1FA2, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + 0x1FAB => 0x1FA3, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + 0x1FAC => 0x1FA4, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + 0x1FAD => 0x1FA5, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + 0x1FAE => 0x1FA6, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + 0x1FAF => 0x1FA7, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + 0x1FB8 => 0x1FB0, // GREEK CAPITAL LETTER ALPHA WITH VRACHY + 0x1FB9 => 0x1FB1, // GREEK CAPITAL LETTER ALPHA WITH MACRON + 0x1FBA => 0x1F70, // GREEK CAPITAL LETTER ALPHA WITH VARIA + 0x1FBB => 0x1F71, // GREEK CAPITAL LETTER ALPHA WITH OXIA + 0x1FBC => 0x1FB3, // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + 0x1FBE => 0x03B9, // GREEK PROSGEGRAMMENI + 0x1FC8 => 0x1F72, // GREEK CAPITAL LETTER EPSILON WITH VARIA + 0x1FC9 => 0x1F73, // GREEK CAPITAL LETTER EPSILON WITH OXIA + 0x1FCA => 0x1F74, // GREEK CAPITAL LETTER ETA WITH VARIA + 0x1FCB => 0x1F75, // GREEK CAPITAL LETTER ETA WITH OXIA + 0x1FCC => 0x1FC3, // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + 0x1FD8 => 0x1FD0, // GREEK CAPITAL LETTER IOTA WITH VRACHY + 0x1FD9 => 0x1FD1, // GREEK CAPITAL LETTER IOTA WITH MACRON + 0x1FDA => 0x1F76, // GREEK CAPITAL LETTER IOTA WITH VARIA + 0x1FDB => 0x1F77, // GREEK CAPITAL LETTER IOTA WITH OXIA + 0x1FE8 => 0x1FE0, // GREEK CAPITAL LETTER UPSILON WITH VRACHY + 0x1FE9 => 0x1FE1, // GREEK CAPITAL LETTER UPSILON WITH MACRON + 0x1FEA => 0x1F7A, // GREEK CAPITAL LETTER UPSILON WITH VARIA + 0x1FEB => 0x1F7B, // GREEK CAPITAL LETTER UPSILON WITH OXIA + 0x1FEC => 0x1FE5, // GREEK CAPITAL LETTER RHO WITH DASIA + 0x1FF8 => 0x1F78, // GREEK CAPITAL LETTER 
OMICRON WITH VARIA + 0x1FF9 => 0x1F79, // GREEK CAPITAL LETTER OMICRON WITH OXIA + 0x1FFA => 0x1F7C, // GREEK CAPITAL LETTER OMEGA WITH VARIA + 0x1FFB => 0x1F7D, // GREEK CAPITAL LETTER OMEGA WITH OXIA + 0x1FFC => 0x1FF3, // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + 0x2126 => 0x03C9, // OHM SIGN + 0x212A => 0x006B, // KELVIN SIGN + 0x212B => 0x00E5, // ANGSTROM SIGN + 0x2132 => 0x214E, // TURNED CAPITAL F + 0x2160 => 0x2170, // ROMAN NUMERAL ONE + 0x2161 => 0x2171, // ROMAN NUMERAL TWO + 0x2162 => 0x2172, // ROMAN NUMERAL THREE + 0x2163 => 0x2173, // ROMAN NUMERAL FOUR + 0x2164 => 0x2174, // ROMAN NUMERAL FIVE + 0x2165 => 0x2175, // ROMAN NUMERAL SIX + 0x2166 => 0x2176, // ROMAN NUMERAL SEVEN + 0x2167 => 0x2177, // ROMAN NUMERAL EIGHT + 0x2168 => 0x2178, // ROMAN NUMERAL NINE + 0x2169 => 0x2179, // ROMAN NUMERAL TEN + 0x216A => 0x217A, // ROMAN NUMERAL ELEVEN + 0x216B => 0x217B, // ROMAN NUMERAL TWELVE + 0x216C => 0x217C, // ROMAN NUMERAL FIFTY + 0x216D => 0x217D, // ROMAN NUMERAL ONE HUNDRED + 0x216E => 0x217E, // ROMAN NUMERAL FIVE HUNDRED + 0x216F => 0x217F, // ROMAN NUMERAL ONE THOUSAND + 0x2183 => 0x2184, // ROMAN NUMERAL REVERSED ONE HUNDRED + 0x24B6 => 0x24D0, // CIRCLED LATIN CAPITAL LETTER A + 0x24B7 => 0x24D1, // CIRCLED LATIN CAPITAL LETTER B + 0x24B8 => 0x24D2, // CIRCLED LATIN CAPITAL LETTER C + 0x24B9 => 0x24D3, // CIRCLED LATIN CAPITAL LETTER D + 0x24BA => 0x24D4, // CIRCLED LATIN CAPITAL LETTER E + 0x24BB => 0x24D5, // CIRCLED LATIN CAPITAL LETTER F + 0x24BC => 0x24D6, // CIRCLED LATIN CAPITAL LETTER G + 0x24BD => 0x24D7, // CIRCLED LATIN CAPITAL LETTER H + 0x24BE => 0x24D8, // CIRCLED LATIN CAPITAL LETTER I + 0x24BF => 0x24D9, // CIRCLED LATIN CAPITAL LETTER J + 0x24C0 => 0x24DA, // CIRCLED LATIN CAPITAL LETTER K + 0x24C1 => 0x24DB, // CIRCLED LATIN CAPITAL LETTER L + 0x24C2 => 0x24DC, // CIRCLED LATIN CAPITAL LETTER M + 0x24C3 => 0x24DD, // CIRCLED LATIN CAPITAL LETTER N + 0x24C4 => 0x24DE, // CIRCLED LATIN CAPITAL LETTER O + 0x24C5 => 
0x24DF, // CIRCLED LATIN CAPITAL LETTER P + 0x24C6 => 0x24E0, // CIRCLED LATIN CAPITAL LETTER Q + 0x24C7 => 0x24E1, // CIRCLED LATIN CAPITAL LETTER R + 0x24C8 => 0x24E2, // CIRCLED LATIN CAPITAL LETTER S + 0x24C9 => 0x24E3, // CIRCLED LATIN CAPITAL LETTER T + 0x24CA => 0x24E4, // CIRCLED LATIN CAPITAL LETTER U + 0x24CB => 0x24E5, // CIRCLED LATIN CAPITAL LETTER V + 0x24CC => 0x24E6, // CIRCLED LATIN CAPITAL LETTER W + 0x24CD => 0x24E7, // CIRCLED LATIN CAPITAL LETTER X + 0x24CE => 0x24E8, // CIRCLED LATIN CAPITAL LETTER Y + 0x24CF => 0x24E9, // CIRCLED LATIN CAPITAL LETTER Z + 0x2C00 => 0x2C30, // GLAGOLITIC CAPITAL LETTER AZU + 0x2C01 => 0x2C31, // GLAGOLITIC CAPITAL LETTER BUKY + 0x2C02 => 0x2C32, // GLAGOLITIC CAPITAL LETTER VEDE + 0x2C03 => 0x2C33, // GLAGOLITIC CAPITAL LETTER GLAGOLI + 0x2C04 => 0x2C34, // GLAGOLITIC CAPITAL LETTER DOBRO + 0x2C05 => 0x2C35, // GLAGOLITIC CAPITAL LETTER YESTU + 0x2C06 => 0x2C36, // GLAGOLITIC CAPITAL LETTER ZHIVETE + 0x2C07 => 0x2C37, // GLAGOLITIC CAPITAL LETTER DZELO + 0x2C08 => 0x2C38, // GLAGOLITIC CAPITAL LETTER ZEMLJA + 0x2C09 => 0x2C39, // GLAGOLITIC CAPITAL LETTER IZHE + 0x2C0A => 0x2C3A, // GLAGOLITIC CAPITAL LETTER INITIAL IZHE + 0x2C0B => 0x2C3B, // GLAGOLITIC CAPITAL LETTER I + 0x2C0C => 0x2C3C, // GLAGOLITIC CAPITAL LETTER DJERVI + 0x2C0D => 0x2C3D, // GLAGOLITIC CAPITAL LETTER KAKO + 0x2C0E => 0x2C3E, // GLAGOLITIC CAPITAL LETTER LJUDIJE + 0x2C0F => 0x2C3F, // GLAGOLITIC CAPITAL LETTER MYSLITE + 0x2C10 => 0x2C40, // GLAGOLITIC CAPITAL LETTER NASHI + 0x2C11 => 0x2C41, // GLAGOLITIC CAPITAL LETTER ONU + 0x2C12 => 0x2C42, // GLAGOLITIC CAPITAL LETTER POKOJI + 0x2C13 => 0x2C43, // GLAGOLITIC CAPITAL LETTER RITSI + 0x2C14 => 0x2C44, // GLAGOLITIC CAPITAL LETTER SLOVO + 0x2C15 => 0x2C45, // GLAGOLITIC CAPITAL LETTER TVRIDO + 0x2C16 => 0x2C46, // GLAGOLITIC CAPITAL LETTER UKU + 0x2C17 => 0x2C47, // GLAGOLITIC CAPITAL LETTER FRITU + 0x2C18 => 0x2C48, // GLAGOLITIC CAPITAL LETTER HERU + 0x2C19 => 0x2C49, // GLAGOLITIC 
CAPITAL LETTER OTU + 0x2C1A => 0x2C4A, // GLAGOLITIC CAPITAL LETTER PE + 0x2C1B => 0x2C4B, // GLAGOLITIC CAPITAL LETTER SHTA + 0x2C1C => 0x2C4C, // GLAGOLITIC CAPITAL LETTER TSI + 0x2C1D => 0x2C4D, // GLAGOLITIC CAPITAL LETTER CHRIVI + 0x2C1E => 0x2C4E, // GLAGOLITIC CAPITAL LETTER SHA + 0x2C1F => 0x2C4F, // GLAGOLITIC CAPITAL LETTER YERU + 0x2C20 => 0x2C50, // GLAGOLITIC CAPITAL LETTER YERI + 0x2C21 => 0x2C51, // GLAGOLITIC CAPITAL LETTER YATI + 0x2C22 => 0x2C52, // GLAGOLITIC CAPITAL LETTER SPIDERY HA + 0x2C23 => 0x2C53, // GLAGOLITIC CAPITAL LETTER YU + 0x2C24 => 0x2C54, // GLAGOLITIC CAPITAL LETTER SMALL YUS + 0x2C25 => 0x2C55, // GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL + 0x2C26 => 0x2C56, // GLAGOLITIC CAPITAL LETTER YO + 0x2C27 => 0x2C57, // GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS + 0x2C28 => 0x2C58, // GLAGOLITIC CAPITAL LETTER BIG YUS + 0x2C29 => 0x2C59, // GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS + 0x2C2A => 0x2C5A, // GLAGOLITIC CAPITAL LETTER FITA + 0x2C2B => 0x2C5B, // GLAGOLITIC CAPITAL LETTER IZHITSA + 0x2C2C => 0x2C5C, // GLAGOLITIC CAPITAL LETTER SHTAPIC + 0x2C2D => 0x2C5D, // GLAGOLITIC CAPITAL LETTER TROKUTASTI A + 0x2C2E => 0x2C5E, // GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE + 0x2C60 => 0x2C61, // LATIN CAPITAL LETTER L WITH DOUBLE BAR + 0x2C62 => 0x026B, // LATIN CAPITAL LETTER L WITH MIDDLE TILDE + 0x2C63 => 0x1D7D, // LATIN CAPITAL LETTER P WITH STROKE + 0x2C64 => 0x027D, // LATIN CAPITAL LETTER R WITH TAIL + 0x2C67 => 0x2C68, // LATIN CAPITAL LETTER H WITH DESCENDER + 0x2C69 => 0x2C6A, // LATIN CAPITAL LETTER K WITH DESCENDER + 0x2C6B => 0x2C6C, // LATIN CAPITAL LETTER Z WITH DESCENDER + 0x2C6D => 0x0251, // LATIN CAPITAL LETTER ALPHA + 0x2C6E => 0x0271, // LATIN CAPITAL LETTER M WITH HOOK + 0x2C6F => 0x0250, // LATIN CAPITAL LETTER TURNED A + 0x2C70 => 0x0252, // LATIN CAPITAL LETTER TURNED ALPHA + 0x2C72 => 0x2C73, // LATIN CAPITAL LETTER W WITH HOOK + 0x2C75 => 0x2C76, // LATIN CAPITAL LETTER HALF H + 0x2C7E => 0x023F, // 
LATIN CAPITAL LETTER S WITH SWASH TAIL + 0x2C7F => 0x0240, // LATIN CAPITAL LETTER Z WITH SWASH TAIL + 0x2C80 => 0x2C81, // COPTIC CAPITAL LETTER ALFA + 0x2C82 => 0x2C83, // COPTIC CAPITAL LETTER VIDA + 0x2C84 => 0x2C85, // COPTIC CAPITAL LETTER GAMMA + 0x2C86 => 0x2C87, // COPTIC CAPITAL LETTER DALDA + 0x2C88 => 0x2C89, // COPTIC CAPITAL LETTER EIE + 0x2C8A => 0x2C8B, // COPTIC CAPITAL LETTER SOU + 0x2C8C => 0x2C8D, // COPTIC CAPITAL LETTER ZATA + 0x2C8E => 0x2C8F, // COPTIC CAPITAL LETTER HATE + 0x2C90 => 0x2C91, // COPTIC CAPITAL LETTER THETHE + 0x2C92 => 0x2C93, // COPTIC CAPITAL LETTER IAUDA + 0x2C94 => 0x2C95, // COPTIC CAPITAL LETTER KAPA + 0x2C96 => 0x2C97, // COPTIC CAPITAL LETTER LAULA + 0x2C98 => 0x2C99, // COPTIC CAPITAL LETTER MI + 0x2C9A => 0x2C9B, // COPTIC CAPITAL LETTER NI + 0x2C9C => 0x2C9D, // COPTIC CAPITAL LETTER KSI + 0x2C9E => 0x2C9F, // COPTIC CAPITAL LETTER O + 0x2CA0 => 0x2CA1, // COPTIC CAPITAL LETTER PI + 0x2CA2 => 0x2CA3, // COPTIC CAPITAL LETTER RO + 0x2CA4 => 0x2CA5, // COPTIC CAPITAL LETTER SIMA + 0x2CA6 => 0x2CA7, // COPTIC CAPITAL LETTER TAU + 0x2CA8 => 0x2CA9, // COPTIC CAPITAL LETTER UA + 0x2CAA => 0x2CAB, // COPTIC CAPITAL LETTER FI + 0x2CAC => 0x2CAD, // COPTIC CAPITAL LETTER KHI + 0x2CAE => 0x2CAF, // COPTIC CAPITAL LETTER PSI + 0x2CB0 => 0x2CB1, // COPTIC CAPITAL LETTER OOU + 0x2CB2 => 0x2CB3, // COPTIC CAPITAL LETTER DIALECT-P ALEF + 0x2CB4 => 0x2CB5, // COPTIC CAPITAL LETTER OLD COPTIC AIN + 0x2CB6 => 0x2CB7, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE + 0x2CB8 => 0x2CB9, // COPTIC CAPITAL LETTER DIALECT-P KAPA + 0x2CBA => 0x2CBB, // COPTIC CAPITAL LETTER DIALECT-P NI + 0x2CBC => 0x2CBD, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI + 0x2CBE => 0x2CBF, // COPTIC CAPITAL LETTER OLD COPTIC OOU + 0x2CC0 => 0x2CC1, // COPTIC CAPITAL LETTER SAMPI + 0x2CC2 => 0x2CC3, // COPTIC CAPITAL LETTER CROSSED SHEI + 0x2CC4 => 0x2CC5, // COPTIC CAPITAL LETTER OLD COPTIC SHEI + 0x2CC6 => 0x2CC7, // COPTIC CAPITAL LETTER OLD COPTIC ESH + 0x2CC8 => 
0x2CC9, // COPTIC CAPITAL LETTER AKHMIMIC KHEI + 0x2CCA => 0x2CCB, // COPTIC CAPITAL LETTER DIALECT-P HORI + 0x2CCC => 0x2CCD, // COPTIC CAPITAL LETTER OLD COPTIC HORI + 0x2CCE => 0x2CCF, // COPTIC CAPITAL LETTER OLD COPTIC HA + 0x2CD0 => 0x2CD1, // COPTIC CAPITAL LETTER L-SHAPED HA + 0x2CD2 => 0x2CD3, // COPTIC CAPITAL LETTER OLD COPTIC HEI + 0x2CD4 => 0x2CD5, // COPTIC CAPITAL LETTER OLD COPTIC HAT + 0x2CD6 => 0x2CD7, // COPTIC CAPITAL LETTER OLD COPTIC GANGIA + 0x2CD8 => 0x2CD9, // COPTIC CAPITAL LETTER OLD COPTIC DJA + 0x2CDA => 0x2CDB, // COPTIC CAPITAL LETTER OLD COPTIC SHIMA + 0x2CDC => 0x2CDD, // COPTIC CAPITAL LETTER OLD NUBIAN SHIMA + 0x2CDE => 0x2CDF, // COPTIC CAPITAL LETTER OLD NUBIAN NGI + 0x2CE0 => 0x2CE1, // COPTIC CAPITAL LETTER OLD NUBIAN NYI + 0x2CE2 => 0x2CE3, // COPTIC CAPITAL LETTER OLD NUBIAN WAU + 0x2CEB => 0x2CEC, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI + 0x2CED => 0x2CEE, // COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA + 0x2CF2 => 0x2CF3, // COPTIC CAPITAL LETTER BOHAIRIC KHEI + 0xA640 => 0xA641, // CYRILLIC CAPITAL LETTER ZEMLYA + 0xA642 => 0xA643, // CYRILLIC CAPITAL LETTER DZELO + 0xA644 => 0xA645, // CYRILLIC CAPITAL LETTER REVERSED DZE + 0xA646 => 0xA647, // CYRILLIC CAPITAL LETTER IOTA + 0xA648 => 0xA649, // CYRILLIC CAPITAL LETTER DJERV + 0xA64A => 0xA64B, // CYRILLIC CAPITAL LETTER MONOGRAPH UK + 0xA64C => 0xA64D, // CYRILLIC CAPITAL LETTER BROAD OMEGA + 0xA64E => 0xA64F, // CYRILLIC CAPITAL LETTER NEUTRAL YER + 0xA650 => 0xA651, // CYRILLIC CAPITAL LETTER YERU WITH BACK YER + 0xA652 => 0xA653, // CYRILLIC CAPITAL LETTER IOTIFIED YAT + 0xA654 => 0xA655, // CYRILLIC CAPITAL LETTER REVERSED YU + 0xA656 => 0xA657, // CYRILLIC CAPITAL LETTER IOTIFIED A + 0xA658 => 0xA659, // CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS + 0xA65A => 0xA65B, // CYRILLIC CAPITAL LETTER BLENDED YUS + 0xA65C => 0xA65D, // CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS + 0xA65E => 0xA65F, // CYRILLIC CAPITAL LETTER YN + 0xA660 => 0xA661, // CYRILLIC 
CAPITAL LETTER REVERSED TSE + 0xA662 => 0xA663, // CYRILLIC CAPITAL LETTER SOFT DE + 0xA664 => 0xA665, // CYRILLIC CAPITAL LETTER SOFT EL + 0xA666 => 0xA667, // CYRILLIC CAPITAL LETTER SOFT EM + 0xA668 => 0xA669, // CYRILLIC CAPITAL LETTER MONOCULAR O + 0xA66A => 0xA66B, // CYRILLIC CAPITAL LETTER BINOCULAR O + 0xA66C => 0xA66D, // CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O + 0xA680 => 0xA681, // CYRILLIC CAPITAL LETTER DWE + 0xA682 => 0xA683, // CYRILLIC CAPITAL LETTER DZWE + 0xA684 => 0xA685, // CYRILLIC CAPITAL LETTER ZHWE + 0xA686 => 0xA687, // CYRILLIC CAPITAL LETTER CCHE + 0xA688 => 0xA689, // CYRILLIC CAPITAL LETTER DZZE + 0xA68A => 0xA68B, // CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK + 0xA68C => 0xA68D, // CYRILLIC CAPITAL LETTER TWE + 0xA68E => 0xA68F, // CYRILLIC CAPITAL LETTER TSWE + 0xA690 => 0xA691, // CYRILLIC CAPITAL LETTER TSSE + 0xA692 => 0xA693, // CYRILLIC CAPITAL LETTER TCHE + 0xA694 => 0xA695, // CYRILLIC CAPITAL LETTER HWE + 0xA696 => 0xA697, // CYRILLIC CAPITAL LETTER SHWE + 0xA698 => 0xA699, // CYRILLIC CAPITAL LETTER DOUBLE O + 0xA69A => 0xA69B, // CYRILLIC CAPITAL LETTER CROSSED O + 0xA722 => 0xA723, // LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF + 0xA724 => 0xA725, // LATIN CAPITAL LETTER EGYPTOLOGICAL AIN + 0xA726 => 0xA727, // LATIN CAPITAL LETTER HENG + 0xA728 => 0xA729, // LATIN CAPITAL LETTER TZ + 0xA72A => 0xA72B, // LATIN CAPITAL LETTER TRESILLO + 0xA72C => 0xA72D, // LATIN CAPITAL LETTER CUATRILLO + 0xA72E => 0xA72F, // LATIN CAPITAL LETTER CUATRILLO WITH COMMA + 0xA732 => 0xA733, // LATIN CAPITAL LETTER AA + 0xA734 => 0xA735, // LATIN CAPITAL LETTER AO + 0xA736 => 0xA737, // LATIN CAPITAL LETTER AU + 0xA738 => 0xA739, // LATIN CAPITAL LETTER AV + 0xA73A => 0xA73B, // LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR + 0xA73C => 0xA73D, // LATIN CAPITAL LETTER AY + 0xA73E => 0xA73F, // LATIN CAPITAL LETTER REVERSED C WITH DOT + 0xA740 => 0xA741, // LATIN CAPITAL LETTER K WITH STROKE + 0xA742 => 0xA743, // LATIN CAPITAL LETTER K WITH 
DIAGONAL STROKE + 0xA744 => 0xA745, // LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE + 0xA746 => 0xA747, // LATIN CAPITAL LETTER BROKEN L + 0xA748 => 0xA749, // LATIN CAPITAL LETTER L WITH HIGH STROKE + 0xA74A => 0xA74B, // LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY + 0xA74C => 0xA74D, // LATIN CAPITAL LETTER O WITH LOOP + 0xA74E => 0xA74F, // LATIN CAPITAL LETTER OO + 0xA750 => 0xA751, // LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER + 0xA752 => 0xA753, // LATIN CAPITAL LETTER P WITH FLOURISH + 0xA754 => 0xA755, // LATIN CAPITAL LETTER P WITH SQUIRREL TAIL + 0xA756 => 0xA757, // LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER + 0xA758 => 0xA759, // LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE + 0xA75A => 0xA75B, // LATIN CAPITAL LETTER R ROTUNDA + 0xA75C => 0xA75D, // LATIN CAPITAL LETTER RUM ROTUNDA + 0xA75E => 0xA75F, // LATIN CAPITAL LETTER V WITH DIAGONAL STROKE + 0xA760 => 0xA761, // LATIN CAPITAL LETTER VY + 0xA762 => 0xA763, // LATIN CAPITAL LETTER VISIGOTHIC Z + 0xA764 => 0xA765, // LATIN CAPITAL LETTER THORN WITH STROKE + 0xA766 => 0xA767, // LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER + 0xA768 => 0xA769, // LATIN CAPITAL LETTER VEND + 0xA76A => 0xA76B, // LATIN CAPITAL LETTER ET + 0xA76C => 0xA76D, // LATIN CAPITAL LETTER IS + 0xA76E => 0xA76F, // LATIN CAPITAL LETTER CON + 0xA779 => 0xA77A, // LATIN CAPITAL LETTER INSULAR D + 0xA77B => 0xA77C, // LATIN CAPITAL LETTER INSULAR F + 0xA77D => 0x1D79, // LATIN CAPITAL LETTER INSULAR G + 0xA77E => 0xA77F, // LATIN CAPITAL LETTER TURNED INSULAR G + 0xA780 => 0xA781, // LATIN CAPITAL LETTER TURNED L + 0xA782 => 0xA783, // LATIN CAPITAL LETTER INSULAR R + 0xA784 => 0xA785, // LATIN CAPITAL LETTER INSULAR S + 0xA786 => 0xA787, // LATIN CAPITAL LETTER INSULAR T + 0xA78B => 0xA78C, // LATIN CAPITAL LETTER SALTILLO + 0xA78D => 0x0265, // LATIN CAPITAL LETTER TURNED H + 0xA790 => 0xA791, // LATIN CAPITAL LETTER N WITH DESCENDER + 0xA792 => 0xA793, // LATIN CAPITAL LETTER C WITH 
BAR + 0xA796 => 0xA797, // LATIN CAPITAL LETTER B WITH FLOURISH + 0xA798 => 0xA799, // LATIN CAPITAL LETTER F WITH STROKE + 0xA79A => 0xA79B, // LATIN CAPITAL LETTER VOLAPUK AE + 0xA79C => 0xA79D, // LATIN CAPITAL LETTER VOLAPUK OE + 0xA79E => 0xA79F, // LATIN CAPITAL LETTER VOLAPUK UE + 0xA7A0 => 0xA7A1, // LATIN CAPITAL LETTER G WITH OBLIQUE STROKE + 0xA7A2 => 0xA7A3, // LATIN CAPITAL LETTER K WITH OBLIQUE STROKE + 0xA7A4 => 0xA7A5, // LATIN CAPITAL LETTER N WITH OBLIQUE STROKE + 0xA7A6 => 0xA7A7, // LATIN CAPITAL LETTER R WITH OBLIQUE STROKE + 0xA7A8 => 0xA7A9, // LATIN CAPITAL LETTER S WITH OBLIQUE STROKE + 0xA7AA => 0x0266, // LATIN CAPITAL LETTER H WITH HOOK + 0xA7AB => 0x025C, // LATIN CAPITAL LETTER REVERSED OPEN E + 0xA7AC => 0x0261, // LATIN CAPITAL LETTER SCRIPT G + 0xA7AD => 0x026C, // LATIN CAPITAL LETTER L WITH BELT + 0xA7B0 => 0x029E, // LATIN CAPITAL LETTER TURNED K + 0xA7B1 => 0x0287, // LATIN CAPITAL LETTER TURNED T + 0xFF21 => 0xFF41, // FULLWIDTH LATIN CAPITAL LETTER A + 0xFF22 => 0xFF42, // FULLWIDTH LATIN CAPITAL LETTER B + 0xFF23 => 0xFF43, // FULLWIDTH LATIN CAPITAL LETTER C + 0xFF24 => 0xFF44, // FULLWIDTH LATIN CAPITAL LETTER D + 0xFF25 => 0xFF45, // FULLWIDTH LATIN CAPITAL LETTER E + 0xFF26 => 0xFF46, // FULLWIDTH LATIN CAPITAL LETTER F + 0xFF27 => 0xFF47, // FULLWIDTH LATIN CAPITAL LETTER G + 0xFF28 => 0xFF48, // FULLWIDTH LATIN CAPITAL LETTER H + 0xFF29 => 0xFF49, // FULLWIDTH LATIN CAPITAL LETTER I + 0xFF2A => 0xFF4A, // FULLWIDTH LATIN CAPITAL LETTER J + 0xFF2B => 0xFF4B, // FULLWIDTH LATIN CAPITAL LETTER K + 0xFF2C => 0xFF4C, // FULLWIDTH LATIN CAPITAL LETTER L + 0xFF2D => 0xFF4D, // FULLWIDTH LATIN CAPITAL LETTER M + 0xFF2E => 0xFF4E, // FULLWIDTH LATIN CAPITAL LETTER N + 0xFF2F => 0xFF4F, // FULLWIDTH LATIN CAPITAL LETTER O + 0xFF30 => 0xFF50, // FULLWIDTH LATIN CAPITAL LETTER P + 0xFF31 => 0xFF51, // FULLWIDTH LATIN CAPITAL LETTER Q + 0xFF32 => 0xFF52, // FULLWIDTH LATIN CAPITAL LETTER R + 0xFF33 => 0xFF53, // FULLWIDTH LATIN 
CAPITAL LETTER S + 0xFF34 => 0xFF54, // FULLWIDTH LATIN CAPITAL LETTER T + 0xFF35 => 0xFF55, // FULLWIDTH LATIN CAPITAL LETTER U + 0xFF36 => 0xFF56, // FULLWIDTH LATIN CAPITAL LETTER V + 0xFF37 => 0xFF57, // FULLWIDTH LATIN CAPITAL LETTER W + 0xFF38 => 0xFF58, // FULLWIDTH LATIN CAPITAL LETTER X + 0xFF39 => 0xFF59, // FULLWIDTH LATIN CAPITAL LETTER Y + 0xFF3A => 0xFF5A, // FULLWIDTH LATIN CAPITAL LETTER Z + 0x10400 => 0x10428, // DESERET CAPITAL LETTER LONG I + 0x10401 => 0x10429, // DESERET CAPITAL LETTER LONG E + 0x10402 => 0x1042A, // DESERET CAPITAL LETTER LONG A + 0x10403 => 0x1042B, // DESERET CAPITAL LETTER LONG AH + 0x10404 => 0x1042C, // DESERET CAPITAL LETTER LONG O + 0x10405 => 0x1042D, // DESERET CAPITAL LETTER LONG OO + 0x10406 => 0x1042E, // DESERET CAPITAL LETTER SHORT I + 0x10407 => 0x1042F, // DESERET CAPITAL LETTER SHORT E + 0x10408 => 0x10430, // DESERET CAPITAL LETTER SHORT A + 0x10409 => 0x10431, // DESERET CAPITAL LETTER SHORT AH + 0x1040A => 0x10432, // DESERET CAPITAL LETTER SHORT O + 0x1040B => 0x10433, // DESERET CAPITAL LETTER SHORT OO + 0x1040C => 0x10434, // DESERET CAPITAL LETTER AY + 0x1040D => 0x10435, // DESERET CAPITAL LETTER OW + 0x1040E => 0x10436, // DESERET CAPITAL LETTER WU + 0x1040F => 0x10437, // DESERET CAPITAL LETTER YEE + 0x10410 => 0x10438, // DESERET CAPITAL LETTER H + 0x10411 => 0x10439, // DESERET CAPITAL LETTER PEE + 0x10412 => 0x1043A, // DESERET CAPITAL LETTER BEE + 0x10413 => 0x1043B, // DESERET CAPITAL LETTER TEE + 0x10414 => 0x1043C, // DESERET CAPITAL LETTER DEE + 0x10415 => 0x1043D, // DESERET CAPITAL LETTER CHEE + 0x10416 => 0x1043E, // DESERET CAPITAL LETTER JEE + 0x10417 => 0x1043F, // DESERET CAPITAL LETTER KAY + 0x10418 => 0x10440, // DESERET CAPITAL LETTER GAY + 0x10419 => 0x10441, // DESERET CAPITAL LETTER EF + 0x1041A => 0x10442, // DESERET CAPITAL LETTER VEE + 0x1041B => 0x10443, // DESERET CAPITAL LETTER ETH + 0x1041C => 0x10444, // DESERET CAPITAL LETTER THEE + 0x1041D => 0x10445, // DESERET CAPITAL 
LETTER ES + 0x1041E => 0x10446, // DESERET CAPITAL LETTER ZEE + 0x1041F => 0x10447, // DESERET CAPITAL LETTER ESH + 0x10420 => 0x10448, // DESERET CAPITAL LETTER ZHEE + 0x10421 => 0x10449, // DESERET CAPITAL LETTER ER + 0x10422 => 0x1044A, // DESERET CAPITAL LETTER EL + 0x10423 => 0x1044B, // DESERET CAPITAL LETTER EM + 0x10424 => 0x1044C, // DESERET CAPITAL LETTER EN + 0x10425 => 0x1044D, // DESERET CAPITAL LETTER ENG + 0x10426 => 0x1044E, // DESERET CAPITAL LETTER OI + 0x10427 => 0x1044F, // DESERET CAPITAL LETTER EW + 0x118A0 => 0x118C0, // WARANG CITI CAPITAL LETTER NGAA + 0x118A1 => 0x118C1, // WARANG CITI CAPITAL LETTER A + 0x118A2 => 0x118C2, // WARANG CITI CAPITAL LETTER WI + 0x118A3 => 0x118C3, // WARANG CITI CAPITAL LETTER YU + 0x118A4 => 0x118C4, // WARANG CITI CAPITAL LETTER YA + 0x118A5 => 0x118C5, // WARANG CITI CAPITAL LETTER YO + 0x118A6 => 0x118C6, // WARANG CITI CAPITAL LETTER II + 0x118A7 => 0x118C7, // WARANG CITI CAPITAL LETTER UU + 0x118A8 => 0x118C8, // WARANG CITI CAPITAL LETTER E + 0x118A9 => 0x118C9, // WARANG CITI CAPITAL LETTER O + 0x118AA => 0x118CA, // WARANG CITI CAPITAL LETTER ANG + 0x118AB => 0x118CB, // WARANG CITI CAPITAL LETTER GA + 0x118AC => 0x118CC, // WARANG CITI CAPITAL LETTER KO + 0x118AD => 0x118CD, // WARANG CITI CAPITAL LETTER ENY + 0x118AE => 0x118CE, // WARANG CITI CAPITAL LETTER YUJ + 0x118AF => 0x118CF, // WARANG CITI CAPITAL LETTER UC + 0x118B0 => 0x118D0, // WARANG CITI CAPITAL LETTER ENN + 0x118B1 => 0x118D1, // WARANG CITI CAPITAL LETTER ODD + 0x118B2 => 0x118D2, // WARANG CITI CAPITAL LETTER TTE + 0x118B3 => 0x118D3, // WARANG CITI CAPITAL LETTER NUNG + 0x118B4 => 0x118D4, // WARANG CITI CAPITAL LETTER DA + 0x118B5 => 0x118D5, // WARANG CITI CAPITAL LETTER AT + 0x118B6 => 0x118D6, // WARANG CITI CAPITAL LETTER AM + 0x118B7 => 0x118D7, // WARANG CITI CAPITAL LETTER BU + 0x118B8 => 0x118D8, // WARANG CITI CAPITAL LETTER PU + 0x118B9 => 0x118D9, // WARANG CITI CAPITAL LETTER HIYO + 0x118BA => 0x118DA, // WARANG 
CITI CAPITAL LETTER HOLO
+        0x118BB => 0x118DB, // WARANG CITI CAPITAL LETTER HORR
+        0x118BC => 0x118DC, // WARANG CITI CAPITAL LETTER HAR
+        0x118BD => 0x118DD, // WARANG CITI CAPITAL LETTER SSUU
+        0x118BE => 0x118DE, // WARANG CITI CAPITAL LETTER SII
+        0x118BF => 0x118DF, // WARANG CITI CAPITAL LETTER VIYO
+    );
+
+    /**
+     * Converts the string to uppercase (using Unicode case folding rules)
+     *
+     * Every character whose code point appears as a value in the folding map
+     * is replaced by the character for the corresponding map key; all other
+     * characters are left untouched.
+     *
+     * @param string $str String to convert (expected to be valid UTF-8)
+     *
+     * @return string Upper-case version of string, or the input unchanged
+     *                when it cannot be split into UTF-8 characters
+     */
+    public static function toUpperCase($str)
+    {
+        // Split into an array of Unicode characters
+        $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
+        if ($chars === false) {
+            // preg_split() fails on malformed UTF-8; leave the input as-is
+            // rather than feeding `false` to foreach/implode
+            return $str;
+        }
+
+        // Iterate through them, converting to uppercase where possible
+        foreach ($chars as &$char) {
+            $codePoint = self::ordutf8($char);
+            // Reverse lookup: array_search() yields the first map key whose
+            // value equals this code point. Strict mode avoids type juggling
+            // and the explicit false check avoids relying on truthiness.
+            $key = array_search($codePoint, static::$map, true);
+            if ($key !== false) {
+                $char = Html5Entities::fromDecimal($key);
+            }
+        }
+        // Break the reference so a later write to $char cannot clobber the last element
+        unset($char);
+
+        return implode('', $chars);
+    }
+
+    /**
+     * Like ord(), but for UTF-8 chars
+     *
+     * Based on code found here: http://php.net/ord#109812
+     *
+     * @param string $char A single UTF-8 encoded character (1 to 4 bytes)
+     *
+     * @return int Unicode code point
+     */
+    private static function ordutf8($char)
+    {
+        $code = ord(substr($char, 0, 1));
+        if ($code < 0x80) {
+            // Single-byte character: 0xxxxxxx
+            return $code;
+        }
+
+        // The lead byte announces the sequence length; strip its marker bits
+        // so only the payload bits remain
+        if ($code < 0xE0) {
+            $byteCount = 2; // 110xxxxx
+            $code -= 0xC0;
+        } elseif ($code < 0xF0) {
+            $byteCount = 3; // 1110xxxx
+            $code -= 0xE0;
+        } else {
+            $byteCount = 4; // 11110xxx
+            $code -= 0xF0;
+        }
+
+        // Append the low six bits of every continuation byte (10xxxxxx).
+        // Each iteration must read its own byte: always reading the byte at
+        // offset 1 yields wrong code points for 3- and 4-byte sequences.
+        for ($i = 1; $i < $byteCount; $i++) {
+            $code = ($code << 6) | (ord(substr($char, $i, 1)) & 0x3F);
+        }
+
+        return $code;
+    }
+}
diff --git a/tests/Util/UnicodeCaseFolderTest.php b/tests/Util/UnicodeCaseFolderTest.php
new file mode 100644
index 0000000000..9dd8a7037d
--- /dev/null
+++ b/tests/Util/UnicodeCaseFolderTest.php
@@ -0,0 +1,28 @@
+
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace League\CommonMark\Tests\Util;
+
+use League\CommonMark\Util\UnicodeCaseFolder;
+
+class UnicodeCaseFolderTest extends \PHPUnit_Framework_TestCase
+{
+    public function testToUpperCase()
+    {
+        $this->assertEquals('FOO', UnicodeCaseFolder::toUpperCase('foo'));
+        $this->assertEquals('ΑΓΩ', UnicodeCaseFolder::toUpperCase('αγω'));
+        $this->assertEquals('ТОЛПОЙ', UnicodeCaseFolder::toUpperCase('толпой'));
+        $this->assertEquals('ТОЛПОЙ', UnicodeCaseFolder::toUpperCase('Толпой'));
+        $this->assertEquals('ТОЛПОЙ', UnicodeCaseFolder::toUpperCase('ТОЛПОЙ'));
+        // Three-byte UTF-8 sequences (fullwidth Latin) must decode correctly too
+        $this->assertEquals('ＡＢＣ', UnicodeCaseFolder::toUpperCase('ａｂｃ'));
+    }
+}