| 1 | #!perl -w
|
|---|
| 2 | BEGIN {
|
|---|
| 3 | if (ord("A") == 193) {
|
|---|
| 4 | print "1..0 # Skip: EBCDIC\n";
|
|---|
| 5 | exit 0;
|
|---|
| 6 | }
|
|---|
| 7 | chdir 't' if -d 't';
|
|---|
| 8 | @INC = '../lib';
|
|---|
| 9 | @INC = "::lib" if $^O eq 'MacOS'; # module parses @INC itself
|
|---|
| 10 | require Config; import Config;
|
|---|
| 11 | if ($Config{'extensions'} !~ /\bStorable\b/) {
|
|---|
| 12 | print "1..0 # Skip: Storable was not built; Unicode::UCD uses Storable\n";
|
|---|
| 13 | exit 0;
|
|---|
| 14 | }
|
|---|
| 15 | }
|
|---|
| 16 |
|
|---|
| 17 | use strict;
|
|---|
| 18 | use Unicode::UCD;
|
|---|
| 19 | use Test::More;
|
|---|
| 20 |
|
|---|
| 21 | BEGIN { plan tests => 188 };
|
|---|
| 22 |
|
|---|
| 23 | use Unicode::UCD 'charinfo';
|
|---|
| 24 |
|
|---|
| 25 | my $charinfo;
|
|---|
| 26 |
|
|---|
| 27 | $charinfo = charinfo(0x41);
|
|---|
| 28 |
|
|---|
| 29 | is($charinfo->{code}, '0041', 'LATIN CAPITAL LETTER A');
|
|---|
| 30 | is($charinfo->{name}, 'LATIN CAPITAL LETTER A');
|
|---|
| 31 | is($charinfo->{category}, 'Lu');
|
|---|
| 32 | is($charinfo->{combining}, '0');
|
|---|
| 33 | is($charinfo->{bidi}, 'L');
|
|---|
| 34 | is($charinfo->{decomposition}, '');
|
|---|
| 35 | is($charinfo->{decimal}, '');
|
|---|
| 36 | is($charinfo->{digit}, '');
|
|---|
| 37 | is($charinfo->{numeric}, '');
|
|---|
| 38 | is($charinfo->{mirrored}, 'N');
|
|---|
| 39 | is($charinfo->{unicode10}, '');
|
|---|
| 40 | is($charinfo->{comment}, '');
|
|---|
| 41 | is($charinfo->{upper}, '');
|
|---|
| 42 | is($charinfo->{lower}, '0061');
|
|---|
| 43 | is($charinfo->{title}, '');
|
|---|
| 44 | is($charinfo->{block}, 'Basic Latin');
|
|---|
| 45 | is($charinfo->{script}, 'Latin');
|
|---|
| 46 |
|
|---|
| 47 | $charinfo = charinfo(0x100);
|
|---|
| 48 |
|
|---|
| 49 | is($charinfo->{code}, '0100', 'LATIN CAPITAL LETTER A WITH MACRON');
|
|---|
| 50 | is($charinfo->{name}, 'LATIN CAPITAL LETTER A WITH MACRON');
|
|---|
| 51 | is($charinfo->{category}, 'Lu');
|
|---|
| 52 | is($charinfo->{combining}, '0');
|
|---|
| 53 | is($charinfo->{bidi}, 'L');
|
|---|
| 54 | is($charinfo->{decomposition}, '0041 0304');
|
|---|
| 55 | is($charinfo->{decimal}, '');
|
|---|
| 56 | is($charinfo->{digit}, '');
|
|---|
| 57 | is($charinfo->{numeric}, '');
|
|---|
| 58 | is($charinfo->{mirrored}, 'N');
|
|---|
| 59 | is($charinfo->{unicode10}, 'LATIN CAPITAL LETTER A MACRON');
|
|---|
| 60 | is($charinfo->{comment}, '');
|
|---|
| 61 | is($charinfo->{upper}, '');
|
|---|
| 62 | is($charinfo->{lower}, '0101');
|
|---|
| 63 | is($charinfo->{title}, '');
|
|---|
| 64 | is($charinfo->{block}, 'Latin Extended-A');
|
|---|
| 65 | is($charinfo->{script}, 'Latin');
|
|---|
| 66 |
|
|---|
| 67 | # 0x0590 is in the Hebrew block but unused.
|
|---|
| 68 |
|
|---|
| 69 | $charinfo = charinfo(0x590);
|
|---|
| 70 |
|
|---|
| 71 | is($charinfo->{code}, undef, '0x0590 - unused Hebrew');
|
|---|
| 72 | is($charinfo->{name}, undef);
|
|---|
| 73 | is($charinfo->{category}, undef);
|
|---|
| 74 | is($charinfo->{combining}, undef);
|
|---|
| 75 | is($charinfo->{bidi}, undef);
|
|---|
| 76 | is($charinfo->{decomposition}, undef);
|
|---|
| 77 | is($charinfo->{decimal}, undef);
|
|---|
| 78 | is($charinfo->{digit}, undef);
|
|---|
| 79 | is($charinfo->{numeric}, undef);
|
|---|
| 80 | is($charinfo->{mirrored}, undef);
|
|---|
| 81 | is($charinfo->{unicode10}, undef);
|
|---|
| 82 | is($charinfo->{comment}, undef);
|
|---|
| 83 | is($charinfo->{upper}, undef);
|
|---|
| 84 | is($charinfo->{lower}, undef);
|
|---|
| 85 | is($charinfo->{title}, undef);
|
|---|
| 86 | is($charinfo->{block}, undef);
|
|---|
| 87 | is($charinfo->{script}, undef);
|
|---|
| 88 |
|
|---|
| 89 | # 0x05d0 is in the Hebrew block and used.
|
|---|
| 90 |
|
|---|
| 91 | $charinfo = charinfo(0x5d0);
|
|---|
| 92 |
|
|---|
| 93 | is($charinfo->{code}, '05D0', '05D0 - used Hebrew');
|
|---|
| 94 | is($charinfo->{name}, 'HEBREW LETTER ALEF');
|
|---|
| 95 | is($charinfo->{category}, 'Lo');
|
|---|
| 96 | is($charinfo->{combining}, '0');
|
|---|
| 97 | is($charinfo->{bidi}, 'R');
|
|---|
| 98 | is($charinfo->{decomposition}, '');
|
|---|
| 99 | is($charinfo->{decimal}, '');
|
|---|
| 100 | is($charinfo->{digit}, '');
|
|---|
| 101 | is($charinfo->{numeric}, '');
|
|---|
| 102 | is($charinfo->{mirrored}, 'N');
|
|---|
| 103 | is($charinfo->{unicode10}, '');
|
|---|
| 104 | is($charinfo->{comment}, '');
|
|---|
| 105 | is($charinfo->{upper}, '');
|
|---|
| 106 | is($charinfo->{lower}, '');
|
|---|
| 107 | is($charinfo->{title}, '');
|
|---|
| 108 | is($charinfo->{block}, 'Hebrew');
|
|---|
| 109 | is($charinfo->{script}, 'Hebrew');
|
|---|
| 110 |
|
|---|
| 111 | # An open syllable in Hangul.
|
|---|
| 112 |
|
|---|
| 113 | $charinfo = charinfo(0xAC00);
|
|---|
| 114 |
|
|---|
| 115 | is($charinfo->{code}, 'AC00', 'HANGUL SYLLABLE-AC00');
|
|---|
| 116 | is($charinfo->{name}, 'HANGUL SYLLABLE-AC00');
|
|---|
| 117 | is($charinfo->{category}, 'Lo');
|
|---|
| 118 | is($charinfo->{combining}, '0');
|
|---|
| 119 | is($charinfo->{bidi}, 'L');
|
|---|
| 120 | is($charinfo->{decomposition}, undef);
|
|---|
| 121 | is($charinfo->{decimal}, '');
|
|---|
| 122 | is($charinfo->{digit}, '');
|
|---|
| 123 | is($charinfo->{numeric}, '');
|
|---|
| 124 | is($charinfo->{mirrored}, 'N');
|
|---|
| 125 | is($charinfo->{unicode10}, '');
|
|---|
| 126 | is($charinfo->{comment}, '');
|
|---|
| 127 | is($charinfo->{upper}, '');
|
|---|
| 128 | is($charinfo->{lower}, '');
|
|---|
| 129 | is($charinfo->{title}, '');
|
|---|
| 130 | is($charinfo->{block}, 'Hangul Syllables');
|
|---|
| 131 | is($charinfo->{script}, 'Hangul');
|
|---|
| 132 |
|
|---|
| 133 | # A closed syllable in Hangul.
|
|---|
| 134 |
|
|---|
| 135 | $charinfo = charinfo(0xAE00);
|
|---|
| 136 |
|
|---|
| 137 | is($charinfo->{code}, 'AE00', 'HANGUL SYLLABLE-AE00');
|
|---|
| 138 | is($charinfo->{name}, 'HANGUL SYLLABLE-AE00');
|
|---|
| 139 | is($charinfo->{category}, 'Lo');
|
|---|
| 140 | is($charinfo->{combining}, '0');
|
|---|
| 141 | is($charinfo->{bidi}, 'L');
|
|---|
| 142 | is($charinfo->{decomposition}, undef);
|
|---|
| 143 | is($charinfo->{decimal}, '');
|
|---|
| 144 | is($charinfo->{digit}, '');
|
|---|
| 145 | is($charinfo->{numeric}, '');
|
|---|
| 146 | is($charinfo->{mirrored}, 'N');
|
|---|
| 147 | is($charinfo->{unicode10}, '');
|
|---|
| 148 | is($charinfo->{comment}, '');
|
|---|
| 149 | is($charinfo->{upper}, '');
|
|---|
| 150 | is($charinfo->{lower}, '');
|
|---|
| 151 | is($charinfo->{title}, '');
|
|---|
| 152 | is($charinfo->{block}, 'Hangul Syllables');
|
|---|
| 153 | is($charinfo->{script}, 'Hangul');
|
|---|
| 154 |
|
|---|
| 155 | $charinfo = charinfo(0x1D400);
|
|---|
| 156 |
|
|---|
| 157 | is($charinfo->{code}, '1D400', 'MATHEMATICAL BOLD CAPITAL A');
|
|---|
| 158 | is($charinfo->{name}, 'MATHEMATICAL BOLD CAPITAL A');
|
|---|
| 159 | is($charinfo->{category}, 'Lu');
|
|---|
| 160 | is($charinfo->{combining}, '0');
|
|---|
| 161 | is($charinfo->{bidi}, 'L');
|
|---|
| 162 | is($charinfo->{decomposition}, '<font> 0041');
|
|---|
| 163 | is($charinfo->{decimal}, '');
|
|---|
| 164 | is($charinfo->{digit}, '');
|
|---|
| 165 | is($charinfo->{numeric}, '');
|
|---|
| 166 | is($charinfo->{mirrored}, 'N');
|
|---|
| 167 | is($charinfo->{unicode10}, '');
|
|---|
| 168 | is($charinfo->{comment}, '');
|
|---|
| 169 | is($charinfo->{upper}, '');
|
|---|
| 170 | is($charinfo->{lower}, '');
|
|---|
| 171 | is($charinfo->{title}, '');
|
|---|
| 172 | is($charinfo->{block}, 'Mathematical Alphanumeric Symbols');
|
|---|
| 173 | is($charinfo->{script}, 'Common');
|
|---|
| 174 |
|
|---|
| 175 | use Unicode::UCD qw(charblock charscript);
|
|---|
| 176 |
|
|---|
| 177 | # 0x0590 is in the Hebrew block but unused.
|
|---|
| 178 |
|
|---|
| 179 | is(charblock(0x590), 'Hebrew', '0x0590 - Hebrew unused charblock');
|
|---|
| 180 | is(charscript(0x590), undef, '0x0590 - Hebrew unused charscript');
|
|---|
| 181 |
|
|---|
| 182 | $charinfo = charinfo(0xbe);
|
|---|
| 183 |
|
|---|
| 184 | is($charinfo->{code}, '00BE', 'VULGAR FRACTION THREE QUARTERS');
|
|---|
| 185 | is($charinfo->{name}, 'VULGAR FRACTION THREE QUARTERS');
|
|---|
| 186 | is($charinfo->{category}, 'No');
|
|---|
| 187 | is($charinfo->{combining}, '0');
|
|---|
| 188 | is($charinfo->{bidi}, 'ON');
|
|---|
| 189 | is($charinfo->{decomposition}, '<fraction> 0033 2044 0034');
|
|---|
| 190 | is($charinfo->{decimal}, '');
|
|---|
| 191 | is($charinfo->{digit}, '');
|
|---|
| 192 | is($charinfo->{numeric}, '3/4');
|
|---|
| 193 | is($charinfo->{mirrored}, 'N');
|
|---|
| 194 | is($charinfo->{unicode10}, 'FRACTION THREE QUARTERS');
|
|---|
| 195 | is($charinfo->{comment}, '');
|
|---|
| 196 | is($charinfo->{upper}, '');
|
|---|
| 197 | is($charinfo->{lower}, '');
|
|---|
| 198 | is($charinfo->{title}, '');
|
|---|
| 199 | is($charinfo->{block}, 'Latin-1 Supplement');
|
|---|
| 200 | is($charinfo->{script}, 'Common');
|
|---|
| 201 |
|
|---|
| 202 | use Unicode::UCD qw(charblocks charscripts);
|
|---|
| 203 |
|
|---|
| 204 | my $charblocks = charblocks();
|
|---|
| 205 |
|
|---|
| 206 | ok(exists $charblocks->{Thai}, 'Thai charblock exists');
|
|---|
| 207 | is($charblocks->{Thai}->[0]->[0], hex('0e00'));
|
|---|
| 208 | ok(!exists $charblocks->{PigLatin}, 'PigLatin charblock does not exist');
|
|---|
| 209 |
|
|---|
| 210 | my $charscripts = charscripts();
|
|---|
| 211 |
|
|---|
| 212 | ok(exists $charscripts->{Armenian}, 'Armenian charscript exists');
|
|---|
| 213 | is($charscripts->{Armenian}->[0]->[0], hex('0531'));
|
|---|
| 214 | ok(!exists $charscripts->{PigLatin}, 'PigLatin charscript does not exist');
|
|---|
| 215 |
|
|---|
| 216 | my $charscript;
|
|---|
| 217 |
|
|---|
| 218 | $charscript = charscript("12ab");
|
|---|
| 219 | is($charscript, 'Ethiopic', 'Ethiopic charscript');
|
|---|
| 220 |
|
|---|
| 221 | $charscript = charscript("0x12ab");
|
|---|
| 222 | is($charscript, 'Ethiopic');
|
|---|
| 223 |
|
|---|
| 224 | $charscript = charscript("U+12ab");
|
|---|
| 225 | is($charscript, 'Ethiopic');
|
|---|
| 226 |
|
|---|
| 227 | my $ranges;
|
|---|
| 228 |
|
|---|
| 229 | $ranges = charscript('Ogham');
|
|---|
| 230 | is($ranges->[1]->[0], hex('1681'), 'Ogham charscript');
|
|---|
| 231 | is($ranges->[1]->[1], hex('169a'));
|
|---|
| 232 |
|
|---|
| 233 | use Unicode::UCD qw(charinrange);
|
|---|
| 234 |
|
|---|
| 235 | $ranges = charscript('Cherokee');
|
|---|
| 236 | ok(!charinrange($ranges, "139f"), 'Cherokee charscript');
|
|---|
| 237 | ok( charinrange($ranges, "13a0"));
|
|---|
| 238 | ok( charinrange($ranges, "13f4"));
|
|---|
| 239 | ok(!charinrange($ranges, "13f5"));
|
|---|
| 240 |
|
|---|
| 241 | is(Unicode::UCD::UnicodeVersion, '4.1.0', 'UnicodeVersion');
|
|---|
| 242 |
|
|---|
| 243 | use Unicode::UCD qw(compexcl);
|
|---|
| 244 |
|
|---|
| 245 | ok(!compexcl(0x0100), 'compexcl');
|
|---|
| 246 | ok( compexcl(0x0958));
|
|---|
| 247 |
|
|---|
| 248 | use Unicode::UCD qw(casefold);
|
|---|
| 249 |
|
|---|
| 250 | my $casefold;
|
|---|
| 251 |
|
|---|
| 252 | $casefold = casefold(0x41);
|
|---|
| 253 |
|
|---|
| 254 | ok($casefold->{code} eq '0041' &&
|
|---|
| 255 | $casefold->{status} eq 'C' &&
|
|---|
| 256 | $casefold->{mapping} eq '0061', 'casefold 0x41');
|
|---|
| 257 |
|
|---|
| 258 | $casefold = casefold(0xdf);
|
|---|
| 259 |
|
|---|
| 260 | ok($casefold->{code} eq '00DF' &&
|
|---|
| 261 | $casefold->{status} eq 'F' &&
|
|---|
| 262 | $casefold->{mapping} eq '0073 0073', 'casefold 0xDF');
|
|---|
| 263 |
|
|---|
| 264 | ok(!casefold(0x20));
|
|---|
| 265 |
|
|---|
| 266 | use Unicode::UCD qw(casespec);
|
|---|
| 267 |
|
|---|
| 268 | my $casespec;
|
|---|
| 269 |
|
|---|
| 270 | ok(!casespec(0x41));
|
|---|
| 271 |
|
|---|
| 272 | $casespec = casespec(0xdf);
|
|---|
| 273 |
|
|---|
| 274 | ok($casespec->{code} eq '00DF' &&
|
|---|
| 275 | $casespec->{lower} eq '00DF' &&
|
|---|
| 276 | $casespec->{title} eq '0053 0073' &&
|
|---|
| 277 | $casespec->{upper} eq '0053 0053' &&
|
|---|
| 278 | !defined $casespec->{condition}, 'casespec 0xDF');
|
|---|
| 279 |
|
|---|
| 280 | $casespec = casespec(0x307);
|
|---|
| 281 |
|
|---|
| 282 | ok($casespec->{az}->{code} eq '0307' &&
|
|---|
| 283 | !defined $casespec->{az}->{lower} &&
|
|---|
| 284 | $casespec->{az}->{title} eq '0307' &&
|
|---|
| 285 | $casespec->{az}->{upper} eq '0307' &&
|
|---|
| 286 | $casespec->{az}->{condition} eq 'az After_I',
|
|---|
| 287 | 'casespec 0x307');
|
|---|
| 288 |
|
|---|
| 289 | # perl #7305 UnicodeCD::compexcl is weird
|
|---|
| 290 |
|
|---|
| 291 | for (1) {my $a=compexcl $_}
|
|---|
| 292 | ok(1, 'compexcl read-only $_: perl #7305');
|
|---|
| 293 | grep {compexcl $_} %{{1=>2}};
|
|---|
| 294 | ok(1, 'compexcl read-only hash: perl #7305');
|
|---|
| 295 |
|
|---|
| 296 | is(Unicode::UCD::_getcode('123'), 123, "_getcode(123)");
|
|---|
| 297 | is(Unicode::UCD::_getcode('0123'), 0x123, "_getcode(0123)");
|
|---|
| 298 | is(Unicode::UCD::_getcode('0x123'), 0x123, "_getcode(0x123)");
|
|---|
| 299 | is(Unicode::UCD::_getcode('0X123'), 0x123, "_getcode(0X123)");
|
|---|
| 300 | is(Unicode::UCD::_getcode('U+123'), 0x123, "_getcode(U+123)");
|
|---|
| 301 | is(Unicode::UCD::_getcode('u+123'), 0x123, "_getcode(u+123)");
|
|---|
| 302 | is(Unicode::UCD::_getcode('U+1234'), 0x1234, "_getcode(U+1234)");
|
|---|
| 303 | is(Unicode::UCD::_getcode('U+12345'), 0x12345, "_getcode(U+12345)");
|
|---|
| 304 | is(Unicode::UCD::_getcode('123x'), undef, "_getcode(123x)");
|
|---|
| 305 | is(Unicode::UCD::_getcode('x123'), undef, "_getcode(x123)");
|
|---|
| 306 | is(Unicode::UCD::_getcode('0x123x'), undef, "_getcode(x123)");
|
|---|
| 307 | is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)");
|
|---|
| 308 |
|
|---|
| 309 | {
|
|---|
| 310 | my $r1 = charscript('Latin');
|
|---|
| 311 | my $n1 = @$r1;
|
|---|
| 312 | is($n1, 29, "29 ranges in Latin script (Unicode 4.1.0)");
|
|---|
| 313 | shift @$r1 while @$r1;
|
|---|
| 314 | my $r2 = charscript('Latin');
|
|---|
| 315 | is(@$r2, $n1, "modifying results should not mess up internal caches");
|
|---|
| 316 | }
|
|---|
| 317 |
|
|---|
| 318 | {
|
|---|
| 319 | is(charinfo(0xdeadbeef), undef, "[perl #23273] warnings in Unicode::UCD");
|
|---|
| 320 | }
|
|---|
| 321 |
|
|---|
| 322 | use Unicode::UCD qw(namedseq);
|
|---|
| 323 |
|
|---|
| 324 | is(namedseq("KATAKANA LETTER AINU P"), "\x{31F7}\x{309A}", "namedseq");
|
|---|
| 325 | is(namedseq("KATAKANA LETTER AINU Q"), undef);
|
|---|
| 326 | is(namedseq(), undef);
|
|---|
| 327 | is(namedseq(qw(foo bar)), undef);
|
|---|
| 328 | my @ns = namedseq("KATAKANA LETTER AINU P");
|
|---|
| 329 | is(scalar @ns, 2);
|
|---|
| 330 | is($ns[0], 0x31F7);
|
|---|
| 331 | is($ns[1], 0x309A);
|
|---|
| 332 | my %ns = namedseq();
|
|---|
| 333 | is($ns{"KATAKANA LETTER AINU P"}, "\x{31F7}\x{309A}");
|
|---|
| 334 | @ns = namedseq(42);
|
|---|
| 335 | is(@ns, 0);
|
|---|
| 336 |
|
|---|