| 1 | BEGIN {
|
|---|
| 2 | require Config; import Config;
|
|---|
| 3 | if ($Config{'extensions'} !~ /\bEncode\b/) {
|
|---|
| 4 | print "1..0 # Skip: Encode was not built\n";
|
|---|
| 5 | exit 0;
|
|---|
| 6 | }
|
|---|
| 7 | unless (find PerlIO::Layer 'perlio') {
|
|---|
| 8 | print "1..0 # Skip: PerlIO was not built\n";
|
|---|
| 9 | exit 0;
|
|---|
| 10 | }
|
|---|
| 11 | if (ord("A") == 193) {
|
|---|
| 12 | print "1..0 # encoding pragma does not support EBCDIC platforms\n";
|
|---|
| 13 | exit(0);
|
|---|
| 14 | }
|
|---|
| 15 | }
|
|---|
| 16 |
|
|---|
| 17 | print "1..31\n";
|
|---|
| 18 |
|
|---|
| 19 | use encoding "latin1"; # ignored (overwritten by the next line)
|
|---|
| 20 | use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
|
|---|
| 21 |
|
|---|
| 22 | # "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
|
|---|
| 23 | # \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
|
|---|
| 24 | # instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
|
|---|
| 25 |
|
|---|
| 26 | $a = "\xDF";
|
|---|
| 27 | $b = "\x{100}";
|
|---|
| 28 |
|
|---|
| 29 | print "not " unless ord($a) == 0x3af;
|
|---|
| 30 | print "ok 1\n";
|
|---|
| 31 |
|
|---|
| 32 | print "not " unless ord($b) == 0x100;
|
|---|
| 33 | print "ok 2\n";
|
|---|
| 34 |
|
|---|
| 35 | my $c;
|
|---|
| 36 |
|
|---|
| 37 | $c = $a . $b;
|
|---|
| 38 |
|
|---|
| 39 | print "not " unless ord($c) == 0x3af;
|
|---|
| 40 | print "ok 3\n";
|
|---|
| 41 |
|
|---|
| 42 | print "not " unless length($c) == 2;
|
|---|
| 43 | print "ok 4\n";
|
|---|
| 44 |
|
|---|
| 45 | print "not " unless ord(substr($c, 1, 1)) == 0x100;
|
|---|
| 46 | print "ok 5\n";
|
|---|
| 47 |
|
|---|
| 48 | print "not " unless ord(chr(0xdf)) == 0x3af; # spooky
|
|---|
| 49 | print "ok 6\n";
|
|---|
| 50 |
|
|---|
| 51 | print "not " unless ord(pack("C", 0xdf)) == 0x3af;
|
|---|
| 52 | print "ok 7\n";
|
|---|
| 53 |
|
|---|
| 54 | # we didn't break pack/unpack, I hope
|
|---|
| 55 |
|
|---|
| 56 | print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
|
|---|
| 57 | print "ok 8\n";
|
|---|
| 58 |
|
|---|
| 59 | # the first octet of UTF-8 encoded 0x3af
|
|---|
| 60 | print "not " unless unpack("C", chr(0xdf)) == 0xce;
|
|---|
| 61 | print "ok 9\n";
|
|---|
| 62 |
|
|---|
| 63 | print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
|
|---|
| 64 | print "ok 10\n";
|
|---|
| 65 |
|
|---|
| 66 | print "not " unless unpack("U", chr(0xdf)) == 0x3af;
|
|---|
| 67 | print "ok 11\n";
|
|---|
| 68 |
|
|---|
| 69 | # charnames must still work
|
|---|
| 70 | use charnames ':full';
|
|---|
| 71 | print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
|
|---|
| 72 | print "ok 12\n";
|
|---|
| 73 |
|
|---|
| 74 | # combine
|
|---|
| 75 |
|
|---|
| 76 | $c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
|
|---|
| 77 |
|
|---|
| 78 | print "not " unless ord($c) == 0x3af;
|
|---|
| 79 | print "ok 13\n";
|
|---|
| 80 |
|
|---|
| 81 | print "not " unless ord(substr($c, 1, 1)) == 0xdf;
|
|---|
| 82 | print "ok 14\n";
|
|---|
| 83 |
|
|---|
| 84 | print "not " unless ord(substr($c, 2, 1)) == 0x3af;
|
|---|
| 85 | print "ok 15\n";
|
|---|
| 86 |
|
|---|
| 87 | # regex literals
|
|---|
| 88 |
|
|---|
| 89 | print "not " unless "\xDF" =~ /\x{3AF}/;
|
|---|
| 90 | print "ok 16\n";
|
|---|
| 91 |
|
|---|
| 92 | print "not " unless "\x{3AF}" =~ /\xDF/;
|
|---|
| 93 | print "ok 17\n";
|
|---|
| 94 |
|
|---|
| 95 | print "not " unless "\xDF" =~ /\xDF/;
|
|---|
| 96 | print "ok 18\n";
|
|---|
| 97 |
|
|---|
| 98 | print "not " unless "\x{3AF}" =~ /\x{3AF}/;
|
|---|
| 99 | print "ok 19\n";
|
|---|
| 100 |
|
|---|
| 101 | # eq, cmp
|
|---|
| 102 |
|
|---|
| 103 | my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = (
|
|---|
| 104 | pack("C*", 0xDF ), # byte
|
|---|
| 105 | pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0
|
|---|
| 106 | pack("U*", 0x3AF), # $U eq $byte
|
|---|
| 107 | pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding
|
|---|
| 108 | pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1)
|
|---|
| 109 | pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
|
|---|
| 110 | pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb)
|
|---|
| 111 | );
|
|---|
| 112 |
|
|---|
| 113 | # all the tests in this section that compare a byte encoded string
|
|---|
| 114 | # ato UTF-8 encoded are run in all possible vairants
|
|---|
| 115 | # all of the eq, ne, cmp operations tested,
|
|---|
| 116 | # $v z $u tested as well as $u z $v
|
|---|
| 117 |
|
|---|
| 118 | sub alleq($$){
|
|---|
| 119 | my ($a,$b) = (shift, shift);
|
|---|
| 120 | $a eq $b && $b eq $a &&
|
|---|
| 121 | !( $a ne $b ) && !( $b ne $a ) &&
|
|---|
| 122 | ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0;
|
|---|
| 123 | }
|
|---|
| 124 |
|
|---|
| 125 | sub anyeq($$){
|
|---|
| 126 | my ($a,$b) = (shift, shift);
|
|---|
| 127 | $a eq $b || $b eq $a ||
|
|---|
| 128 | !( $a ne $b ) || !( $b ne $a ) ||
|
|---|
| 129 | ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0;
|
|---|
| 130 | }
|
|---|
| 131 |
|
|---|
| 132 | sub allgt($$){
|
|---|
| 133 | my ($a,$b) = (shift, shift);
|
|---|
| 134 | ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
|
|---|
| 135 | }
|
|---|
| 136 | #match the correct UTF-8 string
|
|---|
| 137 | print "not " unless alleq($byte, $U);
|
|---|
| 138 | print "ok 20\n";
|
|---|
| 139 |
|
|---|
| 140 | #do not match a wrong UTF-8 string
|
|---|
| 141 | print "not " if anyeq($byte, $Ub);
|
|---|
| 142 | print "ok 21\n";
|
|---|
| 143 |
|
|---|
| 144 | #string ordering
|
|---|
| 145 | print "not " unless allgt ( $g1, $byte ) &&
|
|---|
| 146 | allgt ( $g2, $byte ) &&
|
|---|
| 147 | allgt ( $byte, $l ) &&
|
|---|
| 148 | allgt ( $bytes, $U );
|
|---|
| 149 | print "ok 22\n";
|
|---|
| 150 |
|
|---|
| 151 | # upgrade, downgrade
|
|---|
| 152 |
|
|---|
| 153 | my ($u,$v,$v2);
|
|---|
| 154 | $u = $v = $v2 = pack("C*", 0xDF);
|
|---|
| 155 | utf8::upgrade($v); #explicit upgrade
|
|---|
| 156 | $v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
|
|---|
| 157 |
|
|---|
| 158 | # implicit upgrade === explicit upgrade
|
|---|
| 159 | print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2;
|
|---|
| 160 | print "ok 23\n";
|
|---|
| 161 |
|
|---|
| 162 | # utf8::upgrade is transparent and does not break equality
|
|---|
| 163 | print "not " unless alleq( $u, $v );
|
|---|
| 164 | print "ok 24\n";
|
|---|
| 165 |
|
|---|
| 166 | $u = $v = pack("C*", 0xDF);
|
|---|
| 167 | utf8::upgrade($v);
|
|---|
| 168 | #test for a roundtrip, we should get back from where we left
|
|---|
| 169 | eval {utf8::downgrade( $v )};
|
|---|
| 170 | print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
|
|---|
| 171 | print "ok 25\n";
|
|---|
| 172 |
|
|---|
| 173 | # some more eq, cmp
|
|---|
| 174 |
|
|---|
| 175 | $byte=pack("C*", 0xDF);
|
|---|
| 176 |
|
|---|
| 177 | print "not " unless pack("U*", 0x3AF) eq $byte;
|
|---|
| 178 | print "ok 26\n";
|
|---|
| 179 |
|
|---|
| 180 | print "not " if chr(0xDF) cmp $byte;
|
|---|
| 181 | print "ok 27\n";
|
|---|
| 182 |
|
|---|
| 183 | print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) &&
|
|---|
| 184 | ((pack("U*", 0x3AE) cmp $byte) == -1) &&
|
|---|
| 185 | ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) &&
|
|---|
| 186 | ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
|
|---|
| 187 | print "ok 28\n";
|
|---|
| 188 |
|
|---|
| 189 |
|
|---|
| 190 | {
|
|---|
| 191 | # Used to core dump in 5.7.3
|
|---|
| 192 | no warnings; # so test goes noiselessly
|
|---|
| 193 | print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n";
|
|---|
| 194 | }
|
|---|
| 195 |
|
|---|
| 196 | {
|
|---|
| 197 | my %h1;
|
|---|
| 198 | my %h2;
|
|---|
| 199 | $h1{"\xdf"} = 41;
|
|---|
| 200 | $h2{"\x{3af}"} = 42;
|
|---|
| 201 | print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
|
|---|
| 202 | print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n";
|
|---|
| 203 | }
|
|---|