| 1 | #!./perl -w
|
|---|
| 2 |
|
|---|
| 3 | BEGIN {
|
|---|
| 4 | chdir 't' if -d 't';
|
|---|
| 5 | @INC = '../lib';
|
|---|
| 6 | no warnings; # Need global -w flag for later tests, but don't want this
|
|---|
| 7 | # to warn here:
|
|---|
| 8 | push @INC, "::lib:$MacPerl::Architecture:" if $^O eq 'MacOS';
|
|---|
| 9 | unless (find PerlIO::Layer 'perlio') {
|
|---|
| 10 | print "1..0 # Skip: not perlio\n";
|
|---|
| 11 | exit 0;
|
|---|
| 12 | }
|
|---|
| 13 | unless (eval { require Encode } ) {
|
|---|
| 14 | print "1..0 # Skip: not Encode\n";
|
|---|
| 15 | exit 0;
|
|---|
| 16 | }
|
|---|
| 17 | }
|
|---|
| 18 |
|
|---|
| 19 | print "1..15\n";
|
|---|
| 20 |
|
|---|
| 21 | my $grk = "grk$$";
|
|---|
| 22 | my $utf = "utf$$";
|
|---|
| 23 | my $fail1 = "fa$$";
|
|---|
| 24 | my $fail2 = "fb$$";
|
|---|
| 25 | my $russki = "koi8r$$";
|
|---|
| 26 | my $threebyte = "3byte$$";
|
|---|
| 27 |
|
|---|
| 28 | if (open(GRK, ">$grk")) {
|
|---|
| 29 | binmode(GRK, ":bytes");
|
|---|
| 30 | # alpha beta gamma in ISO 8859-7
|
|---|
| 31 | print GRK "\xe1\xe2\xe3";
|
|---|
| 32 | close GRK or die "Could not close: $!";
|
|---|
| 33 | }
|
|---|
| 34 |
|
|---|
| 35 | {
|
|---|
| 36 | open(my $i,'<:encoding(iso-8859-7)',$grk);
|
|---|
| 37 | print "ok 1\n";
|
|---|
| 38 | open(my $o,'>:utf8',$utf);
|
|---|
| 39 | print "ok 2\n";
|
|---|
| 40 | print $o readline($i);
|
|---|
| 41 | print "ok 3\n";
|
|---|
| 42 | close($o) or die "Could not close: $!";
|
|---|
| 43 | close($i);
|
|---|
| 44 | }
|
|---|
| 45 |
|
|---|
| 46 | if (open(UTF, "<$utf")) {
|
|---|
| 47 | binmode(UTF, ":bytes");
|
|---|
| 48 | if (ord('A') == 193) { # EBCDIC
|
|---|
| 49 | # alpha beta gamma in UTF-EBCDIC Unicode (0x3b1 0x3b2 0x3b3)
|
|---|
| 50 | print "not " unless <UTF> eq "\xb4\x58\xb4\x59\xb4\x62";
|
|---|
| 51 | } else {
|
|---|
| 52 | # alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3)
|
|---|
| 53 | print "not " unless <UTF> eq "\xce\xb1\xce\xb2\xce\xb3";
|
|---|
| 54 | }
|
|---|
| 55 | print "ok 4\n";
|
|---|
| 56 | close UTF;
|
|---|
| 57 | }
|
|---|
| 58 |
|
|---|
| 59 | {
|
|---|
| 60 | use Encode;
|
|---|
| 61 | open(my $i,'<:utf8',$utf);
|
|---|
| 62 | print "ok 5\n";
|
|---|
| 63 | open(my $o,'>:encoding(iso-8859-7)',$grk);
|
|---|
| 64 | print "ok 6\n";
|
|---|
| 65 | print $o readline($i);
|
|---|
| 66 | print "ok 7\n";
|
|---|
| 67 | close($o) or die "Could not close: $!";
|
|---|
| 68 | close($i);
|
|---|
| 69 | }
|
|---|
| 70 |
|
|---|
| 71 | if (open(GRK, "<$grk")) {
|
|---|
| 72 | binmode(GRK, ":bytes");
|
|---|
| 73 | print "not " unless <GRK> eq "\xe1\xe2\xe3";
|
|---|
| 74 | print "ok 8\n";
|
|---|
| 75 | close GRK;
|
|---|
| 76 | }
|
|---|
| 77 |
|
|---|
| 78 | $SIG{__WARN__} = sub {$warn .= $_[0]};
|
|---|
| 79 |
|
|---|
| 80 | if (open(FAIL, ">:encoding(NoneSuch)", $fail1)) {
|
|---|
| 81 | print "not ok 9 # Open should fail\n";
|
|---|
| 82 | } else {
|
|---|
| 83 | print "ok 9\n";
|
|---|
| 84 | }
|
|---|
| 85 | if (!defined $warn) {
|
|---|
| 86 | print "not ok 10 # warning is undef\n";
|
|---|
| 87 | } elsif ($warn =~ /^Cannot find encoding "NoneSuch" at/) {
|
|---|
| 88 | print "ok 10\n";
|
|---|
| 89 | } else {
|
|---|
| 90 | print "not ok 10 # warning is '$warn'";
|
|---|
| 91 | }
|
|---|
| 92 |
|
|---|
| 93 | if (open(RUSSKI, ">$russki")) {
|
|---|
| 94 | print RUSSKI "\x3c\x3f\x78";
|
|---|
| 95 | close RUSSKI or die "Could not close: $!";
|
|---|
| 96 | open(RUSSKI, "$russki");
|
|---|
| 97 | binmode(RUSSKI, ":raw");
|
|---|
| 98 | my $buf1;
|
|---|
| 99 | read(RUSSKI, $buf1, 1);
|
|---|
| 100 | # eof(RUSSKI);
|
|---|
| 101 | binmode(RUSSKI, ":encoding(koi8-r)");
|
|---|
| 102 | my $buf2;
|
|---|
| 103 | read(RUSSKI, $buf2, 1);
|
|---|
| 104 | my $offset = tell(RUSSKI);
|
|---|
| 105 | if (ord($buf1) == 0x3c &&
|
|---|
| 106 | ord($buf2) == (ord('A') == 193) ? 0x6f : 0x3f &&
|
|---|
| 107 | $offset == 2) {
|
|---|
| 108 | print "ok 11\n";
|
|---|
| 109 | } else {
|
|---|
| 110 | printf "not ok 11 # [%s] [%s] %d\n",
|
|---|
| 111 | join(" ", unpack("H*", $buf1)),
|
|---|
| 112 | join(" ", unpack("H*", $buf2)), $offset;
|
|---|
| 113 | }
|
|---|
| 114 | close(RUSSKI);
|
|---|
| 115 | } else {
|
|---|
| 116 | print "not ok 11 # open failed: $!\n";
|
|---|
| 117 | }
|
|---|
| 118 |
|
|---|
| 119 | undef $warn;
|
|---|
| 120 |
|
|---|
| 121 | # Check there is no Use of uninitialized value in concatenation (.) warning
|
|---|
| 122 | # due to the way @latin2iso_num was used to make aliases.
|
|---|
| 123 | if (open(FAIL, ">:encoding(latin42)", $fail2)) {
|
|---|
| 124 | print "not ok 12 # Open should fail\n";
|
|---|
| 125 | } else {
|
|---|
| 126 | print "ok 12\n";
|
|---|
| 127 | }
|
|---|
| 128 | if (!defined $warn) {
|
|---|
| 129 | print "not ok 13 # warning is undef\n";
|
|---|
| 130 | } elsif ($warn =~ /^Cannot find encoding "latin42" at.*line \d+\.$/) {
|
|---|
| 131 | print "ok 13\n";
|
|---|
| 132 | } else {
|
|---|
| 133 | print "not ok 13 # warning is: \n";
|
|---|
| 134 | $warn =~ s/^/# /mg;
|
|---|
| 135 | print "$warn";
|
|---|
| 136 | }
|
|---|
| 137 |
|
|---|
| 138 | # Create a string of chars that are 3 bytes in UTF-8
|
|---|
| 139 | my $str = "\x{1f80}" x 2048;
|
|---|
| 140 |
|
|---|
| 141 | # Write them to a file
|
|---|
| 142 | open(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!";
|
|---|
| 143 | print F $str;
|
|---|
| 144 | close(F);
|
|---|
| 145 |
|
|---|
| 146 | # Read file back as UTF-8
|
|---|
| 147 | open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
|
|---|
| 148 | my $dstr = <F>;
|
|---|
| 149 | close(F);
|
|---|
| 150 | print "not " unless ($dstr eq $str);
|
|---|
| 151 | print "ok 14\n";
|
|---|
| 152 |
|
|---|
| 153 | # Try decoding some bad stuff
|
|---|
| 154 | open(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!";
|
|---|
| 155 | print F "foo\xF0\x80\x80\x80bar\n\x80foo\n";
|
|---|
| 156 | close(F);
|
|---|
| 157 |
|
|---|
| 158 | open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
|
|---|
| 159 | $dstr = join(":", <F>);
|
|---|
| 160 | close(F);
|
|---|
| 161 | print "not " unless $dstr eq "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n";
|
|---|
| 162 | print "ok 15\n";
|
|---|
| 163 |
|
|---|
| 164 | END {
|
|---|
| 165 | 1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
|
|---|
| 166 | }
|
|---|