| 1 | BEGIN {
|
|---|
| 2 | if ($ENV{'PERL_CORE'}){
|
|---|
| 3 | chdir 't';
|
|---|
| 4 | unshift @INC, '../lib';
|
|---|
| 5 | }
|
|---|
| 6 | require Config; import Config;
|
|---|
| 7 | if ($Config{'extensions'} !~ /\bEncode\b/) {
|
|---|
| 8 | print "1..0 # Skip: Encode was not built\n";
|
|---|
| 9 | exit 0;
|
|---|
| 10 | }
|
|---|
| 11 | if (ord("A") == 193) {
|
|---|
| 12 | print "1..0 # Skip: EBCDIC\n";
|
|---|
| 13 | exit 0;
|
|---|
| 14 | }
|
|---|
| 15 | # should work w/o PerlIO now!
|
|---|
| 16 | # unless (PerlIO::Layer->find('perlio')){
|
|---|
| 17 | # print "1..0 # Skip: PerlIO required\n";
|
|---|
| 18 | # exit 0;
|
|---|
| 19 | # }
|
|---|
| 20 | $| = 1;
|
|---|
| 21 | }
|
|---|
| 22 | use strict;
|
|---|
| 23 | use Test::More tests => 60;
|
|---|
| 24 | use Encode;
|
|---|
| 25 | use File::Basename;
|
|---|
| 26 | use File::Spec;
|
|---|
| 27 | use File::Compare qw(compare_text);
|
|---|
| 28 | our $DEBUG = shift || 0;
|
|---|
| 29 |
|
|---|
| 30 | my %Charset =
|
|---|
| 31 | (
|
|---|
| 32 | 'big5-eten' => [qw(big5-eten)],
|
|---|
| 33 | 'big5-hkscs' => [qw(big5-hkscs)],
|
|---|
| 34 | gb2312 => [qw(euc-cn hz)],
|
|---|
| 35 | jisx0201 => [qw(euc-jp shiftjis 7bit-jis)],
|
|---|
| 36 | jisx0208 => [qw(euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1)],
|
|---|
| 37 | jisx0212 => [qw(euc-jp 7bit-jis iso-2022-jp-1)],
|
|---|
| 38 | ksc5601 => [qw(euc-kr iso-2022-kr johab)],
|
|---|
| 39 | );
|
|---|
| 40 |
|
|---|
| 41 |
|
|---|
| 42 | my $dir = dirname(__FILE__);
|
|---|
| 43 | my $seq = 1;
|
|---|
| 44 |
|
|---|
| 45 | for my $charset (sort keys %Charset){
|
|---|
| 46 | my ($src, $uni, $dst, $txt);
|
|---|
| 47 |
|
|---|
| 48 | my $transcoder = find_encoding($Charset{$charset}[0]) or die;
|
|---|
| 49 |
|
|---|
| 50 | my $src_enc = File::Spec->catfile($dir,"$charset.enc");
|
|---|
| 51 | my $src_utf = File::Spec->catfile($dir,"$charset.utf");
|
|---|
| 52 | my $dst_enc = File::Spec->catfile($dir,"$$.enc");
|
|---|
| 53 | my $dst_utf = File::Spec->catfile($dir,"$$.utf");
|
|---|
| 54 |
|
|---|
| 55 | open $src, "<$src_enc" or die "$src_enc : $!";
|
|---|
| 56 |
|
|---|
| 57 | if (PerlIO::Layer->find('perlio')){
|
|---|
| 58 | binmode($src, ":bytes"); # needed when :utf8 in default open layer
|
|---|
| 59 | }
|
|---|
| 60 |
|
|---|
| 61 | $txt = join('',<$src>);
|
|---|
| 62 | close($src);
|
|---|
| 63 |
|
|---|
| 64 | eval{ $uni = $transcoder->decode($txt, 1) };
|
|---|
| 65 | $@ and print $@;
|
|---|
| 66 | ok(defined($uni), "decode $charset"); $seq++;
|
|---|
| 67 | is(length($txt),0, "decode $charset completely"); $seq++;
|
|---|
| 68 |
|
|---|
| 69 | open $dst, ">$dst_utf" or die "$dst_utf : $!";
|
|---|
| 70 | if (PerlIO::Layer->find('perlio')){
|
|---|
| 71 | binmode($dst, ":utf8");
|
|---|
| 72 | print $dst $uni;
|
|---|
| 73 | }else{ # ugh!
|
|---|
| 74 | binmode($dst);
|
|---|
| 75 | my $raw = $uni; Encode::_utf8_off($raw);
|
|---|
| 76 | print $dst $raw;
|
|---|
| 77 | }
|
|---|
| 78 |
|
|---|
| 79 | close($dst);
|
|---|
| 80 | is(compare_text($dst_utf, $src_utf), 0, "$dst_utf eq $src_utf")
|
|---|
| 81 | or ($DEBUG and rename $dst_utf, "$dst_utf.$seq");
|
|---|
| 82 | $seq++;
|
|---|
| 83 |
|
|---|
| 84 | open $src, "<$src_utf" or die "$src_utf : $!";
|
|---|
| 85 | if (PerlIO::Layer->find('perlio')){
|
|---|
| 86 | binmode($src, ":utf8");
|
|---|
| 87 | $uni = join('', <$src>);
|
|---|
| 88 | }else{ # ugh!
|
|---|
| 89 | binmode($src);
|
|---|
| 90 | $uni = join('', <$src>);
|
|---|
| 91 | Encode::_utf8_on($uni);
|
|---|
| 92 | }
|
|---|
| 93 | close $src;
|
|---|
| 94 |
|
|---|
| 95 | my $unisave = $uni;
|
|---|
| 96 | eval{ $txt = $transcoder->encode($uni,1) };
|
|---|
| 97 | $@ and print $@;
|
|---|
| 98 | ok(defined($txt), "encode $charset"); $seq++;
|
|---|
| 99 | is(length($uni), 0, "encode $charset completely"); $seq++;
|
|---|
| 100 | $uni = $unisave;
|
|---|
| 101 |
|
|---|
| 102 | open $dst,">$dst_enc" or die "$dst_utf : $!";
|
|---|
| 103 | binmode($dst);
|
|---|
| 104 | print $dst $txt;
|
|---|
| 105 | close($dst);
|
|---|
| 106 | is(compare_text($src_enc, $dst_enc), 0 => "$dst_enc eq $src_enc")
|
|---|
| 107 | or ($DEBUG and rename $dst_enc, "$dst_enc.$seq");
|
|---|
| 108 | $seq++;
|
|---|
| 109 |
|
|---|
| 110 | unlink($dst_utf, $dst_enc);
|
|---|
| 111 |
|
|---|
| 112 | for my $encoding (@{$Charset{$charset}}){
|
|---|
| 113 | my $rt = decode($encoding, encode($encoding, $uni));
|
|---|
| 114 | is ($rt, $uni, "RT $encoding");
|
|---|
| 115 | }
|
|---|
| 116 | }
|
|---|