| 1 | #!./perl
|
|---|
| 2 | # $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $
|
|---|
| 3 | #
|
|---|
| 4 | use 5.8.0;
|
|---|
| 5 | use strict;
|
|---|
| 6 | use Encode ;
|
|---|
| 7 | use Encode::Alias;
|
|---|
| 8 | my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
|
|---|
| 9 |
|
|---|
| 10 | use File::Basename;
|
|---|
| 11 | my $name = basename($0);
|
|---|
| 12 |
|
|---|
| 13 | use Getopt::Long qw(:config no_ignore_case);
|
|---|
| 14 |
|
|---|
| 15 | my %Opt;
|
|---|
| 16 |
|
|---|
| 17 | help()
|
|---|
| 18 | unless
|
|---|
| 19 | GetOptions(\%Opt,
|
|---|
| 20 | 'from|f=s',
|
|---|
| 21 | 'to|t=s',
|
|---|
| 22 | 'list|l',
|
|---|
| 23 | 'string|s=s',
|
|---|
| 24 | 'check|C=i',
|
|---|
| 25 | 'c',
|
|---|
| 26 | 'perlqq|p',
|
|---|
| 27 | 'debug|D',
|
|---|
| 28 | 'scheme|S=s',
|
|---|
| 29 | 'resolve|r=s',
|
|---|
| 30 | 'help',
|
|---|
| 31 | );
|
|---|
| 32 |
|
|---|
| 33 | $Opt{help} and help();
|
|---|
| 34 | $Opt{list} and list_encodings();
|
|---|
| 35 | my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
|
|---|
| 36 | defined $Opt{resolve} and resolve_encoding($Opt{resolve});
|
|---|
| 37 | $Opt{from} || $Opt{to} || help();
|
|---|
| 38 | my $from = $Opt{from} || $locale or help("from_encoding unspecified");
|
|---|
| 39 | my $to = $Opt{to} || $locale or help("to_encoding unspecified");
|
|---|
| 40 | $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
|
|---|
| 41 | my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';
|
|---|
| 42 | $Opt{check} ||= $Opt{c};
|
|---|
| 43 | $Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ;
|
|---|
| 44 |
|
|---|
| 45 | if ($Opt{debug}){
|
|---|
| 46 | my $cfrom = Encode->getEncoding($from)->name;
|
|---|
| 47 | my $cto = Encode->getEncoding($to)->name;
|
|---|
| 48 | print <<"EOT";
|
|---|
| 49 | Scheme: $scheme
|
|---|
| 50 | From: $from => $cfrom
|
|---|
| 51 | To: $to => $cto
|
|---|
| 52 | EOT
|
|---|
| 53 | }
|
|---|
| 54 |
|
|---|
| 55 | # we do not use <> (or ARGV) for the sake of binmode()
|
|---|
| 56 | @ARGV or push @ARGV, \*STDIN;
|
|---|
| 57 |
|
|---|
| 58 | unless ($scheme eq 'perlio'){
|
|---|
| 59 | binmode STDOUT;
|
|---|
| 60 | for my $argv (@ARGV){
|
|---|
| 61 | my $ifh = ref $argv ? $argv : undef;
|
|---|
| 62 | $ifh or open $ifh, "<", $argv or next;
|
|---|
| 63 | binmode $ifh;
|
|---|
| 64 | if ($scheme eq 'from_to'){ # default
|
|---|
| 65 | while(<$ifh>){
|
|---|
| 66 | Encode::from_to($_, $from, $to, $Opt{check});
|
|---|
| 67 | print;
|
|---|
| 68 | }
|
|---|
| 69 | }elsif ($scheme eq 'decode_encode'){ # step-by-step
|
|---|
| 70 | while(<$ifh>){
|
|---|
| 71 | my $decoded = decode($from, $_, $Opt{check});
|
|---|
| 72 | my $encoded = encode($to, $decoded);
|
|---|
| 73 | print $encoded;
|
|---|
| 74 | }
|
|---|
| 75 | } else { # won't reach
|
|---|
| 76 | die "$name: unknown scheme: $scheme";
|
|---|
| 77 | }
|
|---|
| 78 | }
|
|---|
| 79 | }else{
|
|---|
| 80 | # NI-S favorite
|
|---|
| 81 | binmode STDOUT => "raw:encoding($to)";
|
|---|
| 82 | for my $argv (@ARGV){
|
|---|
| 83 | my $ifh = ref $argv ? $argv : undef;
|
|---|
| 84 | $ifh or open $ifh, "<", $argv or next;
|
|---|
| 85 | binmode $ifh => "raw:encoding($from)";
|
|---|
| 86 | print while(<$ifh>);
|
|---|
| 87 | }
|
|---|
| 88 | }
|
|---|
| 89 |
|
|---|
| 90 | sub list_encodings{
|
|---|
| 91 | print join("\n", Encode->encodings(":all")), "\n";
|
|---|
| 92 | exit 0;
|
|---|
| 93 | }
|
|---|
| 94 |
|
|---|
| 95 | sub resolve_encoding {
|
|---|
| 96 | if (my $alias = Encode::resolve_alias($_[0])) {
|
|---|
| 97 | print $alias, "\n";
|
|---|
| 98 | exit 0;
|
|---|
| 99 | } else {
|
|---|
| 100 | warn "$name: $_[0] is not known to Encode\n";
|
|---|
| 101 | exit 1;
|
|---|
| 102 | }
|
|---|
| 103 | }
|
|---|
| 104 |
|
|---|
| 105 | sub help{
|
|---|
| 106 | my $message = shift;
|
|---|
| 107 | $message and print STDERR "$name error: $message\n";
|
|---|
| 108 | print STDERR <<"EOT";
|
|---|
| 109 | $name [-f from_encoding] [-t to_encoding] [-s string] [files...]
|
|---|
| 110 | $name -l
|
|---|
| 111 | $name -r encoding_alias
|
|---|
| 112 | -l,--list
|
|---|
| 113 | lists all available encodings
|
|---|
| 114 | -r,--resolve encoding_alias
|
|---|
| 115 | resolve encoding to its (Encode) canonical name
|
|---|
| 116 | -f,--from from_encoding
|
|---|
| 117 | when omitted, the current locale will be used
|
|---|
| 118 | -t,--to to_encoding
|
|---|
| 119 | when omitted, the current locale will be used
|
|---|
| 120 | -s,--string string
|
|---|
| 121 | "string" will be the input instead of STDIN or files
|
|---|
| 122 | The following are mainly of interest to Encode hackers:
|
|---|
| 123 | -D,--debug show debug information
|
|---|
| 124 | -C N | -c | -p check the validity of the input
|
|---|
| 125 | -S,--scheme scheme use the scheme for conversion
|
|---|
| 126 | EOT
|
|---|
| 127 | exit;
|
|---|
| 128 | }
|
|---|
| 129 |
|
|---|
| 130 | __END__
|
|---|
| 131 |
|
|---|
| 132 | =head1 NAME
|
|---|
| 133 |
|
|---|
| 134 | piconv -- iconv(1), reinvented in perl
|
|---|
| 135 |
|
|---|
| 136 | =head1 SYNOPSIS
|
|---|
| 137 |
|
|---|
| 138 | piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
|
|---|
| 139 | piconv -l
|
|---|
| 140 | piconv [-C N|-c|-p]
|
|---|
| 141 | piconv -S scheme ...
|
|---|
| 142 | piconv -r encoding
|
|---|
| 143 | piconv -D ...
|
|---|
| 144 | piconv -h
|
|---|
| 145 |
|
|---|
| 146 | =head1 DESCRIPTION
|
|---|
| 147 |
|
|---|
| 148 | B<piconv> is perl version of B<iconv>, a character encoding converter
|
|---|
| 149 | widely available for various Unixen today. This script was primarily
|
|---|
| 150 | a technology demonstrator for Perl 5.8.0, but you can use piconv in the
|
|---|
| 151 | place of iconv for virtually any case.
|
|---|
| 152 |
|
|---|
| 153 | piconv converts the character encoding of either STDIN or files
|
|---|
| 154 | specified in the argument and prints out to STDOUT.
|
|---|
| 155 |
|
|---|
| 156 | Here is the list of options. Each option can be in short format (-f)
|
|---|
| 157 | or long (--from).
|
|---|
| 158 |
|
|---|
| 159 | =over 4
|
|---|
| 160 |
|
|---|
| 161 | =item -f,--from from_encoding
|
|---|
| 162 |
|
|---|
| 163 | Specifies the encoding you are converting from. Unlike B<iconv>,
|
|---|
| 164 | this option can be omitted. In such cases, the current locale is used.
|
|---|
| 165 |
|
|---|
| 166 | =item -t,--to to_encoding
|
|---|
| 167 |
|
|---|
| 168 | Specifies the encoding you are converting to. Unlike B<iconv>,
|
|---|
| 169 | this option can be omitted. In such cases, the current locale is used.
|
|---|
| 170 |
|
|---|
| 171 | Therefore, when both -f and -t are omitted, B<piconv> just acts
|
|---|
| 172 | like B<cat>.
|
|---|
| 173 |
|
|---|
| 174 | =item -s,--string I<string>
|
|---|
| 175 |
|
|---|
| 176 | uses I<string> instead of file for the source of text.
|
|---|
| 177 |
|
|---|
| 178 | =item -l,--list
|
|---|
| 179 |
|
|---|
| 180 | Lists all available encodings, one per line, in case-insensitive
|
|---|
| 181 | order. Note that only the canonical names are listed; many aliases
|
|---|
| 182 | exist. For example, the names are case-insensitive, and many standard
|
|---|
| 183 | and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
|
|---|
| 184 | instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
|
|---|
| 185 | for a full discussion.
|
|---|
| 186 |
|
|---|
| 187 | =item -C,--check I<N>
|
|---|
| 188 |
|
|---|
| 189 | Check the validity of the stream if I<N> = 1. When I<N> = -1, something
|
|---|
| 190 | interesting happens when it encounters an invalid character.
|
|---|
| 191 |
|
|---|
| 192 | =item -c
|
|---|
| 193 |
|
|---|
| 194 | Same as C<-C 1>.
|
|---|
| 195 |
|
|---|
| 196 | =item -p,--perlqq
|
|---|
| 197 |
|
|---|
| 198 | Same as C<-C -1>.
|
|---|
| 199 |
|
|---|
| 200 | =item -h,--help
|
|---|
| 201 |
|
|---|
| 202 | Show usage.
|
|---|
| 203 |
|
|---|
| 204 | =item -D,--debug
|
|---|
| 205 |
|
|---|
| 206 | Invokes debugging mode. Primarily for Encode hackers.
|
|---|
| 207 |
|
|---|
| 208 | =item -S,--scheme scheme
|
|---|
| 209 |
|
|---|
| 210 | Selects which scheme is to be used for conversion. Available schemes
|
|---|
| 211 | are as follows:
|
|---|
| 212 |
|
|---|
| 213 | =over 4
|
|---|
| 214 |
|
|---|
| 215 | =item from_to
|
|---|
| 216 |
|
|---|
| 217 | Uses Encode::from_to for conversion. This is the default.
|
|---|
| 218 |
|
|---|
| 219 | =item decode_encode
|
|---|
| 220 |
|
|---|
| 221 | Input strings are decode()d then encode()d. A straight two-step
|
|---|
| 222 | implementation.
|
|---|
| 223 |
|
|---|
| 224 | =item perlio
|
|---|
| 225 |
|
|---|
| 226 | The new perlIO layer is used. NI-S' favorite.
|
|---|
| 227 |
|
|---|
| 228 | =back
|
|---|
| 229 |
|
|---|
| 230 | Like the I<-D> option, this is also for Encode hackers.
|
|---|
| 231 |
|
|---|
| 232 | =back
|
|---|
| 233 |
|
|---|
| 234 | =head1 SEE ALSO
|
|---|
| 235 |
|
|---|
| 236 | L<iconv/1>
|
|---|
| 237 | L<locale/3>
|
|---|
| 238 | L<Encode>
|
|---|
| 239 | L<Encode::Supported>
|
|---|
| 240 | L<Encode::Alias>
|
|---|
| 241 | L<PerlIO>
|
|---|
| 242 |
|
|---|
| 243 | =cut
|
|---|