source: trunk/essentials/dev-lang/perl/ext/Encode/bin/piconv@ 3221

Last change on this file since 3221 was 3181, checked in by bird, 19 years ago

perl 5.8.8

File size: 5.6 KB
Line 
1#!./perl
2# $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $
3#
4use 5.8.0;
5use strict;
6use Encode ;
7use Encode::Alias;
8my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
9
10use File::Basename;
11my $name = basename($0);
12
13use Getopt::Long qw(:config no_ignore_case);
14
# Parsed command-line options, keyed by the primary (first) name of each
# option specification below.
my %Opt;

# A parse failure prints the usage text; help() exits and does not return.
help()
    unless GetOptions(
        \%Opt,
        'from|f=s',
        'to|t=s',
        'list|l',
        'string|s=s',
        'check|C=i',
        'c',
        'perlqq|p',
        'debug|D',
        'scheme|S=s',
        'resolve|r=s',
        'help',
    );

# Informational modes; each of these subs exits on its own.
$Opt{help} and help();
$Opt{list} and list_encodings();
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
defined $Opt{resolve} and resolve_encoding($Opt{resolve});

# At least one of -f / -t must be given; the side that is omitted falls
# back to the value taken from the locale environment variables.
$Opt{from} || $Opt{to} || help();
my $from = $Opt{from} || $locale or help("from_encoding unspecified");
my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");

# -s: convert the given string in place, print it and stop.  Tested with
# defined() so that a string such as "0" or "" (or a false return value
# from from_to) does not fall through to reading STDIN or files.
if (defined $Opt{string}) {
    Encode::from_to($Opt{string}, $from, $to);
    print $Opt{string};
    exit;
}

# Getopt::Long stores the value under the primary option name 'scheme',
# so that is the key to consult.  Unknown schemes fall back to the default.
my $scheme = 'from_to';
if (defined $Opt{scheme}) {
    if (exists $Scheme{ $Opt{scheme} }) {
        $scheme = $Opt{scheme};
    }
    else {
        warn "$name: unknown scheme '$Opt{scheme}', falling back to 'from_to'\n";
    }
}

# -c supplies the check value when -C was not given (or was 0);
# -p overrides both with Encode::FB_PERLQQ.
$Opt{check} ||= $Opt{c};
$Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ;
44
# Debug mode: print the selected scheme, and for each requested encoding
# the name reported by the encoding object Encode returns for it.
if ($Opt{debug}) {
    my ($cfrom, $cto) = map { Encode->getEncoding($_)->name } ($from, $to);
    print "Scheme: $scheme\n",
          "From: $from => $cfrom\n",
          "To: $to => $cto\n";
}
54
# we do not use <> (or ARGV) for the sake of binmode()
# With no file arguments, read from STDIN (passed as a glob reference).
@ARGV or push @ARGV, \*STDIN;

unless ($scheme eq 'perlio') {
    # Explicit conversion: raw bytes in, converted bytes out.
    binmode STDOUT;
    for my $argv (@ARGV) {
        # A reference is an already-open handle (STDIN); anything else is
        # a file name to open.
        my $ifh = ref $argv ? $argv : undef;
        unless ($ifh) {
            unless (open $ifh, "<", $argv) {
                # Report the failure instead of skipping the file silently.
                warn "$name: cannot open $argv: $!\n";
                next;
            }
        }
        binmode $ifh;
        if ($scheme eq 'from_to') {    # default
            while (<$ifh>) {
                Encode::from_to($_, $from, $to, $Opt{check});
                print;
            }
        }
        elsif ($scheme eq 'decode_encode') {    # step-by-step
            while (<$ifh>) {
                my $decoded = decode($from, $_, $Opt{check});
                my $encoded = encode($to, $decoded);
                print $encoded;
            }
        }
        else {    # won't reach
            die "$name: unknown scheme: $scheme";
        }
    }
}
else {
    # NI-S favorite
    # PerlIO scheme: let the :encoding layers on the handles do the
    # conversion while lines are copied through.
    binmode STDOUT => "raw:encoding($to)";
    for my $argv (@ARGV) {
        my $ifh = ref $argv ? $argv : undef;
        unless ($ifh) {
            unless (open $ifh, "<", $argv) {
                warn "$name: cannot open $argv: $!\n";
                next;
            }
        }
        binmode $ifh => "raw:encoding($from)";
        print while (<$ifh>);
    }
}
89
# Print every encoding name returned by Encode->encodings(":all"), one per
# line, then exit with status 0.
sub list_encodings {
    my $listing = join "\n", Encode->encodings(":all");
    print "$listing\n";
    exit 0;
}
94
# Resolve the encoding alias given as the first argument.
# Prints the resolved name and exits 0 when Encode::resolve_alias returns a
# true value; otherwise warns and exits 1.
sub resolve_encoding {
    my $canonical = Encode::resolve_alias($_[0]);
    unless ($canonical) {
        warn "$name: $_[0] is not known to Encode\n";
        exit 1;
    }
    print $canonical, "\n";
    exit 0;
}
104
# Print the usage text to STDERR and exit; never returns.
#   $message (optional) - error text printed ahead of the usage.
# Exit status is 1 when an error message was supplied, so callers of the
# script can detect the failure, and 0 for a plain usage request.
sub help {
    my $message = shift;
    $message and print STDERR "$name error: $message\n";
    print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
$name -r encoding_alias
 -l,--list
 lists all available encodings
 -r,--resolve encoding_alias
 resolve encoding to its (Encode) canonical name
 -f,--from from_encoding
 when omitted, the current locale will be used
 -t,--to to_encoding
 when omitted, the current locale will be used
 -s,--string string
 "string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
 -D,--debug show debug information
 -C N | -c | -p check the validity of the input
 -S,--scheme scheme use the scheme for conversion
EOT
    exit($message ? 1 : 0);
}
129
130__END__
131
132=head1 NAME
133
134piconv -- iconv(1), reinvented in perl
135
136=head1 SYNOPSIS
137
138 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
139 piconv -l
140 piconv [-C N|-c|-p]
141 piconv -S scheme ...
142 piconv -r encoding
143 piconv -D ...
144 piconv -h
145
146=head1 DESCRIPTION
147
148B<piconv> is a Perl version of B<iconv>, a character encoding converter
149widely available for various Unixen today. This script was primarily
150a technology demonstrator for Perl 5.8.0, but you can use piconv in the
151place of iconv for virtually any case.
152
153piconv converts the character encoding of either STDIN or files
154specified in the argument and prints out to STDOUT.
155
156Here is the list of options. Each option can be in short format (-f)
157or long (--from).
158
159=over 4
160
161=item -f,--from from_encoding
162
163Specifies the encoding you are converting from. Unlike B<iconv>,
164this option can be omitted. In such cases, the current locale is used.
165
166=item -t,--to to_encoding
167
168Specifies the encoding you are converting to. Unlike B<iconv>,
169this option can be omitted. In such cases, the current locale is used.
170
171Therefore, when both -f and -t are omitted, B<piconv> just acts
172like B<cat>.
173
174=item -s,--string I<string>
175
176uses I<string> instead of file for the source of text.
177
178=item -l,--list
179
180Lists all available encodings, one per line, in case-insensitive
181order. Note that only the canonical names are listed; many aliases
182exist. For example, the names are case-insensitive, and many standard
183and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
184instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
185for a full discussion.
186
187=item -C,--check I<N>
188
189Check the validity of the stream if I<N> = 1. When I<N> = -1, something
190interesting happens when it encounters an invalid character.
191
192=item -c
193
194Same as C<-C 1>.
195
196=item -p,--perlqq
197
198Same as C<-C -1>.
199
200=item -h,--help
201
202Show usage.
203
204=item -D,--debug
205
206Invokes debugging mode. Primarily for Encode hackers.
207
208=item -S,--scheme scheme
209
210Selects which scheme is to be used for conversion. Available schemes
211are as follows:
212
213=over 4
214
215=item from_to
216
217Uses Encode::from_to for conversion. This is the default.
218
219=item decode_encode
220
221Input strings are decode()d then encode()d. A straight two-step
222implementation.
223
224=item perlio
225
226The new perlIO layer is used. NI-S' favorite.
227
228=back
229
230Like the I<-D> option, this is also for Encode hackers.
231
232=back
233
234=head1 SEE ALSO
235
236L<iconv(1)>
237L<locale(3)>
238L<Encode>
239L<Encode::Supported>
240L<Encode::Alias>
241L<PerlIO>
242
243=cut
Note: See TracBrowser for help on using the repository browser.