| 1 | #!./perl
|
|---|
| 2 |
|
|---|
| 3 | use strict;
|
|---|
| 4 | use Encode;
|
|---|
| 5 | use Getopt::Std;
|
|---|
| 6 | my %Opt; getopts("ChH:e:f:t:s:pPv", \%Opt);
|
|---|
| 7 | $Opt{p} ||= $Opt{P};
|
|---|
| 8 | $Opt{e} ||= 'utf8';
|
|---|
| 9 | $Opt{f} ||= $Opt{e};
|
|---|
| 10 | $Opt{t} ||= $Opt{e};
|
|---|
| 11 | $Opt{h} and help();
|
|---|
| 12 |
|
|---|
| 13 | my ($linebuf, $outbuf);
|
|---|
| 14 | my $CPL = $Opt{p} ? 64 : 8;
|
|---|
| 15 | my $linenum;
|
|---|
| 16 | my $linesperheading = $Opt{H};
|
|---|
| 17 | my $nchars;
|
|---|
| 18 | our $PrevChunk;
|
|---|
| 19 |
|
|---|
| 20 | $Opt{h} and help();
|
|---|
| 21 | $Opt{p} and do_perl($Opt{s});
|
|---|
| 22 | do_dump($Opt{s});
|
|---|
| 23 | exit;
|
|---|
| 24 |
|
|---|
| 25 | #
|
|---|
| 26 |
|
|---|
| 27 | sub do_perl{
|
|---|
| 28 | my $string = shift;
|
|---|
| 29 | $Opt{P} and print "#!$^X -w\nprint\n";
|
|---|
| 30 | unless ($string){
|
|---|
| 31 | while(<>){
|
|---|
| 32 | use utf8;
|
|---|
| 33 | $linebuf .= Encode::decode($Opt{f}, $_);
|
|---|
| 34 | while($linebuf){
|
|---|
| 35 | my $chr = render_p(substr($linebuf, 0, 1, ''));
|
|---|
| 36 | length($outbuf) + length($chr) > $CPL and print_P();
|
|---|
| 37 | $outbuf .= $chr;
|
|---|
| 38 | }
|
|---|
| 39 | }
|
|---|
| 40 | $outbuf and print print_P(";");
|
|---|
| 41 | }else{
|
|---|
| 42 | while($string){
|
|---|
| 43 | my $chr = render_p(substr($string, 0, 1, ''));
|
|---|
| 44 | length($outbuf) + length($chr) > $CPL and print_P();
|
|---|
| 45 | $outbuf .= $chr;
|
|---|
| 46 | }
|
|---|
| 47 | }
|
|---|
| 48 | $outbuf and print print_P(";");
|
|---|
| 49 | exit;
|
|---|
| 50 | }
|
|---|
| 51 |
|
|---|
| 52 | sub render_p{
|
|---|
| 53 | my ($chr, $format) = @_;
|
|---|
| 54 | our %S2pstr;
|
|---|
| 55 | $S2pstr{$chr} and return $S2pstr{$chr}; # \t\n...
|
|---|
| 56 | $chr =~ /[\x20-\x7e]/ and return $chr; # ascii, printable;
|
|---|
| 57 | my $fmt = ($chr =~ /[\x00-\x1f\x7F]/) ?
|
|---|
| 58 | q(\x%x) : q(\x{%x});
|
|---|
| 59 | return sprintf $fmt, ord($chr);
|
|---|
| 60 | }
|
|---|
| 61 |
|
|---|
| 62 | sub print_P{
|
|---|
| 63 | my $end = shift;
|
|---|
| 64 | $outbuf or return;
|
|---|
| 65 | print '"', encode($Opt{t}, $outbuf), '"';
|
|---|
| 66 | my $tail = $Opt{P} ? $end ? "$end" : "," : '';
|
|---|
| 67 | print $tail, "\n";
|
|---|
| 68 | $outbuf = '';
|
|---|
| 69 | }
|
|---|
| 70 |
|
|---|
| 71 | sub do_dump{
|
|---|
| 72 | my $string = shift;
|
|---|
| 73 | !$Opt{p} and exists $Opt{H} and print_H();
|
|---|
| 74 | unless ($string){
|
|---|
| 75 | while(<>){
|
|---|
| 76 | use utf8;
|
|---|
| 77 | $linebuf .= Encode::decode($Opt{f}, $_);
|
|---|
| 78 | while (length($linebuf) > $CPL){
|
|---|
| 79 | my $chunk = substr($linebuf, 0, $CPL, '');
|
|---|
| 80 | print_C($chunk, $linenum++);
|
|---|
| 81 | $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
|
|---|
| 82 | }
|
|---|
| 83 | }
|
|---|
| 84 | $linebuf and print_C($linebuf);
|
|---|
| 85 | }else{
|
|---|
| 86 | while ($string){
|
|---|
| 87 | my $chunk = substr($string, 0, $CPL, '');
|
|---|
| 88 | print_C($chunk, $linenum++);
|
|---|
| 89 | $Opt{H} and $linenum % $Opt{H} == $CPL-1 and print_S();
|
|---|
| 90 | }
|
|---|
| 91 | }
|
|---|
| 92 | exit;
|
|---|
| 93 | }
|
|---|
| 94 |
|
|---|
| 95 | sub print_S{
|
|---|
| 96 | print "--------+------------------------------------------------";
|
|---|
| 97 | if ($Opt{C}){
|
|---|
| 98 | print "-+-----------------";
|
|---|
| 99 | }
|
|---|
| 100 | print "\n";
|
|---|
| 101 | }
|
|---|
| 102 | sub print_H{
|
|---|
| 103 | print " Offset 0 1 2 3 4 5 6 7";
|
|---|
| 104 | if ($Opt{C}){
|
|---|
| 105 | print " | 0 1 2 3 4 5 6 7";
|
|---|
| 106 | }
|
|---|
| 107 | print "\n";
|
|---|
| 108 | print_S;
|
|---|
| 109 | }
|
|---|
| 110 |
|
|---|
| 111 | sub print_C{
|
|---|
| 112 | my ($chunk, $linenum) = @_;
|
|---|
| 113 | if (!$Opt{v} and $chunk eq $PrevChunk){
|
|---|
| 114 | printf "%08x *\n", $linenum*8; return;
|
|---|
| 115 | }
|
|---|
| 116 | $PrevChunk = $chunk;
|
|---|
| 117 | my $end = length($chunk) - 1;
|
|---|
| 118 | my (@ord, @chr);
|
|---|
| 119 | for my $i (0..$end){
|
|---|
| 120 | use utf8;
|
|---|
| 121 | my $chr = substr($chunk,$i,1);
|
|---|
| 122 | my $ord = ord($chr);
|
|---|
| 123 | my $fmt = $ord <= 0xffff ? " %04x" : " %05x";
|
|---|
| 124 | push @ord, (sprintf $fmt, $ord);
|
|---|
| 125 | $Opt{C} and push @chr, render_c($chr);
|
|---|
| 126 | }
|
|---|
| 127 | if (++$end < 7){
|
|---|
| 128 | for my $i ($end..7){
|
|---|
| 129 | push @ord, (" " x 6);
|
|---|
| 130 | }
|
|---|
| 131 | }
|
|---|
| 132 | my $line = sprintf "%08x %s", $linenum*8, join('', @ord);
|
|---|
| 133 | $Opt{C} and $line .= sprintf " | %s", join('', @chr);
|
|---|
| 134 | print encode($Opt{t}, $line), "\n";
|
|---|
| 135 | }
|
|---|
| 136 |
|
|---|
| 137 | sub render_c{
|
|---|
| 138 | my ($chr, $format) = @_;
|
|---|
| 139 | our (%S2str, $IsFullWidth);
|
|---|
| 140 | $chr =~ /[\p{IsControl}\s]/o and return $S2str{$chr} || " ";
|
|---|
| 141 | $chr =~ $IsFullWidth and return $chr; # as is
|
|---|
| 142 | return " " . $chr;
|
|---|
| 143 | }
|
|---|
| 144 |
|
|---|
| 145 | sub help{
|
|---|
| 146 | my $message = shift;
|
|---|
| 147 | use File::Basename;
|
|---|
| 148 | my $name = basename($0);
|
|---|
| 149 | $message and print STDERR "$name error: $message\n";
|
|---|
| 150 | print STDERR <<"EOT";
|
|---|
| 151 | Usage:
|
|---|
| 152 | $name -[options...] [files...]
|
|---|
| 153 | $name -[options...] -s "string"
|
|---|
| 154 | $name -h
|
|---|
| 155 | -h prints this message.
|
|---|
| 156 | Inherited from hexdump;
|
|---|
| 157 | -C Canonical unidump mode
|
|---|
| 158 | -v prints the duplicate line as is. Without this option,
|
|---|
| 159 | single "*" will be printed instead.
|
|---|
| 160 | For unidump only
|
|---|
| 161 | -p prints in perl literals that you can copy and paste directly
|
|---|
| 162 | to your perl script.
|
|---|
| 163 | -P prints in perl executable format!
|
|---|
| 164 | -u prints a bunch of "Uxxxx,". Handy when you want to pass your
|
|---|
| 165 | characters in mailing lists.
|
|---|
| 166 | IO Options:
|
|---|
| 167 | -e io_encoding same as "-f io_encoding -t io_encoding"
|
|---|
| 168 | -f from_encoding convert the source stream from this encoding
|
|---|
| 169 | -t to_encoding print to STDOUT in this encoding
|
|---|
| 170 | -s string "string" will be converted instead of STDIN.
|
|---|
| 171 | -H nline prints separater for each nlines of output.
|
|---|
| 172 | 0 means only the table headding be printed.
|
|---|
| 173 | EOT
|
|---|
| 174 | exit;
|
|---|
| 175 | }
|
|---|
| 176 |
|
|---|
| 177 | BEGIN{
|
|---|
| 178 | our %S2pstr= (
|
|---|
| 179 | "\\" => '\\\\',
|
|---|
| 180 | "\0" => '\0',
|
|---|
| 181 | "\t" => '\t',
|
|---|
| 182 | "\n" => '\n',
|
|---|
| 183 | "\r" => '\r',
|
|---|
| 184 | "\v" => '\v',
|
|---|
| 185 | "\a" => '\a',
|
|---|
| 186 | "\e" => '\e',
|
|---|
| 187 | "\"" => qq(\\\"),
|
|---|
| 188 | "\'" => qq(\\\'),
|
|---|
| 189 | '$' => '\$',
|
|---|
| 190 | "@" => '\@',
|
|---|
| 191 | "%" => '\%',
|
|---|
| 192 | );
|
|---|
| 193 |
|
|---|
| 194 | our %S2str = (
|
|---|
| 195 | qq(\x00) => q(\0), # NULL
|
|---|
| 196 | qq(\x01) => q(^A), # START OF HEADING
|
|---|
| 197 | qq(\x02) => q(^B), # START OF TEXT
|
|---|
| 198 | qq(\x03) => q(^C), # END OF TEXT
|
|---|
| 199 | qq(\x04) => q(^D), # END OF TRANSMISSION
|
|---|
| 200 | qq(\x05) => q(^E), # ENQUIRY
|
|---|
| 201 | qq(\x06) => q(^F), # ACKNOWLEDGE
|
|---|
| 202 | qq(\x07) => q(\a), # BELL
|
|---|
| 203 | qq(\x08) => q(^H), # BACKSPACE
|
|---|
| 204 | qq(\x09) => q(\t), # HORIZONTAL TABULATION
|
|---|
| 205 | qq(\x0A) => q(\n), # LINE FEED
|
|---|
| 206 | qq(\x0B) => q(\v), # VERTICAL TABULATION
|
|---|
| 207 | qq(\x0C) => q(^L), # FORM FEED
|
|---|
| 208 | qq(\x0D) => q(\r), # CARRIAGE RETURN
|
|---|
| 209 | qq(\x0E) => q(^N), # SHIFT OUT
|
|---|
| 210 | qq(\x0F) => q(^O), # SHIFT IN
|
|---|
| 211 | qq(\x10) => q(^P), # DATA LINK ESCAPE
|
|---|
| 212 | qq(\x11) => q(^Q), # DEVICE CONTROL ONE
|
|---|
| 213 | qq(\x12) => q(^R), # DEVICE CONTROL TWO
|
|---|
| 214 | qq(\x13) => q(^S), # DEVICE CONTROL THREE
|
|---|
| 215 | qq(\x14) => q(^T), # DEVICE CONTROL FOUR
|
|---|
| 216 | qq(\x15) => q(^U), # NEGATIVE ACKNOWLEDGE
|
|---|
| 217 | qq(\x16) => q(^V), # SYNCHRONOUS IDLE
|
|---|
| 218 | qq(\x17) => q(^W), # END OF TRANSMISSION BLOCK
|
|---|
| 219 | qq(\x18) => q(^X), # CANCEL
|
|---|
| 220 | qq(\x19) => q(^Y), # END OF MEDIUM
|
|---|
| 221 | qq(\x1A) => q(^Z), # SUBSTITUTE
|
|---|
| 222 | qq(\x1B) => q(\e), # ESCAPE (\c[)
|
|---|
| 223 | qq(\x1C) => "^\\", # FILE SEPARATOR
|
|---|
| 224 | qq(\x1D) => "^\]", # GROUP SEPARATOR
|
|---|
| 225 | qq(\x1E) => q(^^), # RECORD SEPARATOR
|
|---|
| 226 | qq(\x1F) => q(^_), # UNIT SEPARATOR
|
|---|
| 227 | );
|
|---|
| 228 | #
|
|---|
| 229 | # Generated out of lib/unicore/EastAsianWidth.txt
|
|---|
| 230 | # will it work ?
|
|---|
| 231 | #
|
|---|
| 232 | our $IsFullWidth =
|
|---|
| 233 | qr/^[
|
|---|
| 234 | \x{1100}-\x{1159}
|
|---|
| 235 | \x{115F}-\x{115F}
|
|---|
| 236 | \x{2329}-\x{232A}
|
|---|
| 237 | \x{2E80}-\x{2E99}
|
|---|
| 238 | \x{2E9B}-\x{2EF3}
|
|---|
| 239 | \x{2F00}-\x{2FD5}
|
|---|
| 240 | \x{2FF0}-\x{2FFB}
|
|---|
| 241 | \x{3000}-\x{303E}
|
|---|
| 242 | \x{3041}-\x{3096}
|
|---|
| 243 | \x{3099}-\x{30FF}
|
|---|
| 244 | \x{3105}-\x{312C}
|
|---|
| 245 | \x{3131}-\x{318E}
|
|---|
| 246 | \x{3190}-\x{31B7}
|
|---|
| 247 | \x{31F0}-\x{321C}
|
|---|
| 248 | \x{3220}-\x{3243}
|
|---|
| 249 | \x{3251}-\x{327B}
|
|---|
| 250 | \x{327F}-\x{32CB}
|
|---|
| 251 | \x{32D0}-\x{32FE}
|
|---|
| 252 | \x{3300}-\x{3376}
|
|---|
| 253 | \x{337B}-\x{33DD}
|
|---|
| 254 | \x{3400}-\x{4DB5}
|
|---|
| 255 | \x{4E00}-\x{9FA5}
|
|---|
| 256 | \x{33E0}-\x{33FE}
|
|---|
| 257 | \x{A000}-\x{A48C}
|
|---|
| 258 | \x{AC00}-\x{D7A3}
|
|---|
| 259 | \x{A490}-\x{A4C6}
|
|---|
| 260 | \x{F900}-\x{FA2D}
|
|---|
| 261 | \x{FA30}-\x{FA6A}
|
|---|
| 262 | \x{FE30}-\x{FE46}
|
|---|
| 263 | \x{FE49}-\x{FE52}
|
|---|
| 264 | \x{FE54}-\x{FE66}
|
|---|
| 265 | \x{FE68}-\x{FE6B}
|
|---|
| 266 | \x{FF01}-\x{FF60}
|
|---|
| 267 | \x{FFE0}-\x{FFE6}
|
|---|
| 268 | \x{20000}-\x{2A6D6}
|
|---|
| 269 | ]$/xo;
|
|---|
| 270 | }
|
|---|
| 271 |
|
|---|
| 272 | __END__
|
|---|