| 1 |
|
|---|
| 2 | package Locale::Maketext::Guts;
|
|---|
| 3 | BEGIN { *zorp = sub { return scalar @_ } unless defined &zorp; }
|
|---|
| 4 | # Just so we're nice and define SOMETHING in "our" package.
|
|---|
| 5 |
|
|---|
| 6 | package Locale::Maketext;
|
|---|
| 7 | use strict;
|
|---|
| 8 | use vars qw($USE_LITERALS $GUTSPATH);
|
|---|
| 9 |
|
|---|
| 10 | BEGIN {
|
|---|
| 11 | $GUTSPATH = __FILE__;
|
|---|
| 12 | *DEBUG = sub () {0} unless defined &DEBUG;
|
|---|
| 13 | }
|
|---|
| 14 |
|
|---|
| 15 | use utf8;
|
|---|
| 16 |
|
|---|
| 17 | sub _compile {
|
|---|
| 18 | # This big scary routine compiles an entry.
|
|---|
| 19 | # It returns either a coderef if there's brackety bits in this, or
|
|---|
| 20 | # otherwise a ref to a scalar.
|
|---|
| 21 |
|
|---|
| 22 | my $target = ref($_[0]) || $_[0];
|
|---|
| 23 |
|
|---|
| 24 | my(@code);
|
|---|
| 25 | my(@c) = (''); # "chunks" -- scratch.
|
|---|
| 26 | my $call_count = 0;
|
|---|
| 27 | my $big_pile = '';
|
|---|
| 28 | {
|
|---|
| 29 | my $in_group = 0; # start out outside a group
|
|---|
| 30 | my($m, @params); # scratch
|
|---|
| 31 |
|
|---|
| 32 | while($_[1] =~ # Iterate over chunks.
|
|---|
| 33 | m<\G(
|
|---|
| 34 | [^\~\[\]]+ # non-~[] stuff
|
|---|
| 35 | |
|
|---|
| 36 | ~. # ~[, ~], ~~, ~other
|
|---|
| 37 | |
|
|---|
| 38 | \[ # [ presumably opening a group
|
|---|
| 39 | |
|
|---|
| 40 | \] # ] presumably closing a group
|
|---|
| 41 | |
|
|---|
| 42 | ~ # terminal ~ ?
|
|---|
| 43 | |
|
|---|
| 44 | $
|
|---|
| 45 | )>xgs
|
|---|
| 46 | ) {
|
|---|
| 47 | print " \"$1\"\n" if DEBUG > 2;
|
|---|
| 48 |
|
|---|
| 49 | if($1 eq '[' or $1 eq '') { # "[" or end
|
|---|
| 50 | # Whether this is "[" or end, force processing of any
|
|---|
| 51 | # preceding literal.
|
|---|
| 52 | if($in_group) {
|
|---|
| 53 | if($1 eq '') {
|
|---|
| 54 | $target->_die_pointing($_[1], "Unterminated bracket group");
|
|---|
| 55 | } else {
|
|---|
| 56 | $target->_die_pointing($_[1], "You can't nest bracket groups");
|
|---|
| 57 | }
|
|---|
| 58 | } else {
|
|---|
| 59 | if($1 eq '') {
|
|---|
| 60 | print " [end-string]\n" if DEBUG > 2;
|
|---|
| 61 | } else {
|
|---|
| 62 | $in_group = 1;
|
|---|
| 63 | }
|
|---|
| 64 | die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity
|
|---|
| 65 | if(length $c[-1]) {
|
|---|
| 66 | # Now actually processing the preceding literal
|
|---|
| 67 | $big_pile .= $c[-1];
|
|---|
| 68 | if($USE_LITERALS and (
|
|---|
| 69 | (ord('A') == 65)
|
|---|
| 70 | ? $c[-1] !~ m<[^\x20-\x7E]>s
|
|---|
| 71 | # ASCII very safe chars
|
|---|
| 72 | : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
|
|---|
| 73 | # EBCDIC very safe chars
|
|---|
| 74 | )) {
|
|---|
| 75 | # normal case -- all very safe chars
|
|---|
| 76 | $c[-1] =~ s/'/\\'/g;
|
|---|
| 77 | push @code, q{ '} . $c[-1] . "',\n";
|
|---|
| 78 | $c[-1] = ''; # reuse this slot
|
|---|
| 79 | } else {
|
|---|
| 80 | push @code, ' $c[' . $#c . "],\n";
|
|---|
| 81 | push @c, ''; # new chunk
|
|---|
| 82 | }
|
|---|
| 83 | }
|
|---|
| 84 | # else just ignore the empty string.
|
|---|
| 85 | }
|
|---|
| 86 |
|
|---|
| 87 | } elsif($1 eq ']') { # "]"
|
|---|
| 88 | # close group -- go back in-band
|
|---|
| 89 | if($in_group) {
|
|---|
| 90 | $in_group = 0;
|
|---|
| 91 |
|
|---|
| 92 | print " --Closing group [$c[-1]]\n" if DEBUG > 2;
|
|---|
| 93 |
|
|---|
| 94 | # And now process the group...
|
|---|
| 95 |
|
|---|
| 96 | if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
|
|---|
| 97 | DEBUG > 2 and print " -- (Ignoring)\n";
|
|---|
| 98 | $c[-1] = ''; # reset out chink
|
|---|
| 99 | next;
|
|---|
| 100 | }
|
|---|
| 101 |
|
|---|
| 102 | #$c[-1] =~ s/^\s+//s;
|
|---|
| 103 | #$c[-1] =~ s/\s+$//s;
|
|---|
| 104 | ($m,@params) = split(",", $c[-1], -1); # was /\s*,\s*/
|
|---|
| 105 |
|
|---|
| 106 | # A bit of a hack -- we've turned "~,"'s into DELs, so turn
|
|---|
| 107 | # 'em into real commas here.
|
|---|
| 108 | if (ord('A') == 65) { # ASCII, etc
|
|---|
| 109 | foreach($m, @params) { tr/\x7F/,/ }
|
|---|
| 110 | } else { # EBCDIC (1047, 0037, POSIX-BC)
|
|---|
| 111 | # Thanks to Peter Prymmer for the EBCDIC handling
|
|---|
| 112 | foreach($m, @params) { tr/\x07/,/ }
|
|---|
| 113 | }
|
|---|
| 114 |
|
|---|
| 115 | # Special-case handling of some method names:
|
|---|
| 116 | if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) {
|
|---|
| 117 | # Treat [_1,...] as [,_1,...], etc.
|
|---|
| 118 | unshift @params, $m;
|
|---|
| 119 | $m = '';
|
|---|
| 120 | } elsif($m eq '*') {
|
|---|
| 121 | $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
|
|---|
| 122 | } elsif($m eq '#') {
|
|---|
| 123 | $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
|
|---|
| 124 | }
|
|---|
| 125 |
|
|---|
| 126 | # Most common case: a simple, legal-looking method name
|
|---|
| 127 | if($m eq '') {
|
|---|
| 128 | # 0-length method name means to just interpolate:
|
|---|
| 129 | push @code, ' (';
|
|---|
| 130 | } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s
|
|---|
| 131 | and $m !~ m<(?:^|\:)\d>s
|
|---|
| 132 | # exclude starting a (sub)package or symbol with a digit
|
|---|
| 133 | ) {
|
|---|
| 134 | # Yes, it even supports the demented (and undocumented?)
|
|---|
| 135 | # $obj->Foo::bar(...) syntax.
|
|---|
| 136 | $target->_die_pointing(
|
|---|
| 137 | $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method",
|
|---|
| 138 | 2 + length($c[-1])
|
|---|
| 139 | )
|
|---|
| 140 | if $m =~ m/^SUPER::/s;
|
|---|
| 141 | # Because for SUPER:: to work, we'd have to compile this into
|
|---|
| 142 | # the right package, and that seems just not worth the bother,
|
|---|
| 143 | # unless someone convinces me otherwise.
|
|---|
| 144 |
|
|---|
| 145 | push @code, ' $_[0]->' . $m . '(';
|
|---|
| 146 | } else {
|
|---|
| 147 | # TODO: implement something? or just too icky to consider?
|
|---|
| 148 | $target->_die_pointing(
|
|---|
| 149 | $_[1],
|
|---|
| 150 | "Can't use \"$m\" as a method name in bracket group",
|
|---|
| 151 | 2 + length($c[-1])
|
|---|
| 152 | );
|
|---|
| 153 | }
|
|---|
| 154 |
|
|---|
| 155 | pop @c; # we don't need that chunk anymore
|
|---|
| 156 | ++$call_count;
|
|---|
| 157 |
|
|---|
| 158 | foreach my $p (@params) {
|
|---|
| 159 | if($p eq '_*') {
|
|---|
| 160 | # Meaning: all parameters except $_[0]
|
|---|
| 161 | $code[-1] .= ' @_[1 .. $#_], ';
|
|---|
| 162 | # and yes, that does the right thing for all @_ < 3
|
|---|
| 163 | } elsif($p =~ m<^_(-?\d+)$>s) {
|
|---|
| 164 | # _3 meaning $_[3]
|
|---|
| 165 | $code[-1] .= '$_[' . (0 + $1) . '], ';
|
|---|
| 166 | } elsif($USE_LITERALS and (
|
|---|
| 167 | (ord('A') == 65)
|
|---|
| 168 | ? $p !~ m<[^\x20-\x7E]>s
|
|---|
| 169 | # ASCII very safe chars
|
|---|
| 170 | : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
|
|---|
| 171 | # EBCDIC very safe chars
|
|---|
| 172 | )) {
|
|---|
| 173 | # Normal case: a literal containing only safe characters
|
|---|
| 174 | $p =~ s/'/\\'/g;
|
|---|
| 175 | $code[-1] .= q{'} . $p . q{', };
|
|---|
| 176 | } else {
|
|---|
| 177 | # Stow it on the chunk-stack, and just refer to that.
|
|---|
| 178 | push @c, $p;
|
|---|
| 179 | push @code, ' $c[' . $#c . "], ";
|
|---|
| 180 | }
|
|---|
| 181 | }
|
|---|
| 182 | $code[-1] .= "),\n";
|
|---|
| 183 |
|
|---|
| 184 | push @c, '';
|
|---|
| 185 | } else {
|
|---|
| 186 | $target->_die_pointing($_[1], "Unbalanced ']'");
|
|---|
| 187 | }
|
|---|
| 188 |
|
|---|
| 189 | } elsif(substr($1,0,1) ne '~') {
|
|---|
| 190 | # it's stuff not containing "~" or "[" or "]"
|
|---|
| 191 | # i.e., a literal blob
|
|---|
| 192 | $c[-1] .= $1;
|
|---|
| 193 |
|
|---|
| 194 | } elsif($1 eq '~~') { # "~~"
|
|---|
| 195 | $c[-1] .= '~';
|
|---|
| 196 |
|
|---|
| 197 | } elsif($1 eq '~[') { # "~["
|
|---|
| 198 | $c[-1] .= '[';
|
|---|
| 199 |
|
|---|
| 200 | } elsif($1 eq '~]') { # "~]"
|
|---|
| 201 | $c[-1] .= ']';
|
|---|
| 202 |
|
|---|
| 203 | } elsif($1 eq '~,') { # "~,"
|
|---|
| 204 | if($in_group) {
|
|---|
| 205 | # This is a hack, based on the assumption that no-one will actually
|
|---|
| 206 | # want a DEL inside a bracket group. Let's hope that's it's true.
|
|---|
| 207 | if (ord('A') == 65) { # ASCII etc
|
|---|
| 208 | $c[-1] .= "\x7F";
|
|---|
| 209 | } else { # EBCDIC (cp 1047, 0037, POSIX-BC)
|
|---|
| 210 | $c[-1] .= "\x07";
|
|---|
| 211 | }
|
|---|
| 212 | } else {
|
|---|
| 213 | $c[-1] .= '~,';
|
|---|
| 214 | }
|
|---|
| 215 |
|
|---|
| 216 | } elsif($1 eq '~') { # possible only at string-end, it seems.
|
|---|
| 217 | $c[-1] .= '~';
|
|---|
| 218 |
|
|---|
| 219 | } else {
|
|---|
| 220 | # It's a "~X" where X is not a special character.
|
|---|
| 221 | # Consider it a literal ~ and X.
|
|---|
| 222 | $c[-1] .= $1;
|
|---|
| 223 | }
|
|---|
| 224 | }
|
|---|
| 225 | }
|
|---|
| 226 |
|
|---|
| 227 | if($call_count) {
|
|---|
| 228 | undef $big_pile; # Well, nevermind that.
|
|---|
| 229 | } else {
|
|---|
| 230 | # It's all literals! Ahwell, that can happen.
|
|---|
| 231 | # So don't bother with the eval. Return a SCALAR reference.
|
|---|
| 232 | return \$big_pile;
|
|---|
| 233 | }
|
|---|
| 234 |
|
|---|
| 235 | die "Last chunk isn't null??" if @c and length $c[-1]; # sanity
|
|---|
| 236 | print scalar(@c), " chunks under closure\n" if DEBUG;
|
|---|
| 237 | if(@code == 0) { # not possible?
|
|---|
| 238 | print "Empty code\n" if DEBUG;
|
|---|
| 239 | return \'';
|
|---|
| 240 | } elsif(@code > 1) { # most cases, presumably!
|
|---|
| 241 | unshift @code, "join '',\n";
|
|---|
| 242 | }
|
|---|
| 243 | unshift @code, "use strict; sub {\n";
|
|---|
| 244 | push @code, "}\n";
|
|---|
| 245 |
|
|---|
| 246 | print @code if DEBUG;
|
|---|
| 247 | my $sub = eval(join '', @code);
|
|---|
| 248 | die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
|
|---|
| 249 | return $sub;
|
|---|
| 250 | }
|
|---|
| 251 |
|
|---|
| 252 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|---|
| 253 |
|
|---|
| 254 | sub _die_pointing {
|
|---|
| 255 | # This is used by _compile to throw a fatal error
|
|---|
| 256 | my $target = shift; # class name
|
|---|
| 257 | # ...leaving $_[0] the error-causing text, and $_[1] the error message
|
|---|
| 258 |
|
|---|
| 259 | my $i = index($_[0], "\n");
|
|---|
| 260 |
|
|---|
| 261 | my $pointy;
|
|---|
| 262 | my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
|
|---|
| 263 | if($pos < 1) {
|
|---|
| 264 | $pointy = "^=== near there\n";
|
|---|
| 265 | } else { # we need to space over
|
|---|
| 266 | my $first_tab = index($_[0], "\t");
|
|---|
| 267 | if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) {
|
|---|
| 268 | # No tabs, or the first tab is harmlessly after where we will point to,
|
|---|
| 269 | # AND we're far enough from the margin that we can draw a proper arrow.
|
|---|
| 270 | $pointy = ('=' x $pos) . "^ near there\n";
|
|---|
| 271 | } else {
|
|---|
| 272 | # tabs screw everything up!
|
|---|
| 273 | $pointy = substr($_[0],0,$pos);
|
|---|
| 274 | $pointy =~ tr/\t //cd;
|
|---|
| 275 | # make everything into whitespace, but preseving tabs
|
|---|
| 276 | $pointy .= "^=== near there\n";
|
|---|
| 277 | }
|
|---|
| 278 | }
|
|---|
| 279 |
|
|---|
| 280 | my $errmsg = "$_[1], in\:\n$_[0]";
|
|---|
| 281 |
|
|---|
| 282 | if($i == -1) {
|
|---|
| 283 | # No newline.
|
|---|
| 284 | $errmsg .= "\n" . $pointy;
|
|---|
| 285 | } elsif($i == (length($_[0]) - 1) ) {
|
|---|
| 286 | # Already has a newline at end.
|
|---|
| 287 | $errmsg .= $pointy;
|
|---|
| 288 | } else {
|
|---|
| 289 | # don't bother with the pointy bit, I guess.
|
|---|
| 290 | }
|
|---|
| 291 | Carp::croak( "$errmsg via $target, as used" );
|
|---|
| 292 | }
|
|---|
| 293 |
|
|---|
| 294 | 1;
|
|---|
| 295 |
|
|---|