source: trunk/essentials/dev-lang/perl/lib/charnames.pm@ 3368

Last change on this file since 3368 was 3181, checked in by bird, 19 years ago

perl 5.8.8

File size: 15.1 KB
Line 
1package charnames;
2use strict;
3use warnings;
4use Carp;
5use File::Spec;
6our $VERSION = '1.05';
7
8use bytes (); # for $bytes::hint_bits
9$charnames::hint_bits = 0x20000; # HINT_LOCALIZE_HH
10
11my %alias1 = (
12 # Icky 3.2 names with parentheses.
13 'LINE FEED' => 'LINE FEED (LF)',
14 'FORM FEED' => 'FORM FEED (FF)',
15 'CARRIAGE RETURN' => 'CARRIAGE RETURN (CR)',
16 'NEXT LINE' => 'NEXT LINE (NEL)',
17 # Convenience.
18 'LF' => 'LINE FEED (LF)',
19 'FF' => 'FORM FEED (FF)',
20 'CR' => 'CARRIAGE RETURN (CR)',
21 'NEL' => 'NEXT LINE (NEL)',
22 # More convenience. For futher convencience,
23 # it is suggested some way using using the NamesList
24 # aliases is implemented.
25 'ZWNJ' => 'ZERO WIDTH NON-JOINER',
26 'ZWJ' => 'ZERO WIDTH JOINER',
27 'BOM' => 'BYTE ORDER MARK',
28 );
29
30my %alias2 = (
31 # Pre-3.2 compatibility (only for the first 256 characters).
32 'HORIZONTAL TABULATION' => 'CHARACTER TABULATION',
33 'VERTICAL TABULATION' => 'LINE TABULATION',
34 'FILE SEPARATOR' => 'INFORMATION SEPARATOR FOUR',
35 'GROUP SEPARATOR' => 'INFORMATION SEPARATOR THREE',
36 'RECORD SEPARATOR' => 'INFORMATION SEPARATOR TWO',
37 'UNIT SEPARATOR' => 'INFORMATION SEPARATOR ONE',
38 'PARTIAL LINE DOWN' => 'PARTIAL LINE FORWARD',
39 'PARTIAL LINE UP' => 'PARTIAL LINE BACKWARD',
40 );
41
42my %alias3 = (
43 # User defined aliasses. Even more convenient :)
44 );
45my $txt;
46
47sub alias (@)
48{
49 @_ or return %alias3;
50 my $alias = ref $_[0] ? $_[0] : { @_ };
51 @alias3{keys %$alias} = values %$alias;
52} # alias
53
54sub alias_file ($)
55{
56 my ($arg, $file) = @_;
57 if (-f $arg && File::Spec->file_name_is_absolute ($arg)) {
58 $file = $arg;
59 }
60 elsif ($arg =~ m/^\w+$/) {
61 $file = "unicore/${arg}_alias.pl";
62 }
63 else {
64 croak "Charnames alias files can only have identifier characters";
65 }
66 if (my @alias = do $file) {
67 @alias == 1 && !defined $alias[0] and
68 croak "$file cannot be used as alias file for charnames";
69 @alias % 2 and
70 croak "$file did not return a (valid) list of alias pairs";
71 alias (@alias);
72 return (1);
73 }
74 0;
75} # alias_file
76
77# This is not optimized in any way yet
78sub charnames
79{
80 my $name = shift;
81
82 if (exists $alias1{$name}) {
83 $name = $alias1{$name};
84 }
85 elsif (exists $alias2{$name}) {
86 require warnings;
87 warnings::warnif('deprecated', qq{Unicode character name "$name" is deprecated, use "$alias2{$name}" instead});
88 $name = $alias2{$name};
89 }
90 elsif (exists $alias3{$name}) {
91 $name = $alias3{$name};
92 }
93
94 my $ord;
95 my @off;
96 my $fname;
97
98 if ($name eq "BYTE ORDER MARK") {
99 $fname = $name;
100 $ord = 0xFEFF;
101 } else {
102 ## Suck in the code/name list as a big string.
103 ## Lines look like:
104 ## "0052\t\tLATIN CAPITAL LETTER R\n"
105 $txt = do "unicore/Name.pl" unless $txt;
106
107 ## @off will hold the index into the code/name string of the start and
108 ## end of the name as we find it.
109
110 ## If :full, look for the name exactly
111 if ($^H{charnames_full} and $txt =~ /\t\t\Q$name\E$/m) {
112 @off = ($-[0], $+[0]);
113 }
114
115 ## If we didn't get above, and :short allowed, look for the short name.
116 ## The short name is like "greek:Sigma"
117 unless (@off) {
118 if ($^H{charnames_short} and $name =~ /^(.+?):(.+)/s) {
119 my ($script, $cname) = ($1, $2);
120 my $case = $cname =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
121 if ($txt =~ m/\t\t\U$script\E (?:$case )?LETTER \U\Q$cname\E$/m) {
122 @off = ($-[0], $+[0]);
123 }
124 }
125 }
126
127 ## If we still don't have it, check for the name among the loaded
128 ## scripts.
129 if (not @off) {
130 my $case = $name =~ /[[:upper:]]/ ? "CAPITAL" : "SMALL";
131 for my $script (@{$^H{charnames_scripts}}) {
132 if ($txt =~ m/\t\t$script (?:$case )?LETTER \U\Q$name\E$/m) {
133 @off = ($-[0], $+[0]);
134 last;
135 }
136 }
137 }
138
139 ## If we don't have it by now, give up.
140 unless (@off) {
141 carp "Unknown charname '$name'";
142 return "\x{FFFD}";
143 }
144
145 ##
146 ## Now know where in the string the name starts.
147 ## The code, in hex, is before that.
148 ##
149 ## The code can be 4-6 characters long, so we've got to sort of
150 ## go look for it, just after the newline that comes before $off[0].
151 ##
152 ## This would be much easier if unicore/Name.pl had info in
153 ## a name/code order, instead of code/name order.
154 ##
155 ## The +1 after the rindex() is to skip past the newline we're finding,
156 ## or, if the rindex() fails, to put us to an offset of zero.
157 ##
158 my $hexstart = rindex($txt, "\n", $off[0]) + 1;
159
160 ## we know where it starts, so turn into number -
161 ## the ordinal for the char.
162 $ord = hex substr($txt, $hexstart, $off[0] - $hexstart);
163 }
164
165 if ($^H & $bytes::hint_bits) { # "use bytes" in effect?
166 use bytes;
167 return chr $ord if $ord <= 255;
168 my $hex = sprintf "%04x", $ord;
169 if (not defined $fname) {
170 $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
171 }
172 croak "Character 0x$hex with name '$fname' is above 0xFF";
173 }
174
175 no warnings 'utf8'; # allow even illegal characters
176 return pack "U", $ord;
177} # charnames
178
179sub import
180{
181 shift; ## ignore class name
182
183 if (not @_) {
184 carp("`use charnames' needs explicit imports list");
185 }
186 $^H |= $charnames::hint_bits;
187 $^H{charnames} = \&charnames ;
188
189 ##