| [3181] | 1 | #
|
|---|
| 2 | # Locale::Script - ISO codes for script identification (ISO 15924)
|
|---|
| 3 | #
|
|---|
| 4 | # $Id: Script.pm,v 2.7 2004/06/10 21:19:34 neilb Exp $
|
|---|
| 5 | #
|
|---|
| 6 |
|
|---|
| 7 | package Locale::Script;
|
|---|
| 8 | use strict;
|
|---|
| 9 | require 5.002;
|
|---|
| 10 |
|
|---|
| 11 | require Exporter;
|
|---|
| 12 | use Carp;
|
|---|
| 13 | use Locale::Constants;
|
|---|
| 14 |
|
|---|
| 15 |
|
|---|
| 16 | #-----------------------------------------------------------------------
|
|---|
| 17 | # Public Global Variables
|
|---|
| 18 | #-----------------------------------------------------------------------
|
|---|
| 19 | use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
|
|---|
| 20 | $VERSION = sprintf("%d.%02d", q$Revision: 2.7 $ =~ /(\d+)\.(\d+)/);
|
|---|
| 21 | @ISA = qw(Exporter);
|
|---|
| 22 | @EXPORT = qw(code2script script2code
|
|---|
| 23 | all_script_codes all_script_names
|
|---|
| 24 | script_code2code
|
|---|
| 25 | LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC);
|
|---|
| 26 |
|
|---|
| 27 | #-----------------------------------------------------------------------
|
|---|
| 28 | # Private Global Variables
|
|---|
| 29 | #-----------------------------------------------------------------------
|
|---|
| 30 | my $CODES = [];
|
|---|
| 31 | my $COUNTRIES = [];
|
|---|
| 32 |
|
|---|
| 33 |
|
|---|
| 34 | #=======================================================================
|
|---|
| 35 | #
|
|---|
| 36 | # code2script ( CODE [, CODESET ] )
|
|---|
| 37 | #
|
|---|
| 38 | #=======================================================================
|
|---|
| 39 | sub code2script
|
|---|
| 40 | {
|
|---|
| 41 | my $code = shift;
|
|---|
| 42 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
|
|---|
| 43 |
|
|---|
| 44 |
|
|---|
| 45 | return undef unless defined $code;
|
|---|
| 46 |
|
|---|
| 47 | #-------------------------------------------------------------------
|
|---|
| 48 | # Make sure the code is in the right form before we use it
|
|---|
| 49 | # to look up the corresponding script.
|
|---|
| 50 | # We have to sprintf because the codes are given as 3-digits,
|
|---|
| 51 | # with leading 0's. Eg 070 for Egyptian demotic.
|
|---|
| 52 | #-------------------------------------------------------------------
|
|---|
| 53 | if ($codeset == LOCALE_CODE_NUMERIC)
|
|---|
| 54 | {
|
|---|
| 55 | return undef if ($code =~ /\D/);
|
|---|
| 56 | $code = sprintf("%.3d", $code);
|
|---|
| 57 | }
|
|---|
| 58 | else
|
|---|
| 59 | {
|
|---|
| 60 | $code = lc($code);
|
|---|
| 61 | }
|
|---|
| 62 |
|
|---|
| 63 | if (exists $CODES->[$codeset]->{$code})
|
|---|
| 64 | {
|
|---|
| 65 | return $CODES->[$codeset]->{$code};
|
|---|
| 66 | }
|
|---|
| 67 | else
|
|---|
| 68 | {
|
|---|
| 69 | #---------------------------------------------------------------
|
|---|
| 70 | # no such script code!
|
|---|
| 71 | #---------------------------------------------------------------
|
|---|
| 72 | return undef;
|
|---|
| 73 | }
|
|---|
| 74 | }
|
|---|
| 75 |
|
|---|
| 76 |
|
|---|
| 77 | #=======================================================================
|
|---|
| 78 | #
|
|---|
| 79 | # script2code ( SCRIPT [, CODESET ] )
|
|---|
| 80 | #
|
|---|
| 81 | #=======================================================================
|
|---|
| 82 | sub script2code
|
|---|
| 83 | {
|
|---|
| 84 | my $script = shift;
|
|---|
| 85 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
|
|---|
| 86 |
|
|---|
| 87 |
|
|---|
| 88 | return undef unless defined $script;
|
|---|
| 89 | $script = lc($script);
|
|---|
| 90 | if (exists $COUNTRIES->[$codeset]->{$script})
|
|---|
| 91 | {
|
|---|
| 92 | return $COUNTRIES->[$codeset]->{$script};
|
|---|
| 93 | }
|
|---|
| 94 | else
|
|---|
| 95 | {
|
|---|
| 96 | #---------------------------------------------------------------
|
|---|
| 97 | # no such script!
|
|---|
| 98 | #---------------------------------------------------------------
|
|---|
| 99 | return undef;
|
|---|
| 100 | }
|
|---|
| 101 | }
|
|---|
| 102 |
|
|---|
| 103 |
|
|---|
| 104 | #=======================================================================
|
|---|
| 105 | #
|
|---|
| 106 | # script_code2code ( CODE, IN-CODESET, OUT-CODESET )
|
|---|
| 107 | #
|
|---|
| 108 | #=======================================================================
|
|---|
| 109 | sub script_code2code
|
|---|
| 110 | {
|
|---|
| 111 | (@_ == 3) or croak "script_code2code() takes 3 arguments!";
|
|---|
| 112 |
|
|---|
| 113 | my $code = shift;
|
|---|
| 114 | my $inset = shift;
|
|---|
| 115 | my $outset = shift;
|
|---|
| 116 | my $outcode;
|
|---|
| 117 | my $script;
|
|---|
| 118 |
|
|---|
| 119 |
|
|---|
| 120 | return undef if $inset == $outset;
|
|---|
| 121 | $script = code2script($code, $inset);
|
|---|
| 122 | return undef if not defined $script;
|
|---|
| 123 | $outcode = script2code($script, $outset);
|
|---|
| 124 | return $outcode;
|
|---|
| 125 | }
|
|---|
| 126 |
|
|---|
| 127 |
|
|---|
| 128 | #=======================================================================
|
|---|
| 129 | #
|
|---|
| 130 | # all_script_codes()
|
|---|
| 131 | #
|
|---|
| 132 | #=======================================================================
|
|---|
| 133 | sub all_script_codes
|
|---|
| 134 | {
|
|---|
| 135 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
|
|---|
| 136 |
|
|---|
| 137 | return keys %{ $CODES->[$codeset] };
|
|---|
| 138 | }
|
|---|
| 139 |
|
|---|
| 140 |
|
|---|
| 141 | #=======================================================================
|
|---|
| 142 | #
|
|---|
| 143 | # all_script_names()
|
|---|
| 144 | #
|
|---|
| 145 | #=======================================================================
|
|---|
| 146 | sub all_script_names
|
|---|
| 147 | {
|
|---|
| 148 | my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
|
|---|
| 149 |
|
|---|
| 150 | return values %{ $CODES->[$codeset] };
|
|---|
| 151 | }
|
|---|
| 152 |
|
|---|
| 153 |
|
|---|
| 154 | #=======================================================================
|
|---|
| 155 | #
|
|---|
| 156 | # initialisation code - stuff the DATA into the ALPHA2 hash
|
|---|
| 157 | #
|
|---|
| 158 | #=======================================================================
|
|---|
| 159 | {
|
|---|
| 160 | my ($alpha2, $alpha3, $numeric);
|
|---|
| 161 | my $script;
|
|---|
| 162 | local $_;
|
|---|
| 163 |
|
|---|
| 164 |
|
|---|
| 165 | while (<DATA>)
|
|---|
| 166 | {
|
|---|
| 167 | next unless /\S/;
|
|---|
| 168 | chop;
|
|---|
| 169 | ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4);
|
|---|
| 170 |
|
|---|
| 171 | $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script;
|
|---|
| 172 | $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2;
|
|---|
| 173 |
|
|---|
| 174 | if ($alpha3)
|
|---|
| 175 | {
|
|---|
| 176 | $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script;
|
|---|
| 177 | $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3;
|
|---|
| 178 | }
|
|---|
| 179 |
|
|---|
| 180 | if ($numeric)
|
|---|
| 181 | {
|
|---|
| 182 | $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script;
|
|---|
| 183 | $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric;
|
|---|
| 184 | }
|
|---|
| 185 |
|
|---|
| 186 | }
|
|---|
| 187 |
|
|---|
| 188 | close(DATA);
|
|---|
| 189 | }
|
|---|
| 190 |
|
|---|
| 191 | 1;
|
|---|
| 192 |
|
|---|
| 193 | __DATA__
|
|---|
| 194 | am:ama:130:Aramaic
|
|---|
| 195 | ar:ara:160:Arabic
|
|---|
| 196 | av:ave:151:Avestan
|
|---|
| 197 | bh:bhm:300:Brahmi (Ashoka)
|
|---|
| 198 | bi:bid:372:Buhid
|
|---|
| 199 | bn:ben:325:Bengali
|
|---|
| 200 | bo:bod:330:Tibetan
|
|---|
| 201 | bp:bpm:285:Bopomofo
|
|---|
| 202 | br:brl:570:Braille
|
|---|
| 203 | bt:btk:365:Batak
|
|---|
| 204 | bu:bug:367:Buginese (Makassar)
|
|---|
| 205 | by:bys:550:Blissymbols
|
|---|
| 206 | ca:cam:358:Cham
|
|---|
| 207 | ch:chu:221:Old Church Slavonic
|
|---|
| 208 | ci:cir:291:Cirth
|
|---|
| 209 | cm:cmn:402:Cypro-Minoan
|
|---|
| 210 | co:cop:205:Coptic
|
|---|
| 211 | cp:cpr:403:Cypriote syllabary
|
|---|
| 212 | cy:cyr:220:Cyrillic
|
|---|
| 213 | ds:dsr:250:Deserel (Mormon)
|
|---|
| 214 | dv:dvn:315:Devanagari (Nagari)
|
|---|
| 215 | ed:egd:070:Egyptian demotic
|
|---|
| 216 | eg:egy:050:Egyptian hieroglyphs
|
|---|
| 217 | eh:egh:060:Egyptian hieratic
|
|---|
| 218 | el:ell:200:Greek
|
|---|
| 219 | eo:eos:210:Etruscan and Oscan
|
|---|
| 220 | et:eth:430:Ethiopic
|
|---|
| 221 | gl:glg:225:Glagolitic
|
|---|
| 222 | gm:gmu:310:Gurmukhi
|
|---|
| 223 | gt:gth:206:Gothic
|
|---|
| 224 | gu:guj:320:Gujarati
|
|---|
| 225 | ha:han:500:Han ideographs
|
|---|
| 226 | he:heb:125:Hebrew
|
|---|
| 227 | hg:hgl:420:Hangul
|
|---|
| 228 | hm:hmo:450:Pahawh Hmong
|
|---|
| 229 | ho:hoo:371:Hanunoo
|
|---|
| 230 | hr:hrg:410:Hiragana
|
|---|
| 231 | hu:hun:176:Old Hungarian runic
|
|---|
| 232 | hv:hvn:175:Kok Turki runic
|
|---|
| 233 | hy:hye:230:Armenian
|
|---|
| 234 | iv:ivl:610:Indus Valley
|
|---|
| 235 | ja:jap:930:(alias for Han + Hiragana + Katakana)
|
|---|
| 236 | jl:jlg:445:Cherokee syllabary
|
|---|
| 237 | jw:jwi:360:Javanese
|
|---|
| 238 | ka:kam:241:Georgian (Mxedruli)
|
|---|
| 239 | kh:khn:931:(alias for Hangul + Han)
|
|---|
| 240 | kk:kkn:411:Katakana
|
|---|
| 241 | km:khm:354:Khmer
|
|---|
| 242 | kn:kan:345:Kannada
|
|---|
| 243 | kr:krn:357:Karenni (Kayah Li)
|
|---|
| 244 | ks:kst:305:Kharoshthi
|
|---|
| 245 | kx:kax:240:Georgian (Xucuri)
|
|---|
| 246 | la:lat:217:Latin
|
|---|
| 247 | lf:laf:215:Latin (Fraktur variant)
|
|---|
| 248 | lg:lag:216:Latin (Gaelic variant)
|
|---|
| 249 | lo:lao:356:Lao
|
|---|
| 250 | lp:lpc:335:Lepcha (Rong)
|
|---|
| 251 | md:mda:140:Mandaean
|
|---|
| 252 | me:mer:100:Meroitic
|
|---|
| 253 | mh:may:090:Mayan hieroglyphs
|
|---|
| 254 | ml:mlm:347:Malayalam
|
|---|
| 255 | mn:mon:145:Mongolian
|
|---|
| 256 | my:mya:350:Burmese
|
|---|
| 257 | na:naa:400:Linear A
|
|---|
| 258 | nb:nbb:401:Linear B
|
|---|
| 259 | og:ogm:212:Ogham
|
|---|
| 260 | or:ory:327:Oriya
|
|---|
| 261 | os:osm:260:Osmanya
|
|---|
| 262 | ph:phx:115:Phoenician
|
|---|
| 263 | ph:pah:150:Pahlavi
|
|---|
| 264 | pl:pld:282:Pollard Phonetic
|
|---|
| 265 | pq:pqd:295:Klingon plQaD
|
|---|
| 266 | pr:prm:227:Old Permic
|
|---|
| 267 | ps:pst:600:Phaistos Disk
|
|---|
| 268 | rn:rnr:211:Runic (Germanic)
|
|---|
| 269 | rr:rro:620:Rongo-rongo
|
|---|
| 270 | sa:sar:110:South Arabian
|
|---|
| 271 | si:sin:348:Sinhala
|
|---|
| 272 | sj:syj:137:Syriac (Jacobite variant)
|
|---|
| 273 | sl:slb:440:Unified Canadian Aboriginal Syllabics
|
|---|
| 274 | sn:syn:136:Syriac (Nestorian variant)
|
|---|
| 275 | sw:sww:281:Shavian (Shaw)
|
|---|
| 276 | sy:syr:135:Syriac (Estrangelo)
|
|---|
| 277 | ta:tam:346:Tamil
|
|---|
| 278 | tb:tbw:373:Tagbanwa
|
|---|
| 279 | te:tel:340:Telugu
|
|---|
| 280 | tf:tfn:120:Tifnagh
|
|---|
| 281 | tg:tag:370:Tagalog
|
|---|
| 282 | th:tha:352:Thai
|
|---|
| 283 | tn:tna:170:Thaana
|
|---|
| 284 | tw:twr:290:Tengwar
|
|---|
| 285 | va:vai:470:Vai
|
|---|
| 286 | vs:vsp:280:Visible Speech
|
|---|
| 287 | xa:xas:000:Cuneiform, Sumero-Akkadian
|
|---|
| 288 | xf:xfa:105:Cuneiform, Old Persian
|
|---|
| 289 | xk:xkn:412:(alias for Hiragana + Katakana)
|
|---|
| 290 | xu:xug:106:Cuneiform, Ugaritic
|
|---|
| 291 | yi:yii:460:Yi
|
|---|
| 292 | zx:zxx:997:Unwritten language
|
|---|
| 293 | zy:zyy:998:Undetermined script
|
|---|
| 294 | zz:zzz:999:Uncoded script
|
|---|