Context Navigation

UCD.pm

Visit:

Last change on this file was 3181, checked in by bird, 19 years ago
perl 5.8.8
File size: 22.3 KB

Line
1	package Unicode::UCD;
2
3	use strict;
4	use warnings;
5
6	our $VERSION = '0.24';
7
8	use Storable qw(dclone);
9
10	require Exporter;
11
12	our @ISA = qw(Exporter);
13
14	our @EXPORT_OK = qw(charinfo
15	charblock charscript
16	charblocks charscripts
17	charinrange
18	compexcl
19	casefold casespec
20	namedseq);
21
22	use Carp;
23
24	=head1 NAME
25
26	Unicode::UCD - Unicode character database
27
28	=head1 SYNOPSIS
29
30	use Unicode::UCD 'charinfo';
31	my $charinfo = charinfo($codepoint);
32
33	use Unicode::UCD 'charblock';
34	my $charblock = charblock($codepoint);
35
36	use Unicode::UCD 'charscript';
37	my $charscript = charscript($codepoint);
38
39	use Unicode::UCD 'charblocks';
40	my $charblocks = charblocks();
41
42	use Unicode::UCD 'charscripts';
43	my %charscripts = charscripts();
44
45	use Unicode::UCD qw(charscript charinrange);
46	my $range = charscript($script);
47	print "looks like $script\n" if charinrange($range, $codepoint);
48
49	use Unicode::UCD 'compexcl';
50	my $compexcl = compexcl($codepoint);
51
52	use Unicode::UCD 'namedseq';
53	my $namedseq = namedseq($named_sequence_name);
54
55	my $unicode_version = Unicode::UCD::UnicodeVersion();
56
57	=head1 DESCRIPTION
58
59	The Unicode::UCD module offers a simple interface to the Unicode
60	Character Database.
61
62	=cut
63
# File-scoped filehandle slots, one per Unicode database file.  All of them
# start out undefined.
# NOTE(review): presumably each is handed to openunicode() by reference and
# filled in on first use -- confirm against the callers further down the file.
my (
    $UNICODEFH,     # main character data
    $BLOCKSFH,      # blocks
    $SCRIPTSFH,     # scripts
    $VERSIONFH,     # Unicode version
    $COMPEXCLFH,    # composition exclusions
    $CASEFOLDFH,    # case folding
    $CASESPECFH,    # special casing
    $NAMEDSEQFH,    # named sequences
);
72
# Locate and open, read-only, a data file that lives below a "unicore"
# directory in one of the @INC directories.
#
#   $rfh  - reference to the scalar that holds (or will receive) the filehandle
#   @path - path components of the wanted file, relative to "unicore"
#
# Returns the full path of the file that was opened.  If $$rfh is already
# defined, nothing is opened and undef is returned.  Croaks when no @INC
# directory contains an openable file.
sub openunicode {
    my ($rfh, @path) = @_;
    my $f;
    unless (defined $$rfh) {
        use File::Spec;
        for my $d (@INC) {
            # @INC may contain hook references (code refs, array refs,
            # objects); those are not directories, so do not build paths
            # out of their stringified form.
            next if ref $d;
            $f = File::Spec->catfile($d, "unicore", @path);
            # Three-argument open with an explicit read mode: the path is
            # taken literally, so characters such as '>', '|' or surrounding
            # whitespace in it can never be interpreted as an open mode.
            last if open($$rfh, '<', $f);
            undef $f;
        }
        unless (defined $f) {
            # A failed open can leave an auto-vivified but unopened glob in
            # the slot; reset it so a later call does not mistake the handle
            # for an open one.
            undef $$rfh;
            croak __PACKAGE__, ": failed to find ",
                  File::Spec->catfile(@path), " in @INC";
        }
    }
    return $f;
}
89
90	=head2 charinfo
91
92	use Unicode::UCD 'charinfo';
93
94	my $charinfo = charinfo(0x41);
95
96	charinfo() returns a reference to a hash that has the following fields
97	as defined by the Unicode standard:
98
99	key
100
101	code code point with at least four hexdigits
102	name name of the character IN UPPER CASE
103	category general category of the character
104	combining classes used in the Canonical Ordering Algorithm
105	bidi bidirectional category
106	decomposition character decomposition mapping
107	decimal if decimal digit this is the integer numeric value
108	digit if digit this is the numeric value
109	numeric if numeric this is the integer or rational numeric value
110	mirrored if mirrored in bidirectional text
111	unicode10 Unicode 1.0 name if existed and different
112	comment ISO 10646 comment field
113	upper uppercase equivalent mapping
114	lower lowercase equivalent mapping
115	title titlecase equivalent mapping
116
117	block block the character belongs to (used in \p{In...})
118	script script the character belongs to
119
120	If no match is found, a reference to an empty hash is returned.
121
122	The C<block> property is the same as returned by charblock(). It is
123	not defined in the Unicode Character Database proper (Chapter 4 of the
124	Unicode 3.0 Standard, aka TUS3) but instead in an auxiliary database
125	(Chapter 14 of TUS3). Similarly for the C<script> property.
126
127	Note that you cannot do (de)composition and casing based solely on the
128	above C<decomposition>, C<lower>, C<upper>, and C<title> properties;
129	you will also need the compexcl(), casefold(), and casespec() functions.
130
131	=cut
132
133	# NB: This function is duplicated in charnames.pm
134	sub _getcode {
135	my $arg = shift;