Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

encoding.t@ 3400

Visit:

Last change on this file since 3400 was 3181, checked in by bird, 19 years ago
perl 5.8.8
File size: 5.1 KB

Line
1	BEGIN {
2	require Config; import Config;
3	if ($Config{'extensions'} !~ /\bEncode\b/) {
4	print "1..0 # Skip: Encode was not built\n";
5	exit 0;
6	}
7	unless (find PerlIO::Layer 'perlio') {
8	print "1..0 # Skip: PerlIO was not built\n";
9	exit 0;
10	}
11	if (ord("A") == 193) {
12	print "1..0 # encoding pragma does not support EBCDIC platforms\n";
13	exit(0);
14	}
15	}
16
17	print "1..31\n";
18
19	use encoding "latin1"; # ignored (overwritten by the next line)
20	use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...)
21
22	# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
23	# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
24	# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
25
26	$a = "\xDF";
27	$b = "\x{100}";
28
29	print "not " unless ord($a) == 0x3af;
30	print "ok 1\n";
31
32	print "not " unless ord($b) == 0x100;
33	print "ok 2\n";
34
35	my $c;
36
37	$c = $a . $b;
38
39	print "not " unless ord($c) == 0x3af;
40	print "ok 3\n";
41
42	print "not " unless length($c) == 2;
43	print "ok 4\n";
44
45	print "not " unless ord(substr($c, 1, 1)) == 0x100;
46	print "ok 5\n";
47
48	print "not " unless ord(chr(0xdf)) == 0x3af; # spooky
49	print "ok 6\n";
50
51	print "not " unless ord(pack("C", 0xdf)) == 0x3af;
52	print "ok 7\n";
53
54	# we didn't break pack/unpack, I hope
55
56	print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
57	print "ok 8\n";
58
59	# the first octet of UTF-8 encoded 0x3af
60	print "not " unless unpack("C", chr(0xdf)) == 0xce;
61	print "ok 9\n";
62
63	print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
64	print "ok 10\n";
65
66	print "not " unless unpack("U", chr(0xdf)) == 0x3af;
67	print "ok 11\n";
68
69	# charnames must still work
70	use charnames ':full';
71	print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
72	print "ok 12\n";
73
74	# combine
75
76	$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
77
78	print "not " unless ord($c) == 0x3af;
79	print "ok 13\n";
80
81	print "not " unless ord(substr($c, 1, 1)) == 0xdf;
82	print "ok 14\n";
83
84	print "not " unless ord(substr($c, 2, 1)) == 0x3af;
85	print "ok 15\n";
86
87	# regex literals
88
89	print "not " unless "\xDF" =~ /\x{3AF}/;
90	print "ok 16\n";
91
92	print "not " unless "\x{3AF}" =~ /\xDF/;
93	print "ok 17\n";
94
95	print "not " unless "\xDF" =~ /\xDF/;
96	print "ok 18\n";
97
98	print "not " unless "\x{3AF}" =~ /\x{3AF}/;
99	print "ok 19\n";
100
101	# eq, cmp
102
103	my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = (
104	pack("C*", 0xDF ), # byte
105	pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0
106	pack("U*", 0x3AF), # $U eq $byte
107	pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding
108	pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1)
109	pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
110	pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb)
111	);
112
113	# all the tests in this section that compare a byte encoded string
114	# ato UTF-8 encoded are run in all possible vairants
115	# all of the eq, ne, cmp operations tested,
116	# $v z $u tested as well as $u z $v
117
118	sub alleq($$){
119	my ($a,$b) = (shift, shift);
120	$a eq $b && $b eq $a &&
121	!( $a ne $b ) && !( $b ne $a ) &&
122	( $a cmp $b ) == 0 && ( $b cmp $a ) == 0;
123	}
124
125	sub anyeq($$){
126	my ($a,$b) = (shift, shift);
127	$a eq $b \|\| $b eq $a \|\|
128	!( $a ne $b ) \|\| !( $b ne $a ) \|\|
129	( $a cmp $b ) == 0 \|\| ( $b cmp $a ) == 0;
130	}
131
132	sub allgt($$){
133	my ($a,$b) = (shift, shift);
134	( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
135	}
136	#match the correct UTF-8 string
137	print "not " unless alleq($byte, $U);
138	print "ok 20\n";
139
140	#do not match a wrong UTF-8 string
141	print "not " if anyeq($byte, $Ub);
142	print "ok 21\n";
143
144	#string ordering
145	print "not " unless allgt ( $g1, $byte ) &&
146	allgt ( $g2, $byte ) &&
147	allgt ( $byte, $l ) &&
148	allgt ( $bytes, $U );
149	print "ok 22\n";
150
151	# upgrade, downgrade
152
153	my ($u,$v,$v2);
154	$u = $v = $v2 = pack("C*", 0xDF);
155	utf8::upgrade($v); #explicit upgrade
156	$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
157
158	# implicit upgrade === explicit upgrade
159	print "not " if do{{use bytes; $v ne $v2}} \|\| $v ne $v2;
160	print "ok 23\n";
161
162	# utf8::upgrade is transparent and does not break equality
163	print "not " unless alleq( $u, $v );
164	print "ok 24\n";
165
166	$u = $v = pack("C*", 0xDF);
167	utf8::upgrade($v);
168	#test for a roundtrip, we should get back from where we left
169	eval {utf8::downgrade( $v )};
170	print "not " if $@ !~ /^Wide / \|\| do{{use bytes; $u eq $v}} \|\| $u ne $v;
171	print "ok 25\n";
172
173	# some more eq, cmp
174
175	$byte=pack("C*", 0xDF);
176
177	print "not " unless pack("U*", 0x3AF) eq $byte;
178	print "ok 26\n";
179
180	print "not " if chr(0xDF) cmp $byte;
181	print "ok 27\n";
182
183	print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) &&
184	((pack("U*", 0x3AE) cmp $byte) == -1) &&
185	((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) &&
186	((pack("U", 0x3AF) cmp pack("C",0xDF,0x20))==-1);
187	print "ok 28\n";
188
189
190	{
191	# Used to core dump in 5.7.3
192	no warnings; # so test goes noiselessly
193	print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n";
194	}
195
196	{
197	my %h1;
198	my %h2;
199	$h1{"\xdf"} = 41;
200	$h2{"\x{3af}"} = 42;
201	print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
202	print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n";
203	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/essentials/dev-lang/perl/ext/Encode/t/encoding.t@ 3400

Download in other formats: