| 1 | """ Test script for the unicodedata module.
|
|---|
| 2 |
|
|---|
| 3 | Written by Marc-Andre Lemburg ([email protected]).
|
|---|
| 4 |
|
|---|
| 5 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
|---|
| 6 |
|
|---|
| 7 | """#"
|
|---|
| 8 | import unittest, test.test_support
|
|---|
| 9 | import hashlib
|
|---|
| 10 |
|
|---|
# Codec used by UnicodeMethodsTest.test_method_checksum to turn each joined
# unicode record into bytes before it is fed to the SHA-1 hash.
encoding = 'utf-8'
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 | ### Run tests
|
|---|
| 15 |
|
|---|
| 16 | class UnicodeMethodsTest(unittest.TestCase):
|
|---|
| 17 |
|
|---|
| 18 | # update this, if the database changes
|
|---|
| 19 | expectedchecksum = 'c198ed264497f108434b3f576d4107237221cc8a'
|
|---|
| 20 |
|
|---|
| 21 | def test_method_checksum(self):
|
|---|
| 22 | h = hashlib.sha1()
|
|---|
| 23 | for i in range(65536):
|
|---|
| 24 | char = unichr(i)
|
|---|
| 25 | data = [
|
|---|
| 26 | # Predicates (single char)
|
|---|
| 27 | u"01"[char.isalnum()],
|
|---|
| 28 | u"01"[char.isalpha()],
|
|---|
| 29 | u"01"[char.isdecimal()],
|
|---|
| 30 | u"01"[char.isdigit()],
|
|---|
| 31 | u"01"[char.islower()],
|
|---|
| 32 | u"01"[char.isnumeric()],
|
|---|
| 33 | u"01"[char.isspace()],
|
|---|
| 34 | u"01"[char.istitle()],
|
|---|
| 35 | u"01"[char.isupper()],
|
|---|
| 36 |
|
|---|
| 37 | # Predicates (multiple chars)
|
|---|
| 38 | u"01"[(char + u'abc').isalnum()],
|
|---|
| 39 | u"01"[(char + u'abc').isalpha()],
|
|---|
| 40 | u"01"[(char + u'123').isdecimal()],
|
|---|
| 41 | u"01"[(char + u'123').isdigit()],
|
|---|
| 42 | u"01"[(char + u'abc').islower()],
|
|---|
| 43 | u"01"[(char + u'123').isnumeric()],
|
|---|
| 44 | u"01"[(char + u' \t').isspace()],
|
|---|
| 45 | u"01"[(char + u'abc').istitle()],
|
|---|
| 46 | u"01"[(char + u'ABC').isupper()],
|
|---|
| 47 |
|
|---|
| 48 | # Mappings (single char)
|
|---|
| 49 | char.lower(),
|
|---|
| 50 | char.upper(),
|
|---|
| 51 | char.title(),
|
|---|
| 52 |
|
|---|
| 53 | # Mappings (multiple chars)
|
|---|
| 54 | (char + u'abc').lower(),
|
|---|
| 55 | (char + u'ABC').upper(),
|
|---|
| 56 | (char + u'abc').title(),
|
|---|
| 57 | (char + u'ABC').title(),
|
|---|
| 58 |
|
|---|
| 59 | ]
|
|---|
| 60 | h.update(u''.join(data).encode(encoding))
|
|---|
| 61 | result = h.hexdigest()
|
|---|
| 62 | self.assertEqual(result, self.expectedchecksum)
|
|---|
| 63 |
|
|---|
class UnicodeDatabaseTest(unittest.TestCase):
    """Base fixture that exposes the unicodedata module as ``self.db``."""

    def setUp(self):
        # The import happens per test: if unicodedata is not available this
        # raises an ImportError, but the other test cases will still be run.
        import unicodedata as database
        self.db = database

    def tearDown(self):
        # Drop the reference that setUp stored on the instance.
        delattr(self, 'db')
|
|---|
| 74 |
|
|---|
class UnicodeFunctionsTest(UnicodeDatabaseTest):
    """Tests for the lookup functions of the unicodedata module.

    ``self.db`` is the unicodedata module, provided by the base class's
    ``setUp``.
    """

    # update this, if the database changes
    expectedchecksum = '4e389f97e9f88b8b7ab743121fd643089116f9f2'

    def test_function_checksum(self):
        # Hash the database properties of every code point in
        # U+0000..U+FFFF and compare the digest with expectedchecksum.
        h = hashlib.sha1()

        for i in xrange(0x10000):
            char = unichr(i)
            data = [
                # Properties
                str(self.db.digit(char, -1)),
                str(self.db.numeric(char, -1)),
                str(self.db.decimal(char, -1)),
                self.db.category(char),
                self.db.bidirectional(char),
                self.db.decomposition(char),
                str(self.db.mirrored(char)),
                str(self.db.combining(char)),
            ]
            h.update(''.join(data))
        result = h.hexdigest()
        self.assertEqual(result, self.expectedchecksum)

    def test_digit(self):
        # Known values, including the explicit-default form.
        self.assertEqual(self.db.digit(u'A', None), None)
        self.assertEqual(self.db.digit(u'9'), 9)
        self.assertEqual(self.db.digit(u'\u215b', None), None)
        self.assertEqual(self.db.digit(u'\u2468'), 9)

        # Bad call signatures and a character without a digit value.
        self.assertRaises(TypeError, self.db.digit)
        self.assertRaises(TypeError, self.db.digit, u'xx')
        self.assertRaises(ValueError, self.db.digit, u'x')

    def test_numeric(self):
        self.assertEqual(self.db.numeric(u'A',None), None)
        self.assertEqual(self.db.numeric(u'9'), 9)
        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)

        self.assertRaises(TypeError, self.db.numeric)
        self.assertRaises(TypeError, self.db.numeric, u'xx')
        self.assertRaises(ValueError, self.db.numeric, u'x')

    def test_decimal(self):
        self.assertEqual(self.db.decimal(u'A',None), None)
        self.assertEqual(self.db.decimal(u'9'), 9)
        self.assertEqual(self.db.decimal(u'\u215b', None), None)
        self.assertEqual(self.db.decimal(u'\u2468', None), None)

        self.assertRaises(TypeError, self.db.decimal)
        self.assertRaises(TypeError, self.db.decimal, u'xx')
        self.assertRaises(ValueError, self.db.decimal, u'x')

    def test_category(self):
        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
        self.assertEqual(self.db.category(u'a'), 'Ll')
        self.assertEqual(self.db.category(u'A'), 'Lu')

        self.assertRaises(TypeError, self.db.category)
        self.assertRaises(TypeError, self.db.category, u'xx')

    def test_bidirectional(self):
        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
        self.assertEqual(self.db.bidirectional(u' '), 'WS')
        self.assertEqual(self.db.bidirectional(u'A'), 'L')

        self.assertRaises(TypeError, self.db.bidirectional)
        self.assertRaises(TypeError, self.db.bidirectional, u'xx')

    def test_decomposition(self):
        self.assertEqual(self.db.decomposition(u'\uFFFE'),'')
        self.assertEqual(self.db.decomposition(u'\u00bc'), '<fraction> 0031 2044 0034')

        self.assertRaises(TypeError, self.db.decomposition)
        self.assertRaises(TypeError, self.db.decomposition, u'xx')

    def test_mirrored(self):
        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
        self.assertEqual(self.db.mirrored(u'a'), 0)
        self.assertEqual(self.db.mirrored(u'\u2201'), 1)

        self.assertRaises(TypeError, self.db.mirrored)
        self.assertRaises(TypeError, self.db.mirrored, u'xx')

    def test_combining(self):
        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
        self.assertEqual(self.db.combining(u'a'), 0)
        self.assertEqual(self.db.combining(u'\u20e1'), 230)

        self.assertRaises(TypeError, self.db.combining)
        self.assertRaises(TypeError, self.db.combining, u'xx')

    def test_normalize(self):
        self.assertRaises(TypeError, self.db.normalize)
        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
        self.assertEqual(self.db.normalize('NFKC', u''), u'')
        # The rest can be found in test_normalization.py
        # which requires an external file.

    def test_east_asian_width(self):
        eaw = self.db.east_asian_width
        # A byte string, an empty string and a two-character string are all
        # rejected with TypeError.
        self.assertRaises(TypeError, eaw, 'a')
        self.assertRaises(TypeError, eaw, u'')
        self.assertRaises(TypeError, eaw, u'ra')
        self.assertEqual(eaw(u'\x1e'), 'N')
        self.assertEqual(eaw(u'\x20'), 'Na')
        self.assertEqual(eaw(u'\uC894'), 'W')
        self.assertEqual(eaw(u'\uFF66'), 'H')
        self.assertEqual(eaw(u'\uFF1F'), 'F')
        self.assertEqual(eaw(u'\u2010'), 'A')
|
|---|
| 188 |
|
|---|
class UnicodeMiscTest(UnicodeDatabaseTest):
    """Cross-checks between the numeric lookup functions of unicodedata.

    ``self.db`` is the unicodedata module, provided by the base class's
    ``setUp``.
    """

    def _check_consistent_with_numeric(self, lookup):
        # Shared body of the two consistency tests below.  ``lookup`` is
        # called as lookup(char, -1) for every code point in
        # U+0000..U+FFFF; whenever it returns something other than the -1
        # default, the value must equal self.db.numeric(char).
        count = 0
        for i in xrange(0x10000):
            c = unichr(i)
            value = lookup(c, -1)
            if value != -1:
                self.assertEqual(value, self.db.numeric(c))
                count += 1
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(count >= 10) # should have tested at least the ASCII digits

    def test_decimal_numeric_consistent(self):
        # Test that decimal and numeric are consistent,
        # i.e. if a character has a decimal value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.decimal)

    def test_digit_numeric_consistent(self):
        # Test that digit and numeric are consistent,
        # i.e. if a character has a digit value,
        # its numeric value should be the same.
        self._check_consistent_with_numeric(self.db.digit)
|
|---|
| 216 |
|
|---|
def test_main():
    """Run this module's test classes via test.test_support.run_unittest."""
    test_classes = (
        UnicodeMiscTest,
        UnicodeMethodsTest,
        UnicodeFunctionsTest,
    )
    test.test_support.run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()
|
|---|