| 1 | """ Test script for the Unicode implementation.
|
|---|
| 2 |
|
|---|
| 3 | Written by Bill Tutt.
|
|---|
| 4 | Modified for Python 2.0 by Fredrik Lundh ([email protected])
|
|---|
| 5 |
|
|---|
| 6 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
|---|
| 7 |
|
|---|
| 8 | """#"
|
|---|
| 9 |
|
|---|
| 10 | import unittest
|
|---|
| 11 |
|
|---|
| 12 | from test import test_support
|
|---|
| 13 |
|
|---|
| 14 | class UnicodeNamesTest(unittest.TestCase):
|
|---|
| 15 |
|
|---|
| 16 | def checkletter(self, name, code):
|
|---|
| 17 | # Helper that put all \N escapes inside eval'd raw strings,
|
|---|
| 18 | # to make sure this script runs even if the compiler
|
|---|
| 19 | # chokes on \N escapes
|
|---|
| 20 | res = eval(ur'u"\N{%s}"' % name)
|
|---|
| 21 | self.assertEqual(res, code)
|
|---|
| 22 | return res
|
|---|
| 23 |
|
|---|
| 24 | def test_general(self):
|
|---|
| 25 | # General and case insensitivity test:
|
|---|
| 26 | chars = [
|
|---|
| 27 | "LATIN CAPITAL LETTER T",
|
|---|
| 28 | "LATIN SMALL LETTER H",
|
|---|
| 29 | "LATIN SMALL LETTER E",
|
|---|
| 30 | "SPACE",
|
|---|
| 31 | "LATIN SMALL LETTER R",
|
|---|
| 32 | "LATIN CAPITAL LETTER E",
|
|---|
| 33 | "LATIN SMALL LETTER D",
|
|---|
| 34 | "SPACE",
|
|---|
| 35 | "LATIN SMALL LETTER f",
|
|---|
| 36 | "LATIN CAPITAL LeTtEr o",
|
|---|
| 37 | "LATIN SMaLl LETTER x",
|
|---|
| 38 | "SPACE",
|
|---|
| 39 | "LATIN SMALL LETTER A",
|
|---|
| 40 | "LATIN SMALL LETTER T",
|
|---|
| 41 | "LATIN SMALL LETTER E",
|
|---|
| 42 | "SPACE",
|
|---|
| 43 | "LATIN SMALL LETTER T",
|
|---|
| 44 | "LATIN SMALL LETTER H",
|
|---|
| 45 | "LATIN SMALL LETTER E",
|
|---|
| 46 | "SpAcE",
|
|---|
| 47 | "LATIN SMALL LETTER S",
|
|---|
| 48 | "LATIN SMALL LETTER H",
|
|---|
| 49 | "LATIN small LETTER e",
|
|---|
| 50 | "LATIN small LETTER e",
|
|---|
| 51 | "LATIN SMALL LETTER P",
|
|---|
| 52 | "FULL STOP"
|
|---|
| 53 | ]
|
|---|
| 54 | string = u"The rEd fOx ate the sheep."
|
|---|
| 55 |
|
|---|
| 56 | self.assertEqual(
|
|---|
| 57 | u"".join([self.checkletter(*args) for args in zip(chars, string)]),
|
|---|
| 58 | string
|
|---|
| 59 | )
|
|---|
| 60 |
|
|---|
| 61 | def test_ascii_letters(self):
|
|---|
| 62 | import unicodedata
|
|---|
| 63 |
|
|---|
| 64 | for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
|
|---|
| 65 | name = "LATIN SMALL LETTER %s" % char.upper()
|
|---|
| 66 | code = unicodedata.lookup(name)
|
|---|
| 67 | self.assertEqual(unicodedata.name(code), name)
|
|---|
| 68 |
|
|---|
| 69 | def test_hangul_syllables(self):
|
|---|
| 70 | self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
|
|---|
| 71 | self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
|
|---|
| 72 | self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
|
|---|
| 73 | self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
|
|---|
| 74 | self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
|
|---|
| 75 | self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
|
|---|
| 76 | self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
|
|---|
| 77 | self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
|
|---|
| 78 | self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
|
|---|
| 79 | self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
|
|---|
| 80 | self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
|
|---|
| 81 | self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
|
|---|
| 82 | self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
|
|---|
| 83 |
|
|---|
| 84 | import unicodedata
|
|---|
| 85 | self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
|
|---|
| 86 |
|
|---|
| 87 | def test_cjk_unified_ideographs(self):
|
|---|
| 88 | self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
|
|---|
| 89 | self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
|
|---|
| 90 | self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
|
|---|
| 91 | self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
|
|---|
| 92 | self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
|
|---|
| 93 | self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
|
|---|
| 94 |
|
|---|
| 95 | def test_bmp_characters(self):
|
|---|
| 96 | import unicodedata
|
|---|
| 97 | count = 0
|
|---|
| 98 | for code in xrange(0x10000):
|
|---|
| 99 | char = unichr(code)
|
|---|
| 100 | name = unicodedata.name(char, None)
|
|---|
| 101 | if name is not None:
|
|---|
| 102 | self.assertEqual(unicodedata.lookup(name), char)
|
|---|
| 103 | count += 1
|
|---|
| 104 |
|
|---|
| 105 | def test_misc_symbols(self):
|
|---|
| 106 | self.checkletter("PILCROW SIGN", u"\u00b6")
|
|---|
| 107 | self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
|
|---|
| 108 | self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
|
|---|
| 109 | self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
|
|---|
| 110 |
|
|---|
| 111 | def test_errors(self):
|
|---|
| 112 | import unicodedata
|
|---|
| 113 | self.assertRaises(TypeError, unicodedata.name)
|
|---|
| 114 | self.assertRaises(TypeError, unicodedata.name, u'xx')
|
|---|
| 115 | self.assertRaises(TypeError, unicodedata.lookup)
|
|---|
| 116 | self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
|
|---|
| 117 |
|
|---|
| 118 | def test_strict_eror_handling(self):
|
|---|
| 119 | # bogus character name
|
|---|
| 120 | self.assertRaises(
|
|---|
| 121 | UnicodeError,
|
|---|
| 122 | unicode, "\\N{blah}", 'unicode-escape', 'strict'
|
|---|
| 123 | )
|
|---|
| 124 | # long bogus character name
|
|---|
| 125 | self.assertRaises(
|
|---|
| 126 | UnicodeError,
|
|---|
| 127 | unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
|
|---|
| 128 | )
|
|---|
| 129 | # missing closing brace
|
|---|
| 130 | self.assertRaises(
|
|---|
| 131 | UnicodeError,
|
|---|
| 132 | unicode, "\\N{SPACE", 'unicode-escape', 'strict'
|
|---|
| 133 | )
|
|---|
| 134 | # missing opening brace
|
|---|
| 135 | self.assertRaises(
|
|---|
| 136 | UnicodeError,
|
|---|
| 137 | unicode, "\\NSPACE", 'unicode-escape', 'strict'
|
|---|
| 138 | )
|
|---|
| 139 |
|
|---|
def test_main():
    """Run the UnicodeNamesTest cases through test_support.run_unittest."""
    case_classes = (UnicodeNamesTest,)
    test_support.run_unittest(*case_classes)
|
|---|
| 142 |
|
|---|
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()
|
|---|