source: trunk/essentials/dev-lang/python/Lib/codecs.py@ 3393

Last change on this file since 3393 was 3225, checked in by bird, 19 years ago

Python 2.5

File size: 31.9 KB
Line 
1""" codecs -- Python Codec Registry, API and helpers.
2
3
4Written by Marc-Andre Lemburg ([email protected]).
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
8"""#"
9
10import __builtin__, sys
11
12### Registry and builtin stateless codec functions
13
14try:
15 from _codecs import *
16except ImportError, why:
17 raise SystemError('Failed to load the builtin codecs: %s' % why)
18
19__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
20 "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
21 "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
22 "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
23 "strict_errors", "ignore_errors", "replace_errors",
24 "xmlcharrefreplace_errors",
25 "register_error", "lookup_error"]
26
27### Constants
28
29#
30# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
31# and its possible byte string values
32# for UTF8/UTF16/UTF32 output and little/big endian machines
33#
34
35# UTF-8
36BOM_UTF8 = '\xef\xbb\xbf'
37
38# UTF-16, little endian
39BOM_LE = BOM_UTF16_LE = '\xff\xfe'
40
41# UTF-16, big endian
42BOM_BE = BOM_UTF16_BE = '\xfe\xff'
43
44# UTF-32, little endian
45BOM_UTF32_LE = '\xff\xfe\x00\x00'
46
47# UTF-32, big endian
48BOM_UTF32_BE = '\x00\x00\xfe\xff'
49
50if sys.byteorder == 'little':
51
52 # UTF-16, native endianness
53 BOM = BOM_UTF16 = BOM_UTF16_LE
54
55 # UTF-32, native endianness
56 BOM_UTF32 = BOM_UTF32_LE
57
58else:
59
60 # UTF-16, native endianness
61 BOM = BOM_UTF16 = BOM_UTF16_BE
62
63 # UTF-32, native endianness
64 BOM_UTF32 = BOM_UTF32_BE
65
66# Old broken names (don't use in new code)
67BOM32_LE = BOM_UTF16_LE
68BOM32_BE = BOM_UTF16_BE
69BOM64_LE = BOM_UTF32_LE
70BOM64_BE = BOM_UTF32_BE
71
72
73### Codec base classes (defining the API)
74
75class CodecInfo(tuple):
76
77 def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
78 incrementalencoder=None, incrementaldecoder=None, name=None):
79 self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
80 self.name = name
81 self.encode = encode
82 self.decode = decode
83 self.incrementalencoder = incrementalencoder
84 self.incrementaldecoder = incrementaldecoder
85 self.streamwriter = streamwriter
86 self.streamreader = streamreader
87 return self
88
89 def __repr__(self):
90 return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
91