| 1 | #ifndef Py_UNICODEOBJECT_H
|
|---|
| 2 | #define Py_UNICODEOBJECT_H
|
|---|
| 3 |
|
|---|
| 4 | /*
|
|---|
| 5 |
|
|---|
| 6 | Unicode implementation based on original code by Fredrik Lundh,
|
|---|
| 7 | modified by Marc-Andre Lemburg ([email protected]) according to the
|
|---|
| 8 | Unicode Integration Proposal (see file Misc/unicode.txt).
|
|---|
| 9 |
|
|---|
| 10 | Copyright (c) Corporation for National Research Initiatives.
|
|---|
| 11 |
|
|---|
| 12 |
|
|---|
| 13 | Original header:
|
|---|
| 14 | --------------------------------------------------------------------
|
|---|
| 15 |
|
|---|
| 16 | * Yet another Unicode string type for Python. This type supports the
|
|---|
| 17 | * 16-bit Basic Multilingual Plane (BMP) only.
|
|---|
| 18 | *
|
|---|
| 19 | * Written by Fredrik Lundh, January 1999.
|
|---|
| 20 | *
|
|---|
| 21 | * Copyright (c) 1999 by Secret Labs AB.
|
|---|
| 22 | * Copyright (c) 1999 by Fredrik Lundh.
|
|---|
| 23 | *
|
|---|
| 24 | * [email protected]
|
|---|
| 25 | * http://www.pythonware.com
|
|---|
| 26 | *
|
|---|
| 27 | * --------------------------------------------------------------------
|
|---|
| 28 | * This Unicode String Type is
|
|---|
| 29 | *
|
|---|
| 30 | * Copyright (c) 1999 by Secret Labs AB
|
|---|
| 31 | * Copyright (c) 1999 by Fredrik Lundh
|
|---|
| 32 | *
|
|---|
| 33 | * By obtaining, using, and/or copying this software and/or its
|
|---|
| 34 | * associated documentation, you agree that you have read, understood,
|
|---|
| 35 | * and will comply with the following terms and conditions:
|
|---|
| 36 | *
|
|---|
| 37 | * Permission to use, copy, modify, and distribute this software and its
|
|---|
| 38 | * associated documentation for any purpose and without fee is hereby
|
|---|
| 39 | * granted, provided that the above copyright notice appears in all
|
|---|
| 40 | * copies, and that both that copyright notice and this permission notice
|
|---|
| 41 | * appear in supporting documentation, and that the name of Secret Labs
|
|---|
| 42 | * AB or the author not be used in advertising or publicity pertaining to
|
|---|
| 43 | * distribution of the software without specific, written prior
|
|---|
| 44 | * permission.
|
|---|
| 45 | *
|
|---|
| 46 | * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
|---|
| 47 | * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|---|
| 48 | * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
|
|---|
| 49 | * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|---|
| 50 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|---|
| 51 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
|---|
| 52 | * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|---|
| 53 | * -------------------------------------------------------------------- */
|
|---|
| 54 |
|
|---|
| 55 | #include <ctype.h>
|
|---|
| 56 |
|
|---|
| 57 | /* === Internal API ======================================================= */
|
|---|
| 58 |
|
|---|
| 59 | /* --- Internal Unicode Format -------------------------------------------- */
|
|---|
| 60 |
|
|---|
| 61 | #ifndef Py_USING_UNICODE
|
|---|
| 62 |
|
|---|
| 63 | #define PyUnicode_Check(op) 0
|
|---|
| 64 | #define PyUnicode_CheckExact(op) 0
|
|---|
| 65 |
|
|---|
| 66 | #else
|
|---|
| 67 |
|
|---|
| 68 | /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
|
|---|
| 69 | properly set, but the default rules below don't set it. I'll
|
|---|
| 70 | sort this out some other day -- [email protected] */
|
|---|
| 71 |
|
|---|
| 72 | #ifndef Py_UNICODE_SIZE
|
|---|
| 73 | #error Must define Py_UNICODE_SIZE
|
|---|
| 74 | #endif
|
|---|
| 75 |
|
|---|
| 76 | /* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
|
|---|
| 77 | strings are stored as UCS-2 (with limited support for UTF-16) */
|
|---|
| 78 |
|
|---|
| 79 | #if Py_UNICODE_SIZE >= 4
|
|---|
| 80 | #define Py_UNICODE_WIDE
|
|---|
| 81 | #endif
|
|---|
| 82 |
|
|---|
| 83 | /* Set these flags if the platform has "wchar.h", "wctype.h" and the
|
|---|
| 84 | wchar_t type is a 16-bit unsigned type */
|
|---|
| 85 | /* #define HAVE_WCHAR_H */
|
|---|
| 86 | /* #define HAVE_USABLE_WCHAR_T */
|
|---|
| 87 |
|
|---|
| 88 | /* Defaults for various platforms */
|
|---|
| 89 | #ifndef PY_UNICODE_TYPE
|
|---|
| 90 |
|
|---|
| 91 | /* Windows has a usable wchar_t type (unless we're using UCS-4) */
|
|---|
| 92 | # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
|
|---|
| 93 | # define HAVE_USABLE_WCHAR_T
|
|---|
| 94 | # define PY_UNICODE_TYPE wchar_t
|
|---|
| 95 | # endif
|
|---|
| 96 |
|
|---|
| 97 | # if defined(Py_UNICODE_WIDE)
|
|---|
| 98 | # define PY_UNICODE_TYPE Py_UCS4
|
|---|
| 99 | # endif
|
|---|
| 100 |
|
|---|
| 101 | #endif
|
|---|
| 102 |
|
|---|
| 103 | /* If the compiler provides a wchar_t type we try to support it
|
|---|
| 104 | through the interface functions PyUnicode_FromWideChar() and
|
|---|
| 105 | PyUnicode_AsWideChar(). */
|
|---|
| 106 |
|
|---|
| 107 | #ifdef HAVE_USABLE_WCHAR_T
|
|---|
| 108 | # ifndef HAVE_WCHAR_H
|
|---|
| 109 | # define HAVE_WCHAR_H
|
|---|
| 110 | # endif
|
|---|
| 111 | #endif
|
|---|
| 112 |
|
|---|
| 113 | #ifdef HAVE_WCHAR_H
|
|---|
| 114 | /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
|
|---|
| 115 | # ifdef _HAVE_BSDI
|
|---|
| 116 | # include <time.h>
|
|---|
| 117 | # endif
|
|---|
| 118 | # include <wchar.h>
|
|---|
| 119 | #endif
|
|---|
| 120 |
|
|---|
| 121 | /*
|
|---|
| 122 | * Use this typedef when you need to represent a UTF-16 surrogate pair
|
|---|
| 123 | * as a single unsigned integer.
|
|---|
| 124 | */
|
|---|
| 125 | #if SIZEOF_INT >= 4
|
|---|
| 126 | typedef unsigned int Py_UCS4;
|
|---|
| 127 | #elif SIZEOF_LONG >= 4
|
|---|
| 128 | typedef unsigned long Py_UCS4;
|
|---|
| 129 | #endif
|
|---|
| 130 |
|
|---|
| 131 | typedef PY_UNICODE_TYPE Py_UNICODE;
|
|---|
| 132 |
|
|---|
| 133 | /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
|
|---|
| 134 |
|
|---|
| 135 | /* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
|
|---|
| 136 | produce different external names and thus cause import errors in
|
|---|
| 137 | case Python interpreters and extensions with mixed compiled in
|
|---|
| 138 | Unicode width assumptions are combined. */
|
|---|
| 139 |
|
|---|
| 140 | #ifndef Py_UNICODE_WIDE
|
|---|
| 141 |
|
|---|
| 142 | # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
|
|---|
| 143 | # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
|
|---|
| 144 | # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
|
|---|
| 145 | # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
|
|---|
| 146 | # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
|
|---|
| 147 | # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
|
|---|
| 148 | # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
|
|---|
| 149 | # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
|
|---|
| 150 | # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
|
|---|
| 151 | # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
|
|---|
| 152 | # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
|
|---|
| 153 | # define PyUnicode_Compare PyUnicodeUCS2_Compare
|
|---|
| 154 | # define PyUnicode_Concat PyUnicodeUCS2_Concat
|
|---|
| 155 | # define PyUnicode_Contains PyUnicodeUCS2_Contains
|
|---|
| 156 | # define PyUnicode_Count PyUnicodeUCS2_Count
|
|---|
| 157 | # define PyUnicode_Decode PyUnicodeUCS2_Decode
|
|---|
| 158 | # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
|
|---|
| 159 | # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
|
|---|
| 160 | # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
|
|---|
| 161 | # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
|
|---|
| 162 | # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
|
|---|
| 163 | # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
|
|---|
| 164 | # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
|
|---|
| 165 | # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
|
|---|
| 166 | # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
|
|---|
| 167 | # define PyUnicode_Encode PyUnicodeUCS2_Encode
|
|---|
| 168 | # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
|
|---|
| 169 | # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
|
|---|
| 170 | # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
|
|---|
| 171 | # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
|
|---|
| 172 | # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
|
|---|
| 173 | # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
|
|---|
| 174 | # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
|
|---|
| 175 | # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
|
|---|
| 176 | # define PyUnicode_Find PyUnicodeUCS2_Find
|
|---|
| 177 | # define PyUnicode_Format PyUnicodeUCS2_Format
|
|---|
| 178 | # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
|
|---|
| 179 | # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
|
|---|
| 180 | # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
|
|---|
| 181 | # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
|
|---|
| 182 | # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
|
|---|
| 183 | # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
|
|---|
| 184 | # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
|
|---|
| 185 | # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
|
|---|
| 186 | # define PyUnicode_Join PyUnicodeUCS2_Join
|
|---|
| 187 | # define PyUnicode_Partition PyUnicodeUCS2_Partition
|
|---|
| 188 | # define PyUnicode_RPartition PyUnicodeUCS2_RPartition
|
|---|
| 189 | # define PyUnicode_RSplit PyUnicodeUCS2_RSplit
|
|---|
| 190 | # define PyUnicode_Replace PyUnicodeUCS2_Replace
|
|---|
| 191 | # define PyUnicode_Resize PyUnicodeUCS2_Resize
|
|---|
| 192 | # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
|
|---|
| 193 | # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
|
|---|
| 194 | # define PyUnicode_Split PyUnicodeUCS2_Split
|
|---|
| 195 | # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
|
|---|
| 196 | # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
|
|---|
| 197 | # define PyUnicode_Translate PyUnicodeUCS2_Translate
|
|---|
| 198 | # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
|
|---|
| 199 | # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
|
|---|
| 200 | # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
|
|---|
| 201 | # define _PyUnicode_Init _PyUnicodeUCS2_Init
|
|---|
| 202 | # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
|
|---|
| 203 | # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
|
|---|
| 204 | # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
|
|---|
| 205 | # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
|
|---|
| 206 | # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
|
|---|
| 207 | # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
|
|---|
| 208 | # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
|
|---|
| 209 | # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
|
|---|
| 210 | # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
|
|---|
| 211 | # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
|
|---|
| 212 | # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
|
|---|
| 213 | # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
|
|---|
| 214 | # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
|
|---|
| 215 | # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
|
|---|
| 216 | # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
|
|---|
| 217 |
|
|---|
| 218 | #else
|
|---|
| 219 |
|
|---|
| 220 | # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
|
|---|
| 221 | # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
|
|---|
| 222 | # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
|
|---|
| 223 | # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
|
|---|
| 224 | # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
|
|---|
| 225 | # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
|
|---|
| 226 | # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
|
|---|
| 227 | # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
|
|---|
| 228 | # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
|
|---|
| 229 | # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
|
|---|
| 230 | # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
|
|---|
| 231 | # define PyUnicode_Compare PyUnicodeUCS4_Compare
|
|---|
| 232 | # define PyUnicode_Concat PyUnicodeUCS4_Concat
|
|---|
| 233 | # define PyUnicode_Contains PyUnicodeUCS4_Contains
|
|---|
| 234 | # define PyUnicode_Count PyUnicodeUCS4_Count
|
|---|
| 235 | # define PyUnicode_Decode PyUnicodeUCS4_Decode
|
|---|
| 236 | # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
|
|---|
| 237 | # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
|
|---|
| 238 | # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
|
|---|
| 239 | # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
|
|---|
| 240 | # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
|
|---|
| 241 | # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
|
|---|
| 242 | # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
|
|---|
| 243 | # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
|
|---|
| 244 | # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
|
|---|
| 245 | # define PyUnicode_Encode PyUnicodeUCS4_Encode
|
|---|
| 246 | # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
|
|---|
| 247 | # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
|
|---|
| 248 | # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
|
|---|
| 249 | # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
|
|---|
| 250 | # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
|
|---|
| 251 | # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
|
|---|
| 252 | # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
|
|---|
| 253 | # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
|
|---|
| 254 | # define PyUnicode_Find PyUnicodeUCS4_Find
|
|---|
| 255 | # define PyUnicode_Format PyUnicodeUCS4_Format
|
|---|
| 256 | # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
|
|---|
| 257 | # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
|
|---|
| 258 | # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
|
|---|
| 259 | # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
|
|---|
| 260 | # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
|
|---|
| 261 | # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
|
|---|
| 262 | # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
|
|---|
| 263 | # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
|
|---|
| 264 | # define PyUnicode_Join PyUnicodeUCS4_Join
|
|---|
| 265 | # define PyUnicode_Partition PyUnicodeUCS4_Partition
|
|---|
| 266 | # define PyUnicode_RPartition PyUnicodeUCS4_RPartition
|
|---|
| 267 | # define PyUnicode_RSplit PyUnicodeUCS4_RSplit
|
|---|
| 268 | # define PyUnicode_Replace PyUnicodeUCS4_Replace
|
|---|
| 269 | # define PyUnicode_Resize PyUnicodeUCS4_Resize
|
|---|
| 270 | # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
|
|---|
| 271 | # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
|
|---|
| 272 | # define PyUnicode_Split PyUnicodeUCS4_Split
|
|---|
| 273 | # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
|
|---|
| 274 | # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
|
|---|
| 275 | # define PyUnicode_Translate PyUnicodeUCS4_Translate
|
|---|
| 276 | # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
|
|---|
| 277 | # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
|
|---|
| 278 | # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
|
|---|
| 279 | # define _PyUnicode_Init _PyUnicodeUCS4_Init
|
|---|
| 280 | # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
|
|---|
| 281 | # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
|
|---|
| 282 | # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
|
|---|
| 283 | # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
|
|---|
| 284 | # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
|
|---|
| 285 | # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
|
|---|
| 286 | # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
|
|---|
| 287 | # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
|
|---|
| 288 | # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
|
|---|
| 289 | # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
|
|---|
| 290 | # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
|
|---|
| 291 | # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
|
|---|
| 292 | # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
|
|---|
| 293 | # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
|
|---|
| 294 | # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
|
|---|
| 295 |
|
|---|
| 296 |
|
|---|
| 297 | #endif
|
|---|
| 298 |
|
|---|
| 299 | /* --- Internal Unicode Operations ---------------------------------------- */
|
|---|
| 300 |
|
|---|
| 301 | /* If you want Python to use the compiler's wctype.h functions instead
|
|---|
| 302 | of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
|
|---|
| 303 | configure Python using --with-wctype-functions. This reduces the
|
|---|
| 304 | interpreter's code size. */
|
|---|
| 305 |
|
|---|
| 306 | #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
|
|---|
| 307 |
|
|---|
| 308 | #include <wctype.h>
|
|---|
| 309 |
|
|---|
| 310 | #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
|
|---|
| 311 |
|
|---|
| 312 | #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
|
|---|
| 313 | #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
|
|---|
| 314 | #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
|
|---|
| 315 | #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
|
|---|
| 316 |
|
|---|
| 317 | #define Py_UNICODE_TOLOWER(ch) towlower(ch)
|
|---|
| 318 | #define Py_UNICODE_TOUPPER(ch) towupper(ch)
|
|---|
| 319 | #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
|
|---|
| 320 |
|
|---|
| 321 | #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
|
|---|
| 322 | #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
|
|---|
| 323 | #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
|
|---|
| 324 |
|
|---|
| 325 | #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
|
|---|
| 326 | #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
|
|---|
| 327 | #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
|
|---|
| 328 |
|
|---|
| 329 | #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
|
|---|
| 330 |
|
|---|
| 331 | #else
|
|---|
| 332 |
|
|---|
| 333 | #define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
|
|---|
| 334 |
|
|---|
| 335 | #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
|
|---|
| 336 | #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
|
|---|
| 337 | #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
|
|---|
| 338 | #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
|
|---|
| 339 |
|
|---|
| 340 | #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
|
|---|
| 341 | #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
|
|---|
| 342 | #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
|
|---|
| 343 |
|
|---|
| 344 | #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
|
|---|
| 345 | #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
|
|---|
| 346 | #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
|
|---|
| 347 |
|
|---|
| 348 | #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
|
|---|
| 349 | #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
|
|---|
| 350 | #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
|
|---|
| 351 |
|
|---|
| 352 | #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
|
|---|
| 353 |
|
|---|
| 354 | #endif
|
|---|
| 355 |
|
|---|
| 356 | #define Py_UNICODE_ISALNUM(ch) \
|
|---|
| 357 | (Py_UNICODE_ISALPHA(ch) || \
|
|---|
| 358 | Py_UNICODE_ISDECIMAL(ch) || \
|
|---|
| 359 | Py_UNICODE_ISDIGIT(ch) || \
|
|---|
| 360 | Py_UNICODE_ISNUMERIC(ch))
|
|---|
| 361 |
|
|---|
| 362 | #define Py_UNICODE_COPY(target, source, length) \
|
|---|
| 363 | Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
|
|---|
| 364 |
|
|---|
/* Store `value` into each of the first `length` Py_UNICODE slots of
   `target`.

   Each argument is evaluated exactly once, in the order target, value,
   length: all three are copied into block-local temporaries before the
   loop starts, so an argument with side effects (or a costly length
   expression) is not re-evaluated on every iteration.  A `length` of
   zero or less stores nothing.

   The do { ... } while (0) wrapper makes the expansion a single
   statement, so the macro can be used as the unbraced body of an
   if/else. */
#define Py_UNICODE_FILL(target, value, length) \
    do { \
        Py_ssize_t i_; \
        Py_UNICODE *t_ = (target); \
        Py_UNICODE v_ = (value); \
        Py_ssize_t n_ = (length);   /* captured once, not per iteration */ \
        for (i_ = 0; i_ < n_; i_++) \
            t_[i_] = v_; \
    } while (0)
|
|---|
| 369 |
|
|---|
| 370 | /* check if substring matches at given offset. the offset must be
|
|---|
| 371 | valid, and the substring must not be empty */
|
|---|
| 372 | #define Py_UNICODE_MATCH(string, offset, substring) \
|
|---|
| 373 | ((*((string)->str + (offset)) == *((substring)->str)) && \
|
|---|
| 374 | ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
|
|---|
| 375 | !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
|
|---|
| 376 |
|
|---|
| 377 | #ifdef __cplusplus
|
|---|
| 378 | extern "C" {
|
|---|
| 379 | #endif
|
|---|
| 380 |
|
|---|
| 381 | /* --- Unicode Type ------------------------------------------------------- */
|
|---|
| 382 |
|
|---|
| 383 | typedef struct {
|
|---|
| 384 | PyObject_HEAD
|
|---|
| 385 | Py_ssize_t length; /* Length of raw Unicode data in buffer */
|
|---|
| 386 | Py_UNICODE *str; /* Raw Unicode buffer */
|
|---|
| 387 | long hash; /* Hash value; -1 if not set */
|
|---|
| 388 | PyObject *defenc; /* (Default) Encoded version as Python
|
|---|
| 389 | string, or NULL; this is used for
|
|---|
| 390 | implementing the buffer protocol */
|
|---|
| 391 | } PyUnicodeObject;
|
|---|
| 392 |
|
|---|
| 393 | PyAPI_DATA(PyTypeObject) PyUnicode_Type;
|
|---|
| 394 |
|
|---|
| 395 | #define PyUnicode_Check(op) PyObject_TypeCheck(op, &PyUnicode_Type)
|
|---|
| 396 | #define PyUnicode_CheckExact(op) ((op)->ob_type == &PyUnicode_Type)
|
|---|
| 397 |
|
|---|
| 398 | /* Fast access macros */
|
|---|
| 399 | #define PyUnicode_GET_SIZE(op) \
|
|---|
| 400 | (((PyUnicodeObject *)(op))->length)
|
|---|
| 401 | #define PyUnicode_GET_DATA_SIZE(op) \
|
|---|
| 402 | (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
|
|---|
| 403 | #define PyUnicode_AS_UNICODE(op) \
|
|---|
| 404 | (((PyUnicodeObject *)(op))->str)
|
|---|
| 405 | #define PyUnicode_AS_DATA(op) \
|
|---|
| 406 | ((const char *)((PyUnicodeObject *)(op))->str)
|
|---|
| 407 |
|
|---|
| 408 | /* --- Constants ---------------------------------------------------------- */
|
|---|
| 409 |
|
|---|
| 410 | /* This Unicode character will be used as replacement character during
|
|---|
| 411 | decoding if the errors argument is set to "replace". Note: the
|
|---|
| 412 | Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
|
|---|
| 413 | Unicode 3.0. */
|
|---|
| 414 |
|
|---|
| 415 | #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
|
|---|
| 416 |
|
|---|
| 417 | /* === Public API ========================================================= */
|
|---|
| 418 |
|
|---|
| 419 | /* --- Plain Py_UNICODE --------------------------------------------------- */
|
|---|
| 420 |
|
|---|
| 421 | /* Create a Unicode Object from the Py_UNICODE buffer u of the given
|
|---|
| 422 | size.
|
|---|
| 423 |
|
|---|
| 424 | u may be NULL which causes the contents to be undefined. It is the
|
|---|
| 425 | user's responsibility to fill in the needed data afterwards. Note
|
|---|
| 426 | that modifying the Unicode object contents after construction is
|
|---|
| 427 | only allowed if u was set to NULL.
|
|---|
| 428 |
|
|---|
| 429 | The buffer is copied into the new object. */
|
|---|
| 430 |
|
|---|
| 431 | PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
|
|---|
| 432 | const Py_UNICODE *u, /* Unicode buffer */
|
|---|
| 433 | Py_ssize_t size /* size of buffer */
|
|---|
| 434 | );
|
|---|
| 435 |
|
|---|
| 436 | /* Return a read-only pointer to the Unicode object's internal
|
|---|
| 437 | Py_UNICODE buffer. */
|
|---|
| 438 |
|
|---|
| 439 | PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
|
|---|
| 440 | PyObject *unicode /* Unicode object */
|
|---|
| 441 | );
|
|---|
| 442 |
|
|---|
| 443 | /* Get the length of the Unicode object. */
|
|---|
| 444 |
|
|---|
| 445 | PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
|
|---|
| 446 | PyObject *unicode /* Unicode object */
|
|---|
| 447 | );
|
|---|
| 448 |
|
|---|
| 449 | /* Get the maximum ordinal for a Unicode character. */
|
|---|
| 450 | PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
|
|---|
| 451 |
|
|---|
| 452 | /* Resize an already allocated Unicode object to the new size length.
|
|---|
| 453 |
|
|---|
| 454 | *unicode is modified to point to the new (resized) object and 0
|
|---|
| 455 | returned on success.
|
|---|
| 456 |
|
|---|
| 457 | This API may only be called by the function which also called the
|
|---|
| 458 | Unicode constructor. The refcount on the object must be 1. Otherwise,
|
|---|
| 459 | an error is returned.
|
|---|
| 460 |
|
|---|
| 461 | Error handling is implemented as follows: an exception is set, -1
|
|---|
| 462 | is returned and *unicode left untouched.
|
|---|
| 463 |
|
|---|
| 464 | */
|
|---|
| 465 |
|
|---|
| 466 | PyAPI_FUNC(int) PyUnicode_Resize(
|
|---|
| 467 | PyObject **unicode, /* Pointer to the Unicode object */
|
|---|
| 468 | Py_ssize_t length /* New length */
|
|---|
| 469 | );
|
|---|
| 470 |
|
|---|
| 471 | /* Coerce obj to a Unicode object and return a reference with
|
|---|
| 472 | *incremented* refcount.
|
|---|
| 473 |
|
|---|
| 474 | Coercion is done in the following way:
|
|---|
| 475 |
|
|---|
| 476 | 1. String and other char buffer compatible objects are decoded
|
|---|
| 477 | under the assumptions that they contain data using the current
|
|---|
| 478 | default encoding. Decoding is done in "strict" mode.
|
|---|
| 479 |
|
|---|
| 480 | 2. All other objects (including Unicode objects) raise an
|
|---|
| 481 | exception.
|
|---|
| 482 |
|
|---|
| 483 | The API returns NULL in case of an error. The caller is responsible
|
|---|
| 484 | for decref'ing the returned objects.
|
|---|
| 485 |
|
|---|
| 486 | */
|
|---|
| 487 |
|
|---|
| 488 | PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
|
|---|
| 489 | register PyObject *obj, /* Object */
|
|---|
| 490 | const char *encoding, /* encoding */
|
|---|
| 491 | const char *errors /* error handling */
|
|---|
| 492 | );
|
|---|
| 493 |
|
|---|
| 494 | /* Coerce obj to a Unicode object and return a reference with
|
|---|
| 495 | *incremented* refcount.
|
|---|
| 496 |
|
|---|
| 497 | Unicode objects are passed back as-is (subclasses are converted to
|
|---|
| 498 | true Unicode objects), all other objects are delegated to
|
|---|
| 499 | PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
|
|---|
| 500 | using the default encoding as basis for decoding the object.
|
|---|
| 501 |
|
|---|
| 502 | The API returns NULL in case of an error. The caller is responsible
|
|---|
| 503 | for decref'ing the returned objects.
|
|---|
| 504 |
|
|---|
| 505 | */
|
|---|
| 506 |
|
|---|
| 507 | PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
|
|---|
| 508 | register PyObject *obj /* Object */
|
|---|
| 509 | );
|
|---|
| 510 |
|
|---|
| 511 | /* --- wchar_t support for platforms which support it --------------------- */
|
|---|
| 512 |
|
|---|
| 513 | #ifdef HAVE_WCHAR_H
|
|---|
| 514 |
|
|---|
| 515 | /* Create a Unicode Object from the wchar_t buffer w of the given
|
|---|
| 516 | size.
|
|---|
| 517 |
|
|---|
| 518 | The buffer is copied into the new object. */
|
|---|
| 519 |
|
|---|
| 520 | PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
|
|---|
| 521 | register const wchar_t *w, /* wchar_t buffer */
|
|---|
| 522 | Py_ssize_t size /* size of buffer */
|
|---|
| 523 | );
|
|---|
| 524 |
|
|---|
| 525 | /* Copies the Unicode Object contents into the wchar_t buffer w. At
|
|---|
| 526 | most size wchar_t characters are copied.
|
|---|
| 527 |
|
|---|
| 528 | Note that the resulting wchar_t string may or may not be
|
|---|
| 529 | 0-terminated. It is the responsibility of the caller to make sure
|
|---|
| 530 | that the wchar_t string is 0-terminated in case this is required by
|
|---|
| 531 | the application.
|
|---|
| 532 |
|
|---|
| 533 | Returns the number of wchar_t characters copied (excluding a
|
|---|
| 534 | possibly trailing 0-termination character) or -1 in case of an
|
|---|
| 535 | error. */
|
|---|
| 536 |
|
|---|
| 537 | PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
|
|---|
| 538 | PyUnicodeObject *unicode, /* Unicode object */
|
|---|
| 539 | register wchar_t *w, /* wchar_t buffer */
|
|---|
| 540 | Py_ssize_t size /* size of buffer */
|
|---|
| 541 | );
|
|---|
| 542 |
|
|---|
| 543 | #endif
|
|---|
| 544 |
|
|---|
| 545 | /* --- Unicode ordinals --------------------------------------------------- */
|
|---|
| 546 |
|
|---|
| 547 | /* Create a Unicode Object from the given Unicode code point ordinal.
|
|---|
| 548 |
|
|---|
| 549 | The ordinal must be in range(0x10000) on narrow Python builds
|
|---|
| 550 | (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
|
|---|
| 551 | raised in case it is not.
|
|---|
| 552 |
|
|---|
| 553 | */
|
|---|
| 554 |
|
|---|
| 555 | PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
|
|---|
| 556 |
|
|---|
| 557 | /* === Builtin Codecs =====================================================
|
|---|
| 558 |
|
|---|
| 559 | Many of these APIs take two arguments encoding and errors. These
|
|---|
| 560 | parameters encoding and errors have the same semantics as the ones
|
|---|
| 561 | of the builtin unicode() API.
|
|---|
| 562 |
|
|---|
| 563 | Setting encoding to NULL causes the default encoding to be used.
|
|---|
| 564 |
|
|---|
| 565 | Error handling is set by errors which may also be set to NULL
|
|---|
| 566 | meaning to use the default handling defined for the codec. Default
|
|---|
| 567 | error handling for all builtin codecs is "strict" (ValueErrors are
|
|---|
| 568 | raised).
|
|---|
| 569 |
|
|---|
| 570 | The codecs all use a similar interface. Only deviations from the
|
|---|
| 571 | generic ones are documented.
|
|---|
| 572 |
|
|---|
| 573 | */
|
|---|
| 574 |
|
|---|
| 575 | /* --- Manage the default encoding ---------------------------------------- */
|
|---|
| 576 |
|
|---|
| 577 | /* Return a Python string holding the default encoded value of the
|
|---|
| 578 | Unicode object.
|
|---|
| 579 |
|
|---|
| 580 | The resulting string is cached in the Unicode object for subsequent
|
|---|
| 581 | usage by this function. The cached version is needed to implement
|
|---|
| 582 | the character buffer interface and will live (at least) as long as
|
|---|
| 583 | the Unicode object itself.
|
|---|
| 584 |
|
|---|
| 585 | The refcount of the string is *not* incremented.
|
|---|
| 586 |
|
|---|
| 587 | *** Exported for internal use by the interpreter only !!! ***
|
|---|
| 588 |
|
|---|
| 589 | */
|
|---|
| 590 |
|
|---|
| 591 | PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
|
|---|
| 592 | PyObject *, const char *);
|
|---|
| 593 |
|
|---|
| 594 | /* Returns the currently active default encoding.
|
|---|
| 595 |
|
|---|
| 596 | The default encoding is currently implemented as run-time settable
|
|---|
| 597 | process global. This may change in future versions of the
|
|---|
| 598 | interpreter to become a parameter which is managed on a per-thread
|
|---|
| 599 | basis.
|
|---|
| 600 |
|
|---|
| 601 | */
|
|---|
| 602 |
|
|---|
| 603 | PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
|
|---|
| 604 |
|
|---|
| 605 | /* Sets the currently active default encoding.
|
|---|
| 606 |
|
|---|
| 607 | Returns 0 on success, -1 in case of an error.
|
|---|
| 608 |
|
|---|
| 609 | */
|
|---|
| 610 |
|
|---|
| 611 | PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
|
|---|
| 612 | const char *encoding /* Encoding name in standard form */
|
|---|
| 613 | );
|
|---|
| 614 |
|
|---|
| 615 | /* --- Generic Codecs ----------------------------------------------------- */
|
|---|
| 616 |
|
|---|
| 617 | /* Create a Unicode object by decoding the encoded string s of the
|
|---|
| 618 | given size. */
|
|---|
| 619 |
|
|---|
| 620 | PyAPI_FUNC(PyObject*) PyUnicode_Decode(
|
|---|
| 621 | const char *s, /* encoded string */
|
|---|
| 622 | Py_ssize_t size, /* size of buffer */
|
|---|
| 623 | const char *encoding, /* encoding */
|
|---|
| 624 | const char *errors /* error handling */
|
|---|
| 625 | );
|
|---|
| 626 |
|
|---|
| 627 | /* Encodes a Py_UNICODE buffer of the given size and returns a
|
|---|
| 628 | Python string object. */
|
|---|
| 629 |
|
|---|
| 630 | PyAPI_FUNC(PyObject*) PyUnicode_Encode(
|
|---|
| 631 | const Py_UNICODE *s, /* Unicode char buffer */
|
|---|
| 632 | Py_ssize_t size, /* number of Py_UNICODE chars to encode */
|
|---|
| 633 | const char *encoding, /* encoding */
|
|---|
| 634 | const char *errors /* error handling */
|
|---|
| 635 | );
|
|---|
| 636 |
|
|---|
| 637 | /* Encodes a Unicode object and returns the result as Python
|
|---|
| 638 | object. */
|
|---|
| 639 |
|
|---|
| 640 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
|
|---|
| 641 | PyObject *unicode, /* Unicode object */
|
|---|
| 642 | const char *encoding, /* encoding */
|
|---|
| 643 | const char *errors /* error handling */
|
|---|
| 644 | );
|
|---|
| 645 |
|
|---|
| 646 | /* Encodes a Unicode object and returns the result as Python string
|
|---|
| 647 | object. */
|
|---|
| 648 |
|
|---|
| 649 | PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
|
|---|
| 650 | PyObject *unicode, /* Unicode object */
|
|---|
| 651 | const char *encoding, /* encoding */
|
|---|
| 652 | const char *errors /* error handling */
|
|---|
| 653 | );
|
|---|
| 654 |
|
|---|
| 655 | PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
|
|---|
| 656 | PyObject* string /* 256 character map */
|
|---|
| 657 | );
|
|---|
| 658 |
|
|---|
| 659 |
|
|---|
| 660 | /* --- UTF-7 Codecs ------------------------------------------------------- */
|
|---|
| 661 |
|
|---|
| 662 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
|
|---|
| 663 | const char *string, /* UTF-7 encoded string */
|
|---|
| 664 | Py_ssize_t length, /* size of string */
|
|---|
| 665 | const char *errors /* error handling */
|
|---|
| 666 | );
|
|---|
| 667 |
|
|---|
| 668 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
|
|---|
| 669 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 670 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
|---|
| 671 | int encodeSetO, /* force the encoder to encode characters in
|
|---|
| 672 | Set O, as described in RFC2152 */
|
|---|
| 673 | int encodeWhiteSpace, /* force the encoder to encode space, tab,
|
|---|
| 674 | carriage return and linefeed characters */
|
|---|
| 675 | const char *errors /* error handling */
|
|---|
| 676 | );
|
|---|
| 677 |
|
|---|
| 678 | /* --- UTF-8 Codecs ------------------------------------------------------- */
|
|---|
| 679 |
|
|---|
| 680 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
|
|---|
| 681 | const char *string, /* UTF-8 encoded string */
|
|---|
| 682 | Py_ssize_t length, /* size of string */
|
|---|
| 683 | const char *errors /* error handling */
|
|---|
| 684 | );
|
|---|
| 685 |
|
|---|
| 686 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
|
|---|
| 687 | const char *string, /* UTF-8 encoded string */
|
|---|
| 688 | Py_ssize_t length, /* size of string */
|
|---|
| 689 | const char *errors, /* error handling */
|
|---|
| 690 | Py_ssize_t *consumed /* bytes consumed */
|
|---|
| 691 | );
|
|---|
| 692 |
|
|---|
| 693 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
|
|---|
| 694 | PyObject *unicode /* Unicode object */
|
|---|
| 695 | );
|
|---|
| 696 |
|
|---|
| 697 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
|
|---|
| 698 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 699 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
|---|
| 700 | const char *errors /* error handling */
|
|---|
| 701 | );
|
|---|
| 702 |
|
|---|
| 703 | /* --- UTF-16 Codecs ------------------------------------------------------ */
|
|---|
| 704 |
|
|---|
| 705 | /* Decodes length bytes from a UTF-16 encoded buffer string and returns
|
|---|
| 706 | the corresponding Unicode object.
|
|---|
| 707 |
|
|---|
| 708 | errors (if non-NULL) defines the error handling. It defaults
|
|---|
| 709 | to "strict".
|
|---|
| 710 |
|
|---|
| 711 | If byteorder is non-NULL, the decoder starts decoding using the
|
|---|
| 712 | given byte order:
|
|---|
| 713 |
|
|---|
| 714 | *byteorder == -1: little endian
|
|---|
| 715 | *byteorder == 0: native order
|
|---|
| 716 | *byteorder == 1: big endian
|
|---|
| 717 |
|
|---|
| 718 | In native mode, the first two bytes of the stream are checked for a
|
|---|
| 719 | BOM mark. If found, the BOM mark is analysed, the byte order
|
|---|
| 720 | adjusted and the BOM skipped. In the other modes, no BOM mark
|
|---|
| 721 | interpretation is done. After completion, *byteorder is set to the
|
|---|
| 722 | current byte order at the end of input data.
|
|---|
| 723 |
|
|---|
| 724 | If byteorder is NULL, the codec starts in native order mode.
|
|---|
| 725 |
|
|---|
| 726 | */
|
|---|
| 727 |
|
|---|
| 728 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
|
|---|
| 729 | const char *string, /* UTF-16 encoded string */
|
|---|
| 730 | Py_ssize_t length, /* size of string */
|
|---|
| 731 | const char *errors, /* error handling */
|
|---|
| 732 | int *byteorder /* pointer to byteorder to use
|
|---|
| 733 | 0=native;-1=LE,1=BE; updated on
|
|---|
| 734 | exit */
|
|---|
| 735 | );
|
|---|
| 736 |
|
|---|
| 737 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
|
|---|
| 738 | const char *string, /* UTF-16 encoded string */
|
|---|
| 739 | Py_ssize_t length, /* size of string */
|
|---|
| 740 | const char *errors, /* error handling */
|
|---|
| 741 | int *byteorder, /* pointer to byteorder to use
|
|---|
| 742 | 0=native;-1=LE,1=BE; updated on
|
|---|
| 743 | exit */
|
|---|
| 744 | Py_ssize_t *consumed /* bytes consumed */
|
|---|
| 745 | );
|
|---|
| 746 |
|
|---|
| 747 | /* Returns a Python string using the UTF-16 encoding in native byte
|
|---|
| 748 | order. The string always starts with a BOM mark. */
|
|---|
| 749 |
|
|---|
| 750 | PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
|
|---|
| 751 | PyObject *unicode /* Unicode object */
|
|---|
| 752 | );
|
|---|
| 753 |
|
|---|
| 754 | /* Returns a Python string object holding the UTF-16 encoded value of
|
|---|
| 755 | the Unicode data.
|
|---|
| 756 |
|
|---|
| 757 | If byteorder is not 0, output is written according to the following
|
|---|
| 758 | byte order:
|
|---|
| 759 |
|
|---|
| 760 | byteorder == -1: little endian
|
|---|
| 761 | byteorder == 0: native byte order (writes a BOM mark)
|
|---|
| 762 | byteorder == 1: big endian
|
|---|
| 763 |
|
|---|
| 764 | If byteorder is 0, the output string will always start with the
|
|---|
| 765 | Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
|
|---|
| 766 | prepended.
|
|---|
| 767 |
|
|---|
| 768 | Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
|
|---|
| 769 | UCS-2. This trick makes it possible to add full UTF-16 capabilities
|
|---|
| 770 | at a later point without compromising the APIs.
|
|---|
| 771 |
|
|---|
| 772 | */
|
|---|
| 773 |
|
|---|
| 774 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
|
|---|
| 775 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 776 | Py_ssize_t length, /* number of Py_UNICODE chars to encode */
|
|---|
| 777 | const char *errors, /* error handling */
|
|---|
| 778 | int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
|
|---|
| 779 | );
|
|---|
| 780 |
|
|---|
| 781 | /* --- Unicode-Escape Codecs ---------------------------------------------- */
|
|---|
| 782 |
|
|---|
| 783 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
|
|---|
| 784 | const char *string, /* Unicode-Escape encoded string */
|
|---|
| 785 | Py_ssize_t length, /* size of string */
|
|---|
| 786 | const char *errors /* error handling */
|
|---|
| 787 | );
|
|---|
| 788 |
|
|---|
| 789 | PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
|
|---|
| 790 | PyObject *unicode /* Unicode object */
|
|---|
| 791 | );
|
|---|
| 792 |
|
|---|
| 793 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
|
|---|
| 794 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 795 | Py_ssize_t length /* Number of Py_UNICODE chars to encode */
|
|---|
| 796 | );
|
|---|
| 797 |
|
|---|
| 798 | /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
|
|---|
| 799 |
|
|---|
| 800 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
|
|---|
| 801 | const char *string, /* Raw-Unicode-Escape encoded string */
|
|---|
| 802 | Py_ssize_t length, /* size of string */
|
|---|
| 803 | const char *errors /* error handling */
|
|---|
| 804 | );
|
|---|
| 805 |
|
|---|
| 806 | PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
|
|---|
| 807 | PyObject *unicode /* Unicode object */
|
|---|
| 808 | );
|
|---|
| 809 |
|
|---|
| 810 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
|
|---|
| 811 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 812 | Py_ssize_t length /* Number of Py_UNICODE chars to encode */
|
|---|
| 813 | );
|
|---|
| 814 |
|
|---|
| 815 | /* --- Unicode Internal Codec ---------------------------------------------
|
|---|
| 816 |
|
|---|
| 817 | Only for internal use in _codecsmodule.c */
|
|---|
| 818 |
|
|---|
| 819 | PyObject *_PyUnicode_DecodeUnicodeInternal(
|
|---|
| 820 | const char *string, /* string to decode */
|
|---|
| 821 | Py_ssize_t length, /* size of string */
|
|---|
| 822 | const char *errors /* error handling */
|
|---|
| 823 | ); /* NOTE(review): declared without PyAPI_FUNC, unlike the other declarations in this header -- confirm this is intentional */
|
|---|
| 824 |
|
|---|
| 825 | /* --- Latin-1 Codecs -----------------------------------------------------
|
|---|
| 826 |
|
|---|
| 827 | Note: Latin-1 corresponds to the first 256 Unicode ordinals.
|
|---|
| 828 |
|
|---|
| 829 | */
|
|---|
| 830 |
|
|---|
| 831 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
|
|---|
| 832 | const char *string, /* Latin-1 encoded string */
|
|---|
| 833 | Py_ssize_t length, /* size of string */
|
|---|
| 834 | const char *errors /* error handling */
|
|---|
| 835 | );
|
|---|
| 836 |
|
|---|
| 837 | PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
|
|---|
| 838 | PyObject *unicode /* Unicode object */
|
|---|
| 839 | );
|
|---|
| 840 |
|
|---|
| 841 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
|
|---|
| 842 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 843 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 844 | const char *errors /* error handling */
|
|---|
| 845 | );
|
|---|
| 846 |
|
|---|
| 847 | /* --- ASCII Codecs -------------------------------------------------------
|
|---|
| 848 |
|
|---|
| 849 | Only 7-bit ASCII data is accepted. All other codes generate errors.
|
|---|
| 850 |
|
|---|
| 851 | */
|
|---|
| 852 |
|
|---|
| 853 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
|
|---|
| 854 | const char *string, /* ASCII encoded string */
|
|---|
| 855 | Py_ssize_t length, /* size of string */
|
|---|
| 856 | const char *errors /* error handling */
|
|---|
| 857 | );
|
|---|
| 858 |
|
|---|
| 859 | PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
|
|---|
| 860 | PyObject *unicode /* Unicode object */
|
|---|
| 861 | );
|
|---|
| 862 |
|
|---|
| 863 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
|
|---|
| 864 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 865 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 866 | const char *errors /* error handling */
|
|---|
| 867 | );
|
|---|
| 868 |
|
|---|
| 869 | /* --- Character Map Codecs -----------------------------------------------
|
|---|
| 870 |
|
|---|
| 871 | This codec uses mappings to encode and decode characters.
|
|---|
| 872 |
|
|---|
| 873 | Decoding mappings must map single string characters to single
|
|---|
| 874 | Unicode characters, integers (which are then interpreted as Unicode
|
|---|
| 875 | ordinals) or None (meaning "undefined mapping" and causing an
|
|---|
| 876 | error).
|
|---|
| 877 |
|
|---|
| 878 | Encoding mappings must map single Unicode characters to single
|
|---|
| 879 | string characters, integers (which are then interpreted as Latin-1
|
|---|
| 880 | ordinals) or None (meaning "undefined mapping" and causing an
|
|---|
| 881 | error).
|
|---|
| 882 |
|
|---|
| 883 | If a character lookup fails with a LookupError, the character is
|
|---|
| 884 | copied as-is meaning that its ordinal value will be interpreted as
|
|---|
| 885 | Unicode or Latin-1 ordinal resp. Because of this mappings only need
|
|---|
| 886 | to contain those mappings which map characters to different code
|
|---|
| 887 | points.
|
|---|
| 888 |
|
|---|
| 889 | */
|
|---|
| 890 |
|
|---|
| 891 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
|
|---|
| 892 | const char *string, /* Encoded string */
|
|---|
| 893 | Py_ssize_t length, /* size of string */
|
|---|
| 894 | PyObject *mapping, /* character mapping
|
|---|
| 895 | (char ordinal -> unicode ordinal) */
|
|---|
| 896 | const char *errors /* error handling */
|
|---|
| 897 | );
|
|---|
| 898 |
|
|---|
| 899 | PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
|
|---|
| 900 | PyObject *unicode, /* Unicode object */
|
|---|
| 901 | PyObject *mapping /* character mapping
|
|---|
| 902 | (unicode ordinal -> char ordinal) */
|
|---|
| 903 | );
|
|---|
| 904 |
|
|---|
| 905 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
|
|---|
| 906 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 907 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 908 | PyObject *mapping, /* character mapping
|
|---|
| 909 | (unicode ordinal -> char ordinal) */
|
|---|
| 910 | const char *errors /* error handling */
|
|---|
| 911 | );
|
|---|
| 912 |
|
|---|
| 913 | /* Translate a Py_UNICODE buffer of the given length by applying a
|
|---|
| 914 | character mapping table to it and return the resulting Unicode
|
|---|
| 915 | object.
|
|---|
| 916 |
|
|---|
| 917 | The mapping table must map Unicode ordinal integers to Unicode
|
|---|
| 918 | ordinal integers or None (causing deletion of the character).
|
|---|
| 919 |
|
|---|
| 920 | Mapping tables may be dictionaries or sequences. Unmapped character
|
|---|
| 921 | ordinals (ones which cause a LookupError) are left untouched and
|
|---|
| 922 | are copied as-is.
|
|---|
| 923 |
|
|---|
| 924 | */
|
|---|
| 925 |
|
|---|
| 926 | PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
|
|---|
| 927 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 928 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 929 | PyObject *table, /* Translate table */
|
|---|
| 930 | const char *errors /* error handling */
|
|---|
| 931 | );
|
|---|
| 932 |
|
|---|
| 933 | #ifdef MS_WIN32
|
|---|
| 934 |
|
|---|
| 935 | /* --- MBCS codecs for Windows -------------------------------------------- */
|
|---|
| 936 |
|
|---|
| 937 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
|
|---|
| 938 | const char *string, /* MBCS encoded string */
|
|---|
| 939 | Py_ssize_t length, /* size of string */
|
|---|
| 940 | const char *errors /* error handling */
|
|---|
| 941 | );
|
|---|
| 942 |
|
|---|
| 943 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
|
|---|
| 944 | const char *string, /* MBCS encoded string */
|
|---|
| 945 | Py_ssize_t length, /* size of string */
|
|---|
| 946 | const char *errors, /* error handling */
|
|---|
| 947 | Py_ssize_t *consumed /* bytes consumed */
|
|---|
| 948 | );
|
|---|
| 949 |
|
|---|
| 950 | PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
|
|---|
| 951 | PyObject *unicode /* Unicode object */
|
|---|
| 952 | );
|
|---|
| 953 |
|
|---|
| 954 | PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
|
|---|
| 955 | const Py_UNICODE *data, /* Unicode char buffer */
|
|---|
| 956 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 957 | const char *errors /* error handling */
|
|---|
| 958 | );
|
|---|
| 959 |
|
|---|
| 960 | #endif /* MS_WIN32 */
|
|---|
| 961 |
|
|---|
| 962 | /* --- Decimal Encoder ---------------------------------------------------- */
|
|---|
| 963 |
|
|---|
| 964 | /* Takes a Unicode string holding a decimal value and writes it into
|
|---|
| 965 | an output buffer using standard ASCII digit codes.
|
|---|
| 966 |
|
|---|
| 967 | The output buffer has to provide at least length+1 bytes of storage
|
|---|
| 968 | area. The output string is 0-terminated.
|
|---|
| 969 |
|
|---|
| 970 | The encoder converts whitespace to ' ', decimal characters to their
|
|---|
| 971 | corresponding ASCII digit and all other Latin-1 characters except
|
|---|
| 972 | \0 as-is. Characters outside this range (Unicode ordinals 1-255)
|
|---|
| 973 | are treated as errors. This includes embedded NULL bytes.
|
|---|
| 974 |
|
|---|
| 975 | Error handling is defined by the errors argument:
|
|---|
| 976 |
|
|---|
| 977 | NULL or "strict": raise a ValueError
|
|---|
| 978 | "ignore": ignore the wrong characters (these are not copied to the
|
|---|
| 979 | output buffer)
|
|---|
| 980 | "replace": replaces illegal characters with '?'
|
|---|
| 981 |
|
|---|
| 982 | Returns 0 on success, -1 on failure.
|
|---|
| 983 |
|
|---|
| 984 | */
|
|---|
| 985 |
|
|---|
| 986 | PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
|
|---|
| 987 | Py_UNICODE *s, /* Unicode buffer */
|
|---|
| 988 | Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
|
|---|
| 989 | char *output, /* Output buffer; must have size >= length+1 (output is 0-terminated) */
|
|---|
| 990 | const char *errors /* error handling */
|
|---|
| 991 | );
|
|---|
| 992 |
|
|---|
| 993 | /* --- Methods & Slots ----------------------------------------------------
|
|---|
| 994 |
|
|---|
| 995 | These are capable of handling Unicode objects and strings on input
|
|---|
| 996 | (we refer to them as strings in the descriptions) and return
|
|---|
| 997 | Unicode objects or integers as appropriate. */
|
|---|
| 998 |
|
|---|
| 999 | /* Concat two strings giving a new Unicode string. */
|
|---|
| 1000 |
|
|---|
| 1001 | PyAPI_FUNC(PyObject*) PyUnicode_Concat(
|
|---|
| 1002 | PyObject *left, /* Left string */
|
|---|
| 1003 | PyObject *right /* Right string */
|
|---|
| 1004 | );
|
|---|
| 1005 |
|
|---|
| 1006 | /* Split a string giving a list of Unicode strings.
|
|---|
| 1007 |
|
|---|
| 1008 | If sep is NULL, splitting will be done at all whitespace
|
|---|
| 1009 | substrings. Otherwise, splits occur at the given separator.
|
|---|
| 1010 |
|
|---|
| 1011 | At most maxsplit splits will be done. If negative, no limit is set.
|
|---|
| 1012 |
|
|---|
| 1013 | Separators are not included in the resulting list.
|
|---|
| 1014 |
|
|---|
| 1015 | */
|
|---|
| 1016 |
|
|---|
| 1017 | PyAPI_FUNC(PyObject*) PyUnicode_Split(
|
|---|
| 1018 | PyObject *s, /* String to split */
|
|---|
| 1019 | PyObject *sep, /* String separator */
|
|---|
| 1020 | Py_ssize_t maxsplit /* Maxsplit count */
|
|---|
| 1021 | );
|
|---|
| 1022 |
|
|---|
| 1023 | /* Split a string at line breaks giving a list of Unicode strings.
|
|---|
| 1024 |
|
|---|
| 1025 | CRLF is considered to be one line break. Line breaks are not
|
|---|
| 1026 | included in the resulting list unless keepends is true. */
|
|---|
| 1027 |
|
|---|
| 1028 | PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
|
|---|
| 1029 | PyObject *s, /* String to split */
|
|---|
| 1030 | int keepends /* If true, line end markers are included */
|
|---|
| 1031 | );
|
|---|
| 1032 |
|
|---|
| 1033 | /* Partition a string using a given separator. */
|
|---|
| 1034 |
|
|---|
| 1035 | PyAPI_FUNC(PyObject*) PyUnicode_Partition(
|
|---|
| 1036 | PyObject *s, /* String to partition */
|
|---|
| 1037 | PyObject *sep /* String separator */
|
|---|
| 1038 | );
|
|---|
| 1039 |
|
|---|
| 1040 | /* Partition a string using a given separator, searching from the end of the
|
|---|
| 1041 | string. */
|
|---|
| 1042 |
|
|---|
| 1043 | PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
|
|---|
| 1044 | PyObject *s, /* String to partition */
|
|---|
| 1045 | PyObject *sep /* String separator */
|
|---|
| 1046 | );
|
|---|
| 1047 |
|
|---|
| 1048 | /* Split a string giving a list of Unicode strings.
|
|---|
| 1049 |
|
|---|
| 1050 | If sep is NULL, splitting will be done at all whitespace
|
|---|
| 1051 | substrings. Otherwise, splits occur at the given separator.
|
|---|
| 1052 |
|
|---|
| 1053 | At most maxsplit splits will be done. If maxsplit is negative, no
|
|---|
| 1054 | limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from
|
|---|
| 1055 | the end of the string.
|
|---|
| 1056 |
|
|---|
| 1057 | Separators are not included in the resulting list.
|
|---|
| 1058 |
|
|---|
| 1059 | */
|
|---|
| 1060 |
|
|---|
| 1061 | PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
|
|---|
| 1062 | PyObject *s, /* String to split */
|
|---|
| 1063 | PyObject *sep, /* String separator */
|
|---|
| 1064 | Py_ssize_t maxsplit /* Maxsplit count */
|
|---|
| 1065 | );
|
|---|
| 1066 |
|
|---|
| 1067 | /* Translate a string by applying a character mapping table to it and
|
|---|
| 1068 | return the resulting Unicode object.
|
|---|
| 1069 |
|
|---|
| 1070 | The mapping table must map Unicode ordinal integers to Unicode
|
|---|
| 1071 | ordinal integers or None (causing deletion of the character).
|
|---|
| 1072 |
|
|---|
| 1073 | Mapping tables may be dictionaries or sequences. Unmapped character
|
|---|
| 1074 | ordinals (ones which cause a LookupError) are left untouched and
|
|---|
| 1075 | are copied as-is.
|
|---|
| 1076 |
|
|---|
| 1077 | */
|
|---|
| 1078 |
|
|---|
| 1079 | PyAPI_FUNC(PyObject *) PyUnicode_Translate(
|
|---|
| 1080 | PyObject *str, /* String */
|
|---|
| 1081 | PyObject *table, /* Translate table */
|
|---|
| 1082 | const char *errors /* error handling */
|
|---|
| 1083 | );
|
|---|
| 1084 |
|
|---|
| 1085 | /* Join a sequence of strings using the given separator and return
|
|---|
| 1086 | the resulting Unicode string. */
|
|---|
| 1087 |
|
|---|
| 1088 | PyAPI_FUNC(PyObject*) PyUnicode_Join(
|
|---|
| 1089 | PyObject *separator, /* Separator string */
|
|---|
| 1090 | PyObject *seq /* Sequence object */
|
|---|
| 1091 | );
|
|---|
| 1092 |
|
|---|
| 1093 | /* Return 1 if substr matches str[start:end] at the given tail end, 0
|
|---|
| 1094 | otherwise. */
|
|---|
| 1095 |
|
|---|
| 1096 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
|
|---|
| 1097 | PyObject *str, /* String */
|
|---|
| 1098 | PyObject *substr, /* Prefix or Suffix string */
|
|---|
| 1099 | Py_ssize_t start, /* Start index */
|
|---|
| 1100 | Py_ssize_t end, /* Stop index */
|
|---|
| 1101 | int direction /* Tail end: -1 prefix, +1 suffix */
|
|---|
| 1102 | );
|
|---|
| 1103 |
|
|---|
| 1104 | /* Return the first position of substr in str[start:end] using the
|
|---|
| 1105 | given search direction or -1 if not found. -2 is returned in case
|
|---|
| 1106 | an error occurred and an exception is set. */
|
|---|
| 1107 |
|
|---|
| 1108 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
|
|---|
| 1109 | PyObject *str, /* String */
|
|---|
| 1110 | PyObject *substr, /* Substring to find */
|
|---|
| 1111 | Py_ssize_t start, /* Start index */
|
|---|
| 1112 | Py_ssize_t end, /* Stop index */
|
|---|
| 1113 | int direction /* Find direction: +1 forward, -1 backward */
|
|---|
| 1114 | );
|
|---|
| 1115 |
|
|---|
| 1116 | /* Count the number of occurrences of substr in str[start:end]. */
|
|---|
| 1117 |
|
|---|
| 1118 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
|
|---|
| 1119 | PyObject *str, /* String */
|
|---|
| 1120 | PyObject *substr, /* Substring to count */
|
|---|
| 1121 | Py_ssize_t start, /* Start index */
|
|---|
| 1122 | Py_ssize_t end /* Stop index */
|
|---|
| 1123 | );
|
|---|
| 1124 |
|
|---|
| 1125 | /* Replace at most maxcount occurrences of substr in str with replstr
|
|---|
| 1126 | and return the resulting Unicode object. */
|
|---|
| 1127 |
|
|---|
| 1128 | PyAPI_FUNC(PyObject *) PyUnicode_Replace(
|
|---|
| 1129 | PyObject *str, /* String */
|
|---|
| 1130 | PyObject *substr, /* Substring to find */
|
|---|
| 1131 | PyObject *replstr, /* Substring to replace */
|
|---|
| 1132 | Py_ssize_t maxcount /* Max. number of replacements to apply;
|
|---|
| 1133 | -1 = all */
|
|---|
| 1134 | );
|
|---|
| 1135 |
|
|---|
| 1136 | /* Compare two strings and return -1, 0, 1 for less than, equal,
|
|---|
| 1137 | greater than resp. */
|
|---|
| 1138 |
|
|---|
| 1139 | PyAPI_FUNC(int) PyUnicode_Compare(
|
|---|
| 1140 | PyObject *left, /* Left string */
|
|---|
| 1141 | PyObject *right /* Right string */
|
|---|
| 1142 | );
|
|---|
| 1143 |
|
|---|
| 1144 | /* Rich compare two strings and return one of the following:
|
|---|
| 1145 |
|
|---|
| 1146 | - NULL in case an exception was raised
|
|---|
| 1147 | - Py_True or Py_False for successful comparisons
|
|---|
| 1148 | - Py_NotImplemented in case the type combination is unknown
|
|---|
| 1149 |
|
|---|
| 1150 | Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
|
|---|
| 1151 | case the conversion of the arguments to Unicode fails with a
|
|---|
| 1152 | UnicodeDecodeError.
|
|---|
| 1153 |
|
|---|
| 1154 | Possible values for op:
|
|---|
| 1155 |
|
|---|
| 1156 | Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
|
|---|
| 1157 |
|
|---|
| 1158 | */
|
|---|
| 1159 |
|
|---|
| 1160 | PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
|
|---|
| 1161 | PyObject *left, /* Left string */
|
|---|
| 1162 | PyObject *right, /* Right string */
|
|---|
| 1163 | int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
|
|---|
| 1164 | );
|
|---|
| 1165 |
|
|---|
| 1166 | /* Apply an argument tuple or dictionary to a format string and return
|
|---|
| 1167 | the resulting Unicode string. */
|
|---|
| 1168 |
|
|---|
| 1169 | PyAPI_FUNC(PyObject *) PyUnicode_Format(
|
|---|
| 1170 | PyObject *format, /* Format string */
|
|---|
| 1171 | PyObject *args /* Argument tuple or dictionary */
|
|---|
| 1172 | );
|
|---|
| 1173 |
|
|---|
| 1174 | /* Checks whether element is contained in container and return 1/0
|
|---|
| 1175 | accordingly.
|
|---|
| 1176 |
|
|---|
| 1177 | element has to coerce to a one-element Unicode string. -1 is
|
|---|
| 1178 | returned in case of an error. */
|
|---|
| 1179 |
|
|---|
| 1180 | PyAPI_FUNC(int) PyUnicode_Contains(
|
|---|
| 1181 | PyObject *container, /* Container string */
|
|---|
| 1182 | PyObject *element /* Element string */
|
|---|
| 1183 | );
|
|---|
| 1184 |
|
|---|
| 1185 | /* Externally visible for str.strip(unicode) */
|
|---|
| 1186 | PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
|
|---|
| 1187 | PyUnicodeObject *self,
|
|---|
| 1188 | int striptype,
|
|---|
| 1189 | PyObject *sepobj
|
|---|
| 1190 | );
|
|---|
| 1191 |
|
|---|
| 1192 | /* === Characters Type APIs =============================================== */
|
|---|
| 1193 |
|
|---|
| 1194 | /* These should not be used directly. Use the Py_UNICODE_IS* and
|
|---|
| 1195 | Py_UNICODE_TO* macros instead.
|
|---|
| 1196 |
|
|---|
| 1197 | These APIs are implemented in Objects/unicodectype.c.
|
|---|
| 1198 |
|
|---|
| 1199 | */
|
|---|
| 1200 |
|
|---|
| 1201 | PyAPI_FUNC(int) _PyUnicode_IsLowercase(
|
|---|
| 1202 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1203 | );
|
|---|
| 1204 |
|
|---|
| 1205 | PyAPI_FUNC(int) _PyUnicode_IsUppercase(
|
|---|
| 1206 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1207 | );
|
|---|
| 1208 |
|
|---|
| 1209 | PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
|
|---|
| 1210 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1211 | );
|
|---|
| 1212 |
|
|---|
| 1213 | PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
|
|---|
| 1214 | const Py_UNICODE ch /* Unicode character */
|
|---|
| 1215 | );
|
|---|
| 1216 |
|
|---|
| 1217 | PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
|
|---|
| 1218 | const Py_UNICODE ch /* Unicode character */
|
|---|
| 1219 | );
|
|---|
| 1220 |
|
|---|
| 1221 | PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
|
|---|
| 1222 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1223 | );
|
|---|
| 1224 |
|
|---|
| 1225 | PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
|
|---|
| 1226 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1227 | );
|
|---|
| 1228 |
|
|---|
| 1229 | PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
|
|---|
| 1230 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1231 | );
|
|---|
| 1232 |
|
|---|
| 1233 | PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
|
|---|
| 1234 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1235 | );
|
|---|
| 1236 |
|
|---|
| 1237 | PyAPI_FUNC(int) _PyUnicode_ToDigit(
|
|---|
| 1238 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1239 | );
|
|---|
| 1240 |
|
|---|
| 1241 | PyAPI_FUNC(double) _PyUnicode_ToNumeric(
|
|---|
| 1242 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1243 | );
|
|---|
| 1244 |
|
|---|
| 1245 | PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
|
|---|
| 1246 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1247 | );
|
|---|
| 1248 |
|
|---|
| 1249 | PyAPI_FUNC(int) _PyUnicode_IsDigit(
|
|---|
| 1250 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1251 | );
|
|---|
| 1252 |
|
|---|
| 1253 | PyAPI_FUNC(int) _PyUnicode_IsNumeric(
|
|---|
| 1254 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1255 | );
|
|---|
| 1256 |
|
|---|
| 1257 | PyAPI_FUNC(int) _PyUnicode_IsAlpha(
|
|---|
| 1258 | Py_UNICODE ch /* Unicode character */
|
|---|
| 1259 | );
|
|---|
| 1260 |
|
|---|
| 1261 | #ifdef __cplusplus
|
|---|
| 1262 | }
|
|---|
| 1263 | #endif
|
|---|
| 1264 | #endif /* Py_USING_UNICODE */
|
|---|
| 1265 | #endif /* !Py_UNICODEOBJECT_H */
|
|---|