1 | #ifndef Py_UNICODEOBJECT_H |
2 | #define Py_UNICODEOBJECT_H |
3 | |
4 | #include <stdarg.h> |
5 | |
6 | /* |
7 | |
8 | Unicode implementation based on original code by Fredrik Lundh, |
9 | modified by Marc-Andre Lemburg (mal@lemburg.com) according to the |
10 | Unicode Integration Proposal. (See |
11 | http://www.egenix.com/files/python/unicode-proposal.txt). |
12 | |
13 | Copyright (c) Corporation for National Research Initiatives. |
14 | |
15 | |
16 | Original header: |
17 | -------------------------------------------------------------------- |
18 | |
19 | * Yet another Unicode string type for Python. This type supports the |
20 | * 16-bit Basic Multilingual Plane (BMP) only. |
21 | * |
22 | * Written by Fredrik Lundh, January 1999. |
23 | * |
24 | * Copyright (c) 1999 by Secret Labs AB. |
25 | * Copyright (c) 1999 by Fredrik Lundh. |
26 | * |
27 | * fredrik@pythonware.com |
28 | * http://www.pythonware.com |
29 | * |
30 | * -------------------------------------------------------------------- |
31 | * This Unicode String Type is |
32 | * |
33 | * Copyright (c) 1999 by Secret Labs AB |
34 | * Copyright (c) 1999 by Fredrik Lundh |
35 | * |
36 | * By obtaining, using, and/or copying this software and/or its |
37 | * associated documentation, you agree that you have read, understood, |
38 | * and will comply with the following terms and conditions: |
39 | * |
40 | * Permission to use, copy, modify, and distribute this software and its |
41 | * associated documentation for any purpose and without fee is hereby |
42 | * granted, provided that the above copyright notice appears in all |
43 | * copies, and that both that copyright notice and this permission notice |
44 | * appear in supporting documentation, and that the name of Secret Labs |
45 | * AB or the author not be used in advertising or publicity pertaining to |
46 | * distribution of the software without specific, written prior |
47 | * permission. |
48 | * |
49 | * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO |
50 | * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
51 | * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR |
52 | * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
53 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
54 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT |
55 | * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
56 | * -------------------------------------------------------------------- */ |
57 | |
58 | #include <ctype.h> |
59 | |
60 | /* === Internal API ======================================================= */ |
61 | |
62 | /* --- Internal Unicode Format -------------------------------------------- */ |
63 | |
64 | /* Python 3.x requires unicode */ |
65 | #define Py_USING_UNICODE |
66 | |
67 | #ifndef SIZEOF_WCHAR_T |
68 | #error Must define SIZEOF_WCHAR_T |
69 | #endif |
70 | |
71 | #define Py_UNICODE_SIZE SIZEOF_WCHAR_T |
72 | |
73 | /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. |
74 | Otherwise, Unicode strings are stored as UCS-2 (with limited support |
75 | for UTF-16) */ |
76 | |
77 | #if Py_UNICODE_SIZE >= 4 |
78 | #define Py_UNICODE_WIDE |
79 | #endif |
80 | |
81 | /* Set these flags if the platform has "wchar.h" and the |
82 | wchar_t type is a 16-bit unsigned type */ |
83 | /* #define HAVE_WCHAR_H */ |
84 | /* #define HAVE_USABLE_WCHAR_T */ |
85 | |
/* Py_UNICODE used to be the native Unicode storage format (code unit) of
   Python and represents a single Unicode element in the Unicode type.
   With PEP 393 it is deprecated and is nothing more than a typedef for
   wchar_t.  It is not exposed through the limited API. */
#if !defined(Py_LIMITED_API)
#define PY_UNICODE_TYPE wchar_t
typedef wchar_t Py_UNICODE;
#endif
95 | |
/* When the compiler provides a wchar_t type, we try to support it through
   the interface functions PyUnicode_FromWideChar(), PyUnicode_AsWideChar()
   and PyUnicode_AsWideCharString(). */

/* A usable wchar_t implies that <wchar.h> can be included. */
#if defined(HAVE_USABLE_WCHAR_T) && !defined(HAVE_WCHAR_H)
#  define HAVE_WCHAR_H
#endif

#ifdef MS_WINDOWS
#  define HAVE_MBCS
#endif

#ifdef HAVE_WCHAR_H
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
#  ifdef _HAVE_BSDI
#    include <time.h>
#  endif
#  include <wchar.h>
#endif
117 | |
118 | /* Py_UCS4 and Py_UCS2 are typedefs for the respective |
119 | unicode representations. */ |
120 | #if SIZEOF_INT == 4 |
121 | typedef unsigned int Py_UCS4; |
122 | #elif SIZEOF_LONG == 4 |
123 | typedef unsigned long Py_UCS4; |
124 | #else |
125 | #error "Could not find a proper typedef for Py_UCS4" |
126 | #endif |
127 | |
128 | #if SIZEOF_SHORT == 2 |
129 | typedef unsigned short Py_UCS2; |
130 | #else |
131 | #error "Could not find a proper typedef for Py_UCS2" |
132 | #endif |
133 | |
134 | typedef unsigned char Py_UCS1; |
135 | |
136 | /* --- Internal Unicode Operations ---------------------------------------- */ |
137 | |
138 | /* Since splitting on whitespace is an important use case, and |
139 | whitespace in most situations is solely ASCII whitespace, we |
140 | optimize for the common case by using a quick look-up table |
141 | _Py_ascii_whitespace (see below) with an inlined check. |
142 | |
143 | */ |
144 | #ifndef Py_LIMITED_API |
/* Values below 128 are answered from the _Py_ascii_whitespace table,
   everything else by _PyUnicode_IsWhitespace().  Note that ch is
   evaluated twice. */
#define Py_UNICODE_ISSPACE(ch) \
    ((ch) < 128U \
        ? _Py_ascii_whitespace[(ch)] \
        : _PyUnicode_IsWhitespace(ch))
147 | |
/* The macros below are plain aliases for the corresponding _PyUnicode_*
   functions; each forwards its argument unchanged. */

/* Case and line-break predicates. */
#define Py_UNICODE_ISLOWER(ch)      _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch)      _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch)      _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch)  _PyUnicode_IsLinebreak(ch)

/* Case conversions. */
#define Py_UNICODE_TOLOWER(ch)      _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch)      _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch)      _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch)    _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch)      _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch)    _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch)  _PyUnicode_IsPrintable(ch)

/* Numeric conversions. */
#define Py_UNICODE_TODECIMAL(ch)    _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch)      _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch)    _PyUnicode_ToNumeric(ch)

/* Alphabetic predicate. */
#define Py_UNICODE_ISALPHA(ch)      _PyUnicode_IsAlpha(ch)
167 | |
/* True if ch is alphabetic, a decimal, a digit or numeric.  The checks
   short-circuit in that order, so ch may be evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch)   || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch)   || \
     Py_UNICODE_ISNUMERIC(ch))
173 | |
/* Copy `length` Py_UNICODE code units from `source` to `target` by
   handing the byte count to Py_MEMCPY. */
#define Py_UNICODE_COPY(target, source, length) \
    Py_MEMCPY((target), (source), \
              (length)*sizeof(Py_UNICODE))
176 | |
/* Store `value` into the first `length` Py_UNICODE slots of `target`.

   Every argument is evaluated exactly once, in the order target, value,
   length.  The length is captured in a local before the loop starts, so
   an argument with side effects (or an expensive call) is not re-run on
   each iteration. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        const Py_ssize_t n_ = (length);\
        for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\
    } while (0)
181 | |
/* Surrogate helpers.  The range tests evaluate ch twice. */

/* Any surrogate: U+D800..U+DFFF. */
#define Py_UNICODE_IS_SURROGATE(ch) \
    ((ch) >= 0xD800 && (ch) <= 0xDFFF)
/* High (leading) surrogate: U+D800..U+DBFF. */
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) \
    ((ch) >= 0xD800 && (ch) <= 0xDBFF)
/* Low (trailing) surrogate: U+DC00..U+DFFF. */
#define Py_UNICODE_IS_LOW_SURROGATE(ch) \
    ((ch) >= 0xDC00 && (ch) <= 0xDFFF)

/* Combine a high and a low surrogate into a single Py_UCS4 value:
   ten payload bits from each, offset by 0x10000. */
#define Py_UNICODE_JOIN_SURROGATES(high, low) \
    (((((Py_UCS4)(high) & 0x03FF) << 10) | \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)

/* High surrogate of ch: the top 10 bits added to D800. */
#define Py_UNICODE_HIGH_SURROGATE(ch) \
    (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
/* Low surrogate of ch: the bottom 10 bits added to DC00. */
#define Py_UNICODE_LOW_SURROGATE(ch) \
    (0xDC00 + ((ch) & 0x3FF))
194 | |
/* Test whether `substring` occurs in `string` at `offset`, comparing the
   wstr buffers.  The offset must be valid, and the substring must not be
   empty.

   The first and the last code unit are compared before falling back to
   memcmp() over the whole substring. */

#define Py_UNICODE_MATCH(string, offset, substring) \
    ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
     (*((string)->wstr + (offset) + (substring)->wstr_length-1) == \
      *((substring)->wstr + (substring)->wstr_length-1)) && \
     memcmp((string)->wstr + (offset), \
            (substring)->wstr, \
            (substring)->wstr_length*sizeof(Py_UNICODE)) == 0)
202 | |
203 | #endif /* Py_LIMITED_API */ |
204 | |
205 | #ifdef __cplusplus |
206 | extern "C" { |
207 | #endif |
208 | |
209 | /* --- Unicode Type ------------------------------------------------------- */ |
210 | |
211 | #ifndef Py_LIMITED_API |
212 | |
213 | /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject |
214 | structure. state.ascii and state.compact are set, and the data |
215 | immediately follow the structure. utf8_length and wstr_length can be found |
216 | in the length field; the utf8 pointer is equal to the data pointer. */ |
217 | typedef struct { |
218 | /* There are 4 forms of Unicode strings: |
219 | |
220 | - compact ascii: |
221 | |
222 | * structure = PyASCIIObject |
223 | * test: PyUnicode_IS_COMPACT_ASCII(op) |
224 | * kind = PyUnicode_1BYTE_KIND |
225 | * compact = 1 |
226 | * ascii = 1 |
227 | * ready = 1 |
228 | * (length is the length of the utf8 and wstr strings) |
229 | * (data starts just after the structure) |
230 | * (since ASCII is decoded from UTF-8, the utf8 string are the data) |
231 | |
232 | - compact: |
233 | |
234 | * structure = PyCompactUnicodeObject |
235 | * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op) |
236 | * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or |
237 | PyUnicode_4BYTE_KIND |
238 | * compact = 1 |
239 | * ready = 1 |
240 | * ascii = 0 |
241 | * utf8 is not shared with data |
242 | * utf8_length = 0 if utf8 is NULL |
243 | * wstr is shared with data and wstr_length=length |
244 | if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 |
245 | or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4 |
246 | * wstr_length = 0 if wstr is NULL |
247 | * (data starts just after the structure) |
248 | |
249 | - legacy string, not ready: |
250 | |
251 | * structure = PyUnicodeObject |
252 | * test: kind == PyUnicode_WCHAR_KIND |
253 | * length = 0 (use wstr_length) |
254 | * hash = -1 |
255 | * kind = PyUnicode_WCHAR_KIND |
256 | * compact = 0 |
257 | * ascii = 0 |
258 | * ready = 0 |
259 | * interned = SSTATE_NOT_INTERNED |
260 | * wstr is not NULL |
261 | * data.any is NULL |
262 | * utf8 is NULL |
263 | * utf8_length = 0 |
264 | |
265 | - legacy string, ready: |
266 | |
267 | * structure = PyUnicodeObject structure |
268 | * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND |
269 | * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or |
270 | PyUnicode_4BYTE_KIND |
271 | * compact = 0 |
272 | * ready = 1 |
273 | * data.any is not NULL |
274 | * utf8 is shared and utf8_length = length with data.any if ascii = 1 |
275 | * utf8_length = 0 if utf8 is NULL |
276 | * wstr is shared with data.any and wstr_length = length |
277 | if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 |
278 | or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4 |
279 | * wstr_length = 0 if wstr is NULL |
280 | |
281 | Compact strings use only one memory block (structure + characters), |
282 | whereas legacy strings use one block for the structure and one block |
283 | for characters. |
284 | |
285 | Legacy strings are created by PyUnicode_FromUnicode() and |
286 | PyUnicode_FromStringAndSize(NULL, size) functions. They become ready |
287 | when PyUnicode_READY() is called. |
288 | |
289 | See also _PyUnicode_CheckConsistency(). |
290 | */ |
291 | PyObject_HEAD |
292 | Py_ssize_t length; /* Number of code points in the string */ |
293 | Py_hash_t hash; /* Hash value; -1 if not set */ |
294 | struct { |
295 | /* |
296 | SSTATE_NOT_INTERNED (0) |
297 | SSTATE_INTERNED_MORTAL (1) |
298 | SSTATE_INTERNED_IMMORTAL (2) |
299 | |
300 | If interned != SSTATE_NOT_INTERNED, the two references from the |
301 | dictionary to this object are *not* counted in ob_refcnt. |
302 | */ |
303 | unsigned int interned:2; |
304 | /* Character size: |
305 | |
306 | - PyUnicode_WCHAR_KIND (0): |
307 | |
308 | * character type = wchar_t (16 or 32 bits, depending on the |
309 | platform) |
310 | |
311 | - PyUnicode_1BYTE_KIND (1): |
312 | |
313 | * character type = Py_UCS1 (8 bits, unsigned) |
314 | * all characters are in the range U+0000-U+00FF (latin1) |
315 | * if ascii is set, all characters are in the range U+0000-U+007F |
316 | (ASCII), otherwise at least one character is in the range |
317 | U+0080-U+00FF |
318 | |
319 | - PyUnicode_2BYTE_KIND (2): |
320 | |
321 | * character type = Py_UCS2 (16 bits, unsigned) |
322 | * all characters are in the range U+0000-U+FFFF (BMP) |
323 | * at least one character is in the range U+0100-U+FFFF |
324 | |
325 | - PyUnicode_4BYTE_KIND (4): |
326 | |
327 | * character type = Py_UCS4 (32 bits, unsigned) |
328 | * all characters are in the range U+0000-U+10FFFF |
329 | * at least one character is in the range U+10000-U+10FFFF |
330 | */ |
331 | unsigned int kind:3; |
332 | /* Compact is with respect to the allocation scheme. Compact unicode |
333 | objects only require one memory block while non-compact objects use |
334 | one block for the PyUnicodeObject struct and another for its data |
335 | buffer. */ |
336 | unsigned int compact:1; |
337 | /* The string only contains characters in the range U+0000-U+007F (ASCII) |
338 | and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is |
339 | set, use the PyASCIIObject structure. */ |
340 | unsigned int ascii:1; |
341 | /* The ready flag indicates whether the object layout is initialized |
342 | completely. This means that this is either a compact object, or |
343 | the data pointer is filled out. The bit is redundant, and helps |
344 | to minimize the test in PyUnicode_IS_READY(). */ |
345 | unsigned int ready:1; |
346 | /* Padding to ensure that PyUnicode_DATA() is always aligned to |
347 | 4 bytes (see issue #19537 on m68k). */ |
348 | unsigned int :24; |
349 | } state; |
350 | wchar_t *wstr; /* wchar_t representation (null-terminated) */ |
351 | } PyASCIIObject; |
352 | |
353 | /* Non-ASCII strings allocated through PyUnicode_New use the |
354 | PyCompactUnicodeObject structure. state.compact is set, and the data |
355 | immediately follow the structure. */ |
356 | typedef struct { |
357 | PyASCIIObject _base; |
358 | Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the |
359 | * terminating \0. */ |
360 | char *utf8; /* UTF-8 representation (null-terminated) */ |
361 | Py_ssize_t wstr_length; /* Number of code points in wstr, possible |
362 | * surrogates count as two code points. */ |
363 | } PyCompactUnicodeObject; |
364 | |
365 | /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the |
366 | PyUnicodeObject structure. The actual string data is initially in the wstr |
367 | block, and copied into the data block using _PyUnicode_Ready. */ |
368 | typedef struct { |
369 | PyCompactUnicodeObject _base; |
370 | union { |
371 | void *any; |
372 | Py_UCS1 *latin1; |
373 | Py_UCS2 *ucs2; |
374 | Py_UCS4 *ucs4; |
375 | } data; /* Canonical, smallest-form Unicode buffer */ |
376 | } PyUnicodeObject; |
377 | #endif |
378 | |
379 | PyAPI_DATA(PyTypeObject) PyUnicode_Type; |
380 | PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; |
381 | |
/* PyUnicode_Check: tests the Py_TPFLAGS_UNICODE_SUBCLASS flag on the type
   of op through PyType_FastSubclass(). */
#define PyUnicode_Check(op) \
    PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)

/* PyUnicode_CheckExact: true only when the type of op is PyUnicode_Type
   itself. */
#define PyUnicode_CheckExact(op) \
    (Py_TYPE(op) == &PyUnicode_Type)
385 | |
386 | /* Fast access macros */ |
387 | #ifndef Py_LIMITED_API |
388 | |
/* Length of the wstr representation of op, in wchar_t code units.

   Compact ASCII objects are plain PyASCIIObject structures without a
   wstr_length field, so their length field is used instead; every other
   form stores the value in PyCompactUnicodeObject.wstr_length.

   op is parenthesized before it is cast, so that an expression argument
   (for instance a conditional or pointer arithmetic) is cast as a whole
   instead of having the cast bind to its first operand only.  op is
   evaluated more than once.  No type checks are performed. */
#define PyUnicode_WSTR_LENGTH(op) \
    (PyUnicode_IS_COMPACT_ASCII(op) ? \
     ((PyASCIIObject*)(op))->length : \
     ((PyCompactUnicodeObject*)(op))->wstr_length)
393 | |
/* Size of the deprecated Py_UNICODE representation in code units (a
   surrogate pair counts as 2 units).

   When the wstr pointer is already set, the cached length is returned.
   Otherwise PyUnicode_AsUnicode() is called first so that the
   representation is computed on request.  Use PyUnicode_GET_LENGTH() for
   the length in code points. */

#define PyUnicode_GET_SIZE(op) \
    (assert(PyUnicode_Check(op)), \
     ((((PyASCIIObject *)(op))->wstr) ? \
        PyUnicode_WSTR_LENGTH(op) : \
        ((void)PyUnicode_AsUnicode((PyObject *)(op)), \
         assert(((PyASCIIObject *)(op))->wstr), \
         PyUnicode_WSTR_LENGTH(op))))
406 | |
/* Size of the Py_UNICODE representation in bytes: the number of code
   units times the size of one code unit. */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
409 | |
/* Alias for PyUnicode_AsUnicode(): yields the cached wstr pointer when one
   exists and otherwise creates the wchar_t/Py_UNICODE representation on
   demand.  Using this macro is very inefficient now; try to port your
   code to the new PyUnicode_*BYTE_DATA() macros or to PyUnicode_WRITE()
   and PyUnicode_READ(). */

#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), \
     ((((PyASCIIObject *)(op))->wstr) ? \
        (((PyASCIIObject *)(op))->wstr) : \
        PyUnicode_AsUnicode((PyObject *)(op))))
419 | |
/* The buffer returned by PyUnicode_AS_UNICODE(), viewed as const char *. */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
422 | |
423 | |
424 | /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ |
425 | |
/* Values for PyASCIIObject.state: */

/* Interning state, stored in the state.interned bit-field. */
#define SSTATE_NOT_INTERNED         0
#define SSTATE_INTERNED_MORTAL      1
#define SSTATE_INTERNED_IMMORTAL    2
432 | |
/* Return true if the string contains only ASCII characters, or 0 if not.
   The string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must
   be ready.

   With assertions enabled the macro checks that op is a Unicode object
   and that it is ready; the value itself is the state.ascii bit.

   op is parenthesized before it is cast, so that an expression argument
   is cast as a whole instead of having the cast bind to its first
   operand only. */
#define PyUnicode_IS_ASCII(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)), \
     ((PyASCIIObject*)(op))->state.ascii)
440 | |
/* Value of the state.compact bit: true if the string is compact, 0 if
   not.  No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)
445 | |
/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not.  No type checks or Ready calls are performed.

   Both the state.ascii and the state.compact bit have to be set.  op is
   parenthesized before it is cast, so that an expression argument is cast
   as a whole instead of having the cast bind to its first operand only.
   op is evaluated more than once. */
#define PyUnicode_IS_COMPACT_ASCII(op) \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
450 | |
/* Values of the state.kind field. */
enum PyUnicode_Kind {
    /* The string only has wstr (wchar_t) characters.  This is only
       possible when the string was created with a legacy API and
       _PyUnicode_Ready() has not been called yet. */
    PyUnicode_WCHAR_KIND = 0,

    /* Return values of the PyUnicode_KIND() macro. */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
461 | |
/* Typed views of the canonical representation: the pointer returned by
   PyUnicode_DATA() cast to Py_UCS1 (unsigned char), Py_UCS2 or Py_UCS4
   for direct character access.  No checks are performed; use
   PyUnicode_KIND() beforehand to pick the macro that matches. */

#define PyUnicode_1BYTE_DATA(op)    ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op)    ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op)    ((Py_UCS4*)PyUnicode_DATA(op))
470 | |
/* Value of the state.kind field, i.e. one of the PyUnicode_*_KIND values
   defined above.  With assertions enabled, op is checked to be a ready
   Unicode object. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)), \
     ((PyASCIIObject *)(op))->state.kind)
476 | |
/* Return a void pointer to the raw unicode buffer of a compact object.
   The buffer starts right behind the structure: behind PyASCIIObject for
   ASCII strings, behind PyCompactUnicodeObject otherwise. */
#define _PyUnicode_COMPACT_DATA(op) \
    (PyUnicode_IS_ASCII(op) \
        ? ((void*)((PyASCIIObject*)(op) + 1)) \
        : ((void*)((PyCompactUnicodeObject*)(op) + 1)))
482 | |
/* Raw buffer of a non-compact object: the data.any pointer, which is
   asserted to be set. */
#define _PyUnicode_NONCOMPACT_DATA(op) \
    (assert(((PyUnicodeObject*)(op))->data.any), \
     (((PyUnicodeObject *)(op))->data.any))
486 | |
/* Pointer to the canonical character buffer of op, picking the compact or
   the non-compact accessor according to PyUnicode_IS_COMPACT().  With
   assertions enabled, op is checked to be a Unicode object. */
#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     (PyUnicode_IS_COMPACT(op) \
        ? _PyUnicode_COMPACT_DATA(op) \
        : _PyUnicode_NONCOMPACT_DATA(op)))
491 | |
492 | /* In the access macros below, "kind" may be evaluated more than once. |
493 | All other macro parameters are evaluated exactly once, so it is safe |
494 | to put side effects into them (such as increasing the index). */ |
495 | |
/* Store the code point `value` at position `index` (starting at 0) of the
   canonical buffer `data`, whose element width is selected by `kind`.

   The macro performs no sanity checks and is meant for use in loops; the
   caller should cache the kind and data pointers obtained from other
   macro calls.  Any kind other than 1BYTE or 2BYTE is treated as 4BYTE,
   which is asserted. */
#define PyUnicode_WRITE(kind, data, index, value) \
    do { \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
            break; \
        case PyUnicode_2BYTE_KIND: \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
            break; \
        default: \
            assert((kind) == PyUnicode_4BYTE_KIND); \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
            break; \
        } \
    } while (0)
518 | |
/* Fetch the code point at `index` from the canonical buffer `data`, whose
   element width is selected by `kind`; the result is a Py_UCS4.  No
   checks or ready calls are performed.  Any kind other than 1BYTE or
   2BYTE is read as 4BYTE. */
#define PyUnicode_READ(kind, data, index) \
    ((Py_UCS4) \
     ((kind) == PyUnicode_1BYTE_KIND \
        ? ((const Py_UCS1 *)(data))[(index)] \
        : ((kind) == PyUnicode_2BYTE_KIND \
             ? ((const Py_UCS2 *)(data))[(index)] \
             : ((const Py_UCS4 *)(data))[(index)])))
530 | |
/* Read the code point at `index` of the ready Unicode object `unicode`.

   PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because
   it calls PyUnicode_KIND() and might call it twice.  For single reads,
   use PyUnicode_READ_CHAR; for multiple consecutive reads callers should
   cache kind and data and use PyUnicode_READ instead. */
#define PyUnicode_READ_CHAR(unicode, index) \
    (assert(PyUnicode_Check(unicode)), \
     assert(PyUnicode_IS_READY(unicode)), \
     PyUnicode_READ(PyUnicode_KIND((unicode)), \
                    PyUnicode_DATA((unicode)), \
                    index))
546 | |
/* Length of the Unicode string, taken from the length field.  The caller
   has to make sure that the string has its canonical representation set
   before calling this macro; call PyUnicode_(FAST_)Ready to ensure that.
   With assertions enabled, op is checked to be a ready Unicode object. */
#define PyUnicode_GET_LENGTH(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)), \
     ((PyASCIIObject *)(op))->length)
554 | |
555 | |
/* Fast check to determine whether an object is ready.  Equivalent to
   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any)

   The value is the state.ready bit.  op is parenthesized before it is
   cast, so that an expression argument is cast as a whole instead of
   having the cast bind to its first operand only.  No type checks are
   performed. */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
560 | |
/* Make sure the canonical representation of op is set.

   PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
   case: a ready object yields 0 right away.  If the canonical
   representation is not yet set, _PyUnicode_Ready() is still called.
   Returns 0 on success and -1 on errors. */
#define PyUnicode_READY(op) \
    (assert(PyUnicode_Check(op)), \
     (PyUnicode_IS_READY(op) \
        ? 0 \
        : _PyUnicode_Ready((PyObject *)(op))))
569 | |
/* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
   than iterating over the string: the result depends only on the ascii
   flag and the kind (0x7f, 0xff, 0xffff or 0x10ffff). */
#define PyUnicode_MAX_CHAR_VALUE(op) \
    (assert(PyUnicode_IS_READY(op)), \
     (PyUnicode_IS_ASCII(op) \
        ? (0x7f) \
        : (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND \
             ? (0xffU) \
             : (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND \
                  ? (0xffffU) \
                  : (0x10ffffU)))))
582 | |
583 | #endif |
584 | |
585 | /* --- Constants ---------------------------------------------------------- */ |
586 | |
/* Replacement character used during decoding if the errors argument is
   set to "replace".  Note: U+FFFD is the official REPLACEMENT CHARACTER
   in Unicode 3.0. */

#define Py_UNICODE_REPLACEMENT_CHARACTER \
    ((Py_UCS4) 0xFFFD)
593 | |
594 | /* === Public API ========================================================= */ |
595 | |
596 | /* --- Plain Py_UNICODE --------------------------------------------------- */ |
597 | |
598 | /* With PEP 393, this is the recommended way to allocate a new unicode object. |
599 | This function will allocate the object and its buffer in a single memory |
600 | block. Objects created using this function are not resizable. */ |
601 | #ifndef Py_LIMITED_API |
602 | PyAPI_FUNC(PyObject*) PyUnicode_New( |
603 | Py_ssize_t size, /* Number of code points in the new string */ |
604 | Py_UCS4 maxchar /* maximum code point value in the string */ |
605 | ); |
606 | #endif |
607 | |
608 | /* Initializes the canonical string representation from the deprecated |
609 | wstr/Py_UNICODE representation. This function is used to convert Unicode |
610 | objects which were created using the old API to the new flexible format |
611 | introduced with PEP 393. |
612 | |
613 | Don't call this function directly, use the public PyUnicode_READY() macro |
614 | instead. */ |
615 | #ifndef Py_LIMITED_API |
616 | PyAPI_FUNC(int) _PyUnicode_Ready( |
617 | PyObject *unicode /* Unicode object */ |
618 | ); |
619 | #endif |
620 | |
621 | /* Get a copy of a Unicode string. */ |
622 | #ifndef Py_LIMITED_API |
623 | PyAPI_FUNC(PyObject*) _PyUnicode_Copy( |
624 | PyObject *unicode |
625 | ); |
626 | #endif |
627 | |
628 | /* Copy character from one unicode object into another, this function performs |
629 | character conversion when necessary and falls back to memcpy() if possible. |
630 | |
631 | Fail if to is too small (smaller than *how_many* or smaller than |
632 | len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > |
633 | kind(to), or if *to* has more than 1 reference. |
634 | |
635 | Return the number of written character, or return -1 and raise an exception |
636 | on error. |
637 | |
638 | Pseudo-code: |
639 | |
640 | how_many = min(how_many, len(from) - from_start) |
641 | to[to_start:to_start+how_many] = from[from_start:from_start+how_many] |
642 | return how_many |
643 | |
644 | Note: The function doesn't write a terminating null character. |
645 | */ |
646 | #ifndef Py_LIMITED_API |
647 | PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( |
648 | PyObject *to, |
649 | Py_ssize_t to_start, |
650 | PyObject *from, |
651 | Py_ssize_t from_start, |
652 | Py_ssize_t how_many |
653 | ); |
654 | |
655 | /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so |
656 | may crash if parameters are invalid (e.g. if the output string |
657 | is too short). */ |
658 | PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( |
659 | PyObject *to, |
660 | Py_ssize_t to_start, |
661 | PyObject *from, |
662 | Py_ssize_t from_start, |
663 | Py_ssize_t how_many |
664 | ); |
665 | #endif |
666 | |
667 | #ifndef Py_LIMITED_API |
668 | /* Fill a string with a character: write fill_char into |
669 | unicode[start:start+length]. |
670 | |
671 | Fail if fill_char is bigger than the string maximum character, or if the |
672 | string has more than 1 reference. |
673 | |
674 | Return the number of written character, or return -1 and raise an exception |
675 | on error. */ |
676 | PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( |
677 | PyObject *unicode, |
678 | Py_ssize_t start, |
679 | Py_ssize_t length, |
680 | Py_UCS4 fill_char |
681 | ); |
682 | |
683 | /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash |
684 | if parameters are invalid (e.g. if length is longer than the string). */ |
685 | PyAPI_FUNC(void) _PyUnicode_FastFill( |
686 | PyObject *unicode, |
687 | Py_ssize_t start, |
688 | Py_ssize_t length, |
689 | Py_UCS4 fill_char |
690 | ); |
691 | #endif |
692 | |
693 | /* Create a Unicode Object from the Py_UNICODE buffer u of the given |
694 | size. |
695 | |
696 | u may be NULL which causes the contents to be undefined. It is the |
697 | user's responsibility to fill in the needed data afterwards. Note |
698 | that modifying the Unicode object contents after construction is |
699 | only allowed if u was set to NULL. |
700 | |
701 | The buffer is copied into the new object. */ |
702 | |
703 | #ifndef Py_LIMITED_API |
704 | PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( |
705 | const Py_UNICODE *u, /* Unicode buffer */ |
706 | Py_ssize_t size /* size of buffer */ |
707 | ); |
708 | #endif |
709 | |
710 | /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ |
711 | PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( |
712 | const char *u, /* UTF-8 encoded string */ |
713 | Py_ssize_t size /* size of buffer */ |
714 | ); |
715 | |
716 | /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated |
717 | UTF-8 encoded bytes. The size is determined with strlen(). */ |
718 | PyAPI_FUNC(PyObject*) PyUnicode_FromString( |
719 | const char *u /* UTF-8 encoded string */ |
720 | ); |
721 | |
722 | #ifndef Py_LIMITED_API |
723 | /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. |
724 | Scan the string to find the maximum character. */ |
725 | PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( |
726 | int kind, |
727 | const void *buffer, |
728 | Py_ssize_t size); |
729 | |
730 | /* Create a new string from a buffer of ASCII characters. |
731 | WARNING: Don't check if the string contains any non-ASCII character. */ |
732 | PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( |
733 | const char *buffer, |
734 | Py_ssize_t size); |
735 | #endif |
736 | |
737 | PyAPI_FUNC(PyObject*) PyUnicode_Substring( |
738 | PyObject *str, |
739 | Py_ssize_t start, |
740 | Py_ssize_t end); |
741 | |
742 | #ifndef Py_LIMITED_API |
743 | /* Compute the maximum character of the substring unicode[start:end]. |
744 | Return 127 for an empty string. */ |
745 | PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( |
746 | PyObject *unicode, |
747 | Py_ssize_t start, |
748 | Py_ssize_t end); |
749 | #endif |
750 | |
751 | /* Copy the string into a UCS4 buffer including the null character if copy_null |
752 | is set. Return NULL and raise an exception on error. Raise a ValueError if |
753 | the buffer is smaller than the string. Return buffer on success. |
754 | |
755 | buflen is the length of the buffer in (Py_UCS4) characters. */ |
756 | PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( |
757 | PyObject *unicode, |
758 | Py_UCS4* buffer, |
759 | Py_ssize_t buflen, |
760 | int copy_null); |
761 | |
762 | /* Copy the string into a UCS4 buffer. A new buffer is allocated using |
763 | * PyMem_Malloc; if this fails, NULL is returned with a memory error |
764 | exception set. */ |
765 | PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); |
766 | |
767 | /* Return a read-only pointer to the Unicode object's internal |
768 | Py_UNICODE buffer. |
769 | If the wchar_t/Py_UNICODE representation is not yet available, this |
770 | function will calculate it. */ |
771 | |
772 | #ifndef Py_LIMITED_API |
773 | PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( |
774 | PyObject *unicode /* Unicode object */ |
775 | ); |
776 | #endif |
777 | |
/* Return a read-only pointer to the Unicode object's internal
   Py_UNICODE buffer and save the length at size.
   If the wchar_t/Py_UNICODE representation is not yet available, this
   function will calculate it.

   The pointer refers to storage inside the object; it is documented as
   read-only even though the return type is not const-qualified.
   Only declared when Py_LIMITED_API is not defined. */

#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
    PyObject *unicode, /* Unicode object */
    Py_ssize_t *size /* location where to save the length */
    );
#endif
789 | |
/* Get the length of the Unicode object.

   NOTE(review): presumably counted in characters (code points), in contrast
   to PyUnicode_GetSize() which is documented as counting Py_UNICODE units --
   confirm. The error return value is not documented here. */

PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
    PyObject *unicode
);
795 | |
/* Get the number of Py_UNICODE units in the
   string representation.

   NOTE(review): the error return value is not documented here. */

PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
    PyObject *unicode /* Unicode object */
    );
802 | |
/* Read a character from the string.

   unicode: the string to read from.
   index:   position of the character to read.

   NOTE(review): how an invalid index or a non-string argument is reported
   is not documented here -- check the implementation. */

PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
    PyObject *unicode,
    Py_ssize_t index
    );
809 | |
/* Write a character to the string. The string must have been created through
   PyUnicode_New, must not be shared, and must not have been hashed yet.

   unicode:   the string to modify.
   index:     position of the character to overwrite.
   character: the new character.

   Return 0 on success, -1 on error. */

PyAPI_FUNC(int) PyUnicode_WriteChar(
    PyObject *unicode,
    Py_ssize_t index,
    Py_UCS4 character
    );
820 | |
#ifndef Py_LIMITED_API
/* Get the maximum ordinal for a Unicode character.
   The value is returned as a Py_UNICODE.
   Only declared when Py_LIMITED_API is not defined. */
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
#endif
825 | |
/* Resize a Unicode object. The length is the number of characters, except
   if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
   is the number of Py_UNICODE characters.

   On success, *unicode is modified to point to the new (resized) object and
   0 is returned.

   The function tries to resize the string in place (which is usually faster
   than allocating a new string and copying the characters); otherwise it
   creates a new string.

   Error handling is implemented as follows: an exception is set, -1
   is returned and *unicode is left untouched.

   WARNING: The function doesn't check string content, the result may not be a
            string in canonical representation. */

PyAPI_FUNC(int) PyUnicode_Resize(
    PyObject **unicode, /* Pointer to the Unicode object */
    Py_ssize_t length /* New length */
    );
846 | |
/* Decode obj to a Unicode object.

   bytes, bytearray and other bytes-like objects are decoded according to the
   given encoding and error handler. The encoding and error handler can be
   NULL to have the interface use UTF-8 and "strict".

   All other objects (including Unicode objects) raise an exception.

   The API returns NULL in case of an error. The caller is responsible
   for decref'ing the returned object.

*/

PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
    PyObject *obj, /* Object */
    const char *encoding, /* encoding */
    const char *errors /* error handling */
    );
865 | |
/* Copy an instance of a Unicode subtype to a new true Unicode object if
   necessary. If obj is already a true Unicode object (not a subtype), return
   the same reference with its refcount *incremented*.

   In both cases the caller therefore receives a reference it is responsible
   for decref'ing. The API returns NULL in case of an error.

*/

PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
    PyObject *obj /* Object */
    );
878 | |
/* Two entry points taking an ASCII-encoded format string: the ...V variant
   receives its arguments as a va_list, the other one as variadic arguments.

   NOTE(review): neither function is documented in this header. They
   presumably build a Unicode object printf-style -- the set of supported
   format codes and the error behaviour must be checked in the implementation
   or the C API documentation. */
PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
    const char *format, /* ASCII-encoded string */
    va_list vargs
    );
PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
    const char *format, /* ASCII-encoded string */
    ...
    );
887 | |
888 | #ifndef Py_LIMITED_API |
/* State of a Unicode writer, as used by the _PyUnicodeWriter_*() functions.

   min_length, min_char and overallocate are the attributes a caller sets to
   control the allocation of the buffer; the remaining fields are maintained
   by the writer functions. */
typedef struct {
    /* The string object backing the writer (see readonly). */
    PyObject *buffer;
    /* NOTE(review): data and kind presumably cache the raw character storage
       and the storage kind of buffer -- confirm against the implementation. */
    void *data;
    enum PyUnicode_Kind kind;
    /* _PyUnicodeWriter_Prepare() considers a request as already satisfied
       when its maximum character is <= maxchar and its length is
       <= size - pos. */
    Py_UCS4 maxchar;
    Py_ssize_t size;
    Py_ssize_t pos;

    /* minimum number of allocated characters (default: 0) */
    Py_ssize_t min_length;

    /* minimum character (default: 127, ASCII) */
    Py_UCS4 min_char;

    /* If non-zero, overallocate the buffer by 25% (default: 0). */
    unsigned char overallocate;

    /* If readonly is 1, buffer is a shared string (cannot be modified)
       and size is set to 0. */
    unsigned char readonly;
} _PyUnicodeWriter ;
910 | |
/* Initialize a Unicode writer.
 *
 * By default, the minimum buffer size is 0 characters and overallocation is
 * disabled. Set the min_length, min_char and overallocate attributes of the
 * writer to control the allocation of the buffer. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
918 | |
/* Make sure the buffer of WRITER can receive LENGTH more characters whose
   maximum character is MAXCHAR.

   The macro evaluates to 0 without calling anything when the request already
   fits (MAXCHAR does not exceed the writer's maxchar and LENGTH is not larger
   than size - pos), or when LENGTH is 0. In every other case the work is
   delegated to _PyUnicodeWriter_PrepareInternal().

   Return 0 on success, raise an exception and return -1 on error.

   Being a macro, it may evaluate each of its arguments more than once. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)                  \
    ((((MAXCHAR) <= (WRITER)->maxchar                                      \
       && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                      \
      || (LENGTH) == 0)                                                    \
     ? 0                                                                   \
     : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))
930 | |
/* Slow path of the _PyUnicodeWriter_Prepare() macro, which calls it only
   when the request does not already fit and length is not 0.

   Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
   instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
    Py_ssize_t length, Py_UCS4 maxchar);
936 | |
/* Append a Unicode character.
   Return 0 on success, raise an exception and return -1 on error.

   writer: the writer to append to.
   ch:     the character to append. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
    Py_UCS4 ch
    );
943 | |
/* Append a Unicode string.
   Return 0 on success, raise an exception and return -1 on error.

   writer: the writer to append to.
   str:    the Unicode string object to append. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
    PyObject *str /* Unicode string */
    );
950 | |
/* Append a substring of a Unicode string.
   Return 0 on success, raise an exception and return -1 on error.

   writer:     the writer to append to.
   str:        the Unicode string object to take the substring from.
   start, end: bounds of the substring (presumably str[start:end], i.e. end
               excluded -- confirm). */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
    PyObject *str, /* Unicode string */
    Py_ssize_t start,
    Py_ssize_t end
    );
959 | |
/* Append an ASCII-encoded byte string.
   Return 0 on success, raise an exception and return -1 on error.

   len may be -1 when the number of bytes is unknown.
   NOTE(review): in that case the length is presumably derived from a
   terminating NUL byte -- confirm. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
    const char *str, /* ASCII-encoded byte string */
    Py_ssize_t len /* number of bytes, or -1 if unknown */
    );
967 | |
/* Append a latin1-encoded byte string.
   Return 0 on success, raise an exception and return -1 on error.

   Unlike for _PyUnicodeWriter_WriteASCIIString(), no "-1 if unknown"
   convention is documented for len here. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
    const char *str, /* latin1-encoded byte string */
    Py_ssize_t len /* length in bytes */
    );
975 | |
/* Get the value of the writer as a Unicode string. The buffer of the
   writer is cleared by this call.
   Raise an exception and return NULL on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
981 | |
/* Deallocate memory of a writer (clear its internal buffer).
   The function returns nothing, so no error is reported to the caller. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
985 | #endif |
986 | |
#ifndef Py_LIMITED_API
/* Format the object based on the format_spec, as defined in PEP 3101
   (Advanced String Formatting).

   writer:      the _PyUnicodeWriter involved in the formatting (presumably
                the formatted text is appended to it -- confirm).
   obj:         the object to format.
   format_spec: the format specification.
   start, end:  NOTE(review): presumably delimit the part of format_spec to
                use -- confirm.

   NOTE(review): the meaning of the int result is not documented here; it
   presumably follows the 0 / -1 convention of the writer functions. */
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
    _PyUnicodeWriter *writer,
    PyObject *obj,
    PyObject *format_spec,
    Py_ssize_t start,
    Py_ssize_t end);
#endif
997 | |
/* String interning API.

   NOTE(review): none of these functions is documented in this header; the
   remarks below are derived from the names and signatures only and must be
   confirmed against the implementation:
   - PyUnicode_InternInPlace / PyUnicode_InternImmortal take the address of
     an object pointer and return nothing, so any result is presumably
     communicated through that pointer.
   - PyUnicode_InternFromString takes a UTF-8 encoded C string.
   - _Py_ReleaseInternedUnicodeStrings is only declared when Py_LIMITED_API
     is not defined. */
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
    const char *u /* UTF-8 encoded string */
    );
#ifndef Py_LIMITED_API
PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
#endif
1006 | |
/* Evaluate to the state.interned field of op.

   Use only if you know it's a string: op is cast to PyASCIIObject * without
   any type check. */
#define PyUnicode_CHECK_INTERNED(op) \
    (((PyASCIIObject *)(op))->state.interned)
1010 | |
1011 | /* --- wchar_t support for platforms which support it --------------------- */ |
1012 | |
1013 | #ifdef HAVE_WCHAR_H |
1014 | |
/* Create a Unicode Object from the wchar_t buffer w of the given
   size.

   The buffer is copied into the new object, so the new object does not
   refer to w. */

PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
    const wchar_t *w, /* wchar_t buffer */
    Py_ssize_t size /* size of buffer */
    );
1024 | |
/* Copy the Unicode Object contents into the wchar_t buffer w. At
   most size wchar_t characters are copied.

   Note that the resulting wchar_t string may or may not be
   0-terminated. It is the responsibility of the caller to make sure
   that the wchar_t string is 0-terminated in case this is required by
   the application.

   Return the number of wchar_t characters copied (excluding a
   possibly trailing 0-termination character) or -1 in case of an
   error. */

PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
    PyObject *unicode, /* Unicode object */
    wchar_t *w, /* wchar_t buffer */
    Py_ssize_t size /* size of buffer */
    );
1042 | |
/* Convert the Unicode object to a wide character string. The output string
   always ends with a null character. If size is not NULL, write the number
   of wide characters (excluding the null character) into *size.

   On success, return a buffer allocated by PyMem_Malloc() (use PyMem_Free()
   to free it). On error, return NULL and raise a MemoryError; *size is
   undefined in that case. */

PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
    PyObject *unicode, /* Unicode object */
    Py_ssize_t *size /* number of characters of the result */
    );
1055 | |
#ifndef Py_LIMITED_API
/* NOTE(review): undocumented in this header. Judging by the name and
   signature it presumably returns the characters of s converted to the
   storage kind given by kind -- confirm, in particular who owns the returned
   memory and how errors are reported. */
PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
#endif
1059 | |
1060 | #endif |
1061 | |
1062 | /* --- Unicode ordinals --------------------------------------------------- */ |
1063 | |
/* Create a Unicode Object from the given Unicode code point ordinal.

   The ordinal must be in range(0x110000), i.e. 0 <= ordinal < 0x110000.
   A ValueError is raised in case it is not.

*/

PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
1072 | |
1073 | /* --- Free-list management ----------------------------------------------- */ |
1074 | |
/* Clear the free list used by the Unicode implementation.

   This can be used to release memory used for objects on the free
   list back to the Python memory allocator.

   NOTE(review): the meaning of the int result is not documented here --
   check the implementation.

*/

PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
1083 | |
1084 | /* === Builtin Codecs ===================================================== |
1085 | |
1086 | Many of these APIs take two arguments encoding and errors. These |
1087 | parameters encoding and errors have the same semantics as the ones |
1088 | of the builtin str() API. |
1089 | |
1090 | Setting encoding to NULL causes the default encoding (UTF-8) to be used. |
1091 | |
1092 | Error handling is set by errors which may also be set to NULL |
1093 | meaning to use the default handling defined for the codec. Default |
1094 | error handling for all builtin codecs is "strict" (ValueErrors are |
1095 | raised). |
1096 | |
The codecs all use a similar interface. Only deviations from the
generic interface are documented.
1099 | |
1100 | */ |
1101 | |
1102 | /* --- Manage the default encoding ---------------------------------------- */ |
1103 | |
/* Return a pointer to the default encoding (UTF-8) of the
   Unicode object unicode, and store the size of the encoded representation
   in bytes in *size.

   In case of an error, *size is not set.

   This function caches the UTF-8 encoded string in the unicodeobject
   and subsequent calls will return the same string. The memory is released
   when the unicodeobject is deallocated, so the caller must not free the
   returned pointer.

   _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
   support the previous internal function with the same behaviour.

   *** This API is for interpreter INTERNAL USE ONLY and will likely
   *** be removed or changed in the future.

   *** If you need to access the Unicode object as UTF-8 bytes string,
   *** please use PyUnicode_AsUTF8String() instead.
*/

#ifndef Py_LIMITED_API
PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
    PyObject *unicode,
    Py_ssize_t *size);
#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
#endif
1130 | |
/* Return a pointer to the default encoding (UTF-8) of the
   Unicode object unicode.

   Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
   in the unicodeobject.

   _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
   support the previous internal function with the same behaviour.

   Use of this API is DEPRECATED since no size information can be
   extracted from the returned data.

   *** This API is for interpreter INTERNAL USE ONLY and will likely
   *** be removed or changed for Python 3.1.
   NOTE(review): the "Python 3.1" target above looks stale (the sibling
   PyUnicode_AsUTF8AndSize() comment says "in the future") -- confirm and
   update.

   *** If you need to access the Unicode object as UTF-8 bytes string,
   *** please use PyUnicode_AsUTF8String() instead.

*/

#ifndef Py_LIMITED_API
PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
#define _PyUnicode_AsString PyUnicode_AsUTF8
#endif
1155 | |
/* Return the name of the default encoding, which is "utf-8".
   The result is a const C string. */

PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
1159 | |
1160 | /* --- Generic Codecs ----------------------------------------------------- */ |
1161 | |
/* Create a Unicode object by decoding the encoded string s of the
   given size.

   encoding and errors follow the conventions described for the builtin
   codecs: NULL selects the default encoding (UTF-8) and the default error
   handling of the codec, respectively. */

PyAPI_FUNC(PyObject*) PyUnicode_Decode(
    const char *s, /* encoded string */
    Py_ssize_t size, /* size of buffer */
    const char *encoding, /* encoding */
    const char *errors /* error handling */
    );
1171 | |
/* Decode a Unicode object unicode and return the result as a Python
   object.

   Compare PyUnicode_AsDecodedUnicode(), which is documented as returning
   a Unicode object. */

PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
    PyObject *unicode, /* Unicode object */
    const char *encoding, /* encoding */
    const char *errors /* error handling */
    );
1180 | |
/* Decode a Unicode object unicode and return the result as a Unicode
   object.

   Compare PyUnicode_AsDecodedObject(), which is documented as returning
   a Python object. */

PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
    PyObject *unicode, /* Unicode object */
    const char *encoding, /* encoding */
    const char *errors /* error handling */
    );
1189 | |
/* Encode a Py_UNICODE buffer of the given size and return a
   Python string object.

   This variant works on a raw Py_UNICODE buffer rather than on a Unicode
   object and is only declared when Py_LIMITED_API is not defined. */

#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_Encode(
    const Py_UNICODE *s, /* Unicode char buffer */
    Py_ssize_t size, /* number of Py_UNICODE chars to encode */
    const char *encoding, /* encoding */
    const char *errors /* error handling */
    );
#endif
1201 | |
/* Encode a Unicode object and return the result as a Python
   object.

   Compare PyUnicode_AsEncodedString() and PyUnicode_AsEncodedUnicode(),
   which are documented with a more specific result type. */

PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
    PyObject *unicode, /* Unicode object */
    const char *encoding, /* encoding */
    const char *errors /* error handling */
    );
1210 | |
/* Encode a Unicode object and return the result as a Python string
   object.

   encoding and errors follow the conventions described for the builtin
   codecs (NULL selects the defaults). */

PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
    PyObject *unicode, /* Unicode object */
    const char *encoding, /* encoding */
    const char *errors /* error handling */
    );
1219 | |
/* Encode a Unicode object and return the result as a Unicode
   object.

   encoding and errors follow the conventions described for the builtin
   codecs (NULL selects the defaults). */

PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
    PyObject *unicode, /* Unicode object */
    const char *encoding, /* encoding */
    const char *errors /* error handling */
    );
1228 | |
/* Build an encoding map from string, a 256 character map.

   NOTE(review): the type of the returned map object and its intended
   consumer (presumably the charmap encoder) are not documented here --
   confirm. */

PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
    PyObject* string /* 256 character map */
    );
1234 | |
1235 | /* --- UTF-7 Codecs ------------------------------------------------------- */ |
1236 | |
/* Decode length bytes of the UTF-7 encoded buffer string. errors selects
   the error handling as described for the builtin codecs. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
    const char *string, /* UTF-7 encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors /* error handling */
    );

/* Same as PyUnicode_DecodeUTF7(), with an additional consumed argument
   for the number of bytes consumed.
   NOTE(review): presumably consumed may be NULL, and a non-NULL value
   allows incomplete trailing input to be left undecoded -- confirm. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
    const char *string, /* UTF-7 encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors, /* error handling */
    Py_ssize_t *consumed /* bytes consumed */
    );

#ifndef Py_LIMITED_API
/* UTF-7 encoders: the first one works on a raw Py_UNICODE buffer, the
   private one on a Unicode object. base64SetO and base64WhiteSpace select
   whether RFC2152 Set O characters and whitespace are encoded in base64.
   Only declared when Py_LIMITED_API is not defined. */
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
    const Py_UNICODE *data, /* Unicode char buffer */
    Py_ssize_t length, /* number of Py_UNICODE chars to encode */
    int base64SetO, /* Encode RFC2152 Set O characters in base64 */
    int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
    const char *errors /* error handling */
    );
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
    PyObject *unicode, /* Unicode object */
    int base64SetO, /* Encode RFC2152 Set O characters in base64 */
    int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
    const char *errors /* error handling */
    );
#endif
1265 | |
1266 | /* --- UTF-8 Codecs ------------------------------------------------------- */ |
1267 | |
/* Decode length bytes of the UTF-8 encoded buffer string. errors selects
   the error handling as described for the builtin codecs. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
    const char *string, /* UTF-8 encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors /* error handling */
    );

/* Same as PyUnicode_DecodeUTF8(), with an additional consumed argument
   for the number of bytes consumed.
   NOTE(review): presumably consumed may be NULL, and a non-NULL value
   allows incomplete trailing input to be left undecoded -- confirm. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
    const char *string, /* UTF-8 encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors, /* error handling */
    Py_ssize_t *consumed /* bytes consumed */
    );

/* Encode a Unicode object to UTF-8. This is the function the
   PyUnicode_AsUTF8() / PyUnicode_AsUTF8AndSize() comments recommend for
   accessing a Unicode object as a UTF-8 bytes string. */
PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
    PyObject *unicode /* Unicode object */
    );

#ifndef Py_LIMITED_API
/* Private variant of PyUnicode_AsUTF8String() taking an explicit errors
   argument. */
PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
    PyObject *unicode,
    const char *errors);

/* UTF-8 encoder working on a raw Py_UNICODE buffer. */
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
    const Py_UNICODE *data, /* Unicode char buffer */
    Py_ssize_t length, /* number of Py_UNICODE chars to encode */
    const char *errors /* error handling */
    );
#endif
1296 | |
1297 | /* --- UTF-32 Codecs ------------------------------------------------------ */ |
1298 | |
/* Decode length bytes from a UTF-32 encoded buffer string and return
   the corresponding Unicode object.

   errors (if non-NULL) defines the error handling. It defaults
   to "strict".

   If byteorder is non-NULL, the decoder starts decoding using the
   given byte order:

    *byteorder == -1: little endian
    *byteorder == 0:  native order
    *byteorder == 1:  big endian

   In native mode, the first four bytes of the stream are checked for a
   BOM mark. If found, the BOM mark is analysed, the byte order
   adjusted and the BOM skipped.  In the other modes, no BOM mark
   interpretation is done. After completion, *byteorder is set to the
   current byte order at the end of input data.

   If byteorder is NULL, the codec starts in native order mode.

   The ...Stateful variant declared after it takes an additional consumed
   argument for the number of bytes consumed.

*/

PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
    const char *string, /* UTF-32 encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors, /* error handling */
    int *byteorder /* pointer to byteorder to use
    0=native;-1=LE,1=BE; updated on
    exit */
    );

PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
    const char *string, /* UTF-32 encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors, /* error handling */
    int *byteorder, /* pointer to byteorder to use
    0=native;-1=LE,1=BE; updated on
    exit */
    Py_ssize_t *consumed /* bytes consumed */
    );
1340 | |
/* Return a Python string using the UTF-32 encoding in native byte
   order. The string always starts with a BOM mark. */

PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
    PyObject *unicode /* Unicode object */
    );
1347 | |
/* Return a Python string object holding the UTF-32 encoded value of
   the Unicode data.

   The output is written according to the following byte order:

   byteorder == -1: little endian
   byteorder == 0:  native byte order (writes a BOM mark)
   byteorder == 1:  big endian

   If byteorder is 0, the output string will always start with the
   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   prepended.

   The first function works on a raw Py_UNICODE buffer, the private one on
   a Unicode object. Both are only declared when Py_LIMITED_API is not
   defined.

*/

#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
    const Py_UNICODE *data, /* Unicode char buffer */
    Py_ssize_t length, /* number of Py_UNICODE chars to encode */
    const char *errors, /* error handling */
    int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
    );
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
    PyObject *object, /* Unicode object */
    const char *errors, /* error handling */
    int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
    );
#endif
1377 | |
1378 | /* --- UTF-16 Codecs ------------------------------------------------------ */ |
1379 | |
/* Decode length bytes from a UTF-16 encoded buffer string and return
   the corresponding Unicode object.

   errors (if non-NULL) defines the error handling. It defaults
   to "strict".

   If byteorder is non-NULL, the decoder starts decoding using the
   given byte order:

    *byteorder == -1: little endian
    *byteorder == 0:  native order
    *byteorder == 1:  big endian

   In native mode, the first two bytes of the stream are checked for a
   BOM mark. If found, the BOM mark is analysed, the byte order
   adjusted and the BOM skipped.  In the other modes, no BOM mark
   interpretation is done. After completion, *byteorder is set to the
   current byte order at the end of input data.

   If byteorder is NULL, the codec starts in native order mode.

   The ...Stateful variant declared after it takes an additional consumed
   argument for the number of bytes consumed.

*/

PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
    const char *string, /* UTF-16 encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors, /* error handling */
    int *byteorder /* pointer to byteorder to use
    0=native;-1=LE,1=BE; updated on
    exit */
    );

PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
    const char *string, /* UTF-16 encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors, /* error handling */
    int *byteorder, /* pointer to byteorder to use
    0=native;-1=LE,1=BE; updated on
    exit */
    Py_ssize_t *consumed /* bytes consumed */
    );
1421 | |
/* Return a Python string using the UTF-16 encoding in native byte
   order. The string always starts with a BOM mark. */

PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
    PyObject *unicode /* Unicode object */
    );
1428 | |
/* Return a Python string object holding the UTF-16 encoded value of
   the Unicode data.

   The output is written according to the following byte order:

   byteorder == -1: little endian
   byteorder == 0:  native byte order (writes a BOM mark)
   byteorder == 1:  big endian

   If byteorder is 0, the output string will always start with the
   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   prepended.

   Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
   UCS-2. This trick makes it possible to add full UTF-16 capabilities
   at a later point without compromising the APIs.

   The first function works on a raw Py_UNICODE buffer, the private one on
   a Unicode object. Both are only declared when Py_LIMITED_API is not
   defined.

*/

#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
    const Py_UNICODE *data, /* Unicode char buffer */
    Py_ssize_t length, /* number of Py_UNICODE chars to encode */
    const char *errors, /* error handling */
    int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
    );
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
    PyObject* unicode, /* Unicode object */
    const char *errors, /* error handling */
    int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
    );
#endif
1462 | |
1463 | /* --- Unicode-Escape Codecs ---------------------------------------------- */ |
1464 | |
/* Decode length bytes of the Unicode-Escape encoded buffer string. errors
   selects the error handling as described for the builtin codecs. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
    const char *string, /* Unicode-Escape encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors /* error handling */
    );

/* Unicode-Escape encoder working on a Unicode object. It takes no errors
   argument. */
PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
    PyObject *unicode /* Unicode object */
    );

#ifndef Py_LIMITED_API
/* Unicode-Escape encoder working on a raw Py_UNICODE buffer. It takes no
   errors argument. Only declared when Py_LIMITED_API is not defined. */
PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
    const Py_UNICODE *data, /* Unicode char buffer */
    Py_ssize_t length /* Number of Py_UNICODE chars to encode */
    );
#endif
1481 | |
1482 | /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ |
1483 | |
/* Decode length bytes of the Raw-Unicode-Escape encoded buffer string.
   errors selects the error handling as described for the builtin codecs. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
    const char *string, /* Raw-Unicode-Escape encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors /* error handling */
    );

/* Raw-Unicode-Escape encoder working on a Unicode object. It takes no
   errors argument. */
PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
    PyObject *unicode /* Unicode object */
    );

#ifndef Py_LIMITED_API
/* Raw-Unicode-Escape encoder working on a raw Py_UNICODE buffer. It takes
   no errors argument. Only declared when Py_LIMITED_API is not defined. */
PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
    const Py_UNICODE *data, /* Unicode char buffer */
    Py_ssize_t length /* Number of Py_UNICODE chars to encode */
    );
#endif
1500 | |
1501 | /* --- Unicode Internal Codec --------------------------------------------- |
1502 | |
1503 | Only for internal use in _codecsmodule.c */ |
1504 | |
#ifndef Py_LIMITED_API
/* Decoder of the "Unicode Internal" codec; only for internal use in
   _codecsmodule.c.

   NOTE(review): unlike the other prototypes in this header, this one is
   declared without PyAPI_FUNC(). That matches the "internal use only"
   remark, but it should be confirmed that this is intentional and that no
   separately built module needs the symbol. */
PyObject *_PyUnicode_DecodeUnicodeInternal(
    const char *string,
    Py_ssize_t length,
    const char *errors
    );
#endif
1512 | |
1513 | /* --- Latin-1 Codecs ----------------------------------------------------- |
1514 | |
1515 | Note: Latin-1 corresponds to the first 256 Unicode ordinals. |
1516 | |
1517 | */ |
1518 | |
/* Decode length bytes of the Latin-1 encoded buffer string. errors selects
   the error handling as described for the builtin codecs. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
    const char *string, /* Latin-1 encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors /* error handling */
    );

/* Latin-1 encoder working on a Unicode object. It takes no errors
   argument; see _PyUnicode_AsLatin1String() for a variant that does. */
PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
    PyObject *unicode /* Unicode object */
    );

#ifndef Py_LIMITED_API
/* Private variant of PyUnicode_AsLatin1String() taking an explicit errors
   argument. */
PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
    PyObject* unicode,
    const char* errors);

/* Latin-1 encoder working on a raw Py_UNICODE buffer. */
PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
    const Py_UNICODE *data, /* Unicode char buffer */
    Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
    const char *errors /* error handling */
    );
#endif
1540 | |
1541 | /* --- ASCII Codecs ------------------------------------------------------- |
1542 | |
Only 7-bit ASCII data is accepted. All other codes generate errors.
1544 | |
1545 | */ |
1546 | |
/* Decode length bytes of the ASCII encoded buffer string. errors selects
   the error handling as described for the builtin codecs. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
    const char *string, /* ASCII encoded string */
    Py_ssize_t length, /* size of string */
    const char *errors /* error handling */
    );

/* ASCII encoder working on a Unicode object. It takes no errors argument;
   see _PyUnicode_AsASCIIString() for a variant that does. */
PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
    PyObject *unicode /* Unicode object */
    );

#ifndef Py_LIMITED_API
/* Private variant of PyUnicode_AsASCIIString() taking an explicit errors
   argument. */
PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
    PyObject* unicode,
    const char* errors);

/* ASCII encoder working on a raw Py_UNICODE buffer. */
PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
    const Py_UNICODE *data, /* Unicode char buffer */
    Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
    const char *errors /* error handling */
    );
#endif
1568 | |
1569 | /* --- Character Map Codecs ----------------------------------------------- |
1570 | |
1571 | This codec uses mappings to encode and decode characters. |
1572 | |
1573 | Decoding mappings must map single string characters to single |
1574 | Unicode characters, integers (which are then interpreted as Unicode |
1575 | ordinals) or None (meaning "undefined mapping" and causing an |
1576 | error). |
1577 | |
1578 | Encoding mappings must map single Unicode characters to single |
1579 | string characters, integers (which are then interpreted as Latin-1 |
1580 | ordinals) or None (meaning "undefined mapping" and causing an |
1581 | error). |
1582 | |
1583 | If a character lookup fails with a LookupError, the character is |
1584 | copied as-is meaning that its ordinal value will be interpreted as |
Unicode or Latin-1 ordinal resp. Because of this, mappings only need
to contain those entries which map characters to different code
points.
1588 | |
1589 | */ |
1590 | |
1591 | PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( |
1592 | const char *string, /* Encoded string */ |
1593 | Py_ssize_t length, /* size of string */ |
1594 | PyObject *mapping, /* character mapping |
1595 | (char ordinal -> unicode ordinal) */ |
1596 | const char *errors /* error handling */ |
1597 | ); |
1598 | |
1599 | PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( |
1600 | PyObject *unicode, /* Unicode object */ |
1601 | PyObject *mapping /* character mapping |
1602 | (unicode ordinal -> char ordinal) */ |
1603 | ); |
1604 | |
1605 | #ifndef Py_LIMITED_API |
/* Buffer-based charmap encoder: encodes `length` Py_UNICODE chars starting
   at `data` through `mapping`.  Not declared under Py_LIMITED_API. */
PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
    const Py_UNICODE *data,     /* Unicode char buffer */
    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
    PyObject *mapping,          /* character mapping
                                   (unicode ordinal -> char ordinal) */
    const char *errors          /* error handling */
    );
/* Underscore-prefixed variant of PyUnicode_EncodeCharmap() that takes a
   Unicode object instead of a Py_UNICODE buffer and length.  Not declared
   under Py_LIMITED_API. */
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
    PyObject *unicode,          /* Unicode object */
    PyObject *mapping,          /* character mapping
                                   (unicode ordinal -> char ordinal) */
    const char *errors          /* error handling */
    );
1619 | #endif |
1620 | |
1621 | /* Translate a Py_UNICODE buffer of the given length by applying a |
1622 | character mapping table to it and return the resulting Unicode |
1623 | object. |
1624 | |
1625 | The mapping table must map Unicode ordinal integers to Unicode |
1626 | ordinal integers or None (causing deletion of the character). |
1627 | |
1628 | Mapping tables may be dictionaries or sequences. Unmapped character |
1629 | ordinals (ones which cause a LookupError) are left untouched and |
1630 | are copied as-is. |
1631 | |
1632 | */ |
1633 | |
1634 | #ifndef Py_LIMITED_API |
PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
    const Py_UNICODE *data,     /* Unicode char buffer */
    Py_ssize_t length,          /* Number of Py_UNICODE chars to translate */
    PyObject *table,            /* Translate table (dict or sequence) */
    const char *errors          /* error handling */
    );
1641 | #endif |
1642 | |
1643 | #ifdef HAVE_MBCS |
1644 | |
1645 | /* --- MBCS codecs for Windows -------------------------------------------- */ |
1646 | |
/* Decode `length` chars of the MBCS-encoded `string`.  Only declared when
   HAVE_MBCS is defined. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
    const char *string,         /* MBCS encoded string */
    Py_ssize_t length,          /* size of string */
    const char *errors          /* error handling */
    );
1652 | |
/* Same inputs as PyUnicode_DecodeMBCS() plus the `consumed` out-parameter.
   NOTE(review): presumably, when `consumed` is non-NULL, a trailing
   incomplete byte sequence is left undecoded rather than treated as an
   error, and the number of bytes actually decoded is stored in *consumed
   -- confirm against the implementation. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
    const char *string,         /* MBCS encoded string */
    Py_ssize_t length,          /* size of string */
    const char *errors,         /* error handling */
    Py_ssize_t *consumed        /* bytes consumed */
    );
1659 | |
/* Takes the same arguments as PyUnicode_DecodeMBCSStateful(), preceded by
   an explicit code page number selecting the encoding to decode from. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
    int code_page,              /* code page number */
    const char *string,         /* encoded string */
    Py_ssize_t length,          /* size of string */
    const char *errors,         /* error handling */
    Py_ssize_t *consumed        /* bytes consumed */
    );
1667 | |
/* Encode a Unicode object with the MBCS codec; takes no `errors` argument.
   NOTE(review): the result is presumably a bytes object -- confirm. */
PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
    PyObject *unicode           /* Unicode object */
    );
1671 | |
1672 | #ifndef Py_LIMITED_API |
/* Buffer-based MBCS encoder for `length` Py_UNICODE chars starting at
   `data`.  Not declared under Py_LIMITED_API. */
PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
    const Py_UNICODE *data,     /* Unicode char buffer */
    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
    const char *errors          /* error handling */
    );
1678 | #endif |
1679 | |
/* Encode a Unicode object using the code page identified by `code_page`.
   Only declared when HAVE_MBCS is defined. */
PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
    int code_page,              /* code page number */
    PyObject *unicode,          /* Unicode object */
    const char *errors          /* error handling */
    );
1685 | |
1686 | #endif /* HAVE_MBCS */ |
1687 | |
1688 | /* --- Decimal Encoder ---------------------------------------------------- */ |
1689 | |
1690 | /* Takes a Unicode string holding a decimal value and writes it into |
1691 | an output buffer using standard ASCII digit codes. |
1692 | |
1693 | The output buffer has to provide at least length+1 bytes of storage |
1694 | area. The output string is 0-terminated. |
1695 | |
   The encoder converts whitespace to ' ', decimal characters to their
   corresponding ASCII digit and all other Latin-1 characters except
   \0 as-is. Characters outside this range (Unicode ordinals 1-255)
   are treated as errors. This includes embedded null characters.
1700 | |
1701 | Error handling is defined by the errors argument: |
1702 | |
1703 | NULL or "strict": raise a ValueError |
1704 | "ignore": ignore the wrong characters (these are not copied to the |
1705 | output buffer) |
1706 | "replace": replaces illegal characters with '?' |
1707 | |
1708 | Returns 0 on success, -1 on failure. |
1709 | |
1710 | */ |
1711 | |
1712 | #ifndef Py_LIMITED_API |
PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
    Py_UNICODE *s,              /* Unicode buffer */
    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
    char *output,               /* Output buffer; must have size >= length + 1
                                   (the output is 0-terminated) */
    const char *errors          /* error handling */
    );
1719 | #endif |
1720 | |
1721 | /* Transforms code points that have decimal digit property to the |
1722 | corresponding ASCII digit code points. |
1723 | |
1724 | Returns a new Unicode string on success, NULL on failure. |
1725 | */ |
1726 | |
1727 | #ifndef Py_LIMITED_API |
PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
    Py_UNICODE *s,              /* Unicode buffer holding the input
                                   (declared non-const) */
    Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
    );
1732 | #endif |
1733 | |
1734 | /* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyObject |
1735 | as argument instead of a raw buffer and length. This function additionally |
1736 | transforms spaces to ASCII because this is what the callers in longobject, |
1737 | floatobject, and complexobject did anyways. */ |
1738 | |
1739 | #ifndef Py_LIMITED_API |
PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
    PyObject *unicode           /* Unicode object to transform */
    );
1743 | #endif |
1744 | |
1745 | /* --- Locale encoding --------------------------------------------------- */ |
1746 | |
/* Decode a string from the current locale encoding using the error handler
   named by *errors*.  With the 'surrogateescape' error handler (PEP 383),
   undecodable bytes are escaped, and a byte sequence that could be decoded
   as a surrogate character is escaped as well instead of being decoded.
   *str* must end with a null character but cannot contain embedded null
   characters.

   NOTE(review): earlier wording described an int *surrogateescape* flag that
   this signature does not have.  Presumably errors == NULL or "strict"
   selects the strict decoder and "surrogateescape" the PEP 383 handler --
   confirm against the implementation. */

PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
    const char *str,            /* locale-encoded, null-terminated string */
    Py_ssize_t len,             /* length of str */
    const char *errors);        /* error handler name */
1759 | |
1760 | /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string |
1761 | length using strlen(). */ |
1762 | |
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
    const char *str,            /* null-terminated string; its length is
                                   computed with strlen() */
    const char *errors);        /* error handler name */
1766 | |
/* Encode a Unicode object to the current locale encoding using the error
   handler named by *errors*.  Return a bytes object.  The string cannot
   contain embedded null characters.

   NOTE(review): earlier wording described an int *surrogateescape* flag that
   this signature does not have.  Presumably errors == NULL or "strict"
   selects the strict encoder and "surrogateescape" the "surrogateescape"
   error handler -- confirm against the implementation. */

PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
    PyObject *unicode,          /* Unicode object to encode */
    const char *errors          /* error handler name */
    );
1776 | |
1777 | /* --- File system encoding ---------------------------------------------- */ |
1778 | |
/* ParseTuple converter: encode str objects to bytes using
   PyUnicode_EncodeFSDefault(); bytes objects are output as-is.

   NOTE(review): the parameters are unnamed here.  Presumably the first is
   the object to convert and the second is the address of a PyObject* that
   receives the result, with the int return value signalling
   success/failure -- confirm against the implementation. */

PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1783 | |
/* ParseTuple converter: decode bytes objects to unicode using
   PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is.

   NOTE(review): the parameters are unnamed here.  Presumably the first is
   the object to convert and the second is the address of a PyObject* that
   receives the result, with the int return value signalling
   success/failure -- confirm against the implementation. */

PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1788 | |
1789 | /* Decode a null-terminated string using Py_FileSystemDefaultEncoding |
1790 | and the "surrogateescape" error handler. |
1791 | |
1792 | If Py_FileSystemDefaultEncoding is not set, fall back to the locale |
1793 | encoding. |
1794 | |
1795 | Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. |
1796 | */ |
1797 | |
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
    const char *s               /* null-terminated encoded string */
    );
1801 | |
1802 | /* Decode a string using Py_FileSystemDefaultEncoding |
1803 | and the "surrogateescape" error handler. |
1804 | |
1805 | If Py_FileSystemDefaultEncoding is not set, fall back to the locale |
1806 | encoding. |
1807 | */ |
1808 | |
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
    const char *s,              /* encoded string */
    Py_ssize_t size             /* size of the encoded string s */
    );
1813 | |
1814 | /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the |
1815 | "surrogateescape" error handler, and return bytes. |
1816 | |
1817 | If Py_FileSystemDefaultEncoding is not set, fall back to the locale |
1818 | encoding. |
1819 | */ |
1820 | |
PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
    PyObject *unicode           /* Unicode object to encode */
    );
1824 | |
1825 | /* --- Methods & Slots ---------------------------------------------------- |
1826 | |
1827 | These are capable of handling Unicode objects and strings on input |
1828 | (we refer to them as strings in the descriptions) and return |
1829 | Unicode objects or integers as appropriate. */ |
1830 | |
/* Concatenate two strings, giving a new Unicode string.
   NOTE(review): presumably returns NULL with an exception set on failure
   -- confirm. */

PyAPI_FUNC(PyObject*) PyUnicode_Concat(
    PyObject *left,             /* Left string */
    PyObject *right             /* Right string */
    );
1837 | |
/* Concatenate two strings and put the result in *pleft
   (sets *pleft to NULL on error).  The function returns void, so the
   value left in *pleft is the only failure indication in this signature. */

PyAPI_FUNC(void) PyUnicode_Append(
    PyObject **pleft,           /* Pointer to left string; updated in place */
    PyObject *right             /* Right string */
    );
1845 | |
/* Concatenate two strings, put the result in *pleft and drop the right
   object (sets *pleft to NULL on error).
   NOTE(review): "drop" presumably means the caller's reference to `right`
   is released -- confirm. */

PyAPI_FUNC(void) PyUnicode_AppendAndDel(
    PyObject **pleft,           /* Pointer to left string; updated in place */
    PyObject *right             /* Right string; dropped by this call */
    );
1853 | |
1854 | /* Split a string giving a list of Unicode strings. |
1855 | |
1856 | If sep is NULL, splitting will be done at all whitespace |
1857 | substrings. Otherwise, splits occur at the given separator. |
1858 | |
1859 | At most maxsplit splits will be done. If negative, no limit is set. |
1860 | |
1861 | Separators are not included in the resulting list. |
1862 | |
1863 | */ |
1864 | |
PyAPI_FUNC(PyObject*) PyUnicode_Split(
    PyObject *s,                /* String to split */
    PyObject *sep,              /* String separator, or NULL to split at
                                   whitespace substrings */
    Py_ssize_t maxsplit         /* Maxsplit count; negative means no limit */
    );
1870 | |
/* Ditto, but split at line breaks.

   CRLF is considered to be one line break. Line breaks are not
   included in the resulting list unless keepends is true. */

PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
    PyObject *s,                /* String to split */
    int keepends                /* If true, line end markers are included */
    );
1880 | |
/* Partition a string using a given separator.
   NOTE(review): presumably mirrors str.partition() and returns a 3-tuple
   (head, separator, tail) -- confirm. */

PyAPI_FUNC(PyObject*) PyUnicode_Partition(
    PyObject *s,                /* String to partition */
    PyObject *sep               /* String separator */
    );
1887 | |
/* Partition a string using a given separator, searching from the end of the
   string.
   NOTE(review): presumably mirrors str.rpartition() and returns a 3-tuple
   (head, separator, tail) -- confirm. */

PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
    PyObject *s,                /* String to partition */
    PyObject *sep               /* String separator */
    );
1895 | |
/* Split a string giving a list of Unicode strings.

   If sep is NULL, splitting will be done at all whitespace
   substrings. Otherwise, splits occur at the given separator.

   At most maxsplit splits will be done. If negative, no limit is set.
   Unlike PyUnicode_Split(), PyUnicode_RSplit() splits from the end of
   the string.

   Separators are not included in the resulting list.

*/

PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
    PyObject *s,                /* String to split */
    PyObject *sep,              /* String separator, or NULL to split at
                                   whitespace substrings */
    Py_ssize_t maxsplit         /* Maxsplit count; negative means no limit */
    );
1914 | |
1915 | /* Translate a string by applying a character mapping table to it and |
1916 | return the resulting Unicode object. |
1917 | |
1918 | The mapping table must map Unicode ordinal integers to Unicode |
1919 | ordinal integers or None (causing deletion of the character). |
1920 | |
1921 | Mapping tables may be dictionaries or sequences. Unmapped character |
1922 | ordinals (ones which cause a LookupError) are left untouched and |
1923 | are copied as-is. |
1924 | |
1925 | */ |
1926 | |
PyAPI_FUNC(PyObject *) PyUnicode_Translate(
    PyObject *str,              /* String to translate */
    PyObject *table,            /* Translate table (dict or sequence) */
    const char *errors          /* error handling */
    );
1932 | |
/* Join a sequence of strings using the given separator and return
   the resulting Unicode string. */

PyAPI_FUNC(PyObject*) PyUnicode_Join(
    PyObject *separator,        /* Separator string */
    PyObject *seq               /* Sequence of strings to join */
    );
1940 | |
/* Return 1 if substr matches str[start:end] at the given tail end, 0
   otherwise.  direction selects which end is tested: -1 for a prefix
   match, +1 for a suffix match.
   NOTE(review): the return type is Py_ssize_t rather than int; presumably
   -1 is returned when an error occurred -- confirm. */

PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
    PyObject *str,              /* String */
    PyObject *substr,           /* Prefix or Suffix string */
    Py_ssize_t start,           /* Start index */
    Py_ssize_t end,             /* Stop index */
    int direction               /* Tail end: -1 prefix, +1 suffix */
    );
1951 | |
1952 | /* Return the first position of substr in str[start:end] using the |
1953 | given search direction or -1 if not found. -2 is returned in case |
1954 | an error occurred and an exception is set. */ |
1955 | |
PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
    PyObject *str,              /* String to search */
    PyObject *substr,           /* Substring to find */
    Py_ssize_t start,           /* Start index of the searched slice */
    Py_ssize_t end,             /* Stop index of the searched slice */
    int direction               /* Find direction: +1 forward, -1 backward */
    );
1963 | |
/* Like PyUnicode_Find, but search for single character only. */
PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
    PyObject *str,              /* String to search */
    Py_UCS4 ch,                 /* Character to find */
    Py_ssize_t start,           /* Start index */
    Py_ssize_t end,             /* Stop index */
    int direction               /* Find direction; NOTE(review): presumably
                                   +1 forward, -1 backward as for
                                   PyUnicode_Find -- confirm */
    );
1972 | |
/* Count the number of occurrences of substr in str[start:end].
   NOTE(review): presumably a negative value is returned when an error
   occurred -- confirm. */

PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
    PyObject *str,              /* String */
    PyObject *substr,           /* Substring to count */
    Py_ssize_t start,           /* Start index */
    Py_ssize_t end              /* Stop index */
    );
1981 | |
/* Replace at most maxcount occurrences of substr in str with replstr
   and return the resulting Unicode object. */

PyAPI_FUNC(PyObject *) PyUnicode_Replace(
    PyObject *str,              /* String */
    PyObject *substr,           /* Substring to find */
    PyObject *replstr,          /* Replacement string substituted for each
                                   occurrence of substr */
    Py_ssize_t maxcount         /* Max. number of replacements to apply;
                                   -1 = all */
    );
1992 | |
/* Compare two strings and return -1, 0, 1 for less than, equal,
   greater than resp.
   Raise an exception and return -1 on error.  Because -1 is also the
   "less than" result, the return value alone does not distinguish the two
   cases; the raised exception does. */

PyAPI_FUNC(int) PyUnicode_Compare(
    PyObject *left,             /* Left string */
    PyObject *right             /* Right string */
    );
2001 | |
2002 | #ifndef Py_LIMITED_API |
/* Compare a string with a _Py_Identifier.  Not declared under
   Py_LIMITED_API.
   NOTE(review): presumably follows the return convention of
   PyUnicode_Compare() -- confirm. */
PyAPI_FUNC(int) _PyUnicode_CompareWithId(
    PyObject *left,             /* Left string */
    _Py_Identifier *right       /* Right identifier */
    );
2007 | #endif |
2008 | |
/* Compare a string with a C string.
   NOTE(review): presumably returns -1, 0, 1 like PyUnicode_Compare();
   whether an error can be raised is not visible here -- confirm. */
PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
    PyObject *left,             /* Left string */
    const char *right           /* ASCII-encoded string */
    );
2013 | |
/* Rich compare two strings and return one of the following:

   - NULL in case an exception was raised
   - Py_True or Py_False for successful comparisons
   - Py_NotImplemented in case the type combination is unknown

   Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
   case the conversion of the arguments to Unicode fails with a
   UnicodeDecodeError.

   Possible values for op:

     Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE

*/

PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
    PyObject *left,             /* Left string */
    PyObject *right,            /* Right string */
    int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
    );
2035 | |
/* Apply an argument tuple or dictionary to a format string and return
   the resulting Unicode string.
   NOTE(review): presumably the C-level counterpart of `format % args`
   -- confirm. */

PyAPI_FUNC(PyObject *) PyUnicode_Format(
    PyObject *format,           /* Format string */
    PyObject *args              /* Argument tuple or dictionary */
    );
2043 | |
/* Check whether element is contained in container and return 1/0
   accordingly.

   element has to coerce to a one element Unicode string. -1 is
   returned in case of an error. */

PyAPI_FUNC(int) PyUnicode_Contains(
    PyObject *container,        /* Container string */
    PyObject *element           /* Element string */
    );
2054 | |
/* Check whether the argument is a valid identifier.
   NOTE(review): presumably returns 1 if it is and 0 otherwise -- confirm. */

PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
2058 | |
2059 | #ifndef Py_LIMITED_API |
/* Externally visible for str.strip(unicode) */
PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
    PyObject *self,             /* String to strip */
    int striptype,              /* NOTE(review): presumably selects
                                   left/right/both-sided stripping
                                   -- confirm */
    PyObject *sepobj            /* NOTE(review): presumably the characters
                                   to strip -- confirm */
    );
2066 | #endif |
2067 | |
2068 | /* Using explicit passed-in values, insert the thousands grouping |
2069 | into the string pointed to by buffer. For the argument descriptions, |
2070 | see Objects/stringlib/localeutil.h */ |
2071 | #ifndef Py_LIMITED_API |
/* Parameter meanings are documented in Objects/stringlib/localeutil.h;
   they are deliberately not repeated here. */
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
    PyObject *unicode,
    Py_ssize_t index,
    Py_ssize_t n_buffer,
    void *digits,
    Py_ssize_t n_digits,
    Py_ssize_t min_width,
    const char *grouping,
    PyObject *thousands_sep,
    Py_UCS4 *maxchar);
2082 | #endif |
2083 | /* === Characters Type APIs =============================================== */ |
2084 | |
2085 | /* Helper array used by Py_UNICODE_ISSPACE(). */ |
2086 | |
2087 | #ifndef Py_LIMITED_API |
/* NOTE(review): presumably indexed by ASCII code, with a nonzero entry
   marking a whitespace character -- confirm against the definition. */
PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
2089 | |
2090 | /* These should not be used directly. Use the Py_UNICODE_IS* and |
2091 | Py_UNICODE_TO* macros instead. |
2092 | |
2093 | These APIs are implemented in Objects/unicodectype.c. |
2094 | |
2095 | */ |
2096 | |
/* Case predicates on a single code point.
   NOTE(review): presumably each returns nonzero when the character has the
   named property and 0 otherwise -- confirm in Objects/unicodectype.c. */

PyAPI_FUNC(int) _PyUnicode_IsLowercase(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_IsUppercase(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
    Py_UCS4 ch       /* Unicode character */
    );
2108 | |
/* Identifier-related predicates on a single code point.
   NOTE(review): presumably these test the Unicode XID_Start and
   XID_Continue properties -- confirm in Objects/unicodectype.c. */

PyAPI_FUNC(int) _PyUnicode_IsXidStart(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
    Py_UCS4 ch       /* Unicode character */
    );
2116 | |
/* Whitespace and line-break predicates on a single code point.  Unlike the
   neighbouring declarations these spell the by-value parameter as
   `const Py_UCS4`; a top-level const on a by-value parameter does not
   change the function's type as seen by callers. */

PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
    const Py_UCS4 ch         /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
    const Py_UCS4 ch         /* Unicode character */
    );
2124 | |
/* Simple case conversions: one code point in, one code point out (the
   return type is Py_UCS4).
   NOTE(review): presumably the input is returned unchanged when it has no
   such mapping -- confirm in Objects/unicodectype.c. */

PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
    Py_UCS4 ch       /* Unicode character */
    );
2136 | |
/* "Full" case conversions taking an output pointer `res`.
   NOTE(review): presumably the (possibly multi-code-point) mapping of `ch`
   is written to `res` and the number of code points written is returned;
   the required capacity of `res` is not visible here -- confirm in
   Objects/unicodectype.c. */

PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
    Py_UCS4 ch,       /* Unicode character */
    Py_UCS4 *res      /* Output buffer */
    );

PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
    Py_UCS4 ch,       /* Unicode character */
    Py_UCS4 *res      /* Output buffer */
    );

PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
    Py_UCS4 ch,       /* Unicode character */
    Py_UCS4 *res      /* Output buffer */
    );

PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
    Py_UCS4 ch,       /* Unicode character */
    Py_UCS4 *res      /* Output buffer */
    );
2156 | |
/* Casing-related predicates on a single code point.
   NOTE(review): presumably these test the Unicode Case_Ignorable and Cased
   properties -- confirm in Objects/unicodectype.c. */

PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_IsCased(
    Py_UCS4 ch       /* Unicode character */
    );
2164 | |
/* Numeric value lookups for a single code point; the first two return int,
   _PyUnicode_ToNumeric returns double.
   NOTE(review): presumably a negative value (-1 / -1.0) is returned when
   the character has no such value -- confirm in Objects/unicodectype.c. */

PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_ToDigit(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(double) _PyUnicode_ToNumeric(
    Py_UCS4 ch       /* Unicode character */
    );
2176 | |
/* Further classification predicates on a single code point.
   NOTE(review): presumably each returns nonzero when the character has the
   named property and 0 otherwise -- confirm in Objects/unicodectype.c. */

PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_IsDigit(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_IsNumeric(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_IsPrintable(
    Py_UCS4 ch       /* Unicode character */
    );

PyAPI_FUNC(int) _PyUnicode_IsAlpha(
    Py_UCS4 ch       /* Unicode character */
    );
2196 | |
/* Length and copy helpers operating on Py_UNICODE buffers.
   NOTE(review): the names and signatures parallel the C library's
   strlen/strcpy/strcat/strncpy; presumably the semantics (including the
   terminating-0 and buffer-size requirements on s1) match as well
   -- confirm against the implementation. */

PyAPI_FUNC(size_t) Py_UNICODE_strlen(
    const Py_UNICODE *u
    );

PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
    Py_UNICODE *s1,
    const Py_UNICODE *s2);

PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
    Py_UNICODE *s1, const Py_UNICODE *s2);

PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
    Py_UNICODE *s1,
    const Py_UNICODE *s2,
    size_t n);
2212 | |
/* Comparison and search helpers operating on Py_UNICODE buffers.
   NOTE(review): the names and signatures parallel the C library's
   strcmp/strncmp/strchr/strrchr; presumably the semantics match as well
   -- confirm against the implementation.  As with the C library versions,
   the search functions take a const pointer but return a non-const one. */

PyAPI_FUNC(int) Py_UNICODE_strcmp(
    const Py_UNICODE *s1,
    const Py_UNICODE *s2
    );

PyAPI_FUNC(int) Py_UNICODE_strncmp(
    const Py_UNICODE *s1,
    const Py_UNICODE *s2,
    size_t n
    );

PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
    const Py_UNICODE *s,
    Py_UNICODE c
    );

PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
    const Py_UNICODE *s,
    Py_UNICODE c
    );
2233 | |
/* NOTE(review): the parameters are unnamed in this declaration; the meaning
   of the three int arguments must be taken from the definition. */
PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
2235 | |
/* Create a copy of a unicode string ending with a nul character. Return NULL
   and raise a MemoryError exception on memory allocation failure, otherwise
   return a newly allocated buffer (use PyMem_Free() to free the buffer). */

PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
    PyObject *unicode           /* Unicode object to copy */
    );
2243 | #endif /* Py_LIMITED_API */ |
2244 | |
2245 | #if defined(Py_DEBUG) && !defined(Py_LIMITED_API) |
/* Only declared when Py_DEBUG is defined and Py_LIMITED_API is not.
   NOTE(review): presumably validates the internal invariants of `op`, with
   `check_content` enabling additional checks of the stored characters
   -- confirm against the implementation. */
PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
    PyObject *op,
    int check_content);
2249 | #endif |
2250 | |
/* Return an interned Unicode object for an Identifier; may fail if there is
   no memory. */
PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
/* Clear all static strings.
   NOTE(review): presumably these are the strings created through
   _PyUnicode_FromId() -- confirm against the implementation. */
PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
2255 | |
2256 | #ifdef __cplusplus |
2257 | } |
2258 | #endif |
2259 | #endif /* !Py_UNICODEOBJECT_H */ |
2260 | |