unicodeobject.h source code [include/python3.5m/unicodeobject.h]

1	#ifndef Py_UNICODEOBJECT_H
2	#define Py_UNICODEOBJECT_H
3
4	#include <stdarg.h>
5
6	/*
7
8	Unicode implementation based on original code by Fredrik Lundh,
9	modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
10	Unicode Integration Proposal. (See
11	http://www.egenix.com/files/python/unicode-proposal.txt).
12
13	Copyright (c) Corporation for National Research Initiatives.
14
15
16	Original header:
17	--------------------------------------------------------------------
18
19	* Yet another Unicode string type for Python. This type supports the
20	* 16-bit Basic Multilingual Plane (BMP) only.
21	*
22	* Written by Fredrik Lundh, January 1999.
23	*
24	* Copyright (c) 1999 by Secret Labs AB.
25	* Copyright (c) 1999 by Fredrik Lundh.
26	*
27	* fredrik@pythonware.com
28	* http://www.pythonware.com
29	*
30	* --------------------------------------------------------------------
31	* This Unicode String Type is
32	*
33	* Copyright (c) 1999 by Secret Labs AB
34	* Copyright (c) 1999 by Fredrik Lundh
35	*
36	* By obtaining, using, and/or copying this software and/or its
37	* associated documentation, you agree that you have read, understood,
38	* and will comply with the following terms and conditions:
39	*
40	* Permission to use, copy, modify, and distribute this software and its
41	* associated documentation for any purpose and without fee is hereby
42	* granted, provided that the above copyright notice appears in all
43	* copies, and that both that copyright notice and this permission notice
44	* appear in supporting documentation, and that the name of Secret Labs
45	* AB or the author not be used in advertising or publicity pertaining to
46	* distribution of the software without specific, written prior
47	* permission.
48	*
49	* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
50	* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
51	* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
52	* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
53	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
54	* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
55	* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
56	* -------------------------------------------------------------------- */
57
58	#include <ctype.h>
59
60	/ === Internal API ======================================================= /
61
62	/ --- Internal Unicode Format -------------------------------------------- /
63
64	/ Python 3.x requires unicode /
65	#define Py_USING_UNICODE
66
67	#ifndef SIZEOF_WCHAR_T
68	#error Must define SIZEOF_WCHAR_T
69	#endif
70
71	#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
72
73	/ If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.*
74	Otherwise, Unicode strings are stored as UCS-2 (with limited support
75	for UTF-16) /*
76
77	#if Py_UNICODE_SIZE >= 4
78	#define Py_UNICODE_WIDE
79	#endif
80
/* Set these flags if the platform has "wchar.h" and the
   wchar_t type is a 16-bit unsigned type */
/* #define HAVE_WCHAR_H */
/* #define HAVE_USABLE_WCHAR_T */

/* Py_UNICODE was the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode type.
   With PEP 393, Py_UNICODE is deprecated and replaced with a
   typedef to wchar_t. */

#ifndef Py_LIMITED_API
#define PY_UNICODE_TYPE wchar_t
typedef wchar_t Py_UNICODE;
#endif
95
/* If the compiler provides a wchar_t type we try to support it
   through the interface functions PyUnicode_FromWideChar(),
   PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */

/* A usable wchar_t implies that <wchar.h> is available. */
#ifdef HAVE_USABLE_WCHAR_T
# ifndef HAVE_WCHAR_H
#  define HAVE_WCHAR_H
# endif
#endif

#if defined(MS_WINDOWS)
#  define HAVE_MBCS
#endif

#ifdef HAVE_WCHAR_H
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
# ifdef _HAVE_BSDI
#  include <time.h>
# endif
#  include <wchar.h>
#endif
117
118	/ Py_UCS4 and Py_UCS2 are typedefs for the respective*
119	unicode representations. /*
120	#if SIZEOF_INT == 4
121	typedef unsigned int Py_UCS4;
122	#elif SIZEOF_LONG == 4
123	typedef unsigned long Py_UCS4;
124	#else
125	#error "Could not find a proper typedef for Py_UCS4"
126	#endif
127
128	#if SIZEOF_SHORT == 2
129	typedef unsigned short Py_UCS2;
130	#else
131	#error "Could not find a proper typedef for Py_UCS2"
132	#endif
133
134	typedef unsigned char Py_UCS1;
135
/* --- Internal Unicode Operations ---------------------------------------- */

/* Since splitting on whitespace is an important use case, and
   whitespace in most situations is solely ASCII whitespace, we
   optimize for the common case by using a quick look-up table
   _Py_ascii_whitespace (see below) with an inlined check.

 */
#ifndef Py_LIMITED_API
/* Note: `ch` is evaluated more than once by this macro. */
#define Py_UNICODE_ISSPACE(ch) \
    ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))

/* The macros below forward directly to the _PyUnicode_* helper of the
   matching name. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

/* True if any of the alpha, decimal, digit or numeric tests succeeds.
   `ch` may be evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch) || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
#endif /* Py_LIMITED_API */
173
#ifndef Py_LIMITED_API
/* Copy `length` Py_UNICODE code units from `source` to `target` using
   Py_MEMCPY. */
#define Py_UNICODE_COPY(target, source, length) \
    Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))

/* Store `value` into the first `length` code units of `target`.
   `target` and `value` are evaluated once; `length` is re-evaluated on
   every loop iteration. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
    } while (0)
#endif /* Py_LIMITED_API */
181
#ifndef Py_LIMITED_API
/* macros to work with surrogates */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
/* Join two surrogate characters and return a single Py_UCS4 value. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
/* high surrogate = top 10 bits added to D800 */
#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
/* low surrogate = bottom 10 bits added to DC00 */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
#endif /* Py_LIMITED_API */
194
#ifndef Py_LIMITED_API
/* Check if substring matches at given offset.  The offset must be
   valid, and the substring must not be empty. */

/* The first and last code units are compared by value before falling back
   to memcmp() over the whole substring.  `string` and `substring` must be
   pointers to objects exposing `wstr` and `wstr_length`; both arguments are
   evaluated several times. */
#define Py_UNICODE_MATCH(string, offset, substring) \
    ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
     ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
     !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))

#endif /* Py_LIMITED_API */
204
205	#ifdef __cplusplus
206	extern "C" {
207	#endif
208
209	/ --- Unicode Type ------------------------------------------------------- /
210
211	#ifndef Py_LIMITED_API
212
213	/ ASCII-only strings created through PyUnicode_New use the PyASCIIObject*
214	structure. state.ascii and state.compact are set, and the data
215	immediately follow the structure. utf8_length and wstr_length can be found
216	in the length field; the utf8 pointer is equal to the data pointer. /*
217	typedef struct {
218	/ There are 4 forms of Unicode strings:*
219
220	- compact ascii:
221
222	* structure = PyASCIIObject
223	* test: PyUnicode_IS_COMPACT_ASCII(op)
224	* kind = PyUnicode_1BYTE_KIND
225	* compact = 1
226	* ascii = 1
227	* ready = 1
228	* (length is the length of the utf8 and wstr strings)
229	* (data starts just after the structure)
230	* (since ASCII is decoded from UTF-8, the utf8 string are the data)
231
232	- compact:
233
234	* structure = PyCompactUnicodeObject
235	* test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
236	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
237	PyUnicode_4BYTE_KIND
238	* compact = 1
239	* ready = 1
240	* ascii = 0
241	* utf8 is not shared with data
242	* utf8_length = 0 if utf8 is NULL
243	* wstr is shared with data and wstr_length=length
244	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
245	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
246	* wstr_length = 0 if wstr is NULL
247	* (data starts just after the structure)
248
249	- legacy string, not ready:
250
251	* structure = PyUnicodeObject
252	* test: kind == PyUnicode_WCHAR_KIND
253	* length = 0 (use wstr_length)
254	* hash = -1
255	* kind = PyUnicode_WCHAR_KIND
256	* compact = 0
257	* ascii = 0
258	* ready = 0
259	* interned = SSTATE_NOT_INTERNED
260	* wstr is not NULL
261	* data.any is NULL
262	* utf8 is NULL
263	* utf8_length = 0
264
265	- legacy string, ready:
266
267	* structure = PyUnicodeObject structure
268	* test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
269	* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
270	PyUnicode_4BYTE_KIND
271	* compact = 0
272	* ready = 1
273	* data.any is not NULL
274	* utf8 is shared and utf8_length = length with data.any if ascii = 1
275	* utf8_length = 0 if utf8 is NULL
276	* wstr is shared with data.any and wstr_length = length
277	if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
278	or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
279	* wstr_length = 0 if wstr is NULL
280
281	Compact strings use only one memory block (structure + characters),
282	whereas legacy strings use one block for the structure and one block
283	for characters.
284
285	Legacy strings are created by PyUnicode_FromUnicode() and
286	PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
287	when PyUnicode_READY() is called.
288
289	See also _PyUnicode_CheckConsistency().
290	*/
291	PyObject_HEAD
292	Py_ssize_t length; / Number of code points in the string /
293	Py_hash_t hash; / Hash value; -1 if not set /
294	struct {
295	/*
296	SSTATE_NOT_INTERNED (0)
297	SSTATE_INTERNED_MORTAL (1)
298	SSTATE_INTERNED_IMMORTAL (2)
299
300	If interned != SSTATE_NOT_INTERNED, the two references from the
301	dictionary to this object are not* counted in ob_refcnt.*
302	*/
303	unsigned int interned:`2`;
304	/ Character size:*
305
306	- PyUnicode_WCHAR_KIND (0):
307
308	* character type = wchar_t (16 or 32 bits, depending on the
309	platform)
310
311	- PyUnicode_1BYTE_KIND (1):
312
313	* character type = Py_UCS1 (8 bits, unsigned)
314	* all characters are in the range U+0000-U+00FF (latin1)
315	* if ascii is set, all characters are in the range U+0000-U+007F
316	(ASCII), otherwise at least one character is in the range
317	U+0080-U+00FF
318
319	- PyUnicode_2BYTE_KIND (2):
320
321	* character type = Py_UCS2 (16 bits, unsigned)
322	* all characters are in the range U+0000-U+FFFF (BMP)
323	* at least one character is in the range U+0100-U+FFFF
324
325	- PyUnicode_4BYTE_KIND (4):
326
327	* character type = Py_UCS4 (32 bits, unsigned)
328	* all characters are in the range U+0000-U+10FFFF
329	* at least one character is in the range U+10000-U+10FFFF
330	*/
331	unsigned int kind:`3`;
332	/ Compact is with respect to the allocation scheme. Compact unicode*
333	objects only require one memory block while non-compact objects use
334	one block for the PyUnicodeObject struct and another for its data
335	buffer. /*
336	unsigned int compact:`1`;
337	/ The string only contains characters in the range U+0000-U+007F (ASCII)*
338	and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
339	set, use the PyASCIIObject structure. /*
340	unsigned int ascii:`1`;
341	/ The ready flag indicates whether the object layout is initialized*
342	completely. This means that this is either a compact object, or
343	the data pointer is filled out. The bit is redundant, and helps
344	to minimize the test in PyUnicode_IS_READY(). /*
345	unsigned int ready:`1`;
346	/ Padding to ensure that PyUnicode_DATA() is always aligned to*
347	4 bytes (see issue #19537 on m68k). /*
348	unsigned int :`24`;
349	} state;
350	wchar_t wstr; /* wchar_t representation (null-terminated) /
351	} PyASCIIObject;
352
353	/ Non-ASCII strings allocated through PyUnicode_New use the*
354	PyCompactUnicodeObject structure. state.compact is set, and the data
355	immediately follow the structure. /*
356	typedef struct {
357	PyASCIIObject _base;
358	Py_ssize_t utf8_length; / Number of bytes in utf8, excluding the*
359	* terminating \0. */
360	char utf8; /* UTF-8 representation (null-terminated) /
361	Py_ssize_t wstr_length; / Number of code points in wstr, possible*
362	* surrogates count as two code points. */
363	} PyCompactUnicodeObject;
364
365	/ Strings allocated through PyUnicode_FromUnicode(NULL, len) use the*
366	PyUnicodeObject structure. The actual string data is initially in the wstr
367	block, and copied into the data block using _PyUnicode_Ready. /*
368	typedef struct {
369	PyCompactUnicodeObject _base;
370	union {
371	void *any;
372	Py_UCS1 *latin1;
373	Py_UCS2 *ucs2;
374	Py_UCS4 *ucs4;
375	} data; / Canonical, smallest-form Unicode buffer /
376	} PyUnicodeObject;
377	#endif
378
379	PyAPI_DATA(PyTypeObject) PyUnicode_Type;
380	PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
381
382	#define PyUnicode_Check(op) \
383	PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
384	#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
385
/* Fast access macros */
#ifndef Py_LIMITED_API

/* Length of the wstr representation: taken from `length` for compact ASCII
   strings, from `wstr_length` otherwise. */
#define PyUnicode_WSTR_LENGTH(op) \
    (PyUnicode_IS_COMPACT_ASCII(op) ?                  \
     ((PyASCIIObject*)(op))->length :                  \
     ((PyCompactUnicodeObject*)(op))->wstr_length)

/* Returns the deprecated Py_UNICODE representation's size in code units
   (this includes surrogate pairs as 2 units).
   If the Py_UNICODE representation is not available, it will be computed
   on request.  Use PyUnicode_GET_LENGTH() for the length in code points. */

#define PyUnicode_GET_SIZE(op)                       \
    (assert(PyUnicode_Check(op)),                    \
     (((PyASCIIObject *)(op))->wstr) ?               \
      PyUnicode_WSTR_LENGTH(op) :                    \
      ((void)PyUnicode_AsUnicode((PyObject *)(op)),  \
       assert(((PyASCIIObject *)(op))->wstr),        \
       PyUnicode_WSTR_LENGTH(op)))

/* Size of the Py_UNICODE representation in bytes. */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)

/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
   representation on demand.  Using this macro is very inefficient now,
   try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
   use PyUnicode_WRITE() and PyUnicode_READ(). */

#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), \
     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
      PyUnicode_AsUnicode((PyObject *)(op)))

/* The Py_UNICODE buffer viewed as raw bytes. */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))

#endif /* Py_LIMITED_API */
422
423
#ifndef Py_LIMITED_API
/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */

/* Values for PyASCIIObject.state: */

/* Interning state. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2

/* Return true if the string contains only ASCII characters, or 0 if not. The
   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
   ready. */
#define PyUnicode_IS_ASCII(op)                   \
    (assert(PyUnicode_Check(op)),                \
     assert(PyUnicode_IS_READY(op)),             \
     ((PyASCIIObject*)(op))->state.ascii)

/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)

/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not.  No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
#endif /* Py_LIMITED_API */
450
#ifndef Py_LIMITED_API
enum PyUnicode_Kind {
/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
#endif /* Py_LIMITED_API */
461
#ifndef Py_LIMITED_API
/* Return pointers to the canonical representation cast to unsigned char,
   Py_UCS2, or Py_UCS4 for direct character access.
   No checks are performed, use PyUnicode_KIND() before to ensure
   these will work correctly. */

#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))

/* Return one of the PyUnicode_*_KIND values defined above. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)

/* Return a void pointer to the raw unicode buffer. */

/* Compact strings: the buffer starts at the first byte after the
   PyASCIIObject (ASCII) or PyCompactUnicodeObject (non-ASCII) header. */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (PyUnicode_IS_ASCII(op) ?                   \
     ((void*)((PyASCIIObject*)(op) + 1)) :              \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)))

/* Non-compact strings: the buffer is the separately stored data.any
   pointer, asserted to be non-NULL. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))

/* Dispatch on the compact flag to one of the two helpers above. */
#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
#endif /* Py_LIMITED_API */
491
#ifndef Py_LIMITED_API
/* In the access macros below, "kind" may be evaluated more than once.
   All other macro parameters are evaluated exactly once, so it is safe
   to put side effects into them (such as increasing the index). */

/* Write into the canonical representation, this macro does not do any sanity
   checks and is intended for usage in loops.  The caller should cache the
   kind and data pointers obtained from other macro calls.
   index is the index in the string (starts at 0) and value is the new
   code point value which should be written to that location. */
#define PyUnicode_WRITE(kind, data, index, value) \
    do { \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: { \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
            break; \
        } \
        case PyUnicode_2BYTE_KIND: { \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
            break; \
        } \
        default: { \
            assert((kind) == PyUnicode_4BYTE_KIND); \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
        } \
        } \
    } while (0)
#endif /* Py_LIMITED_API */
518
#ifndef Py_LIMITED_API
/* Read a code point from the string's canonical representation.  No checks
   or ready calls are performed. */
#define PyUnicode_READ(kind, data, index) \
    ((Py_UCS4) \
    ((kind) == PyUnicode_1BYTE_KIND ? \
        ((const Py_UCS1 *)(data))[(index)] : \
        ((kind) == PyUnicode_2BYTE_KIND ? \
            ((const Py_UCS2 *)(data))[(index)] : \
            ((const Py_UCS4 *)(data))[(index)] \
        ) \
    ))

/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
   calls PyUnicode_KIND() and might call it twice.  For single reads, use
   PyUnicode_READ_CHAR, for multiple consecutive reads callers should
   cache kind and use PyUnicode_READ instead. */
#define PyUnicode_READ_CHAR(unicode, index) \
    (assert(PyUnicode_Check(unicode)),          \
     assert(PyUnicode_IS_READY(unicode)),       \
     (Py_UCS4)                                  \
        (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
            ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
            (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
                ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
                ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
            ) \
        ))
#endif /* Py_LIMITED_API */
546
#ifndef Py_LIMITED_API
/* Returns the length of the unicode string. The caller has to make sure that
   the string has its canonical representation set before calling
   this macro.  Call PyUnicode_(FAST_)Ready to ensure that. */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->length)


/* Fast check to determine whether an object is ready. Equivalent to
   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)

/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
   case.  If the canonical representation is not yet set, it will still call
   _PyUnicode_Ready().
   Returns 0 on success and -1 on errors. */
#define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                  \
     (PyUnicode_IS_READY(op) ?                     \
      0 : _PyUnicode_Ready((PyObject *)(op))))
#endif /* Py_LIMITED_API */
569
#ifndef Py_LIMITED_API
/* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
   than iterating over the string. */

/* Yields 0x7f for ASCII strings, otherwise the largest code point the
   string's kind can store: 0xff, 0xffff or 0x10ffff. */
#define PyUnicode_MAX_CHAR_VALUE(op) \
    (assert(PyUnicode_IS_READY(op)),                                    \
     (PyUnicode_IS_ASCII(op) ?                                          \
      (0x7f) :                                                          \
      (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
       (0xffU) :                                                        \
       (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
        (0xffffU) :                                                     \
        (0x10ffffU)))))

#endif
584
/* --- Constants ---------------------------------------------------------- */

/* This Unicode character will be used as replacement character during
   decoding if the errors argument is set to "replace". Note: the
   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0. */

#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
593
594	/ === Public API ========================================================= /
595
596	/ --- Plain Py_UNICODE --------------------------------------------------- /
597
598	/ With PEP 393, this is the recommended way to allocate a new unicode object.*
599	This function will allocate the object and its buffer in a single memory
600	block. Objects created using this function are not resizable. /*
601	#ifndef Py_LIMITED_API
602	PyAPI_FUNC(PyObject*) PyUnicode_New(
603	Py_ssize_t size, / Number of code points in the new string /
604	Py_UCS4 maxchar / maximum code point value in the string /
605	);
606	#endif
607
608	/ Initializes the canonical string representation from the deprecated*
609	wstr/Py_UNICODE representation. This function is used to convert Unicode
610	objects which were created using the old API to the new flexible format
611	introduced with PEP 393.
612
613	Don't call this function directly, use the public PyUnicode_READY() macro
614	instead. /*
615	#ifndef Py_LIMITED_API
616	PyAPI_FUNC(int) _PyUnicode_Ready(
617	PyObject unicode /* Unicode object /
618	);
619	#endif
620
621	/ Get a copy of a Unicode string. /
622	#ifndef Py_LIMITED_API
623	PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
624	PyObject *unicode
625	);
626	#endif
627
628	/ Copy character from one unicode object into another, this function performs*
629	character conversion when necessary and falls back to memcpy() if possible.
630
631	Fail if to is too small (smaller than how_many* or smaller than*
632	len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
633	kind(to), or if to* has more than 1 reference.*
634
635	Return the number of written character, or return -1 and raise an exception
636	on error.
637
638	Pseudo-code:
639
640	how_many = min(how_many, len(from) - from_start)
641	to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
642	return how_many
643
644	Note: The function doesn't write a terminating null character.
645	*/
646	#ifndef Py_LIMITED_API
647	PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
648	PyObject *to,
649	Py_ssize_t to_start,
650	PyObject *from,
651	Py_ssize_t from_start,
652	Py_ssize_t how_many
653	);
654
655	/ Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so*
656	may crash if parameters are invalid (e.g. if the output string
657	is too short). /*
658	PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
659	PyObject *to,
660	Py_ssize_t to_start,
661	PyObject *from,
662	Py_ssize_t from_start,
663	Py_ssize_t how_many
664	);
665	#endif
666
667	#ifndef Py_LIMITED_API
668	/ Fill a string with a character: write fill_char into*
669	unicode[start:start+length].
670
671	Fail if fill_char is bigger than the string maximum character, or if the
672	string has more than 1 reference.
673
674	Return the number of written character, or return -1 and raise an exception
675	on error. /*
676	PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
677	PyObject *unicode,
678	Py_ssize_t start,
679	Py_ssize_t length,
680	Py_UCS4 fill_char
681	);
682
683	/ Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash*
684	if parameters are invalid (e.g. if length is longer than the string). /*
685	PyAPI_FUNC(void) _PyUnicode_FastFill(
686	PyObject *unicode,
687	Py_ssize_t start,
688	Py_ssize_t length,
689	Py_UCS4 fill_char
690	);
691	#endif
692
693	/ Create a Unicode Object from the Py_UNICODE buffer u of the given*
694	size.
695
696	u may be NULL which causes the contents to be undefined. It is the
697	user's responsibility to fill in the needed data afterwards. Note
698	that modifying the Unicode object contents after construction is
699	only allowed if u was set to NULL.
700
701	The buffer is copied into the new object. /*
702
703	#ifndef Py_LIMITED_API
704	PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
705	const Py_UNICODE u, /* Unicode buffer /
706	Py_ssize_t size / size of buffer /
707	);
708	#endif
709
710	/ Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes /
711	PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
712	const char u, /* UTF-8 encoded string /
713	Py_ssize_t size / size of buffer /
714	);
715
716	/ Similar to PyUnicode_FromUnicode(), but u points to null-terminated*
717	UTF-8 encoded bytes. The size is determined with strlen(). /*
718	PyAPI_FUNC(PyObject*) PyUnicode_FromString(
719	const char u /* UTF-8 encoded string /
720	);
721
722	#ifndef Py_LIMITED_API
723	/ Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.*
724	Scan the string to find the maximum character. /*
725	PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
726	int kind,
727	const void *buffer,
728	Py_ssize_t size);
729
730	/ Create a new string from a buffer of ASCII characters.*
731	WARNING: Don't check if the string contains any non-ASCII character. /*
732	PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
733	const char *buffer,
734	Py_ssize_t size);
735	#endif
736
737	PyAPI_FUNC(PyObject*) PyUnicode_Substring(
738	PyObject *str,
739	Py_ssize_t start,
740	Py_ssize_t end);
741
742	#ifndef Py_LIMITED_API
743	/ Compute the maximum character of the substring unicode[start:end].*
744	Return 127 for an empty string. /*
745	PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
746	PyObject *unicode,
747	Py_ssize_t start,
748	Py_ssize_t end);
749	#endif
750
751	/ Copy the string into a UCS4 buffer including the null character if copy_null*
752	is set. Return NULL and raise an exception on error. Raise a ValueError if
753	the buffer is smaller than the string. Return buffer on success.
754
755	buflen is the length of the buffer in (Py_UCS4) characters. /*
756	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
757	PyObject *unicode,
758	Py_UCS4* buffer,
759	Py_ssize_t buflen,
760	int copy_null);
761
762	/ Copy the string into a UCS4 buffer. A new buffer is allocated using*
763	* PyMem_Malloc; if this fails, NULL is returned with a memory error
764	exception set. /*
765	PyAPI_FUNC(Py_UCS4) PyUnicode_AsUCS4Copy(PyObject unicode);
766
767	/ Return a read-only pointer to the Unicode object's internal*
768	Py_UNICODE buffer.
769	If the wchar_t/Py_UNICODE representation is not yet available, this
770	function will calculate it. /*
771
772	#ifndef Py_LIMITED_API
773	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
774	PyObject unicode /* Unicode object /
775	);
776	#endif
777
778	/ Return a read-only pointer to the Unicode object's internal*
779	Py_UNICODE buffer and save the length at size.
780	If the wchar_t/Py_UNICODE representation is not yet available, this
781	function will calculate it. /*
782
783	#ifndef Py_LIMITED_API
784	PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
785	PyObject unicode, /* Unicode object /
786	Py_ssize_t size /* location where to save the length /
787	);
788	#endif
789
790	/ Get the length of the Unicode object. /
791
792	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
793	PyObject *unicode
794	);
795
796	/ Get the number of Py_UNICODE units in the*
797	string representation. /*
798
799	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
800	PyObject unicode /* Unicode object /
801	);
802
803	/ Read a character from the string. /
804
805	PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
806	PyObject *unicode,
807	Py_ssize_t index
808	);
809
810	/ Write a character to the string. The string must have been created through*
811	PyUnicode_New, must not be shared, and must not have been hashed yet.
812
813	Return 0 on success, -1 on error. /*
814
815	PyAPI_FUNC(int) PyUnicode_WriteChar(
816	PyObject *unicode,
817	Py_ssize_t index,
818	Py_UCS4 character
819	);
820
821	#ifndef Py_LIMITED_API
822	/ Get the maximum ordinal for a Unicode character. /
823	PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
824	#endif
825
826	/ Resize a Unicode object. The length is the number of characters, except*
827	if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
828	is the number of Py_UNICODE characters.
829
830	*unicode is modified to point to the new (resized) object and 0
831	returned on success.
832
833	Try to resize the string in place (which is usually faster than allocating
834	a new string and copy characters), or create a new string.
835
836	Error handling is implemented as follows: an exception is set, -1
837	is returned and unicode left untouched.*
838
839	WARNING: The function doesn't check string content, the result may not be a
840	string in canonical representation. /*
841
842	PyAPI_FUNC(int) PyUnicode_Resize(
843	PyObject *unicode, /* Pointer to the Unicode object /
844	Py_ssize_t length / New length /
845	);
846
847	/ Decode obj to a Unicode object.*
848
849	bytes, bytearray and other bytes-like objects are decoded according to the
850	given encoding and error handler. The encoding and error handler can be
851	NULL to have the interface use UTF-8 and "strict".
852
853	All other objects (including Unicode objects) raise an exception.
854
855	The API returns NULL in case of an error. The caller is responsible
856	for decref'ing the returned objects.
857
858	*/
859
860	PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
861	PyObject obj, /* Object /
862	const char encoding, /* encoding /
863	const char errors /* error handling /
864	);
865
866	/ Copy an instance of a Unicode subtype to a new true Unicode object if*
867	necessary. If obj is already a true Unicode object (not a subtype), return
868	the reference with incremented* refcount.*
869
870	The API returns NULL in case of an error. The caller is responsible
871	for decref'ing the returned objects.
872
873	*/
874
875	PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
876	PyObject obj /* Object /
877	);
878
879	PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
880	const char format, /* ASCII-encoded string /
881	va_list vargs
882	);
883	PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
884	const char format, /* ASCII-encoded string /
885	...
886	);
887
888	#ifndef Py_LIMITED_API
889	typedef struct {
890	PyObject *buffer;
891	void *data;
892	enum PyUnicode_Kind kind;
893	Py_UCS4 maxchar;
894	Py_ssize_t size;
895	Py_ssize_t pos;
896
897	/ minimum number of allocated characters (default: 0) /
898	Py_ssize_t min_length;
899
900	/ minimum character (default: 127, ASCII) /
901	Py_UCS4 min_char;
902
903	/ If non-zero, overallocate the buffer by 25% (default: 0). /
904	unsigned char overallocate;
905
906	/ If readonly is 1, buffer is a shared string (cannot be modified)*
907	and size is set to 0. /*
908	unsigned char readonly;
909	} _PyUnicodeWriter ;
910
911	/ Initialize a Unicode writer.*
912	*
913	* By default, the minimum buffer size is 0 character and overallocation is
914	* disabled. Set min_length, min_char and overallocate attributes to control
915	* the allocation of the buffer. */
916	PyAPI_FUNC(void)
917	_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
918
/* Prepare the buffer to write 'length' characters
   with the specified maximum character.

   Return 0 on success, raise an exception and return -1 on error.

   Fast path: evaluates to 0 without any call when MAXCHAR already fits in
   WRITER->maxchar and LENGTH fits in the remaining room (size - pos), or
   when LENGTH is 0. Otherwise defers to _PyUnicodeWriter_PrepareInternal().

   Note: this is a macro; WRITER, LENGTH and MAXCHAR may each be evaluated
   more than once, so pass expressions without side effects. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
    (((MAXCHAR) <= (WRITER)->maxchar                                  \
      && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
     ? 0                                                              \
     : (((LENGTH) == 0)                                               \
        ? 0                                                           \
        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
930
931	/ Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro*
932	instead. /*
933	PyAPI_FUNC(int)
934	_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
935	Py_ssize_t length, Py_UCS4 maxchar);
936
937	/ Append a Unicode character.*
938	Return 0 on success, raise an exception and return -1 on error. /*
939	PyAPI_FUNC(int)
940	_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
941	Py_UCS4 ch
942	);
943
944	/ Append a Unicode string.*
945	Return 0 on success, raise an exception and return -1 on error. /*
946	PyAPI_FUNC(int)
947	_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
948	PyObject str /* Unicode string /
949	);
950
951	/ Append a substring of a Unicode string.*
952	Return 0 on success, raise an exception and return -1 on error. /*
953	PyAPI_FUNC(int)
954	_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
955	PyObject str, /* Unicode string /
956	Py_ssize_t start,
957	Py_ssize_t end
958	);
959
960	/ Append an ASCII-encoded byte string.*
961	Return 0 on success, raise an exception and return -1 on error. /*
962	PyAPI_FUNC(int)
963	_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
964	const char str, /* ASCII-encoded byte string /
965	Py_ssize_t len / number of bytes, or -1 if unknown /
966	);
967
968	/ Append a latin1-encoded byte string.*
969	Return 0 on success, raise an exception and return -1 on error. /*
970	PyAPI_FUNC(int)
971	_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
972	const char str, /* latin1-encoded byte string /
973	Py_ssize_t len / length in bytes /
974	);
975
976	/ Get the value of the writer as a Unicode string. Clear the*
977	buffer of the writer. Raise an exception and return NULL
978	on error. /*
979	PyAPI_FUNC(PyObject *)
980	_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
981
982	/ Deallocate memory of a writer (clear its internal buffer). /
983	PyAPI_FUNC(void)
984	_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
985	#endif
986
987	#ifndef Py_LIMITED_API
988	/ Format the object based on the format_spec, as defined in PEP 3101*
989	(Advanced String Formatting). /*
990	PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
991	_PyUnicodeWriter *writer,
992	PyObject *obj,
993	PyObject *format_spec,
994	Py_ssize_t start,
995	Py_ssize_t end);
996	#endif
997
998	PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
999	PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
1000	PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
1001	const char u /* UTF-8 encoded string /
1002	);
1003	#ifndef Py_LIMITED_API
1004	PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
1005	#endif
1006
/* Use only if you know it's a string.
   Reads the state.interned field after an unchecked cast of op to
   PyASCIIObject *. */
#define PyUnicode_CHECK_INTERNED(op) \
    (((PyASCIIObject *)(op))->state.interned)
1010
/* --- wchar_t support for platforms which support it --------------------- */
1012
1013	#ifdef HAVE_WCHAR_H
1014
1015	/ Create a Unicode Object from the wchar_t buffer w of the given*
1016	size.
1017
1018	The buffer is copied into the new object. /*
1019
1020	PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
1021	const wchar_t w, /* wchar_t buffer /
1022	Py_ssize_t size / size of buffer /
1023	);
1024
1025	/ Copies the Unicode Object contents into the wchar_t buffer w. At*
1026	most size wchar_t characters are copied.
1027
1028	Note that the resulting wchar_t string may or may not be
1029	0-terminated. It is the responsibility of the caller to make sure
1030	that the wchar_t string is 0-terminated in case this is required by
1031	the application.
1032
1033	Returns the number of wchar_t characters copied (excluding a
1034	possibly trailing 0-termination character) or -1 in case of an
1035	error. /*
1036
1037	PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
1038	PyObject unicode, /* Unicode object /
1039	wchar_t w, /* wchar_t buffer /
1040	Py_ssize_t size / size of buffer /
1041	);
1042
1043	/ Convert the Unicode object to a wide character string. The output string*
1044	always ends with a nul character. If size is not NULL, write the number of
1045	wide characters (excluding the null character) into size.*
1046
1047	Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
1048	on success. On error, returns NULL, size is undefined and raises a*
1049	MemoryError. /*
1050
1051	PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
1052	PyObject unicode, /* Unicode object /
1053	Py_ssize_t size /* number of characters of the result /
1054	);
1055
1056	#ifndef Py_LIMITED_API
1057	PyAPI_FUNC(void) _PyUnicode_AsKind(PyObject s, unsigned int kind);
1058	#endif
1059
1060	#endif
1061
1062	/ --- Unicode ordinals --------------------------------------------------- /
1063
1064	/ Create a Unicode Object from the given Unicode code point ordinal.*
1065
1066	The ordinal must be in range(0x110000). A ValueError is
1067	raised in case it is not.
1068
1069	*/
1070
1071	PyAPI_FUNC(PyObject) PyUnicode_FromOrdinal(int* ordinal);
1072
1073	/ --- Free-list management ----------------------------------------------- /
1074
1075	/ Clear the free list used by the Unicode implementation.*
1076
1077	This can be used to release memory used for objects on the free
1078	list back to the Python memory allocator.
1079
1080	*/
1081
1082	PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
1083
/* === Builtin Codecs =====================================================

   Many of these APIs take two arguments, encoding and errors. These
   parameters have the same semantics as the ones of the builtin str() API.

   Setting encoding to NULL causes the default encoding (UTF-8) to be used.

   Error handling is set by errors which may also be set to NULL
   meaning to use the default handling defined for the codec. Default
   error handling for all builtin codecs is "strict" (ValueErrors are
   raised).

   The codecs all use a similar interface. Only deviations from the
   generic interface are documented.

*/
1101
1102	/ --- Manage the default encoding ---------------------------------------- /
1103
1104	/ Returns a pointer to the default encoding (UTF-8) of the*
1105	Unicode object unicode and the size of the encoded representation
1106	in bytes stored in size.*
1107
1108	In case of an error, no size is set.*
1109
1110	This function caches the UTF-8 encoded string in the unicodeobject
1111	and subsequent calls will return the same string. The memory is released
1112	when the unicodeobject is deallocated.
1113
1114	_PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
1115	support the previous internal function with the same behaviour.
1116
1117	*** This API is for interpreter INTERNAL USE ONLY and will likely
1118	*** be removed or changed in the future.
1119
1120	*** If you need to access the Unicode object as UTF-8 bytes string,
1121	*** please use PyUnicode_AsUTF8String() instead.
1122	*/
1123
1124	#ifndef Py_LIMITED_API
1125	PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
1126	PyObject *unicode,
1127	Py_ssize_t *size);
1128	#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
1129	#endif
1130
1131	/ Returns a pointer to the default encoding (UTF-8) of the*
1132	Unicode object unicode.
1133
1134	Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
1135	in the unicodeobject.
1136
1137	_PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
1138	support the previous internal function with the same behaviour.
1139
1140	Use of this API is DEPRECATED since no size information can be
1141	extracted from the returned data.
1142
1143	*** This API is for interpreter INTERNAL USE ONLY and will likely
1144	*** be removed or changed for Python 3.1.
1145
1146	*** If you need to access the Unicode object as UTF-8 bytes string,
1147	*** please use PyUnicode_AsUTF8String() instead.
1148
1149	*/
1150
1151	#ifndef Py_LIMITED_API
1152	PyAPI_FUNC(char ) PyUnicode_AsUTF8(PyObject unicode);
1153	#define _PyUnicode_AsString PyUnicode_AsUTF8
1154	#endif
1155
1156	/ Returns "utf-8". /
1157
1158	PyAPI_FUNC(const char) PyUnicode_GetDefaultEncoding(void*);
1159
1160	/ --- Generic Codecs ----------------------------------------------------- /
1161
1162	/ Create a Unicode object by decoding the encoded string s of the*
1163	given size. /*
1164
1165	PyAPI_FUNC(PyObject*) PyUnicode_Decode(
1166	const char s, /* encoded string /
1167	Py_ssize_t size, / size of buffer /
1168	const char encoding, /* encoding /
1169	const char errors /* error handling /
1170	);
1171
1172	/ Decode a Unicode object unicode and return the result as Python*
1173	object. /*
1174
1175	PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
1176	PyObject unicode, /* Unicode object /
1177	const char encoding, /* encoding /
1178	const char errors /* error handling /
1179	);
1180
1181	/ Decode a Unicode object unicode and return the result as Unicode*
1182	object. /*
1183
1184	PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
1185	PyObject unicode, /* Unicode object /
1186	const char encoding, /* encoding /
1187	const char errors /* error handling /
1188	);
1189
1190	/ Encodes a Py_UNICODE buffer of the given size and returns a*
1191	Python string object. /*
1192
1193	#ifndef Py_LIMITED_API
1194	PyAPI_FUNC(PyObject*) PyUnicode_Encode(
1195	const Py_UNICODE s, /* Unicode char buffer /
1196	Py_ssize_t size, / number of Py_UNICODE chars to encode /
1197	const char encoding, /* encoding /
1198	const char errors /* error handling /
1199	);
1200	#endif
1201
1202	/ Encodes a Unicode object and returns the result as Python*
1203	object. /*
1204
1205	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
1206	PyObject unicode, /* Unicode object /
1207	const char encoding, /* encoding /
1208	const char errors /* error handling /
1209	);
1210
1211	/ Encodes a Unicode object and returns the result as Python string*
1212	object. /*
1213
1214	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
1215	PyObject unicode, /* Unicode object /
1216	const char encoding, /* encoding /
1217	const char errors /* error handling /
1218	);
1219
1220	/ Encodes a Unicode object and returns the result as Unicode*
1221	object. /*
1222
1223	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
1224	PyObject unicode, /* Unicode object /
1225	const char encoding, /* encoding /
1226	const char errors /* error handling /
1227	);
1228
1229	/ Build an encoding map. /
1230
1231	PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
1232	PyObject* string / 256 character map /
1233	);
1234
1235	/ --- UTF-7 Codecs ------------------------------------------------------- /
1236
1237	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
1238	const char string, /* UTF-7 encoded string /
1239	Py_ssize_t length, / size of string /
1240	const char errors /* error handling /
1241	);
1242
1243	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
1244	const char string, /* UTF-7 encoded string /
1245	Py_ssize_t length, / size of string /
1246	const char errors, /* error handling /
1247	Py_ssize_t consumed /* bytes consumed /
1248	);
1249
1250	#ifndef Py_LIMITED_API
1251	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
1252	const Py_UNICODE data, /* Unicode char buffer /
1253	Py_ssize_t length, / number of Py_UNICODE chars to encode /
1254	int base64SetO, / Encode RFC2152 Set O characters in base64 /
1255	int base64WhiteSpace, / Encode whitespace (sp, ht, nl, cr) in base64 /
1256	const char errors /* error handling /
1257	);
1258	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
1259	PyObject unicode, /* Unicode object /
1260	int base64SetO, / Encode RFC2152 Set O characters in base64 /
1261	int base64WhiteSpace, / Encode whitespace (sp, ht, nl, cr) in base64 /
1262	const char errors /* error handling /
1263	);
1264	#endif
1265
1266	/ --- UTF-8 Codecs ------------------------------------------------------- /
1267
1268	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
1269	const char string, /* UTF-8 encoded string /
1270	Py_ssize_t length, / size of string /
1271	const char errors /* error handling /
1272	);
1273
1274	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
1275	const char string, /* UTF-8 encoded string /
1276	Py_ssize_t length, / size of string /
1277	const char errors, /* error handling /
1278	Py_ssize_t consumed /* bytes consumed /
1279	);
1280
1281	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
1282	PyObject unicode /* Unicode object /
1283	);
1284
1285	#ifndef Py_LIMITED_API
1286	PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
1287	PyObject *unicode,
1288	const char *errors);
1289
1290	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
1291	const Py_UNICODE data, /* Unicode char buffer /
1292	Py_ssize_t length, / number of Py_UNICODE chars to encode /
1293	const char errors /* error handling /
1294	);
1295	#endif
1296
1297	/ --- UTF-32 Codecs ------------------------------------------------------ /
1298
1299	/ Decodes length bytes from a UTF-32 encoded buffer string and returns*
1300	the corresponding Unicode object.
1301
1302	errors (if non-NULL) defines the error handling. It defaults
1303	to "strict".
1304
1305	If byteorder is non-NULL, the decoder starts decoding using the
1306	given byte order:
1307
1308	*byteorder == -1: little endian
1309	*byteorder == 0: native order
1310	*byteorder == 1: big endian
1311
1312	In native mode, the first four bytes of the stream are checked for a
1313	BOM mark. If found, the BOM mark is analysed, the byte order
1314	adjusted and the BOM skipped. In the other modes, no BOM mark
1315	interpretation is done. After completion, byteorder is set to the*
1316	current byte order at the end of input data.
1317
1318	If byteorder is NULL, the codec starts in native order mode.
1319
1320	*/
1321
1322	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
1323	const char string, /* UTF-32 encoded string /
1324	Py_ssize_t length, / size of string /
1325	const char errors, /* error handling /
1326	int byteorder /* pointer to byteorder to use*
1327	0=native;-1=LE,1=BE; updated on
1328	exit /*
1329	);
1330
1331	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
1332	const char string, /* UTF-32 encoded string /
1333	Py_ssize_t length, / size of string /
1334	const char errors, /* error handling /
1335	int byteorder, /* pointer to byteorder to use*
1336	0=native;-1=LE,1=BE; updated on
1337	exit /*
1338	Py_ssize_t consumed /* bytes consumed /
1339	);
1340
1341	/ Returns a Python string using the UTF-32 encoding in native byte*
1342	order. The string always starts with a BOM mark. /*
1343
1344	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
1345	PyObject unicode /* Unicode object /
1346	);
1347
1348	/ Returns a Python string object holding the UTF-32 encoded value of*
1349	the Unicode data.
1350
1351	If byteorder is not 0, output is written according to the following
1352	byte order:
1353
1354	byteorder == -1: little endian
1355	byteorder == 0: native byte order (writes a BOM mark)
1356	byteorder == 1: big endian
1357
1358	If byteorder is 0, the output string will always start with the
1359	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1360	prepended.
1361
1362	*/
1363
1364	#ifndef Py_LIMITED_API
1365	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
1366	const Py_UNICODE data, /* Unicode char buffer /
1367	Py_ssize_t length, / number of Py_UNICODE chars to encode /
1368	const char errors, /* error handling /
1369	int byteorder / byteorder to use 0=BOM+native;-1=LE,1=BE /
1370	);
1371	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
1372	PyObject object, /* Unicode object /
1373	const char errors, /* error handling /
1374	int byteorder / byteorder to use 0=BOM+native;-1=LE,1=BE /
1375	);
1376	#endif
1377
1378	/ --- UTF-16 Codecs ------------------------------------------------------ /
1379
1380	/ Decodes length bytes from a UTF-16 encoded buffer string and returns*
1381	the corresponding Unicode object.
1382
1383	errors (if non-NULL) defines the error handling. It defaults
1384	to "strict".
1385
1386	If byteorder is non-NULL, the decoder starts decoding using the
1387	given byte order:
1388
1389	*byteorder == -1: little endian
1390	*byteorder == 0: native order
1391	*byteorder == 1: big endian
1392
1393	In native mode, the first two bytes of the stream are checked for a
1394	BOM mark. If found, the BOM mark is analysed, the byte order
1395	adjusted and the BOM skipped. In the other modes, no BOM mark
1396	interpretation is done. After completion, byteorder is set to the*
1397	current byte order at the end of input data.
1398
1399	If byteorder is NULL, the codec starts in native order mode.
1400
1401	*/
1402
1403	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
1404	const char string, /* UTF-16 encoded string /
1405	Py_ssize_t length, / size of string /
1406	const char errors, /* error handling /
1407	int byteorder /* pointer to byteorder to use*
1408	0=native;-1=LE,1=BE; updated on
1409	exit /*
1410	);
1411
1412	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
1413	const char string, /* UTF-16 encoded string /
1414	Py_ssize_t length, / size of string /
1415	const char errors, /* error handling /
1416	int byteorder, /* pointer to byteorder to use*
1417	0=native;-1=LE,1=BE; updated on
1418	exit /*
1419	Py_ssize_t consumed /* bytes consumed /
1420	);
1421
1422	/ Returns a Python string using the UTF-16 encoding in native byte*
1423	order. The string always starts with a BOM mark. /*
1424
1425	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
1426	PyObject unicode /* Unicode object /
1427	);
1428
1429	/ Returns a Python string object holding the UTF-16 encoded value of*
1430	the Unicode data.
1431
1432	If byteorder is not 0, output is written according to the following
1433	byte order:
1434
1435	byteorder == -1: little endian
1436	byteorder == 0: native byte order (writes a BOM mark)
1437	byteorder == 1: big endian
1438
1439	If byteorder is 0, the output string will always start with the
1440	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1441	prepended.
1442
1443	Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
1444	UCS-2. This trick makes it possible to add full UTF-16 capabilities
1445	at a later point without compromising the APIs.
1446
1447	*/
1448
1449	#ifndef Py_LIMITED_API
1450	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
1451	const Py_UNICODE data, /* Unicode char buffer /
1452	Py_ssize_t length, / number of Py_UNICODE chars to encode /
1453	const char errors, /* error handling /
1454	int byteorder / byteorder to use 0=BOM+native;-1=LE,1=BE /
1455	);
1456	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
1457	PyObject* unicode, / Unicode object /
1458	const char errors, /* error handling /
1459	int byteorder / byteorder to use 0=BOM+native;-1=LE,1=BE /
1460	);
1461	#endif
1462
1463	/ --- Unicode-Escape Codecs ---------------------------------------------- /
1464
1465	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
1466	const char string, /* Unicode-Escape encoded string /
1467	Py_ssize_t length, / size of string /
1468	const char errors /* error handling /
1469	);
1470
1471	PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
1472	PyObject unicode /* Unicode object /
1473	);
1474
1475	#ifndef Py_LIMITED_API
1476	PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
1477	const Py_UNICODE data, /* Unicode char buffer /
1478	Py_ssize_t length / Number of Py_UNICODE chars to encode /
1479	);
1480	#endif
1481
1482	/ --- Raw-Unicode-Escape Codecs ------------------------------------------ /
1483
1484	PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
1485	const char string, /* Raw-Unicode-Escape encoded string /
1486	Py_ssize_t length, / size of string /
1487	const char errors /* error handling /
1488	);
1489
1490	PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
1491	PyObject unicode /* Unicode object /
1492	);
1493
1494	#ifndef Py_LIMITED_API
1495	PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
1496	const Py_UNICODE data, /* Unicode char buffer /
1497	Py_ssize_t length / Number of Py_UNICODE chars to encode /
1498	);
1499	#endif
1500
1501	/ --- Unicode Internal Codec ---------------------------------------------*
1502
1503	Only for internal use in _codecsmodule.c /*
1504
1505	#ifndef Py_LIMITED_API
1506	PyObject *_PyUnicode_DecodeUnicodeInternal(
1507	const char *string,
1508	Py_ssize_t length,
1509	const char *errors
1510	);
1511	#endif
1512
1513	/ --- Latin-1 Codecs -----------------------------------------------------*
1514
1515	Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1516
1517	*/
1518
1519	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
1520	const char string, /* Latin-1 encoded string /
1521	Py_ssize_t length, / size of string /
1522	const char errors /* error handling /
1523	);
1524
1525	PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
1526	PyObject unicode /* Unicode object /
1527	);
1528
1529	#ifndef Py_LIMITED_API
1530	PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
1531	PyObject* unicode,
1532	const char* errors);
1533
1534	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
1535	const Py_UNICODE data, /* Unicode char buffer /
1536	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
1537	const char errors /* error handling /
1538	);
1539	#endif
1540
1541	/ --- ASCII Codecs -------------------------------------------------------*
1542
1543	Only 7-bit ASCII data is excepted. All other codes generate errors.
1544
1545	*/
1546
1547	PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
1548	const char string, /* ASCII encoded string /
1549	Py_ssize_t length, / size of string /
1550	const char errors /* error handling /
1551	);
1552
1553	PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
1554	PyObject unicode /* Unicode object /
1555	);
1556
1557	#ifndef Py_LIMITED_API
1558	PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
1559	PyObject* unicode,
1560	const char* errors);
1561
1562	PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
1563	const Py_UNICODE data, /* Unicode char buffer /
1564	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
1565	const char errors /* error handling /
1566	);
1567	#endif
1568
1569	/ --- Character Map Codecs -----------------------------------------------*
1570
1571	This codec uses mappings to encode and decode characters.
1572
1573	Decoding mappings must map single string characters to single
1574	Unicode characters, integers (which are then interpreted as Unicode
1575	ordinals) or None (meaning "undefined mapping" and causing an
1576	error).
1577
1578	Encoding mappings must map single Unicode characters to single
1579	string characters, integers (which are then interpreted as Latin-1
1580	ordinals) or None (meaning "undefined mapping" and causing an
1581	error).
1582
1583	If a character lookup fails with a LookupError, the character is
1584	copied as-is meaning that its ordinal value will be interpreted as
1585	Unicode or Latin-1 ordinal resp. Because of this mappings only need
1586	to contain those mappings which map characters to different code
1587	points.
1588
1589	*/
1590
1591	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
1592	const char string, /* Encoded string /
1593	Py_ssize_t length, / size of string /
1594	PyObject mapping, /* character mapping*
1595	(char ordinal -> unicode ordinal) /*
1596	const char errors /* error handling /
1597	);
1598
1599	PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
1600	PyObject unicode, /* Unicode object /
1601	PyObject mapping /* character mapping*
1602	(unicode ordinal -> char ordinal) /*
1603	);
1604
1605	#ifndef Py_LIMITED_API
1606	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
1607	const Py_UNICODE data, /* Unicode char buffer /
1608	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
1609	PyObject mapping, /* character mapping*
1610	(unicode ordinal -> char ordinal) /*
1611	const char errors /* error handling /
1612	);
1613	PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
1614	PyObject unicode, /* Unicode object /
1615	PyObject mapping, /* character mapping*
1616	(unicode ordinal -> char ordinal) /*
1617	const char errors /* error handling /
1618	);
1619	#endif
1620
1621	/ Translate a Py_UNICODE buffer of the given length by applying a*
1622	character mapping table to it and return the resulting Unicode
1623	object.
1624
1625	The mapping table must map Unicode ordinal integers to Unicode
1626	ordinal integers or None (causing deletion of the character).
1627
1628	Mapping tables may be dictionaries or sequences. Unmapped character
1629	ordinals (ones which cause a LookupError) are left untouched and
1630	are copied as-is.
1631
1632	*/
1633
1634	#ifndef Py_LIMITED_API
1635	PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
1636	const Py_UNICODE data, /* Unicode char buffer /
1637	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
1638	PyObject table, /* Translate table /
1639	const char errors /* error handling /
1640	);
1641	#endif
1642
#ifdef HAVE_MBCS

/* --- MBCS codecs for Windows -------------------------------------------- */

PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
    const char *string,         /* MBCS encoded string */
    Py_ssize_t length,          /* size of string */
    const char *errors          /* error handling */
    );

PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
    const char *string,         /* MBCS encoded string */
    Py_ssize_t length,          /* size of string */
    const char *errors,         /* error handling */
    Py_ssize_t *consumed        /* bytes consumed */
    );

PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
    int code_page,              /* code page number */
    const char *string,         /* encoded string */
    Py_ssize_t length,          /* size of string */
    const char *errors,         /* error handling */
    Py_ssize_t *consumed        /* bytes consumed */
    );

PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
    PyObject *unicode           /* Unicode object */
    );

#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
    const Py_UNICODE *data,     /* Unicode char buffer */
    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
    const char *errors          /* error handling */
    );
#endif

PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
    int code_page,              /* code page number */
    PyObject *unicode,          /* Unicode object */
    const char *errors          /* error handling */
    );

#endif /* HAVE_MBCS */
1687
1688	/ --- Decimal Encoder ---------------------------------------------------- /
1689
1690	/ Takes a Unicode string holding a decimal value and writes it into*
1691	an output buffer using standard ASCII digit codes.
1692
1693	The output buffer has to provide at least length+1 bytes of storage
1694	area. The output string is 0-terminated.
1695
1696	The encoder converts whitespace to ' ', decimal characters to their
1697	corresponding ASCII digit and all other Latin-1 characters except
1698	\0 as-is. Characters outside this range (Unicode ordinals 1-256)
1699	are treated as errors. This includes embedded NULL bytes.
1700
1701	Error handling is defined by the errors argument:
1702
1703	NULL or "strict": raise a ValueError
1704	"ignore": ignore the wrong characters (these are not copied to the
1705	output buffer)
1706	"replace": replaces illegal characters with '?'
1707
1708	Returns 0 on success, -1 on failure.
1709
1710	*/
1711
1712	#ifndef Py_LIMITED_API
1713	PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
1714	Py_UNICODE s, /* Unicode buffer /
1715	Py_ssize_t length, / Number of Py_UNICODE chars to encode /
1716	char output, /* Output buffer; must have size >= length /
1717	const char errors /* error handling /
1718	);
1719	#endif
1720
1721	/ Transforms code points that have decimal digit property to the*
1722	corresponding ASCII digit code points.
1723
1724	Returns a new Unicode string on success, NULL on failure.
1725	*/
1726
1727	#ifndef Py_LIMITED_API
1728	PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
1729	Py_UNICODE s, /* Unicode buffer /
1730	Py_ssize_t length / Number of Py_UNICODE chars to transform /
1731	);
1732	#endif
1733
1734	/ Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyObject*
1735	as argument instead of a raw buffer and length. This function additionally
1736	transforms spaces to ASCII because this is what the callers in longobject,
1737	floatobject, and complexobject did anyways. /*
1738
1739	#ifndef Py_LIMITED_API
1740	PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
1741	PyObject unicode /* Unicode object /
1742	);
1743	#endif
1744
1745	/ --- Locale encoding --------------------------------------------------- /
1746
1747	/ Decode a string from the current locale encoding. The decoder is strict if*
1748	surrogateescape is equal to zero, otherwise it uses the 'surrogateescape'
1749	error handler (PEP 383) to escape undecodable bytes. If a byte sequence can
1750	be decoded as a surrogate character and surrogateescape* is not equal to*
1751	zero, the byte sequence is escaped using the 'surrogateescape' error handler
1752	instead of being decoded. str* must end with a null character but cannot*
1753	contain embedded null characters. /*
1754
1755	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
1756	const char *str,
1757	Py_ssize_t len,
1758	const char *errors);
1759
1760	/ Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string*
1761	length using strlen(). /*
1762
1763	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
1764	const char *str,
1765	const char *errors);
1766
1767	/ Encode a Unicode object to the current locale encoding. The encoder is*
1768	strict is surrogateescape* is equal to zero, otherwise the*
1769	"surrogateescape" error handler is used. Return a bytes object. The string
1770	cannot contain embedded null characters. /*
1771
1772	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
1773	PyObject *unicode,
1774	const char *errors
1775	);
1776
1777	/ --- File system encoding ---------------------------------------------- /
1778
1779	/ ParseTuple converter: encode str objects to bytes using*
1780	PyUnicode_EncodeFSDefault(); bytes objects are output as-is. /*
1781
1782	PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject, void**);
1783
1784	/ ParseTuple converter: decode bytes objects to unicode using*
1785	PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. /*
1786
1787	PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject, void**);
1788
1789	/ Decode a null-terminated string using Py_FileSystemDefaultEncoding*
1790	and the "surrogateescape" error handler.
1791
1792	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1793	encoding.
1794
1795	Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
1796	*/
1797
1798	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1799	const char s /* encoded string /
1800	);
1801
1802	/ Decode a string using Py_FileSystemDefaultEncoding*
1803	and the "surrogateescape" error handler.
1804
1805	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1806	encoding.
1807	*/
1808
1809	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1810	const char s, /* encoded string /
1811	Py_ssize_t size / size /
1812	);
1813
1814	/ Encode a Unicode object to Py_FileSystemDefaultEncoding with the*
1815	"surrogateescape" error handler, and return bytes.
1816
1817	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1818	encoding.
1819	*/
1820
1821	PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1822	PyObject *unicode
1823	);
1824
1825	/ --- Methods & Slots ----------------------------------------------------*
1826
1827	These are capable of handling Unicode objects and strings on input
1828	(we refer to them as strings in the descriptions) and return
1829	Unicode objects or integers as appropriate. /*
1830
1831	/ Concat two strings giving a new Unicode string. /
1832
1833	PyAPI_FUNC(PyObject*) PyUnicode_Concat(
1834	PyObject left, /* Left string /
1835	PyObject right /* Right string /
1836	);
1837
1838	/ Concat two strings and put the result in pleft
1839	(sets pleft to NULL on error) /
1840
1841	PyAPI_FUNC(void) PyUnicode_Append(
1842	PyObject *pleft, /* Pointer to left string /
1843	PyObject right /* Right string /
1844	);
1845
1846	/ Concat two strings, put the result in pleft and drop the right object
1847	(sets pleft to NULL on error) /
1848
1849	PyAPI_FUNC(void) PyUnicode_AppendAndDel(
1850	PyObject *pleft, /* Pointer to left string /
1851	PyObject right /* Right string /
1852	);
1853
1854	/ Split a string giving a list of Unicode strings.*
1855
1856	If sep is NULL, splitting will be done at all whitespace
1857	substrings. Otherwise, splits occur at the given separator.
1858
1859	At most maxsplit splits will be done. If negative, no limit is set.
1860
1861	Separators are not included in the resulting list.
1862
1863	*/
1864
1865	PyAPI_FUNC(PyObject*) PyUnicode_Split(
1866	PyObject s, /* String to split /
1867	PyObject sep, /* String separator /
1868	Py_ssize_t maxsplit / Maxsplit count /
1869	);
1870
1871	/ Dito, but split at line breaks.*
1872
1873	CRLF is considered to be one line break. Line breaks are not
1874	included in the resulting list. /*
1875
1876	PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
1877	PyObject s, /* String to split /
1878	int keepends / If true, line end markers are included /
1879	);
1880
1881	/ Partition a string using a given separator. /
1882
1883	PyAPI_FUNC(PyObject*) PyUnicode_Partition(
1884	PyObject s, /* String to partition /
1885	PyObject sep /* String separator /
1886	);
1887
1888	/ Partition a string using a given separator, searching from the end of the*
1889	string. /*
1890
1891	PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
1892	PyObject s, /* String to partition /
1893	PyObject sep /* String separator /
1894	);
1895
1896	/ Split a string giving a list of Unicode strings.*
1897
1898	If sep is NULL, splitting will be done at all whitespace
1899	substrings. Otherwise, splits occur at the given separator.
1900
1901	At most maxsplit splits will be done. But unlike PyUnicode_Split
1902	PyUnicode_RSplit splits from the end of the string. If negative,
1903	no limit is set.
1904
1905	Separators are not included in the resulting list.
1906
1907	*/
1908
1909	PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1910	PyObject s, /* String to split /
1911	PyObject sep, /* String separator /
1912	Py_ssize_t maxsplit / Maxsplit count /
1913	);
1914
1915	/ Translate a string by applying a character mapping table to it and*
1916	return the resulting Unicode object.
1917
1918	The mapping table must map Unicode ordinal integers to Unicode
1919	ordinal integers or None (causing deletion of the character).
1920
1921	Mapping tables may be dictionaries or sequences. Unmapped character
1922	ordinals (ones which cause a LookupError) are left untouched and
1923	are copied as-is.
1924
1925	*/
1926
1927	PyAPI_FUNC(PyObject *) PyUnicode_Translate(
1928	PyObject str, /* String /
1929	PyObject table, /* Translate table /
1930	const char errors /* error handling /
1931	);
1932
1933	/ Join a sequence of strings using the given separator and return*
1934	the resulting Unicode string. /*
1935
1936	PyAPI_FUNC(PyObject*) PyUnicode_Join(
1937	PyObject separator, /* Separator string /
1938	PyObject seq /* Sequence object /
1939	);
1940
1941	/ Return 1 if substr matches str[start:end] at the given tail end, 0*
1942	otherwise. /*
1943
1944	PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
1945	PyObject str, /* String /
1946	PyObject substr, /* Prefix or Suffix string /
1947	Py_ssize_t start, / Start index /
1948	Py_ssize_t end, / Stop index /
1949	int direction / Tail end: -1 prefix, +1 suffix /
1950	);
1951
1952	/ Return the first position of substr in str[start:end] using the*
1953	given search direction or -1 if not found. -2 is returned in case
1954	an error occurred and an exception is set. /*
1955
1956	PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
1957	PyObject str, /* String /
1958	PyObject substr, /* Substring to find /
1959	Py_ssize_t start, / Start index /
1960	Py_ssize_t end, / Stop index /
1961	int direction / Find direction: +1 forward, -1 backward /
1962	);
1963
1964	/ Like PyUnicode_Find, but search for single character only. /
1965	PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
1966	PyObject *str,
1967	Py_UCS4 ch,
1968	Py_ssize_t start,
1969	Py_ssize_t end,
1970	int direction
1971	);
1972
1973	/ Count the number of occurrences of substr in str[start:end]. /
1974
1975	PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
1976	PyObject str, /* String /
1977	PyObject substr, /* Substring to count /
1978	Py_ssize_t start, / Start index /
1979	Py_ssize_t end / Stop index /
1980	);
1981
1982	/ Replace at most maxcount occurrences of substr in str with replstr*
1983	and return the resulting Unicode object. /*
1984
1985	PyAPI_FUNC(PyObject *) PyUnicode_Replace(
1986	PyObject str, /* String /
1987	PyObject substr, /* Substring to find /
1988	PyObject replstr, /* Substring to replace /
1989	Py_ssize_t maxcount / Max. number of replacements to apply;*
1990	-1 = all /*
1991	);
1992
1993	/ Compare two strings and return -1, 0, 1 for less than, equal,*
1994	greater than resp.
1995	Raise an exception and return -1 on error. /*
1996
1997	PyAPI_FUNC(int) PyUnicode_Compare(
1998	PyObject left, /* Left string /
1999	PyObject right /* Right string /
2000	);
2001
2002	#ifndef Py_LIMITED_API
2003	PyAPI_FUNC(int) _PyUnicode_CompareWithId(
2004	PyObject left, /* Left string /
2005	_Py_Identifier right /* Right identifier /
2006	);
2007	#endif
2008
2009	PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
2010	PyObject *left,
2011	const char right /* ASCII-encoded string /
2012	);
2013
2014	/ Rich compare two strings and return one of the following:*
2015
2016	- NULL in case an exception was raised
2017	- Py_True or Py_False for successfully comparisons
2018	- Py_NotImplemented in case the type combination is unknown
2019
2020	Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
2021	case the conversion of the arguments to Unicode fails with a
2022	UnicodeDecodeError.
2023
2024	Possible values for op:
2025
2026	Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
2027
2028	*/
2029
2030	PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
2031	PyObject left, /* Left string /
2032	PyObject right, /* Right string /
2033	int op / Operation: Py_EQ, Py_NE, Py_GT, etc. /
2034	);
2035
2036	/ Apply an argument tuple or dictionary to a format string and return*
2037	the resulting Unicode string. /*
2038
2039	PyAPI_FUNC(PyObject *) PyUnicode_Format(
2040	PyObject format, /* Format string /
2041	PyObject args /* Argument tuple or dictionary /
2042	);
2043
2044	/ Checks whether element is contained in container and return 1/0*
2045	accordingly.
2046
2047	element has to coerce to a one element Unicode string. -1 is
2048	returned in case of an error. /*
2049
2050	PyAPI_FUNC(int) PyUnicode_Contains(
2051	PyObject container, /* Container string /
2052	PyObject element /* Element string /
2053	);
2054
2055	/ Checks whether argument is a valid identifier. /
2056
2057	PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
2058
2059	#ifndef Py_LIMITED_API
2060	/ Externally visible for str.strip(unicode) /
2061	PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
2062	PyObject *self,
2063	int striptype,
2064	PyObject *sepobj
2065	);
2066	#endif
2067
2068	/ Using explicit passed-in values, insert the thousands grouping*
2069	into the string pointed to by buffer. For the argument descriptions,
2070	see Objects/stringlib/localeutil.h /*
2071	#ifndef Py_LIMITED_API
2072	PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
2073	PyObject *unicode,
2074	Py_ssize_t index,
2075	Py_ssize_t n_buffer,
2076	void *digits,
2077	Py_ssize_t n_digits,
2078	Py_ssize_t min_width,
2079	const char *grouping,
2080	PyObject *thousands_sep,
2081	Py_UCS4 *maxchar);
2082	#endif
2083	/ === Characters Type APIs =============================================== /
2084
2085	/ Helper array used by Py_UNICODE_ISSPACE(). /
2086
2087	#ifndef Py_LIMITED_API
2088	PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
2089
2090	/ These should not be used directly. Use the Py_UNICODE_IS* and*
2091	Py_UNICODE_TO macros instead.*
2092
2093	These APIs are implemented in Objects/unicodectype.c.
2094
2095	*/
2096
2097	PyAPI_FUNC(int) _PyUnicode_IsLowercase(
2098	Py_UCS4 ch / Unicode character /
2099	);
2100
2101	PyAPI_FUNC(int) _PyUnicode_IsUppercase(
2102	Py_UCS4 ch / Unicode character /
2103	);
2104
2105	PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
2106	Py_UCS4 ch / Unicode character /
2107	);
2108
2109	PyAPI_FUNC(int) _PyUnicode_IsXidStart(
2110	Py_UCS4 ch / Unicode character /
2111	);
2112
2113	PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
2114	Py_UCS4 ch / Unicode character /
2115	);
2116
2117	PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
2118	const Py_UCS4 ch / Unicode character /
2119	);
2120
2121	PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
2122	const Py_UCS4 ch / Unicode character /
2123	);
2124
2125	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
2126	Py_UCS4 ch / Unicode character /
2127	);
2128
2129	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
2130	Py_UCS4 ch / Unicode character /
2131	);
2132
2133	PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
2134	Py_UCS4 ch / Unicode character /
2135	);
2136
2137	PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
2138	Py_UCS4 ch, / Unicode character /
2139	Py_UCS4 *res
2140	);
2141
2142	PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
2143	Py_UCS4 ch, / Unicode character /
2144	Py_UCS4 *res
2145	);
2146
2147	PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
2148	Py_UCS4 ch, / Unicode character /
2149	Py_UCS4 *res
2150	);
2151
2152	PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
2153	Py_UCS4 ch, / Unicode character /
2154	Py_UCS4 *res
2155	);
2156
2157	PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
2158	Py_UCS4 ch / Unicode character /
2159	);
2160
2161	PyAPI_FUNC(int) _PyUnicode_IsCased(
2162	Py_UCS4 ch / Unicode character /
2163	);
2164
2165	PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
2166	Py_UCS4 ch / Unicode character /
2167	);
2168
2169	PyAPI_FUNC(int) _PyUnicode_ToDigit(
2170	Py_UCS4 ch / Unicode character /
2171	);
2172
2173	PyAPI_FUNC(double) _PyUnicode_ToNumeric(
2174	Py_UCS4 ch / Unicode character /
2175	);
2176
2177	PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
2178	Py_UCS4 ch / Unicode character /
2179	);
2180
2181	PyAPI_FUNC(int) _PyUnicode_IsDigit(
2182	Py_UCS4 ch / Unicode character /
2183	);
2184
2185	PyAPI_FUNC(int) _PyUnicode_IsNumeric(
2186	Py_UCS4 ch / Unicode character /
2187	);
2188
2189	PyAPI_FUNC(int) _PyUnicode_IsPrintable(
2190	Py_UCS4 ch / Unicode character /
2191	);
2192
2193	PyAPI_FUNC(int) _PyUnicode_IsAlpha(
2194	Py_UCS4 ch / Unicode character /
2195	);
2196
2197	PyAPI_FUNC(size_t) Py_UNICODE_strlen(
2198	const Py_UNICODE *u
2199	);
2200
2201	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
2202	Py_UNICODE *s1,
2203	const Py_UNICODE *s2);
2204
2205	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
2206	Py_UNICODE s1, const* Py_UNICODE *s2);
2207
2208	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
2209	Py_UNICODE *s1,
2210	const Py_UNICODE *s2,
2211	size_t n);
2212
2213	PyAPI_FUNC(int) Py_UNICODE_strcmp(
2214	const Py_UNICODE *s1,
2215	const Py_UNICODE *s2
2216	);
2217
2218	PyAPI_FUNC(int) Py_UNICODE_strncmp(
2219	const Py_UNICODE *s1,
2220	const Py_UNICODE *s2,
2221	size_t n
2222	);
2223
2224	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
2225	const Py_UNICODE *s,
2226	Py_UNICODE c
2227	);
2228
2229	PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
2230	const Py_UNICODE *s,
2231	Py_UNICODE c
2232	);
2233
2234	PyAPI_FUNC(PyObject) _PyUnicode_FormatLong(PyObject , int, int, int);
2235
2236	/ Create a copy of a unicode string ending with a nul character. Return NULL*
2237	and raise a MemoryError exception on memory allocation failure, otherwise
2238	return a new allocated buffer (use PyMem_Free() to free the buffer). /*
2239
2240	PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
2241	PyObject *unicode
2242	);
2243	#endif /* Py_LIMITED_API */
2244
#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
/* Debug-build-only consistency check; declared only when Py_DEBUG is
   defined and the limited API is not in use. */
PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
    PyObject *op,
    int check_content);
#endif
2250
2251	/ Return an interned Unicode object for an Identifier; may fail if there is no memory./
2252	PyAPI_FUNC(PyObject) _PyUnicode_FromId(_Py_Identifier);
2253	/ Clear all static strings. /
2254	PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
2255
2256	#ifdef __cplusplus
2257	}
2258	#endif
2259	#endif /* !Py_UNICODEOBJECT_H */
2260

Browse the source code of include/python3.5m/unicodeobject.h