1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6* Copyright (C) 1999-2015, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10* file name: umachine.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 1999sep13
16* created by: Markus W. Scherer
17*
18* This file defines basic types and constants for ICU to be
19* platform-independent. umachine.h and utf.h are included into
20* utypes.h to provide all the general definitions for ICU.
21* All of these definitions used to be in utypes.h before
22* the UTF-handling macros made this unmaintainable.
23*/
24
25#ifndef __UMACHINE_H__
26#define __UMACHINE_H__
27
28
29/**
30 * \file
31 * \brief Basic types and constants for UTF
32 *
33 * <h2> Basic types and constants for UTF </h2>
34 * This file defines basic types and constants for utf.h to be
35 * platform-independent. umachine.h and utf.h are included into
36 * utypes.h to provide all the general definitions for ICU.
37 * All of these definitions used to be in utypes.h before
38 * the UTF-handling macros made this unmaintainable.
39 *
40 */
41/*==========================================================================*/
42/* Include platform-dependent definitions */
43/* which are contained in the platform-specific file platform.h */
44/*==========================================================================*/
45
46#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
47
48/*
49 * ANSI C headers:
50 * stddef.h defines wchar_t
51 */
52#include <stdbool.h>
53#include <stddef.h>
54
55/*==========================================================================*/
56/* For C wrappers, we use the symbol U_CAPI. */
57/* This works properly if the includer is C or C++. */
58/* Functions are declared U_CAPI return-type U_EXPORT2 function-name()... */
59/*==========================================================================*/
60
/**
 * \def U_CFUNC
 * This is used in a declaration of a library private ICU C function.
 * Expands to `extern "C"` when compiled as C++ and to plain `extern` in C.
 * @stable ICU 2.4
 */

/**
 * \def U_CDECL_BEGIN
 * This is used to begin a declaration of a library private ICU C API.
 * Expands to `extern "C" {` when compiled as C++ and to nothing in C.
 * Pair every use with U_CDECL_END.
 * @stable ICU 2.4
 */

/**
 * \def U_CDECL_END
 * This is used to end a declaration of a library private ICU C API.
 * Expands to `}` when compiled as C++ and to nothing in C.
 * @stable ICU 2.4
 */

#ifdef __cplusplus
#   define U_CFUNC extern "C"
#   define U_CDECL_BEGIN extern "C" {
#   define U_CDECL_END }
#else
#   define U_CFUNC extern
#   define U_CDECL_BEGIN
#   define U_CDECL_END
#endif
88
#ifndef U_ATTRIBUTE_DEPRECATED
/**
 * \def U_ATTRIBUTE_DEPRECATED
 * Compiler-specific attribute used to mark a declaration as deprecated.
 *  - GCC (U_GCC_MAJOR_MINOR >= 302): __attribute__ ((deprecated))
 *  - Visual C++ (_MSC_VER >= 1400):  __declspec(deprecated)
 *  - otherwise:                      expands to nothing
 * A definition that exists before this point is left untouched.
 * @internal
 */
#   if U_GCC_MAJOR_MINOR >= 302
#       define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
#   elif defined(_MSC_VER) && (_MSC_VER >= 1400)
#       define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
#   else
#       define U_ATTRIBUTE_DEPRECATED
#   endif
#endif
108
/**
 * This is used to declare a function as a public ICU C API.
 * Combines U_CFUNC with U_EXPORT.
 * @stable ICU 2.0
 */
#define U_CAPI U_CFUNC U_EXPORT
/** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API */
#define U_STABLE U_CAPI
/** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API */
#define U_DRAFT U_CAPI
/**
 * This is used to declare a function as a deprecated public ICU C API.
 * Same as U_CAPI plus U_ATTRIBUTE_DEPRECATED.
 */
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
/** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API */
#define U_OBSOLETE U_CAPI
/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
#define U_INTERNAL U_CAPI
121
/**
 * \def U_OVERRIDE
 * Defined to the C++11 "override" keyword unless U_OVERRIDE is already defined.
 * Denotes a class or member which is an override of the base class.
 * May result in an error if it is applied to something that is not an override.
 * @internal
 */
#ifndef U_OVERRIDE
#   define U_OVERRIDE override
#endif
132
/**
 * \def U_FINAL
 * Defined to the C++11 "final" keyword unless U_FINAL is already defined.
 * Denotes a class or member which may not be overridden in subclasses.
 * May result in an error if subclasses attempt to override.
 * When U_IN_DOXYGEN is defined, the definition below is always emitted.
 * @internal
 */
#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
#   define U_FINAL final
#endif
143
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
// a series of statements wrapped in { } blocks. The caller could either treat
// them like actual functions and end the invocation with a trailing ; (creating
// an empty statement after the block), or omit the trailing ; knowing that the
// macro would expand to { }.
//
// That does not work well with macros that look like functions, and it triggers
// compiler warnings about empty statements (ICU-20601). ICU 65 therefore
// switched to the standard solution of wrapping such macros in do { } while.
//
// This, however, breaks existing code that invokes these macros without a
// trailing ; so to remain compatible with such code the wrapper is itself
// defined via macros. That makes it possible to build ICU 65 and later with
// the old macro behaviour, like this:
//
// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
// runConfigureICU ...
//
162
/**
 * \def UPRV_BLOCK_MACRO_BEGIN
 * Opening half of the wrapper around multi-statement macros.
 * Defined as the "do" keyword by default; a definition supplied
 * before this point (e.g. on the compiler command line) takes precedence.
 * @internal
 */
#ifndef UPRV_BLOCK_MACRO_BEGIN
#   define UPRV_BLOCK_MACRO_BEGIN do
#endif

/**
 * \def UPRV_BLOCK_MACRO_END
 * Closing half of the wrapper around multi-statement macros.
 * Defined as "while (false)" by default, so the caller must supply the
 * trailing semicolon; a definition supplied before this point takes precedence.
 * @internal
 */
#ifndef UPRV_BLOCK_MACRO_END
#   define UPRV_BLOCK_MACRO_END while (false)
#endif
180
181/*==========================================================================*/
182/* limits for int32_t etc., like in POSIX inttypes.h */
183/*==========================================================================*/
184
/*
 * Fallback definitions: each macro is defined here only if it has not been
 * defined already. They are written as plain integer constant expressions
 * (no casts) so that, like the standard <stdint.h> limit macros, they can
 * also be evaluated in #if directives.
 */
#ifndef INT8_MIN
/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
#   define INT8_MIN   (-128)
#endif
#ifndef INT16_MIN
/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
#   define INT16_MIN  (-32767-1)
#endif
#ifndef INT32_MIN
/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
#   define INT32_MIN  (-2147483647-1)
#endif

#ifndef INT8_MAX
/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
#   define INT8_MAX   (127)
#endif
#ifndef INT16_MAX
/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
#   define INT16_MAX  (32767)
#endif
#ifndef INT32_MAX
/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
#   define INT32_MAX  (2147483647)
#endif

#ifndef UINT8_MAX
/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
#   define UINT8_MAX  (255)
#endif
#ifndef UINT16_MAX
/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
#   define UINT16_MAX (65535)
#endif
#ifndef UINT32_MAX
/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
#   define UINT32_MAX (4294967295U)
#endif
223
/*
 * 64-bit integer support is mandatory: compilation stops if the platform
 * configuration reports that int64_t is unavailable. Otherwise each macro
 * below is defined only if it has not been defined already.
 */
#if defined(U_INT64_T_UNAVAILABLE)
#   error int64_t is required for decimal format and rule-based number format.
#else
#   ifndef INT64_C
/**
 * Provides a platform independent way to specify a signed 64-bit integer constant.
 * Appends the LL suffix to the literal.
 * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
 * @stable ICU 2.8
 */
#       define INT64_C(c) c ## LL
#   endif
#   ifndef UINT64_C
/**
 * Provides a platform independent way to specify an unsigned 64-bit integer constant.
 * Appends the ULL suffix to the literal.
 * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
 * @stable ICU 2.8
 */
#       define UINT64_C(c) c ## ULL
#   endif
#   ifndef U_INT64_MIN
/**
 * The smallest value a 64 bit signed integer can hold.
 * Written as (-MAX - 1) because the magnitude of the minimum
 * is not representable as a positive signed literal.
 * @stable ICU 2.8
 */
#       define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
#   endif
#   ifndef U_INT64_MAX
/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
#       define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
#   endif
#   ifndef U_UINT64_MAX
/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
#       define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
#   endif
#endif
256
257/*==========================================================================*/
258/* Boolean data type */
259/*==========================================================================*/
260
/**
 * The ICU boolean type, a signed-byte integer (typedef of int8_t).
 * ICU-specific for historical reasons: The C and C++ standards used to not define type bool.
 * Also provides a fixed type definition, as opposed to
 * type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
 *
 * @stable ICU 2.0
 */
typedef int8_t UBool;
270
/**
 * \def U_DEFINE_FALSE_AND_TRUE
 * Normally turns off defining macros FALSE=0 & TRUE=1 in public ICU headers.
 * These obsolete macros sometimes break compilation of other code that
 * defines enum constants or similar with these names.
 * C++ has long defined bool/false/true.
 * C99 also added definitions for these, although as macros; see stdbool.h.
 *
 * You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code.
 *
 * Resolution order:
 *  1. a value defined before this point is used as is;
 *  2. otherwise 1 when one of the ICU *_IMPLEMENTATION macros is defined;
 *  3. otherwise 0.
 *
 * @internal ICU 68
 */
#ifdef U_DEFINE_FALSE_AND_TRUE
    // Use the predefined value.
#elif defined(U_COMBINED_IMPLEMENTATION) || \
        defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
        defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
        defined(U_TOOLUTIL_IMPLEMENTATION)
    // Inside ICU: Keep FALSE & TRUE available.
#   define U_DEFINE_FALSE_AND_TRUE 1
#else
    // Outside ICU: Avoid collision with non-macro definitions of FALSE & TRUE.
#   define U_DEFINE_FALSE_AND_TRUE 0
#endif

/* TRUE and FALSE are only provided when enabled above (or for Doxygen), */
/* and never override definitions that already exist. */
#if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN)
#ifndef TRUE
/**
 * The TRUE value of a UBool.
 *
 * @deprecated ICU 68 Use standard "true" instead.
 */
#   define TRUE  1
#endif
#ifndef FALSE
/**
 * The FALSE value of a UBool.
 *
 * @deprecated ICU 68 Use standard "false" instead.
 */
#   define FALSE 0
#endif
#endif  // U_DEFINE_FALSE_AND_TRUE
314
315/*==========================================================================*/
316/* Unicode data types */
317/*==========================================================================*/
318
319/* wchar_t-related definitions -------------------------------------------- */
320
/*
 * \def U_WCHAR_IS_UTF16
 * Defined if wchar_t uses UTF-16.
 *
 * @stable ICU 2.0
 */
/*
 * \def U_WCHAR_IS_UTF32
 * Defined if wchar_t uses UTF-32.
 *
 * @stable ICU 2.0
 */
/*
 * Detection is skipped when either macro is already defined.
 * Otherwise the first matching case wins:
 *   1. __STDC_ISO_10646__ defined: decide by U_SIZEOF_WCHAR_T (2 or 4).
 *   2. __UCS2__ defined: UTF-16 only for platforms in the
 *      U_PF_OS390..U_PF_OS400 range with a 2-byte wchar_t.
 *   3. __UCS4__ defined, or U_PF_OS400 with __UTF32__: UTF-32 if wchar_t is 4 bytes.
 *   4. Darwin-based, or Linux-based with a 4-byte wchar_t: UTF-32.
 *   5. Platforms with the Win32 API: UTF-16.
 * If nothing matches, neither macro is defined.
 */
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
#   ifdef __STDC_ISO_10646__
#       if (U_SIZEOF_WCHAR_T==2)
#           define U_WCHAR_IS_UTF16
#       elif (U_SIZEOF_WCHAR_T==4)
#           define U_WCHAR_IS_UTF32
#       endif
#   elif defined __UCS2__
#       if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
#           define U_WCHAR_IS_UTF16
#       endif
#   elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
#       if (U_SIZEOF_WCHAR_T==4)
#           define U_WCHAR_IS_UTF32
#       endif
#   elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
#       define U_WCHAR_IS_UTF32
#   elif U_PLATFORM_HAS_WIN32_API
#       define U_WCHAR_IS_UTF16
#   endif
#endif
354
355/* UChar and UChar32 definitions -------------------------------------------- */
356
/** Number of bytes in a UChar (one UTF-16 code unit). @stable ICU 2.0 */
#define U_SIZEOF_UCHAR 2
359
/**
 * \def U_CHAR16_IS_TYPEDEF
 * If 1, then char16_t is a typedef and not a real type (yet).
 * Set to 1 for pre-C++11 C++ on AIX and for MSVC older than _MSC_VER 1900;
 * 0 everywhere else.
 * @internal
 */
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) && (U_CPLUSPLUS_VERSION < 11)
// for AIX, uchar.h needs to be included
#   include <uchar.h>
#   define U_CHAR16_IS_TYPEDEF 1
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
#   define U_CHAR16_IS_TYPEDEF 1
#else
#   define U_CHAR16_IS_TYPEDEF 0
#endif
376
377
/**
 * \var UChar
 *
 * The base type for UTF-16 code units and pointers.
 * Unsigned 16-bit integer.
 * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
 *
 * UChar is configurable by defining the macro UCHAR_TYPE
 * on the preprocessor or compiler command line:
 * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
 * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
 * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
 *
 * Selection order:
 *  1. inside the ICU library code (one of the *_IMPLEMENTATION macros below): char16_t;
 *  2. UCHAR_TYPE, if defined;
 *  3. char16_t when U_CPLUSPLUS_VERSION >= 11;
 *  4. uint16_t otherwise.
 *
 * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
 *
 * In C, char16_t is a simple typedef of uint_least16_t.
 * ICU requires uint_least16_t=uint16_t for data memory mapping.
 * On macOS, char16_t is not available because the uchar.h standard header is missing.
 *
 * @stable ICU 4.4
 */

#if 1
    // #if 1 is normal. UChar defaults to char16_t in C++.
    // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
    // The intltest Makefile #defines UCHAR_TYPE=char16_t,
    // so we only #define it to uint16_t if it is undefined so far.
#elif !defined(UCHAR_TYPE)
#   define UCHAR_TYPE uint16_t
#endif

#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
        defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    // Inside the ICU library code, never configurable.
    typedef char16_t UChar;
#elif defined(UCHAR_TYPE)
    typedef UCHAR_TYPE UChar;
#elif (U_CPLUSPLUS_VERSION >= 11)
    typedef char16_t UChar;
#else
    typedef uint16_t UChar;
#endif
422
/**
 * \var OldUChar
 * Default ICU 58 definition of UChar.
 * A base type for UTF-16 code units and pointers.
 * Unsigned 16-bit integer.
 *
 * Define OldUChar to be wchar_t if that is 16 bits wide.
 * If wchar_t is not 16 bits wide, then define OldUChar to be
 * __CHAR16_TYPE__ if the compiler predefines that macro, or else uint16_t.
 *
 * This makes the definition of OldUChar platform-dependent
 * but allows direct string type compatibility with platforms with
 * 16-bit wchar_t types.
 *
 * This is how UChar was defined in ICU 58, for transition convenience.
 * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
 * The current UChar responds to UCHAR_TYPE but OldUChar does not.
 *
 * @stable ICU 59
 */
#if U_SIZEOF_WCHAR_T==2
    typedef wchar_t OldUChar;
#elif defined(__CHAR16_TYPE__)
    typedef __CHAR16_TYPE__ OldUChar;
#else
    typedef uint16_t OldUChar;
#endif
449
/**
 * Define UChar32 as a type for single Unicode code points.
 * UChar32 is a signed 32-bit integer (same as int32_t).
 *
 * The Unicode code point range is 0..0x10ffff.
 * All other values (negative or >=0x110000) are illegal as Unicode code points.
 * They may be used as sentinel values to indicate "done", "error"
 * or similar non-code point conditions.
 *
 * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
 * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
 * or else to be uint32_t.
 * That is, the definition of UChar32 was platform-dependent.
 *
 * @see U_SENTINEL
 * @stable ICU 2.4
 */
typedef int32_t UChar32;
468
/**
 * This value is intended for sentinel values for APIs that
 * (take or) return single code points (UChar32).
 * It is outside of the Unicode code point range 0..0x10ffff.
 *
 * For example, a "done" or "error" value in a new API
 * could be indicated with U_SENTINEL.
 *
 * ICU APIs designed before ICU 2.4 usually define service-specific "done"
 * values, mostly 0xffff.
 * Those may need to be distinguished from
 * actual U+ffff text contents by calling functions like
 * CharacterIterator::hasNext() or UnicodeString::length().
 *
 * The expansion is parenthesized so that it can be used safely inside
 * larger expressions.
 *
 * @return -1
 * @see UChar32
 * @stable ICU 2.4
 */
#define U_SENTINEL (-1)
488
489#include "unicode/urename.h"
490
491#endif
492

/* Source: include/unicode/umachine.h */