1/*
2 **********************************************************************
3 * Copyright (C) 1997-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File USCRIPT.H
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 07/06/2001 Ram Creation.
13 ******************************************************************************
14 */
15
16#ifndef USCRIPT_H
17#define USCRIPT_H
18#include "unicode/utypes.h"
19
20/**
21 * \file
22 * \brief C API: Unicode Script Information
23 */
24
/**
 * Constants for ISO 15924 script codes.
 *
 * Many of these script codes - those from Unicode's ScriptNames.txt -
 * are character property values for Unicode's Script property.
 * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).
 *
 * Starting with ICU 3.6, constants for most ISO 15924 script codes
 * are included (currently excluding private-use codes Qaaa..Qabx).
 * For scripts for which there are codes in ISO 15924 but which are not
 * used in the Unicode Character Database (UCD), there are no Unicode characters
 * associated with those scripts.
 *
 * For example, there are no characters that have a UCD script code of
 * Hans or Hant. All Han ideographs have the Hani script code.
 * The Hans and Hant script codes are used with CLDR data.
 *
 * ISO 15924 script codes are included for use with CLDR and similar.
 *
 * Several constants are aliases that share the numeric value of another
 * constant (for example USCRIPT_UCAS and USCRIPT_CANADIAN_ABORIGINAL);
 * they are defined in terms of the constant they alias.
 *
 * @stable ICU 2.2
 */
typedef enum UScriptCode {
    /*
     * Note: UScriptCode constants and their ISO script code comments
     * are parsed by preparseucd.py.
     * It matches lines like
     *     USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * /
     * Alias lines (USCRIPT_X = USCRIPT_Y) do not have that shape.
     */

    /** @stable ICU 2.2 */
    USCRIPT_INVALID_CODE = -1,
    /** @stable ICU 2.2 */
    USCRIPT_COMMON = 0, /* Zyyy */
    /** @stable ICU 2.2 */
    USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
    /** @stable ICU 2.2 */
    USCRIPT_ARABIC = 2, /* Arab */
    /** @stable ICU 2.2 */
    USCRIPT_ARMENIAN = 3, /* Armn */
    /** @stable ICU 2.2 */
    USCRIPT_BENGALI = 4, /* Beng */
    /** @stable ICU 2.2 */
    USCRIPT_BOPOMOFO = 5, /* Bopo */
    /** @stable ICU 2.2 */
    USCRIPT_CHEROKEE = 6, /* Cher */
    /** @stable ICU 2.2 */
    USCRIPT_COPTIC = 7, /* Copt */
    /** @stable ICU 2.2 */
    USCRIPT_CYRILLIC = 8, /* Cyrl */
    /** @stable ICU 2.2 */
    USCRIPT_DESERET = 9, /* Dsrt */
    /** @stable ICU 2.2 */
    USCRIPT_DEVANAGARI = 10, /* Deva */
    /** @stable ICU 2.2 */
    USCRIPT_ETHIOPIC = 11, /* Ethi */
    /** @stable ICU 2.2 */
    USCRIPT_GEORGIAN = 12, /* Geor */
    /** @stable ICU 2.2 */
    USCRIPT_GOTHIC = 13, /* Goth */
    /** @stable ICU 2.2 */
    USCRIPT_GREEK = 14, /* Grek */
    /** @stable ICU 2.2 */
    USCRIPT_GUJARATI = 15, /* Gujr */
    /** @stable ICU 2.2 */
    USCRIPT_GURMUKHI = 16, /* Guru */
    /** @stable ICU 2.2 */
    USCRIPT_HAN = 17, /* Hani */
    /** @stable ICU 2.2 */
    USCRIPT_HANGUL = 18, /* Hang */
    /** @stable ICU 2.2 */
    USCRIPT_HEBREW = 19, /* Hebr */
    /** @stable ICU 2.2 */
    USCRIPT_HIRAGANA = 20, /* Hira */
    /** @stable ICU 2.2 */
    USCRIPT_KANNADA = 21, /* Knda */
    /** @stable ICU 2.2 */
    USCRIPT_KATAKANA = 22, /* Kana */
    /** @stable ICU 2.2 */
    USCRIPT_KHMER = 23, /* Khmr */
    /** @stable ICU 2.2 */
    USCRIPT_LAO = 24, /* Laoo */
    /** @stable ICU 2.2 */
    USCRIPT_LATIN = 25, /* Latn */
    /** @stable ICU 2.2 */
    USCRIPT_MALAYALAM = 26, /* Mlym */
    /** @stable ICU 2.2 */
    USCRIPT_MONGOLIAN = 27, /* Mong */
    /** @stable ICU 2.2 */
    USCRIPT_MYANMAR = 28, /* Mymr */
    /** @stable ICU 2.2 */
    USCRIPT_OGHAM = 29, /* Ogam */
    /** @stable ICU 2.2 */
    USCRIPT_OLD_ITALIC = 30, /* Ital */
    /** @stable ICU 2.2 */
    USCRIPT_ORIYA = 31, /* Orya */
    /** @stable ICU 2.2 */
    USCRIPT_RUNIC = 32, /* Runr */
    /** @stable ICU 2.2 */
    USCRIPT_SINHALA = 33, /* Sinh */
    /** @stable ICU 2.2 */
    USCRIPT_SYRIAC = 34, /* Syrc */
    /** @stable ICU 2.2 */
    USCRIPT_TAMIL = 35, /* Taml */
    /** @stable ICU 2.2 */
    USCRIPT_TELUGU = 36, /* Telu */
    /** @stable ICU 2.2 */
    USCRIPT_THAANA = 37, /* Thaa */
    /** @stable ICU 2.2 */
    USCRIPT_THAI = 38, /* Thai */
    /** @stable ICU 2.2 */
    USCRIPT_TIBETAN = 39, /* Tibt */
    /** Canadian_Aboriginal script. @stable ICU 2.6 */
    USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
    /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
    USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
    /** @stable ICU 2.2 */
    USCRIPT_YI = 41, /* Yiii */
    /* New scripts in Unicode 3.2 */
    /** @stable ICU 2.2 */
    USCRIPT_TAGALOG = 42, /* Tglg */
    /** @stable ICU 2.2 */
    USCRIPT_HANUNOO = 43, /* Hano */
    /** @stable ICU 2.2 */
    USCRIPT_BUHID = 44, /* Buhd */
    /** @stable ICU 2.2 */
    USCRIPT_TAGBANWA = 45, /* Tagb */

    /* New scripts in Unicode 4 */
    /** @stable ICU 2.6 */
    USCRIPT_BRAILLE = 46, /* Brai */
    /** @stable ICU 2.6 */
    USCRIPT_CYPRIOT = 47, /* Cprt */
    /** @stable ICU 2.6 */
    USCRIPT_LIMBU = 48, /* Limb */
    /** @stable ICU 2.6 */
    USCRIPT_LINEAR_B = 49, /* Linb */
    /** @stable ICU 2.6 */
    USCRIPT_OSMANYA = 50, /* Osma */
    /** @stable ICU 2.6 */
    USCRIPT_SHAVIAN = 51, /* Shaw */
    /** @stable ICU 2.6 */
    USCRIPT_TAI_LE = 52, /* Tale */
    /** @stable ICU 2.6 */
    USCRIPT_UGARITIC = 53, /* Ugar */

    /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
    USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */

    /* New scripts in Unicode 4.1 */
    /** @stable ICU 3.4 */
    USCRIPT_BUGINESE = 55, /* Bugi */
    /** @stable ICU 3.4 */
    USCRIPT_GLAGOLITIC = 56, /* Glag */
    /** @stable ICU 3.4 */
    USCRIPT_KHAROSHTHI = 57, /* Khar */
    /** @stable ICU 3.4 */
    USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
    /** @stable ICU 3.4 */
    USCRIPT_NEW_TAI_LUE = 59, /* Talu */
    /** @stable ICU 3.4 */
    USCRIPT_TIFINAGH = 60, /* Tfng */
    /** @stable ICU 3.4 */
    USCRIPT_OLD_PERSIAN = 61, /* Xpeo */

    /* New script codes from ISO 15924 */
    /** @stable ICU 3.6 */
    USCRIPT_BALINESE = 62, /* Bali */
    /** @stable ICU 3.6 */
    USCRIPT_BATAK = 63, /* Batk */
    /** @stable ICU 3.6 */
    USCRIPT_BLISSYMBOLS = 64, /* Blis */
    /** @stable ICU 3.6 */
    USCRIPT_BRAHMI = 65, /* Brah */
    /** @stable ICU 3.6 */
    USCRIPT_CHAM = 66, /* Cham */
    /** @stable ICU 3.6 */
    USCRIPT_CIRTH = 67, /* Cirt */
    /** @stable ICU 3.6 */
    USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
    /** @stable ICU 3.6 */
    USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
    /** @stable ICU 3.6 */
    USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
    /** @stable ICU 3.6 */
    USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
    /** @stable ICU 3.6 */
    USCRIPT_KHUTSURI = 72, /* Geok */
    /** @stable ICU 3.6 */
    USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
    /** @stable ICU 3.6 */
    USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
    /** @stable ICU 3.6 */
    USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
    /** @stable ICU 3.6 */
    USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
    /** @stable ICU 3.6 */
    USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
    /** @stable ICU 3.6 */
    USCRIPT_JAVANESE = 78, /* Java */
    /** @stable ICU 3.6 */
    USCRIPT_KAYAH_LI = 79, /* Kali */
    /** @stable ICU 3.6 */
    USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
    /** @stable ICU 3.6 */
    USCRIPT_LATIN_GAELIC = 81, /* Latg */
    /** @stable ICU 3.6 */
    USCRIPT_LEPCHA = 82, /* Lepc */
    /** @stable ICU 3.6 */
    USCRIPT_LINEAR_A = 83, /* Lina */
    /** @stable ICU 4.6 */
    USCRIPT_MANDAIC = 84, /* Mand */
    /** @stable ICU 3.6 */
    USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
    /** @stable ICU 3.6 */
    USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
    /** @stable ICU 4.6 */
    USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
    /** @stable ICU 3.6 */
    USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
    /** @stable ICU 3.6 */
    USCRIPT_NKO = 87, /* Nkoo */
    /** @stable ICU 3.6 */
    USCRIPT_ORKHON = 88, /* Orkh */
    /** @stable ICU 3.6 */
    USCRIPT_OLD_PERMIC = 89, /* Perm */
    /** @stable ICU 3.6 */
    USCRIPT_PHAGS_PA = 90, /* Phag */
    /** @stable ICU 3.6 */
    USCRIPT_PHOENICIAN = 91, /* Phnx */
    /** @stable ICU 52 */
    USCRIPT_MIAO = 92, /* Plrd */
    /** @stable ICU 3.6 */
    USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
    /** @stable ICU 3.6 */
    USCRIPT_RONGORONGO = 93, /* Roro */
    /** @stable ICU 3.6 */
    USCRIPT_SARATI = 94, /* Sara */
    /** @stable ICU 3.6 */
    USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
    /** @stable ICU 3.6 */
    USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
    /** @stable ICU 3.6 */
    USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
    /** @stable ICU 3.6 */
    USCRIPT_TENGWAR = 98, /* Teng */
    /** @stable ICU 3.6 */
    USCRIPT_VAI = 99, /* Vaii */
    /** @stable ICU 3.6 */
    USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
    /** @stable ICU 3.6 */
    USCRIPT_CUNEIFORM = 101,/* Xsux */
    /** @stable ICU 3.6 */
    USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
    /** @stable ICU 3.6 */
    USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */

    /* New script codes from ISO 15924 */
    /** @stable ICU 3.8 */
    USCRIPT_CARIAN = 104,/* Cari */
    /** @stable ICU 3.8 */
    USCRIPT_JAPANESE = 105,/* Jpan */
    /** @stable ICU 3.8 */
    USCRIPT_LANNA = 106,/* Lana */
    /** @stable ICU 3.8 */
    USCRIPT_LYCIAN = 107,/* Lyci */
    /** @stable ICU 3.8 */
    USCRIPT_LYDIAN = 108,/* Lydi */
    /** @stable ICU 3.8 */
    USCRIPT_OL_CHIKI = 109,/* Olck */
    /** @stable ICU 3.8 */
    USCRIPT_REJANG = 110,/* Rjng */
    /** @stable ICU 3.8 */
    USCRIPT_SAURASHTRA = 111,/* Saur */
    /** @stable ICU 3.8 */
    USCRIPT_SIGN_WRITING = 112,/* Sgnw */
    /** @stable ICU 3.8 */
    USCRIPT_SUNDANESE = 113,/* Sund */
    /** @stable ICU 3.8 */
    USCRIPT_MOON = 114,/* Moon */
    /** @stable ICU 3.8 */
    USCRIPT_MEITEI_MAYEK = 115,/* Mtei */

    /* New script codes from ISO 15924 */
    /** @stable ICU 4.0 */
    USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */
    /** @stable ICU 4.0 */
    USCRIPT_AVESTAN = 117,/* Avst */
    /** @stable ICU 4.0 */
    USCRIPT_CHAKMA = 118,/* Cakm */
    /** @stable ICU 4.0 */
    USCRIPT_KOREAN = 119,/* Kore */
    /** @stable ICU 4.0 */
    USCRIPT_KAITHI = 120,/* Kthi */
    /** @stable ICU 4.0 */
    USCRIPT_MANICHAEAN = 121,/* Mani */
    /** @stable ICU 4.0 */
    USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */
    /** @stable ICU 4.0 */
    USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */
    /** @stable ICU 4.0 */
    USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */
    /** @stable ICU 4.0 */
    USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */
    /** @stable ICU 4.0 */
    USCRIPT_SAMARITAN = 126,/* Samr */
    /** @stable ICU 4.0 */
    USCRIPT_TAI_VIET = 127,/* Tavt */
    /** @stable ICU 4.0 */
    USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
    /** @stable ICU 4.0 */
    USCRIPT_SYMBOLS = 129,/* Zsym */

    /* New script codes from ISO 15924 */
    /** @stable ICU 4.4 */
    USCRIPT_BAMUM = 130,/* Bamu */
    /** @stable ICU 4.4 */
    USCRIPT_LISU = 131,/* Lisu */
    /** @stable ICU 4.4 */
    USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
    /** @stable ICU 4.4 */
    USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */

    /* New script codes from ISO 15924 */
    /** @stable ICU 4.6 */
    USCRIPT_BASSA_VAH = 134,/* Bass */
    /** Misspelled name ("SHORTAND"), kept for source compatibility. @stable ICU 4.6 */
    USCRIPT_DUPLOYAN_SHORTAND = 135,/* Dupl */
    /**
     * Correctly spelled alias of USCRIPT_DUPLOYAN_SHORTAND (same numeric value).
     * NOTE(review): add the API status tag for the release in which this alias ships.
     */
    USCRIPT_DUPLOYAN = USCRIPT_DUPLOYAN_SHORTAND,
    /** @stable ICU 4.6 */
    USCRIPT_ELBASAN = 136,/* Elba */
    /** @stable ICU 4.6 */
    USCRIPT_GRANTHA = 137,/* Gran */
    /** @stable ICU 4.6 */
    USCRIPT_KPELLE = 138,/* Kpel */
    /** @stable ICU 4.6 */
    USCRIPT_LOMA = 139,/* Loma */
    /** @stable ICU 4.6 */
    USCRIPT_MENDE = 140,/* Mend */
    /** @stable ICU 4.6 */
    USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
    /** @stable ICU 4.6 */
    USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
    /** @stable ICU 4.6 */
    USCRIPT_NABATAEAN = 143,/* Nbat */
    /** @stable ICU 4.6 */
    USCRIPT_PALMYRENE = 144,/* Palm */
    /** @stable ICU 4.6 */
    USCRIPT_SINDHI = 145,/* Sind */
    /** @stable ICU 4.6 */
    USCRIPT_WARANG_CITI = 146,/* Wara */

    /** @stable ICU 4.8 */
    USCRIPT_AFAKA = 147,/* Afak */
    /** @stable ICU 4.8 */
    USCRIPT_JURCHEN = 148,/* Jurc */
    /** @stable ICU 4.8 */
    USCRIPT_MRO = 149,/* Mroo */
    /** @stable ICU 4.8 */
    USCRIPT_NUSHU = 150,/* Nshu */
    /** @stable ICU 4.8 */
    USCRIPT_SHARADA = 151,/* Shrd */
    /** @stable ICU 4.8 */
    USCRIPT_SORA_SOMPENG = 152,/* Sora */
    /** @stable ICU 4.8 */
    USCRIPT_TAKRI = 153,/* Takr */
    /** @stable ICU 4.8 */
    USCRIPT_TANGUT = 154,/* Tang */
    /** @stable ICU 4.8 */
    USCRIPT_WOLEAI = 155,/* Wole */

    /** @stable ICU 49 */
    USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */
    /** @stable ICU 49 */
    USCRIPT_KHOJKI = 157,/* Khoj */
    /** @stable ICU 49 */
    USCRIPT_TIRHUTA = 158,/* Tirh */

    /** @stable ICU 52 */
    USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */
    /** @stable ICU 52 */
    USCRIPT_MAHAJANI = 160,/* Mahj */

    /* Private use codes from Qaaa - Qabx are not supported */

    /** One more than the highest script code value defined above. @stable ICU 2.2 */
    USCRIPT_CODE_LIMIT = 161
} UScriptCode;
411
412/**
413 * Gets script codes associated with the given locale or ISO 15924 abbreviation or name.
414 * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym".
415 * Fills in USCRIPT_LATIN given "en" OR "en_US"
416 * If required capacity is greater than capacity of the destination buffer then the error code
417 * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned
418 *
419 * <p>Note: To search by short or long script alias only, use
420 * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does
421 * a fast lookup with no access of the locale data.
422 * @param nameOrAbbrOrLocale name of the script, as given in
423 * PropertyValueAliases.txt, or ISO 15924 code or locale
424 * @param fillIn the UScriptCode buffer to fill in the script code
425 * @param capacity the capacity (size) fo UScriptCode buffer passed in.
426 * @param err the error status code.
427 * @return The number of script codes filled in the buffer passed in
428 * @stable ICU 2.4
429 */
430U_STABLE int32_t U_EXPORT2
431uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
432
433/**
434 * Gets a script name associated with the given script code.
435 * Returns "Malayam" given USCRIPT_MALAYALAM
436 * @param scriptCode UScriptCode enum
437 * @return script long name as given in
438 * PropertyValueAliases.txt, or NULL if scriptCode is invalid
439 * @stable ICU 2.4
440 */
441U_STABLE const char* U_EXPORT2
442uscript_getName(UScriptCode scriptCode);
443
444/**
445 * Gets a script name associated with the given script code.
446 * Returns "Mlym" given USCRIPT_MALAYALAM
447 * @param scriptCode UScriptCode enum
448 * @return script abbreviated name as given in
449 * PropertyValueAliases.txt, or NULL if scriptCode is invalid
450 * @stable ICU 2.4
451 */
452U_STABLE const char* U_EXPORT2
453uscript_getShortName(UScriptCode scriptCode);
454
455/**
456 * Gets the script code associated with the given codepoint.
457 * Returns USCRIPT_MALAYALAM given 0x0D02
458 * @param codepoint UChar32 codepoint
459 * @param err the error status code.
460 * @return The UScriptCode, or 0 if codepoint is invalid
461 * @stable ICU 2.4
462 */
463U_STABLE UScriptCode U_EXPORT2
464uscript_getScript(UChar32 codepoint, UErrorCode *err);
465
466/**
467 * Do the Script_Extensions of code point c contain script sc?
468 * If c does not have explicit Script_Extensions, then this tests whether
469 * c has the Script property value sc.
470 *
471 * Some characters are commonly used in multiple scripts.
472 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
473 *
474 * The Script_Extensions property is provisional. It may be modified or removed
475 * in future versions of the Unicode Standard, and thus in ICU.
476 * @param c code point
477 * @param sc script code
478 * @return TRUE if sc is in Script_Extensions(c)
479 * @stable ICU 49
480 */
481U_STABLE UBool U_EXPORT2
482uscript_hasScript(UChar32 c, UScriptCode sc);
483
484/**
485 * Writes code point c's Script_Extensions as a list of UScriptCode values
486 * to the output scripts array and returns the number of script codes.
487 * - If c does have Script_Extensions, then the Script property value
488 * (normally Common or Inherited) is not included.
489 * - If c does not have Script_Extensions, then the one Script code is written to the output array.
490 * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
491 * In other words, if the return value is 1,
492 * then the output array contains exactly c's single Script code.
493 * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
494 *
495 * Some characters are commonly used in multiple scripts.
496 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
497 *
498 * If there are more than capacity script codes to be written, then
499 * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
500 * (Usual ICU buffer handling behavior.)
501 *
502 * The Script_Extensions property is provisional. It may be modified or removed
503 * in future versions of the Unicode Standard, and thus in ICU.
504 * @param c code point
505 * @param scripts output script code array
506 * @param capacity capacity of the scripts array
507 * @param errorCode Standard ICU error code. Its input value must
508 * pass the U_SUCCESS() test, or else the function returns
509 * immediately. Check for U_FAILURE() on output or use with
510 * function chaining. (See User Guide for details.)
511 * @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
512 * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
513 * @stable ICU 49
514 */
515U_STABLE int32_t U_EXPORT2
516uscript_getScriptExtensions(UChar32 c,
517 UScriptCode *scripts, int32_t capacity,
518 UErrorCode *errorCode);
519
/**
 * Script usage constants.
 * See UAX #31 Unicode Identifier and Pattern Syntax.
 * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
 *
 * The numeric values are consecutive, starting at 0.
 *
 * @stable ICU 51
 */
typedef enum UScriptUsage {
    /** Not encoded in Unicode. @stable ICU 51 */
    USCRIPT_USAGE_NOT_ENCODED = 0,
    /** Unknown script usage. @stable ICU 51 */
    USCRIPT_USAGE_UNKNOWN = 1,
    /** Candidate for Exclusion from Identifiers. @stable ICU 51 */
    USCRIPT_USAGE_EXCLUDED = 2,
    /** Limited Use script. @stable ICU 51 */
    USCRIPT_USAGE_LIMITED_USE = 3,
    /** Aspirational Use script. @stable ICU 51 */
    USCRIPT_USAGE_ASPIRATIONAL = 4,
    /** Recommended script. @stable ICU 51 */
    USCRIPT_USAGE_RECOMMENDED = 5
} UScriptUsage;
541
542/**
543 * Writes the script sample character string.
544 * This string normally consists of one code point but might be longer.
545 * The string is empty if the script is not encoded.
546 *
547 * @param script script code
548 * @param dest output string array
549 * @param capacity number of UChars in the dest array
550 * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
551 * @return the string length, even if U_BUFFER_OVERFLOW_ERROR
552 * @stable ICU 51
553 */
554U_STABLE int32_t U_EXPORT2
555uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
556
#if U_SHOW_CPLUSPLUS_API

/* Forward declaration of UnicodeString, which is the return type used below. */
U_NAMESPACE_BEGIN
class UnicodeString;
U_NAMESPACE_END

/**
 * Returns the sample character string of a script.
 * This string normally consists of one code point but might be longer.
 * The string is empty if the script is not encoded.
 *
 * Declared only when U_SHOW_CPLUSPLUS_API is enabled.
 *
 * @param script script code
 * @return the sample character string
 * @stable ICU 51
 */
U_COMMON_API icu::UnicodeString U_EXPORT2 uscript_getSampleUnicodeString(UScriptCode script);

#endif  /* U_SHOW_CPLUSPLUS_API */
576
577/**
578 * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
579 * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
580 *
581 * @param script script code
582 * @return script usage
583 * @see UScriptUsage
584 * @stable ICU 51
585 */
586U_STABLE UScriptUsage U_EXPORT2
587uscript_getUsage(UScriptCode script);
588
589/**
590 * Returns TRUE if the script is written right-to-left.
591 * For example, Arab and Hebr.
592 *
593 * @param script script code
594 * @return TRUE if the script is right-to-left
595 * @stable ICU 51
596 */
597U_STABLE UBool U_EXPORT2
598uscript_isRightToLeft(UScriptCode script);
599
600/**
601 * Returns TRUE if the script allows line breaks between letters (excluding hyphenation).
602 * Such a script typically requires dictionary-based line breaking.
603 * For example, Hani and Thai.
604 *
605 * @param script script code
606 * @return TRUE if the script allows line breaks between letters
607 * @stable ICU 51
608 */
609U_STABLE UBool U_EXPORT2
610uscript_breaksBetweenLetters(UScriptCode script);
611
612/**
613 * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary.
614 * For example, Latn and Cyrl.
615 *
616 * @param script script code
617 * @return TRUE if the script is cased
618 * @stable ICU 51
619 */
620U_STABLE UBool U_EXPORT2
621uscript_isCased(UScriptCode script);
622
623#endif
624