1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 1999-2012, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: utf16.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 1999sep09
16* created by: Markus W. Scherer
17*/
18
19/**
20 * \file
21 * \brief C API: 16-bit Unicode handling macros
22 *
23 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
24 *
25 * For more information see utf.h and the ICU User Guide Strings chapter
26 * (https://unicode-org.github.io/icu/userguide/strings).
27 *
28 * <em>Usage:</em>
29 * ICU coding guidelines for if() statements should be followed when using these macros.
30 * Compound statements (curly braces {}) must be used for if-else-while...
31 * bodies and all macro statements should be terminated with semicolon.
32 */
33
34#ifndef __UTF16_H__
35#define __UTF16_H__
36
37#include <stdbool.h>
38#include "unicode/umachine.h"
39#ifndef __UTF_H__
40# include "unicode/utf.h"
41#endif
42
43/* single-code point definitions -------------------------------------------- */
44
/**
 * Tests whether a single 16-bit code unit encodes a code point by itself
 * (a BMP code unit that is not a surrogate).
 * The expansion is the logical negation of U_IS_SURROGATE(c) and is fully
 * parenthesized so that it combines safely with operators at the call site.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
52
/**
 * Tests whether a code unit is a lead surrogate (U+d800..U+dbff).
 * All bits above the low ten are compared against 0xd800.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
60
/**
 * Tests whether a code unit is a trail surrogate (U+dc00..U+dfff).
 * All bits above the low ten are compared against 0xdc00.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
68
/**
 * Tests whether a code unit is any surrogate (U+d800..U+dfff).
 * This is a UTF-16-named alias that forwards to U_IS_SURROGATE(c).
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (U_IS_SURROGATE(c))
76
/**
 * For a value already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tells whether it is a lead surrogate.
 * Only bit 0x400 is inspected: it is clear for a lead surrogate.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
85
/**
 * For a value already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tells whether it is a trail surrogate.
 * Only bit 0x400 is inspected: it is set for a trail surrogate.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
94
/**
 * Helper constant for U16_GET_SUPPLEMENTARY:
 * the first lead surrogate 0xd800 shifted left by 10 bits,
 * plus the first trail surrogate 0xdc00, minus 0x10000.
 * @internal
 */
#define U16_SURROGATE_OFFSET (0xdc00-0x10000+(0xd800<<10))
100
/**
 * Combines a lead and a trail surrogate into the supplementary
 * code point (U+10000..U+10ffff) that the pair encodes.
 * Both inputs are converted to UChar32 before the arithmetic.
 * The result is undefined unless the arguments really are
 * a lead surrogate and a trail surrogate.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    ( ((UChar32)(lead) << 10) + (UChar32)(trail) - U16_SURROGATE_OFFSET )
114
115
/**
 * Computes the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The result is cast to UChar; the whole expansion is parenthesized so the
 * macro behaves as a single primary expression (for example as the operand
 * of sizeof or of a postfix operator).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
124
/**
 * Computes the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The result is cast to UChar; the whole expansion is parenthesized so the
 * macro behaves as a single primary expression (for example as the operand
 * of sizeof or of a postfix operator).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
133
/**
 * Number of 16-bit code units (1 or 2) needed to encode a Unicode code point.
 * The argument is compared as an unsigned 32-bit value against 0xffff.
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
142
/**
 * Upper bound on the number of 16-bit code units that encode
 * one Unicode code point (U+0000..U+10ffff).
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
149
/**
 * Reads the code point at a random-access offset and leaves the offset alone.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset may designate either half of a surrogate pair: for a lead
 * surrogate the following unit is read, for a trail surrogate the preceding
 * unit is read, and the pair is combined into the supplementary code point.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        /* lead: pair with the next unit; trail: pair with the previous unit */ \
        (c)=U16_IS_SURROGATE_LEAD(c) ? \
            U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]) : \
            U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
177
/**
 * Reads the code point at a random-access offset and leaves the offset alone.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may designate either half of a surrogate pair; the adjacent
 * matching surrogate is then read as well and the pair is combined.
 * A lead surrogate is paired only if i+1 differs from length and the next
 * unit is a trail surrogate; a trail surrogate is paired only if i>start and
 * the previous unit is a lead surrogate.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to that unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t u16_c2_; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length)) { \
                u16_c2_=(s)[(i)+1]; \
                if(U16_IS_TRAIL(u16_c2_)) { \
                    (c)=U16_GET_SUPPLEMENTARY((c), u16_c2_); \
                } \
            } \
        } else if((i)>(start)) { \
            u16_c2_=(s)[(i)-1]; \
            if(U16_IS_LEAD(u16_c2_)) { \
                (c)=U16_GET_SUPPLEMENTARY(u16_c2_, (c)); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
216
/**
 * Reads the code point at a random-access offset and leaves the offset alone.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset may designate either half of a surrogate pair; the adjacent
 * matching surrogate is then read as well and the pair is combined.
 * A lead surrogate is paired only if i+1 differs from length and the next
 * unit is a trail surrogate; a trail surrogate is paired only if i>start and
 * the previous unit is a lead surrogate.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t u16_c2_; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            /* u16_c2_ is assigned before && yields true, so it is set when used */ \
            (c)=((i)+1!=(length) && U16_IS_TRAIL(u16_c2_=(s)[(i)+1])) ? \
                U16_GET_SUPPLEMENTARY((c), u16_c2_) : 0xfffd; \
        } else { \
            (c)=((i)>(start) && U16_IS_LEAD(u16_c2_=(s)[(i)-1])) ? \
                U16_GET_SUPPLEMENTARY(u16_c2_, (c)) : 0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
259
260/* definitions with forward iteration --------------------------------------- */
261
/**
 * Reads the code point starting at a code point boundary offset and moves
 * the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * If the offset points to a lead surrogate, the following unit is consumed
 * as its trail surrogate and the pair is combined.
 * If the offset points to a trail surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    ++(i); \
    if(U16_IS_LEAD(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[i]); \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
287
/**
 * Reads the code point starting at a code point boundary offset and moves
 * the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a lead surrogate, the advanced offset differs from
 * length, and the next unit is a trail surrogate, then that trail surrogate
 * is consumed too and the pair is combined.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c) && (i)!=(length)) { \
        uint16_t u16_c2_=(s)[i]; \
        if(U16_IS_TRAIL(u16_c2_)) { \
            (c)=U16_GET_SUPPLEMENTARY((c), u16_c2_); \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
319
/**
 * Reads the code point starting at a code point boundary offset and moves
 * the offset to the next code point boundary.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * If the offset points to a lead surrogate, the advanced offset differs from
 * length, and the next unit is a trail surrogate, then that trail surrogate
 * is consumed too and the pair is combined.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t u16_c2_; \
        /* the next unit is read only for a lead surrogate that is not at the end */ \
        if(!U16_IS_SURROGATE_LEAD(c) || (i)==(length) || !U16_IS_TRAIL(u16_c2_=(s)[i])) { \
            (c)=0xfffd; \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY((c), u16_c2_); \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
353
/**
 * Writes a code point into a string buffer at the given offset,
 * overwriting 1 or 2 code units, and advances the offset past what was
 * written (post-increment).
 * A value above 0xffff (compared as uint32_t) is stored as a surrogate pair.
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
375
/**
 * Writes a code point into a string buffer at the given offset,
 * overwriting 1 or 2 code units, and advances the offset past what was
 * written (post-increment).
 * "Safe" macro, checks for a valid code point.
 * A value up to 0xffff is written as one unit without a capacity check.
 * A surrogate pair is written only if c<=0x10ffff and i+1<capacity.
 * If the code point is not valid or a trail surrogate does not fit,
 * then isError is set to true and nothing is written.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        if((uint32_t)(c)>0x10ffff || (i)+1>=(capacity)) { \
            /* c>0x10ffff or not enough space */ \
            (isError)=true; \
        } else { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
403
/**
 * Moves the string offset forward from one code point boundary to the next:
 * by two units if the unit at the offset is a lead surrogate, otherwise by one.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    (i)+=U16_IS_LEAD((s)[i]) ? 2 : 1; \
} UPRV_BLOCK_MACRO_END
419
/**
 * Moves the string offset forward from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset always advances by one unit; it advances by a second unit only
 * when the first unit is a lead surrogate, the advanced offset differs from
 * length, and the unit there is a trail surrogate.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        if((i)!=(length) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
438
/**
 * Moves the string offset forward by n code points, i.e. from one code point
 * boundary to the n-th next one, by applying U16_FWD_1_UNSAFE n times.
 * Nothing happens if n is not positive.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t u16_n_=(n); u16_n_>0; --u16_n_) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
458
/**
 * Moves the string offset forward by up to n code points, i.e. from one code
 * point boundary to the n-th next one, by applying U16_FWD_1 repeatedly.
 * Iteration stops early once i is no longer below length or, for a negative
 * length, once the unit at the offset is NUL.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t u16_n_=(n); \
            u16_n_>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); \
            --u16_n_) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
481
/**
 * Snaps a random-access offset back to the start of the code point
 * it falls into.
 * If the unit at the offset is a trail surrogate, the offset is decremented
 * by one; otherwise it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        (i)-=1; \
    } \
} UPRV_BLOCK_MACRO_END
500
/**
 * Snaps a random-access offset back to the start of the code point
 * it falls into.
 * The offset is decremented by one only if the unit at the offset is a trail
 * surrogate, i>start, and the preceding unit is a lead surrogate.
 * Otherwise, it is not modified.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
520
521/* definitions with backward iteration -------------------------------------- */
522
/**
 * Steps the string offset back from one code point boundary to the previous
 * one and reads the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate, the unit before that
 * is consumed as its lead surrogate and the pair is combined.
 * If the offset is behind a lead surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    (c)=(s)[i]; \
    if(U16_IS_TRAIL(c)) { \
        --(i); \
        (c)=U16_GET_SUPPLEMENTARY((s)[i], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
549
/**
 * Steps the string offset back from one code point boundary to the previous
 * one and reads the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate, the decremented offset
 * is still above start, and the unit before it is a lead surrogate, then
 * that lead surrogate is consumed too and the pair is combined.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c) && (i)>(start)) { \
        uint16_t u16_c2_=(s)[(i)-1]; \
        if(U16_IS_LEAD(u16_c2_)) { \
            (c)=U16_GET_SUPPLEMENTARY(u16_c2_, (c)); \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
580
/**
 * Steps the string offset back from one code point boundary to the previous
 * one and reads the code point between them.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate, the decremented offset
 * is still above start, and the unit before it is a lead surrogate, then
 * that lead surrogate is consumed too and the pair is combined.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t u16_c2_; \
        /* the previous unit is read only for a trail surrogate that is not at start */ \
        if(!U16_IS_SURROGATE_TRAIL(c) || (i)<=(start) || !U16_IS_LEAD(u16_c2_=(s)[(i)-1])) { \
            (c)=0xfffd; \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY(u16_c2_, (c)); \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
613
/**
 * Steps the string offset back from one code point boundary to the previous
 * one: by two units if the unit before the offset is a trail surrogate,
 * otherwise by one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
630
/**
 * Steps the string offset back from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset always moves back by one unit; it moves back by a second unit
 * only when that unit is a trail surrogate, the decremented offset is still
 * above start, and the unit before it is a lead surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    if(U16_IS_TRAIL((s)[i])) { \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
648
/**
 * Steps the string offset back by n code points, i.e. from one code point
 * boundary to the n-th one before it, by applying U16_BACK_1_UNSAFE n times.
 * Nothing happens if n is not positive.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t u16_n_=(n); u16_n_>0; --u16_n_) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
669
/**
 * Steps the string offset back by up to n code points, i.e. from one code
 * point boundary to the n-th one before it, by applying U16_BACK_1 repeatedly.
 * Iteration stops early once the offset is no longer above start.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t u16_n_=(n); u16_n_>0 && (i)>(start); --u16_n_) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
691
/**
 * Snaps a random-access offset forward to the code point boundary
 * after a code point.
 * If the unit before the offset is a lead surrogate, the offset is
 * incremented by one; otherwise it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        (i)+=1; \
    } \
} UPRV_BLOCK_MACRO_END
710
/**
 * Snaps a random-access offset forward to the code point boundary
 * after a code point.
 * The offset is incremented by one only if start<i, the offset is below
 * length (or length is negative), the unit before the offset is a lead
 * surrogate, and the unit at the offset is a trail surrogate.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((start)<(i) && ((i)<(length) || (length)<0)) { \
        /* units on both sides of the offset are in bounds here */ \
        if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
733
734#endif
735

/* source code of include/unicode/utf16.h */