1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies). |
4 | ** Contact: http://www.qt-project.org/legal |
5 | ** |
6 | ** This file is part of the QtCore module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and Digia. For licensing terms and |
14 | ** conditions see http://qt.digia.com/licensing. For further information |
15 | ** use the contact form at http://qt.digia.com/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 2.1 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 2.1 requirements |
23 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
24 | ** |
25 | ** In addition, as a special exception, Digia gives you certain additional |
26 | ** rights. These rights are described in the Digia Qt LGPL Exception |
27 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
28 | ** |
29 | ** GNU General Public License Usage |
30 | ** Alternatively, this file may be used under the terms of the GNU |
31 | ** General Public License version 3.0 as published by the Free Software |
32 | ** Foundation and appearing in the file LICENSE.GPL included in the |
33 | ** packaging of this file. Please review the following information to |
34 | ** ensure the GNU General Public License version 3.0 requirements will be |
35 | ** met: http://www.gnu.org/copyleft/gpl.html. |
36 | ** |
37 | ** |
38 | ** $QT_END_LICENSE$ |
39 | ** |
40 | ****************************************************************************/ |
41 | |
// Don't define QT_NO_CAST_FROM_ASCII or QT_NO_CAST_TO_ASCII while compiling
// this module, or USERS of Qt will not be able to link.
44 | #ifdef QT_NO_CAST_FROM_ASCII |
45 | # undef QT_NO_CAST_FROM_ASCII |
46 | #endif |
47 | #ifdef QT_NO_CAST_TO_ASCII |
48 | # undef QT_NO_CAST_TO_ASCII |
49 | #endif |
50 | #include "qchar.h" |
51 | |
52 | #include "qdatastream.h" |
53 | #include "qtextcodec.h" |
54 | |
55 | #include "qunicodetables_p.h" |
56 | #include "qunicodetables.cpp" |
57 | |
58 | QT_BEGIN_NAMESPACE |
59 | |
// The 8-bit QChar constructors in this file call
// QTextCodec::codecForCStrings() unless QT_NO_CODEC_FOR_C_STRINGS is defined.
// Define it automatically whenever QT_NO_TEXTCODEC is defined (presumably a
// build without QTextCodec -- confirm against the build configuration).
#ifndef QT_NO_CODEC_FOR_C_STRINGS
# ifdef QT_NO_TEXTCODEC
# define QT_NO_CODEC_FOR_C_STRINGS
# endif
#endif
65 | |
// Turns an enumerator value x into a one-bit mask (bit number x), so that a
// character's category can be tested against a set of categories with a
// single bitwise AND.
#define FLAG(x) (1 << (x))
67 | |
68 | /*! |
69 | \class QLatin1Char |
70 | \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character. |
71 | |
72 | \ingroup string-processing |
73 | |
74 | This class is only useful to avoid the codec for C strings business |
75 | in the QChar(ch) constructor. You can avoid it by writing |
76 | QChar(ch, 0). |
77 | |
78 | \sa QChar, QLatin1String, QString |
79 | */ |
80 | |
81 | /*! |
82 | \fn const char QLatin1Char::toLatin1() const |
83 | |
84 | Converts a Latin-1 character to an 8-bit ASCII representation of |
85 | the character. |
86 | */ |
87 | |
88 | /*! |
89 | \fn const ushort QLatin1Char::unicode() const |
90 | |
Converts a Latin-1 character to a 16-bit-encoded Unicode representation
of the character.
93 | */ |
94 | |
95 | /*! |
96 | \fn QLatin1Char::QLatin1Char(char c) |
97 | |
98 | Constructs a Latin-1 character for \a c. This constructor should be |
99 | used when the encoding of the input character is known to be Latin-1. |
100 | */ |
101 | |
102 | /*! |
103 | \class QChar |
104 | \brief The QChar class provides a 16-bit Unicode character. |
105 | |
106 | \ingroup string-processing |
107 | \reentrant |
108 | |
109 | In Qt, Unicode characters are 16-bit entities without any markup |
110 | or structure. This class represents such an entity. It is |
111 | lightweight, so it can be used everywhere. Most compilers treat |
112 | it like a \c{unsigned short}. |
113 | |
114 | QChar provides a full complement of testing/classification |
115 | functions, converting to and from other formats, converting from |
116 | composed to decomposed Unicode, and trying to compare and |
117 | case-convert if you ask it to. |
118 | |
119 | The classification functions include functions like those in the |
120 | standard C++ header \<cctype\> (formerly \<ctype.h\>), but |
121 | operating on the full range of Unicode characters. They all |
122 | return true if the character is a certain type of character; |
123 | otherwise they return false. These classification functions are |
isNull() (returns true if the character is '\\0'), isPrint()
(true if the character is any sort of printable character,
including whitespace), isPunct() (any sort of punctuation),
isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
sort of numeric character, not just 0-9), isLetterOrNumber(), and
isDigit() (decimal digits). All of these are wrappers around
category(), which returns the Unicode-defined category of each
character.
132 | |
133 | QChar also provides direction(), which indicates the "natural" |
134 | writing direction of this character. The joining() function |
135 | indicates how the character joins with its neighbors (needed |
136 | mostly for Arabic) and finally hasMirrored(), which indicates |
137 | whether the character needs to be mirrored when it is printed in |
138 | its "unnatural" writing direction. |
139 | |
140 | Composed Unicode characters (like \aring) can be converted to |
141 | decomposed Unicode ("a" followed by "ring above") by using |
142 | decomposition(). |
143 | |
144 | In Unicode, comparison is not necessarily possible and case |
145 | conversion is very difficult at best. Unicode, covering the |
146 | "entire" world, also includes most of the world's case and |
147 | sorting problems. operator==() and friends will do comparison |
148 | based purely on the numeric Unicode value (code point) of the |
149 | characters, and toUpper() and toLower() will do case changes when |
150 | the character has a well-defined uppercase/lowercase equivalent. |
151 | For locale-dependent comparisons, use |
152 | QString::localeAwareCompare(). |
153 | |
154 | The conversion functions include unicode() (to a scalar), |
155 | toLatin1() (to scalar, but converts all non-Latin-1 characters to |
156 | 0), row() (gives the Unicode row), cell() (gives the Unicode |
157 | cell), digitValue() (gives the integer value of any of the |
158 | numerous digit characters), and a host of constructors. |
159 | |
160 | QChar provides constructors and cast operators that make it easy |
161 | to convert to and from traditional 8-bit \c{char}s. If you |
162 | defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as |
163 | explained in the QString documentation, you will need to |
164 | explicitly call fromAscii() or fromLatin1(), or use QLatin1Char, |
165 | to construct a QChar from an 8-bit \c char, and you will need to |
166 | call toAscii() or toLatin1() to get the 8-bit value back. |
167 | |
168 | \sa QString, Unicode, QLatin1Char |
169 | */ |
170 | |
171 | /*! |
172 | \enum QChar::UnicodeVersion |
173 | |
174 | Specifies which version of the \l{http://www.unicode.org/}{Unicode standard} |
175 | introduced a certain character. |
176 | |
177 | \value Unicode_1_1 Version 1.1 |
178 | \value Unicode_2_0 Version 2.0 |
179 | \value Unicode_2_1_2 Version 2.1.2 |
180 | \value Unicode_3_0 Version 3.0 |
181 | \value Unicode_3_1 Version 3.1 |
182 | \value Unicode_3_2 Version 3.2 |
183 | \value Unicode_4_0 Version 4.0 |
184 | \value Unicode_4_1 Version 4.1 |
185 | \value Unicode_5_0 Version 5.0 |
186 | \value Unicode_Unassigned The value is not assigned to any character |
187 | in version 5.0 of Unicode. |
188 | |
189 | \sa unicodeVersion() |
190 | */ |
191 | |
192 | /*! |
193 | \enum QChar::Category |
194 | |
195 | This enum maps the Unicode character categories. |
196 | |
The following categories are normative in Unicode:
198 | |
199 | \value Mark_NonSpacing Unicode class name Mn |
200 | |
201 | \value Mark_SpacingCombining Unicode class name Mc |
202 | |
203 | \value Mark_Enclosing Unicode class name Me |
204 | |
205 | \value Number_DecimalDigit Unicode class name Nd |
206 | |
207 | \value Number_Letter Unicode class name Nl |
208 | |
209 | \value Number_Other Unicode class name No |
210 | |
211 | \value Separator_Space Unicode class name Zs |
212 | |
213 | \value Separator_Line Unicode class name Zl |
214 | |
215 | \value Separator_Paragraph Unicode class name Zp |
216 | |
217 | \value Other_Control Unicode class name Cc |
218 | |
219 | \value Other_Format Unicode class name Cf |
220 | |
221 | \value Other_Surrogate Unicode class name Cs |
222 | |
223 | \value Other_PrivateUse Unicode class name Co |
224 | |
225 | \value Other_NotAssigned Unicode class name Cn |
226 | |
227 | |
228 | The following categories are informative in Unicode: |
229 | |
230 | \value Letter_Uppercase Unicode class name Lu |
231 | |
232 | \value Letter_Lowercase Unicode class name Ll |
233 | |
234 | \value Letter_Titlecase Unicode class name Lt |
235 | |
236 | \value Letter_Modifier Unicode class name Lm |
237 | |
238 | \value Letter_Other Unicode class name Lo |
239 | |
240 | \value Punctuation_Connector Unicode class name Pc |
241 | |
242 | \value Punctuation_Dash Unicode class name Pd |
243 | |
244 | \value Punctuation_Open Unicode class name Ps |
245 | |
246 | \value Punctuation_Close Unicode class name Pe |
247 | |
248 | \value Punctuation_InitialQuote Unicode class name Pi |
249 | |
250 | \value Punctuation_FinalQuote Unicode class name Pf |
251 | |
252 | \value Punctuation_Other Unicode class name Po |
253 | |
254 | \value Symbol_Math Unicode class name Sm |
255 | |
256 | \value Symbol_Currency Unicode class name Sc |
257 | |
258 | \value Symbol_Modifier Unicode class name Sk |
259 | |
260 | \value Symbol_Other Unicode class name So |
261 | |
262 | \value NoCategory Qt cannot find an appropriate category for the character. |
263 | |
264 | \omitvalue Punctuation_Dask |
265 | |
266 | \sa category() |
267 | */ |
268 | |
269 | /*! |
270 | \enum QChar::Direction |
271 | |
272 | This enum type defines the Unicode direction attributes. See the |
273 | \l{http://www.unicode.org/}{Unicode Standard} for a description |
274 | of the values. |
275 | |
276 | In order to conform to C/C++ naming conventions "Dir" is prepended |
277 | to the codes used in the Unicode Standard. |
278 | |
279 | \value DirAL |
280 | \value DirAN |
281 | \value DirB |
282 | \value DirBN |
283 | \value DirCS |
284 | \value DirEN |
285 | \value DirES |
286 | \value DirET |
287 | \value DirL |
288 | \value DirLRE |
289 | \value DirLRO |
290 | \value DirNSM |
291 | \value DirON |
292 | \value DirPDF |
293 | \value DirR |
294 | \value DirRLE |
295 | \value DirRLO |
296 | \value DirS |
297 | \value DirWS |
298 | |
299 | \sa direction() |
300 | */ |
301 | |
302 | /*! |
303 | \enum QChar::Decomposition |
304 | |
305 | This enum type defines the Unicode decomposition attributes. See |
306 | the \l{http://www.unicode.org/}{Unicode Standard} for a |
307 | description of the values. |
308 | |
309 | \value NoDecomposition |
310 | \value Canonical |
311 | \value Circle |
312 | \value Compat |
313 | \value Final |
314 | \value Font |
315 | \value Fraction |
316 | \value Initial |
317 | \value Isolated |
318 | \value Medial |
319 | \value Narrow |
320 | \value NoBreak |
321 | \value Small |
322 | \value Square |
323 | \value Sub |
324 | \value Super |
325 | \value Vertical |
326 | \value Wide |
327 | |
328 | \omitvalue Single |
329 | |
330 | \sa decomposition() |
331 | */ |
332 | |
333 | /*! |
334 | \enum QChar::Joining |
335 | |
336 | This enum type defines the Unicode joining attributes. See the |
337 | \l{http://www.unicode.org/}{Unicode Standard} for a description |
338 | of the values. |
339 | |
340 | \value Center |
341 | \value Dual |
342 | \value OtherJoining |
343 | \value Right |
344 | |
345 | \sa joining() |
346 | */ |
347 | |
348 | /*! |
349 | \enum QChar::CombiningClass |
350 | |
351 | \internal |
352 | |
353 | This enum type defines names for some of the Unicode combining |
354 | classes. See the \l{http://www.unicode.org/}{Unicode Standard} |
355 | for a description of the values. |
356 | |
357 | \value Combining_Above |
358 | \value Combining_AboveAttached |
359 | \value Combining_AboveLeft |
360 | \value Combining_AboveLeftAttached |
361 | \value Combining_AboveRight |
362 | \value Combining_AboveRightAttached |
363 | \value Combining_Below |
364 | \value Combining_BelowAttached |
365 | \value Combining_BelowLeft |
366 | \value Combining_BelowLeftAttached |
367 | \value Combining_BelowRight |
368 | \value Combining_BelowRightAttached |
369 | \value Combining_DoubleAbove |
370 | \value Combining_DoubleBelow |
371 | \value Combining_IotaSubscript |
372 | \value Combining_Left |
373 | \value Combining_LeftAttached |
374 | \value Combining_Right |
375 | \value Combining_RightAttached |
376 | */ |
377 | |
378 | /*! |
379 | \enum QChar::SpecialCharacter |
380 | |
381 | \value Null A QChar with this value isNull(). |
382 | \value Nbsp Non-breaking space. |
383 | \value ReplacementCharacter The character shown when a font has no glyph |
384 | for a certain codepoint. A special question mark character is often |
385 | used. Codecs use this codepoint when input data cannot be |
386 | represented in Unicode. |
387 | \value ObjectReplacementCharacter Used to represent an object such as an |
388 | image when such objects cannot be presented. |
389 | \value ByteOrderMark |
390 | \value ByteOrderSwapped |
391 | \value ParagraphSeparator |
392 | \value LineSeparator |
393 | |
394 | \omitvalue null |
395 | \omitvalue replacement |
396 | \omitvalue byteOrderMark |
397 | \omitvalue byteOrderSwapped |
398 | \omitvalue nbsp |
399 | */ |
400 | |
401 | /*! |
402 | \fn void QChar::setCell(uchar cell) |
403 | \internal |
404 | */ |
405 | |
406 | /*! |
407 | \fn void QChar::setRow(uchar row) |
408 | \internal |
409 | */ |
410 | |
411 | /*! |
412 | \fn QChar::QChar() |
413 | |
414 | Constructs a null QChar ('\\0'). |
415 | |
416 | \sa isNull() |
417 | */ |
418 | |
419 | /*! |
420 | \fn QChar::QChar(QLatin1Char ch) |
421 | |
422 | Constructs a QChar corresponding to ASCII/Latin-1 character \a ch. |
423 | */ |
424 | |
425 | /*! |
426 | \fn QChar::QChar(SpecialCharacter ch) |
427 | |
428 | Constructs a QChar for the predefined character value \a ch. |
429 | */ |
430 | |
431 | /*! |
432 | Constructs a QChar corresponding to ASCII/Latin-1 character \a |
433 | ch. |
434 | */ |
435 | QChar::QChar(char ch) |
436 | { |
437 | #ifndef QT_NO_CODEC_FOR_C_STRINGS |
438 | if (QTextCodec::codecForCStrings()) |
439 | // ##### |
440 | ucs = QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode(); |
441 | else |
442 | #endif |
443 | ucs = uchar(ch); |
444 | } |
445 | |
446 | /*! |
447 | Constructs a QChar corresponding to ASCII/Latin-1 character \a ch. |
448 | */ |
449 | QChar::QChar(uchar ch) |
450 | { |
451 | #ifndef QT_NO_CODEC_FOR_C_STRINGS |
452 | if (QTextCodec::codecForCStrings()) { |
453 | // ##### |
454 | char c = char(ch); |
455 | ucs = QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode(); |
456 | } else |
457 | #endif |
458 | ucs = ch; |
459 | } |
460 | |
461 | /*! |
462 | \fn QChar::QChar(uchar cell, uchar row) |
463 | |
464 | Constructs a QChar for Unicode cell \a cell in row \a row. |
465 | |
466 | \sa cell(), row() |
467 | */ |
468 | |
469 | /*! |
470 | \fn QChar::QChar(ushort code) |
471 | |
472 | Constructs a QChar for the character with Unicode code point \a |
473 | code. |
474 | */ |
475 | |
476 | |
477 | /*! |
478 | \fn QChar::QChar(short code) |
479 | |
480 | Constructs a QChar for the character with Unicode code point \a |
481 | code. |
482 | */ |
483 | |
484 | |
485 | /*! |
486 | \fn QChar::QChar(uint code) |
487 | |
488 | Constructs a QChar for the character with Unicode code point \a |
489 | code. |
490 | */ |
491 | |
492 | |
493 | /*! |
494 | \fn QChar::QChar(int code) |
495 | |
496 | Constructs a QChar for the character with Unicode code point \a |
497 | code. |
498 | */ |
499 | |
500 | |
501 | /*! |
502 | \fn bool QChar::isNull() const |
503 | |
504 | Returns true if the character is the Unicode character 0x0000 |
505 | ('\\0'); otherwise returns false. |
506 | */ |
507 | |
508 | /*! |
509 | \fn uchar QChar::cell() const |
510 | |
511 | Returns the cell (least significant byte) of the Unicode |
512 | character. |
513 | |
514 | \sa row() |
515 | */ |
516 | |
517 | /*! |
518 | \fn uchar QChar::row() const |
519 | |
520 | Returns the row (most significant byte) of the Unicode character. |
521 | |
522 | \sa cell() |
523 | */ |
524 | |
525 | /*! |
526 | Returns true if the character is a printable character; otherwise |
527 | returns false. This is any character not of category Cc or Cn. |
528 | |
529 | Note that this gives no indication of whether the character is |
530 | available in a particular font. |
531 | */ |
532 | bool QChar::isPrint() const |
533 | { |
534 | const int test = FLAG(Other_Control) | |
535 | FLAG(Other_NotAssigned); |
536 | return !(FLAG(qGetProp(ucs)->category) & test); |
537 | } |
538 | |
539 | /*! |
540 | Returns true if the character is a separator character |
541 | (Separator_* categories); otherwise returns false. |
542 | */ |
543 | bool QChar::isSpace() const |
544 | { |
545 | if(ucs >= 9 && ucs <=13) |
546 | return true; |
547 | const int test = FLAG(Separator_Space) | |
548 | FLAG(Separator_Line) | |
549 | FLAG(Separator_Paragraph); |
550 | return FLAG(qGetProp(ucs)->category) & test; |
551 | } |
552 | |
553 | /*! |
554 | Returns true if the character is a mark (Mark_* categories); |
555 | otherwise returns false. |
556 | |
557 | See QChar::Category for more information regarding marks. |
558 | */ |
559 | bool QChar::isMark() const |
560 | { |
561 | const int test = FLAG(Mark_NonSpacing) | |
562 | FLAG(Mark_SpacingCombining) | |
563 | FLAG(Mark_Enclosing); |
564 | return FLAG(qGetProp(ucs)->category) & test; |
565 | } |
566 | |
567 | /*! |
568 | Returns true if the character is a punctuation mark (Punctuation_* |
569 | categories); otherwise returns false. |
570 | */ |
571 | bool QChar::isPunct() const |
572 | { |
573 | const int test = FLAG(Punctuation_Connector) | |
574 | FLAG(Punctuation_Dash) | |
575 | FLAG(Punctuation_Open) | |
576 | FLAG(Punctuation_Close) | |
577 | FLAG(Punctuation_InitialQuote) | |
578 | FLAG(Punctuation_FinalQuote) | |
579 | FLAG(Punctuation_Other); |
580 | return FLAG(qGetProp(ucs)->category) & test; |
581 | } |
582 | |
583 | /*! |
584 | Returns true if the character is a letter (Letter_* categories); |
585 | otherwise returns false. |
586 | */ |
587 | bool QChar::isLetter() const |
588 | { |
589 | const int test = FLAG(Letter_Uppercase) | |
590 | FLAG(Letter_Lowercase) | |
591 | FLAG(Letter_Titlecase) | |
592 | FLAG(Letter_Modifier) | |
593 | FLAG(Letter_Other); |
594 | return FLAG(qGetProp(ucs)->category) & test; |
595 | } |
596 | |
597 | /*! |
598 | Returns true if the character is a number (Number_* categories, |
599 | not just 0-9); otherwise returns false. |
600 | |
601 | \sa isDigit() |
602 | */ |
603 | bool QChar::isNumber() const |
604 | { |
605 | const int test = FLAG(Number_DecimalDigit) | |
606 | FLAG(Number_Letter) | |
607 | FLAG(Number_Other); |
608 | return FLAG(qGetProp(ucs)->category) & test; |
609 | } |
610 | |
611 | /*! |
612 | Returns true if the character is a letter or number (Letter_* or |
613 | Number_* categories); otherwise returns false. |
614 | */ |
615 | bool QChar::isLetterOrNumber() const |
616 | { |
617 | const int test = FLAG(Letter_Uppercase) | |
618 | FLAG(Letter_Lowercase) | |
619 | FLAG(Letter_Titlecase) | |
620 | FLAG(Letter_Modifier) | |
621 | FLAG(Letter_Other) | |
622 | FLAG(Number_DecimalDigit) | |
623 | FLAG(Number_Letter) | |
624 | FLAG(Number_Other); |
625 | return FLAG(qGetProp(ucs)->category) & test; |
626 | } |
627 | |
628 | |
629 | /*! |
630 | Returns true if the character is a decimal digit |
631 | (Number_DecimalDigit); otherwise returns false. |
632 | */ |
633 | bool QChar::isDigit() const |
634 | { |
635 | return (qGetProp(ucs)->category == Number_DecimalDigit); |
636 | } |
637 | |
638 | |
639 | /*! |
640 | Returns true if the character is a symbol (Symbol_* categories); |
641 | otherwise returns false. |
642 | */ |
643 | bool QChar::isSymbol() const |
644 | { |
645 | const int test = FLAG(Symbol_Math) | |
646 | FLAG(Symbol_Currency) | |
647 | FLAG(Symbol_Modifier) | |
648 | FLAG(Symbol_Other); |
649 | return FLAG(qGetProp(ucs)->category) & test; |
650 | } |
651 | |
652 | /*! |
653 | \fn bool QChar::isHighSurrogate() const |
654 | |
655 | Returns true if the QChar is the high part of a utf16 surrogate |
656 | (ie. if its code point is between 0xd800 and 0xdbff, inclusive). |
657 | */ |
658 | |
659 | /*! |
660 | \fn bool QChar::isLowSurrogate() const |
661 | |
662 | Returns true if the QChar is the low part of a utf16 surrogate |
663 | (ie. if its code point is between 0xdc00 and 0xdfff, inclusive). |
664 | */ |
665 | |
666 | /*! |
667 | \fn static bool QChar::isHighSurrogate(uint ucs4) |
668 | \since 4.7 |
669 | |
670 | Returns true if the UCS-4-encoded character specified by \a ucs4 |
671 | is the high part of a utf16 surrogate |
672 | (ie. if its code point is between 0xd800 and 0xdbff, inclusive). |
673 | */ |
674 | |
675 | /*! |
676 | \fn static bool QChar::isLowSurrogate(uint ucs4) |
677 | \since 4.7 |
678 | |
679 | Returns true if the UCS-4-encoded character specified by \a ucs4 |
680 | is the low part of a utf16 surrogate |
681 | (ie. if its code point is between 0xdc00 and 0xdfff, inclusive). |
682 | */ |
683 | |
684 | /*! |
685 | \fn static bool QChar::requiresSurrogates(uint ucs4) |
686 | \since 4.7 |
687 | |
Returns true if the UCS-4-encoded character specified by \a ucs4
can be split into the high and low parts of a UTF-16 surrogate pair
(i.e. if its code point is greater than or equal to 0x10000).
691 | */ |
692 | |
693 | /*! |
694 | \fn static uint QChar::surrogateToUcs4(ushort high, ushort low) |
695 | |
696 | Converts a UTF16 surrogate pair with the given \a high and \a low values |
697 | to its UCS-4 code point. |
698 | */ |
699 | |
700 | /*! |
701 | \fn static uint QChar::surrogateToUcs4(QChar high, QChar low) |
702 | |
703 | Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code point. |
704 | */ |
705 | |
706 | /*! |
707 | \fn static ushort QChar::highSurrogate(uint ucs4) |
708 | |
709 | Returns the high surrogate value of a ucs4 code point. |
710 | The returned result is undefined if \a ucs4 is smaller than 0x10000. |
711 | */ |
712 | |
713 | /*! |
714 | \fn static ushort QChar::lowSurrogate(uint ucs4) |
715 | |
716 | Returns the low surrogate value of a ucs4 code point. |
717 | The returned result is undefined if \a ucs4 is smaller than 0x10000. |
718 | */ |
719 | |
720 | /*! |
721 | Returns the numeric value of the digit, or -1 if the character is |
722 | not a digit. |
723 | */ |
724 | int QChar::digitValue() const |
725 | { |
726 | return qGetProp(ucs)->digitValue; |
727 | } |
728 | |
729 | /*! |
730 | \overload |
731 | Returns the numeric value of the digit, specified by the UCS-2-encoded |
732 | character, \a ucs2, or -1 if the character is not a digit. |
733 | */ |
734 | int QChar::digitValue(ushort ucs2) |
735 | { |
736 | return qGetProp(ucs2)->digitValue; |
737 | } |
738 | |
739 | /*! |
740 | \overload |
741 | Returns the numeric value of the digit specified by the UCS-4-encoded |
742 | character, \a ucs4, or -1 if the character is not a digit. |
743 | */ |
744 | int QChar::digitValue(uint ucs4) |
745 | { |
746 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
747 | return 0; |
748 | return qGetProp(ucs4)->digitValue; |
749 | } |
750 | |
751 | /*! |
752 | Returns the character's category. |
753 | */ |
754 | QChar::Category QChar::category() const |
755 | { |
756 | return (QChar::Category) qGetProp(ucs)->category; |
757 | } |
758 | |
759 | /*! |
760 | \overload |
761 | \since 4.3 |
762 | Returns the category of the UCS-4-encoded character specified by \a ucs4. |
763 | */ |
764 | QChar::Category QChar::category(uint ucs4) |
765 | { |
766 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
767 | return QChar::NoCategory; |
768 | return (QChar::Category) qGetProp(ucs4)->category; |
769 | } |
770 | |
771 | /*! |
772 | \overload |
773 | Returns the category of the UCS-2-encoded character specified by \a ucs2. |
774 | */ |
775 | QChar::Category QChar::category(ushort ucs2) |
776 | { |
777 | return (QChar::Category) qGetProp(ucs2)->category; |
778 | } |
779 | |
780 | |
781 | /*! |
782 | Returns the character's direction. |
783 | */ |
784 | QChar::Direction QChar::direction() const |
785 | { |
786 | return (QChar::Direction) qGetProp(ucs)->direction; |
787 | } |
788 | |
789 | /*! |
790 | \overload |
791 | Returns the direction of the UCS-4-encoded character specified by \a ucs4. |
792 | */ |
793 | QChar::Direction QChar::direction(uint ucs4) |
794 | { |
795 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
796 | return QChar::DirL; |
797 | return (QChar::Direction) qGetProp(ucs4)->direction; |
798 | } |
799 | |
800 | /*! |
801 | \overload |
802 | Returns the direction of the UCS-2-encoded character specified by \a ucs2. |
803 | */ |
804 | QChar::Direction QChar::direction(ushort ucs2) |
805 | { |
806 | return (QChar::Direction) qGetProp(ucs2)->direction; |
807 | } |
808 | |
809 | /*! |
810 | Returns information about the joining properties of the character |
811 | (needed for certain languages such as Arabic). |
812 | */ |
813 | QChar::Joining QChar::joining() const |
814 | { |
815 | return (QChar::Joining) qGetProp(ucs)->joining; |
816 | } |
817 | |
818 | /*! |
819 | \overload |
820 | Returns information about the joining properties of the UCS-4-encoded |
821 | character specified by \a ucs4 (needed for certain languages such as |
822 | Arabic). |
823 | */ |
824 | QChar::Joining QChar::joining(uint ucs4) |
825 | { |
826 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
827 | return QChar::OtherJoining; |
828 | return (QChar::Joining) qGetProp(ucs4)->joining; |
829 | } |
830 | |
831 | /*! |
832 | \overload |
833 | Returns information about the joining properties of the UCS-2-encoded |
834 | character specified by \a ucs2 (needed for certain languages such as |
835 | Arabic). |
836 | */ |
837 | QChar::Joining QChar::joining(ushort ucs2) |
838 | { |
839 | return (QChar::Joining) qGetProp(ucs2)->joining; |
840 | } |
841 | |
842 | |
843 | /*! |
844 | Returns true if the character should be reversed if the text |
845 | direction is reversed; otherwise returns false. |
846 | |
847 | Same as (ch.mirroredChar() != ch). |
848 | |
849 | \sa mirroredChar() |
850 | */ |
851 | bool QChar::hasMirrored() const |
852 | { |
853 | return qGetProp(ucs)->mirrorDiff != 0; |
854 | } |
855 | |
856 | /*! |
857 | \fn bool QChar::isLower() const |
858 | |
859 | Returns true if the character is a lowercase letter, i.e. |
860 | category() is Letter_Lowercase. |
861 | |
862 | \sa isUpper(), toLower(), toUpper() |
863 | */ |
864 | |
865 | /*! |
866 | \fn bool QChar::isUpper() const |
867 | |
868 | Returns true if the character is an uppercase letter, i.e. |
869 | category() is Letter_Uppercase. |
870 | |
871 | \sa isLower(), toUpper(), toLower() |
872 | */ |
873 | |
874 | /*! |
875 | \fn bool QChar::isTitleCase() const |
876 | \since 4.3 |
877 | |
878 | Returns true if the character is a titlecase letter, i.e. |
879 | category() is Letter_Titlecase. |
880 | |
881 | \sa isLower(), toUpper(), toLower(), toTitleCase() |
882 | */ |
883 | |
884 | /*! |
885 | Returns the mirrored character if this character is a mirrored |
886 | character; otherwise returns the character itself. |
887 | |
888 | \sa hasMirrored() |
889 | */ |
890 | QChar QChar::mirroredChar() const |
891 | { |
892 | return ucs + qGetProp(ucs)->mirrorDiff; |
893 | } |
894 | |
895 | /*! |
896 | \overload |
897 | Returns the mirrored character if the UCS-4-encoded character specified |
898 | by \a ucs4 is a mirrored character; otherwise returns the character itself. |
899 | |
900 | \sa hasMirrored() |
901 | */ |
902 | uint QChar::mirroredChar(uint ucs4) |
903 | { |
904 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
905 | return ucs4; |
906 | return ucs4 + qGetProp(ucs4)->mirrorDiff; |
907 | } |
908 | |
909 | /*! |
910 | \overload |
911 | Returns the mirrored character if the UCS-2-encoded character specified |
912 | by \a ucs2 is a mirrored character; otherwise returns the character itself. |
913 | |
914 | \sa hasMirrored() |
915 | */ |
916 | ushort QChar::mirroredChar(ushort ucs2) |
917 | { |
918 | return ucs2 + qGetProp(ucs2)->mirrorDiff; |
919 | } |
920 | |
921 | |
// Constants for the arithmetic decomposition of Hangul syllables:
// *Base is the first code point of a range, *Count the number of entries.
enum {
    Hangul_SBase = 0xac00,
    Hangul_LBase = 0x1100,
    Hangul_VBase = 0x1161,
    Hangul_TBase = 0x11a7,
    Hangul_LCount = 19,
    Hangul_VCount = 21,
    Hangul_TCount = 28,
    Hangul_NCount = Hangul_VCount * Hangul_TCount,  // 588
    Hangul_SCount = Hangul_LCount * Hangul_NCount   // 11172
};
933 | |
934 | // buffer has to have a length of 3. It's needed for Hangul decomposition |
935 | static const unsigned short * QT_FASTCALL decompositionHelper |
936 | (uint ucs4, int *length, int *tag, unsigned short *buffer) |
937 | { |
938 | *length = 0; |
939 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
940 | return 0; |
941 | if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) { |
942 | int SIndex = ucs4 - Hangul_SBase; |
943 | buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L |
944 | buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V |
945 | buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T |
946 | *length = buffer[2] == Hangul_TBase ? 2 : 3; |
947 | *tag = QChar::Canonical; |
948 | return buffer; |
949 | } |
950 | |
951 | const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4); |
952 | if (index == 0xffff) |
953 | return 0; |
954 | const unsigned short *decomposition = uc_decomposition_map+index; |
955 | *tag = (*decomposition) & 0xff; |
956 | *length = (*decomposition) >> 8; |
957 | return decomposition+1; |
958 | } |
959 | |
960 | /*! |
961 | Decomposes a character into its parts. Returns an empty string if |
962 | no decomposition exists. |
963 | */ |
964 | QString QChar::decomposition() const |
965 | { |
966 | return decomposition(ucs); |
967 | } |
968 | |
969 | /*! |
970 | \overload |
971 | Decomposes the UCS-4-encoded character specified by \a ucs4 into its |
972 | constituent parts. Returns an empty string if no decomposition exists. |
973 | */ |
974 | QString QChar::decomposition(uint ucs4) |
975 | { |
976 | unsigned short buffer[3]; |
977 | int length; |
978 | int tag; |
979 | const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer); |
980 | return QString::fromUtf16(d, length); |
981 | } |
982 | |
983 | /*! |
984 | Returns the tag defining the composition of the character. Returns |
985 | QChar::Single if no decomposition exists. |
986 | */ |
987 | QChar::Decomposition QChar::decompositionTag() const |
988 | { |
989 | return decompositionTag(ucs); |
990 | } |
991 | |
992 | /*! |
993 | \overload |
994 | Returns the tag defining the composition of the UCS-4-encoded character |
995 | specified by \a ucs4. Returns QChar::Single if no decomposition exists. |
996 | */ |
997 | QChar::Decomposition QChar::decompositionTag(uint ucs4) |
998 | { |
999 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
1000 | return QChar::NoDecomposition; |
1001 | const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4); |
1002 | if (index == 0xffff) |
1003 | return QChar::NoDecomposition; |
1004 | return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff); |
1005 | } |
1006 | |
1007 | /*! |
1008 | Returns the combining class for the character as defined in the |
1009 | Unicode standard. This is mainly useful as a positioning hint for |
1010 | marks attached to a base character. |
1011 | |
1012 | The Qt text rendering engine uses this information to correctly |
1013 | position non-spacing marks around a base character. |
1014 | */ |
1015 | unsigned char QChar::combiningClass() const |
1016 | { |
1017 | return (unsigned char) qGetProp(ucs)->combiningClass; |
1018 | } |
1019 | |
1020 | /*! |
1021 | \overload |
1022 | Returns the combining class for the UCS-4-encoded character specified by |
1023 | \a ucs4, as defined in the Unicode standard. |
1024 | */ |
1025 | unsigned char QChar::combiningClass(uint ucs4) |
1026 | { |
1027 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
1028 | return 0; |
1029 | return (unsigned char) qGetProp(ucs4)->combiningClass; |
1030 | } |
1031 | |
1032 | /*! |
1033 | \overload |
1034 | Returns the combining class for the UCS-2-encoded character specified by |
1035 | \a ucs2, as defined in the Unicode standard. |
1036 | */ |
1037 | unsigned char QChar::combiningClass(ushort ucs2) |
1038 | { |
1039 | return (unsigned char) qGetProp(ucs2)->combiningClass; |
1040 | } |
1041 | |
1042 | /*! |
1043 | Returns the Unicode version that introduced this character. |
1044 | */ |
1045 | QChar::UnicodeVersion QChar::unicodeVersion() const |
1046 | { |
1047 | return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion; |
1048 | } |
1049 | |
1050 | /*! |
1051 | \overload |
1052 | Returns the Unicode version that introduced the character specified in |
1053 | its UCS-4-encoded form as \a ucs4. |
1054 | */ |
1055 | QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4) |
1056 | { |
1057 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
1058 | return QChar::Unicode_Unassigned; |
1059 | return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion; |
1060 | } |
1061 | |
1062 | /*! |
1063 | \overload |
1064 | Returns the Unicode version that introduced the character specified in |
1065 | its UCS-2-encoded form as \a ucs2. |
1066 | */ |
1067 | QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2) |
1068 | { |
1069 | return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion; |
1070 | } |
1071 | |
1072 | /*! |
1073 | \since 4.8 |
1074 | |
1075 | Returns the most recent supported Unicode version. |
1076 | */ |
1077 | QChar::UnicodeVersion QChar::currentUnicodeVersion() |
1078 | { |
1079 | return UNICODE_DATA_VERSION; |
1080 | } |
1081 | |
1082 | /*! |
1083 | Returns the lowercase equivalent if the character is uppercase or titlecase; |
1084 | otherwise returns the character itself. |
1085 | */ |
1086 | QChar QChar::toLower() const |
1087 | { |
1088 | const QUnicodeTables::Properties *p = qGetProp(ucs); |
1089 | if (!p->lowerCaseSpecial) |
1090 | return ucs + p->lowerCaseDiff; |
1091 | return ucs; |
1092 | } |
1093 | |
1094 | /*! |
1095 | \overload |
1096 | Returns the lowercase equivalent of the UCS-4-encoded character specified |
1097 | by \a ucs4 if the character is uppercase or titlecase; otherwise returns |
1098 | the character itself. |
1099 | */ |
1100 | uint QChar::toLower(uint ucs4) |
1101 | { |
1102 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
1103 | return ucs4; |
1104 | const QUnicodeTables::Properties *p = qGetProp(ucs4); |
1105 | if (!p->lowerCaseSpecial) |
1106 | return ucs4 + p->lowerCaseDiff; |
1107 | return ucs4; |
1108 | } |
1109 | |
1110 | /*! |
1111 | \overload |
1112 | Returns the lowercase equivalent of the UCS-2-encoded character specified |
1113 | by \a ucs2 if the character is uppercase or titlecase; otherwise returns |
1114 | the character itself. |
1115 | */ |
1116 | ushort QChar::toLower(ushort ucs2) |
1117 | { |
1118 | const QUnicodeTables::Properties *p = qGetProp(ucs2); |
1119 | if (!p->lowerCaseSpecial) |
1120 | return ucs2 + p->lowerCaseDiff; |
1121 | return ucs2; |
1122 | } |
1123 | |
1124 | /*! |
1125 | Returns the uppercase equivalent if the character is lowercase or titlecase; |
1126 | otherwise returns the character itself. |
1127 | */ |
1128 | QChar QChar::toUpper() const |
1129 | { |
1130 | const QUnicodeTables::Properties *p = qGetProp(ucs); |
1131 | if (!p->upperCaseSpecial) |
1132 | return ucs + p->upperCaseDiff; |
1133 | return ucs; |
1134 | } |
1135 | |
1136 | /*! |
1137 | \overload |
1138 | Returns the uppercase equivalent of the UCS-4-encoded character specified |
1139 | by \a ucs4 if the character is lowercase or titlecase; otherwise returns |
1140 | the character itself. |
1141 | */ |
1142 | uint QChar::toUpper(uint ucs4) |
1143 | { |
1144 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
1145 | return ucs4; |
1146 | const QUnicodeTables::Properties *p = qGetProp(ucs4); |
1147 | if (!p->upperCaseSpecial) |
1148 | return ucs4 + p->upperCaseDiff; |
1149 | return ucs4; |
1150 | } |
1151 | |
1152 | /*! |
1153 | \overload |
1154 | Returns the uppercase equivalent of the UCS-2-encoded character specified |
1155 | by \a ucs2 if the character is lowercase or titlecase; otherwise returns |
1156 | the character itself. |
1157 | */ |
1158 | ushort QChar::toUpper(ushort ucs2) |
1159 | { |
1160 | const QUnicodeTables::Properties *p = qGetProp(ucs2); |
1161 | if (!p->upperCaseSpecial) |
1162 | return ucs2 + p->upperCaseDiff; |
1163 | return ucs2; |
1164 | } |
1165 | |
1166 | /*! |
1167 | Returns the title case equivalent if the character is lowercase or uppercase; |
1168 | otherwise returns the character itself. |
1169 | */ |
1170 | QChar QChar::toTitleCase() const |
1171 | { |
1172 | const QUnicodeTables::Properties *p = qGetProp(ucs); |
1173 | if (!p->titleCaseSpecial) |
1174 | return ucs + p->titleCaseDiff; |
1175 | return ucs; |
1176 | } |
1177 | |
1178 | /*! |
1179 | \overload |
1180 | Returns the title case equivalent of the UCS-4-encoded character specified |
1181 | by \a ucs4 if the character is lowercase or uppercase; otherwise returns |
1182 | the character itself. |
1183 | */ |
1184 | uint QChar::toTitleCase(uint ucs4) |
1185 | { |
1186 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
1187 | return ucs4; |
1188 | const QUnicodeTables::Properties *p = qGetProp(ucs4); |
1189 | if (!p->titleCaseSpecial) |
1190 | return ucs4 + p->titleCaseDiff; |
1191 | return ucs4; |
1192 | } |
1193 | |
1194 | /*! |
1195 | \overload |
1196 | Returns the title case equivalent of the UCS-2-encoded character specified |
1197 | by \a ucs2 if the character is lowercase or uppercase; otherwise returns |
1198 | the character itself. |
1199 | */ |
1200 | ushort QChar::toTitleCase(ushort ucs2) |
1201 | { |
1202 | const QUnicodeTables::Properties *p = qGetProp(ucs2); |
1203 | if (!p->titleCaseSpecial) |
1204 | return ucs2 + p->titleCaseDiff; |
1205 | return ucs2; |
1206 | } |
1207 | |
1208 | |
1209 | static inline uint foldCase(const ushort *ch, const ushort *start) |
1210 | { |
1211 | uint c = *ch; |
1212 | if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate()) |
1213 | c = QChar::surrogateToUcs4(*(ch - 1), c); |
1214 | return *ch + qGetProp(c)->caseFoldDiff; |
1215 | } |
1216 | |
1217 | static inline uint foldCase(uint ch, uint &last) |
1218 | { |
1219 | uint c = ch; |
1220 | if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate()) |
1221 | c = QChar::surrogateToUcs4(last, c); |
1222 | last = ch; |
1223 | return ch + qGetProp(c)->caseFoldDiff; |
1224 | } |
1225 | |
1226 | static inline ushort foldCase(ushort ch) |
1227 | { |
1228 | return ch + qGetProp(ch)->caseFoldDiff; |
1229 | } |
1230 | |
1231 | /*! |
1232 | Returns the case folded equivalent of the character. For most Unicode characters this |
1233 | is the same as toLowerCase(). |
1234 | */ |
1235 | QChar QChar::toCaseFolded() const |
1236 | { |
1237 | return ucs + qGetProp(ucs)->caseFoldDiff; |
1238 | } |
1239 | |
1240 | /*! |
1241 | \overload |
1242 | Returns the case folded equivalent of the UCS-4-encoded character specified |
1243 | by \a ucs4. For most Unicode characters this is the same as toLowerCase(). |
1244 | */ |
1245 | uint QChar::toCaseFolded(uint ucs4) |
1246 | { |
1247 | if (ucs4 > UNICODE_LAST_CODEPOINT) |
1248 | return ucs4; |
1249 | return ucs4 + qGetProp(ucs4)->caseFoldDiff; |
1250 | } |
1251 | |
1252 | /*! |
1253 | \overload |
1254 | Returns the case folded equivalent of the UCS-2-encoded character specified |
1255 | by \a ucs2. For most Unicode characters this is the same as toLowerCase(). |
1256 | */ |
1257 | ushort QChar::toCaseFolded(ushort ucs2) |
1258 | { |
1259 | return ucs2 + qGetProp(ucs2)->caseFoldDiff; |
1260 | } |
1261 | |
1262 | |
1263 | /*! |
1264 | \fn char QChar::latin1() const |
1265 | |
1266 | Use toLatin1() instead. |
1267 | */ |
1268 | |
1269 | /*! |
1270 | \fn char QChar::ascii() const |
1271 | |
1272 | Use toAscii() instead. |
1273 | */ |
1274 | |
1275 | /*! |
1276 | \fn char QChar::toLatin1() const |
1277 | |
1278 | Returns the Latin-1 character equivalent to the QChar, or 0. This |
1279 | is mainly useful for non-internationalized software. |
1280 | |
1281 | \sa toAscii(), unicode(), QTextCodec::codecForCStrings() |
1282 | */ |
1283 | |
1284 | /*! |
1285 | \fn char QChar::toAscii() const |
1286 | Returns the character value of the QChar obtained using the current |
1287 | codec used to read C strings, or 0 if the character is not representable |
1288 | using this codec. The default codec handles Latin-1 encoded text, |
1289 | but this can be changed to assist developers writing source code using |
1290 | other encodings. |
1291 | |
1292 | The main purpose of this function is to preserve ASCII characters used |
1293 | in C strings. This is mainly useful for developers of non-internationalized |
1294 | software. |
1295 | |
1296 | \sa toLatin1(), unicode(), QTextCodec::codecForCStrings() |
1297 | */ |
1298 | #ifdef Q_COMPILER_MANGLES_RETURN_TYPE |
1299 | const char QChar::toAscii() const |
1300 | #else |
1301 | char QChar::toAscii() const |
1302 | #endif |
1303 | { |
1304 | #ifndef QT_NO_CODEC_FOR_C_STRINGS |
1305 | if (QTextCodec::codecForCStrings()) |
1306 | // ##### |
1307 | return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0); |
1308 | #endif |
1309 | return ucs > 0xff ? 0 : char(ucs); |
1310 | } |
1311 | |
1312 | /*! |
1313 | \fn QChar QChar::fromLatin1(char c) |
1314 | |
1315 | Converts the Latin-1 character \a c to its equivalent QChar. This |
1316 | is mainly useful for non-internationalized software. |
1317 | |
1318 | \sa fromAscii(), unicode(), QTextCodec::codecForCStrings() |
1319 | */ |
1320 | |
1321 | /*! |
1322 | Converts the ASCII character \a c to its equivalent QChar. This |
1323 | is mainly useful for non-internationalized software. |
1324 | |
1325 | An alternative is to use QLatin1Char. |
1326 | |
1327 | \sa fromLatin1(), unicode(), QTextCodec::codecForCStrings() |
1328 | */ |
1329 | QChar QChar::fromAscii(char c) |
1330 | { |
1331 | #ifndef QT_NO_CODEC_FOR_C_STRINGS |
1332 | if (QTextCodec::codecForCStrings()) |
1333 | // ##### |
1334 | return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode(); |
1335 | #endif |
1336 | return QChar(ushort((uchar)c)); |
1337 | } |
1338 | |
1339 | #ifndef QT_NO_DATASTREAM |
1340 | /*! |
1341 | \relates QChar |
1342 | |
1343 | Writes the char \a chr to the stream \a out. |
1344 | |
1345 | \sa {Serializing Qt Data Types} |
1346 | */ |
1347 | QDataStream &operator<<(QDataStream &out, const QChar &chr) |
1348 | { |
1349 | out << quint16(chr.unicode()); |
1350 | return out; |
1351 | } |
1352 | |
1353 | /*! |
1354 | \relates QChar |
1355 | |
1356 | Reads a char from the stream \a in into char \a chr. |
1357 | |
1358 | \sa {Serializing Qt Data Types} |
1359 | */ |
1360 | QDataStream &operator>>(QDataStream &in, QChar &chr) |
1361 | { |
1362 | quint16 u; |
1363 | in >> u; |
1364 | chr.unicode() = ushort(u); |
1365 | return in; |
1366 | } |
1367 | #endif // QT_NO_DATASTREAM |
1368 | |
1369 | /*! |
1370 | \fn ushort & QChar::unicode() |
1371 | |
1372 | Returns a reference to the numeric Unicode value of the QChar. |
1373 | */ |
1374 | |
1375 | /*! |
1376 | \fn ushort QChar::unicode() const |
1377 | |
1378 | \overload |
1379 | */ |
1380 | |
1381 | /***************************************************************************** |
1382 | Documentation of QChar related functions |
1383 | *****************************************************************************/ |
1384 | |
1385 | /*! |
1386 | \fn bool operator==(QChar c1, QChar c2) |
1387 | |
1388 | \relates QChar |
1389 | |
1390 | Returns true if \a c1 and \a c2 are the same Unicode character; |
1391 | otherwise returns false. |
1392 | */ |
1393 | |
1394 | /*! |
    \fn bool operator!=(QChar c1, QChar c2)
1396 | |
1397 | \relates QChar |
1398 | |
1399 | Returns true if \a c1 and \a c2 are not the same Unicode |
1400 | character; otherwise returns false. |
1401 | */ |
1402 | |
1403 | /*! |
    \fn bool operator<=(QChar c1, QChar c2)
1405 | |
1406 | \relates QChar |
1407 | |
1408 | Returns true if the numeric Unicode value of \a c1 is less than |
1409 | or equal to that of \a c2; otherwise returns false. |
1410 | */ |
1411 | |
1412 | /*! |
    \fn bool operator>=(QChar c1, QChar c2)
1414 | |
1415 | \relates QChar |
1416 | |
1417 | Returns true if the numeric Unicode value of \a c1 is greater than |
1418 | or equal to that of \a c2; otherwise returns false. |
1419 | */ |
1420 | |
1421 | /*! |
    \fn bool operator<(QChar c1, QChar c2)
1423 | |
1424 | \relates QChar |
1425 | |
1426 | Returns true if the numeric Unicode value of \a c1 is less than |
1427 | that of \a c2; otherwise returns false. |
1428 | */ |
1429 | |
1430 | /*! |
    \fn bool operator>(QChar c1, QChar c2)
1432 | |
1433 | \relates QChar |
1434 | |
1435 | Returns true if the numeric Unicode value of \a c1 is greater than |
1436 | that of \a c2; otherwise returns false. |
1437 | */ |
1438 | |
1439 | /*! |
1440 | \fn bool QChar::mirrored() const |
1441 | |
1442 | Use hasMirrored() instead. |
1443 | */ |
1444 | |
1445 | /*! |
1446 | \fn QChar QChar::lower() const |
1447 | |
1448 | Use toLower() instead. |
1449 | */ |
1450 | |
1451 | /*! |
1452 | \fn QChar QChar::upper() const |
1453 | |
1454 | Use toUpper() instead. |
1455 | */ |
1456 | |
1457 | /*! |
1458 | \fn bool QChar::networkOrdered() |
1459 | |
1460 | See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead. |
1461 | */ |
1462 | |
1463 | |
1464 | // --------------------------------------------------------------------------- |
1465 | |
1466 | |
1467 | static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from) |
1468 | { |
1469 | unsigned short buffer[3]; |
1470 | |
1471 | QString &s = *str; |
1472 | |
1473 | const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data()); |
1474 | const unsigned short *uc = utf16 + s.length(); |
1475 | while (uc != utf16 + from) { |
1476 | uint ucs4 = *(--uc); |
1477 | if (QChar(ucs4).isLowSurrogate() && uc != utf16) { |
1478 | ushort high = *(uc - 1); |
1479 | if (QChar(high).isHighSurrogate()) { |
1480 | --uc; |
1481 | ucs4 = QChar::surrogateToUcs4(high, ucs4); |
1482 | } |
1483 | } |
1484 | QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4); |
1485 | if (v == QChar::Unicode_Unassigned || v > version) |
1486 | continue; |
1487 | int length; |
1488 | int tag; |
1489 | const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer); |
1490 | if (!d || (canonical && tag != QChar::Canonical)) |
1491 | continue; |
1492 | |
1493 | int pos = uc - utf16; |
1494 | s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length); |
1495 | // since the insert invalidates the pointers and we do decomposition recursive |
1496 | utf16 = reinterpret_cast<unsigned short *>(s.data()); |
1497 | uc = utf16 + pos + length; |
1498 | } |
1499 | } |
1500 | |
1501 | |
// One entry of a ligature table. The comparison operators below order
// entries by u1 only, which lets a table be binary-searched with a bare
// code unit as the key.
struct UCS2Pair {
    ushort u1; // search key
    ushort u2; // value stored alongside the key
};

// Returns true if the code unit \a key sorts before the entry \a entry.
inline bool operator<(ushort key, const UCS2Pair &entry)
{
    return key < entry.u1;
}

// Returns true if the entry \a entry sorts before the code unit \a key.
inline bool operator<(const UCS2Pair &entry, ushort key)
{
    return entry.u1 < key;
}
1511 | |
1512 | static ushort ligatureHelper(ushort u1, ushort u2) |
1513 | { |
1514 | // hangul L-V pair |
1515 | int LIndex = u1 - Hangul_LBase; |
1516 | if (0 <= LIndex && LIndex < Hangul_LCount) { |
1517 | int VIndex = u2 - Hangul_VBase; |
1518 | if (0 <= VIndex && VIndex < Hangul_VCount) |
1519 | return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount; |
1520 | } |
1521 | |
1522 | // hangul LV-T pair |
1523 | int SIndex = u1 - Hangul_SBase; |
1524 | if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) { |
1525 | int TIndex = u2 - Hangul_TBase; |
1526 | if (0 <= TIndex && TIndex <= Hangul_TCount) |
1527 | return u1 + TIndex; |
1528 | } |
1529 | |
1530 | const unsigned short index = GET_LIGATURE_INDEX(u2); |
1531 | if (index == 0xffff) |
1532 | return 0; |
1533 | const unsigned short *ligatures = uc_ligature_map+index; |
1534 | ushort length = *ligatures++; |
1535 | { |
1536 | const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures); |
1537 | const UCS2Pair *r = qBinaryFind(data, data + length, u1); |
1538 | if (r != data + length) |
1539 | return r->u2; |
1540 | } |
1541 | |
1542 | return 0; |
1543 | } |
1544 | |
1545 | static void composeHelper(QString *str, QChar::UnicodeVersion version, int from) |
1546 | { |
1547 | QString &s = *str; |
1548 | |
1549 | if (from < 0 || s.length() - from < 2) |
1550 | return; |
1551 | |
1552 | // the loop can partly ignore high Unicode as all ligatures are in the BMP |
1553 | int starter = -2; // to prevent starter == pos - 1 |
1554 | int lastCombining = 255; // to prevent combining > lastCombining |
1555 | int pos = from; |
1556 | while (pos < s.length()) { |
1557 | uint uc = s.at(pos).unicode(); |
1558 | if (QChar(uc).isHighSurrogate() && pos < s.length()-1) { |
1559 | ushort low = s.at(pos+1).unicode(); |
1560 | if (QChar(low).isLowSurrogate()) { |
1561 | uc = QChar::surrogateToUcs4(uc, low); |
1562 | ++pos; |
1563 | } |
1564 | } |
1565 | const QUnicodeTables::Properties *p = qGetProp(uc); |
1566 | if (p->unicodeVersion == QChar::Unicode_Unassigned || p->unicodeVersion > version) { |
1567 | starter = -1; // to prevent starter == pos - 1 |
1568 | lastCombining = 255; // to prevent combining > lastCombining |
1569 | ++pos; |
1570 | continue; |
1571 | } |
1572 | int combining = p->combiningClass; |
1573 | if ((starter == pos - 1 || combining > lastCombining) && starter >= from) { |
1574 | // allowed to form ligature with S |
1575 | QChar ligature = ligatureHelper(s.at(starter).unicode(), uc); |
1576 | if (ligature.unicode()) { |
1577 | s[starter] = ligature; |
1578 | s.remove(pos, 1); |
1579 | continue; |
1580 | } |
1581 | } |
1582 | if (!combining) |
1583 | starter = pos; |
1584 | lastCombining = combining; |
1585 | ++pos; |
1586 | } |
1587 | } |
1588 | |
1589 | |
1590 | static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from) |
1591 | { |
1592 | QString &s = *str; |
1593 | const int l = s.length()-1; |
1594 | int pos = from; |
1595 | while (pos < l) { |
1596 | int p2 = pos+1; |
1597 | uint u1 = s.at(pos).unicode(); |
1598 | if (QChar(u1).isHighSurrogate()) { |
1599 | ushort low = s.at(p2).unicode(); |
1600 | if (QChar(low).isLowSurrogate()) { |
1601 | u1 = QChar::surrogateToUcs4(u1, low); |
1602 | if (p2 >= l) |
1603 | break; |
1604 | ++p2; |
1605 | } |
1606 | } |
1607 | uint u2 = s.at(p2).unicode(); |
1608 | if (QChar(u2).isHighSurrogate() && p2 < l) { |
1609 | ushort low = s.at(p2+1).unicode(); |
1610 | if (QChar(low).isLowSurrogate()) { |
1611 | u2 = QChar::surrogateToUcs4(u2, low); |
1612 | ++p2; |
1613 | } |
1614 | } |
1615 | |
1616 | ushort c2 = 0; |
1617 | { |
1618 | const QUnicodeTables::Properties *p = qGetProp(u2); |
1619 | if (p->unicodeVersion != QChar::Unicode_Unassigned && p->unicodeVersion <= version) |
1620 | c2 = p->combiningClass; |
1621 | } |
1622 | if (c2 == 0) { |
1623 | pos = p2+1; |
1624 | continue; |
1625 | } |
1626 | |
1627 | ushort c1 = 0; |
1628 | { |
1629 | const QUnicodeTables::Properties *p = qGetProp(u1); |
1630 | if (p->unicodeVersion != QChar::Unicode_Unassigned && p->unicodeVersion <= version) |
1631 | c1 = p->combiningClass; |
1632 | } |
1633 | |
1634 | if (c1 > c2) { |
1635 | QChar *uc = s.data(); |
1636 | int p = pos; |
1637 | // exchange characters |
1638 | if (!QChar::requiresSurrogates(u2)) { |
1639 | uc[p++] = u2; |
1640 | } else { |
1641 | uc[p++] = QChar::highSurrogate(u2); |
1642 | uc[p++] = QChar::lowSurrogate(u2); |
1643 | } |
1644 | if (!QChar::requiresSurrogates(u1)) { |
1645 | uc[p++] = u1; |
1646 | } else { |
1647 | uc[p++] = QChar::highSurrogate(u1); |
1648 | uc[p++] = QChar::lowSurrogate(u1); |
1649 | } |
1650 | if (pos > 0) |
1651 | --pos; |
1652 | if (pos > 0 && s.at(pos).isLowSurrogate()) |
1653 | --pos; |
1654 | } else { |
1655 | ++pos; |
1656 | if (QChar::requiresSurrogates(u1)) |
1657 | ++pos; |
1658 | } |
1659 | } |
1660 | } |
1661 | |
1662 | QT_END_NAMESPACE |
1663 | |