1/****************************************************************************
2**
3** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4** Contact: http://www.qt-project.org/legal
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Digia. For licensing terms and
14** conditions see http://qt.digia.com/licensing. For further information
15** use the contact form at http://qt.digia.com/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 2.1 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 2.1 requirements
23** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24**
25** In addition, as a special exception, Digia gives you certain additional
26** rights. These rights are described in the Digia Qt LGPL Exception
27** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28**
29** GNU General Public License Usage
30** Alternatively, this file may be used under the terms of the GNU
31** General Public License version 3.0 as published by the Free Software
32** Foundation and appearing in the file LICENSE.GPL included in the
33** packaging of this file. Please review the following information to
34** ensure the GNU General Public License version 3.0 requirements will be
35** met: http://www.gnu.org/copyleft/gpl.html.
36**
37**
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42// Don't define it while compiling this module, or USERS of Qt will
43// not be able to link.
44#ifdef QT_NO_CAST_FROM_ASCII
45# undef QT_NO_CAST_FROM_ASCII
46#endif
47#ifdef QT_NO_CAST_TO_ASCII
48# undef QT_NO_CAST_TO_ASCII
49#endif
50#include "qchar.h"
51
52#include "qdatastream.h"
53#include "qtextcodec.h"
54
55#include "qunicodetables_p.h"
56#include "qunicodetables.cpp"
57
58QT_BEGIN_NAMESPACE
59
60#ifndef QT_NO_CODEC_FOR_C_STRINGS
61# ifdef QT_NO_TEXTCODEC
62# define QT_NO_CODEC_FOR_C_STRINGS
63# endif
64#endif
65
// Turns a small enum value (such as a QChar::Category) into a single-bit
// mask, so several values can be OR-ed together and tested with one AND.
#define FLAG(bit) (1 << (bit))
67
68/*!
69 \class QLatin1Char
70 \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
71
72 \ingroup string-processing
73
74 This class is only useful to avoid the codec for C strings business
75 in the QChar(ch) constructor. You can avoid it by writing
76 QChar(ch, 0).
77
78 \sa QChar, QLatin1String, QString
79*/
80
81/*!
82 \fn const char QLatin1Char::toLatin1() const
83
84 Converts a Latin-1 character to an 8-bit ASCII representation of
85 the character.
86*/
87
88/*!
89 \fn const ushort QLatin1Char::unicode() const
90
    Converts a Latin-1 character to a 16-bit-encoded Unicode representation
92 of the character.
93*/
94
95/*!
96 \fn QLatin1Char::QLatin1Char(char c)
97
98 Constructs a Latin-1 character for \a c. This constructor should be
99 used when the encoding of the input character is known to be Latin-1.
100*/
101
102/*!
103 \class QChar
104 \brief The QChar class provides a 16-bit Unicode character.
105
106 \ingroup string-processing
107 \reentrant
108
109 In Qt, Unicode characters are 16-bit entities without any markup
110 or structure. This class represents such an entity. It is
111 lightweight, so it can be used everywhere. Most compilers treat
112 it like a \c{unsigned short}.
113
114 QChar provides a full complement of testing/classification
115 functions, converting to and from other formats, converting from
116 composed to decomposed Unicode, and trying to compare and
117 case-convert if you ask it to.
118
119 The classification functions include functions like those in the
120 standard C++ header \<cctype\> (formerly \<ctype.h\>), but
121 operating on the full range of Unicode characters. They all
122 return true if the character is a certain type of character;
123 otherwise they return false. These classification functions are
124 isNull() (returns true if the character is '\\0'), isPrint()
125 (true if the character is any sort of printable character,
    including whitespace), isPunct() (any sort of punctuation),
    isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
    sort of numeric character, not just 0-9), isLetterOrNumber(), and
    isDigit() (decimal digits). All of these are wrappers around
    category() which returns the Unicode-defined category of each
131 character.
132
133 QChar also provides direction(), which indicates the "natural"
134 writing direction of this character. The joining() function
135 indicates how the character joins with its neighbors (needed
136 mostly for Arabic) and finally hasMirrored(), which indicates
137 whether the character needs to be mirrored when it is printed in
138 its "unnatural" writing direction.
139
140 Composed Unicode characters (like \aring) can be converted to
141 decomposed Unicode ("a" followed by "ring above") by using
142 decomposition().
143
144 In Unicode, comparison is not necessarily possible and case
145 conversion is very difficult at best. Unicode, covering the
146 "entire" world, also includes most of the world's case and
147 sorting problems. operator==() and friends will do comparison
148 based purely on the numeric Unicode value (code point) of the
149 characters, and toUpper() and toLower() will do case changes when
150 the character has a well-defined uppercase/lowercase equivalent.
151 For locale-dependent comparisons, use
152 QString::localeAwareCompare().
153
154 The conversion functions include unicode() (to a scalar),
155 toLatin1() (to scalar, but converts all non-Latin-1 characters to
156 0), row() (gives the Unicode row), cell() (gives the Unicode
157 cell), digitValue() (gives the integer value of any of the
158 numerous digit characters), and a host of constructors.
159
160 QChar provides constructors and cast operators that make it easy
161 to convert to and from traditional 8-bit \c{char}s. If you
162 defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
163 explained in the QString documentation, you will need to
164 explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
165 to construct a QChar from an 8-bit \c char, and you will need to
166 call toAscii() or toLatin1() to get the 8-bit value back.
167
168 \sa QString, Unicode, QLatin1Char
169*/
170
171/*!
172 \enum QChar::UnicodeVersion
173
174 Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
175 introduced a certain character.
176
177 \value Unicode_1_1 Version 1.1
178 \value Unicode_2_0 Version 2.0
179 \value Unicode_2_1_2 Version 2.1.2
180 \value Unicode_3_0 Version 3.0
181 \value Unicode_3_1 Version 3.1
182 \value Unicode_3_2 Version 3.2
183 \value Unicode_4_0 Version 4.0
184 \value Unicode_4_1 Version 4.1
185 \value Unicode_5_0 Version 5.0
186 \value Unicode_Unassigned The value is not assigned to any character
187 in version 5.0 of Unicode.
188
189 \sa unicodeVersion()
190*/
191
192/*!
193 \enum QChar::Category
194
195 This enum maps the Unicode character categories.
196
    The following categories are normative in Unicode:
198
199 \value Mark_NonSpacing Unicode class name Mn
200
201 \value Mark_SpacingCombining Unicode class name Mc
202
203 \value Mark_Enclosing Unicode class name Me
204
205 \value Number_DecimalDigit Unicode class name Nd
206
207 \value Number_Letter Unicode class name Nl
208
209 \value Number_Other Unicode class name No
210
211 \value Separator_Space Unicode class name Zs
212
213 \value Separator_Line Unicode class name Zl
214
215 \value Separator_Paragraph Unicode class name Zp
216
217 \value Other_Control Unicode class name Cc
218
219 \value Other_Format Unicode class name Cf
220
221 \value Other_Surrogate Unicode class name Cs
222
223 \value Other_PrivateUse Unicode class name Co
224
225 \value Other_NotAssigned Unicode class name Cn
226
227
228 The following categories are informative in Unicode:
229
230 \value Letter_Uppercase Unicode class name Lu
231
232 \value Letter_Lowercase Unicode class name Ll
233
234 \value Letter_Titlecase Unicode class name Lt
235
236 \value Letter_Modifier Unicode class name Lm
237
238 \value Letter_Other Unicode class name Lo
239
240 \value Punctuation_Connector Unicode class name Pc
241
242 \value Punctuation_Dash Unicode class name Pd
243
244 \value Punctuation_Open Unicode class name Ps
245
246 \value Punctuation_Close Unicode class name Pe
247
248 \value Punctuation_InitialQuote Unicode class name Pi
249
250 \value Punctuation_FinalQuote Unicode class name Pf
251
252 \value Punctuation_Other Unicode class name Po
253
254 \value Symbol_Math Unicode class name Sm
255
256 \value Symbol_Currency Unicode class name Sc
257
258 \value Symbol_Modifier Unicode class name Sk
259
260 \value Symbol_Other Unicode class name So
261
262 \value NoCategory Qt cannot find an appropriate category for the character.
263
264 \omitvalue Punctuation_Dask
265
266 \sa category()
267*/
268
269/*!
270 \enum QChar::Direction
271
272 This enum type defines the Unicode direction attributes. See the
273 \l{http://www.unicode.org/}{Unicode Standard} for a description
274 of the values.
275
276 In order to conform to C/C++ naming conventions "Dir" is prepended
277 to the codes used in the Unicode Standard.
278
279 \value DirAL
280 \value DirAN
281 \value DirB
282 \value DirBN
283 \value DirCS
284 \value DirEN
285 \value DirES
286 \value DirET
287 \value DirL
288 \value DirLRE
289 \value DirLRO
290 \value DirNSM
291 \value DirON
292 \value DirPDF
293 \value DirR
294 \value DirRLE
295 \value DirRLO
296 \value DirS
297 \value DirWS
298
299 \sa direction()
300*/
301
302/*!
303 \enum QChar::Decomposition
304
305 This enum type defines the Unicode decomposition attributes. See
306 the \l{http://www.unicode.org/}{Unicode Standard} for a
307 description of the values.
308
309 \value NoDecomposition
310 \value Canonical
311 \value Circle
312 \value Compat
313 \value Final
314 \value Font
315 \value Fraction
316 \value Initial
317 \value Isolated
318 \value Medial
319 \value Narrow
320 \value NoBreak
321 \value Small
322 \value Square
323 \value Sub
324 \value Super
325 \value Vertical
326 \value Wide
327
328 \omitvalue Single
329
330 \sa decomposition()
331*/
332
333/*!
334 \enum QChar::Joining
335
336 This enum type defines the Unicode joining attributes. See the
337 \l{http://www.unicode.org/}{Unicode Standard} for a description
338 of the values.
339
340 \value Center
341 \value Dual
342 \value OtherJoining
343 \value Right
344
345 \sa joining()
346*/
347
348/*!
349 \enum QChar::CombiningClass
350
351 \internal
352
353 This enum type defines names for some of the Unicode combining
354 classes. See the \l{http://www.unicode.org/}{Unicode Standard}
355 for a description of the values.
356
357 \value Combining_Above
358 \value Combining_AboveAttached
359 \value Combining_AboveLeft
360 \value Combining_AboveLeftAttached
361 \value Combining_AboveRight
362 \value Combining_AboveRightAttached
363 \value Combining_Below
364 \value Combining_BelowAttached
365 \value Combining_BelowLeft
366 \value Combining_BelowLeftAttached
367 \value Combining_BelowRight
368 \value Combining_BelowRightAttached
369 \value Combining_DoubleAbove
370 \value Combining_DoubleBelow
371 \value Combining_IotaSubscript
372 \value Combining_Left
373 \value Combining_LeftAttached
374 \value Combining_Right
375 \value Combining_RightAttached
376*/
377
378/*!
379 \enum QChar::SpecialCharacter
380
381 \value Null A QChar with this value isNull().
382 \value Nbsp Non-breaking space.
383 \value ReplacementCharacter The character shown when a font has no glyph
384 for a certain codepoint. A special question mark character is often
385 used. Codecs use this codepoint when input data cannot be
386 represented in Unicode.
387 \value ObjectReplacementCharacter Used to represent an object such as an
388 image when such objects cannot be presented.
389 \value ByteOrderMark
390 \value ByteOrderSwapped
391 \value ParagraphSeparator
392 \value LineSeparator
393
394 \omitvalue null
395 \omitvalue replacement
396 \omitvalue byteOrderMark
397 \omitvalue byteOrderSwapped
398 \omitvalue nbsp
399*/
400
401/*!
402 \fn void QChar::setCell(uchar cell)
403 \internal
404*/
405
406/*!
407 \fn void QChar::setRow(uchar row)
408 \internal
409*/
410
411/*!
412 \fn QChar::QChar()
413
414 Constructs a null QChar ('\\0').
415
416 \sa isNull()
417*/
418
419/*!
420 \fn QChar::QChar(QLatin1Char ch)
421
422 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
423*/
424
425/*!
426 \fn QChar::QChar(SpecialCharacter ch)
427
428 Constructs a QChar for the predefined character value \a ch.
429*/
430
431/*!
432 Constructs a QChar corresponding to ASCII/Latin-1 character \a
433 ch.
434*/
435QChar::QChar(char ch)
436{
437#ifndef QT_NO_CODEC_FOR_C_STRINGS
438 if (QTextCodec::codecForCStrings())
439 // #####
440 ucs = QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode();
441 else
442#endif
443 ucs = uchar(ch);
444}
445
446/*!
447 Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
448*/
449QChar::QChar(uchar ch)
450{
451#ifndef QT_NO_CODEC_FOR_C_STRINGS
452 if (QTextCodec::codecForCStrings()) {
453 // #####
454 char c = char(ch);
455 ucs = QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
456 } else
457#endif
458 ucs = ch;
459}
460
461/*!
462 \fn QChar::QChar(uchar cell, uchar row)
463
464 Constructs a QChar for Unicode cell \a cell in row \a row.
465
466 \sa cell(), row()
467*/
468
469/*!
470 \fn QChar::QChar(ushort code)
471
472 Constructs a QChar for the character with Unicode code point \a
473 code.
474*/
475
476
477/*!
478 \fn QChar::QChar(short code)
479
480 Constructs a QChar for the character with Unicode code point \a
481 code.
482*/
483
484
485/*!
486 \fn QChar::QChar(uint code)
487
488 Constructs a QChar for the character with Unicode code point \a
489 code.
490*/
491
492
493/*!
494 \fn QChar::QChar(int code)
495
496 Constructs a QChar for the character with Unicode code point \a
497 code.
498*/
499
500
501/*!
502 \fn bool QChar::isNull() const
503
504 Returns true if the character is the Unicode character 0x0000
505 ('\\0'); otherwise returns false.
506*/
507
508/*!
509 \fn uchar QChar::cell() const
510
511 Returns the cell (least significant byte) of the Unicode
512 character.
513
514 \sa row()
515*/
516
517/*!
518 \fn uchar QChar::row() const
519
520 Returns the row (most significant byte) of the Unicode character.
521
522 \sa cell()
523*/
524
525/*!
526 Returns true if the character is a printable character; otherwise
527 returns false. This is any character not of category Cc or Cn.
528
529 Note that this gives no indication of whether the character is
530 available in a particular font.
531*/
532bool QChar::isPrint() const
533{
534 const int test = FLAG(Other_Control) |
535 FLAG(Other_NotAssigned);
536 return !(FLAG(qGetProp(ucs)->category) & test);
537}
538
539/*!
    Returns true if the character is a separator character
    (Separator_* categories) or has a code point in the range 9 to 13
    (tab, line feed, vertical tab, form feed and carriage return);
    otherwise returns false.
542*/
543bool QChar::isSpace() const
544{
545 if(ucs >= 9 && ucs <=13)
546 return true;
547 const int test = FLAG(Separator_Space) |
548 FLAG(Separator_Line) |
549 FLAG(Separator_Paragraph);
550 return FLAG(qGetProp(ucs)->category) & test;
551}
552
553/*!
554 Returns true if the character is a mark (Mark_* categories);
555 otherwise returns false.
556
557 See QChar::Category for more information regarding marks.
558*/
559bool QChar::isMark() const
560{
561 const int test = FLAG(Mark_NonSpacing) |
562 FLAG(Mark_SpacingCombining) |
563 FLAG(Mark_Enclosing);
564 return FLAG(qGetProp(ucs)->category) & test;
565}
566
567/*!
568 Returns true if the character is a punctuation mark (Punctuation_*
569 categories); otherwise returns false.
570*/
571bool QChar::isPunct() const
572{
573 const int test = FLAG(Punctuation_Connector) |
574 FLAG(Punctuation_Dash) |
575 FLAG(Punctuation_Open) |
576 FLAG(Punctuation_Close) |
577 FLAG(Punctuation_InitialQuote) |
578 FLAG(Punctuation_FinalQuote) |
579 FLAG(Punctuation_Other);
580 return FLAG(qGetProp(ucs)->category) & test;
581}
582
583/*!
584 Returns true if the character is a letter (Letter_* categories);
585 otherwise returns false.
586*/
587bool QChar::isLetter() const
588{
589 const int test = FLAG(Letter_Uppercase) |
590 FLAG(Letter_Lowercase) |
591 FLAG(Letter_Titlecase) |
592 FLAG(Letter_Modifier) |
593 FLAG(Letter_Other);
594 return FLAG(qGetProp(ucs)->category) & test;
595}
596
597/*!
598 Returns true if the character is a number (Number_* categories,
599 not just 0-9); otherwise returns false.
600
601 \sa isDigit()
602*/
603bool QChar::isNumber() const
604{
605 const int test = FLAG(Number_DecimalDigit) |
606 FLAG(Number_Letter) |
607 FLAG(Number_Other);
608 return FLAG(qGetProp(ucs)->category) & test;
609}
610
611/*!
612 Returns true if the character is a letter or number (Letter_* or
613 Number_* categories); otherwise returns false.
614*/
615bool QChar::isLetterOrNumber() const
616{
617 const int test = FLAG(Letter_Uppercase) |
618 FLAG(Letter_Lowercase) |
619 FLAG(Letter_Titlecase) |
620 FLAG(Letter_Modifier) |
621 FLAG(Letter_Other) |
622 FLAG(Number_DecimalDigit) |
623 FLAG(Number_Letter) |
624 FLAG(Number_Other);
625 return FLAG(qGetProp(ucs)->category) & test;
626}
627
628
629/*!
630 Returns true if the character is a decimal digit
631 (Number_DecimalDigit); otherwise returns false.
632*/
633bool QChar::isDigit() const
634{
635 return (qGetProp(ucs)->category == Number_DecimalDigit);
636}
637
638
639/*!
640 Returns true if the character is a symbol (Symbol_* categories);
641 otherwise returns false.
642*/
643bool QChar::isSymbol() const
644{
645 const int test = FLAG(Symbol_Math) |
646 FLAG(Symbol_Currency) |
647 FLAG(Symbol_Modifier) |
648 FLAG(Symbol_Other);
649 return FLAG(qGetProp(ucs)->category) & test;
650}
651
652/*!
653 \fn bool QChar::isHighSurrogate() const
654
655 Returns true if the QChar is the high part of a utf16 surrogate
656 (ie. if its code point is between 0xd800 and 0xdbff, inclusive).
657*/
658
659/*!
660 \fn bool QChar::isLowSurrogate() const
661
662 Returns true if the QChar is the low part of a utf16 surrogate
663 (ie. if its code point is between 0xdc00 and 0xdfff, inclusive).
664*/
665
666/*!
667 \fn static bool QChar::isHighSurrogate(uint ucs4)
668 \since 4.7
669
670 Returns true if the UCS-4-encoded character specified by \a ucs4
671 is the high part of a utf16 surrogate
672 (ie. if its code point is between 0xd800 and 0xdbff, inclusive).
673*/
674
675/*!
676 \fn static bool QChar::isLowSurrogate(uint ucs4)
677 \since 4.7
678
679 Returns true if the UCS-4-encoded character specified by \a ucs4
680 is the low part of a utf16 surrogate
681 (ie. if its code point is between 0xdc00 and 0xdfff, inclusive).
682*/
683
684/*!
685 \fn static bool QChar::requiresSurrogates(uint ucs4)
686 \since 4.7
687
688 Returns true if the UCS-4-encoded character specified by \a ucs4
689 can be split into the high and low parts of a utf16 surrogate
    (ie. if its code point is greater than or equal to 0x10000).
691*/
692
693/*!
694 \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
695
696 Converts a UTF16 surrogate pair with the given \a high and \a low values
697 to its UCS-4 code point.
698*/
699
700/*!
701 \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
702
703 Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code point.
704*/
705
706/*!
707 \fn static ushort QChar::highSurrogate(uint ucs4)
708
709 Returns the high surrogate value of a ucs4 code point.
710 The returned result is undefined if \a ucs4 is smaller than 0x10000.
711*/
712
713/*!
714 \fn static ushort QChar::lowSurrogate(uint ucs4)
715
716 Returns the low surrogate value of a ucs4 code point.
717 The returned result is undefined if \a ucs4 is smaller than 0x10000.
718*/
719
720/*!
721 Returns the numeric value of the digit, or -1 if the character is
722 not a digit.
723*/
724int QChar::digitValue() const
725{
726 return qGetProp(ucs)->digitValue;
727}
728
729/*!
730 \overload
731 Returns the numeric value of the digit, specified by the UCS-2-encoded
732 character, \a ucs2, or -1 if the character is not a digit.
733*/
734int QChar::digitValue(ushort ucs2)
735{
736 return qGetProp(ucs2)->digitValue;
737}
738
739/*!
740 \overload
741 Returns the numeric value of the digit specified by the UCS-4-encoded
742 character, \a ucs4, or -1 if the character is not a digit.
743*/
744int QChar::digitValue(uint ucs4)
745{
746 if (ucs4 > UNICODE_LAST_CODEPOINT)
747 return 0;
748 return qGetProp(ucs4)->digitValue;
749}
750
751/*!
752 Returns the character's category.
753*/
754QChar::Category QChar::category() const
755{
756 return (QChar::Category) qGetProp(ucs)->category;
757}
758
759/*!
760 \overload
761 \since 4.3
762 Returns the category of the UCS-4-encoded character specified by \a ucs4.
763*/
764QChar::Category QChar::category(uint ucs4)
765{
766 if (ucs4 > UNICODE_LAST_CODEPOINT)
767 return QChar::NoCategory;
768 return (QChar::Category) qGetProp(ucs4)->category;
769}
770
771/*!
772 \overload
773 Returns the category of the UCS-2-encoded character specified by \a ucs2.
774*/
775QChar::Category QChar::category(ushort ucs2)
776{
777 return (QChar::Category) qGetProp(ucs2)->category;
778}
779
780
781/*!
782 Returns the character's direction.
783*/
784QChar::Direction QChar::direction() const
785{
786 return (QChar::Direction) qGetProp(ucs)->direction;
787}
788
789/*!
790 \overload
791 Returns the direction of the UCS-4-encoded character specified by \a ucs4.
792*/
793QChar::Direction QChar::direction(uint ucs4)
794{
795 if (ucs4 > UNICODE_LAST_CODEPOINT)
796 return QChar::DirL;
797 return (QChar::Direction) qGetProp(ucs4)->direction;
798}
799
800/*!
801 \overload
802 Returns the direction of the UCS-2-encoded character specified by \a ucs2.
803*/
804QChar::Direction QChar::direction(ushort ucs2)
805{
806 return (QChar::Direction) qGetProp(ucs2)->direction;
807}
808
809/*!
810 Returns information about the joining properties of the character
811 (needed for certain languages such as Arabic).
812*/
813QChar::Joining QChar::joining() const
814{
815 return (QChar::Joining) qGetProp(ucs)->joining;
816}
817
818/*!
819 \overload
820 Returns information about the joining properties of the UCS-4-encoded
821 character specified by \a ucs4 (needed for certain languages such as
822 Arabic).
823*/
824QChar::Joining QChar::joining(uint ucs4)
825{
826 if (ucs4 > UNICODE_LAST_CODEPOINT)
827 return QChar::OtherJoining;
828 return (QChar::Joining) qGetProp(ucs4)->joining;
829}
830
831/*!
832 \overload
833 Returns information about the joining properties of the UCS-2-encoded
834 character specified by \a ucs2 (needed for certain languages such as
835 Arabic).
836*/
837QChar::Joining QChar::joining(ushort ucs2)
838{
839 return (QChar::Joining) qGetProp(ucs2)->joining;
840}
841
842
843/*!
844 Returns true if the character should be reversed if the text
845 direction is reversed; otherwise returns false.
846
847 Same as (ch.mirroredChar() != ch).
848
849 \sa mirroredChar()
850*/
851bool QChar::hasMirrored() const
852{
853 return qGetProp(ucs)->mirrorDiff != 0;
854}
855
856/*!
857 \fn bool QChar::isLower() const
858
859 Returns true if the character is a lowercase letter, i.e.
860 category() is Letter_Lowercase.
861
862 \sa isUpper(), toLower(), toUpper()
863*/
864
865/*!
866 \fn bool QChar::isUpper() const
867
868 Returns true if the character is an uppercase letter, i.e.
869 category() is Letter_Uppercase.
870
871 \sa isLower(), toUpper(), toLower()
872*/
873
874/*!
875 \fn bool QChar::isTitleCase() const
876 \since 4.3
877
878 Returns true if the character is a titlecase letter, i.e.
879 category() is Letter_Titlecase.
880
881 \sa isLower(), toUpper(), toLower(), toTitleCase()
882*/
883
884/*!
885 Returns the mirrored character if this character is a mirrored
886 character; otherwise returns the character itself.
887
888 \sa hasMirrored()
889*/
890QChar QChar::mirroredChar() const
891{
892 return ucs + qGetProp(ucs)->mirrorDiff;
893}
894
895/*!
896 \overload
897 Returns the mirrored character if the UCS-4-encoded character specified
898 by \a ucs4 is a mirrored character; otherwise returns the character itself.
899
900 \sa hasMirrored()
901*/
902uint QChar::mirroredChar(uint ucs4)
903{
904 if (ucs4 > UNICODE_LAST_CODEPOINT)
905 return ucs4;
906 return ucs4 + qGetProp(ucs4)->mirrorDiff;
907}
908
909/*!
910 \overload
911 Returns the mirrored character if the UCS-2-encoded character specified
912 by \a ucs2 is a mirrored character; otherwise returns the character itself.
913
914 \sa hasMirrored()
915*/
916ushort QChar::mirroredChar(ushort ucs2)
917{
918 return ucs2 + qGetProp(ucs2)->mirrorDiff;
919}
920
921
// Constants for the arithmetic decomposition of Hangul syllables.
enum {
    // First code point of each block involved.
    Hangul_SBase = 0xac00,
    Hangul_LBase = 0x1100,
    Hangul_VBase = 0x1161,
    Hangul_TBase = 0x11a7,

    // Number of L, V and T values.
    Hangul_LCount = 19,
    Hangul_VCount = 21,
    Hangul_TCount = 28,

    // Derived counts: V*T combinations per L (588), and syllables in
    // total (11172).
    Hangul_NCount = Hangul_VCount * Hangul_TCount,
    Hangul_SCount = Hangul_LCount * Hangul_NCount
};
933
934// buffer has to have a length of 3. It's needed for Hangul decomposition
935static const unsigned short * QT_FASTCALL decompositionHelper
936 (uint ucs4, int *length, int *tag, unsigned short *buffer)
937{
938 *length = 0;
939 if (ucs4 > UNICODE_LAST_CODEPOINT)
940 return 0;
941 if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
942 int SIndex = ucs4 - Hangul_SBase;
943 buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
944 buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
945 buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
946 *length = buffer[2] == Hangul_TBase ? 2 : 3;
947 *tag = QChar::Canonical;
948 return buffer;
949 }
950
951 const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
952 if (index == 0xffff)
953 return 0;
954 const unsigned short *decomposition = uc_decomposition_map+index;
955 *tag = (*decomposition) & 0xff;
956 *length = (*decomposition) >> 8;
957 return decomposition+1;
958}
959
960/*!
961 Decomposes a character into its parts. Returns an empty string if
962 no decomposition exists.
963*/
964QString QChar::decomposition() const
965{
966 return decomposition(ucs);
967}
968
969/*!
970 \overload
971 Decomposes the UCS-4-encoded character specified by \a ucs4 into its
972 constituent parts. Returns an empty string if no decomposition exists.
973*/
974QString QChar::decomposition(uint ucs4)
975{
976 unsigned short buffer[3];
977 int length;
978 int tag;
979 const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
980 return QString::fromUtf16(d, length);
981}
982
983/*!
    Returns the tag defining the composition of the character. Returns
    QChar::NoDecomposition if no decomposition exists.
986*/
987QChar::Decomposition QChar::decompositionTag() const
988{
989 return decompositionTag(ucs);
990}
991
992/*!
993 \overload
    Returns the tag defining the composition of the UCS-4-encoded character
    specified by \a ucs4. Returns QChar::NoDecomposition if no decomposition exists.
996*/
997QChar::Decomposition QChar::decompositionTag(uint ucs4)
998{
999 if (ucs4 > UNICODE_LAST_CODEPOINT)
1000 return QChar::NoDecomposition;
1001 const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
1002 if (index == 0xffff)
1003 return QChar::NoDecomposition;
1004 return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
1005}
1006
1007/*!
1008 Returns the combining class for the character as defined in the
1009 Unicode standard. This is mainly useful as a positioning hint for
1010 marks attached to a base character.
1011
1012 The Qt text rendering engine uses this information to correctly
1013 position non-spacing marks around a base character.
1014*/
1015unsigned char QChar::combiningClass() const
1016{
1017 return (unsigned char) qGetProp(ucs)->combiningClass;
1018}
1019
1020/*!
1021 \overload
1022 Returns the combining class for the UCS-4-encoded character specified by
1023 \a ucs4, as defined in the Unicode standard.
1024*/
1025unsigned char QChar::combiningClass(uint ucs4)
1026{
1027 if (ucs4 > UNICODE_LAST_CODEPOINT)
1028 return 0;
1029 return (unsigned char) qGetProp(ucs4)->combiningClass;
1030}
1031
1032/*!
1033 \overload
1034 Returns the combining class for the UCS-2-encoded character specified by
1035 \a ucs2, as defined in the Unicode standard.
1036*/
1037unsigned char QChar::combiningClass(ushort ucs2)
1038{
1039 return (unsigned char) qGetProp(ucs2)->combiningClass;
1040}
1041
1042/*!
1043 Returns the Unicode version that introduced this character.
1044*/
1045QChar::UnicodeVersion QChar::unicodeVersion() const
1046{
1047 return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
1048}
1049
1050/*!
1051 \overload
1052 Returns the Unicode version that introduced the character specified in
1053 its UCS-4-encoded form as \a ucs4.
1054*/
1055QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
1056{
1057 if (ucs4 > UNICODE_LAST_CODEPOINT)
1058 return QChar::Unicode_Unassigned;
1059 return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
1060}
1061
1062/*!
1063 \overload
1064 Returns the Unicode version that introduced the character specified in
1065 its UCS-2-encoded form as \a ucs2.
1066*/
1067QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
1068{
1069 return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
1070}
1071
1072/*!
1073 \since 4.8
1074
1075 Returns the most recent supported Unicode version.
1076*/
1077QChar::UnicodeVersion QChar::currentUnicodeVersion()
1078{
1079 return UNICODE_DATA_VERSION;
1080}
1081
1082/*!
1083 Returns the lowercase equivalent if the character is uppercase or titlecase;
1084 otherwise returns the character itself.
1085*/
1086QChar QChar::toLower() const
1087{
1088 const QUnicodeTables::Properties *p = qGetProp(ucs);
1089 if (!p->lowerCaseSpecial)
1090 return ucs + p->lowerCaseDiff;
1091 return ucs;
1092}
1093
1094/*!
1095 \overload
1096 Returns the lowercase equivalent of the UCS-4-encoded character specified
1097 by \a ucs4 if the character is uppercase or titlecase; otherwise returns
1098 the character itself.
1099*/
1100uint QChar::toLower(uint ucs4)
1101{
1102 if (ucs4 > UNICODE_LAST_CODEPOINT)
1103 return ucs4;
1104 const QUnicodeTables::Properties *p = qGetProp(ucs4);
1105 if (!p->lowerCaseSpecial)
1106 return ucs4 + p->lowerCaseDiff;
1107 return ucs4;
1108}
1109
1110/*!
1111 \overload
1112 Returns the lowercase equivalent of the UCS-2-encoded character specified
1113 by \a ucs2 if the character is uppercase or titlecase; otherwise returns
1114 the character itself.
1115*/
1116ushort QChar::toLower(ushort ucs2)
1117{
1118 const QUnicodeTables::Properties *p = qGetProp(ucs2);
1119 if (!p->lowerCaseSpecial)
1120 return ucs2 + p->lowerCaseDiff;
1121 return ucs2;
1122}
1123
1124/*!
1125 Returns the uppercase equivalent if the character is lowercase or titlecase;
1126 otherwise returns the character itself.
1127*/
1128QChar QChar::toUpper() const
1129{
1130 const QUnicodeTables::Properties *p = qGetProp(ucs);
1131 if (!p->upperCaseSpecial)
1132 return ucs + p->upperCaseDiff;
1133 return ucs;
1134}
1135
1136/*!
1137 \overload
1138 Returns the uppercase equivalent of the UCS-4-encoded character specified
1139 by \a ucs4 if the character is lowercase or titlecase; otherwise returns
1140 the character itself.
1141*/
1142uint QChar::toUpper(uint ucs4)
1143{
1144 if (ucs4 > UNICODE_LAST_CODEPOINT)
1145 return ucs4;
1146 const QUnicodeTables::Properties *p = qGetProp(ucs4);
1147 if (!p->upperCaseSpecial)
1148 return ucs4 + p->upperCaseDiff;
1149 return ucs4;
1150}
1151
1152/*!
1153 \overload
1154 Returns the uppercase equivalent of the UCS-2-encoded character specified
1155 by \a ucs2 if the character is lowercase or titlecase; otherwise returns
1156 the character itself.
1157*/
1158ushort QChar::toUpper(ushort ucs2)
1159{
1160 const QUnicodeTables::Properties *p = qGetProp(ucs2);
1161 if (!p->upperCaseSpecial)
1162 return ucs2 + p->upperCaseDiff;
1163 return ucs2;
1164}
1165
1166/*!
1167 Returns the title case equivalent if the character is lowercase or uppercase;
1168 otherwise returns the character itself.
1169*/
1170QChar QChar::toTitleCase() const
1171{
1172 const QUnicodeTables::Properties *p = qGetProp(ucs);
1173 if (!p->titleCaseSpecial)
1174 return ucs + p->titleCaseDiff;
1175 return ucs;
1176}
1177
1178/*!
1179 \overload
1180 Returns the title case equivalent of the UCS-4-encoded character specified
1181 by \a ucs4 if the character is lowercase or uppercase; otherwise returns
1182 the character itself.
1183*/
1184uint QChar::toTitleCase(uint ucs4)
1185{
1186 if (ucs4 > UNICODE_LAST_CODEPOINT)
1187 return ucs4;
1188 const QUnicodeTables::Properties *p = qGetProp(ucs4);
1189 if (!p->titleCaseSpecial)
1190 return ucs4 + p->titleCaseDiff;
1191 return ucs4;
1192}
1193
1194/*!
1195 \overload
1196 Returns the title case equivalent of the UCS-2-encoded character specified
1197 by \a ucs2 if the character is lowercase or uppercase; otherwise returns
1198 the character itself.
1199*/
1200ushort QChar::toTitleCase(ushort ucs2)
1201{
1202 const QUnicodeTables::Properties *p = qGetProp(ucs2);
1203 if (!p->titleCaseSpecial)
1204 return ucs2 + p->titleCaseDiff;
1205 return ucs2;
1206}
1207
1208
1209static inline uint foldCase(const ushort *ch, const ushort *start)
1210{
1211 uint c = *ch;
1212 if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
1213 c = QChar::surrogateToUcs4(*(ch - 1), c);
1214 return *ch + qGetProp(c)->caseFoldDiff;
1215}
1216
1217static inline uint foldCase(uint ch, uint &last)
1218{
1219 uint c = ch;
1220 if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
1221 c = QChar::surrogateToUcs4(last, c);
1222 last = ch;
1223 return ch + qGetProp(c)->caseFoldDiff;
1224}
1225
1226static inline ushort foldCase(ushort ch)
1227{
1228 return ch + qGetProp(ch)->caseFoldDiff;
1229}
1230
1231/*!
1232 Returns the case folded equivalent of the character. For most Unicode characters this
1233 is the same as toLowerCase().
1234*/
1235QChar QChar::toCaseFolded() const
1236{
1237 return ucs + qGetProp(ucs)->caseFoldDiff;
1238}
1239
1240/*!
1241 \overload
1242 Returns the case folded equivalent of the UCS-4-encoded character specified
1243 by \a ucs4. For most Unicode characters this is the same as toLowerCase().
1244*/
1245uint QChar::toCaseFolded(uint ucs4)
1246{
1247 if (ucs4 > UNICODE_LAST_CODEPOINT)
1248 return ucs4;
1249 return ucs4 + qGetProp(ucs4)->caseFoldDiff;
1250}
1251
1252/*!
1253 \overload
1254 Returns the case folded equivalent of the UCS-2-encoded character specified
1255 by \a ucs2. For most Unicode characters this is the same as toLowerCase().
1256*/
1257ushort QChar::toCaseFolded(ushort ucs2)
1258{
1259 return ucs2 + qGetProp(ucs2)->caseFoldDiff;
1260}
1261
1262
1263/*!
1264 \fn char QChar::latin1() const
1265
1266 Use toLatin1() instead.
1267*/
1268
1269/*!
1270 \fn char QChar::ascii() const
1271
1272 Use toAscii() instead.
1273*/
1274
1275/*!
1276 \fn char QChar::toLatin1() const
1277
1278 Returns the Latin-1 character equivalent to the QChar, or 0. This
1279 is mainly useful for non-internationalized software.
1280
1281 \sa toAscii(), unicode(), QTextCodec::codecForCStrings()
1282*/
1283
1284/*!
1285 \fn char QChar::toAscii() const
1286 Returns the character value of the QChar obtained using the current
1287 codec used to read C strings, or 0 if the character is not representable
1288 using this codec. The default codec handles Latin-1 encoded text,
1289 but this can be changed to assist developers writing source code using
1290 other encodings.
1291
1292 The main purpose of this function is to preserve ASCII characters used
1293 in C strings. This is mainly useful for developers of non-internationalized
1294 software.
1295
1296 \sa toLatin1(), unicode(), QTextCodec::codecForCStrings()
1297*/
1298#ifdef Q_COMPILER_MANGLES_RETURN_TYPE
1299const char QChar::toAscii() const
1300#else
1301char QChar::toAscii() const
1302#endif
1303{
1304#ifndef QT_NO_CODEC_FOR_C_STRINGS
1305 if (QTextCodec::codecForCStrings())
1306 // #####
1307 return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0);
1308#endif
1309 return ucs > 0xff ? 0 : char(ucs);
1310}
1311
1312/*!
1313 \fn QChar QChar::fromLatin1(char c)
1314
1315 Converts the Latin-1 character \a c to its equivalent QChar. This
1316 is mainly useful for non-internationalized software.
1317
1318 \sa fromAscii(), unicode(), QTextCodec::codecForCStrings()
1319*/
1320
1321/*!
1322 Converts the ASCII character \a c to its equivalent QChar. This
1323 is mainly useful for non-internationalized software.
1324
1325 An alternative is to use QLatin1Char.
1326
1327 \sa fromLatin1(), unicode(), QTextCodec::codecForCStrings()
1328*/
1329QChar QChar::fromAscii(char c)
1330{
1331#ifndef QT_NO_CODEC_FOR_C_STRINGS
1332 if (QTextCodec::codecForCStrings())
1333 // #####
1334 return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
1335#endif
1336 return QChar(ushort((uchar)c));
1337}
1338
1339#ifndef QT_NO_DATASTREAM
1340/*!
1341 \relates QChar
1342
1343 Writes the char \a chr to the stream \a out.
1344
1345 \sa {Serializing Qt Data Types}
1346*/
1347QDataStream &operator<<(QDataStream &out, const QChar &chr)
1348{
1349 out << quint16(chr.unicode());
1350 return out;
1351}
1352
1353/*!
1354 \relates QChar
1355
1356 Reads a char from the stream \a in into char \a chr.
1357
1358 \sa {Serializing Qt Data Types}
1359*/
1360QDataStream &operator>>(QDataStream &in, QChar &chr)
1361{
1362 quint16 u;
1363 in >> u;
1364 chr.unicode() = ushort(u);
1365 return in;
1366}
1367#endif // QT_NO_DATASTREAM
1368
1369/*!
1370 \fn ushort & QChar::unicode()
1371
1372 Returns a reference to the numeric Unicode value of the QChar.
1373*/
1374
1375/*!
1376 \fn ushort QChar::unicode() const
1377
1378 \overload
1379*/
1380
1381/*****************************************************************************
1382 Documentation of QChar related functions
1383 *****************************************************************************/
1384
1385/*!
1386 \fn bool operator==(QChar c1, QChar c2)
1387
1388 \relates QChar
1389
1390 Returns true if \a c1 and \a c2 are the same Unicode character;
1391 otherwise returns false.
1392*/
1393
1394/*!
1395 \fn int operator!=(QChar c1, QChar c2)
1396
1397 \relates QChar
1398
1399 Returns true if \a c1 and \a c2 are not the same Unicode
1400 character; otherwise returns false.
1401*/
1402
1403/*!
1404 \fn int operator<=(QChar c1, QChar c2)
1405
1406 \relates QChar
1407
1408 Returns true if the numeric Unicode value of \a c1 is less than
1409 or equal to that of \a c2; otherwise returns false.
1410*/
1411
1412/*!
1413 \fn int operator>=(QChar c1, QChar c2)
1414
1415 \relates QChar
1416
1417 Returns true if the numeric Unicode value of \a c1 is greater than
1418 or equal to that of \a c2; otherwise returns false.
1419*/
1420
1421/*!
1422 \fn int operator<(QChar c1, QChar c2)
1423
1424 \relates QChar
1425
1426 Returns true if the numeric Unicode value of \a c1 is less than
1427 that of \a c2; otherwise returns false.
1428*/
1429
1430/*!
1431 \fn int operator>(QChar c1, QChar c2)
1432
1433 \relates QChar
1434
1435 Returns true if the numeric Unicode value of \a c1 is greater than
1436 that of \a c2; otherwise returns false.
1437*/
1438
1439/*!
1440 \fn bool QChar::mirrored() const
1441
1442 Use hasMirrored() instead.
1443*/
1444
1445/*!
1446 \fn QChar QChar::lower() const
1447
1448 Use toLower() instead.
1449*/
1450
1451/*!
1452 \fn QChar QChar::upper() const
1453
1454 Use toUpper() instead.
1455*/
1456
1457/*!
1458 \fn bool QChar::networkOrdered()
1459
1460 See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead.
1461*/
1462
1463
1464// ---------------------------------------------------------------------------
1465
1466
1467static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
1468{
1469 unsigned short buffer[3];
1470
1471 QString &s = *str;
1472
1473 const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
1474 const unsigned short *uc = utf16 + s.length();
1475 while (uc != utf16 + from) {
1476 uint ucs4 = *(--uc);
1477 if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
1478 ushort high = *(uc - 1);
1479 if (QChar(high).isHighSurrogate()) {
1480 --uc;
1481 ucs4 = QChar::surrogateToUcs4(high, ucs4);
1482 }
1483 }
1484 QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4);
1485 if (v == QChar::Unicode_Unassigned || v > version)
1486 continue;
1487 int length;
1488 int tag;
1489 const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
1490 if (!d || (canonical && tag != QChar::Canonical))
1491 continue;
1492
1493 int pos = uc - utf16;
1494 s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
1495 // since the insert invalidates the pointers and we do decomposition recursive
1496 utf16 = reinterpret_cast<unsigned short *>(s.data());
1497 uc = utf16 + pos + length;
1498 }
1499}
1500
1501
// A pair of UTF-16 code units. ligatureHelper() searches arrays of these by
// u1 and returns the u2 of the matching entry.
struct UCS2Pair {
    ushort u1;
    ushort u2;
};

// Ordering between a bare code unit and a pair: only the pair's u1 takes part.
inline bool operator<(ushort u1, const UCS2Pair &ligature)
{
    return ligature.u1 > u1;
}
inline bool operator<(const UCS2Pair &ligature, ushort u1)
{
    return u1 > ligature.u1;
}
1511
1512static ushort ligatureHelper(ushort u1, ushort u2)
1513{
1514 // hangul L-V pair
1515 int LIndex = u1 - Hangul_LBase;
1516 if (0 <= LIndex && LIndex < Hangul_LCount) {
1517 int VIndex = u2 - Hangul_VBase;
1518 if (0 <= VIndex && VIndex < Hangul_VCount)
1519 return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
1520 }
1521
1522 // hangul LV-T pair
1523 int SIndex = u1 - Hangul_SBase;
1524 if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
1525 int TIndex = u2 - Hangul_TBase;
1526 if (0 <= TIndex && TIndex <= Hangul_TCount)
1527 return u1 + TIndex;
1528 }
1529
1530 const unsigned short index = GET_LIGATURE_INDEX(u2);
1531 if (index == 0xffff)
1532 return 0;
1533 const unsigned short *ligatures = uc_ligature_map+index;
1534 ushort length = *ligatures++;
1535 {
1536 const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures);
1537 const UCS2Pair *r = qBinaryFind(data, data + length, u1);
1538 if (r != data + length)
1539 return r->u2;
1540 }
1541
1542 return 0;
1543}
1544
1545static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
1546{
1547 QString &s = *str;
1548
1549 if (from < 0 || s.length() - from < 2)
1550 return;
1551
1552 // the loop can partly ignore high Unicode as all ligatures are in the BMP
1553 int starter = -2; // to prevent starter == pos - 1
1554 int lastCombining = 255; // to prevent combining > lastCombining
1555 int pos = from;
1556 while (pos < s.length()) {
1557 uint uc = s.at(pos).unicode();
1558 if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
1559 ushort low = s.at(pos+1).unicode();
1560 if (QChar(low).isLowSurrogate()) {
1561 uc = QChar::surrogateToUcs4(uc, low);
1562 ++pos;
1563 }
1564 }
1565 const QUnicodeTables::Properties *p = qGetProp(uc);
1566 if (p->unicodeVersion == QChar::Unicode_Unassigned || p->unicodeVersion > version) {
1567 starter = -1; // to prevent starter == pos - 1
1568 lastCombining = 255; // to prevent combining > lastCombining
1569 ++pos;
1570 continue;
1571 }
1572 int combining = p->combiningClass;
1573 if ((starter == pos - 1 || combining > lastCombining) && starter >= from) {
1574 // allowed to form ligature with S
1575 QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
1576 if (ligature.unicode()) {
1577 s[starter] = ligature;
1578 s.remove(pos, 1);
1579 continue;
1580 }
1581 }
1582 if (!combining)
1583 starter = pos;
1584 lastCombining = combining;
1585 ++pos;
1586 }
1587}
1588
1589
1590static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
1591{
1592 QString &s = *str;
1593 const int l = s.length()-1;
1594 int pos = from;
1595 while (pos < l) {
1596 int p2 = pos+1;
1597 uint u1 = s.at(pos).unicode();
1598 if (QChar(u1).isHighSurrogate()) {
1599 ushort low = s.at(p2).unicode();
1600 if (QChar(low).isLowSurrogate()) {
1601 u1 = QChar::surrogateToUcs4(u1, low);
1602 if (p2 >= l)
1603 break;
1604 ++p2;
1605 }
1606 }
1607 uint u2 = s.at(p2).unicode();
1608 if (QChar(u2).isHighSurrogate() && p2 < l) {
1609 ushort low = s.at(p2+1).unicode();
1610 if (QChar(low).isLowSurrogate()) {
1611 u2 = QChar::surrogateToUcs4(u2, low);
1612 ++p2;
1613 }
1614 }
1615
1616 ushort c2 = 0;
1617 {
1618 const QUnicodeTables::Properties *p = qGetProp(u2);
1619 if (p->unicodeVersion != QChar::Unicode_Unassigned && p->unicodeVersion <= version)
1620 c2 = p->combiningClass;
1621 }
1622 if (c2 == 0) {
1623 pos = p2+1;
1624 continue;
1625 }
1626
1627 ushort c1 = 0;
1628 {
1629 const QUnicodeTables::Properties *p = qGetProp(u1);
1630 if (p->unicodeVersion != QChar::Unicode_Unassigned && p->unicodeVersion <= version)
1631 c1 = p->combiningClass;
1632 }
1633
1634 if (c1 > c2) {
1635 QChar *uc = s.data();
1636 int p = pos;
1637 // exchange characters
1638 if (!QChar::requiresSurrogates(u2)) {
1639 uc[p++] = u2;
1640 } else {
1641 uc[p++] = QChar::highSurrogate(u2);
1642 uc[p++] = QChar::lowSurrogate(u2);
1643 }
1644 if (!QChar::requiresSurrogates(u1)) {
1645 uc[p++] = u1;
1646 } else {
1647 uc[p++] = QChar::highSurrogate(u1);
1648 uc[p++] = QChar::lowSurrogate(u1);
1649 }
1650 if (pos > 0)
1651 --pos;
1652 if (pos > 0 && s.at(pos).isLowSurrogate())
1653 --pos;
1654 } else {
1655 ++pos;
1656 if (QChar::requiresSurrogates(u1))
1657 ++pos;
1658 }
1659 }
1660}
1661
1662QT_END_NAMESPACE
1663