#include <mbgl/text/bidi.hpp>
#include <mbgl/util/traits.hpp>

#include <unicode/ubidi.h>
#include <unicode/ushape.h>

#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
9
10namespace mbgl {
11
12class BiDiImpl {
13public:
14 BiDiImpl() : bidiText(ubidi_open()), bidiLine(ubidi_open()) {
15 }
16 ~BiDiImpl() {
17 ubidi_close(pBiDi: bidiText);
18 ubidi_close(pBiDi: bidiLine);
19 }
20
21 UBiDi* bidiText = nullptr;
22 UBiDi* bidiLine = nullptr;
23};
24
25BiDi::BiDi() : impl(std::make_unique<BiDiImpl>()) {}
26BiDi::~BiDi() = default;
27
28// Takes UTF16 input in logical order and applies Arabic shaping to the input while maintaining
29// logical order. Output won't be intelligible until the bidirectional algorithm is applied
30std::u16string applyArabicShaping(const std::u16string& input) {
31 UErrorCode errorCode = U_ZERO_ERROR;
32
33 const int32_t outputLength =
34 u_shapeArabic(source: mbgl::utf16char_cast<const UChar*>(in: input.c_str()), sourceLength: static_cast<int32_t>(input.size()), dest: nullptr, destSize: 0,
35 options: (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
36 (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
37 pErrorCode: &errorCode);
38
39 // Pre-flighting will always set U_BUFFER_OVERFLOW_ERROR
40 errorCode = U_ZERO_ERROR;
41
42 std::u16string outputText(outputLength, 0);
43
44 u_shapeArabic(source: mbgl::utf16char_cast<const UChar*>(in: input.c_str()), sourceLength: static_cast<int32_t>(input.size()), dest: mbgl::utf16char_cast<UChar*>(in: &outputText[0]), destSize: outputLength,
45 options: (U_SHAPE_LETTERS_SHAPE & U_SHAPE_LETTERS_MASK) |
46 (U_SHAPE_TEXT_DIRECTION_LOGICAL & U_SHAPE_TEXT_DIRECTION_MASK),
47 pErrorCode: &errorCode);
48
49 // If the algorithm fails for any reason, fall back to non-transformed text
50 if (U_FAILURE(code: errorCode))
51 return input;
52
53 return outputText;
54}
55
56void BiDi::mergeParagraphLineBreaks(std::set<size_t>& lineBreakPoints) {
57 int32_t paragraphCount = ubidi_countParagraphs(pBiDi: impl->bidiText);
58 for (int32_t i = 0; i < paragraphCount; i++) {
59 UErrorCode errorCode = U_ZERO_ERROR;
60 int32_t paragraphEndIndex;
61 ubidi_getParagraphByIndex(pBiDi: impl->bidiText, paraIndex: i, pParaStart: nullptr, pParaLimit: &paragraphEndIndex, pParaLevel: nullptr, pErrorCode: &errorCode);
62
63 if (U_FAILURE(code: errorCode)) {
64 throw std::runtime_error(std::string("ProcessedBiDiText::mergeParagraphLineBreaks: ") +
65 u_errorName(code: errorCode));
66 }
67
68 lineBreakPoints.insert(x: static_cast<std::size_t>(paragraphEndIndex));
69 }
70}
71
72std::vector<std::u16string> BiDi::applyLineBreaking(std::set<std::size_t> lineBreakPoints) {
73 // BiDi::getLine will error if called across a paragraph boundary, so we need to ensure that all
74 // paragraph boundaries are included in the set of line break points. The calling code might not
75 // include the line break because it didn't need to wrap at that point, or because the text was
76 // separated with a more exotic code point such as (U+001C)
77 mergeParagraphLineBreaks(lineBreakPoints);
78
79 std::vector<std::u16string> transformedLines;
80 transformedLines.reserve(n: lineBreakPoints.size());
81
82 std::size_t start = 0;
83 for (std::size_t lineBreakPoint : lineBreakPoints) {
84 transformedLines.push_back(x: getLine(start, end: lineBreakPoint));
85 start = lineBreakPoint;
86 }
87
88 return transformedLines;
89}
90
91std::vector<std::u16string> BiDi::processText(const std::u16string& input,
92 std::set<std::size_t> lineBreakPoints) {
93 UErrorCode errorCode = U_ZERO_ERROR;
94
95 ubidi_setPara(pBiDi: impl->bidiText, text: mbgl::utf16char_cast<const UChar*>(in: input.c_str()), length: static_cast<int32_t>(input.size()),
96 UBIDI_DEFAULT_LTR, embeddingLevels: nullptr, pErrorCode: &errorCode);
97
98 if (U_FAILURE(code: errorCode)) {
99 throw std::runtime_error(std::string("BiDi::processText: ") + u_errorName(code: errorCode));
100 }
101
102 return applyLineBreaking(lineBreakPoints);
103}
104
105std::u16string BiDi::getLine(std::size_t start, std::size_t end) {
106 UErrorCode errorCode = U_ZERO_ERROR;
107 ubidi_setLine(pParaBiDi: impl->bidiText, start: static_cast<int32_t>(start), limit: static_cast<int32_t>(end), pLineBiDi: impl->bidiLine, pErrorCode: &errorCode);
108
109 if (U_FAILURE(code: errorCode)) {
110 throw std::runtime_error(std::string("BiDi::getLine (setLine): ") + u_errorName(code: errorCode));
111 }
112
113 // Because we set UBIDI_REMOVE_BIDI_CONTROLS, the output may be smaller than what we reserve
114 // Setting UBIDI_INSERT_LRM_FOR_NUMERIC would require
115 // ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)
116 const int32_t outputLength = ubidi_getProcessedLength(pBiDi: impl->bidiLine);
117 std::u16string outputText(outputLength, 0);
118
119 // UBIDI_DO_MIRRORING: Apply unicode mirroring of characters like parentheses
120 // UBIDI_REMOVE_BIDI_CONTROLS: Now that all the lines are set, remove control characters so that
121 // they don't show up on screen (some fonts have glyphs representing them)
122 ubidi_writeReordered(pBiDi: impl->bidiLine, dest: mbgl::utf16char_cast<UChar*>(in: &outputText[0]), destSize: outputLength,
123 UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, pErrorCode: &errorCode);
124
125 if (U_FAILURE(code: errorCode)) {
126 throw std::runtime_error(std::string("BiDi::getLine (writeReordered): ") +
127 u_errorName(code: errorCode));
128 }
129
130 return outputText;
131}
132
133} // end namespace mbgl
134

// Source: qtlocation/src/3rdparty/mapbox-gl-native/platform/default/bidi.cpp