1/*
2 * Copyright (C) 2015 The Qt Company Ltd
3 *
4 * This is part of HarfBuzz, an OpenType Layout engine library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 */
24
25#include "harfbuzz-shaper.h"
26#include "harfbuzz-shaper-private.h"
27#include "harfbuzz-external.h"
28
29#include <assert.h>
30#include <stdio.h>
31
32#define LIBTHAI_MAJOR 0
33
/*
 * Local mirror of the libthai declarations this file needs. libthai is
 * resolved at run time, so its headers are not included; if libthai's
 * definitions change, the declarations below must be updated to match.
 */

/* One Thai display cell as filled in by th_next_cell(). */
struct thcell_t {
    unsigned char base;      /**< base character */
    unsigned char hilo;      /**< upper/lower vowel/diacritic */
    unsigned char top;       /**< top-level mark */
};

/* Signatures of the libthai entry points looked up by init_libthai(). */
typedef int (*th_brk_def) (const unsigned char *, int *, size_t);
typedef int (*th_render_cell_tis_def) (struct thcell_t cell,
                                       unsigned char res[], size_t res_sz,
                                       int is_decomp_am);
typedef int (*th_render_cell_win_def) (struct thcell_t cell,
                                       unsigned char res[], size_t res_sz,
                                       int is_decomp_am);
typedef int (*th_render_cell_mac_def) (struct thcell_t cell,
                                       unsigned char res[], size_t res_sz,
                                       int is_decomp_am);
typedef size_t (*th_next_cell_def) (const unsigned char *, size_t,
                                    struct thcell_t *, int);

/* libthai related function handles; all stay null until init_libthai() resolves them */
static th_brk_def             th_brk             = 0;
static th_next_cell_def       th_next_cell       = 0;
static th_render_cell_tis_def th_render_cell_tis = 0;
static th_render_cell_win_def th_render_cell_win = 0;
static th_render_cell_mac_def th_render_cell_mac = 0;
54
55static int init_libthai() {
56 static HB_Bool initialized = false;
57 if (!initialized && (!th_brk || !th_next_cell || !th_render_cell_tis || !th_render_cell_win || !th_render_cell_mac)) {
58 th_brk = (th_brk_def) HB_Library_Resolve(library: "thai", version: (int)LIBTHAI_MAJOR, symbol: "th_brk");
59 th_next_cell = (th_next_cell_def)HB_Library_Resolve(library: "thai", LIBTHAI_MAJOR, symbol: "th_next_cell");
60 th_render_cell_tis = (th_render_cell_tis_def) HB_Library_Resolve(library: "thai", version: (int)LIBTHAI_MAJOR, symbol: "th_render_cell_tis");
61 th_render_cell_win = (th_render_cell_win_def) HB_Library_Resolve(library: "thai", version: (int)LIBTHAI_MAJOR, symbol: "th_render_cell_win");
62 th_render_cell_mac = (th_render_cell_mac_def) HB_Library_Resolve(library: "thai", version: (int)LIBTHAI_MAJOR, symbol: "th_render_cell_mac");
63 initialized = true;
64 }
65 if (th_brk && th_next_cell && th_render_cell_tis && th_render_cell_win && th_render_cell_mac)
66 return 1;
67 else
68 return 0;
69}
70
71static void to_tis620(const HB_UChar16 *string, hb_uint32 len, char *cstr)
72{
73 hb_uint32 i;
74 unsigned char *result = (unsigned char *)cstr;
75
76 for (i = 0; i < len; ++i) {
77 if (string[i] <= 0xa0)
78 result[i] = (unsigned char)string[i];
79 else if (string[i] >= 0xe01 && string[i] <= 0xe5b)
80 result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
81 else
82 result[i] = (unsigned char)~0; // Same encoding as libthai uses for invalid chars
83 }
84
85 result[len] = 0;
86}
87
88/*
89 * ---------------------------------------------------------------------------
90 * Thai Shaper / Attributes
91 * ---------------------------------------------------------------------------
92 */
93
/*
 * Use only the basic features for now; further features can be added to
 * this table in the future.
 */
97#ifndef NO_OPENTYPE
98static const HB_OpenTypeFeature thai_features[] = {
99 { HB_MAKE_TAG('c', 'c', 'm', 'p'), .property: CcmpProperty },
100 { HB_MAKE_TAG('l', 'i', 'g', 'a'), .property: CcmpProperty },
101 { HB_MAKE_TAG('c', 'l', 'i', 'g'), .property: CcmpProperty },
102 {.tag: 0, .property: 0}
103};
104#endif
105
106/* TIS-to-Unicode glyph maps for characters 0x80-0xff */
/*
 * Glyph map selected for the TIS font type by thai_get_glyph_index().
 * Indexed with (byte & 0x7f), i.e. entry 0 corresponds to byte 0x80; the
 * comment on each row gives the byte value of its first entry. An entry of 0
 * means "no mapping" and is skipped by thai_contain_glyphs().
 * The table is read-only, hence const.
 */
static const int tis620_0[128] = {
    /* 0x80 */      0,      0,      0,      0,      0,      0,      0,      0,
    /* 0x88 */      0,      0,      0,      0,      0,      0,      0,      0,
    /* 0x90 */      0,      0,      0,      0,      0,      0,      0,      0,
    /* 0x98 */      0,      0,      0,      0,      0,      0,      0,      0,
    /* 0xa0 */ 0x0020, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    /* 0xa8 */ 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    /* 0xb0 */ 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    /* 0xb8 */ 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    /* 0xc0 */ 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    /* 0xc8 */ 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    /* 0xd0 */ 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    /* 0xd8 */ 0x0e38, 0x0e39, 0x0e3a,      0,      0,      0,      0, 0x0e3f,
    /* 0xe0 */ 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    /* 0xe8 */ 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    /* 0xf0 */ 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    /* 0xf8 */ 0x0e58, 0x0e59, 0x0e5a, 0x0e5b,      0,      0,      0,      0
};
125
/*
 * Glyph map selected for the WIN font type by thai_get_glyph_index(), and
 * the second table probed by getThaiFontType().
 * Indexed with (byte & 0x7f), i.e. entry 0 corresponds to byte 0x80; the
 * comment on each row gives the byte value of its first entry. An entry of 0
 * means "no mapping" and is skipped by thai_contain_glyphs().
 * The table is read-only, hence const.
 */
static const int tis620_1[128] = {
    /* 0x80 */ 0xf89e,      0,      0, 0xf88c, 0xf88f, 0xf892, 0xf895, 0xf898,
    /* 0x88 */ 0xf88b, 0xf88e, 0xf891, 0xf894, 0xf897,      0,      0, 0xf899,
    /* 0x90 */ 0xf89a,      0, 0xf884, 0xf889, 0xf885, 0xf886, 0xf887, 0xf888,
    /* 0x98 */ 0xf88a, 0xf88d, 0xf890, 0xf893, 0xf896,      0,      0,      0,
    /* 0xa0 */      0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    /* 0xa8 */ 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    /* 0xb0 */ 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    /* 0xb8 */ 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    /* 0xc0 */ 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    /* 0xc8 */ 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    /* 0xd0 */ 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    /* 0xd8 */ 0x0e38, 0x0e39, 0x0e3a,      0,      0,      0,      0, 0x0e3f,
    /* 0xe0 */ 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    /* 0xe8 */ 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d,      0, 0x0e4f,
    /* 0xf0 */ 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    /* 0xf8 */ 0x0e58, 0x0e59,      0,      0, 0xf89b, 0xf89c, 0xf89d,      0
};
144
/*
 * Glyph map selected for the MAC font type by thai_get_glyph_index(), and
 * the first table probed by getThaiFontType().
 * Indexed with (byte & 0x7f), i.e. entry 0 corresponds to byte 0x80; the
 * comment on each row gives the byte value of its first entry. An entry of 0
 * means "no mapping" and is skipped by thai_contain_glyphs().
 * The table is read-only, hence const.
 */
static const int tis620_2[128] = {
    /* 0x80 */ 0xf700, 0xf701, 0xf702, 0xf703, 0xf704, 0x2026, 0xf705, 0xf706,
    /* 0x88 */ 0xf707, 0xf708, 0xf709, 0xf70a, 0xf70b, 0xf70c, 0xf70d, 0xf70e,
    /* 0x90 */ 0xf70f, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
    /* 0x98 */ 0xf710, 0xf711, 0xf712, 0xf713, 0xf714, 0xf715, 0xf716, 0xf717,
    /* 0xa0 */ 0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    /* 0xa8 */ 0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    /* 0xb0 */ 0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    /* 0xb8 */ 0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    /* 0xc0 */ 0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    /* 0xc8 */ 0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    /* 0xd0 */ 0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    /* 0xd8 */ 0x0e38, 0x0e39, 0x0e3a,      0,      0,      0,      0, 0x0e3f,
    /* 0xe0 */ 0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    /* 0xe8 */ 0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    /* 0xf0 */ 0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    /* 0xf8 */ 0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0xf718, 0xf719, 0xf71a,      0
};
163
/*
 * Font flavour used to pick a glyph map and a libthai renderer.
 *
 * The typedef lets the rest of the file use the bare name `ThaiFontType` as
 * a type, which a plain `enum ThaiFontType { ... };` only permits in C++.
 *
 *   TIS - glyph map tis620_0
 *   WIN - glyph map tis620_1
 *   MAC - glyph map tis620_2
 */
typedef enum ThaiFontType {
    TIS,
    WIN,
    MAC
} ThaiFontType;
169
170static int thai_get_glyph_index (ThaiFontType font_type, unsigned char c)
171{
172 switch (font_type){
173 case TIS: return (c & 0x80) ? tis620_0[c & 0x7f] : c;
174 case WIN: return (c & 0x80) ? tis620_1[c & 0x7f] : c;
175 case MAC: return (c & 0x80) ? tis620_2[c & 0x7f] : c;
176 default: return 0;
177 }
178}
179
180static int thai_contain_glyphs (HB_ShaperItem *shaper_item, const int glyph_map[128])
181{
182 unsigned char c;
183
184 for (c = 0; c < 0x80; c++) {
185 if ( glyph_map[c] ) {
186 if ( !shaper_item->font->klass->canRender (shaper_item->font, (const HB_UChar16 *) &glyph_map[c], 1) )
187 return 0;
188 }
189 }
190 return 1;
191}
192
193static ThaiFontType getThaiFontType(HB_ShaperItem *shaper_item)
194{
195 if ( thai_contain_glyphs (shaper_item, glyph_map: tis620_2) )
196 return MAC;
197 else if ( thai_contain_glyphs (shaper_item, glyph_map: tis620_1) )
198 return WIN;
199 else
200 return TIS;
201}
202
203/*
204 * convert to the correct display level of THAI vowels and marks.
205 */
206static HB_Bool HB_ThaiConvertStringToGlyphIndices (HB_ShaperItem *item)
207{
208 char s[128];
209 char *cstr = s;
210 const HB_UChar16 *string = item->string + item->item.pos;
211 const hb_uint32 len = item->item.length;
212 unsigned short *logClusters = item->log_clusters;
213 hb_uint32 i = 0, slen = 0;
214
215 if (!init_libthai())
216 return HB_BasicShape (shaper_item: item);
217
218 if (len >= 128)
219 cstr = (char *)malloc(size: len*sizeof(char) + 1);
220
221 if (!cstr)
222 return HB_BasicShape (shaper_item: item);
223
224 to_tis620(string, len, cstr);
225
226 /* Get font type */
227 static ThaiFontType font_type;
228 static HB_Font itemFont;
229 if (itemFont != item->font) {
230 font_type = getThaiFontType (shaper_item: item);
231 itemFont = item->font;
232 }
233
234 /* allocate temporary glyphs buffers */
235 HB_STACKARRAY (HB_UChar16, glyphString, (item->item.length * 2));
236
237 while (i < item->item.length) {
238 struct thcell_t tis_cell;
239 unsigned char rglyphs[4];
240 int cell_length;
241 int lgn = 0;
242 HB_Bool haveSaraAm = false;
243
244 cell_length = (int)(th_next_cell ((const unsigned char *)cstr + i, len - i, &tis_cell, true)); /* !item->fixedPitch); */
245 haveSaraAm = (cstr[i + cell_length - 1] == (char)0xd3);
246
247 /* set shaper item's log_clusters */
248 logClusters[i] = slen;
249 for (int j = 1; j < cell_length; j++) {
250 logClusters[i + j] = logClusters[i];
251 }
252
253 /* Find Logical Glyphs by font type */
254 switch (font_type) {
255 case TIS: lgn = th_render_cell_tis (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break;
256 case WIN: lgn = th_render_cell_mac (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break;
257 case MAC: lgn = th_render_cell_win (tis_cell, rglyphs, sizeof(rglyphs) / sizeof(rglyphs[0]), true); break;
258 }
259
260 /* Add glyphs to glyphs string and setting some attributes */
261 for (int lgi = 0; lgi < lgn; lgi++) {
262 if ( rglyphs[lgi] == 0xdd/*TH_BLANK_BASE_GLYPH*/ ) {
263 glyphString[slen++] = C_DOTTED_CIRCLE;
264 } else if ((unsigned char)cstr[i] == (unsigned char)~0) {
265 // The only glyphs that should be passed to this function that cannot be mapped to
266 // tis620 are the ones of type Inherited class. Pass these glyphs untouched.
267 glyphString[slen++] = string[i];
268 if (string[i] == 0x200D || string[i] == 0x200C) {
269 // Check that we do not run out of bounds when setting item->attributes. If we do
270 // run out of bounds then this function will return false, the necessary amount of
271 // memory is reallocated, and this function will then be called again.
272 if (slen <= item->num_glyphs)
273 item->attributes[slen-1].dontPrint = true; // Hide ZWJ and ZWNJ characters
274 }
275 } else {
276 glyphString[slen++] = (HB_UChar16) thai_get_glyph_index (font_type, c: rglyphs[lgi]);
277 }
278 }
279
280 /* Special case to handle U+0E33 (SARA AM): SARA AM is normally written at the end of a
281 * word with a base character and an optional top character before it. For example, U+0E0B
282 * (base), U+0E49 (top), U+0E33 (SARA AM). The sequence should be converted to 4 glyphs:
283 * base, hilo (the little circle in the top left part of SARA AM, NIKHAHIT), top, then the
284 * right part of SARA AM (SARA AA).
285 *
286 * The painting process finds out the starting glyph and ending glyph of a character
287 * sequence by checking the logClusters array. In this case, logClusters array should
288 * ideally be [ 0, 1, 3 ] so that glyphsStart = 0 and glyphsEnd = 3 (slen - 1) to paint out
289 * all the glyphs generated.
290 *
291 * A special case in this special case is when we have no base character. When an isolated
292 * SARA AM is processed (cell_length = 1), libthai will produce 3 glyphs: dotted circle
293 * (indicates that the base is empty), NIKHAHIT then SARA AA. If logClusters[0] = 1, it will
294 * paint from the second glyph in the glyphs array. So in this case logClusters[0] should
295 * point to the first glyph it produces, aka. the dotted circle. */
296 if (haveSaraAm) {
297 logClusters[i + cell_length - 1] = cell_length == 1 ? slen - 3 : slen - 1;
298 if (tis_cell.top != 0) {
299 if (cell_length > 1) {
300 /* set the logClusters[top character] to slen - 2 as it points to the second to
301 * lastglyph (slen - 2) */
302 logClusters[i + cell_length - 2] = slen - 2;
303 }
304 }
305 /* check for overflow */
306 if (logClusters[i + cell_length - 1] > slen)
307 logClusters[i + cell_length - 1] = 0;
308 }
309
310 i += cell_length;
311 }
312 glyphString[slen] = (HB_UChar16) '\0';
313
314 /* for check, should reallocate space or not */
315 HB_Bool spaceOK = (item->num_glyphs >= slen);
316
317 /* Convert to Glyph indices */
318 HB_Bool haveGlyphs = item->font->klass->convertStringToGlyphIndices (
319 item->font,
320 glyphString, slen,
321 item->glyphs, &item->num_glyphs,
322 item->shaperFlags);
323
324 HB_FREE_STACKARRAY (glyphString);
325
326 if (len >= 128)
327 free(ptr: cstr);
328
329 return (haveGlyphs && spaceOK);
330}
331
332/*
333 * set the glyph attributes heuristically.
334 */
335static void HB_ThaiHeuristicSetGlyphAttributes (HB_ShaperItem *item)
336{
337 /* Set Glyph Attributes */
338 hb_uint32 iCluster = 0;
339 hb_uint32 length = item->item.length;
340 while (iCluster < length) {
341 int cluster_start = item->log_clusters[iCluster];
342 ++iCluster;
343 while (iCluster < length && item->log_clusters[iCluster] == cluster_start) {
344 ++iCluster;
345 }
346 int cluster_end = (iCluster < length) ? item->log_clusters[iCluster] : item->num_glyphs;
347 item->attributes[cluster_start].clusterStart = true;
348 for (int i = cluster_start + 1; i < cluster_end; i++) {
349 item->attributes[i].clusterStart = false;
350 }
351 }
352}
353
354/*
355 * THAI Shaping.
356 */
357HB_Bool HB_ThaiShape (HB_ShaperItem *shaper_item)
358{
359 if ( !HB_ThaiConvertStringToGlyphIndices (item: shaper_item) )
360 return false;
361
362 HB_ThaiHeuristicSetGlyphAttributes (item: shaper_item);
363
364#ifndef NO_OPENTYPE
365 const int availableGlyphs = shaper_item->num_glyphs;
366 if ( HB_SelectScript (shaper_item, features: thai_features) ) {
367 HB_OpenTypeShape (item: shaper_item, /*properties*/properties: 0);
368 return HB_OpenTypePosition (item: shaper_item, availableGlyphs, /*doLogClusters*/doLogClusters: true);
369 }
370#endif
371
372 HB_HeuristicPosition (item: shaper_item);
373 return true;
374}
375
376/*
377 * Thai Attributes: computes Word Break, Word Boundary and Char stop for THAI.
378 */
379static void HB_ThaiAssignAttributes(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
380{
381 char s[128];
382 char *cstr = s;
383 int *break_positions = 0;
384 int brp[128];
385 int brp_size = 0;
386 hb_uint32 numbreaks, i, j, cell_length;
387 struct thcell_t tis_cell;
388
389 if (!init_libthai())
390 return ;
391
392 if (len >= 128)
393 cstr = (char *)malloc(size: len*sizeof(char) + 1);
394
395 to_tis620(string, len, cstr);
396
397 for (i = 0; i < len; ++i) {
398 attributes[i].wordBreak = FALSE;
399 attributes[i].wordStart = FALSE;
400 attributes[i].wordEnd = FALSE;
401 attributes[i].lineBreak = FALSE;
402 }
403
404 if (len > 128) {
405 break_positions = (int*) malloc (size: sizeof(int) * len);
406 memset (s: break_positions, c: 0, n: sizeof(int) * len);
407 brp_size = len;
408 }
409 else {
410 break_positions = brp;
411 brp_size = 128;
412 }
413
414 if (break_positions) {
415 attributes[0].wordBreak = TRUE;
416 attributes[0].wordStart = TRUE;
417 attributes[0].wordEnd = FALSE;
418 numbreaks = th_brk((const unsigned char *)cstr, break_positions, brp_size);
419 for (i = 0; i < numbreaks; ++i) {
420 attributes[break_positions[i]].wordBreak = TRUE;
421 attributes[break_positions[i]].wordStart = TRUE;
422 attributes[break_positions[i]].wordEnd = TRUE;
423 attributes[break_positions[i]].lineBreak = TRUE;
424 }
425 if (numbreaks > 0)
426 attributes[break_positions[numbreaks - 1]].wordStart = FALSE;
427
428 if (break_positions != brp)
429 free(ptr: break_positions);
430 }
431
432 /* manage grapheme boundaries */
433 i = 0;
434 while (i < len) {
435 cell_length = (hb_uint32)(th_next_cell((const unsigned char *)cstr + i, len - i, &tis_cell, true));
436
437 attributes[i].graphemeBoundary = true;
438 for (j = 1; j < cell_length; j++)
439 attributes[i + j].graphemeBoundary = false;
440
441 /* Set graphemeBoundary for SARA AM */
442 if (cstr[i + cell_length - 1] == (char)0xd3)
443 attributes[i + cell_length - 1].graphemeBoundary = true;
444
445 i += cell_length;
446 }
447
448 if (len >= 128)
449 free(ptr: cstr);
450}
451
452void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
453{
454 assert(script == HB_Script_Thai);
455 const HB_UChar16 *uc = text + from;
456 attributes += from;
457 HB_UNUSED(script);
458 HB_ThaiAssignAttributes(string: uc, len, attributes);
459}
460
461

source code of qtbase/src/3rdparty/harfbuzz/src/harfbuzz-thai.c