1 | /* |
2 | * Copyright © 2011,2012,2013 Google, Inc. |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Google Author(s): Behdad Esfahbod |
25 | */ |
26 | |
27 | #include "hb-ot-shape-complex-indic-private.hh" |
28 | |
/* Buffer var allocations: the Myanmar shaper keeps each glyph's category and
 * position in the two complex-shaper u8 variables named below. */
#define myanmar_category() complex_var_u8_0() /* myanmar_category_t */
#define myanmar_position() complex_var_u8_1() /* myanmar_position_t */
32 | |
33 | |
34 | /* |
35 | * Myanmar shaper. |
36 | */ |
37 | |
38 | static const hb_tag_t |
39 | basic_features[] = |
40 | { |
41 | /* |
42 | * Basic features. |
43 | * These features are applied in order, one at a time, after initial_reordering. |
44 | */ |
45 | HB_TAG('r','p','h','f'), |
46 | HB_TAG('p','r','e','f'), |
47 | HB_TAG('b','l','w','f'), |
48 | HB_TAG('p','s','t','f'), |
49 | }; |
50 | static const hb_tag_t |
51 | other_features[] = |
52 | { |
53 | /* |
54 | * Other features. |
55 | * These features are applied all at once, after final_reordering. |
56 | */ |
57 | HB_TAG('p','r','e','s'), |
58 | HB_TAG('a','b','v','s'), |
59 | HB_TAG('b','l','w','s'), |
60 | HB_TAG('p','s','t','s'), |
61 | /* Positioning features, though we don't care about the types. */ |
62 | HB_TAG('d','i','s','t'), |
63 | /* Pre-release version of Windows 8 Myanmar font had abvm,blwm |
64 | * features. The released Windows 8 version of the font (as well |
65 | * as the released spec) used 'mark' instead. The Windows 8 |
66 | * shaper however didn't apply 'mark' but did apply 'mkmk'. |
67 | * Perhaps it applied abvm/blwm. This was fixed in a Windows 8 |
68 | * update, so now it applies mark/mkmk. We are guessing that |
69 | * it still applies abvm/blwm too. |
70 | */ |
71 | HB_TAG('a','b','v','m'), |
72 | HB_TAG('b','l','w','m'), |
73 | }; |
74 | |
75 | static void |
76 | setup_syllables (const hb_ot_shape_plan_t *plan, |
77 | hb_font_t *font, |
78 | hb_buffer_t *buffer); |
79 | static void |
80 | initial_reordering (const hb_ot_shape_plan_t *plan, |
81 | hb_font_t *font, |
82 | hb_buffer_t *buffer); |
83 | static void |
84 | final_reordering (const hb_ot_shape_plan_t *plan, |
85 | hb_font_t *font, |
86 | hb_buffer_t *buffer); |
87 | |
88 | static void |
89 | collect_features_myanmar (hb_ot_shape_planner_t *plan) |
90 | { |
91 | hb_ot_map_builder_t *map = &plan->map; |
92 | |
93 | /* Do this before any lookups have been applied. */ |
94 | map->add_gsub_pause (pause_func: setup_syllables); |
95 | |
96 | map->add_global_bool_feature (HB_TAG('l','o','c','l')); |
97 | /* The Indic specs do not require ccmp, but we apply it here since if |
98 | * there is a use of it, it's typically at the beginning. */ |
99 | map->add_global_bool_feature (HB_TAG('c','c','m','p')); |
100 | |
101 | |
102 | map->add_gsub_pause (pause_func: initial_reordering); |
103 | for (unsigned int i = 0; i < ARRAY_LENGTH (basic_features); i++) |
104 | { |
105 | map->add_feature (tag: basic_features[i], value: 1, flags: F_GLOBAL | F_MANUAL_ZWJ); |
106 | map->add_gsub_pause (pause_func: nullptr); |
107 | } |
108 | map->add_gsub_pause (pause_func: final_reordering); |
109 | for (unsigned int i = 0; i < ARRAY_LENGTH (other_features); i++) |
110 | map->add_feature (tag: other_features[i], value: 1, flags: F_GLOBAL | F_MANUAL_ZWJ); |
111 | } |
112 | |
113 | static void |
114 | override_features_myanmar (hb_ot_shape_planner_t *plan) |
115 | { |
116 | plan->map.add_feature (HB_TAG('l','i','g','a'), value: 0, flags: F_GLOBAL); |
117 | } |
118 | |
119 | |
/* Kind of syllable a glyph belongs to.  In this file the value is read back
 * from the low four bits of a glyph's syllable() field. */
enum syllable_type_t {
  consonant_syllable  = 0, /* Reordered by initial_reordering_syllable. */
  punctuation_cluster = 1, /* Left untouched by initial reordering. */
  broken_cluster      = 2, /* Gets a dotted circle, then treated like consonant_syllable. */
  non_myanmar_cluster = 3, /* Left untouched by initial reordering. */
};
126 | |
127 | #include "hb-ot-shape-complex-myanmar-machine.hh" |
128 | |
129 | |
130 | /* Note: This enum is duplicated in the -machine.rl source file. |
131 | * Not sure how to avoid duplication. */ |
132 | enum myanmar_category_t { |
133 | OT_As = 18, /* Asat */ |
134 | OT_D0 = 20, /* Digit zero */ |
135 | OT_DB = OT_N, /* Dot below */ |
136 | OT_GB = OT_PLACEHOLDER, |
137 | OT_MH = 21, /* Various consonant medial types */ |
138 | OT_MR = 22, /* Various consonant medial types */ |
139 | OT_MW = 23, /* Various consonant medial types */ |
140 | OT_MY = 24, /* Various consonant medial types */ |
141 | OT_PT = 25, /* Pwo and other tones */ |
142 | OT_VAbv = 26, |
143 | OT_VBlw = 27, |
144 | OT_VPre = 28, |
145 | OT_VPst = 29, |
146 | OT_VS = 30, /* Variation selectors */ |
147 | OT_P = 31, /* Punctuation */ |
148 | OT_D = 32, /* Digits except zero */ |
149 | }; |
150 | |
151 | |
152 | static inline bool |
153 | is_one_of (const hb_glyph_info_t &info, unsigned int flags) |
154 | { |
155 | /* If it ligated, all bets are off. */ |
156 | if (_hb_glyph_info_ligated (info: &info)) return false; |
157 | return !!(FLAG_UNSAFE (info.myanmar_category()) & flags); |
158 | } |
159 | |
160 | static inline bool |
161 | is_consonant (const hb_glyph_info_t &info) |
162 | { |
163 | return is_one_of (info, CONSONANT_FLAGS); |
164 | } |
165 | |
166 | |
167 | static inline void |
168 | set_myanmar_properties (hb_glyph_info_t &info) |
169 | { |
170 | hb_codepoint_t u = info.codepoint; |
171 | unsigned int type = hb_indic_get_categories (u); |
172 | indic_category_t cat = (indic_category_t) (type & 0x7Fu); |
173 | indic_position_t pos = (indic_position_t) (type >> 8); |
174 | |
175 | /* Myanmar |
176 | * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze |
177 | */ |
178 | if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xFE00u, 0xFE0Fu))) |
179 | cat = (indic_category_t) OT_VS; |
180 | |
181 | switch (u) |
182 | { |
183 | case 0x104Eu: |
184 | cat = (indic_category_t) OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */ |
185 | break; |
186 | |
187 | case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u: |
188 | case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u: |
189 | case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu: |
190 | case 0x25FEu: |
191 | cat = (indic_category_t) OT_GB; |
192 | break; |
193 | |
194 | case 0x1004u: case 0x101Bu: case 0x105Au: |
195 | cat = (indic_category_t) OT_Ra; |
196 | break; |
197 | |
198 | case 0x1032u: case 0x1036u: |
199 | cat = (indic_category_t) OT_A; |
200 | break; |
201 | |
202 | case 0x1039u: |
203 | cat = (indic_category_t) OT_H; |
204 | break; |
205 | |
206 | case 0x103Au: |
207 | cat = (indic_category_t) OT_As; |
208 | break; |
209 | |
210 | case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: |
211 | case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u: |
212 | case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u: |
213 | case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u: |
214 | case 0x1097u: case 0x1098u: case 0x1099u: |
215 | cat = (indic_category_t) OT_D; |
216 | break; |
217 | |
218 | case 0x1040u: |
219 | cat = (indic_category_t) OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */ |
220 | break; |
221 | |
222 | case 0x103Eu: case 0x1060u: |
223 | cat = (indic_category_t) OT_MH; |
224 | break; |
225 | |
226 | case 0x103Cu: |
227 | cat = (indic_category_t) OT_MR; |
228 | break; |
229 | |
230 | case 0x103Du: case 0x1082u: |
231 | cat = (indic_category_t) OT_MW; |
232 | break; |
233 | |
234 | case 0x103Bu: case 0x105Eu: case 0x105Fu: |
235 | cat = (indic_category_t) OT_MY; |
236 | break; |
237 | |
238 | case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au: |
239 | case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu: |
240 | cat = (indic_category_t) OT_PT; |
241 | break; |
242 | |
243 | case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u: |
244 | case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du: |
245 | case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu: |
246 | cat = (indic_category_t) OT_SM; |
247 | break; |
248 | |
249 | case 0x104Au: case 0x104Bu: |
250 | cat = (indic_category_t) OT_P; |
251 | break; |
252 | |
253 | case 0xAA74u: case 0xAA75u: case 0xAA76u: |
254 | /* https://github.com/roozbehp/unicode-data/issues/3 */ |
255 | cat = (indic_category_t) OT_C; |
256 | break; |
257 | } |
258 | |
259 | if (cat == OT_M) |
260 | { |
261 | switch ((int) pos) |
262 | { |
263 | case POS_PRE_C: cat = (indic_category_t) OT_VPre; |
264 | pos = POS_PRE_M; break; |
265 | case POS_ABOVE_C: cat = (indic_category_t) OT_VAbv; break; |
266 | case POS_BELOW_C: cat = (indic_category_t) OT_VBlw; break; |
267 | case POS_POST_C: cat = (indic_category_t) OT_VPst; break; |
268 | } |
269 | } |
270 | |
271 | info.myanmar_category() = (myanmar_category_t) cat; |
272 | info.myanmar_position() = pos; |
273 | } |
274 | |
275 | |
276 | |
277 | static void |
278 | setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED, |
279 | hb_buffer_t *buffer, |
280 | hb_font_t *font HB_UNUSED) |
281 | { |
282 | HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_category); |
283 | HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_position); |
284 | |
285 | /* We cannot setup masks here. We save information about characters |
286 | * and setup masks later on in a pause-callback. */ |
287 | |
288 | unsigned int count = buffer->len; |
289 | hb_glyph_info_t *info = buffer->info; |
290 | for (unsigned int i = 0; i < count; i++) |
291 | set_myanmar_properties (info[i]); |
292 | } |
293 | |
294 | static void |
295 | setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, |
296 | hb_font_t *font HB_UNUSED, |
297 | hb_buffer_t *buffer) |
298 | { |
299 | find_syllables (buffer); |
300 | foreach_syllable (buffer, start, end) |
301 | buffer->unsafe_to_break (start, end); |
302 | } |
303 | |
304 | static int |
305 | compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) |
306 | { |
307 | int a = pa->myanmar_position(); |
308 | int b = pb->myanmar_position(); |
309 | |
310 | return a < b ? -1 : a == b ? 0 : +1; |
311 | } |
312 | |
313 | |
314 | /* Rules from: |
315 | * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm */ |
316 | |
317 | static void |
318 | initial_reordering_consonant_syllable (hb_buffer_t *buffer, |
319 | unsigned int start, unsigned int end) |
320 | { |
321 | hb_glyph_info_t *info = buffer->info; |
322 | |
323 | unsigned int base = end; |
324 | bool has_reph = false; |
325 | |
326 | { |
327 | unsigned int limit = start; |
328 | if (start + 3 <= end && |
329 | info[start ].myanmar_category() == OT_Ra && |
330 | info[start+1].myanmar_category() == OT_As && |
331 | info[start+2].myanmar_category() == OT_H) |
332 | { |
333 | limit += 3; |
334 | base = start; |
335 | has_reph = true; |
336 | } |
337 | |
338 | { |
339 | if (!has_reph) |
340 | base = limit; |
341 | |
342 | for (unsigned int i = limit; i < end; i++) |
343 | if (is_consonant (info: info[i])) |
344 | { |
345 | base = i; |
346 | break; |
347 | } |
348 | } |
349 | } |
350 | |
351 | /* Reorder! */ |
352 | { |
353 | unsigned int i = start; |
354 | for (; i < start + (has_reph ? 3 : 0); i++) |
355 | info[i].myanmar_position() = POS_AFTER_MAIN; |
356 | for (; i < base; i++) |
357 | info[i].myanmar_position() = POS_PRE_C; |
358 | if (i < end) |
359 | { |
360 | info[i].myanmar_position() = POS_BASE_C; |
361 | i++; |
362 | } |
363 | indic_position_t pos = POS_AFTER_MAIN; |
364 | /* The following loop may be ugly, but it implements all of |
365 | * Myanmar reordering! */ |
366 | for (; i < end; i++) |
367 | { |
368 | if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */ |
369 | { |
370 | info[i].myanmar_position() = POS_PRE_C; |
371 | continue; |
372 | } |
373 | if (info[i].myanmar_position() < POS_BASE_C) /* Left matra */ |
374 | { |
375 | continue; |
376 | } |
377 | |
378 | if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw) |
379 | { |
380 | pos = POS_BELOW_C; |
381 | info[i].myanmar_position() = pos; |
382 | continue; |
383 | } |
384 | |
385 | if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A) |
386 | { |
387 | info[i].myanmar_position() = POS_BEFORE_SUB; |
388 | continue; |
389 | } |
390 | if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw) |
391 | { |
392 | info[i].myanmar_position() = pos; |
393 | continue; |
394 | } |
395 | if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A) |
396 | { |
397 | pos = POS_AFTER_SUB; |
398 | info[i].myanmar_position() = pos; |
399 | continue; |
400 | } |
401 | info[i].myanmar_position() = pos; |
402 | } |
403 | } |
404 | |
405 | /* Sit tight, rock 'n roll! */ |
406 | buffer->sort (start, end, compar: compare_myanmar_order); |
407 | } |
408 | |
409 | static void |
410 | initial_reordering_syllable (const hb_ot_shape_plan_t *plan, |
411 | hb_face_t *face, |
412 | hb_buffer_t *buffer, |
413 | unsigned int start, unsigned int end) |
414 | { |
415 | syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F); |
416 | switch (syllable_type) { |
417 | |
418 | case broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */ |
419 | case consonant_syllable: |
420 | initial_reordering_consonant_syllable (buffer, start, end); |
421 | break; |
422 | |
423 | case punctuation_cluster: |
424 | case non_myanmar_cluster: |
425 | break; |
426 | } |
427 | } |
428 | |
429 | static inline void |
430 | insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED, |
431 | hb_font_t *font, |
432 | hb_buffer_t *buffer) |
433 | { |
434 | /* Note: This loop is extra overhead, but should not be measurable. */ |
435 | bool has_broken_syllables = false; |
436 | unsigned int count = buffer->len; |
437 | hb_glyph_info_t *info = buffer->info; |
438 | for (unsigned int i = 0; i < count; i++) |
439 | if ((info[i].syllable() & 0x0F) == broken_cluster) |
440 | { |
441 | has_broken_syllables = true; |
442 | break; |
443 | } |
444 | if (likely (!has_broken_syllables)) |
445 | return; |
446 | |
447 | |
448 | hb_codepoint_t dottedcircle_glyph; |
449 | if (!font->get_nominal_glyph (unicode: 0x25CCu, glyph: &dottedcircle_glyph)) |
450 | return; |
451 | |
452 | hb_glyph_info_t dottedcircle = {.codepoint: 0}; |
453 | dottedcircle.codepoint = 0x25CCu; |
454 | set_myanmar_properties (dottedcircle); |
455 | dottedcircle.codepoint = dottedcircle_glyph; |
456 | |
457 | buffer->clear_output (); |
458 | |
459 | buffer->idx = 0; |
460 | unsigned int last_syllable = 0; |
461 | while (buffer->idx < buffer->len && !buffer->in_error) |
462 | { |
463 | unsigned int syllable = buffer->cur().syllable(); |
464 | syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F); |
465 | if (unlikely (last_syllable != syllable && syllable_type == broken_cluster)) |
466 | { |
467 | last_syllable = syllable; |
468 | |
469 | hb_glyph_info_t ginfo = dottedcircle; |
470 | ginfo.cluster = buffer->cur().cluster; |
471 | ginfo.mask = buffer->cur().mask; |
472 | ginfo.syllable() = buffer->cur().syllable(); |
473 | |
474 | buffer->output_info (glyph_info: ginfo); |
475 | } |
476 | else |
477 | buffer->next_glyph (); |
478 | } |
479 | |
480 | buffer->swap_buffers (); |
481 | } |
482 | |
483 | static void |
484 | initial_reordering (const hb_ot_shape_plan_t *plan, |
485 | hb_font_t *font, |
486 | hb_buffer_t *buffer) |
487 | { |
488 | insert_dotted_circles (plan, font, buffer); |
489 | |
490 | foreach_syllable (buffer, start, end) |
491 | initial_reordering_syllable (plan, face: font->face, buffer, start, end); |
492 | } |
493 | |
494 | static void |
495 | final_reordering (const hb_ot_shape_plan_t *plan, |
496 | hb_font_t *font HB_UNUSED, |
497 | hb_buffer_t *buffer) |
498 | { |
499 | hb_glyph_info_t *info = buffer->info; |
500 | unsigned int count = buffer->len; |
501 | |
502 | /* Zero syllables now... */ |
503 | for (unsigned int i = 0; i < count; i++) |
504 | info[i].syllable() = 0; |
505 | |
506 | HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_category); |
507 | HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_position); |
508 | } |
509 | |
510 | |
511 | /* Uniscribe seems to have a shaper for 'mymr' that is like the |
512 | * generic shaper, except that it zeros mark advances GDEF_LATE. */ |
513 | const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar_old = |
514 | { |
515 | .collect_features: nullptr, /* collect_features */ |
516 | .override_features: nullptr, /* override_features */ |
517 | .data_create: nullptr, /* data_create */ |
518 | .data_destroy: nullptr, /* data_destroy */ |
519 | .preprocess_text: nullptr, /* preprocess_text */ |
520 | .postprocess_glyphs: nullptr, /* postprocess_glyphs */ |
521 | .normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, |
522 | .decompose: nullptr, /* decompose */ |
523 | .compose: nullptr, /* compose */ |
524 | .setup_masks: nullptr, /* setup_masks */ |
525 | .disable_otl: nullptr, /* disable_otl */ |
526 | .reorder_marks: nullptr, /* reorder_marks */ |
527 | .zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, |
528 | .fallback_position: true, /* fallback_position */ |
529 | }; |
530 | |
531 | const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar = |
532 | { |
533 | .collect_features: collect_features_myanmar, |
534 | .override_features: override_features_myanmar, |
535 | .data_create: nullptr, /* data_create */ |
536 | .data_destroy: nullptr, /* data_destroy */ |
537 | .preprocess_text: nullptr, /* preprocess_text */ |
538 | .postprocess_glyphs: nullptr, /* postprocess_glyphs */ |
539 | .normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, |
540 | .decompose: nullptr, /* decompose */ |
541 | .compose: nullptr, /* compose */ |
542 | .setup_masks: setup_masks_myanmar, |
543 | .disable_otl: nullptr, /* disable_otl */ |
544 | .reorder_marks: nullptr, /* reorder_marks */ |
545 | .zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY, |
546 | .fallback_position: false, /* fallback_position */ |
547 | }; |
548 | |