1 | /* |
2 | * Copyright © 2011,2012 Google, Inc. |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Google Author(s): Behdad Esfahbod |
25 | */ |
26 | |
27 | #include "hb-ot-shape-complex-indic-private.hh" |
28 | #include "hb-ot-layout-private.hh" |
29 | |
/*
 * Buffer variable allocations.
 *
 * The Indic shaper keeps two 8-bit values per glyph.  They are reached as
 * info.indic_category() and info.indic_position() and are backed by the
 * complex_var_u8_0() / complex_var_u8_1() slots.
 */
#define indic_category() complex_var_u8_0() /* stores an indic_category_t */
#define indic_position() complex_var_u8_1() /* stores an indic_position_t */
33 | |
34 | |
35 | /* |
36 | * Indic shaper. |
37 | */ |
38 | |
39 | |
/*
 * Script membership tests.
 *
 * IN_HALF_BLOCK(u, Base) is true when clearing the low seven bits of u
 * yields Base, i.e. when u lies in the 128-code-point range that starts
 * at Base.  Each IS_xxxx() macro applies that test to one fixed range.
 */
#define IN_HALF_BLOCK(u, Base) (((u) & ~0x7Fu) == (Base))

#define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900u)) /* U+0900..U+097F */
#define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980u)) /* U+0980..U+09FF */
#define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00u)) /* U+0A00..U+0A7F */
#define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80u)) /* U+0A80..U+0AFF */
#define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00u)) /* U+0B00..U+0B7F */
#define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80u)) /* U+0B80..U+0BFF */
#define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00u)) /* U+0C00..U+0C7F */
#define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80u)) /* U+0C80..U+0CFF */
#define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00u)) /* U+0D00..U+0D7F */
#define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80u)) /* U+0D80..U+0DFF */
#define IS_KHMR(u) (IN_HALF_BLOCK (u, 0x1780u)) /* U+1780..U+17FF */
54 | |
/*
 * Per-script matra placement.
 *
 * Each macro maps the code point u of a matra to the indic_position_t it
 * should be given, depending on which side of the base the matra attaches
 * (left / right / top / bottom).  Scripts not listed in a macro fall through
 * to the default at the end of that macro.
 *
 * The argument u is evaluated more than once, so it must not have side
 * effects.  It is parenthesized wherever it appears in a comparison so that
 * an expression argument cannot change the meaning of the macro.
 */
#define MATRA_POS_LEFT(u)	POS_PRE_M
#define MATRA_POS_RIGHT(u)	( \
			  IS_DEVA(u) ? POS_AFTER_SUB  : \
			  IS_BENG(u) ? POS_AFTER_POST : \
			  IS_GURU(u) ? POS_AFTER_POST : \
			  IS_GUJR(u) ? POS_AFTER_POST : \
			  IS_ORYA(u) ? POS_AFTER_POST : \
			  IS_TAML(u) ? POS_AFTER_POST : \
			  IS_TELU(u) ? ((u) <= 0x0C42u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \
			  IS_KNDA(u) ? ((u) < 0x0CC3u || (u) > 0x0CD6u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \
			  IS_MLYM(u) ? POS_AFTER_POST : \
			  IS_SINH(u) ? POS_AFTER_SUB  : \
			  IS_KHMR(u) ? POS_AFTER_POST : \
			  /*default*/  POS_AFTER_SUB    \
			)
#define MATRA_POS_TOP(u)	( /* BENG and MLYM don't have top matras. */ \
			  IS_DEVA(u) ? POS_AFTER_SUB  : \
			  IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \
			  IS_GUJR(u) ? POS_AFTER_SUB  : \
			  IS_ORYA(u) ? POS_AFTER_MAIN : \
			  IS_TAML(u) ? POS_AFTER_SUB  : \
			  IS_TELU(u) ? POS_BEFORE_SUB : \
			  IS_KNDA(u) ? POS_BEFORE_SUB : \
			  IS_SINH(u) ? POS_AFTER_SUB  : \
			  IS_KHMR(u) ? POS_AFTER_POST : \
			  /*default*/  POS_AFTER_SUB    \
			)
#define MATRA_POS_BOTTOM(u)	( \
			  IS_DEVA(u) ? POS_AFTER_SUB  : \
			  IS_BENG(u) ? POS_AFTER_SUB  : \
			  IS_GURU(u) ? POS_AFTER_POST : \
			  IS_GUJR(u) ? POS_AFTER_POST : \
			  IS_ORYA(u) ? POS_AFTER_SUB  : \
			  IS_TAML(u) ? POS_AFTER_POST : \
			  IS_TELU(u) ? POS_BEFORE_SUB : \
			  IS_KNDA(u) ? POS_BEFORE_SUB : \
			  IS_MLYM(u) ? POS_AFTER_POST : \
			  IS_SINH(u) ? POS_AFTER_SUB  : \
			  IS_KHMR(u) ? POS_AFTER_POST : \
			  /*default*/  POS_AFTER_SUB    \
			)
96 | |
97 | static inline indic_position_t |
98 | matra_position (hb_codepoint_t u, indic_position_t side) |
99 | { |
100 | switch ((int) side) |
101 | { |
102 | case POS_PRE_C: return MATRA_POS_LEFT (u); |
103 | case POS_POST_C: return MATRA_POS_RIGHT (u); |
104 | case POS_ABOVE_C: return MATRA_POS_TOP (u); |
105 | case POS_BELOW_C: return MATRA_POS_BOTTOM (u); |
106 | }; |
107 | return side; |
108 | } |
109 | |
110 | /* XXX |
111 | * This is a hack for now. We should move this data into the main Indic table. |
112 | * Or completely remove it and just check in the tables. |
113 | */ |
114 | static const hb_codepoint_t ra_chars[] = { |
115 | 0x0930u, /* Devanagari */ |
116 | 0x09B0u, /* Bengali */ |
117 | 0x09F0u, /* Bengali */ |
118 | 0x0A30u, /* Gurmukhi */ /* No Reph */ |
119 | 0x0AB0u, /* Gujarati */ |
120 | 0x0B30u, /* Oriya */ |
121 | 0x0BB0u, /* Tamil */ /* No Reph */ |
122 | 0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */ |
123 | 0x0CB0u, /* Kannada */ |
124 | 0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */ |
125 | |
126 | 0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */ |
127 | |
128 | 0x179Au, /* Khmer */ /* No Reph, Visual Repha */ |
129 | }; |
130 | |
131 | static inline bool |
132 | is_ra (hb_codepoint_t u) |
133 | { |
134 | for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++) |
135 | if (u == ra_chars[i]) |
136 | return true; |
137 | return false; |
138 | } |
139 | |
140 | static inline bool |
141 | is_one_of (const hb_glyph_info_t &info, unsigned int flags) |
142 | { |
143 | /* If it ligated, all bets are off. */ |
144 | if (_hb_glyph_info_ligated (info: &info)) return false; |
145 | return !!(FLAG_UNSAFE (info.indic_category()) & flags); |
146 | } |
147 | |
148 | static inline bool |
149 | is_joiner (const hb_glyph_info_t &info) |
150 | { |
151 | return is_one_of (info, JOINER_FLAGS); |
152 | } |
153 | |
154 | static inline bool |
155 | is_consonant (const hb_glyph_info_t &info) |
156 | { |
157 | return is_one_of (info, CONSONANT_FLAGS); |
158 | } |
159 | |
160 | static inline bool |
161 | is_halant_or_coeng (const hb_glyph_info_t &info) |
162 | { |
163 | return is_one_of (info, HALANT_OR_COENG_FLAGS); |
164 | } |
165 | |
166 | static inline void |
167 | set_indic_properties (hb_glyph_info_t &info) |
168 | { |
169 | hb_codepoint_t u = info.codepoint; |
170 | unsigned int type = hb_indic_get_categories (u); |
171 | indic_category_t cat = (indic_category_t) (type & 0x7Fu); |
172 | indic_position_t pos = (indic_position_t) (type >> 8); |
173 | |
174 | |
175 | /* |
176 | * Re-assign category |
177 | */ |
178 | |
179 | /* The following act more like the Bindus. */ |
180 | if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953u, 0x0954u))) |
181 | cat = OT_SM; |
182 | /* The following act like consonants. */ |
183 | else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0A72u, 0x0A73u, |
184 | 0x1CF5u, 0x1CF6u))) |
185 | cat = OT_C; |
186 | /* TODO: The following should only be allowed after a Visarga. |
187 | * For now, just treat them like regular tone marks. */ |
188 | else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2u, 0x1CE8u))) |
189 | cat = OT_A; |
190 | /* TODO: The following should only be allowed after some of |
191 | * the nasalization marks, maybe only for U+1CE9..U+1CF1. |
192 | * For now, just treat them like tone marks. */ |
193 | else if (unlikely (u == 0x1CEDu)) |
194 | cat = OT_A; |
195 | /* The following take marks in standalone clusters, similar to Avagraha. */ |
196 | else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0xA8F2u, 0xA8F7u, |
197 | 0x1CE9u, 0x1CECu, |
198 | 0x1CEEu, 0x1CF1u))) |
199 | { |
200 | cat = OT_Symbol; |
201 | static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), "" ); |
202 | } |
203 | else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) || |
204 | u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */ |
205 | { |
206 | /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier. |
207 | * https://github.com/roozbehp/unicode-data/issues/5 */ |
208 | cat = OT_M; |
209 | pos = POS_ABOVE_C; |
210 | } |
211 | else if (unlikely (u == 0x0A51u)) |
212 | { |
213 | /* https://github.com/harfbuzz/harfbuzz/issues/524 */ |
214 | cat = OT_M; |
215 | pos = POS_BELOW_C; |
216 | } |
217 | |
218 | /* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, |
219 | * so the Indic shaper needs to know their categories. */ |
220 | else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM; |
221 | else if (unlikely (u == 0x1133cu)) cat = OT_N; |
222 | |
223 | else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */ |
224 | |
225 | else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */ |
226 | else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */ |
227 | else if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */ |
228 | else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) |
229 | cat = OT_PLACEHOLDER; |
230 | else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; |
231 | |
232 | |
233 | /* |
234 | * Re-assign position. |
235 | */ |
236 | |
237 | if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) |
238 | { |
239 | pos = POS_BASE_C; |
240 | if (is_ra (u)) |
241 | cat = OT_Ra; |
242 | } |
243 | else if (cat == OT_M) |
244 | { |
245 | pos = matra_position (u, side: pos); |
246 | } |
247 | else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_VD) | FLAG (OT_A) | FLAG (OT_Symbol)))) |
248 | { |
249 | pos = POS_SMVD; |
250 | } |
251 | |
252 | if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */ |
253 | |
254 | |
255 | |
256 | info.indic_category() = cat; |
257 | info.indic_position() = pos; |
258 | } |
259 | |
260 | /* |
261 | * Things above this line should ideally be moved to the Indic table itself. |
262 | */ |
263 | |
264 | |
/*
 * Indic configurations.  Note that we do not necessarily want to keep every
 * single script-specific behavior in these tables.  They should mainly be used
 * for per-script properties that are cheaper to keep here than in the code.
 * I.e., if one and only one script has an exception, that script can be if'ed
 * directly in the code, instead of adding a new flag to these structs.
 */
272 | |
/* How the base consonant of a syllable is located (see indic_config_t::base_pos). */
enum base_position_t {
  /* The first consonant of the syllable is always the base. */
  BASE_POS_FIRST,
  /* Last-consonant search with the Sinhala-specific ZWJ handling. */
  BASE_POS_LAST_SINHALA,
  /* Search backwards from the end of the syllable. */
  BASE_POS_LAST
};
278 | enum reph_position_t { |
279 | REPH_POS_AFTER_MAIN = POS_AFTER_MAIN, |
280 | REPH_POS_BEFORE_SUB = POS_BEFORE_SUB, |
281 | REPH_POS_AFTER_SUB = POS_AFTER_SUB, |
282 | REPH_POS_BEFORE_POST = POS_BEFORE_POST, |
283 | REPH_POS_AFTER_POST = POS_AFTER_POST, |
284 | REPH_POS_DONT_CARE = POS_RA_TO_BECOME_REPH |
285 | }; |
/* How Reph is encoded / formed in a script (see indic_config_t::reph_mode). */
enum reph_mode_t {
  /* Reph formed out of initial Ra,H sequence. */
  REPH_MODE_IMPLICIT,
  /* Reph formed out of initial Ra,H,ZWJ sequence. */
  REPH_MODE_EXPLICIT,
  /* Encoded Repha character, no reordering needed. */
  REPH_MODE_VIS_REPHA,
  /* Encoded Repha character, needs reordering. */
  REPH_MODE_LOG_REPHA
};
/* Which consonants the below-forms feature applies to (see indic_config_t::blwf_mode). */
enum blwf_mode_t {
  /* Below-forms feature applied to pre-base and post-base. */
  BLWF_MODE_PRE_AND_POST,
  /* Below-forms feature applied to post-base only. */
  BLWF_MODE_POST_ONLY
};
296 | struct indic_config_t |
297 | { |
298 | hb_script_t script; |
299 | bool has_old_spec; |
300 | hb_codepoint_t virama; |
301 | base_position_t base_pos; |
302 | reph_position_t reph_pos; |
303 | reph_mode_t reph_mode; |
304 | blwf_mode_t blwf_mode; |
305 | }; |
306 | |
307 | static const indic_config_t indic_configs[] = |
308 | { |
309 | /* Default. Should be first. */ |
310 | {.script: HB_SCRIPT_INVALID, .has_old_spec: false, .virama: 0,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_BEFORE_POST,.reph_mode: REPH_MODE_IMPLICIT, .blwf_mode: BLWF_MODE_PRE_AND_POST}, |
311 | {.script: HB_SCRIPT_DEVANAGARI,.has_old_spec: true, .virama: 0x094Du,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_BEFORE_POST,.reph_mode: REPH_MODE_IMPLICIT, .blwf_mode: BLWF_MODE_PRE_AND_POST}, |
312 | {.script: HB_SCRIPT_BENGALI, .has_old_spec: true, .virama: 0x09CDu,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_AFTER_SUB, .reph_mode: REPH_MODE_IMPLICIT, .blwf_mode: BLWF_MODE_PRE_AND_POST}, |
313 | {.script: HB_SCRIPT_GURMUKHI, .has_old_spec: true, .virama: 0x0A4Du,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_BEFORE_SUB, .reph_mode: REPH_MODE_IMPLICIT, .blwf_mode: BLWF_MODE_PRE_AND_POST}, |
314 | {.script: HB_SCRIPT_GUJARATI, .has_old_spec: true, .virama: 0x0ACDu,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_BEFORE_POST,.reph_mode: REPH_MODE_IMPLICIT, .blwf_mode: BLWF_MODE_PRE_AND_POST}, |
315 | {.script: HB_SCRIPT_ORIYA, .has_old_spec: true, .virama: 0x0B4Du,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_AFTER_MAIN, .reph_mode: REPH_MODE_IMPLICIT, .blwf_mode: BLWF_MODE_PRE_AND_POST}, |
316 | {.script: HB_SCRIPT_TAMIL, .has_old_spec: true, .virama: 0x0BCDu,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_AFTER_POST, .reph_mode: REPH_MODE_IMPLICIT, .blwf_mode: BLWF_MODE_PRE_AND_POST}, |
317 | {.script: HB_SCRIPT_TELUGU, .has_old_spec: true, .virama: 0x0C4Du,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_AFTER_POST, .reph_mode: REPH_MODE_EXPLICIT, .blwf_mode: BLWF_MODE_POST_ONLY}, |
318 | {.script: HB_SCRIPT_KANNADA, .has_old_spec: true, .virama: 0x0CCDu,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_AFTER_POST, .reph_mode: REPH_MODE_IMPLICIT, .blwf_mode: BLWF_MODE_POST_ONLY}, |
319 | {.script: HB_SCRIPT_MALAYALAM, .has_old_spec: true, .virama: 0x0D4Du,.base_pos: BASE_POS_LAST, .reph_pos: REPH_POS_AFTER_MAIN, .reph_mode: REPH_MODE_LOG_REPHA,.blwf_mode: BLWF_MODE_PRE_AND_POST}, |
320 | {.script: HB_SCRIPT_SINHALA, .has_old_spec: false,.virama: 0x0DCAu,.base_pos: BASE_POS_LAST_SINHALA, |
321 | .reph_pos: REPH_POS_AFTER_MAIN, .reph_mode: REPH_MODE_EXPLICIT, .blwf_mode: BLWF_MODE_PRE_AND_POST}, |
322 | {.script: HB_SCRIPT_KHMER, .has_old_spec: false,.virama: 0x17D2u,.base_pos: BASE_POS_FIRST,.reph_pos: REPH_POS_DONT_CARE, .reph_mode: REPH_MODE_VIS_REPHA,.blwf_mode: BLWF_MODE_PRE_AND_POST}, |
323 | }; |
324 | |
325 | |
326 | |
327 | /* |
328 | * Indic shaper. |
329 | */ |
330 | |
331 | struct feature_list_t { |
332 | hb_tag_t tag; |
333 | hb_ot_map_feature_flags_t flags; |
334 | }; |
335 | |
336 | static const feature_list_t |
337 | indic_features[] = |
338 | { |
339 | /* |
340 | * Basic features. |
341 | * These features are applied in order, one at a time, after initial_reordering. |
342 | */ |
343 | {HB_TAG('n','u','k','t'), .flags: F_GLOBAL}, |
344 | {HB_TAG('a','k','h','n'), .flags: F_GLOBAL}, |
345 | {HB_TAG('r','p','h','f'), .flags: F_NONE}, |
346 | {HB_TAG('r','k','r','f'), .flags: F_GLOBAL}, |
347 | {HB_TAG('p','r','e','f'), .flags: F_NONE}, |
348 | {HB_TAG('b','l','w','f'), .flags: F_NONE}, |
349 | {HB_TAG('a','b','v','f'), .flags: F_NONE}, |
350 | {HB_TAG('h','a','l','f'), .flags: F_NONE}, |
351 | {HB_TAG('p','s','t','f'), .flags: F_NONE}, |
352 | {HB_TAG('v','a','t','u'), .flags: F_GLOBAL}, |
353 | {HB_TAG('c','j','c','t'), .flags: F_GLOBAL}, |
354 | {HB_TAG('c','f','a','r'), .flags: F_NONE}, |
355 | /* |
356 | * Other features. |
357 | * These features are applied all at once, after final_reordering. |
358 | * Default Bengali font in Windows for example has intermixed |
359 | * lookups for init,pres,abvs,blws features. |
360 | */ |
361 | {HB_TAG('i','n','i','t'), .flags: F_NONE}, |
362 | {HB_TAG('p','r','e','s'), .flags: F_GLOBAL}, |
363 | {HB_TAG('a','b','v','s'), .flags: F_GLOBAL}, |
364 | {HB_TAG('b','l','w','s'), .flags: F_GLOBAL}, |
365 | {HB_TAG('p','s','t','s'), .flags: F_GLOBAL}, |
366 | {HB_TAG('h','a','l','n'), .flags: F_GLOBAL}, |
367 | /* Positioning features, though we don't care about the types. */ |
368 | {HB_TAG('d','i','s','t'), .flags: F_GLOBAL}, |
369 | {HB_TAG('a','b','v','m'), .flags: F_GLOBAL}, |
370 | {HB_TAG('b','l','w','m'), .flags: F_GLOBAL}, |
371 | }; |
372 | |
/*
 * Symbolic indices into indic_features[] and indic_shape_plan_t::mask_array.
 * Must be in the same order as the indic_features array.
 *
 * Underscore-prefixed names correspond to the entries flagged F_GLOBAL in
 * that array.
 */
enum {
  /* Basic features. */
  _NUKT,
  _AKHN,
  RPHF,
  _RKRF,
  PREF,
  BLWF,
  ABVF,
  HALF,
  PSTF,
  _VATU,
  _CJCT,
  CFAR,

  /* Other features. */
  INIT,
  _PRES,
  _ABVS,
  _BLWS,
  _PSTS,
  _HALN,
  _DIST,
  _ABVM,
  _BLWM,

  /* Total number of entries. */
  INDIC_NUM_FEATURES,
  /* Number of basic features == index of the first non-basic one. */
  INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */
};
403 | |
404 | static void |
405 | setup_syllables (const hb_ot_shape_plan_t *plan, |
406 | hb_font_t *font, |
407 | hb_buffer_t *buffer); |
408 | static void |
409 | initial_reordering (const hb_ot_shape_plan_t *plan, |
410 | hb_font_t *font, |
411 | hb_buffer_t *buffer); |
412 | static void |
413 | final_reordering (const hb_ot_shape_plan_t *plan, |
414 | hb_font_t *font, |
415 | hb_buffer_t *buffer); |
416 | static void |
417 | clear_syllables (const hb_ot_shape_plan_t *plan, |
418 | hb_font_t *font, |
419 | hb_buffer_t *buffer); |
420 | |
421 | static void |
422 | collect_features_indic (hb_ot_shape_planner_t *plan) |
423 | { |
424 | hb_ot_map_builder_t *map = &plan->map; |
425 | |
426 | /* Do this before any lookups have been applied. */ |
427 | map->add_gsub_pause (pause_func: setup_syllables); |
428 | |
429 | map->add_global_bool_feature (HB_TAG('l','o','c','l')); |
430 | /* The Indic specs do not require ccmp, but we apply it here since if |
431 | * there is a use of it, it's typically at the beginning. */ |
432 | map->add_global_bool_feature (HB_TAG('c','c','m','p')); |
433 | |
434 | |
435 | unsigned int i = 0; |
436 | map->add_gsub_pause (pause_func: initial_reordering); |
437 | for (; i < INDIC_BASIC_FEATURES; i++) { |
438 | map->add_feature (tag: indic_features[i].tag, value: 1, flags: indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ); |
439 | map->add_gsub_pause (pause_func: nullptr); |
440 | } |
441 | map->add_gsub_pause (pause_func: final_reordering); |
442 | for (; i < INDIC_NUM_FEATURES; i++) { |
443 | map->add_feature (tag: indic_features[i].tag, value: 1, flags: indic_features[i].flags | F_MANUAL_ZWJ | F_MANUAL_ZWNJ); |
444 | } |
445 | |
446 | map->add_global_bool_feature (HB_TAG('c','a','l','t')); |
447 | map->add_global_bool_feature (HB_TAG('c','l','i','g')); |
448 | |
449 | map->add_gsub_pause (pause_func: clear_syllables); |
450 | } |
451 | |
452 | static void |
453 | override_features_indic (hb_ot_shape_planner_t *plan) |
454 | { |
455 | /* Uniscribe does not apply 'kern' in Khmer. */ |
456 | if (hb_options ().uniscribe_bug_compatible) |
457 | { |
458 | switch ((hb_tag_t) plan->props.script) |
459 | { |
460 | case HB_SCRIPT_KHMER: |
461 | plan->map.add_feature (HB_TAG('k','e','r','n'), value: 0, flags: F_GLOBAL); |
462 | break; |
463 | } |
464 | } |
465 | |
466 | plan->map.add_feature (HB_TAG('l','i','g','a'), value: 0, flags: F_GLOBAL); |
467 | } |
468 | |
469 | |
470 | struct would_substitute_feature_t |
471 | { |
472 | inline void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_) |
473 | { |
474 | zero_context = zero_context_; |
475 | map->get_stage_lookups (table_index: 0/*GSUB*/, |
476 | stage: map->get_feature_stage (table_index: 0/*GSUB*/, feature_tag), |
477 | plookups: &lookups, lookup_count: &count); |
478 | } |
479 | |
480 | inline bool would_substitute (const hb_codepoint_t *glyphs, |
481 | unsigned int glyphs_count, |
482 | hb_face_t *face) const |
483 | { |
484 | for (unsigned int i = 0; i < count; i++) |
485 | if (hb_ot_layout_lookup_would_substitute_fast (face, lookup_index: lookups[i].index, glyphs, glyphs_length: glyphs_count, zero_context)) |
486 | return true; |
487 | return false; |
488 | } |
489 | |
490 | private: |
491 | const hb_ot_map_t::lookup_map_t *lookups; |
492 | unsigned int count; |
493 | bool zero_context; |
494 | }; |
495 | |
496 | struct indic_shape_plan_t |
497 | { |
498 | ASSERT_POD (); |
499 | |
500 | inline bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const |
501 | { |
502 | hb_codepoint_t glyph = virama_glyph; |
503 | if (unlikely (virama_glyph == (hb_codepoint_t) -1)) |
504 | { |
505 | if (!config->virama || !font->get_nominal_glyph (unicode: config->virama, glyph: &glyph)) |
506 | glyph = 0; |
507 | /* Technically speaking, the spec says we should apply 'locl' to virama too. |
508 | * Maybe one day... */ |
509 | |
510 | /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph |
511 | * during shape planning... Instead, overwrite it here. It's safe. Don't worry! */ |
512 | virama_glyph = glyph; |
513 | } |
514 | |
515 | *pglyph = glyph; |
516 | return glyph != 0; |
517 | } |
518 | |
519 | const indic_config_t *config; |
520 | |
521 | bool is_old_spec; |
522 | mutable hb_codepoint_t virama_glyph; |
523 | |
524 | would_substitute_feature_t rphf; |
525 | would_substitute_feature_t pref; |
526 | would_substitute_feature_t blwf; |
527 | would_substitute_feature_t pstf; |
528 | |
529 | hb_mask_t mask_array[INDIC_NUM_FEATURES]; |
530 | }; |
531 | |
532 | static void * |
533 | data_create_indic (const hb_ot_shape_plan_t *plan) |
534 | { |
535 | indic_shape_plan_t *indic_plan = (indic_shape_plan_t *) calloc (nmemb: 1, size: sizeof (indic_shape_plan_t)); |
536 | if (unlikely (!indic_plan)) |
537 | return nullptr; |
538 | |
539 | indic_plan->config = &indic_configs[0]; |
540 | for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++) |
541 | if (plan->props.script == indic_configs[i].script) { |
542 | indic_plan->config = &indic_configs[i]; |
543 | break; |
544 | } |
545 | |
546 | indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2'); |
547 | indic_plan->virama_glyph = (hb_codepoint_t) -1; |
548 | |
549 | /* Use zero-context would_substitute() matching for new-spec of the main |
550 | * Indic scripts, and scripts with one spec only, but not for old-specs. |
551 | * The new-spec for all dual-spec scripts says zero-context matching happens. |
552 | * |
553 | * However, testing with Malayalam shows that old and new spec both allow |
554 | * context. Testing with Bengali new-spec however shows that it doesn't. |
555 | * So, the heuristic here is the way it is. It should *only* be changed, |
556 | * as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE. |
557 | */ |
558 | bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM; |
559 | indic_plan->rphf.init (map: &plan->map, HB_TAG('r','p','h','f'), zero_context_: zero_context); |
560 | indic_plan->pref.init (map: &plan->map, HB_TAG('p','r','e','f'), zero_context_: zero_context); |
561 | indic_plan->blwf.init (map: &plan->map, HB_TAG('b','l','w','f'), zero_context_: zero_context); |
562 | indic_plan->pstf.init (map: &plan->map, HB_TAG('p','s','t','f'), zero_context_: zero_context); |
563 | |
564 | for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++) |
565 | indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ? |
566 | 0 : plan->map.get_1_mask (feature_tag: indic_features[i].tag); |
567 | |
568 | return indic_plan; |
569 | } |
570 | |
/* Releases the object returned by data_create_indic(); data may be null. */
static void
data_destroy_indic (void *data)
{
  free (data);
}
576 | |
577 | static indic_position_t |
578 | consonant_position_from_face (const indic_shape_plan_t *indic_plan, |
579 | const hb_codepoint_t consonant, |
580 | const hb_codepoint_t virama, |
581 | hb_face_t *face) |
582 | { |
583 | /* For old-spec, the order of glyphs is Consonant,Virama, |
584 | * whereas for new-spec, it's Virama,Consonant. However, |
585 | * some broken fonts (like Free Sans) simply copied lookups |
586 | * from old-spec to new-spec without modification. |
587 | * And oddly enough, Uniscribe seems to respect those lookups. |
588 | * Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds |
589 | * base at 0. The font however, only has lookups matching |
590 | * 930,94D in 'blwf', not the expected 94D,930 (with new-spec |
591 | * table). As such, we simply match both sequences. Seems |
592 | * to work. */ |
593 | hb_codepoint_t glyphs[3] = {virama, consonant, virama}; |
594 | if (indic_plan->blwf.would_substitute (glyphs , glyphs_count: 2, face) || |
595 | indic_plan->blwf.would_substitute (glyphs: glyphs+1, glyphs_count: 2, face)) |
596 | return POS_BELOW_C; |
597 | if (indic_plan->pstf.would_substitute (glyphs , glyphs_count: 2, face) || |
598 | indic_plan->pstf.would_substitute (glyphs: glyphs+1, glyphs_count: 2, face)) |
599 | return POS_POST_C; |
600 | if (indic_plan->pref.would_substitute (glyphs , glyphs_count: 2, face) || |
601 | indic_plan->pref.would_substitute (glyphs: glyphs+1, glyphs_count: 2, face)) |
602 | return POS_POST_C; |
603 | return POS_BASE_C; |
604 | } |
605 | |
606 | |
/* Kinds of syllable; declared ahead of the included syllable machine. */
enum syllable_type_t
{
  consonant_syllable,
  vowel_syllable,
  standalone_cluster,
  symbol_cluster,
  broken_cluster,
  non_indic_cluster,
};
615 | |
616 | #include "hb-ot-shape-complex-indic-machine.hh" |
617 | |
618 | |
619 | static void |
620 | setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, |
621 | hb_buffer_t *buffer, |
622 | hb_font_t *font HB_UNUSED) |
623 | { |
624 | HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); |
625 | HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); |
626 | |
627 | /* We cannot setup masks here. We save information about characters |
628 | * and setup masks later on in a pause-callback. */ |
629 | |
630 | unsigned int count = buffer->len; |
631 | hb_glyph_info_t *info = buffer->info; |
632 | for (unsigned int i = 0; i < count; i++) |
633 | set_indic_properties (info[i]); |
634 | } |
635 | |
636 | static void |
637 | setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, |
638 | hb_font_t *font HB_UNUSED, |
639 | hb_buffer_t *buffer) |
640 | { |
641 | find_syllables (buffer); |
642 | foreach_syllable (buffer, start, end) |
643 | buffer->unsafe_to_break (start, end); |
644 | } |
645 | |
646 | static int |
647 | compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) |
648 | { |
649 | int a = pa->indic_position(); |
650 | int b = pb->indic_position(); |
651 | |
652 | return a < b ? -1 : a == b ? 0 : +1; |
653 | } |
654 | |
655 | |
656 | |
657 | static void |
658 | update_consonant_positions (const hb_ot_shape_plan_t *plan, |
659 | hb_font_t *font, |
660 | hb_buffer_t *buffer) |
661 | { |
662 | const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; |
663 | |
664 | if (indic_plan->config->base_pos != BASE_POS_LAST) |
665 | return; |
666 | |
667 | hb_codepoint_t virama; |
668 | if (indic_plan->get_virama_glyph (font, pglyph: &virama)) |
669 | { |
670 | hb_face_t *face = font->face; |
671 | unsigned int count = buffer->len; |
672 | hb_glyph_info_t *info = buffer->info; |
673 | for (unsigned int i = 0; i < count; i++) |
674 | if (info[i].indic_position() == POS_BASE_C) |
675 | { |
676 | hb_codepoint_t consonant = info[i].codepoint; |
677 | info[i].indic_position() = consonant_position_from_face (indic_plan, consonant, virama, face); |
678 | } |
679 | } |
680 | } |
681 | |
682 | |
683 | /* Rules from: |
684 | * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ |
685 | |
686 | static void |
687 | initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, |
688 | hb_face_t *face, |
689 | hb_buffer_t *buffer, |
690 | unsigned int start, unsigned int end) |
691 | { |
692 | const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; |
693 | hb_glyph_info_t *info = buffer->info; |
694 | |
695 | /* https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167 |
696 | * // For compatibility with legacy usage in Kannada, |
697 | * // Ra+h+ZWJ must behave like Ra+ZWJ+h... |
698 | */ |
699 | if (buffer->props.script == HB_SCRIPT_KANNADA && |
700 | start + 3 <= end && |
701 | is_one_of (info: info[start ], FLAG (OT_Ra)) && |
702 | is_one_of (info: info[start+1], FLAG (OT_H)) && |
703 | is_one_of (info: info[start+2], FLAG (OT_ZWJ))) |
704 | { |
705 | buffer->merge_clusters (start: start+1, end: start+3); |
706 | hb_glyph_info_t tmp = info[start+1]; |
707 | info[start+1] = info[start+2]; |
708 | info[start+2] = tmp; |
709 | } |
710 | |
711 | /* 1. Find base consonant: |
712 | * |
713 | * The shaping engine finds the base consonant of the syllable, using the |
714 | * following algorithm: starting from the end of the syllable, move backwards |
715 | * until a consonant is found that does not have a below-base or post-base |
716 | * form (post-base forms have to follow below-base forms), or that is not a |
717 | * pre-base-reordering Ra, or arrive at the first consonant. The consonant |
718 | * stopped at will be the base. |
719 | * |
720 | * o If the syllable starts with Ra + Halant (in a script that has Reph) |
721 | * and has more than one consonant, Ra is excluded from candidates for |
722 | * base consonants. |
723 | */ |
724 | |
725 | unsigned int base = end; |
726 | bool has_reph = false; |
727 | |
728 | { |
729 | /* -> If the syllable starts with Ra + Halant (in a script that has Reph) |
730 | * and has more than one consonant, Ra is excluded from candidates for |
731 | * base consonants. */ |
732 | unsigned int limit = start; |
733 | if (indic_plan->config->reph_pos != REPH_POS_DONT_CARE && |
734 | indic_plan->mask_array[RPHF] && |
735 | start + 3 <= end && |
736 | ( |
737 | (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info: info[start + 2])) || |
738 | (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == OT_ZWJ) |
739 | )) |
740 | { |
741 | /* See if it matches the 'rphf' feature. */ |
742 | hb_codepoint_t glyphs[3] = {info[start].codepoint, |
743 | info[start + 1].codepoint, |
744 | indic_plan->config->reph_mode == REPH_MODE_EXPLICIT ? |
745 | info[start + 2].codepoint : 0}; |
746 | if (indic_plan->rphf.would_substitute (glyphs, glyphs_count: 2, face) || |
747 | (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && |
748 | indic_plan->rphf.would_substitute (glyphs, glyphs_count: 3, face))) |
749 | { |
750 | limit += 2; |
751 | while (limit < end && is_joiner (info: info[limit])) |
752 | limit++; |
753 | base = start; |
754 | has_reph = true; |
755 | } |
756 | } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha) |
757 | { |
758 | limit += 1; |
759 | while (limit < end && is_joiner (info: info[limit])) |
760 | limit++; |
761 | base = start; |
762 | has_reph = true; |
763 | } |
764 | |
765 | switch (indic_plan->config->base_pos) |
766 | { |
767 | case BASE_POS_LAST: |
768 | { |
769 | /* -> starting from the end of the syllable, move backwards */ |
770 | unsigned int i = end; |
771 | bool seen_below = false; |
772 | do { |
773 | i--; |
774 | /* -> until a consonant is found */ |
775 | if (is_consonant (info: info[i])) |
776 | { |
777 | /* -> that does not have a below-base or post-base form |
778 | * (post-base forms have to follow below-base forms), */ |
779 | if (info[i].indic_position() != POS_BELOW_C && |
780 | (info[i].indic_position() != POS_POST_C || seen_below)) |
781 | { |
782 | base = i; |
783 | break; |
784 | } |
785 | if (info[i].indic_position() == POS_BELOW_C) |
786 | seen_below = true; |
787 | |
788 | /* -> or that is not a pre-base-reordering Ra, |
789 | * |
790 | * IMPLEMENTATION NOTES: |
791 | * |
792 | * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped |
793 | * by the logic above already. |
794 | */ |
795 | |
796 | /* -> or arrive at the first consonant. The consonant stopped at will |
797 | * be the base. */ |
798 | base = i; |
799 | } |
800 | else |
801 | { |
802 | /* A ZWJ after a Halant stops the base search, and requests an explicit |
803 | * half form. |
804 | * A ZWJ before a Halant, requests a subjoined form instead, and hence |
805 | * search continues. This is particularly important for Bengali |
806 | * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */ |
807 | if (start < i && |
808 | info[i].indic_category() == OT_ZWJ && |
809 | info[i - 1].indic_category() == OT_H) |
810 | break; |
811 | } |
812 | } while (i > limit); |
813 | } |
814 | break; |
815 | |
816 | case BASE_POS_LAST_SINHALA: |
817 | { |
818 | /* Sinhala base positioning is slightly different from main Indic, in that: |
819 | * 1. Its ZWJ behavior is different, |
820 | * 2. We don't need to look into the font for consonant positions. |
821 | */ |
822 | |
823 | if (!has_reph) |
824 | base = limit; |
825 | |
826 | /* Find the last base consonant that is not blocked by ZWJ. If there is |
827 | * a ZWJ right before a base consonant, that would request a subjoined form. */ |
828 | for (unsigned int i = limit; i < end; i++) |
829 | if (is_consonant (info: info[i])) |
830 | { |
831 | if (limit < i && info[i - 1].indic_category() == OT_ZWJ) |
832 | break; |
833 | else |
834 | base = i; |
835 | } |
836 | |
837 | /* Mark all subsequent consonants as below. */ |
838 | for (unsigned int i = base + 1; i < end; i++) |
839 | if (is_consonant (info: info[i])) |
840 | info[i].indic_position() = POS_BELOW_C; |
841 | } |
842 | break; |
843 | |
844 | case BASE_POS_FIRST: |
845 | { |
846 | /* The first consonant is always the base. */ |
847 | |
848 | assert (indic_plan->config->reph_mode == REPH_MODE_VIS_REPHA); |
849 | assert (!has_reph); |
850 | |
851 | base = start; |
852 | |
853 | /* Mark all subsequent consonants as below. */ |
854 | for (unsigned int i = base + 1; i < end; i++) |
855 | if (is_consonant (info: info[i])) |
856 | info[i].indic_position() = POS_BELOW_C; |
857 | } |
858 | break; |
859 | } |
860 | |
861 | /* -> If the syllable starts with Ra + Halant (in a script that has Reph) |
862 | * and has more than one consonant, Ra is excluded from candidates for |
863 | * base consonants. |
864 | * |
865 | * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ.) */ |
866 | if (has_reph && base == start && limit - base <= 2) { |
867 | /* Have no other consonant, so Reph is not formed and Ra becomes base. */ |
868 | has_reph = false; |
869 | } |
870 | } |
871 | |
872 | |
873 | /* 2. Decompose and reorder Matras: |
874 | * |
875 | * Each matra and any syllable modifier sign in the syllable are moved to the |
876 | * appropriate position relative to the consonant(s) in the syllable. The |
877 | * shaping engine decomposes two- or three-part matras into their constituent |
878 | * parts before any repositioning. Matra characters are classified by which |
879 | * consonant in a conjunct they have affinity for and are reordered to the |
880 | * following positions: |
881 | * |
882 | * o Before first half form in the syllable |
883 | * o After subjoined consonants |
884 | * o After post-form consonant |
885 | * o After main consonant (for above marks) |
886 | * |
887 | * IMPLEMENTATION NOTES: |
888 | * |
889 | * The normalize() routine has already decomposed matras for us, so we don't |
890 | * need to worry about that. |
891 | */ |
892 | |
893 | |
894 | /* 3. Reorder marks to canonical order: |
895 | * |
896 | * Adjacent nukta and halant or nukta and vedic sign are always repositioned |
897 | * if necessary, so that the nukta is first. |
898 | * |
899 | * IMPLEMENTATION NOTES: |
900 | * |
901 | * We don't need to do this: the normalize() routine already did this for us. |
902 | */ |
903 | |
904 | |
905 | /* Reorder characters */ |
906 | |
907 | for (unsigned int i = start; i < base; i++) |
908 | info[i].indic_position() = MIN (a: POS_PRE_C, b: (indic_position_t) info[i].indic_position()); |
909 | |
910 | if (base < end) |
911 | info[base].indic_position() = POS_BASE_C; |
912 | |
913 | /* Mark final consonants. A final consonant is one appearing after a matra, |
914 | * like in Khmer. */ |
915 | for (unsigned int i = base + 1; i < end; i++) |
916 | if (info[i].indic_category() == OT_M) { |
917 | for (unsigned int j = i + 1; j < end; j++) |
918 | if (is_consonant (info: info[j])) { |
919 | info[j].indic_position() = POS_FINAL_C; |
920 | break; |
921 | } |
922 | break; |
923 | } |
924 | |
925 | /* Handle beginning Ra */ |
926 | if (has_reph) |
927 | info[start].indic_position() = POS_RA_TO_BECOME_REPH; |
928 | |
929 | /* For old-style Indic script tags, move the first post-base Halant after |
930 | * last consonant. |
931 | * |
932 | * Reports suggest that in some scripts Uniscribe does this only if there |
933 | * is *not* a Halant after last consonant already (eg. Kannada), while it |
934 | * does it unconditionally in other scripts (eg. Malayalam). We don't |
935 | * currently know about other scripts, so we single out Malayalam for now. |
936 | * |
937 | * Kannada test case: |
938 | * U+0C9A,U+0CCD,U+0C9A,U+0CCD |
939 | * With some versions of Lohit Kannada. |
940 | * https://bugs.freedesktop.org/show_bug.cgi?id=59118 |
941 | * |
942 | * Malayalam test case: |
943 | * U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D |
944 | * With lohit-ttf-20121122/Lohit-Malayalam.ttf |
945 | */ |
946 | if (indic_plan->is_old_spec) |
947 | { |
948 | bool disallow_double_halants = buffer->props.script != HB_SCRIPT_MALAYALAM; |
949 | for (unsigned int i = base + 1; i < end; i++) |
950 | if (info[i].indic_category() == OT_H) |
951 | { |
952 | unsigned int j; |
953 | for (j = end - 1; j > i; j--) |
954 | if (is_consonant (info: info[j]) || |
955 | (disallow_double_halants && info[j].indic_category() == OT_H)) |
956 | break; |
957 | if (info[j].indic_category() != OT_H && j > i) { |
958 | /* Move Halant to after last consonant. */ |
959 | hb_glyph_info_t t = info[i]; |
960 | memmove (dest: &info[i], src: &info[i + 1], n: (j - i) * sizeof (info[0])); |
961 | info[j] = t; |
962 | } |
963 | break; |
964 | } |
965 | } |
966 | |
967 | /* Attach misc marks to previous char to move with them. */ |
968 | { |
969 | indic_position_t last_pos = POS_START; |
970 | for (unsigned int i = start; i < end; i++) |
971 | { |
972 | if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | HALANT_OR_COENG_FLAGS))) |
973 | { |
974 | info[i].indic_position() = last_pos; |
975 | if (unlikely (info[i].indic_category() == OT_H && |
976 | info[i].indic_position() == POS_PRE_M)) |
977 | { |
978 | /* |
979 | * Uniscribe doesn't move the Halant with Left Matra. |
980 | * TEST: U+092B,U+093F,U+094D |
981 | * We follow. This is important for the Sinhala |
982 | * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA |
983 | * where U+0DD9 is a left matra and U+0DCA is the virama. |
984 | * We don't want to move the virama with the left matra. |
985 | * TEST: U+0D9A,U+0DDA |
986 | */ |
987 | for (unsigned int j = i; j > start; j--) |
988 | if (info[j - 1].indic_position() != POS_PRE_M) { |
989 | info[i].indic_position() = info[j - 1].indic_position(); |
990 | break; |
991 | } |
992 | } |
993 | } else if (info[i].indic_position() != POS_SMVD) { |
994 | last_pos = (indic_position_t) info[i].indic_position(); |
995 | } |
996 | } |
997 | } |
998 | /* For post-base consonants let them own anything before them |
999 | * since the last consonant or matra. */ |
1000 | { |
1001 | unsigned int last = base; |
1002 | for (unsigned int i = base + 1; i < end; i++) |
1003 | if (is_consonant (info: info[i])) |
1004 | { |
1005 | for (unsigned int j = last + 1; j < i; j++) |
1006 | if (info[j].indic_position() < POS_SMVD) |
1007 | info[j].indic_position() = info[i].indic_position(); |
1008 | last = i; |
1009 | } else if (info[i].indic_category() == OT_M) |
1010 | last = i; |
1011 | } |
1012 | |
1013 | |
1014 | { |
1015 | /* Use syllable() for sort accounting temporarily. */ |
1016 | unsigned int syllable = info[start].syllable(); |
1017 | for (unsigned int i = start; i < end; i++) |
1018 | info[i].syllable() = i - start; |
1019 | |
1020 | /* Sit tight, rock 'n roll! */ |
1021 | hb_stable_sort (array: info + start, len: end - start, compar: compare_indic_order); |
1022 | /* Find base again */ |
1023 | base = end; |
1024 | for (unsigned int i = start; i < end; i++) |
1025 | if (info[i].indic_position() == POS_BASE_C) |
1026 | { |
1027 | base = i; |
1028 | break; |
1029 | } |
1030 | /* Things are out-of-control for post base positions, they may shuffle |
1031 | * around like crazy. In old-spec mode, we move halants around, so in |
1032 | * that case merge all clusters after base. Otherwise, check the sort |
1033 | * order and merge as needed. |
1034 | * For pre-base stuff, we handle cluster issues in final reordering. |
1035 | * |
1036 | * We could use buffer->sort() for this, if there was no special |
1037 | * reordering of pre-base stuff happening later... |
1038 | */ |
1039 | if (indic_plan->is_old_spec || end - base > 127) |
1040 | buffer->merge_clusters (start: base, end); |
1041 | else |
1042 | { |
1043 | /* Note! syllable() is a one-byte field. */ |
1044 | for (unsigned int i = base; i < end; i++) |
1045 | if (info[i].syllable() != 255) |
1046 | { |
1047 | unsigned int max = i; |
1048 | unsigned int j = start + info[i].syllable(); |
1049 | while (j != i) |
1050 | { |
1051 | max = MAX (a: max, b: j); |
1052 | unsigned int next = start + info[j].syllable(); |
1053 | info[j].syllable() = 255; /* So we don't process j later again. */ |
1054 | j = next; |
1055 | } |
1056 | if (i != max) |
1057 | buffer->merge_clusters (start: i, end: max + 1); |
1058 | } |
1059 | } |
1060 | |
1061 | /* Put syllable back in. */ |
1062 | for (unsigned int i = start; i < end; i++) |
1063 | info[i].syllable() = syllable; |
1064 | } |
1065 | |
1066 | /* Setup masks now */ |
1067 | |
1068 | { |
1069 | hb_mask_t mask; |
1070 | |
1071 | /* Reph */ |
1072 | for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++) |
1073 | info[i].mask |= indic_plan->mask_array[RPHF]; |
1074 | |
1075 | /* Pre-base */ |
1076 | mask = indic_plan->mask_array[HALF]; |
1077 | if (!indic_plan->is_old_spec && |
1078 | indic_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST) |
1079 | mask |= indic_plan->mask_array[BLWF]; |
1080 | for (unsigned int i = start; i < base; i++) |
1081 | info[i].mask |= mask; |
1082 | /* Base */ |
1083 | mask = 0; |
1084 | if (base < end) |
1085 | info[base].mask |= mask; |
1086 | /* Post-base */ |
1087 | mask = indic_plan->mask_array[BLWF] | indic_plan->mask_array[ABVF] | indic_plan->mask_array[PSTF]; |
1088 | for (unsigned int i = base + 1; i < end; i++) |
1089 | info[i].mask |= mask; |
1090 | } |
1091 | |
1092 | if (indic_plan->is_old_spec && |
1093 | buffer->props.script == HB_SCRIPT_DEVANAGARI) |
1094 | { |
1095 | /* Old-spec eye-lash Ra needs special handling. From the |
1096 | * spec: |
1097 | * |
1098 | * "The feature 'below-base form' is applied to consonants |
1099 | * having below-base forms and following the base consonant. |
1100 | * The exception is vattu, which may appear below half forms |
1101 | * as well as below the base glyph. The feature 'below-base |
1102 | * form' will be applied to all such occurrences of Ra as well." |
1103 | * |
1104 | * Test case: U+0924,U+094D,U+0930,U+094d,U+0915 |
1105 | * with Sanskrit 2003 font. |
1106 | * |
1107 | * However, note that Ra,Halant,ZWJ is the correct way to |
1108 | * request eyelash form of Ra, so we wouldn't inhibit it |
1109 | * in that sequence. |
1110 | * |
1111 | * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 |
1112 | */ |
1113 | for (unsigned int i = start; i + 1 < base; i++) |
1114 | if (info[i ].indic_category() == OT_Ra && |
1115 | info[i+1].indic_category() == OT_H && |
1116 | (i + 2 == base || |
1117 | info[i+2].indic_category() != OT_ZWJ)) |
1118 | { |
1119 | info[i ].mask |= indic_plan->mask_array[BLWF]; |
1120 | info[i+1].mask |= indic_plan->mask_array[BLWF]; |
1121 | } |
1122 | } |
1123 | |
1124 | unsigned int pref_len = 2; |
1125 | if (indic_plan->mask_array[PREF] && base + pref_len < end) |
1126 | { |
1127 | /* Find a Halant,Ra sequence and mark it for pre-base-reordering processing. */ |
1128 | for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) { |
1129 | hb_codepoint_t glyphs[2]; |
1130 | for (unsigned int j = 0; j < pref_len; j++) |
1131 | glyphs[j] = info[i + j].codepoint; |
1132 | if (indic_plan->pref.would_substitute (glyphs, glyphs_count: pref_len, face)) |
1133 | { |
1134 | for (unsigned int j = 0; j < pref_len; j++) |
1135 | info[i++].mask |= indic_plan->mask_array[PREF]; |
1136 | |
1137 | /* Mark the subsequent stuff with 'cfar'. Used in Khmer. |
1138 | * Read the feature spec. |
1139 | * This allows distinguishing the following cases with MS Khmer fonts: |
1140 | * U+1784,U+17D2,U+179A,U+17D2,U+1782 |
1141 | * U+1784,U+17D2,U+1782,U+17D2,U+179A |
1142 | */ |
1143 | if (indic_plan->mask_array[CFAR]) |
1144 | for (; i < end; i++) |
1145 | info[i].mask |= indic_plan->mask_array[CFAR]; |
1146 | |
1147 | break; |
1148 | } |
1149 | } |
1150 | } |
1151 | |
1152 | /* Apply ZWJ/ZWNJ effects */ |
1153 | for (unsigned int i = start + 1; i < end; i++) |
1154 | if (is_joiner (info: info[i])) { |
1155 | bool non_joiner = info[i].indic_category() == OT_ZWNJ; |
1156 | unsigned int j = i; |
1157 | |
1158 | do { |
1159 | j--; |
1160 | |
1161 | /* ZWJ/ZWNJ should disable CJCT. They do that by simply |
1162 | * being there, since we don't skip them for the CJCT |
1163 | * feature (ie. F_MANUAL_ZWJ) */ |
1164 | |
1165 | /* A ZWNJ disables HALF. */ |
1166 | if (non_joiner) |
1167 | info[j].mask &= ~indic_plan->mask_array[HALF]; |
1168 | |
1169 | } while (j > start && !is_consonant (info: info[j])); |
1170 | } |
1171 | } |
1172 | |
1173 | static void |
1174 | initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan, |
1175 | hb_face_t *face, |
1176 | hb_buffer_t *buffer, |
1177 | unsigned int start, unsigned int end) |
1178 | { |
1179 | /* We treat placeholder/dotted-circle as if they are consonants, so we |
1180 | * should just chain. Only if not in compatibility mode that is... */ |
1181 | |
1182 | if (hb_options ().uniscribe_bug_compatible) |
1183 | { |
1184 | /* For dotted-circle, this is what Uniscribe does: |
1185 | * If dotted-circle is the last glyph, it just does nothing. |
1186 | * Ie. It doesn't form Reph. */ |
1187 | if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) |
1188 | return; |
1189 | } |
1190 | |
1191 | initial_reordering_consonant_syllable (plan, face, buffer, start, end); |
1192 | } |
1193 | |
1194 | static void |
1195 | initial_reordering_syllable (const hb_ot_shape_plan_t *plan, |
1196 | hb_face_t *face, |
1197 | hb_buffer_t *buffer, |
1198 | unsigned int start, unsigned int end) |
1199 | { |
1200 | syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F); |
1201 | switch (syllable_type) |
1202 | { |
1203 | case vowel_syllable: /* We made the vowels look like consonants. So let's call the consonant logic! */ |
1204 | case consonant_syllable: |
1205 | initial_reordering_consonant_syllable (plan, face, buffer, start, end); |
1206 | break; |
1207 | |
1208 | case broken_cluster: /* We already inserted dotted-circles, so just call the standalone_cluster. */ |
1209 | case standalone_cluster: |
1210 | initial_reordering_standalone_cluster (plan, face, buffer, start, end); |
1211 | break; |
1212 | |
1213 | case symbol_cluster: |
1214 | case non_indic_cluster: |
1215 | break; |
1216 | } |
1217 | } |
1218 | |
1219 | static inline void |
1220 | insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED, |
1221 | hb_font_t *font, |
1222 | hb_buffer_t *buffer) |
1223 | { |
1224 | /* Note: This loop is extra overhead, but should not be measurable. */ |
1225 | bool has_broken_syllables = false; |
1226 | unsigned int count = buffer->len; |
1227 | hb_glyph_info_t *info = buffer->info; |
1228 | for (unsigned int i = 0; i < count; i++) |
1229 | if ((info[i].syllable() & 0x0F) == broken_cluster) |
1230 | { |
1231 | has_broken_syllables = true; |
1232 | break; |
1233 | } |
1234 | if (likely (!has_broken_syllables)) |
1235 | return; |
1236 | |
1237 | |
1238 | hb_codepoint_t dottedcircle_glyph; |
1239 | if (!font->get_nominal_glyph (unicode: 0x25CCu, glyph: &dottedcircle_glyph)) |
1240 | return; |
1241 | |
1242 | hb_glyph_info_t dottedcircle = {.codepoint: 0}; |
1243 | dottedcircle.codepoint = 0x25CCu; |
1244 | set_indic_properties (dottedcircle); |
1245 | dottedcircle.codepoint = dottedcircle_glyph; |
1246 | |
1247 | buffer->clear_output (); |
1248 | |
1249 | buffer->idx = 0; |
1250 | unsigned int last_syllable = 0; |
1251 | while (buffer->idx < buffer->len && !buffer->in_error) |
1252 | { |
1253 | unsigned int syllable = buffer->cur().syllable(); |
1254 | syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F); |
1255 | if (unlikely (last_syllable != syllable && syllable_type == broken_cluster)) |
1256 | { |
1257 | last_syllable = syllable; |
1258 | |
1259 | hb_glyph_info_t ginfo = dottedcircle; |
1260 | ginfo.cluster = buffer->cur().cluster; |
1261 | ginfo.mask = buffer->cur().mask; |
1262 | ginfo.syllable() = buffer->cur().syllable(); |
1263 | /* TODO Set glyph_props? */ |
1264 | |
1265 | /* Insert dottedcircle after possible Repha. */ |
1266 | while (buffer->idx < buffer->len && !buffer->in_error && |
1267 | last_syllable == buffer->cur().syllable() && |
1268 | buffer->cur().indic_category() == OT_Repha) |
1269 | buffer->next_glyph (); |
1270 | |
1271 | buffer->output_info (glyph_info: ginfo); |
1272 | } |
1273 | else |
1274 | buffer->next_glyph (); |
1275 | } |
1276 | |
1277 | buffer->swap_buffers (); |
1278 | } |
1279 | |
1280 | static void |
1281 | initial_reordering (const hb_ot_shape_plan_t *plan, |
1282 | hb_font_t *font, |
1283 | hb_buffer_t *buffer) |
1284 | { |
1285 | update_consonant_positions (plan, font, buffer); |
1286 | insert_dotted_circles (plan, font, buffer); |
1287 | |
1288 | foreach_syllable (buffer, start, end) |
1289 | initial_reordering_syllable (plan, face: font->face, buffer, start, end); |
1290 | } |
1291 | |
1292 | static void |
1293 | final_reordering_syllable (const hb_ot_shape_plan_t *plan, |
1294 | hb_buffer_t *buffer, |
1295 | unsigned int start, unsigned int end) |
1296 | { |
1297 | const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; |
1298 | hb_glyph_info_t *info = buffer->info; |
1299 | |
1300 | |
1301 | /* This function relies heavily on halant glyphs. Lots of ligation |
1302 | * and possibly multiple substitutions happened prior to this |
1303 | * phase, and that might have messed up our properties. Recover |
1304 | * from a particular case of that where we're fairly sure that a |
1305 | * class of OT_H is desired but has been lost. */ |
1306 | if (indic_plan->virama_glyph) |
1307 | { |
1308 | unsigned int virama_glyph = indic_plan->virama_glyph; |
1309 | for (unsigned int i = start; i < end; i++) |
1310 | if (info[i].codepoint == virama_glyph && |
1311 | _hb_glyph_info_ligated (info: &info[i]) && |
1312 | _hb_glyph_info_multiplied (info: &info[i])) |
1313 | { |
1314 | /* This will make sure that this glyph passes is_halant_or_coeng() test. */ |
1315 | info[i].indic_category() = OT_H; |
1316 | _hb_glyph_info_clear_ligated_and_multiplied (info: &info[i]); |
1317 | } |
1318 | } |
1319 | |
1320 | |
1321 | /* 4. Final reordering: |
1322 | * |
1323 | * After the localized forms and basic shaping forms GSUB features have been |
1324 | * applied (see below), the shaping engine performs some final glyph |
1325 | * reordering before applying all the remaining font features to the entire |
1326 | * syllable. |
1327 | */ |
1328 | |
1329 | bool try_pref = !!indic_plan->mask_array[PREF]; |
1330 | |
1331 | /* Find base again */ |
1332 | unsigned int base; |
1333 | for (base = start; base < end; base++) |
1334 | if (info[base].indic_position() >= POS_BASE_C) |
1335 | { |
1336 | if (try_pref && base + 1 < end) |
1337 | { |
1338 | for (unsigned int i = base + 1; i < end; i++) |
1339 | if ((info[i].mask & indic_plan->mask_array[PREF]) != 0) |
1340 | { |
1341 | if (!(_hb_glyph_info_substituted (info: &info[i]) && |
1342 | _hb_glyph_info_ligated_and_didnt_multiply (info: &info[i]))) |
1343 | { |
1344 | /* Ok, this was a 'pref' candidate but didn't form any. |
1345 | * Base is around here... */ |
1346 | base = i; |
1347 | while (base < end && is_halant_or_coeng (info: info[base])) |
1348 | base++; |
1349 | info[base].indic_position() = POS_BASE_C; |
1350 | |
1351 | try_pref = false; |
1352 | } |
1353 | break; |
1354 | } |
1355 | } |
1356 | /* For Malayalam, skip over unformed below- (but NOT post-) forms. */ |
1357 | if (buffer->props.script == HB_SCRIPT_MALAYALAM) |
1358 | { |
1359 | for (unsigned int i = base + 1; i < end; i++) |
1360 | { |
1361 | while (i < end && is_joiner (info: info[i])) |
1362 | i++; |
1363 | if (i == end || !is_halant_or_coeng (info: info[i])) |
1364 | break; |
1365 | i++; /* Skip halant. */ |
1366 | while (i < end && is_joiner (info: info[i])) |
1367 | i++; |
1368 | if (i < end && is_consonant (info: info[i]) && info[i].indic_position() == POS_BELOW_C) |
1369 | { |
1370 | base = i; |
1371 | info[base].indic_position() = POS_BASE_C; |
1372 | } |
1373 | } |
1374 | } |
1375 | |
1376 | if (start < base && info[base].indic_position() > POS_BASE_C) |
1377 | base--; |
1378 | break; |
1379 | } |
1380 | if (base == end && start < base && |
1381 | is_one_of (info: info[base - 1], FLAG (OT_ZWJ))) |
1382 | base--; |
1383 | if (base < end) |
1384 | while (start < base && |
1385 | is_one_of (info: info[base], flags: (FLAG (OT_N) | HALANT_OR_COENG_FLAGS))) |
1386 | base--; |
1387 | |
1388 | |
1389 | /* o Reorder matras: |
1390 | * |
1391 | * If a pre-base matra character had been reordered before applying basic |
1392 | * features, the glyph can be moved closer to the main consonant based on |
1393 | * whether half-forms had been formed. Actual position for the matra is |
1394 | * defined as “after last standalone halant glyph, after initial matra |
1395 | * position and before the main consonant”. If ZWJ or ZWNJ follow this |
1396 | * halant, position is moved after it. |
1397 | */ |
1398 | |
1399 | if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */ |
1400 | { |
1401 | /* If we lost track of base, alas, position before last thingy. */ |
1402 | unsigned int new_pos = base == end ? base - 2 : base - 1; |
1403 | |
1404 | /* Malayalam / Tamil do not have "half" forms or explicit virama forms. |
1405 | * The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
1406 | * We want to position matra after them. |
1407 | */ |
1408 | if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) |
1409 | { |
1410 | while (new_pos > start && |
1411 | !(is_one_of (info: info[new_pos], flags: (FLAG (OT_M) | HALANT_OR_COENG_FLAGS)))) |
1412 | new_pos--; |
1413 | |
1414 | /* If we found no Halant we are done. |
1415 | * Otherwise only proceed if the Halant does |
1416 | * not belong to the Matra itself! */ |
1417 | if (is_halant_or_coeng (info: info[new_pos]) && |
1418 | info[new_pos].indic_position() != POS_PRE_M) |
1419 | { |
1420 | /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ |
1421 | if (new_pos + 1 < end && is_joiner (info: info[new_pos + 1])) |
1422 | new_pos++; |
1423 | } |
1424 | else |
1425 | new_pos = start; /* No move. */ |
1426 | } |
1427 | |
1428 | if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M) |
1429 | { |
1430 | /* Now go see if there's actually any matras... */ |
1431 | for (unsigned int i = new_pos; i > start; i--) |
1432 | if (info[i - 1].indic_position () == POS_PRE_M) |
1433 | { |
1434 | unsigned int old_pos = i - 1; |
1435 | if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */ |
1436 | base--; |
1437 | |
1438 | hb_glyph_info_t tmp = info[old_pos]; |
1439 | memmove (dest: &info[old_pos], src: &info[old_pos + 1], n: (new_pos - old_pos) * sizeof (info[0])); |
1440 | info[new_pos] = tmp; |
1441 | |
1442 | /* Note: this merge_clusters() is intentionally *after* the reordering. |
1443 | * Indic matra reordering is special and tricky... */ |
1444 | buffer->merge_clusters (start: new_pos, end: MIN (a: end, b: base + 1)); |
1445 | |
1446 | new_pos--; |
1447 | } |
1448 | } else { |
1449 | for (unsigned int i = start; i < base; i++) |
1450 | if (info[i].indic_position () == POS_PRE_M) { |
1451 | buffer->merge_clusters (start: i, end: MIN (a: end, b: base + 1)); |
1452 | break; |
1453 | } |
1454 | } |
1455 | } |
1456 | |
1457 | |
1458 | /* o Reorder reph: |
1459 | * |
1460 | * Reph’s original position is always at the beginning of the syllable, |
1461 | * (i.e. it is not reordered at the character reordering stage). However, |
1462 | * it will be reordered according to the basic-forms shaping results. |
1463 | * Possible positions for reph, depending on the script, are; after main, |
1464 | * before post-base consonant forms, and after post-base consonant forms. |
1465 | */ |
1466 | |
1467 | /* Two cases: |
1468 | * |
1469 | * - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then |
1470 | * we should only move it if the sequence ligated to the repha form. |
1471 | * |
1472 | * - If repha is encoded separately and in the logical position, we should only |
1473 | * move it if it did NOT ligate. If it ligated, it's probably the font trying |
1474 | * to make it work without the reordering. |
1475 | */ |
1476 | if (start + 1 < end && |
1477 | info[start].indic_position() == POS_RA_TO_BECOME_REPH && |
1478 | ((info[start].indic_category() == OT_Repha) ^ |
1479 | _hb_glyph_info_ligated_and_didnt_multiply (info: &info[start]))) |
1480 | { |
1481 | unsigned int new_reph_pos; |
1482 | reph_position_t reph_pos = indic_plan->config->reph_pos; |
1483 | |
1484 | assert (reph_pos != REPH_POS_DONT_CARE); |
1485 | |
1486 | /* 1. If reph should be positioned after post-base consonant forms, |
1487 | * proceed to step 5. |
1488 | */ |
1489 | if (reph_pos == REPH_POS_AFTER_POST) |
1490 | { |
1491 | goto reph_step_5; |
1492 | } |
1493 | |
1494 | /* 2. If the reph repositioning class is not after post-base: target |
1495 | * position is after the first explicit halant glyph between the |
1496 | * first post-reph consonant and last main consonant. If ZWJ or ZWNJ |
1497 | * are following this halant, position is moved after it. If such |
1498 | * position is found, this is the target position. Otherwise, |
1499 | * proceed to the next step. |
1500 | * |
1501 | * Note: in old-implementation fonts, where classifications were |
1502 | * fixed in shaping engine, there was no case where reph position |
1503 | * will be found on this step. |
1504 | */ |
1505 | { |
1506 | new_reph_pos = start + 1; |
1507 | while (new_reph_pos < base && !is_halant_or_coeng (info: info[new_reph_pos])) |
1508 | new_reph_pos++; |
1509 | |
1510 | if (new_reph_pos < base && is_halant_or_coeng (info: info[new_reph_pos])) |
1511 | { |
1512 | /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ |
1513 | if (new_reph_pos + 1 < base && is_joiner (info: info[new_reph_pos + 1])) |
1514 | new_reph_pos++; |
1515 | goto reph_move; |
1516 | } |
1517 | } |
1518 | |
1519 | /* 3. If reph should be repositioned after the main consonant: find the |
1520 | * first consonant not ligated with main, or find the first |
1521 | * consonant that is not a potential pre-base-reordering Ra. |
1522 | */ |
1523 | if (reph_pos == REPH_POS_AFTER_MAIN) |
1524 | { |
1525 | new_reph_pos = base; |
1526 | while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN) |
1527 | new_reph_pos++; |
1528 | if (new_reph_pos < end) |
1529 | goto reph_move; |
1530 | } |
1531 | |
1532 | /* 4. If reph should be positioned before post-base consonant, find |
1533 | * first post-base classified consonant not ligated with main. If no |
1534 | * consonant is found, the target position should be before the |
1535 | * first matra, syllable modifier sign or vedic sign. |
1536 | */ |
1537 | /* This is our take on what step 4 is trying to say (and failing, BADLY). */ |
1538 | if (reph_pos == REPH_POS_AFTER_SUB) |
1539 | { |
1540 | new_reph_pos = base; |
1541 | while (new_reph_pos + 1 < end && |
1542 | !( FLAG_UNSAFE (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD)))) |
1543 | new_reph_pos++; |
1544 | if (new_reph_pos < end) |
1545 | goto reph_move; |
1546 | } |
1547 | |
1548 | /* 5. If no consonant is found in steps 3 or 4, move reph to a position |
1549 | * immediately before the first post-base matra, syllable modifier |
1550 | * sign or vedic sign that has a reordering class after the intended |
1551 | * reph position. For example, if the reordering position for reph |
1552 | * is post-main, it will skip above-base matras that also have a |
1553 | * post-main position. |
1554 | */ |
1555 | reph_step_5: |
1556 | { |
1557 | /* Copied from step 2. */ |
1558 | new_reph_pos = start + 1; |
1559 | while (new_reph_pos < base && !is_halant_or_coeng (info: info[new_reph_pos])) |
1560 | new_reph_pos++; |
1561 | |
1562 | if (new_reph_pos < base && is_halant_or_coeng (info: info[new_reph_pos])) |
1563 | { |
1564 | /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ |
1565 | if (new_reph_pos + 1 < base && is_joiner (info: info[new_reph_pos + 1])) |
1566 | new_reph_pos++; |
1567 | goto reph_move; |
1568 | } |
1569 | } |
1570 | |
1571 | /* 6. Otherwise, reorder reph to the end of the syllable. |
1572 | */ |
1573 | { |
1574 | new_reph_pos = end - 1; |
1575 | while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD) |
1576 | new_reph_pos--; |
1577 | |
1578 | /* |
1579 | * If the Reph is to be ending up after a Matra,Halant sequence, |
1580 | * position it before that Halant so it can interact with the Matra. |
1581 | * However, if it's a plain Consonant,Halant we shouldn't do that. |
1582 | * Uniscribe doesn't do this. |
1583 | * TEST: U+0930,U+094D,U+0915,U+094B,U+094D |
1584 | */ |
1585 | if (!hb_options ().uniscribe_bug_compatible && |
1586 | unlikely (is_halant_or_coeng (info[new_reph_pos]))) { |
1587 | for (unsigned int i = base + 1; i < new_reph_pos; i++) |
1588 | if (info[i].indic_category() == OT_M) { |
1589 | /* Ok, got it. */ |
1590 | new_reph_pos--; |
1591 | } |
1592 | } |
1593 | goto reph_move; |
1594 | } |
1595 | |
1596 | reph_move: |
1597 | { |
1598 | /* Move */ |
1599 | buffer->merge_clusters (start, end: new_reph_pos + 1); |
1600 | hb_glyph_info_t reph = info[start]; |
1601 | memmove (dest: &info[start], src: &info[start + 1], n: (new_reph_pos - start) * sizeof (info[0])); |
1602 | info[new_reph_pos] = reph; |
1603 | |
1604 | if (start < base && base <= new_reph_pos) |
1605 | base--; |
1606 | } |
1607 | } |
1608 | |
1609 | |
1610 | /* o Reorder pre-base-reordering consonants: |
1611 | * |
1612 | * If a pre-base-reordering consonant is found, reorder it according to |
1613 | * the following rules: |
1614 | */ |
1615 | |
1616 | if (try_pref && base + 1 < end) /* Otherwise there can't be any pre-base-reordering Ra. */ |
1617 | { |
1618 | for (unsigned int i = base + 1; i < end; i++) |
1619 | if ((info[i].mask & indic_plan->mask_array[PREF]) != 0) |
1620 | { |
1621 | /* 1. Only reorder a glyph produced by substitution during application |
1622 | * of the <pref> feature. (Note that a font may shape a Ra consonant with |
1623 | * the feature generally but block it in certain contexts.) |
1624 | */ |
1625 | /* Note: We just check that something got substituted. We don't check that |
1626 | * the <pref> feature actually did it... |
1627 | * |
1628 | * Reorder pref only if it ligated. */ |
1629 | if (_hb_glyph_info_ligated_and_didnt_multiply (info: &info[i])) |
1630 | { |
1631 | /* |
1632 | * 2. Try to find a target position the same way as for pre-base matra. |
1633 | * If it is found, reorder pre-base consonant glyph. |
1634 | * |
1635 | * 3. If position is not found, reorder immediately before main |
1636 | * consonant. |
1637 | */ |
1638 | |
1639 | unsigned int new_pos = base; |
1640 | /* Malayalam / Tamil do not have "half" forms or explicit virama forms. |
1641 | * The glyphs formed by 'half' are Chillus or ligated explicit viramas. |
1642 | * We want to position matra after them. |
1643 | */ |
1644 | if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) |
1645 | { |
1646 | while (new_pos > start && |
1647 | !(is_one_of (info: info[new_pos - 1], FLAG(OT_M) | HALANT_OR_COENG_FLAGS))) |
1648 | new_pos--; |
1649 | |
1650 | /* In Khmer coeng model, a H,Ra can go *after* matras. If it goes after a |
1651 | * split matra, it should be reordered to *before* the left part of such matra. */ |
1652 | if (new_pos > start && info[new_pos - 1].indic_category() == OT_M) |
1653 | { |
1654 | unsigned int old_pos = i; |
1655 | for (unsigned int j = base + 1; j < old_pos; j++) |
1656 | if (info[j].indic_category() == OT_M) |
1657 | { |
1658 | new_pos--; |
1659 | break; |
1660 | } |
1661 | } |
1662 | } |
1663 | |
1664 | if (new_pos > start && is_halant_or_coeng (info: info[new_pos - 1])) |
1665 | { |
1666 | /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ |
1667 | if (new_pos < end && is_joiner (info: info[new_pos])) |
1668 | new_pos++; |
1669 | } |
1670 | |
1671 | { |
1672 | unsigned int old_pos = i; |
1673 | |
1674 | buffer->merge_clusters (start: new_pos, end: old_pos + 1); |
1675 | hb_glyph_info_t tmp = info[old_pos]; |
1676 | memmove (dest: &info[new_pos + 1], src: &info[new_pos], n: (old_pos - new_pos) * sizeof (info[0])); |
1677 | info[new_pos] = tmp; |
1678 | |
1679 | if (new_pos <= base && base < old_pos) |
1680 | base++; |
1681 | } |
1682 | } |
1683 | |
1684 | break; |
1685 | } |
1686 | } |
1687 | |
1688 | |
1689 | /* Apply 'init' to the Left Matra if it's a word start. */ |
1690 | if (info[start].indic_position () == POS_PRE_M) |
1691 | { |
1692 | if (!start || |
1693 | !(FLAG_UNSAFE (_hb_glyph_info_get_general_category (&info[start - 1])) & |
1694 | FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) |
1695 | info[start].mask |= indic_plan->mask_array[INIT]; |
1696 | else |
1697 | buffer->unsafe_to_break (start: start - 1, end: start + 1); |
1698 | } |
1699 | |
1700 | |
1701 | /* |
1702 | * Finish off the clusters and go home! |
1703 | */ |
1704 | if (hb_options ().uniscribe_bug_compatible) |
1705 | { |
1706 | switch ((hb_tag_t) plan->props.script) |
1707 | { |
1708 | case HB_SCRIPT_TAMIL: |
1709 | case HB_SCRIPT_SINHALA: |
1710 | break; |
1711 | |
1712 | default: |
1713 | /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala. |
1714 | * This means, half forms are submerged into the main consonant's cluster. |
1715 | * This is unnecessary, and makes cursor positioning harder, but that's what |
1716 | * Uniscribe does. */ |
1717 | buffer->merge_clusters (start, end); |
1718 | break; |
1719 | } |
1720 | } |
1721 | } |
1722 | |
1723 | |
1724 | static void |
1725 | final_reordering (const hb_ot_shape_plan_t *plan, |
1726 | hb_font_t *font HB_UNUSED, |
1727 | hb_buffer_t *buffer) |
1728 | { |
1729 | unsigned int count = buffer->len; |
1730 | if (unlikely (!count)) return; |
1731 | |
1732 | foreach_syllable (buffer, start, end) |
1733 | final_reordering_syllable (plan, buffer, start, end); |
1734 | |
1735 | HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); |
1736 | HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position); |
1737 | } |
1738 | |
1739 | |
1740 | static void |
1741 | clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, |
1742 | hb_font_t *font HB_UNUSED, |
1743 | hb_buffer_t *buffer) |
1744 | { |
1745 | hb_glyph_info_t *info = buffer->info; |
1746 | unsigned int count = buffer->len; |
1747 | for (unsigned int i = 0; i < count; i++) |
1748 | info[i].syllable() = 0; |
1749 | } |
1750 | |
1751 | |
1752 | static bool |
1753 | decompose_indic (const hb_ot_shape_normalize_context_t *c, |
1754 | hb_codepoint_t ab, |
1755 | hb_codepoint_t *a, |
1756 | hb_codepoint_t *b) |
1757 | { |
1758 | switch (ab) |
1759 | { |
1760 | /* Don't decompose these. */ |
1761 | case 0x0931u : return false; /* DEVANAGARI LETTER RRA */ |
1762 | case 0x0B94u : return false; /* TAMIL LETTER AU */ |
1763 | |
1764 | |
1765 | /* |
1766 | * Decompose split matras that don't have Unicode decompositions. |
1767 | */ |
1768 | |
1769 | /* Khmer */ |
1770 | case 0x17BEu : *a = 0x17C1u; *b= 0x17BEu; return true; |
1771 | case 0x17BFu : *a = 0x17C1u; *b= 0x17BFu; return true; |
1772 | case 0x17C0u : *a = 0x17C1u; *b= 0x17C0u; return true; |
1773 | case 0x17C4u : *a = 0x17C1u; *b= 0x17C4u; return true; |
1774 | case 0x17C5u : *a = 0x17C1u; *b= 0x17C5u; return true; |
1775 | |
1776 | #if 0 |
1777 | /* Gujarati */ |
1778 | /* This one has no decomposition in Unicode, but needs no decomposition either. */ |
1779 | /* case 0x0AC9u : return false; */ |
1780 | |
1781 | /* Oriya */ |
1782 | case 0x0B57u : *a = no decomp, -> RIGHT; return true; |
1783 | #endif |
1784 | } |
1785 | |
1786 | if ((ab == 0x0DDAu || hb_in_range<hb_codepoint_t> (u: ab, lo: 0x0DDCu, hi: 0x0DDEu))) |
1787 | { |
1788 | /* |
1789 | * Sinhala split matras... Let the fun begin. |
1790 | * |
1791 | * These four characters have Unicode decompositions. However, Uniscribe |
1792 | * decomposes them "Khmer-style", that is, it uses the character itself to |
1793 | * get the second half. The first half of all four decompositions is always |
1794 | * U+0DD9. |
1795 | * |
1796 | * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are |
1797 | * broken with Uniscribe. But we need to support them. As such, we only |
1798 | * do the Uniscribe-style decomposition if the character is transformed into |
1799 | * its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to |
1800 | * Unicode decomposition. |
1801 | * |
1802 | * Note that we can't unconditionally use Unicode decomposition. That would |
1803 | * break some other fonts, that are designed to work with Uniscribe, and |
1804 | * don't have positioning features for the Unicode-style decomposition. |
1805 | * |
1806 | * Argh... |
1807 | * |
1808 | * The Uniscribe behavior is now documented in the newly published Sinhala |
1809 | * spec in 2012: |
1810 | * |
1811 | * http://www.microsoft.com/typography/OpenTypeDev/sinhala/intro.htm#shaping |
1812 | */ |
1813 | |
1814 | const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data; |
1815 | |
1816 | hb_codepoint_t glyph; |
1817 | |
1818 | if (hb_options ().uniscribe_bug_compatible || |
1819 | (c->font->get_nominal_glyph (unicode: ab, glyph: &glyph) && |
1820 | indic_plan->pstf.would_substitute (glyphs: &glyph, glyphs_count: 1, face: c->font->face))) |
1821 | { |
1822 | /* Ok, safe to use Uniscribe-style decomposition. */ |
1823 | *a = 0x0DD9u; |
1824 | *b = ab; |
1825 | return true; |
1826 | } |
1827 | } |
1828 | |
1829 | return (bool) c->unicode->decompose (ab, a, b); |
1830 | } |
1831 | |
1832 | static bool |
1833 | compose_indic (const hb_ot_shape_normalize_context_t *c, |
1834 | hb_codepoint_t a, |
1835 | hb_codepoint_t b, |
1836 | hb_codepoint_t *ab) |
1837 | { |
1838 | /* Avoid recomposing split matras. */ |
1839 | if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) |
1840 | return false; |
1841 | |
1842 | /* Composition-exclusion exceptions that we want to recompose. */ |
1843 | if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; } |
1844 | |
1845 | return (bool) c->unicode->compose (a, b, ab); |
1846 | } |
1847 | |
1848 | |
1849 | const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = |
1850 | { |
1851 | .collect_features: collect_features_indic, |
1852 | .override_features: override_features_indic, |
1853 | .data_create: data_create_indic, |
1854 | .data_destroy: data_destroy_indic, |
1855 | .preprocess_text: nullptr, /* preprocess_text */ |
1856 | .postprocess_glyphs: nullptr, /* postprocess_glyphs */ |
1857 | .normalization_preference: HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, |
1858 | .decompose: decompose_indic, |
1859 | .compose: compose_indic, |
1860 | .setup_masks: setup_masks_indic, |
1861 | .disable_otl: nullptr, /* disable_otl */ |
1862 | .reorder_marks: nullptr, /* reorder_marks */ |
1863 | .zero_width_marks: HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, |
1864 | .fallback_position: false, /* fallback_position */ |
1865 | }; |
1866 | |