1/*
2 * Copyright © 2014 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_OT_CMAP_TABLE_HH
28#define HB_OT_CMAP_TABLE_HH
29
30#include "hb-open-type-private.hh"
31
32
33namespace OT {
34
35
36/*
37 * cmap -- Character To Glyph Index Mapping Table
38 */
39
40#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
41
42
43struct CmapSubtableFormat0
44{
45 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
46 {
47 hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
48 if (!gid)
49 return false;
50 *glyph = gid;
51 return true;
52 }
53
54 inline bool sanitize (hb_sanitize_context_t *c) const
55 {
56 TRACE_SANITIZE (this);
57 return_trace (c->check_struct (this));
58 }
59
60 protected:
61 UINT16 format; /* Format number is set to 0. */
62 UINT16 lengthZ; /* Byte length of this subtable. */
63 UINT16 languageZ; /* Ignore. */
64 UINT8 glyphIdArray[256];/* An array that maps character
65 * code to glyph index values. */
66 public:
67 DEFINE_SIZE_STATIC (6 + 256);
68};
69
70struct CmapSubtableFormat4
71{
72 struct accelerator_t
73 {
74 inline void init (const CmapSubtableFormat4 *subtable)
75 {
76 segCount = subtable->segCountX2 / 2;
77 endCount = subtable->values;
78 startCount = endCount + segCount + 1;
79 idDelta = startCount + segCount;
80 idRangeOffset = idDelta + segCount;
81 glyphIdArray = idRangeOffset + segCount;
82 glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
83 }
84
85 static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
86 {
87 const accelerator_t *thiz = (const accelerator_t *) obj;
88
89 /* Custom two-array bsearch. */
90 int min = 0, max = (int) thiz->segCount - 1;
91 const UINT16 *startCount = thiz->startCount;
92 const UINT16 *endCount = thiz->endCount;
93 unsigned int i;
94 while (min <= max)
95 {
96 int mid = (min + max) / 2;
97 if (codepoint < startCount[mid])
98 max = mid - 1;
99 else if (codepoint > endCount[mid])
100 min = mid + 1;
101 else
102 {
103 i = mid;
104 goto found;
105 }
106 }
107 return false;
108
109 found:
110 hb_codepoint_t gid;
111 unsigned int rangeOffset = thiz->idRangeOffset[i];
112 if (rangeOffset == 0)
113 gid = codepoint + thiz->idDelta[i];
114 else
115 {
116 /* Somebody has been smoking... */
117 unsigned int index = rangeOffset / 2 + (codepoint - thiz->startCount[i]) + i - thiz->segCount;
118 if (unlikely (index >= thiz->glyphIdArrayLength))
119 return false;
120 gid = thiz->glyphIdArray[index];
121 if (unlikely (!gid))
122 return false;
123 gid += thiz->idDelta[i];
124 }
125
126 *glyph = gid & 0xFFFFu;
127 return true;
128 }
129
130 const UINT16 *endCount;
131 const UINT16 *startCount;
132 const UINT16 *idDelta;
133 const UINT16 *idRangeOffset;
134 const UINT16 *glyphIdArray;
135 unsigned int segCount;
136 unsigned int glyphIdArrayLength;
137 };
138
139 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
140 {
141 accelerator_t accel;
142 accel.init (subtable: this);
143 return accel.get_glyph_func (obj: &accel, codepoint, glyph);
144 }
145
146 inline bool sanitize (hb_sanitize_context_t *c) const
147 {
148 TRACE_SANITIZE (this);
149 if (unlikely (!c->check_struct (this)))
150 return_trace (false);
151
152 if (unlikely (!c->check_range (this, length)))
153 {
154 /* Some broken fonts have too long of a "length" value.
155 * If that is the case, just change the value to truncate
156 * the subtable at the end of the blob. */
157 uint16_t new_length = (uint16_t) MIN (a: (uintptr_t) 65535,
158 b: (uintptr_t) (c->end -
159 (char *) this));
160 if (!c->try_set (obj: &length, v: new_length))
161 return_trace (false);
162 }
163
164 return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
165 }
166
167 protected:
168 UINT16 format; /* Format number is set to 4. */
169 UINT16 length; /* This is the length in bytes of the
170 * subtable. */
171 UINT16 languageZ; /* Ignore. */
172 UINT16 segCountX2; /* 2 x segCount. */
173 UINT16 searchRangeZ; /* 2 * (2**floor(log2(segCount))) */
174 UINT16 entrySelectorZ; /* log2(searchRange/2) */
175 UINT16 rangeShiftZ; /* 2 x segCount - searchRange */
176
177 UINT16 values[VAR];
178#if 0
179 UINT16 endCount[segCount]; /* End characterCode for each segment,
180 * last=0xFFFFu. */
181 UINT16 reservedPad; /* Set to 0. */
182 UINT16 startCount[segCount]; /* Start character code for each segment. */
183 INT16 idDelta[segCount]; /* Delta for all character codes in segment. */
184 UINT16 idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
185 UINT16 glyphIdArray[VAR]; /* Glyph index array (arbitrary length) */
186#endif
187
188 public:
189 DEFINE_SIZE_ARRAY (14, values);
190};
191
192struct CmapSubtableLongGroup
193{
194 friend struct CmapSubtableFormat12;
195 friend struct CmapSubtableFormat13;
196
197 int cmp (hb_codepoint_t codepoint) const
198 {
199 if (codepoint < startCharCode) return -1;
200 if (codepoint > endCharCode) return +1;
201 return 0;
202 }
203
204 inline bool sanitize (hb_sanitize_context_t *c) const
205 {
206 TRACE_SANITIZE (this);
207 return_trace (c->check_struct (this));
208 }
209
210 private:
211 UINT32 startCharCode; /* First character code in this group. */
212 UINT32 endCharCode; /* Last character code in this group. */
213 UINT32 glyphID; /* Glyph index; interpretation depends on
214 * subtable format. */
215 public:
216 DEFINE_SIZE_STATIC (12);
217};
218
219template <typename UINT>
220struct CmapSubtableTrimmed
221{
222 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
223 {
224 /* Rely on our implicit array bound-checking. */
225 hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
226 if (!gid)
227 return false;
228 *glyph = gid;
229 return true;
230 }
231
232 inline bool sanitize (hb_sanitize_context_t *c) const
233 {
234 TRACE_SANITIZE (this);
235 return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
236 }
237
238 protected:
239 UINT formatReserved; /* Subtable format and (maybe) padding. */
240 UINT lengthZ; /* Byte length of this subtable. */
241 UINT languageZ; /* Ignore. */
242 UINT startCharCode; /* First character code covered. */
243 ArrayOf<GlyphID, UINT>
244 glyphIdArray; /* Array of glyph index values for character
245 * codes in the range. */
246 public:
247 DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
248};
249
250struct CmapSubtableFormat6 : CmapSubtableTrimmed<UINT16> {};
251struct CmapSubtableFormat10 : CmapSubtableTrimmed<UINT32 > {};
252
253template <typename T>
254struct CmapSubtableLongSegmented
255{
256 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
257 {
258 int i = groups.bsearch (x: codepoint);
259 if (i == -1)
260 return false;
261 *glyph = T::group_get_glyph (groups[i], codepoint);
262 return true;
263 }
264
265 inline bool sanitize (hb_sanitize_context_t *c) const
266 {
267 TRACE_SANITIZE (this);
268 return_trace (c->check_struct (this) && groups.sanitize (c));
269 }
270
271 protected:
272 UINT16 format; /* Subtable format; set to 12. */
273 UINT16 reservedZ; /* Reserved; set to 0. */
274 UINT32 lengthZ; /* Byte length of this subtable. */
275 UINT32 languageZ; /* Ignore. */
276 SortedArrayOf<CmapSubtableLongGroup, UINT32>
277 groups; /* Groupings. */
278 public:
279 DEFINE_SIZE_ARRAY (16, groups);
280};
281
282struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
283{
284 static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
285 hb_codepoint_t u)
286 { return group.glyphID + (u - group.startCharCode); }
287};
288
289struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
290{
291 static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
292 hb_codepoint_t u HB_UNUSED)
293 { return group.glyphID; }
294};
295
/* Outcome of a variation-sequence lookup. */
enum glyph_variant_t
{
  GLYPH_VARIANT_NOT_FOUND	= 0,	/* Sequence is not listed at all. */
  GLYPH_VARIANT_FOUND		= 1,	/* A specific glyph was stored. */
  GLYPH_VARIANT_USE_DEFAULT	= 2	/* Listed as default: use the nominal glyph. */
};
302
303struct UnicodeValueRange
304{
305 inline int cmp (const hb_codepoint_t &codepoint) const
306 {
307 if (codepoint < startUnicodeValue) return -1;
308 if (codepoint > startUnicodeValue + additionalCount) return +1;
309 return 0;
310 }
311
312 inline bool sanitize (hb_sanitize_context_t *c) const
313 {
314 TRACE_SANITIZE (this);
315 return_trace (c->check_struct (this));
316 }
317
318 UINT24 startUnicodeValue; /* First value in this range. */
319 UINT8 additionalCount; /* Number of additional values in this
320 * range. */
321 public:
322 DEFINE_SIZE_STATIC (4);
323};
324
325typedef SortedArrayOf<UnicodeValueRange, UINT32> DefaultUVS;
326
327struct UVSMapping
328{
329 inline int cmp (const hb_codepoint_t &codepoint) const
330 {
331 return unicodeValue.cmp (a: codepoint);
332 }
333
334 inline bool sanitize (hb_sanitize_context_t *c) const
335 {
336 TRACE_SANITIZE (this);
337 return_trace (c->check_struct (this));
338 }
339
340 UINT24 unicodeValue; /* Base Unicode value of the UVS */
341 GlyphID glyphID; /* Glyph ID of the UVS */
342 public:
343 DEFINE_SIZE_STATIC (5);
344};
345
346typedef SortedArrayOf<UVSMapping, UINT32> NonDefaultUVS;
347
348struct VariationSelectorRecord
349{
350 inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
351 hb_codepoint_t *glyph,
352 const void *base) const
353 {
354 int i;
355 const DefaultUVS &defaults = base+defaultUVS;
356 i = defaults.bsearch (x: codepoint);
357 if (i != -1)
358 return GLYPH_VARIANT_USE_DEFAULT;
359 const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
360 i = nonDefaults.bsearch (x: codepoint);
361 if (i != -1)
362 {
363 *glyph = nonDefaults[i].glyphID;
364 return GLYPH_VARIANT_FOUND;
365 }
366 return GLYPH_VARIANT_NOT_FOUND;
367 }
368
369 inline int cmp (const hb_codepoint_t &variation_selector) const
370 {
371 return varSelector.cmp (a: variation_selector);
372 }
373
374 inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
375 {
376 TRACE_SANITIZE (this);
377 return_trace (c->check_struct (this) &&
378 defaultUVS.sanitize (c, base) &&
379 nonDefaultUVS.sanitize (c, base));
380 }
381
382 UINT24 varSelector; /* Variation selector. */
383 LOffsetTo<DefaultUVS>
384 defaultUVS; /* Offset to Default UVS Table. May be 0. */
385 LOffsetTo<NonDefaultUVS>
386 nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */
387 public:
388 DEFINE_SIZE_STATIC (11);
389};
390
391struct CmapSubtableFormat14
392{
393 inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
394 hb_codepoint_t variation_selector,
395 hb_codepoint_t *glyph) const
396 {
397 return record[record.bsearch(x: variation_selector)].get_glyph (codepoint, glyph, base: this);
398 }
399
400 inline bool sanitize (hb_sanitize_context_t *c) const
401 {
402 TRACE_SANITIZE (this);
403 return_trace (c->check_struct (this) &&
404 record.sanitize (c, this));
405 }
406
407 protected:
408 UINT16 format; /* Format number is set to 14. */
409 UINT32 lengthZ; /* Byte length of this subtable. */
410 SortedArrayOf<VariationSelectorRecord, UINT32>
411 record; /* Variation selector records; sorted
412 * in increasing order of `varSelector'. */
413 public:
414 DEFINE_SIZE_ARRAY (10, record);
415};
416
417struct CmapSubtable
418{
419 /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
420
421 inline bool get_glyph (hb_codepoint_t codepoint,
422 hb_codepoint_t *glyph) const
423 {
424 switch (u.format) {
425 case 0: return u.format0 .get_glyph(codepoint, glyph);
426 case 4: return u.format4 .get_glyph(codepoint, glyph);
427 case 6: return u.format6 .get_glyph(codepoint, glyph);
428 case 10: return u.format10.get_glyph(codepoint, glyph);
429 case 12: return u.format12.get_glyph(codepoint, glyph);
430 case 13: return u.format13.get_glyph(codepoint, glyph);
431 case 14:
432 default: return false;
433 }
434 }
435
436 inline bool sanitize (hb_sanitize_context_t *c) const
437 {
438 TRACE_SANITIZE (this);
439 if (!u.format.sanitize (c)) return_trace (false);
440 switch (u.format) {
441 case 0: return_trace (u.format0 .sanitize (c));
442 case 4: return_trace (u.format4 .sanitize (c));
443 case 6: return_trace (u.format6 .sanitize (c));
444 case 10: return_trace (u.format10.sanitize (c));
445 case 12: return_trace (u.format12.sanitize (c));
446 case 13: return_trace (u.format13.sanitize (c));
447 case 14: return_trace (u.format14.sanitize (c));
448 default:return_trace (true);
449 }
450 }
451
452 public:
453 union {
454 UINT16 format; /* Format identifier */
455 CmapSubtableFormat0 format0;
456 CmapSubtableFormat4 format4;
457 CmapSubtableFormat6 format6;
458 CmapSubtableFormat10 format10;
459 CmapSubtableFormat12 format12;
460 CmapSubtableFormat13 format13;
461 CmapSubtableFormat14 format14;
462 } u;
463 public:
464 DEFINE_SIZE_UNION (2, format);
465};
466
467
468struct EncodingRecord
469{
470 inline int cmp (const EncodingRecord &other) const
471 {
472 int ret;
473 ret = platformID.cmp (a: other.platformID);
474 if (ret) return ret;
475 ret = encodingID.cmp (a: other.encodingID);
476 if (ret) return ret;
477 return 0;
478 }
479
480 inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
481 {
482 TRACE_SANITIZE (this);
483 return_trace (c->check_struct (this) &&
484 subtable.sanitize (c, base));
485 }
486
487 UINT16 platformID; /* Platform ID. */
488 UINT16 encodingID; /* Platform-specific encoding ID. */
489 LOffsetTo<CmapSubtable>
490 subtable; /* Byte offset from beginning of table to the subtable for this encoding. */
491 public:
492 DEFINE_SIZE_STATIC (8);
493};
494
495struct cmap
496{
497 static const hb_tag_t tableTag = HB_OT_TAG_cmap;
498
499 inline bool sanitize (hb_sanitize_context_t *c) const
500 {
501 TRACE_SANITIZE (this);
502 return_trace (c->check_struct (this) &&
503 likely (version == 0) &&
504 encodingRecord.sanitize (c, this));
505 }
506
507 struct accelerator_t
508 {
509 inline void init (hb_face_t *face)
510 {
511 this->blob = OT::Sanitizer<OT::cmap>::sanitize (blob: face->reference_table (HB_OT_TAG_cmap));
512 const OT::cmap *cmap = OT::Sanitizer<OT::cmap>::lock_instance (blob: this->blob);
513 const OT::CmapSubtable *subtable = nullptr;
514 const OT::CmapSubtableFormat14 *subtable_uvs = nullptr;
515
516 bool symbol = false;
517 /* 32-bit subtables. */
518 if (!subtable) subtable = cmap->find_subtable (platform_id: 3, encoding_id: 10);
519 if (!subtable) subtable = cmap->find_subtable (platform_id: 0, encoding_id: 6);
520 if (!subtable) subtable = cmap->find_subtable (platform_id: 0, encoding_id: 4);
521 /* 16-bit subtables. */
522 if (!subtable) subtable = cmap->find_subtable (platform_id: 3, encoding_id: 1);
523 if (!subtable) subtable = cmap->find_subtable (platform_id: 0, encoding_id: 3);
524 if (!subtable) subtable = cmap->find_subtable (platform_id: 0, encoding_id: 2);
525 if (!subtable) subtable = cmap->find_subtable (platform_id: 0, encoding_id: 1);
526 if (!subtable) subtable = cmap->find_subtable (platform_id: 0, encoding_id: 0);
527 if (!subtable)
528 {
529 subtable = cmap->find_subtable (platform_id: 3, encoding_id: 0);
530 if (subtable) symbol = true;
531 }
532 /* Meh. */
533 if (!subtable) subtable = &OT::Null(OT::CmapSubtable);
534
535 /* UVS subtable. */
536 if (!subtable_uvs)
537 {
538 const OT::CmapSubtable *st = cmap->find_subtable (platform_id: 0, encoding_id: 5);
539 if (st && st->u.format == 14)
540 subtable_uvs = &st->u.format14;
541 }
542 /* Meh. */
543 if (!subtable_uvs) subtable_uvs = &OT::Null(OT::CmapSubtableFormat14);
544
545 this->uvs_table = subtable_uvs;
546
547 this->get_glyph_data = subtable;
548 if (unlikely (symbol))
549 this->get_glyph_func = get_glyph_from_symbol<OT::CmapSubtable>;
550 else
551 switch (subtable->u.format) {
552 /* Accelerate format 4 and format 12. */
553 default: this->get_glyph_func = get_glyph_from<OT::CmapSubtable>; break;
554 case 12: this->get_glyph_func = get_glyph_from<OT::CmapSubtableFormat12>; break;
555 case 4:
556 {
557 this->format4_accel.init (subtable: &subtable->u.format4);
558 this->get_glyph_data = &this->format4_accel;
559 this->get_glyph_func = this->format4_accel.get_glyph_func;
560 }
561 break;
562 }
563 }
564
565 inline void fini (void)
566 {
567 hb_blob_destroy (blob: this->blob);
568 }
569
570 inline bool get_nominal_glyph (hb_codepoint_t unicode,
571 hb_codepoint_t *glyph) const
572 {
573 return this->get_glyph_func (this->get_glyph_data, unicode, glyph);
574 }
575
576 inline bool get_variation_glyph (hb_codepoint_t unicode,
577 hb_codepoint_t variation_selector,
578 hb_codepoint_t *glyph) const
579 {
580 switch (this->uvs_table->get_glyph_variant (codepoint: unicode,
581 variation_selector,
582 glyph))
583 {
584 case OT::GLYPH_VARIANT_NOT_FOUND: return false;
585 case OT::GLYPH_VARIANT_FOUND: return true;
586 case OT::GLYPH_VARIANT_USE_DEFAULT: break;
587 }
588
589 return get_nominal_glyph (unicode, glyph);
590 }
591
592 protected:
593 typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
594 hb_codepoint_t codepoint,
595 hb_codepoint_t *glyph);
596
597 template <typename Type>
598 static inline bool get_glyph_from (const void *obj,
599 hb_codepoint_t codepoint,
600 hb_codepoint_t *glyph)
601 {
602 const Type *typed_obj = (const Type *) obj;
603 return typed_obj->get_glyph (codepoint, glyph);
604 }
605
606 template <typename Type>
607 static inline bool get_glyph_from_symbol (const void *obj,
608 hb_codepoint_t codepoint,
609 hb_codepoint_t *glyph)
610 {
611 const Type *typed_obj = (const Type *) obj;
612 if (likely (typed_obj->get_glyph (codepoint, glyph)))
613 return true;
614
615 if (codepoint <= 0x00FFu)
616 {
617 /* For symbol-encoded OpenType fonts, we duplicate the
618 * U+F000..F0FF range at U+0000..U+00FF. That's what
619 * Windows seems to do, and that's hinted about at:
620 * http://www.microsoft.com/typography/otspec/recom.htm
621 * under "Non-Standard (Symbol) Fonts". */
622 return typed_obj->get_glyph (0xF000u + codepoint, glyph);
623 }
624
625 return false;
626 }
627
628 private:
629 hb_cmap_get_glyph_func_t get_glyph_func;
630 const void *get_glyph_data;
631 OT::CmapSubtableFormat4::accelerator_t format4_accel;
632
633 const OT::CmapSubtableFormat14 *uvs_table;
634 hb_blob_t *blob;
635 };
636
637 protected:
638
639 inline const CmapSubtable *find_subtable (unsigned int platform_id,
640 unsigned int encoding_id) const
641 {
642 EncodingRecord key;
643 key.platformID.set (platform_id);
644 key.encodingID.set (encoding_id);
645
646 /* Note: We can use bsearch, but since it has no performance
647 * implications, we use lsearch and as such accept fonts with
648 * unsorted subtable list. */
649 int result = encodingRecord./*bsearch*/lsearch (x: key);
650 if (result == -1 || !encodingRecord[result].subtable)
651 return nullptr;
652
653 return &(this+encodingRecord[result].subtable);
654 }
655
656 protected:
657 UINT16 version; /* Table version number (0). */
658 SortedArrayOf<EncodingRecord>
659 encodingRecord; /* Encoding tables. */
660 public:
661 DEFINE_SIZE_ARRAY (4, encodingRecord);
662};
663
664
665} /* namespace OT */
666
667
668#endif /* HB_OT_CMAP_TABLE_HH */
669

/* Source: qtbase/src/3rdparty/harfbuzz-ng/src/hb-ot-cmap-table.hh */