1 | /* |
2 | * Copyright © 2012,2017 Google, Inc. |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Google Author(s): Behdad Esfahbod |
25 | */ |
26 | |
27 | #ifndef HB_SET_PRIVATE_HH |
28 | #define HB_SET_PRIVATE_HH |
29 | |
30 | #include "hb-private.hh" |
31 | #include "hb-object-private.hh" |
32 | |
33 | |
34 | /* |
35 | * hb_set_t |
36 | */ |
37 | |
38 | /* TODO Keep a free-list so we can free pages that are completely zeroed. At that |
39 | * point maybe also use a sentinel value for "all-1" pages? */ |
40 | |
41 | struct hb_set_t |
42 | { |
43 | struct page_map_t |
44 | { |
45 | inline int cmp (const page_map_t *o) const { return (int) o->major - (int) major; } |
46 | |
47 | uint32_t major; |
48 | uint32_t index; |
49 | }; |
50 | |
51 | struct page_t |
52 | { |
53 | inline void init0 (void) { memset (s: &v, c: 0, n: sizeof (v)); } |
54 | inline void init1 (void) { memset (s: &v, c: 0xff, n: sizeof (v)); } |
55 | |
56 | inline unsigned int len (void) const |
57 | { return ARRAY_LENGTH_CONST (v); } |
58 | |
59 | inline bool is_empty (void) const |
60 | { |
61 | for (unsigned int i = 0; i < len (); i++) |
62 | if (v[i]) |
63 | return false; |
64 | return true; |
65 | } |
66 | |
67 | inline void add (hb_codepoint_t g) { elt (g) |= mask (g); } |
68 | inline void del (hb_codepoint_t g) { elt (g) &= ~mask (g); } |
69 | inline bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); } |
70 | |
71 | inline void add_range (hb_codepoint_t a, hb_codepoint_t b) |
72 | { |
73 | elt_t *la = &elt (g: a); |
74 | elt_t *lb = &elt (g: b); |
75 | if (la == lb) |
76 | *la |= (mask (g: b) << 1) - mask(g: a); |
77 | else |
78 | { |
79 | *la |= ~(mask (g: a) - 1); |
80 | la++; |
81 | |
82 | memset (s: la, c: 0xff, n: (char *) lb - (char *) la); |
83 | |
84 | *lb |= ((mask (g: b) << 1) - 1); |
85 | } |
86 | } |
87 | |
88 | inline bool is_equal (const page_t *other) const |
89 | { |
90 | return 0 == memcmp (s1: &v, s2: &other->v, n: sizeof (v)); |
91 | } |
92 | |
93 | inline unsigned int get_population (void) const |
94 | { |
95 | unsigned int pop = 0; |
96 | for (unsigned int i = 0; i < len (); i++) |
97 | pop += _hb_popcount (mask: v[i]); |
98 | return pop; |
99 | } |
100 | |
101 | inline bool next (hb_codepoint_t *codepoint) const |
102 | { |
103 | unsigned int m = (*codepoint + 1) & MASK; |
104 | if (!m) |
105 | { |
106 | *codepoint = INVALID; |
107 | return false; |
108 | } |
109 | unsigned int i = m / ELT_BITS; |
110 | unsigned int j = m & ELT_MASK; |
111 | |
112 | for (; j < ELT_BITS; j++) |
113 | if (v[i] & (elt_t (1) << j)) |
114 | goto found; |
115 | for (i++; i < len (); i++) |
116 | if (v[i]) |
117 | for (j = 0; j < ELT_BITS; j++) |
118 | if (v[i] & (elt_t (1) << j)) |
119 | goto found; |
120 | |
121 | *codepoint = INVALID; |
122 | return false; |
123 | |
124 | found: |
125 | *codepoint = i * ELT_BITS + j; |
126 | return true; |
127 | } |
128 | inline hb_codepoint_t get_min (void) const |
129 | { |
130 | for (unsigned int i = 0; i < len (); i++) |
131 | if (v[i]) |
132 | { |
133 | elt_t e = v[i]; |
134 | for (unsigned int j = 0; j < ELT_BITS; j++) |
135 | if (e & (elt_t (1) << j)) |
136 | return i * ELT_BITS + j; |
137 | } |
138 | return INVALID; |
139 | } |
140 | inline hb_codepoint_t get_max (void) const |
141 | { |
142 | for (int i = len () - 1; i >= 0; i--) |
143 | if (v[i]) |
144 | { |
145 | elt_t e = v[i]; |
146 | for (int j = ELT_BITS - 1; j >= 0; j--) |
147 | if (e & (elt_t (1) << j)) |
148 | return i * ELT_BITS + j; |
149 | } |
150 | return 0; |
151 | } |
152 | |
153 | static const unsigned int PAGE_BITS = 8192; /* Use to tune. */ |
154 | static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "" ); |
155 | |
156 | typedef uint64_t elt_t; |
157 | |
158 | #if 0 && HAVE_VECTOR_SIZE |
159 | /* The vectorized version does not work with clang as non-const |
160 | * elt() errs "non-const reference cannot bind to vector element". */ |
161 | typedef elt_t vector_t __attribute__((vector_size (PAGE_BITS / 8))); |
162 | #else |
163 | typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t; |
164 | #endif |
165 | |
166 | vector_t v; |
167 | |
168 | static const unsigned int ELT_BITS = sizeof (elt_t) * 8; |
169 | static const unsigned int ELT_MASK = ELT_BITS - 1; |
170 | static const unsigned int BITS = sizeof (vector_t) * 8; |
171 | static const unsigned int MASK = BITS - 1; |
172 | static_assert (PAGE_BITS == BITS, "" ); |
173 | |
174 | elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; } |
175 | elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; } |
176 | elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); } |
177 | }; |
178 | static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, "" ); |
179 | |
180 | hb_object_header_t ; |
181 | ASSERT_POD (); |
182 | bool in_error; |
183 | hb_prealloced_array_t<page_map_t, 8> page_map; |
184 | hb_prealloced_array_t<page_t, 1> pages; |
185 | |
186 | inline void init (void) |
187 | { |
188 | page_map.init (); |
189 | pages.init (); |
190 | } |
191 | inline void finish (void) |
192 | { |
193 | page_map.finish (); |
194 | pages.finish (); |
195 | } |
196 | |
197 | inline bool resize (unsigned int count) |
198 | { |
199 | if (unlikely (in_error)) return false; |
200 | if (!pages.resize (size: count) || !page_map.resize (size: count)) |
201 | { |
202 | pages.resize (size: page_map.len); |
203 | in_error = true; |
204 | return false; |
205 | } |
206 | return true; |
207 | } |
208 | |
209 | inline void clear (void) { |
210 | if (unlikely (hb_object_is_inert (this))) |
211 | return; |
212 | in_error = false; |
213 | page_map.resize (size: 0); |
214 | pages.resize (size: 0); |
215 | } |
216 | inline bool is_empty (void) const { |
217 | unsigned int count = pages.len; |
218 | for (unsigned int i = 0; i < count; i++) |
219 | if (!pages[i].is_empty ()) |
220 | return false; |
221 | return true; |
222 | } |
223 | |
224 | inline void add (hb_codepoint_t g) |
225 | { |
226 | if (unlikely (in_error)) return; |
227 | if (unlikely (g == INVALID)) return; |
228 | page_t *page = page_for_insert (g); if (unlikely (!page)) return; |
229 | page->add (g); |
230 | } |
231 | inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) |
232 | { |
233 | if (unlikely (in_error)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ |
234 | if (unlikely (a > b || a == INVALID || b == INVALID)) return false; |
235 | unsigned int ma = get_major (g: a); |
236 | unsigned int mb = get_major (g: b); |
237 | if (ma == mb) |
238 | { |
239 | page_t *page = page_for_insert (g: a); if (unlikely (!page)) return false; |
240 | page->add_range (a, b); |
241 | } |
242 | else |
243 | { |
244 | page_t *page = page_for_insert (g: a); if (unlikely (!page)) return false; |
245 | page->add_range (a, b: major_start (major: ma + 1) - 1); |
246 | |
247 | for (unsigned int m = ma + 1; m < mb; m++) |
248 | { |
249 | page = page_for_insert (g: major_start (major: m)); if (unlikely (!page)) return false; |
250 | page->init1 (); |
251 | } |
252 | |
253 | page = page_for_insert (g: b); if (unlikely (!page)) return false; |
254 | page->add_range (a: major_start (major: mb), b); |
255 | } |
256 | return true; |
257 | } |
258 | |
259 | template <typename T> |
260 | inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
261 | { |
262 | if (unlikely (in_error)) return; |
263 | if (!count) return; |
264 | hb_codepoint_t g = *array; |
265 | while (count) |
266 | { |
267 | unsigned int m = get_major (g); |
268 | page_t *page = page_for_insert (g); if (unlikely (!page)) return; |
269 | unsigned int start = major_start (major: m); |
270 | unsigned int end = major_start (major: m + 1); |
271 | do |
272 | { |
273 | page->add (g); |
274 | |
275 | array++; |
276 | count--; |
277 | } |
278 | while (count && (g = *array, start <= g && g < end)); |
279 | } |
280 | } |
281 | |
282 | /* Might return false if array looks unsorted. |
283 | * Used for faster rejection of corrupt data. */ |
284 | template <typename T> |
285 | inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
286 | { |
287 | if (unlikely (in_error)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ |
288 | if (!count) return true; |
289 | hb_codepoint_t g = *array; |
290 | hb_codepoint_t last_g = g; |
291 | while (count) |
292 | { |
293 | unsigned int m = get_major (g); |
294 | page_t *page = page_for_insert (g); if (unlikely (!page)) return false; |
295 | unsigned int end = major_start (major: m + 1); |
296 | do |
297 | { |
298 | /* If we try harder we can change the following comparison to <=; |
299 | * Not sure if it's worth it. */ |
300 | if (g < last_g) return false; |
301 | last_g = g; |
302 | page->add (g); |
303 | |
304 | array++; |
305 | count--; |
306 | } |
307 | while (count && (g = *array, g < end)); |
308 | } |
309 | return true; |
310 | } |
311 | |
312 | inline void del (hb_codepoint_t g) |
313 | { |
314 | if (unlikely (in_error)) return; |
315 | page_t *p = page_for (g); |
316 | if (!p) |
317 | return; |
318 | p->del (g); |
319 | } |
320 | inline void del_range (hb_codepoint_t a, hb_codepoint_t b) |
321 | { |
322 | /* TODO Optimize, like add_range(). */ |
323 | if (unlikely (in_error)) return; |
324 | for (unsigned int i = a; i < b + 1; i++) |
325 | del (g: i); |
326 | } |
327 | inline bool has (hb_codepoint_t g) const |
328 | { |
329 | const page_t *p = page_for (g); |
330 | if (!p) |
331 | return false; |
332 | return p->has (g); |
333 | } |
334 | inline bool intersects (hb_codepoint_t first, |
335 | hb_codepoint_t last) const |
336 | { |
337 | hb_codepoint_t c = first - 1; |
338 | return next (codepoint: &c) && c <= last; |
339 | } |
340 | inline void set (const hb_set_t *other) |
341 | { |
342 | if (unlikely (in_error)) return; |
343 | unsigned int count = other->pages.len; |
344 | if (!resize (count)) |
345 | return; |
346 | |
347 | memcpy (dest: pages.array, src: other->pages.array, n: count * sizeof (pages.array[0])); |
348 | memcpy (dest: page_map.array, src: other->page_map.array, n: count * sizeof (page_map.array[0])); |
349 | } |
350 | |
351 | inline bool is_equal (const hb_set_t *other) const |
352 | { |
353 | unsigned int na = pages.len; |
354 | unsigned int nb = other->pages.len; |
355 | |
356 | unsigned int a = 0, b = 0; |
357 | for (; a < na && b < nb; ) |
358 | { |
359 | if (page_at (i: a).is_empty ()) { a++; continue; } |
360 | if (other->page_at (i: b).is_empty ()) { b++; continue; } |
361 | if (page_map[a].major != other->page_map[b].major || |
362 | !page_at (i: a).is_equal (other: &other->page_at (i: b))) |
363 | return false; |
364 | a++; |
365 | b++; |
366 | } |
367 | for (; a < na; a++) |
368 | if (!page_at (i: a).is_empty ()) { return false; } |
369 | for (; b < nb; b++) |
370 | if (!other->page_at (i: b).is_empty ()) { return false; } |
371 | |
372 | return true; |
373 | } |
374 | |
375 | template <class Op> |
376 | inline void process (const hb_set_t *other) |
377 | { |
378 | if (unlikely (in_error)) return; |
379 | |
380 | unsigned int na = pages.len; |
381 | unsigned int nb = other->pages.len; |
382 | |
383 | unsigned int count = 0; |
384 | unsigned int a = 0, b = 0; |
385 | for (; a < na && b < nb; ) |
386 | { |
387 | if (page_map[a].major == other->page_map[b].major) |
388 | { |
389 | count++; |
390 | a++; |
391 | b++; |
392 | } |
393 | else if (page_map[a].major < other->page_map[b].major) |
394 | { |
395 | if (Op::passthru_left) |
396 | count++; |
397 | a++; |
398 | } |
399 | else |
400 | { |
401 | if (Op::passthru_right) |
402 | count++; |
403 | b++; |
404 | } |
405 | } |
406 | if (Op::passthru_left) |
407 | count += na - a; |
408 | if (Op::passthru_right) |
409 | count += nb - b; |
410 | |
411 | if (!resize (count)) |
412 | return; |
413 | |
414 | /* Process in-place backward. */ |
415 | a = na; |
416 | b = nb; |
417 | for (; a && b; ) |
418 | { |
419 | if (page_map[a - 1].major == other->page_map[b - 1].major) |
420 | { |
421 | a--; |
422 | b--; |
423 | Op::process (page_at (i: --count).v, page_at (i: a).v, other->page_at (i: b).v); |
424 | } |
425 | else if (page_map[a - 1].major > other->page_map[b - 1].major) |
426 | { |
427 | a--; |
428 | if (Op::passthru_left) |
429 | page_at (i: --count).v = page_at (i: a).v; |
430 | } |
431 | else |
432 | { |
433 | b--; |
434 | if (Op::passthru_right) |
435 | page_at (i: --count).v = other->page_at (i: b).v; |
436 | } |
437 | } |
438 | if (Op::passthru_left) |
439 | while (a) |
440 | page_at (i: --count).v = page_at (i: --a).v; |
441 | if (Op::passthru_right) |
442 | while (b) |
443 | page_at (i: --count).v = other->page_at (i: --b).v; |
444 | assert (!count); |
445 | } |
446 | |
447 | inline void union_ (const hb_set_t *other) |
448 | { |
449 | process<HbOpOr> (other); |
450 | } |
451 | inline void intersect (const hb_set_t *other) |
452 | { |
453 | process<HbOpAnd> (other); |
454 | } |
455 | inline void subtract (const hb_set_t *other) |
456 | { |
457 | process<HbOpMinus> (other); |
458 | } |
459 | inline void symmetric_difference (const hb_set_t *other) |
460 | { |
461 | process<HbOpXor> (other); |
462 | } |
463 | inline bool next (hb_codepoint_t *codepoint) const |
464 | { |
465 | if (unlikely (*codepoint == INVALID)) { |
466 | *codepoint = get_min (); |
467 | return *codepoint != INVALID; |
468 | } |
469 | |
470 | page_map_t map = {.major: get_major (g: *codepoint), .index: 0}; |
471 | unsigned int i; |
472 | page_map.bfind (x: &map, i: &i); |
473 | if (i < page_map.len) |
474 | { |
475 | if (pages[page_map[i].index].next (codepoint)) |
476 | { |
477 | *codepoint += page_map[i].major * page_t::PAGE_BITS; |
478 | return true; |
479 | } |
480 | i++; |
481 | } |
482 | for (; i < page_map.len; i++) |
483 | { |
484 | hb_codepoint_t m = pages[page_map[i].index].get_min (); |
485 | if (m != INVALID) |
486 | { |
487 | *codepoint = page_map[i].major * page_t::PAGE_BITS + m; |
488 | return true; |
489 | } |
490 | } |
491 | *codepoint = INVALID; |
492 | return false; |
493 | } |
494 | inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const |
495 | { |
496 | hb_codepoint_t i; |
497 | |
498 | i = *last; |
499 | if (!next (codepoint: &i)) |
500 | { |
501 | *last = *first = INVALID; |
502 | return false; |
503 | } |
504 | |
505 | *last = *first = i; |
506 | while (next (codepoint: &i) && i == *last + 1) |
507 | (*last)++; |
508 | |
509 | return true; |
510 | } |
511 | |
512 | inline unsigned int get_population (void) const |
513 | { |
514 | unsigned int pop = 0; |
515 | unsigned int count = pages.len; |
516 | for (unsigned int i = 0; i < count; i++) |
517 | pop += pages[i].get_population (); |
518 | return pop; |
519 | } |
520 | inline hb_codepoint_t get_min (void) const |
521 | { |
522 | unsigned int count = pages.len; |
523 | for (unsigned int i = 0; i < count; i++) |
524 | if (!page_at (i).is_empty ()) |
525 | return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min (); |
526 | return INVALID; |
527 | } |
528 | inline hb_codepoint_t get_max (void) const |
529 | { |
530 | unsigned int count = pages.len; |
531 | for (int i = count - 1; i >= 0; i++) |
532 | if (!page_at (i).is_empty ()) |
533 | return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_max (); |
534 | return INVALID; |
535 | } |
536 | |
537 | static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; |
538 | |
539 | inline page_t *page_for_insert (hb_codepoint_t g) |
540 | { |
541 | page_map_t map = {.major: get_major (g), .index: pages.len}; |
542 | unsigned int i; |
543 | if (!page_map.bfind (x: &map, i: &i)) |
544 | { |
545 | if (!resize (count: pages.len + 1)) |
546 | return nullptr; |
547 | |
548 | pages[map.index].init0 (); |
549 | memmove (dest: &page_map[i + 1], src: &page_map[i], n: (page_map.len - 1 - i) * sizeof (page_map[0])); |
550 | page_map[i] = map; |
551 | } |
552 | return &pages[page_map[i].index]; |
553 | } |
554 | inline page_t *page_for (hb_codepoint_t g) |
555 | { |
556 | page_map_t key = {.major: get_major (g)}; |
557 | const page_map_t *found = page_map.bsearch (x: &key); |
558 | if (found) |
559 | return &pages[found->index]; |
560 | return nullptr; |
561 | } |
562 | inline const page_t *page_for (hb_codepoint_t g) const |
563 | { |
564 | page_map_t key = {.major: get_major (g)}; |
565 | const page_map_t *found = page_map.bsearch (x: &key); |
566 | if (found) |
567 | return &pages[found->index]; |
568 | return nullptr; |
569 | } |
570 | inline page_t &page_at (unsigned int i) { return pages[page_map[i].index]; } |
571 | inline const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; } |
572 | inline unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; } |
573 | inline hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; } |
574 | }; |
575 | |
576 | |
577 | #endif /* HB_SET_PRIVATE_HH */ |
578 | |