1 | /* A class for building vector constant patterns. |
2 | Copyright (C) 2017-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #ifndef GCC_VECTOR_BUILDER_H |
21 | #define GCC_VECTOR_BUILDER_H |
22 | |
23 | /* This class is a wrapper around auto_vec<T> for building vectors of T. |
24 | It aims to encode each vector as npatterns interleaved patterns, |
25 | where each pattern represents a sequence: |
26 | |
27 | { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... } |
28 | |
29 | The first three elements in each pattern provide enough information |
30 | to derive the other elements. If all patterns have a STEP of zero, |
31 | we only need to encode the first two elements in each pattern. |
32 | If BASE1 is also equal to BASE0 for all patterns, we only need to |
33 | encode the first element in each pattern. The number of encoded |
34 | elements per pattern is given by nelts_per_pattern. |
35 | |
36 | The class can be used in two ways: |
37 | |
38 | 1. It can be used to build a full image of the vector, which is then |
39 | canonicalized by finalize (). In this case npatterns is initially |
40 | the number of elements in the vector and nelts_per_pattern is |
41 | initially 1. |
42 | |
43 | 2. It can be used to build a vector that already has a known encoding. |
44 | This is preferred since it is more efficient and copes with |
45 | variable-length vectors. finalize () then canonicalizes the encoding |
46 | to a simpler form if possible. |
47 | |
48 | Shape is the type that specifies the number of elements in the vector |
49 | and (where relevant) the type of each element. |
50 | |
51 | The derived class Derived provides the functionality of this class |
52 | for specific Ts. Derived needs to provide the following interface: |
53 | |
54 | bool equal_p (T elt1, T elt2) const; |
55 | |
56 | Return true if elements ELT1 and ELT2 are equal. |
57 | |
58 | bool allow_steps_p () const; |
59 | |
60 | Return true if a stepped representation is OK. We don't allow |
61 | linear series for anything other than integers, to avoid problems |
62 | with rounding. |
63 | |
64 | bool integral_p (T elt) const; |
65 | |
66 | Return true if element ELT can be interpreted as an integer. |
67 | |
68 | StepType step (T elt1, T elt2) const; |
69 | |
70 | Return the value of element ELT2 minus the value of element ELT1, |
71 | given integral_p (ELT1) && integral_p (ELT2). There is no fixed |
72 | choice of StepType. |
73 | |
74 | T apply_step (T base, unsigned int factor, StepType step) const; |
75 | |
76 | Return a vector element with the value BASE + FACTOR * STEP. |
77 | |
78 | bool can_elide_p (T elt) const; |
79 | |
80 | Return true if we can drop element ELT, even if the retained |
81 | elements are different. This is provided for TREE_OVERFLOW |
82 | handling. |
83 | |
84 | void note_representative (T *elt1_ptr, T elt2); |
85 | |
86 | Record that ELT2 is being elided, given that ELT1_PTR points to |
87 | the last encoded element for the containing pattern. This is |
88 | again provided for TREE_OVERFLOW handling. |
89 | |
90 | static poly_uint64 shape_nelts (Shape shape); |
91 | |
92 | Return the number of elements in SHAPE. |
93 | |
94 | The class provides additional functionality for the case in which |
95 | T can describe a vector constant as well as an individual element. |
96 | This functionality requires: |
97 | |
98 | static poly_uint64 nelts_of (T x); |
99 | |
100 | Return the number of elements in vector constant X. |
101 | |
102 | static unsigned int npatterns_of (T x); |
103 | |
104 | Return the number of patterns used to encode vector constant X. |
105 | |
106 | static unsigned int nelts_per_pattern_of (T x); |
107 | |
108 | Return the number of elements used to encode each pattern |
109 | in vector constant X. */ |
110 | |
111 | template<typename T, typename Shape, typename Derived> |
112 | class vector_builder : public auto_vec<T, 32> |
113 | { |
114 | public: |
115 | vector_builder (); |
116 | |
117 | poly_uint64 full_nelts () const { return m_full_nelts; } |
118 | unsigned int npatterns () const { return m_npatterns; } |
119 | unsigned int nelts_per_pattern () const { return m_nelts_per_pattern; } |
120 | unsigned int encoded_nelts () const; |
121 | bool encoded_full_vector_p () const; |
122 | T elt (unsigned int) const; |
123 | unsigned int count_dups (int, int, int) const; |
124 | |
125 | bool operator == (const Derived &) const; |
126 | bool operator != (const Derived &x) const { return !operator == (x); } |
127 | |
128 | bool new_unary_operation (Shape, T, bool); |
129 | bool new_binary_operation (Shape, T, T, bool); |
130 | |
131 | void finalize (); |
132 | |
133 | static unsigned int binary_encoded_nelts (T, T); |
134 | |
135 | protected: |
136 | void new_vector (poly_uint64, unsigned int, unsigned int); |
137 | void reshape (unsigned int, unsigned int); |
138 | bool repeating_sequence_p (unsigned int, unsigned int, unsigned int); |
139 | bool stepped_sequence_p (unsigned int, unsigned int, unsigned int); |
140 | bool try_npatterns (unsigned int); |
141 | |
142 | private: |
143 | vector_builder (const vector_builder &); |
144 | vector_builder &operator= (const vector_builder &); |
145 | Derived *derived () { return static_cast<Derived *> (this); } |
146 | const Derived *derived () const; |
147 | |
148 | poly_uint64 m_full_nelts; |
149 | unsigned int m_npatterns; |
150 | unsigned int m_nelts_per_pattern; |
151 | }; |
152 | |
153 | template<typename T, typename Shape, typename Derived> |
154 | inline const Derived * |
155 | vector_builder<T, Shape, Derived>::derived () const |
156 | { |
157 | return static_cast<const Derived *> (this); |
158 | } |
159 | |
160 | template<typename T, typename Shape, typename Derived> |
161 | inline |
162 | vector_builder<T, Shape, Derived>::vector_builder () |
163 | : m_full_nelts (0), |
164 | m_npatterns (0), |
165 | m_nelts_per_pattern (0) |
166 | {} |
167 | |
168 | /* Return the number of elements that are explicitly encoded. The vec |
169 | starts with these explicitly-encoded elements and may contain additional |
170 | elided elements. */ |
171 | |
172 | template<typename T, typename Shape, typename Derived> |
173 | inline unsigned int |
174 | vector_builder<T, Shape, Derived>::encoded_nelts () const |
175 | { |
176 | return m_npatterns * m_nelts_per_pattern; |
177 | } |
178 | |
179 | /* Return true if every element of the vector is explicitly encoded. */ |
180 | |
181 | template<typename T, typename Shape, typename Derived> |
182 | inline bool |
183 | vector_builder<T, Shape, Derived>::encoded_full_vector_p () const |
184 | { |
185 | return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts); |
186 | } |
187 | |
188 | /* Start building a vector that has FULL_NELTS elements. Initially |
189 | encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */ |
190 | |
191 | template<typename T, typename Shape, typename Derived> |
192 | void |
193 | vector_builder<T, Shape, Derived>::new_vector (poly_uint64 full_nelts, |
194 | unsigned int npatterns, |
195 | unsigned int nelts_per_pattern) |
196 | { |
197 | m_full_nelts = full_nelts; |
198 | m_npatterns = npatterns; |
199 | m_nelts_per_pattern = nelts_per_pattern; |
200 | this->reserve (encoded_nelts ()); |
201 | this->truncate (0); |
202 | } |
203 | |
204 | /* Return true if this vector and OTHER have the same elements and |
205 | are encoded in the same way. */ |
206 | |
207 | template<typename T, typename Shape, typename Derived> |
208 | bool |
209 | vector_builder<T, Shape, Derived>::operator == (const Derived &other) const |
210 | { |
211 | if (maybe_ne (m_full_nelts, other.m_full_nelts) |
212 | || m_npatterns != other.m_npatterns |
213 | || m_nelts_per_pattern != other.m_nelts_per_pattern) |
214 | return false; |
215 | |
216 | unsigned int nelts = encoded_nelts (); |
217 | for (unsigned int i = 0; i < nelts; ++i) |
218 | if (!derived ()->equal_p ((*this)[i], other[i])) |
219 | return false; |
220 | |
221 | return true; |
222 | } |
223 | |
224 | /* Return the value of vector element I, which might or might not be |
225 | encoded explicitly. */ |
226 | |
227 | template<typename T, typename Shape, typename Derived> |
228 | T |
229 | vector_builder<T, Shape, Derived>::elt (unsigned int i) const |
230 | { |
231 | /* First handle elements that are already present in the underlying |
232 | vector, regardless of whether they're part of the encoding or not. */ |
233 | if (i < this->length ()) |
234 | return (*this)[i]; |
235 | |
236 | /* Extrapolation is only possible if the encoding has been fully |
237 | populated. */ |
238 | gcc_checking_assert (encoded_nelts () <= this->length ()); |
239 | |
240 | /* Identify the pattern that contains element I and work out the index of |
241 | the last encoded element for that pattern. */ |
242 | unsigned int pattern = i % m_npatterns; |
243 | unsigned int count = i / m_npatterns; |
244 | unsigned int final_i = encoded_nelts () - m_npatterns + pattern; |
245 | T final = (*this)[final_i]; |
246 | |
247 | /* If there are no steps, the final encoded value is the right one. */ |
248 | if (m_nelts_per_pattern <= 2) |
249 | return final; |
250 | |
251 | /* Otherwise work out the value from the last two encoded elements. */ |
252 | T prev = (*this)[final_i - m_npatterns]; |
253 | return derived ()->apply_step (final, count - 2, |
254 | derived ()->step (prev, final)); |
255 | } |
256 | |
257 | /* Try to start building a new vector of shape SHAPE that holds the result of |
258 | a unary operation on vector constant VEC. ALLOW_STEPPED_P is true if the |
259 | operation can handle stepped encodings directly, without having to expand |
260 | the full sequence. |
261 | |
262 | Return true if the operation is possible, which it always is when |
263 | ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */ |
264 | |
265 | template<typename T, typename Shape, typename Derived> |
266 | bool |
267 | vector_builder<T, Shape, Derived>::new_unary_operation (Shape shape, T vec, |
268 | bool allow_stepped_p) |
269 | { |
270 | poly_uint64 full_nelts = Derived::shape_nelts (shape); |
271 | gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec))); |
272 | unsigned int npatterns = Derived::npatterns_of (vec); |
273 | unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec); |
274 | if (!allow_stepped_p && nelts_per_pattern > 2) |
275 | { |
276 | if (!full_nelts.is_constant ()) |
277 | return false; |
278 | npatterns = full_nelts.to_constant (); |
279 | nelts_per_pattern = 1; |
280 | } |
281 | derived ()->new_vector (shape, npatterns, nelts_per_pattern); |
282 | return true; |
283 | } |
284 | |
285 | /* Try to start building a new vector of shape SHAPE that holds the result of |
286 | a binary operation on vector constants VEC1 and VEC2. ALLOW_STEPPED_P is |
287 | true if the operation can handle stepped encodings directly, without |
288 | having to expand the full sequence. |
289 | |
290 | Return true if the operation is possible. Leave the builder unchanged |
291 | otherwise. */ |
292 | |
293 | template<typename T, typename Shape, typename Derived> |
294 | bool |
295 | vector_builder<T, Shape, Derived>::new_binary_operation (Shape shape, |
296 | T vec1, T vec2, |
297 | bool allow_stepped_p) |
298 | { |
299 | poly_uint64 full_nelts = Derived::shape_nelts (shape); |
300 | gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1)) |
301 | && known_eq (full_nelts, Derived::nelts_of (vec2))); |
302 | /* Conceptually we split the patterns in VEC1 and VEC2 until we have |
303 | an equal number for both. Each split pattern requires the same |
304 | number of elements per pattern as the original. E.g. splitting: |
305 | |
306 | { 1, 2, 3, ... } |
307 | |
308 | into two gives: |
309 | |
310 | { 1, 3, 5, ... } |
311 | { 2, 4, 6, ... } |
312 | |
313 | while splitting: |
314 | |
315 | { 1, 0, ... } |
316 | |
317 | into two gives: |
318 | |
319 | { 1, 0, ... } |
320 | { 0, 0, ... }. */ |
321 | unsigned int npatterns |
322 | = least_common_multiple (Derived::npatterns_of (vec1), |
323 | Derived::npatterns_of (vec2)); |
324 | unsigned int nelts_per_pattern |
325 | = MAX (Derived::nelts_per_pattern_of (vec1), |
326 | Derived::nelts_per_pattern_of (vec2)); |
327 | if (!allow_stepped_p && nelts_per_pattern > 2) |
328 | { |
329 | if (!full_nelts.is_constant ()) |
330 | return false; |
331 | npatterns = full_nelts.to_constant (); |
332 | nelts_per_pattern = 1; |
333 | } |
334 | derived ()->new_vector (shape, npatterns, nelts_per_pattern); |
335 | return true; |
336 | } |
337 | |
338 | /* Return the number of elements that the caller needs to operate on in |
339 | order to handle a binary operation on vector constants VEC1 and VEC2. |
340 | This static function is used instead of new_binary_operation if the |
341 | result of the operation is not a constant vector. */ |
342 | |
343 | template<typename T, typename Shape, typename Derived> |
344 | unsigned int |
345 | vector_builder<T, Shape, Derived>::binary_encoded_nelts (T vec1, T vec2) |
346 | { |
347 | poly_uint64 nelts = Derived::nelts_of (vec1); |
348 | gcc_assert (known_eq (nelts, Derived::nelts_of (vec2))); |
349 | /* See new_binary_operation for details. */ |
350 | unsigned int npatterns |
351 | = least_common_multiple (Derived::npatterns_of (vec1), |
352 | Derived::npatterns_of (vec2)); |
353 | unsigned int nelts_per_pattern |
354 | = MAX (Derived::nelts_per_pattern_of (vec1), |
355 | Derived::nelts_per_pattern_of (vec2)); |
356 | unsigned HOST_WIDE_INT const_nelts; |
357 | if (nelts.is_constant (const_value: &const_nelts)) |
358 | return MIN (npatterns * nelts_per_pattern, const_nelts); |
359 | return npatterns * nelts_per_pattern; |
360 | } |
361 | |
362 | /* Return the number of leading duplicate elements in the range |
363 | [START:END:STEP]. The value is always at least 1. */ |
364 | |
365 | template<typename T, typename Shape, typename Derived> |
366 | unsigned int |
367 | vector_builder<T, Shape, Derived>::count_dups (int start, int end, |
368 | int step) const |
369 | { |
370 | gcc_assert ((end - start) % step == 0); |
371 | |
372 | unsigned int ndups = 1; |
373 | for (int i = start + step; |
374 | i != end && derived ()->equal_p (elt (i), elt (i: start)); |
375 | i += step) |
376 | ndups++; |
377 | return ndups; |
378 | } |
379 | |
380 | /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each, |
381 | but without changing the underlying vector. */ |
382 | |
383 | template<typename T, typename Shape, typename Derived> |
384 | void |
385 | vector_builder<T, Shape, Derived>::reshape (unsigned int npatterns, |
386 | unsigned int nelts_per_pattern) |
387 | { |
388 | unsigned int old_encoded_nelts = encoded_nelts (); |
389 | unsigned int new_encoded_nelts = npatterns * nelts_per_pattern; |
390 | gcc_checking_assert (new_encoded_nelts <= old_encoded_nelts); |
391 | unsigned int next = new_encoded_nelts - npatterns; |
392 | for (unsigned int i = new_encoded_nelts; i < old_encoded_nelts; ++i) |
393 | { |
394 | derived ()->note_representative (&(*this)[next], (*this)[i]); |
395 | next += 1; |
396 | if (next == new_encoded_nelts) |
397 | next -= npatterns; |
398 | } |
399 | m_npatterns = npatterns; |
400 | m_nelts_per_pattern = nelts_per_pattern; |
401 | } |
402 | |
403 | /* Return true if elements [START, END) contain a repeating sequence of |
404 | STEP elements. */ |
405 | |
406 | template<typename T, typename Shape, typename Derived> |
407 | bool |
408 | vector_builder<T, Shape, Derived>::repeating_sequence_p (unsigned int start, |
409 | unsigned int end, |
410 | unsigned int step) |
411 | { |
412 | for (unsigned int i = start; i < end - step; ++i) |
413 | if (!derived ()->equal_p ((*this)[i], (*this)[i + step])) |
414 | return false; |
415 | return true; |
416 | } |
417 | |
418 | /* Return true if elements [START, END) contain STEP interleaved linear |
419 | series. */ |
420 | |
421 | template<typename T, typename Shape, typename Derived> |
422 | bool |
423 | vector_builder<T, Shape, Derived>::stepped_sequence_p (unsigned int start, |
424 | unsigned int end, |
425 | unsigned int step) |
426 | { |
427 | if (!derived ()->allow_steps_p ()) |
428 | return false; |
429 | |
430 | for (unsigned int i = start + step * 2; i < end; ++i) |
431 | { |
432 | T elt1 = (*this)[i - step * 2]; |
433 | T elt2 = (*this)[i - step]; |
434 | T elt3 = (*this)[i]; |
435 | |
436 | if (!derived ()->integral_p (elt1) |
437 | || !derived ()->integral_p (elt2) |
438 | || !derived ()->integral_p (elt3)) |
439 | return false; |
440 | |
441 | if (maybe_ne (derived ()->step (elt1, elt2), |
442 | derived ()->step (elt2, elt3))) |
443 | return false; |
444 | |
445 | if (!derived ()->can_elide_p (elt3)) |
446 | return false; |
447 | } |
448 | return true; |
449 | } |
450 | |
451 | /* Try to change the number of encoded patterns to NPATTERNS, returning |
452 | true on success. */ |
453 | |
454 | template<typename T, typename Shape, typename Derived> |
455 | bool |
456 | vector_builder<T, Shape, Derived>::try_npatterns (unsigned int npatterns) |
457 | { |
458 | if (m_nelts_per_pattern == 1) |
459 | { |
460 | /* See whether NPATTERNS is valid with the current 1-element-per-pattern |
461 | encoding. */ |
462 | if (repeating_sequence_p (start: 0, end: encoded_nelts (), step: npatterns)) |
463 | { |
464 | reshape (npatterns, nelts_per_pattern: 1); |
465 | return true; |
466 | } |
467 | |
468 | /* We can only increase the number of elements per pattern if all |
469 | elements are still encoded explicitly. */ |
470 | if (!encoded_full_vector_p ()) |
471 | return false; |
472 | } |
473 | |
474 | if (m_nelts_per_pattern <= 2) |
475 | { |
476 | /* See whether NPATTERNS is valid with a 2-element-per-pattern |
477 | encoding. */ |
478 | if (repeating_sequence_p (start: npatterns, end: encoded_nelts (), step: npatterns)) |
479 | { |
480 | reshape (npatterns, nelts_per_pattern: 2); |
481 | return true; |
482 | } |
483 | |
484 | /* We can only increase the number of elements per pattern if all |
485 | elements are still encoded explicitly. */ |
486 | if (!encoded_full_vector_p ()) |
487 | return false; |
488 | } |
489 | |
490 | if (m_nelts_per_pattern <= 3) |
491 | { |
492 | /* See whether we have NPATTERNS interleaved linear series, |
493 | giving a 3-element-per-pattern encoding. */ |
494 | if (stepped_sequence_p (start: npatterns, end: encoded_nelts (), step: npatterns)) |
495 | { |
496 | reshape (npatterns, nelts_per_pattern: 3); |
497 | return true; |
498 | } |
499 | return false; |
500 | } |
501 | |
502 | gcc_unreachable (); |
503 | } |
504 | |
505 | /* Replace the current encoding with the canonical form. */ |
506 | |
507 | template<typename T, typename Shape, typename Derived> |
508 | void |
509 | vector_builder<T, Shape, Derived>::finalize () |
510 | { |
511 | /* The encoding requires the same number of elements to come from each |
512 | pattern. */ |
513 | gcc_assert (multiple_p (m_full_nelts, m_npatterns)); |
514 | |
515 | /* Allow the caller to build more elements than necessary. For example, |
516 | it's often convenient to build a stepped vector from the natural |
517 | encoding of three elements even if the vector itself only has two. */ |
518 | unsigned HOST_WIDE_INT const_full_nelts; |
519 | if (m_full_nelts.is_constant (const_value: &const_full_nelts) |
520 | && const_full_nelts <= encoded_nelts ()) |
521 | { |
522 | m_npatterns = const_full_nelts; |
523 | m_nelts_per_pattern = 1; |
524 | } |
525 | |
526 | /* Try to whittle down the number of elements per pattern. That is: |
527 | |
528 | 1. If we have stepped patterns whose steps are all 0, reduce the |
529 | number of elements per pattern from 3 to 2. |
530 | |
531 | 2. If we have background fill values that are the same as the |
532 | foreground values, reduce the number of elements per pattern |
533 | from 2 to 1. */ |
534 | while (m_nelts_per_pattern > 1 |
535 | && repeating_sequence_p (start: encoded_nelts () - m_npatterns * 2, |
536 | end: encoded_nelts (), step: m_npatterns)) |
537 | /* The last two sequences of M_NPATTERNS elements are equal, |
538 | so remove the last one. */ |
539 | reshape (npatterns: m_npatterns, nelts_per_pattern: m_nelts_per_pattern - 1); |
540 | |
541 | if (pow2p_hwi (x: m_npatterns)) |
542 | { |
543 | /* Try to halve the number of patterns while doing so gives a |
544 | valid pattern. This approach is linear in the number of |
545 | elements, whereas searcing from 1 up would be O(n*log(n)). |
546 | |
547 | Each halving step tries to keep the number of elements per pattern |
548 | the same. If that isn't possible, and if all elements are still |
549 | explicitly encoded, the halving step can instead increase the number |
550 | of elements per pattern. |
551 | |
552 | E.g. for: |
553 | |
554 | { 0, 2, 3, 4, 5, 6, 7, 8 } npatterns == 8 full_nelts == 8 |
555 | |
556 | we first realize that the second half of the sequence is not |
557 | equal to the first, so we cannot maintain 1 element per pattern |
558 | for npatterns == 4. Instead we halve the number of patterns |
559 | and double the number of elements per pattern, treating this |
560 | as a "foreground" { 0, 2, 3, 4 } against a "background" of |
561 | { 5, 6, 7, 8 | 5, 6, 7, 8 ... }: |
562 | |
563 | { 0, 2, 3, 4 | 5, 6, 7, 8 } npatterns == 4 |
564 | |
565 | Next we realize that this is *not* a foreround of { 0, 2 } |
566 | against a background of { 3, 4 | 3, 4 ... }, so the only |
567 | remaining option for reducing the number of patterns is |
568 | to use a foreground of { 0, 2 } against a stepped background |
569 | of { 1, 2 | 3, 4 | 5, 6 ... }. This is valid because we still |
570 | haven't elided any elements: |
571 | |
572 | { 0, 2 | 3, 4 | 5, 6 } npatterns == 2 |
573 | |
574 | This in turn can be reduced to a foreground of { 0 } against a |
575 | stepped background of { 1 | 2 | 3 ... }: |
576 | |
577 | { 0 | 2 | 3 } npatterns == 1 |
578 | |
579 | This last step would not have been possible for: |
580 | |
581 | { 0, 0 | 3, 4 | 5, 6 } npatterns == 2. */ |
582 | while ((m_npatterns & 1) == 0 && try_npatterns (npatterns: m_npatterns / 2)) |
583 | continue; |
584 | |
585 | /* Builders of arbitrary fixed-length vectors can use: |
586 | |
587 | new_vector (x, x, 1) |
588 | |
589 | so that every element is specified explicitly. Handle cases |
590 | that are actually wrapping series, like { 0, 1, 2, 3, 0, 1, 2, 3 } |
591 | would be for 2-bit elements. We'll have treated them as |
592 | duplicates in the loop above. */ |
593 | if (m_nelts_per_pattern == 1 |
594 | && m_full_nelts.is_constant (const_value: &const_full_nelts) |
595 | && this->length () >= const_full_nelts |
596 | && (m_npatterns & 3) == 0 |
597 | && stepped_sequence_p (start: m_npatterns / 4, end: const_full_nelts, |
598 | step: m_npatterns / 4)) |
599 | { |
600 | reshape (npatterns: m_npatterns / 4, nelts_per_pattern: 3); |
601 | while ((m_npatterns & 1) == 0 && try_npatterns (npatterns: m_npatterns / 2)) |
602 | continue; |
603 | } |
604 | } |
605 | else |
606 | /* For the non-power-of-2 case, do a simple search up from 1. */ |
607 | for (unsigned int i = 1; i <= m_npatterns / 2; ++i) |
608 | if (m_npatterns % i == 0 && try_npatterns (npatterns: i)) |
609 | break; |
610 | } |
611 | |
612 | #endif |
613 | |