1 | /* |
2 | * Amalgamated copy of CRoaring 0.2.66, modified for GTK to reduce compiler |
3 | * warnings. |
4 | * |
5 | * Copyright 2016-2020 The CRoaring authors |
6 | * Copyright 2020 Benjamin Otte |
7 | * |
8 | * Licensed under the Apache License, Version 2.0 (the "License"); |
9 | * you may not use this file except in compliance with the License. |
10 | * You may obtain a copy of the License at |
11 | * |
12 | * http://www.apache.org/licenses/LICENSE-2.0 |
13 | * |
14 | * Unless required by applicable law or agreed to in writing, software |
15 | * distributed under the License is distributed on an "AS IS" BASIS, |
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
17 | * See the License for the specific language governing permissions and |
18 | * limitations under the License. |
19 | * |
20 | * SPDX-License-Identifier: Apache-2.0 |
21 | */ |
22 | |
23 | #include "roaring.h" |
24 | |
25 | /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ |
26 | #ifdef DMALLOC |
27 | #include "dmalloc.h" |
28 | #endif |
29 | |
30 | /* begin file src/array_util.c */ |
31 | #include <assert.h> |
32 | #include <stdbool.h> |
33 | #include <stdint.h> |
34 | #include <stdio.h> |
35 | #include <stdlib.h> |
36 | #include <string.h> |
37 | |
38 | |
39 | #ifdef USESSE4 |
// one 16-byte shuffle control per 8-bit lane mask;
// used by intersect_vector16 and difference_vector16
41 | ALIGNED(0x1000) |
42 | static const uint8_t shuffle_mask16[] = { |
43 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
44 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
45 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF, |
46 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
47 | 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
48 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
49 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
50 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
51 | 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
52 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF, |
53 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF, |
54 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
55 | 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
56 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, |
57 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
58 | 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
59 | 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
60 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF, |
61 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
62 | 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
63 | 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, |
64 | 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
65 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, |
66 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
67 | 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
68 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF, |
69 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, |
70 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
71 | 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
72 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF, |
73 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
74 | 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
75 | 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
76 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF, |
77 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
78 | 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
79 | 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, |
80 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, |
81 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
82 | 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
83 | 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, |
84 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
85 | 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF, |
86 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
87 | 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
88 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
89 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
90 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
91 | 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
92 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF, |
93 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
94 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
95 | 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
96 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
97 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
98 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
99 | 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
100 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, |
101 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
102 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
103 | 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
104 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, |
105 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
106 | 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
107 | 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
108 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF, |
109 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, |
110 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
111 | 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
112 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, |
113 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
114 | 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
115 | 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
116 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, |
117 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, |
118 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
119 | 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
120 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, |
121 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
122 | 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
123 | 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
124 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, |
125 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
126 | 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
127 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, |
128 | 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
129 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, |
130 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
131 | 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
132 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 13, 0xFF, 0xFF, |
133 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, |
134 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
135 | 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
136 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF, |
137 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
138 | 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
139 | 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
140 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF, |
141 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
142 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
143 | 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
144 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, |
145 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
146 | 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
147 | 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
148 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
149 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, |
150 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
151 | 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
152 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF, |
153 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
154 | 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
155 | 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
156 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13, |
157 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
158 | 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
159 | 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, |
160 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, |
161 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
162 | 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
163 | 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
164 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, |
165 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
166 | 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
167 | 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, |
168 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, |
169 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
170 | 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
171 | 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
172 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF, |
173 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, |
174 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
175 | 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
176 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, |
177 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
178 | 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
179 | 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
180 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11, |
181 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, |
182 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
183 | 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
184 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13, |
185 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
186 | 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
187 | 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
188 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11, |
189 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
190 | 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
191 | 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, |
192 | 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, |
193 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, |
194 | 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
195 | 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
196 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11, |
197 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, |
198 | 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
199 | 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, |
200 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 10, 11, |
201 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
202 | 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
203 | 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
204 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11, |
205 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
206 | 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
207 | 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, |
208 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11, |
209 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
210 | 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
211 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, |
212 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
213 | 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF, |
214 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
215 | 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
216 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
217 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
218 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
219 | 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
220 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF, |
221 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
222 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
223 | 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
224 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
225 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
226 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
227 | 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
228 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15, |
229 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
230 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
231 | 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
232 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15, |
233 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
234 | 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
235 | 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
236 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF, |
237 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, |
238 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
239 | 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
240 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, |
241 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
242 | 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
243 | 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
244 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, |
245 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, |
246 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
247 | 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
248 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15, |
249 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
250 | 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
251 | 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
252 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, |
253 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
254 | 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
255 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15, |
256 | 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
257 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, |
258 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
259 | 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
260 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15, |
261 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, |
262 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
263 | 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
264 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15, |
265 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
266 | 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
267 | 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
268 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15, |
269 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
270 | 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
271 | 0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, |
272 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15, |
273 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
274 | 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
275 | 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, |
276 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
277 | 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, |
278 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
279 | 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
280 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15, |
281 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
282 | 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
283 | 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
284 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11, |
285 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
286 | 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
287 | 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15, |
288 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15, |
289 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
290 | 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
291 | 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, |
292 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, |
293 | 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
294 | 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
295 | 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, |
296 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, |
297 | 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
298 | 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, |
299 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
300 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF, |
301 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13, |
302 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
303 | 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
304 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, |
305 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
306 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
307 | 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
308 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13, |
309 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13, |
310 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
311 | 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
312 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15, |
313 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
314 | 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
315 | 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
316 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13, |
317 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
318 | 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
319 | 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, |
320 | 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, |
321 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, |
322 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
323 | 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
324 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13, |
325 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, |
326 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
327 | 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, |
328 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13, |
329 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
330 | 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
331 | 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
332 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13, |
333 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
334 | 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
335 | 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15, |
336 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13, |
337 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
338 | 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
339 | 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, |
340 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
341 | 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13, |
342 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
343 | 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
344 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15, |
345 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
346 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
347 | 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
348 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13, |
349 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
350 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
351 | 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, |
352 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15, |
353 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
354 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
355 | 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, |
356 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, |
357 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
358 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
359 | 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, |
360 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, |
361 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
362 | 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, |
363 | 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
364 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13, |
365 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, |
366 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
367 | 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, |
368 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13, |
369 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
370 | 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
371 | 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, |
372 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, |
373 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9, |
374 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
375 | 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
376 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, |
377 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
378 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, |
379 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
380 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, |
381 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5, |
382 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, |
383 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, |
384 | 12, 13, 14, 15}; |
385 | |
386 | /** |
387 | * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions |
388 | * Optimized by D. Lemire on May 3rd 2013 |
389 | */ |
390 | int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, |
391 | const uint16_t *__restrict__ B, size_t s_b, |
392 | uint16_t *C) { |
393 | size_t count = 0; |
394 | size_t i_a = 0, i_b = 0; |
395 | const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); |
396 | const size_t st_a = (s_a / vectorlength) * vectorlength; |
397 | const size_t st_b = (s_b / vectorlength) * vectorlength; |
398 | __m128i v_a, v_b; |
399 | if ((i_a < st_a) && (i_b < st_b)) { |
400 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
401 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
402 | while ((A[i_a] == 0) || (B[i_b] == 0)) { |
403 | const __m128i res_v = _mm_cmpestrm( |
404 | v_b, vectorlength, v_a, vectorlength, |
405 | _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); |
406 | const int r = _mm_extract_epi32(res_v, 0); |
407 | __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r); |
408 | __m128i p = _mm_shuffle_epi8(v_a, sm16); |
409 | _mm_storeu_si128((__m128i *)&C[count], p); // can overflow |
410 | count += _mm_popcnt_u32(r); |
411 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
412 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
413 | if (a_max <= b_max) { |
414 | i_a += vectorlength; |
415 | if (i_a == st_a) break; |
416 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
417 | } |
418 | if (b_max <= a_max) { |
419 | i_b += vectorlength; |
420 | if (i_b == st_b) break; |
421 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
422 | } |
423 | } |
424 | if ((i_a < st_a) && (i_b < st_b)) |
425 | while (true) { |
426 | const __m128i res_v = _mm_cmpistrm( |
427 | v_b, v_a, |
428 | _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); |
429 | const int r = _mm_extract_epi32(res_v, 0); |
430 | __m128i sm16 = |
431 | _mm_load_si128((const __m128i *)shuffle_mask16 + r); |
432 | __m128i p = _mm_shuffle_epi8(v_a, sm16); |
433 | _mm_storeu_si128((__m128i *)&C[count], p); // can overflow |
434 | count += _mm_popcnt_u32(r); |
435 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
436 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
437 | if (a_max <= b_max) { |
438 | i_a += vectorlength; |
439 | if (i_a == st_a) break; |
440 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
441 | } |
442 | if (b_max <= a_max) { |
443 | i_b += vectorlength; |
444 | if (i_b == st_b) break; |
445 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
446 | } |
447 | } |
448 | } |
449 | // intersect the tail using scalar intersection |
450 | while (i_a < s_a && i_b < s_b) { |
451 | uint16_t a = A[i_a]; |
452 | uint16_t b = B[i_b]; |
453 | if (a < b) { |
454 | i_a++; |
455 | } else if (b < a) { |
456 | i_b++; |
457 | } else { |
458 | C[count] = a; //==b; |
459 | count++; |
460 | i_a++; |
461 | i_b++; |
462 | } |
463 | } |
464 | return (int32_t)count; |
465 | } |
466 | |
467 | int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, |
468 | size_t s_a, |
469 | const uint16_t *__restrict__ B, |
470 | size_t s_b) { |
471 | size_t count = 0; |
472 | size_t i_a = 0, i_b = 0; |
473 | const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); |
474 | const size_t st_a = (s_a / vectorlength) * vectorlength; |
475 | const size_t st_b = (s_b / vectorlength) * vectorlength; |
476 | __m128i v_a, v_b; |
477 | if ((i_a < st_a) && (i_b < st_b)) { |
478 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
479 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
480 | while ((A[i_a] == 0) || (B[i_b] == 0)) { |
481 | const __m128i res_v = _mm_cmpestrm( |
482 | v_b, vectorlength, v_a, vectorlength, |
483 | _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); |
484 | const int r = _mm_extract_epi32(res_v, 0); |
485 | count += _mm_popcnt_u32(r); |
486 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
487 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
488 | if (a_max <= b_max) { |
489 | i_a += vectorlength; |
490 | if (i_a == st_a) break; |
491 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
492 | } |
493 | if (b_max <= a_max) { |
494 | i_b += vectorlength; |
495 | if (i_b == st_b) break; |
496 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
497 | } |
498 | } |
499 | if ((i_a < st_a) && (i_b < st_b)) |
500 | while (true) { |
501 | const __m128i res_v = _mm_cmpistrm( |
502 | v_b, v_a, |
503 | _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); |
504 | const int r = _mm_extract_epi32(res_v, 0); |
505 | count += _mm_popcnt_u32(r); |
506 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
507 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
508 | if (a_max <= b_max) { |
509 | i_a += vectorlength; |
510 | if (i_a == st_a) break; |
511 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
512 | } |
513 | if (b_max <= a_max) { |
514 | i_b += vectorlength; |
515 | if (i_b == st_b) break; |
516 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
517 | } |
518 | } |
519 | } |
520 | // intersect the tail using scalar intersection |
521 | while (i_a < s_a && i_b < s_b) { |
522 | uint16_t a = A[i_a]; |
523 | uint16_t b = B[i_b]; |
524 | if (a < b) { |
525 | i_a++; |
526 | } else if (b < a) { |
527 | i_b++; |
528 | } else { |
529 | count++; |
530 | i_a++; |
531 | i_b++; |
532 | } |
533 | } |
534 | return (int32_t)count; |
535 | } |
536 | |
537 | ///////// |
538 | // Warning: |
539 | // This function may not be safe if A == C or B == C. |
540 | ///////// |
541 | int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, |
542 | const uint16_t *__restrict__ B, size_t s_b, |
543 | uint16_t *C) { |
544 | // we handle the degenerate case |
545 | if (s_a == 0) return 0; |
546 | if (s_b == 0) { |
547 | if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a); |
548 | return (int32_t)s_a; |
549 | } |
550 | // handle the leading zeroes, it is messy but it allows us to use the fast |
551 | // _mm_cmpistrm intrinsic safely |
552 | int32_t count = 0; |
553 | if ((A[0] == 0) || (B[0] == 0)) { |
554 | if ((A[0] == 0) && (B[0] == 0)) { |
555 | A++; |
556 | s_a--; |
557 | B++; |
558 | s_b--; |
559 | } else if (A[0] == 0) { |
560 | C[count++] = 0; |
561 | A++; |
562 | s_a--; |
563 | } else { |
564 | B++; |
565 | s_b--; |
566 | } |
567 | } |
568 | // at this point, we have two non-empty arrays, made of non-zero |
569 | // increasing values. |
570 | size_t i_a = 0, i_b = 0; |
571 | const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t); |
572 | const size_t st_a = (s_a / vectorlength) * vectorlength; |
573 | const size_t st_b = (s_b / vectorlength) * vectorlength; |
574 | if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path |
575 | __m128i v_a, v_b; //, v_bmax; |
576 | // we load a vector from A and a vector from B |
577 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
578 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
579 | // we have a runningmask which indicates which values from A have been |
580 | // spotted in B, these don't get written out. |
581 | __m128i runningmask_a_found_in_b = _mm_setzero_si128(); |
582 | /**** |
583 | * start of the main vectorized loop |
584 | *****/ |
585 | while (true) { |
            // a_found_in_b will contain a mask indicating, for each entry
            // in A, whether it is seen in B
589 | const __m128i a_found_in_b = |
590 | _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | |
591 | _SIDD_BIT_MASK); |
592 | runningmask_a_found_in_b = |
593 | _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); |
594 | // we always compare the last values of A and B |
595 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
596 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
597 | if (a_max <= b_max) { |
598 | // Ok. In this code path, we are ready to write our v_a |
599 | // because there is no need to read more from B, they will |
600 | // all be large values. |
601 | const int bitmask_belongs_to_difference = |
602 | _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; |
603 | /*** next few lines are probably expensive *****/ |
604 | __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + |
605 | bitmask_belongs_to_difference); |
606 | __m128i p = _mm_shuffle_epi8(v_a, sm16); |
607 | _mm_storeu_si128((__m128i *)&C[count], p); // can overflow |
608 | count += _mm_popcnt_u32(bitmask_belongs_to_difference); |
609 | // we advance a |
610 | i_a += vectorlength; |
611 | if (i_a == st_a) // no more |
612 | break; |
613 | runningmask_a_found_in_b = _mm_setzero_si128(); |
614 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
615 | } |
616 | if (b_max <= a_max) { |
617 | // in this code path, the current v_b has become useless |
618 | i_b += vectorlength; |
619 | if (i_b == st_b) break; |
620 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
621 | } |
622 | } |
623 | // at this point, either we have i_a == st_a, which is the end of the |
624 | // vectorized processing, |
625 | // or we have i_b == st_b, and we are not done processing the vector... |
626 | // so we need to finish it off. |
627 | if (i_a < st_a) { // we have unfinished business... |
628 | uint16_t buffer[8]; // buffer to do a masked load |
629 | memset(buffer, 0, 8 * sizeof(uint16_t)); |
630 | memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t)); |
631 | v_b = _mm_lddqu_si128((__m128i *)buffer); |
632 | const __m128i a_found_in_b = |
633 | _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | |
634 | _SIDD_BIT_MASK); |
635 | runningmask_a_found_in_b = |
636 | _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); |
637 | const int bitmask_belongs_to_difference = |
638 | _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; |
639 | __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + |
640 | bitmask_belongs_to_difference); |
641 | __m128i p = _mm_shuffle_epi8(v_a, sm16); |
642 | _mm_storeu_si128((__m128i *)&C[count], p); // can overflow |
643 | count += _mm_popcnt_u32(bitmask_belongs_to_difference); |
644 | i_a += vectorlength; |
645 | } |
646 | // at this point we should have i_a == st_a and i_b == st_b |
647 | } |
648 | // do the tail using scalar code |
649 | while (i_a < s_a && i_b < s_b) { |
650 | uint16_t a = A[i_a]; |
651 | uint16_t b = B[i_b]; |
652 | if (b < a) { |
653 | i_b++; |
654 | } else if (a < b) { |
655 | C[count] = a; |
656 | count++; |
657 | i_a++; |
658 | } else { //== |
659 | i_a++; |
660 | i_b++; |
661 | } |
662 | } |
663 | if (i_a < s_a) { |
664 | if(C == A) { |
665 | assert((size_t)count <= i_a); |
666 | if((size_t)count < i_a) { |
667 | memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a)); |
668 | } |
669 | } else { |
670 | for(size_t i = 0; i < (s_a - i_a); i++) { |
671 | C[count + i] = A[i + i_a]; |
672 | } |
673 | } |
674 | count += (int32_t)(s_a - i_a); |
675 | } |
676 | return count; |
677 | } |
678 | |
679 | #endif // USESSE4 |
680 | |
681 | |
682 | |
683 | #ifdef USE_OLD_SKEW_INTERSECT |
684 | // TODO: given enough experience with the new skew intersect, drop the old one from the code base. |
685 | |
686 | |
/* Computes the intersection between one small and one large set of uint16_t.
 * Stores the result into buffer and returns the number of elements.
 *
 * Legacy implementation, compiled only when USE_OLD_SKEW_INTERSECT is defined.
 * It steps through `small` one value at a time and relies on advanceUntil()
 * to move forward inside `large`.
 * NOTE(review): both inputs are presumably sorted in increasing order, and
 * advanceUntil() presumably returns size_l when no remaining value of `large`
 * reaches the requested minimum -- confirm against its definition.
 */
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
                                const uint16_t *large, size_t size_l,
                                uint16_t *buffer) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    // An empty input means an empty intersection. Testing size_l as well
    // prevents large[0] from being read when `large` has no elements.
    if (0 == size_s || 0 == size_l) {
        return 0;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // `large` is behind: jump forward to the first candidate >= val_s.
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            // `small` is behind: step to its next value.
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            // Common value: record it, then advance both sides.
            buffer[pos++] = val_s;
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        }
    }

    return (int32_t)pos;
}
722 | #else // USE_OLD_SKEW_INTERSECT |
723 | |
724 | |
/**
 * Branchless binary search that resolves four targets in a single pass.
 * Assumes that array is sorted.
 * On return, array[*index1] >= target1, array[*index2] >= target2, and so on,
 * except when an index equals n, which means every value in array is smaller
 * than the corresponding target.
 * When n is 0 the function returns immediately and leaves all four output
 * indexes untouched.
 * It has logarithmic complexity.
 */
static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1,
                          uint16_t target2, uint16_t target3, uint16_t target4,
                          int32_t *index1, int32_t *index2, int32_t *index3,
                          int32_t *index4) {
    if (n == 0) {
        return;
    }

    // One cursor per target; all four share the same shrinking window width.
    const uint16_t *lo1 = array;
    const uint16_t *lo2 = array;
    const uint16_t *lo3 = array;
    const uint16_t *lo4 = array;

    int32_t width = n;
    while (width > 1) {
        const int32_t mid = width >> 1;
        // Move a cursor forward only if the probed value is still too small.
        lo1 += (lo1[mid] < target1) ? mid : 0;
        lo2 += (lo2[mid] < target2) ? mid : 0;
        lo3 += (lo3[mid] < target3) ? mid : 0;
        lo4 += (lo4[mid] < target4) ? mid : 0;
        width -= mid;
    }

    // The cursor sits on the last candidate; step past it if it is too small.
    *index1 = (int32_t)((lo1 - array) + (*lo1 < target1));
    *index2 = (int32_t)((lo2 - array) + (*lo2 < target2));
    *index3 = (int32_t)((lo3 - array) + (*lo3 < target3));
    *index4 = (int32_t)((lo4 - array) + (*lo4 < target4));
}
756 | |
/**
 * Branchless binary search that resolves two targets in a single pass.
 * Assumes that array is sorted.
 * On return, array[*index1] >= target1 and array[*index2] >= target2,
 * except when an index equals n, which means every value in array is smaller
 * than the corresponding target.
 * When n is 0 the function returns immediately and leaves both output
 * indexes untouched.
 * It has logarithmic complexity.
 */
static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1,
                          uint16_t target2, int32_t *index1, int32_t *index2) {
    if (n == 0) {
        return;
    }

    // One cursor per target; both share the same shrinking window width.
    const uint16_t *lo1 = array;
    const uint16_t *lo2 = array;

    int32_t width = n;
    while (width > 1) {
        const int32_t mid = width >> 1;
        // Move a cursor forward only if the probed value is still too small.
        lo1 += (lo1[mid] < target1) ? mid : 0;
        lo2 += (lo2[mid] < target2) ? mid : 0;
        width -= mid;
    }

    // The cursor sits on the last candidate; step past it if it is too small.
    *index1 = (int32_t)((lo1 - array) + (*lo1 < target1));
    *index2 = (int32_t)((lo2 - array) + (*lo2 < target2));
}
780 | |
/* Computes the intersection between one small and one large set of uint16_t.
 * Stores the result into buffer and returns the number of elements.
 * Processes the small set in blocks of 4 values calling binarySearch4,
 * then at most one block of 2 values with binarySearch2, then at most one
 * single value with binarySearch. This approach can be slightly superior to a
 * conventional galloping search in some instances.
 *
 * After each block the search window in `large` is moved forward to the
 * position found for the block's last target, so later targets never search
 * the part of `large` that was already passed.
 */
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
                                const uint16_t *large, size_t size_l,
                                uint16_t *buffer) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    if (0 == size_s) {
        return 0;
    }
    // Positions returned by the searches; they are relative to large + idx_l.
    int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0;
    while ((idx_s + 4 <= size_s) && (idx_l < size_l)) {
        uint16_t target1 = small[idx_s];
        uint16_t target2 = small[idx_s + 1];
        uint16_t target3 = small[idx_s + 2];
        uint16_t target4 = small[idx_s + 3];
        binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1,
                      target2, target3, target4, &index1, &index2, &index3,
                      &index4);
        // A target is in the intersection only if its position is inside
        // `large` and the value stored there is an exact match.
        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
            buffer[pos++] = target1;
        }
        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
            buffer[pos++] = target2;
        }
        if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) {
            buffer[pos++] = target3;
        }
        if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) {
            buffer[pos++] = target4;
        }
        idx_s += 4;
        idx_l += index4;
    }
    if ((idx_s + 2 <= size_s) && (idx_l < size_l)) {
        uint16_t target1 = small[idx_s];
        uint16_t target2 = small[idx_s + 1];
        binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1,
                      target2, &index1, &index2);
        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
            buffer[pos++] = target1;
        }
        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
            buffer[pos++] = target2;
        }
        idx_s += 2;
        idx_l += index2;
    }
    if ((idx_s < size_s) && (idx_l < size_l)) {
        uint16_t val_s = small[idx_s];
        // A non-negative result from binarySearch is treated as a hit.
        int32_t index =
            binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s);
        if (index >= 0)
            buffer[pos++] = val_s;
    }
    return (int32_t)pos;
}
840 | |
841 | |
842 | #endif //USE_OLD_SKEW_INTERSECT |
843 | |
844 | |
// TODO: this could be accelerated, possibly, by using binarySearch4 as above.
/* Counts the values common to one small and one large set of uint16_t
 * without storing them, and returns that count.
 *
 * Steps through `small` one value at a time and relies on advanceUntil() to
 * move forward inside `large`.
 * NOTE(review): both inputs are presumably sorted in increasing order, and
 * advanceUntil() presumably returns size_l when no remaining value of `large`
 * reaches the requested minimum -- confirm against its definition.
 */
int32_t intersect_skewed_uint16_cardinality(const uint16_t *small,
                                            size_t size_s,
                                            const uint16_t *large,
                                            size_t size_l) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    // An empty input means an empty intersection. Testing size_l as well
    // prevents large[0] from being read when `large` has no elements.
    if (0 == size_s || 0 == size_l) {
        return 0;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // `large` is behind: jump forward to the first candidate >= val_s.
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            // `small` is behind: step to its next value.
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            // Common value: count it, then advance both sides.
            pos++;
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        }
    }

    return (int32_t)pos;
}
880 | |
/* Reports whether one small and one large set of uint16_t share at least one
 * value. Returns true at the first common value found, false otherwise.
 *
 * Steps through `small` one value at a time and relies on advanceUntil() to
 * move forward inside `large`.
 * NOTE(review): both inputs are presumably sorted in increasing order, and
 * advanceUntil() presumably returns size_l when no remaining value of `large`
 * reaches the requested minimum -- confirm against its definition.
 */
bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s,
                                      const uint16_t *large, size_t size_l) {
    size_t idx_l = 0, idx_s = 0;

    // An empty input cannot intersect anything. Testing size_l as well
    // prevents large[0] from being read when `large` has no elements.
    if (0 == size_s || 0 == size_l) {
        return false;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // `large` is behind: jump forward to the first candidate >= val_s.
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            // `small` is behind: step to its next value.
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            return true;
        }
    }

    return false;
}
907 | |
/**
 * Generic scalar intersection of two arrays of uint16_t.
 *
 * Walks A and B together: the side holding the smaller current value is
 * advanced, and whenever both current values are equal that value is appended
 * to out and both sides advance. The walk stops as soon as either input is
 * exhausted. Returns the number of values written to out (0 if either input
 * is empty).
 */
int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
                         const uint16_t *B, const size_t lenB, uint16_t *out) {
    if (lenA == 0 || lenB == 0) return 0;

    const uint16_t *const stopA = A + lenA;
    const uint16_t *const stopB = B + lenB;
    uint16_t *dst = out;

    for (;;) {
        const uint16_t a = *A;
        const uint16_t b = *B;
        if (a < b) {
            if (++A == stopA) break;
        } else if (b < a) {
            if (++B == stopB) break;
        } else {
            *dst++ = a;
            // B is only advanced when A still has values left.
            if (++A == stopA || ++B == stopB) break;
        }
    }
    return (int32_t)(dst - out);
}
935 | |
/**
 * Counts the values common to two arrays of uint16_t without storing them.
 *
 * Walks A and B together: the side holding the smaller current value is
 * advanced, and each time both current values are equal the count goes up by
 * one and both sides advance. The walk stops as soon as either input is
 * exhausted. Returns the count (0 if either input is empty).
 */
int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
                                     const uint16_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) return 0;

    const uint16_t *const stopA = A + lenA;
    const uint16_t *const stopB = B + lenB;
    int32_t matches = 0;

    for (;;) {
        const uint16_t a = *A;
        const uint16_t b = *B;
        if (a < b) {
            if (++A == stopA) break;
        } else if (b < a) {
            if (++B == stopB) break;
        } else {
            ++matches;
            // B is only advanced when A still has values left.
            if (++A == stopA || ++B == stopB) break;
        }
    }
    return matches;
}
960 | |
961 | |
/**
 * Reports whether two arrays of uint16_t share at least one value.
 *
 * Walks A and B together, always advancing the side that holds the smaller
 * current value. Returns true at the first pair of equal values, and false
 * once either input is exhausted (or if either input is empty).
 */
bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
                               const uint16_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) return false;

    const uint16_t *const stopA = A + lenA;
    const uint16_t *const stopB = B + lenB;

    for (;;) {
        const uint16_t a = *A;
        const uint16_t b = *B;
        if (a < b) {
            if (++A == stopA) return false;
        } else if (b < a) {
            if (++B == stopB) return false;
        } else {
            return true;
        }
    }
}
984 | |
985 | |
986 | |
/**
 * Generic scalar intersection of two arrays of uint32_t.
 *
 * Walks A and B together: the side holding the smaller current value is
 * advanced, and whenever both current values are equal that value is appended
 * to out and both sides advance. The walk stops as soon as either input is
 * exhausted. Returns the number of values written to out (0 if either input
 * is empty).
 */
size_t intersection_uint32(const uint32_t *A, const size_t lenA,
                           const uint32_t *B, const size_t lenB,
                           uint32_t *out) {
    if (lenA == 0 || lenB == 0) return 0;

    const uint32_t *const stopA = A + lenA;
    const uint32_t *const stopB = B + lenB;
    uint32_t *dst = out;

    for (;;) {
        const uint32_t a = *A;
        const uint32_t b = *B;
        if (a < b) {
            if (++A == stopA) break;
        } else if (b < a) {
            if (++B == stopB) break;
        } else {
            *dst++ = a;
            // B is only advanced when A still has values left.
            if (++A == stopA || ++B == stopB) break;
        }
    }
    return (size_t)(dst - out);
}
1015 | |
/**
 * Counts the values common to two arrays of uint32_t without storing them.
 *
 * Walks A and B together: the side holding the smaller current value is
 * advanced, and each time both current values are equal the count goes up by
 * one and both sides advance. The walk stops as soon as either input is
 * exhausted. Returns the count (0 if either input is empty).
 */
size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
                                const uint32_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) return 0;

    const uint32_t *const stopA = A + lenA;
    const uint32_t *const stopB = B + lenB;
    size_t matches = 0;

    for (;;) {
        const uint32_t a = *A;
        const uint32_t b = *B;
        if (a < b) {
            if (++A == stopA) break;
        } else if (b < a) {
            if (++B == stopB) break;
        } else {
            matches++;
            // B is only advanced when A still has values left.
            if (++A == stopA || ++B == stopB) break;
        }
    }
    return matches;
}
1040 | |
1041 | // can one vectorize the computation of the union? (Update: Yes! See |
1042 | // union_vector16). |
1043 | |
/**
 * Merges two arrays of uint16_t into buffer and returns the number of values
 * written. Both inputs are expected to be sorted in increasing order (the
 * merge-style walk relies on it); a value present in both inputs is written
 * only once.
 *
 * If one input is empty the other one is copied to buffer unchanged.
 * All bulk copies use memmove, so those copies stay well-defined even when
 * buffer overlaps the array being copied.
 */
size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                    size_t size_2, uint16_t *buffer) {
    size_t pos = 0, idx_1 = 0, idx_2 = 0;

    if (0 == size_2) {
        memmove(buffer, set_1, size_1 * sizeof(uint16_t));
        return size_1;
    }
    if (0 == size_1) {
        memmove(buffer, set_2, size_2 * sizeof(uint16_t));
        return size_2;
    }

    uint16_t val_1 = set_1[idx_1], val_2 = set_2[idx_2];

    while (true) {
        if (val_1 < val_2) {
            buffer[pos++] = val_1;
            ++idx_1;
            if (idx_1 >= size_1) break;
            val_1 = set_1[idx_1];
        } else if (val_2 < val_1) {
            buffer[pos++] = val_2;
            ++idx_2;
            if (idx_2 >= size_2) break;
            val_2 = set_2[idx_2];
        } else {
            // Same value on both sides: emit once, advance both.
            buffer[pos++] = val_1;
            ++idx_1;
            ++idx_2;
            if (idx_1 >= size_1 || idx_2 >= size_2) break;
            val_1 = set_1[idx_1];
            val_2 = set_2[idx_2];
        }
    }

    // At most one input still has values left; append them as they are.
    if (idx_1 < size_1) {
        const size_t n_elems = size_1 - idx_1;
        memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint16_t));
        pos += n_elems;
    } else if (idx_2 < size_2) {
        const size_t n_elems = size_2 - idx_2;
        memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint16_t));
        pos += n_elems;
    }

    return pos;
}
1092 | |
/**
 * Writes to a_out the values of a1 that do not appear in a2 and returns how
 * many values were written. Both inputs are expected to be sorted in
 * increasing order (the merge-style walk relies on it).
 *
 * If a2 is empty, a1 is copied to a_out unchanged; the copy is skipped when
 * a_out and a1 are the same pointer. Once a2 is exhausted, the remaining
 * values of a1 are appended with memmove.
 */
int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
                      int length2, uint16_t *a_out) {
    int out_card = 0;
    int k1 = 0, k2 = 0;
    if (length1 == 0) return 0;
    if (length2 == 0) {
        if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1);
        return length1;
    }
    uint16_t s1 = a1[k1];
    uint16_t s2 = a2[k2];
    while (true) {
        if (s1 < s2) {
            // s1 cannot be in a2: keep it.
            a_out[out_card++] = s1;
            ++k1;
            if (k1 >= length1) {
                break;
            }
            s1 = a1[k1];
        } else if (s1 == s2) {
            // s1 is in a2: drop it and advance both sides.
            ++k1;
            ++k2;
            if (k1 >= length1) {
                break;
            }
            if (k2 >= length2) {
                memmove(a_out + out_card, a1 + k1,
                        sizeof(uint16_t) * (length1 - k1));
                return out_card + length1 - k1;
            }
            s1 = a1[k1];
            s2 = a2[k2];
        } else {  // s1 > s2: a2 is behind, advance it
            ++k2;
            if (k2 >= length2) {
                memmove(a_out + out_card, a1 + k1,
                        sizeof(uint16_t) * (length1 - k1));
                return out_card + length1 - k1;
            }
            s2 = a2[k2];
        }
    }
    return out_card;
}
1137 | |
/**
 * Writes to out the values that appear in exactly one of array_1 and array_2
 * (symmetric difference) and returns how many values were written. Both
 * inputs are expected to be sorted in increasing order (the merge-style walk
 * relies on it).
 *
 * Once one input is exhausted, the remaining values of the other are appended
 * with memcpy, so out must not overlap the array being copied from.
 */
int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
                   const uint16_t *array_2, int32_t card_2, uint16_t *out) {
    int32_t pos1 = 0, pos2 = 0, pos_out = 0;
    while (pos1 < card_1 && pos2 < card_2) {
        const uint16_t v1 = array_1[pos1];
        const uint16_t v2 = array_2[pos2];
        if (v1 == v2) {
            // Present on both sides: excluded from the result.
            ++pos1;
            ++pos2;
            continue;
        }
        if (v1 < v2) {
            out[pos_out++] = v1;
            ++pos1;
        } else {
            out[pos_out++] = v2;
            ++pos2;
        }
    }
    // At most one input still has values left; they all belong to the result.
    if (pos1 < card_1) {
        const size_t n_elems = card_1 - pos1;
        memcpy(out + pos_out, array_1 + pos1, n_elems * sizeof(uint16_t));
        pos_out += (int32_t)n_elems;
    } else if (pos2 < card_2) {
        const size_t n_elems = card_2 - pos2;
        memcpy(out + pos_out, array_2 + pos2, n_elems * sizeof(uint16_t));
        pos_out += (int32_t)n_elems;
    }
    return pos_out;
}
1168 | |
1169 | #ifdef USESSE4 |
1170 | |
1171 | /*** |
1172 | * start of the SIMD 16-bit union code |
1173 | * |
1174 | */ |
1175 | |
1176 | // Assuming that vInput1 and vInput2 are sorted, produces a sorted output going |
1177 | // from vecMin all the way to vecMax |
1178 | // developed originally for merge sort using SIMD instructions. |
1179 | // Standard merge. See, e.g., Inoue and Taura, SIMD- and Cache-Friendly |
1180 | // Algorithm for Sorting an Array of Structures |
1181 | static inline void sse_merge(const __m128i *vInput1, |
1182 | const __m128i *vInput2, // input 1 & 2 |
1183 | __m128i *vecMin, __m128i *vecMax) { // output |
1184 | __m128i vecTmp; |
1185 | vecTmp = _mm_min_epu16(*vInput1, *vInput2); |
1186 | *vecMax = _mm_max_epu16(*vInput1, *vInput2); |
1187 | vecTmp = _mm_alignr_epi8(vecTmp, vecTmp, 2); |
1188 | *vecMin = _mm_min_epu16(vecTmp, *vecMax); |
1189 | *vecMax = _mm_max_epu16(vecTmp, *vecMax); |
1190 | vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); |
1191 | *vecMin = _mm_min_epu16(vecTmp, *vecMax); |
1192 | *vecMax = _mm_max_epu16(vecTmp, *vecMax); |
1193 | vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); |
1194 | *vecMin = _mm_min_epu16(vecTmp, *vecMax); |
1195 | *vecMax = _mm_max_epu16(vecTmp, *vecMax); |
1196 | vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); |
1197 | *vecMin = _mm_min_epu16(vecTmp, *vecMax); |
1198 | *vecMax = _mm_max_epu16(vecTmp, *vecMax); |
1199 | vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); |
1200 | *vecMin = _mm_min_epu16(vecTmp, *vecMax); |
1201 | *vecMax = _mm_max_epu16(vecTmp, *vecMax); |
1202 | vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); |
1203 | *vecMin = _mm_min_epu16(vecTmp, *vecMax); |
1204 | *vecMax = _mm_max_epu16(vecTmp, *vecMax); |
1205 | vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); |
1206 | *vecMin = _mm_min_epu16(vecTmp, *vecMax); |
1207 | *vecMax = _mm_max_epu16(vecTmp, *vecMax); |
1208 | *vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2); |
1209 | } |
1210 | |
1211 | // used by store_unique, generated by simdunion.py |
1212 | static uint8_t uniqshuf[] = { |
1213 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1214 | 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1215 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1216 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1217 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1218 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, |
1219 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1220 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1221 | 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1222 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1223 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1224 | 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1225 | 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1226 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, |
1227 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, |
1228 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1229 | 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1230 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1231 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, |
1232 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1233 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1234 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1235 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1236 | 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1237 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1238 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, |
1239 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1240 | 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1241 | 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1242 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, |
1243 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, |
1244 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1245 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1246 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, |
1247 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1248 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1249 | 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1250 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, |
1251 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, |
1252 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1253 | 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1254 | 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1255 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1256 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1257 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, |
1258 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1259 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1260 | 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1261 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, |
1262 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, |
1263 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1264 | 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1265 | 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1266 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, |
1267 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1268 | 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1269 | 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1270 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, |
1271 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1272 | 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1273 | 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1274 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, |
1275 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, |
1276 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1277 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, |
1278 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, |
1279 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1280 | 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1281 | 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1282 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, |
1283 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1284 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1285 | 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1286 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1287 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1288 | 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1289 | 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1290 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, |
1291 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, |
1292 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1293 | 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1294 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1295 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, |
1296 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1297 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1298 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1299 | 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1300 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1301 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, |
1302 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1303 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1304 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1305 | 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1306 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1307 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, |
1308 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1309 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, |
1310 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, |
1311 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1312 | 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1313 | 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1314 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, |
1315 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, |
1316 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1317 | 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1318 | 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1319 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1320 | 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1321 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1322 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, |
1323 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1324 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1325 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1326 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, |
1327 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1328 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1329 | 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1330 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, |
1331 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1332 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1333 | 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1334 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1335 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1336 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1337 | 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1338 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1339 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf, |
1340 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1341 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, |
1342 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1343 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1344 | 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1345 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1346 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, |
1347 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1348 | 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1349 | 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1350 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, |
1351 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1352 | 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1353 | 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1354 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, |
1355 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, |
1356 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1357 | 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1358 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, |
1359 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, |
1360 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1361 | 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1362 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1363 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1364 | 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1365 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1366 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, |
1367 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1368 | 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1369 | 0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1370 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, |
1371 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf, |
1372 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1373 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1374 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, |
1375 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1376 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1377 | 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1378 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF, |
1379 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf, |
1380 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1381 | 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1382 | 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1383 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1384 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, |
1385 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1386 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1387 | 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1388 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1389 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1390 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1391 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1392 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1393 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1394 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, |
1395 | 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1396 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1397 | 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, |
1398 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1399 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1400 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1401 | 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1402 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1403 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, |
1404 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1405 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, |
1406 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, |
1407 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1408 | 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1409 | 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1410 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, |
1411 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1412 | 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1413 | 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1414 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, |
1415 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1416 | 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1417 | 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1418 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, |
1419 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, |
1420 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1421 | 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1422 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, |
1423 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, |
1424 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1425 | 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1426 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1427 | 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1428 | 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1429 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, |
1430 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, |
1431 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1432 | 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1433 | 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1434 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, |
1435 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, |
1436 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1437 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, |
1438 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, |
1439 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1440 | 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1441 | 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1442 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, |
1443 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, |
1444 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1445 | 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1446 | 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1447 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1448 | 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1449 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1450 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, |
1451 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1452 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1453 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1454 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, |
1455 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1456 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1457 | 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1458 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, |
1459 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1460 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1461 | 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1462 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1463 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1464 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1465 | 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1466 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1467 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF, |
1468 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1469 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1470 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1471 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1472 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1473 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1474 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, |
1475 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1476 | 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1477 | 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1478 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, |
1479 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1480 | 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1481 | 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1482 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, |
1483 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, |
1484 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1485 | 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1486 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, |
1487 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, |
1488 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1489 | 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1490 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1491 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1492 | 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1493 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1494 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, |
1495 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1496 | 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1497 | 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1498 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, |
1499 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, |
1500 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1501 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1502 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, |
1503 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1504 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1505 | 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1506 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF, |
1507 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, |
1508 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1509 | 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1510 | 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1511 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1512 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1513 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, |
1514 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1515 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1516 | 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1517 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, |
1518 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, |
1519 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1520 | 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1521 | 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1522 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, |
1523 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1524 | 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1525 | 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1526 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, |
1527 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1528 | 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1529 | 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1530 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, |
1531 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF, |
1532 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1533 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, |
1534 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, |
1535 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1536 | 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1537 | 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1538 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF, |
1539 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1540 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1541 | 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1542 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1543 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1544 | 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1545 | 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1546 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, |
1547 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF, |
1548 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1549 | 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1550 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1551 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF, |
1552 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1553 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1554 | 0xFF, 0xFF, 0xFF, 0xFF}; |
1555 | |
1556 | // write vector new, while omitting repeated values assuming that previously |
1557 | // written vector was "old" |
1558 | static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) { |
1559 | __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2); |
1560 | // lots of high latency instructions follow (optimize?) |
1561 | int M = _mm_movemask_epi8( |
1562 | _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128())); |
1563 | int numberofnewvalues = 8 - _mm_popcnt_u32(M); |
1564 | __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); |
1565 | __m128i val = _mm_shuffle_epi8(newval, key); |
1566 | _mm_storeu_si128((__m128i *)output, val); |
1567 | return numberofnewvalues; |
1568 | } |
1569 | |
// Collapse runs of equal adjacent values in out[0..len) in place (could be
// avoided?). The surviving values are packed at the front of `out`.
//
// Returns the number of values kept. An empty range yields 0; without the
// guard below the counter would start at 1 and report one element that does
// not exist.
static inline uint32_t unique(uint16_t *out, uint32_t len) {
    if (len == 0) {
        return 0;
    }
    uint32_t pos = 1;  // out[0] is always kept
    for (uint32_t i = 1; i < len; ++i) {
        // pos <= i, so the write never clobbers a value still to be compared
        if (out[i] != out[i - 1]) {
            out[pos++] = out[i];
        }
    }
    return pos;
}
1581 | |
// qsort comparator for uint16_t elements, ascending order (could be avoided).
// Both operands are widened to int first, so the difference cannot wrap.
static int uint16_compare(const void *a, const void *b) {
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;
    return (int)lhs - (int)rhs;
}
1586 | |
// a one-pass SSE union algorithm
// This function may not be safe if array1 == output or array2 == output.
//
// Writes the union of array1[0..length1) and array2[0..length2) to `output`
// and returns the number of values written.
// NOTE(review): the inputs are presumably sorted and free of duplicates, as
// the merge below relies on ordering — confirm against callers.
// The vector stores always write 16 bytes, so `output` needs room beyond the
// values actually counted.
uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
                        const uint16_t *__restrict__ array2, uint32_t length2,
                        uint16_t *__restrict__ output) {
    // fewer than one full 8-lane vector on either side: use the scalar routine
    if ((length1 < 8) || (length2 < 8)) {
        return (uint32_t)union_uint16(array1, length1, array2, length2, output);
    }
    __m128i vA, vB, V, vecMin, vecMax;
    __m128i laststore;
    uint16_t *initoutput = output;
    // number of complete 8-value vectors in each input
    uint32_t len1 = length1 / 8;
    uint32_t len2 = length2 / 8;
    // pos1/pos2 count vectors already consumed, not individual values
    uint32_t pos1 = 0;
    uint32_t pos2 = 0;
    // we start the machine
    vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
    pos1++;
    vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
    pos2++;
    // NOTE(review): sse_merge is defined elsewhere; it appears to leave the
    // lower half of the 16 merged lanes in vecMin and the upper half in
    // vecMax — confirm.
    sse_merge(&vA, &vB, &vecMin, &vecMax);
    // seed the "previously written" vector with 0xFFFF in every lane
    laststore = _mm_set1_epi16(-1);
    output += store_unique(laststore, vecMin, output);
    laststore = vecMin;
    if ((pos1 < len1) && (pos2 < len2)) {
        // first value of the next unread vector on each side
        uint16_t curA, curB;
        curA = array1[8 * pos1];
        curB = array2[8 * pos2];
        while (true) {
            // pull the next vector from whichever side starts lower
            if (curA <= curB) {
                V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
                pos1++;
                if (pos1 < len1) {
                    curA = array1[8 * pos1];
                } else {
                    // V is loaded but not merged yet; handled after the loop
                    break;
                }
            } else {
                V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
                pos2++;
                if (pos2 < len2) {
                    curB = array2[8 * pos2];
                } else {
                    // V is loaded but not merged yet; handled after the loop
                    break;
                }
            }
            sse_merge(&V, &vecMax, &vecMin, &vecMax);
            output += store_unique(laststore, vecMin, output);
            laststore = vecMin;
        }
        // merge the vector that was loaded right before the break
        sse_merge(&V, &vecMax, &vecMin, &vecMax);
        output += store_unique(laststore, vecMin, output);
        laststore = vecMin;
    }
    // we finish the rest off using a scalar algorithm
    // could be improved?
    //
    // copy the small end on a tmp buffer
    uint32_t len = (uint32_t)(output - initoutput);
    // at most 8 values from vecMax plus at most 7 tail values (length % 8)
    uint16_t buffer[16];
    uint32_t leftoversize = store_unique(laststore, vecMax, buffer);
    if (pos1 == len1) {
        // all full vectors of array1 are consumed: append its tail to the
        // pending vecMax values, then union with what is left of array2
        memcpy(buffer + leftoversize, array1 + 8 * pos1,
               (length1 - 8 * len1) * sizeof(uint16_t));
        leftoversize += length1 - 8 * len1;
        // the appended tail is not ordered relative to the vecMax values
        qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);

        leftoversize = unique(buffer, leftoversize);
        len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2,
                                      length2 - 8 * pos2, output);
    } else {
        // otherwise array2 is the side whose full vectors are consumed
        memcpy(buffer + leftoversize, array2 + 8 * pos2,
               (length2 - 8 * len2) * sizeof(uint16_t));
        leftoversize += length2 - 8 * len2;
        qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
        leftoversize = unique(buffer, leftoversize);
        len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1,
                                      length1 - 8 * pos1, output);
    }
    return len;
}
1668 | |
1669 | /** |
1670 | * End of the SIMD 16-bit union code |
1671 | * |
1672 | */ |
1673 | |
1674 | /** |
1675 | * Start of SIMD 16-bit XOR code |
1676 | */ |
1677 | |
1678 | // write vector new, while omitting repeated values assuming that previously |
1679 | // written vector was "old" |
1680 | static inline int store_unique_xor(__m128i old, __m128i newval, |
1681 | uint16_t *output) { |
1682 | __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4); |
1683 | __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2); |
1684 | __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1); |
1685 | __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval); |
1686 | __m128i equalleftoright = _mm_or_si128(equalleft, equalright); |
1687 | int M = _mm_movemask_epi8( |
1688 | _mm_packs_epi16(equalleftoright, _mm_setzero_si128())); |
1689 | int numberofnewvalues = 8 - _mm_popcnt_u32(M); |
1690 | __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); |
1691 | __m128i val = _mm_shuffle_epi8(vecTmp2, key); |
1692 | _mm_storeu_si128((__m128i *)output, val); |
1693 | return numberofnewvalues; |
1694 | } |
1695 | |
// In-place pass over out[0..len) that drops both members of every pair of
// equal adjacent values and packs the survivors at the front (could be
// avoided?). Returns the number of survivors.
// Warning: assumes len > 0
static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
    uint32_t kept = 1;
    uint32_t idx = 1;
    while (idx < len) {
        if (out[idx] == out[idx - 1]) {
            kept--;  // identical to the previous value: take that one back too
        } else {
            out[kept] = out[idx];
            kept++;
        }
        idx++;
    }
    return kept;
}
1708 | |
// a one-pass SSE xor algorithm
//
// Writes the symmetric difference of array1[0..length1) and
// array2[0..length2) to `output` and returns the number of values written.
// NOTE(review): the inputs are presumably sorted and free of duplicates, as
// the merge below relies on ordering — confirm against callers.
// The vector stores always write 16 bytes, so `output` needs room beyond the
// values actually counted.
uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
                      const uint16_t *__restrict__ array2, uint32_t length2,
                      uint16_t *__restrict__ output) {
    // fewer than one full 8-lane vector on either side: use the scalar routine
    if ((length1 < 8) || (length2 < 8)) {
        return xor_uint16(array1, length1, array2, length2, output);
    }
    __m128i vA, vB, V, vecMin, vecMax;
    __m128i laststore;
    uint16_t *initoutput = output;
    // number of complete 8-value vectors in each input
    uint32_t len1 = length1 / 8;
    uint32_t len2 = length2 / 8;
    // pos1/pos2 count vectors already consumed, not individual values
    uint32_t pos1 = 0;
    uint32_t pos2 = 0;
    // we start the machine
    vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
    pos1++;
    vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
    pos2++;
    // NOTE(review): sse_merge is defined elsewhere; it appears to leave the
    // lower half of the 16 merged lanes in vecMin and the upper half in
    // vecMax — confirm.
    sse_merge(&vA, &vB, &vecMin, &vecMax);
    // seed the "previously written" vector with 0xFFFF in every lane
    laststore = _mm_set1_epi16(-1);
    // scratch space for the scalar tail: up to 8 values from the vector
    // store, 1 for the last lane of vecMax, and up to 7 tail values
    uint16_t buffer[17];
    output += store_unique_xor(laststore, vecMin, output);

    laststore = vecMin;
    if ((pos1 < len1) && (pos2 < len2)) {
        // first value of the next unread vector on each side
        uint16_t curA, curB;
        curA = array1[8 * pos1];
        curB = array2[8 * pos2];
        while (true) {
            // pull the next vector from whichever side starts lower
            if (curA <= curB) {
                V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
                pos1++;
                if (pos1 < len1) {
                    curA = array1[8 * pos1];
                } else {
                    // V is loaded but not merged yet; handled after the loop
                    break;
                }
            } else {
                V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
                pos2++;
                if (pos2 < len2) {
                    curB = array2[8 * pos2];
                } else {
                    // V is loaded but not merged yet; handled after the loop
                    break;
                }
            }
            sse_merge(&V, &vecMax, &vecMin, &vecMax);
            // conditionally stores the last value of laststore as well as all
            // but the
            // last value of vecMin
            output += store_unique_xor(laststore, vecMin, output);
            laststore = vecMin;
        }
        // merge the vector that was loaded right before the break
        sse_merge(&V, &vecMax, &vecMin, &vecMax);
        // conditionally stores the last value of laststore as well as all but
        // the
        // last value of vecMin
        output += store_unique_xor(laststore, vecMin, output);
        laststore = vecMin;
    }
    uint32_t len = (uint32_t)(output - initoutput);

    // we finish the rest off using a scalar algorithm
    // could be improved?
    // conditionally stores the last value of laststore as well as all but the
    // last value of vecMax,
    // we store to "buffer"
    int leftoversize = store_unique_xor(laststore, vecMax, buffer);
    // the last lane of vecMax is not covered by the vector store above; keep
    // it unless it equals the lane before it
    uint16_t vec7 = _mm_extract_epi16(vecMax, 7);
    uint16_t vec6 = _mm_extract_epi16(vecMax, 6);
    if (vec7 != vec6) buffer[leftoversize++] = vec7;
    if (pos1 == len1) {
        // all full vectors of array1 are consumed: append its tail to the
        // pending values, then xor with what is left of array2
        memcpy(buffer + leftoversize, array1 + 8 * pos1,
               (length1 - 8 * len1) * sizeof(uint16_t));
        leftoversize += length1 - 8 * len1;
        if (leftoversize == 0) {  // trivial case
            // nothing pending: the remainder of array2 is copied verbatim
            memcpy(output, array2 + 8 * pos2,
                   (length2 - 8 * pos2) * sizeof(uint16_t));
            len += (length2 - 8 * pos2);
        } else {
            // the appended tail is not ordered relative to the pending values
            qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
            leftoversize = unique_xor(buffer, leftoversize);
            len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2,
                              length2 - 8 * pos2, output);
        }
    } else {
        // otherwise array2 is the side whose full vectors are consumed
        memcpy(buffer + leftoversize, array2 + 8 * pos2,
               (length2 - 8 * len2) * sizeof(uint16_t));
        leftoversize += length2 - 8 * len2;
        if (leftoversize == 0) {  // trivial case
            // nothing pending: the remainder of array1 is copied verbatim
            memcpy(output, array1 + 8 * pos1,
                   (length1 - 8 * pos1) * sizeof(uint16_t));
            len += (length1 - 8 * pos1);
        } else {
            qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
            leftoversize = unique_xor(buffer, leftoversize);
            len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1,
                              length1 - 8 * pos1, output);
        }
    }
    return len;
}
1812 | |
1813 | /** |
1814 | * End of SIMD 16-bit XOR code |
1815 | */ |
1816 | |
1817 | #endif // USESSE4 |
1818 | |
/**
 * Merge two ascending uint32_t sets into `buffer`, writing values present in
 * both inputs only once.
 *
 * @param set_1  first input, size_1 values in ascending order
 * @param size_1 number of values in set_1 (may be 0)
 * @param set_2  second input, size_2 values in ascending order
 * @param size_2 number of values in set_2 (may be 0)
 * @param buffer destination; needs room for size_1 + size_2 values in the
 *               worst case. Copies go through memmove, so the bulk copies
 *               tolerate a destination that overlaps an input.
 * @return number of values written to buffer
 */
size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
                    size_t size_2, uint32_t *buffer) {
    size_t pos = 0, idx_1 = 0, idx_2 = 0;

    // One side empty: the result is a plain copy of the other side. The copy
    // is skipped for zero lengths so a NULL empty set is never handed to
    // memmove.
    if (0 == size_2) {
        if (size_1 > 0) {
            memmove(buffer, set_1, size_1 * sizeof(uint32_t));
        }
        return size_1;
    }
    if (0 == size_1) {
        memmove(buffer, set_2, size_2 * sizeof(uint32_t));
        return size_2;
    }

    uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2];

    // classic two-pointer merge; stops as soon as either input runs out
    while (true) {
        if (val_1 < val_2) {
            buffer[pos++] = val_1;
            ++idx_1;
            if (idx_1 >= size_1) break;
            val_1 = set_1[idx_1];
        } else if (val_2 < val_1) {
            buffer[pos++] = val_2;
            ++idx_2;
            if (idx_2 >= size_2) break;
            val_2 = set_2[idx_2];
        } else {
            // equal on both sides: emit once, advance both
            buffer[pos++] = val_1;
            ++idx_1;
            ++idx_2;
            if (idx_1 >= size_1 || idx_2 >= size_2) break;
            val_1 = set_1[idx_1];
            val_2 = set_2[idx_2];
        }
    }

    // at most one input still has values left; append them in bulk
    if (idx_1 < size_1) {
        const size_t n_elems = size_1 - idx_1;
        memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint32_t));
        pos += n_elems;
    } else if (idx_2 < size_2) {
        const size_t n_elems = size_2 - idx_2;
        memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint32_t));
        pos += n_elems;
    }

    return pos;
}
1867 | |
/**
 * Count how many values union_uint32-style merging of two ascending sets
 * would produce, without writing the result anywhere.
 *
 * @param set_1  first input, size_1 values in ascending order
 * @param size_1 number of values in set_1 (may be 0)
 * @param set_2  second input, size_2 values in ascending order
 * @param size_2 number of values in set_2 (may be 0)
 * @return number of merge steps plus whatever remains on the longer side
 */
size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
                         const uint32_t *set_2, size_t size_2) {
    size_t count = 0;
    size_t i = 0;
    size_t j = 0;

    // every iteration accounts for exactly one value of the union
    while (i < size_1 && j < size_2) {
        const uint32_t lhs = set_1[i];
        const uint32_t rhs = set_2[j];
        if (lhs < rhs) {
            ++i;
        } else if (rhs < lhs) {
            ++j;
        } else {
            // shared value: consume it from both inputs, count it once
            ++i;
            ++j;
        }
        ++count;
    }

    // whichever input is not exhausted contributes all of its remaining
    // values; the other term is zero
    return count + (size_1 - i) + (size_2 - j);
}
1911 | |
1912 | |
1913 | |
/**
 * Union of two uint16_t sets into `buffer`, dispatching to the vectorized
 * routine when ROARING_VECTOR_OPERATIONS_ENABLED is defined and to the scalar
 * union_uint16 otherwise. In both cases the shorter input is passed first.
 *
 * @param set_1  first input set
 * @param size_1 number of values in set_1
 * @param set_2  second input set
 * @param size_2 number of values in set_2
 * @param buffer destination for the merged values
 * @return whatever the selected union routine returns
 *         (presumably the number of values written — confirm against
 *         union_uint16, which is declared outside this block)
 */
size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                         size_t size_2, uint16_t *buffer) {
#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
    // compute union with smallest array first
    if (size_1 < size_2) {
        return union_vector16(set_1, (uint32_t)size_1,
                              set_2, (uint32_t)size_2, buffer);
    } else {
        return union_vector16(set_2, (uint32_t)size_2,
                              set_1, (uint32_t)size_1, buffer);
    }
#else
    // compute union with smallest array first
    if (size_1 < size_2) {
        return union_uint16(
            set_1, size_1, set_2, size_2, buffer);
    } else {
        // arguments are plain positional C arguments, swapped so that the
        // shorter (or equal-length) set_2 goes first
        return union_uint16(
            set_2, size_2, set_1, size_1, buffer);
    }
#endif
}
1936 | |
/**
 * Byte-wise equality of two memory regions.
 *
 * @param s1 first region
 * @param s2 second region
 * @param n  number of bytes to compare; 0 always compares equal and neither
 *           pointer is touched
 * @return true when the first n bytes of s1 and s2 are identical
 *
 * With USEAVX defined the comparison runs in 32-byte, then 8-byte, then
 * single-byte steps; otherwise it defers to memcmp.
 */
bool memequals(const void *s1, const void *s2, size_t n) {
    if (n == 0) {
        return true;
    }
#ifdef USEAVX
    const uint8_t *ptr1 = (const uint8_t *)s1;
    const uint8_t *ptr2 = (const uint8_t *)s2;
    const uint8_t *end1 = ptr1 + n;
    // n rounded down to a multiple of 8 / 32: limits of the wide loops
    const uint8_t *end8 = ptr1 + n/8*8;
    const uint8_t *end32 = ptr1 + n/32*32;

    while (ptr1 < end32) {
        __m256i r1 = _mm256_loadu_si256((const __m256i*)ptr1);
        __m256i r2 = _mm256_loadu_si256((const __m256i*)ptr2);
        // one mask bit per byte; all 32 set means the whole chunk matches
        int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
        if ((uint32_t)mask != UINT32_MAX) {
            return false;
        }
        ptr1 += 32;
        ptr2 += 32;
    }

    while (ptr1 < end8) {
        // memcpy instead of dereferencing a casted uint64_t pointer: the
        // inputs carry no alignment guarantee and may not be uint64_t objects
        uint64_t v1;
        uint64_t v2;
        memcpy(&v1, ptr1, sizeof v1);
        memcpy(&v2, ptr2, sizeof v2);
        if (v1 != v2) {
            return false;
        }
        ptr1 += 8;
        ptr2 += 8;
    }

    while (ptr1 < end1) {
        if (*ptr1 != *ptr2) {
            return false;
        }
        ptr1++;
        ptr2++;
    }

    return true;
#else
    return memcmp(s1, s2, n) == 0;
#endif
}
1982 | /* end file src/array_util.c */ |
1983 | /* begin file src/bitset_util.c */ |
1984 | #include <assert.h> |
1985 | #include <stdint.h> |
1986 | #include <stdio.h> |
1987 | #include <stdlib.h> |
1988 | #include <string.h> |
1989 | |
1990 | |
#ifdef IS_X64
// Per-byte population count: lengthTable[b] is the number of set bits in the
// byte value b, for b in 0..255.
// NOTE(review): the code that reads this table is outside this chunk;
// presumably it sizes the output of a per-byte bit-extraction step — confirm.
static uint8_t lengthTable[256] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
    2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
    2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
    4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
    3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
    4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
#endif
2005 | |
2006 | #ifdef USEAVX |
2007 | ALIGNED(32) |
2008 | static uint32_t vecDecodeTable[256][8] = { |
2009 | {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ |
2010 | {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ |
2011 | {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ |
2012 | {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ |
2013 | {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ |
2014 | {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ |
2015 | {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ |
2016 | {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ |
2017 | {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ |
2018 | {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ |
2019 | {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ |
2020 | {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ |
2021 | {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ |
2022 | {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ |
2023 | {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ |
2024 | {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ |
2025 | {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ |
2026 | {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ |
2027 | {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ |
2028 | {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ |
2029 | {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ |
2030 | {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ |
2031 | {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ |
2032 | {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ |
2033 | {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ |
2034 | {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ |
2035 | {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ |
2036 | {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ |
2037 | {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ |
2038 | {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ |
2039 | {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ |
2040 | {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ |
2041 | {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ |
2042 | {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ |
2043 | {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ |
2044 | {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ |
2045 | {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ |
2046 | {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ |
2047 | {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ |
2048 | {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ |
2049 | {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ |
2050 | {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ |
2051 | {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ |
2052 | {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ |
2053 | {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ |
2054 | {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ |
2055 | {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ |
2056 | {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ |
2057 | {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ |
2058 | {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ |
2059 | {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ |
2060 | {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ |
2061 | {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ |
2062 | {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ |
2063 | {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ |
2064 | {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ |
2065 | {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ |
2066 | {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ |
2067 | {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ |
2068 | {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ |
2069 | {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ |
2070 | {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ |
2071 | {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ |
2072 | {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ |
2073 | {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ |
2074 | {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ |
2075 | {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ |
2076 | {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ |
2077 | {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ |
2078 | {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ |
2079 | {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ |
2080 | {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ |
2081 | {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ |
2082 | {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ |
2083 | {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ |
2084 | {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ |
2085 | {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ |
2086 | {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ |
2087 | {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ |
2088 | {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ |
2089 | {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ |
2090 | {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ |
2091 | {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ |
2092 | {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ |
2093 | {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ |
2094 | {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ |
2095 | {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ |
2096 | {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ |
2097 | {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ |
2098 | {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ |
2099 | {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ |
2100 | {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ |
2101 | {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ |
2102 | {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ |
2103 | {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ |
2104 | {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ |
2105 | {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ |
2106 | {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ |
2107 | {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ |
2108 | {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ |
2109 | {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ |
2110 | {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ |
2111 | {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ |
2112 | {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ |
2113 | {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ |
2114 | {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ |
2115 | {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ |
2116 | {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ |
2117 | {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ |
2118 | {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ |
2119 | {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ |
2120 | {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ |
2121 | {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ |
2122 | {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ |
2123 | {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ |
2124 | {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ |
2125 | {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ |
2126 | {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ |
2127 | {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ |
2128 | {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ |
2129 | {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ |
2130 | {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ |
2131 | {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ |
2132 | {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ |
2133 | {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ |
2134 | {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ |
2135 | {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ |
2136 | {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ |
2137 | {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ |
2138 | {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ |
2139 | {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ |
2140 | {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ |
2141 | {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ |
2142 | {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ |
2143 | {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ |
2144 | {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ |
2145 | {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ |
2146 | {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ |
2147 | {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ |
2148 | {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ |
2149 | {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ |
2150 | {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ |
2151 | {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ |
2152 | {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ |
2153 | {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ |
2154 | {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ |
2155 | {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ |
2156 | {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ |
2157 | {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ |
2158 | {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ |
2159 | {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ |
2160 | {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ |
2161 | {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ |
2162 | {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ |
2163 | {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ |
2164 | {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ |
2165 | {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ |
2166 | {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ |
2167 | {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ |
2168 | {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ |
2169 | {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ |
2170 | {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ |
2171 | {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ |
2172 | {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ |
2173 | {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ |
2174 | {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ |
2175 | {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ |
2176 | {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ |
2177 | {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ |
2178 | {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ |
2179 | {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ |
2180 | {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ |
2181 | {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ |
2182 | {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ |
2183 | {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ |
2184 | {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ |
2185 | {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ |
2186 | {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ |
2187 | {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ |
2188 | {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ |
2189 | {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ |
2190 | {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ |
2191 | {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ |
2192 | {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ |
2193 | {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ |
2194 | {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ |
2195 | {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ |
2196 | {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ |
2197 | {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ |
2198 | {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ |
2199 | {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ |
2200 | {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ |
2201 | {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ |
2202 | {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ |
2203 | {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ |
2204 | {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ |
2205 | {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ |
2206 | {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ |
2207 | {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ |
2208 | {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ |
2209 | {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ |
2210 | {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ |
2211 | {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ |
2212 | {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ |
2213 | {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ |
2214 | {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ |
2215 | {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ |
2216 | {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ |
2217 | {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ |
2218 | {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ |
2219 | {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ |
2220 | {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ |
2221 | {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ |
2222 | {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ |
2223 | {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ |
2224 | {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ |
2225 | {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ |
2226 | {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ |
2227 | {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ |
2228 | {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ |
2229 | {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ |
2230 | {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ |
2231 | {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ |
2232 | {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ |
2233 | {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ |
2234 | {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ |
2235 | {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ |
2236 | {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ |
2237 | {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ |
2238 | {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ |
2239 | {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ |
2240 | {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ |
2241 | {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ |
2242 | {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ |
2243 | {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ |
2244 | {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ |
2245 | {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ |
2246 | {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ |
2247 | {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ |
2248 | {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ |
2249 | {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ |
2250 | {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ |
2251 | {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ |
2252 | {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ |
2253 | {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ |
2254 | {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ |
2255 | {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ |
2256 | {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ |
2257 | {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ |
2258 | {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ |
2259 | {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ |
2260 | {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ |
2261 | {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ |
2262 | {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ |
2263 | {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ |
2264 | {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ |
2265 | }; |
2266 | |
2267 | #endif // #ifdef USEAVX |
2268 | |
2269 | #ifdef IS_X64 |
2270 | // same as vecDecodeTable but in 16 bits |
2271 | ALIGNED(32) |
2272 | static uint16_t vecDecodeTable_uint16[256][8] = { |
2273 | {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ |
2274 | {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ |
2275 | {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ |
2276 | {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ |
2277 | {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ |
2278 | {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ |
2279 | {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ |
2280 | {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ |
2281 | {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ |
2282 | {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ |
2283 | {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ |
2284 | {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ |
2285 | {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ |
2286 | {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ |
2287 | {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ |
2288 | {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ |
2289 | {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ |
2290 | {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ |
2291 | {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ |
2292 | {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ |
2293 | {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ |
2294 | {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ |
2295 | {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ |
2296 | {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ |
2297 | {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ |
2298 | {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ |
2299 | {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ |
2300 | {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ |
2301 | {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ |
2302 | {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ |
2303 | {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ |
2304 | {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ |
2305 | {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ |
2306 | {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ |
2307 | {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ |
2308 | {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ |
2309 | {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ |
2310 | {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ |
2311 | {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ |
2312 | {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ |
2313 | {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ |
2314 | {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ |
2315 | {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ |
2316 | {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ |
2317 | {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ |
2318 | {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ |
2319 | {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ |
2320 | {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ |
2321 | {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ |
2322 | {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ |
2323 | {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ |
2324 | {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ |
2325 | {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ |
2326 | {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ |
2327 | {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ |
2328 | {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ |
2329 | {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ |
2330 | {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ |
2331 | {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ |
2332 | {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ |
2333 | {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ |
2334 | {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ |
2335 | {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ |
2336 | {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ |
2337 | {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ |
2338 | {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ |
2339 | {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ |
2340 | {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ |
2341 | {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ |
2342 | {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ |
2343 | {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ |
2344 | {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ |
2345 | {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ |
2346 | {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ |
2347 | {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ |
2348 | {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ |
2349 | {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ |
2350 | {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ |
2351 | {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ |
2352 | {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ |
2353 | {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ |
2354 | {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ |
2355 | {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ |
2356 | {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ |
2357 | {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ |
2358 | {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ |
2359 | {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ |
2360 | {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ |
2361 | {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ |
2362 | {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ |
2363 | {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ |
2364 | {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ |
2365 | {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ |
2366 | {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ |
2367 | {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ |
2368 | {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ |
2369 | {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ |
2370 | {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ |
2371 | {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ |
2372 | {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ |
2373 | {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ |
2374 | {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ |
2375 | {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ |
2376 | {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ |
2377 | {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ |
2378 | {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ |
2379 | {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ |
2380 | {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ |
2381 | {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ |
2382 | {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ |
2383 | {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ |
2384 | {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ |
2385 | {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ |
2386 | {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ |
2387 | {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ |
2388 | {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ |
2389 | {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ |
2390 | {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ |
2391 | {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ |
2392 | {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ |
2393 | {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ |
2394 | {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ |
2395 | {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ |
2396 | {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ |
2397 | {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ |
2398 | {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ |
2399 | {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ |
2400 | {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ |
2401 | {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ |
2402 | {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ |
2403 | {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ |
2404 | {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ |
2405 | {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ |
2406 | {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ |
2407 | {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ |
2408 | {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ |
2409 | {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ |
2410 | {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ |
2411 | {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ |
2412 | {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ |
2413 | {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ |
2414 | {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ |
2415 | {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ |
2416 | {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ |
2417 | {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ |
2418 | {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ |
2419 | {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ |
2420 | {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ |
2421 | {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ |
2422 | {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ |
2423 | {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ |
2424 | {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ |
2425 | {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ |
2426 | {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ |
2427 | {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ |
2428 | {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ |
2429 | {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ |
2430 | {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ |
2431 | {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ |
2432 | {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ |
2433 | {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ |
2434 | {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ |
2435 | {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ |
2436 | {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ |
2437 | {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ |
2438 | {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ |
2439 | {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ |
2440 | {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ |
2441 | {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ |
2442 | {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ |
2443 | {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ |
2444 | {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ |
2445 | {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ |
2446 | {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ |
2447 | {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ |
2448 | {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ |
2449 | {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ |
2450 | {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ |
2451 | {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ |
2452 | {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ |
2453 | {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ |
2454 | {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ |
2455 | {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ |
2456 | {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ |
2457 | {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ |
2458 | {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ |
2459 | {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ |
2460 | {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ |
2461 | {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ |
2462 | {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ |
2463 | {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ |
2464 | {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ |
2465 | {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ |
2466 | {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ |
2467 | {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ |
2468 | {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ |
2469 | {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ |
2470 | {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ |
2471 | {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ |
2472 | {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ |
2473 | {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ |
2474 | {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ |
2475 | {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ |
2476 | {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ |
2477 | {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ |
2478 | {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ |
2479 | {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ |
2480 | {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ |
2481 | {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ |
2482 | {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ |
2483 | {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ |
2484 | {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ |
2485 | {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ |
2486 | {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ |
2487 | {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ |
2488 | {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ |
2489 | {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ |
2490 | {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ |
2491 | {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ |
2492 | {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ |
2493 | {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ |
2494 | {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ |
2495 | {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ |
2496 | {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ |
2497 | {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ |
2498 | {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ |
2499 | {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ |
2500 | {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ |
2501 | {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ |
2502 | {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ |
2503 | {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ |
2504 | {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ |
2505 | {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ |
2506 | {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ |
2507 | {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ |
2508 | {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ |
2509 | {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ |
2510 | {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ |
2511 | {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ |
2512 | {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ |
2513 | {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ |
2514 | {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ |
2515 | {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ |
2516 | {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ |
2517 | {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ |
2518 | {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ |
2519 | {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ |
2520 | {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ |
2521 | {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ |
2522 | {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ |
2523 | {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ |
2524 | {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ |
2525 | {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ |
2526 | {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ |
2527 | {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ |
2528 | {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ |
2529 | }; |
2530 | |
2531 | #endif |
2532 | |
2533 | #ifdef USEAVX |
2534 | |
2535 | size_t bitset_extract_setbits_avx2(uint64_t *array, size_t length, void *vout, |
2536 | size_t outcapacity, uint32_t base) { |
2537 | uint32_t *out = (uint32_t *)vout; |
2538 | uint32_t *initout = out; |
2539 | __m256i baseVec = _mm256_set1_epi32(base - 1); |
2540 | __m256i incVec = _mm256_set1_epi32(64); |
2541 | __m256i add8 = _mm256_set1_epi32(8); |
2542 | uint32_t *safeout = out + outcapacity; |
2543 | size_t i = 0; |
2544 | for (; (i < length) && (out + 64 <= safeout); ++i) { |
2545 | uint64_t w = array[i]; |
2546 | if (w == 0) { |
2547 | baseVec = _mm256_add_epi32(baseVec, incVec); |
2548 | } else { |
2549 | for (int k = 0; k < 4; ++k) { |
2550 | uint8_t byteA = (uint8_t)w; |
2551 | uint8_t byteB = (uint8_t)(w >> 8); |
2552 | w >>= 16; |
2553 | __m256i vecA = |
2554 | _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]); |
2555 | __m256i vecB = |
2556 | _mm256_load_si256((const __m256i *)vecDecodeTable[byteB]); |
2557 | uint8_t advanceA = lengthTable[byteA]; |
2558 | uint8_t advanceB = lengthTable[byteB]; |
2559 | vecA = _mm256_add_epi32(baseVec, vecA); |
2560 | baseVec = _mm256_add_epi32(baseVec, add8); |
2561 | vecB = _mm256_add_epi32(baseVec, vecB); |
2562 | baseVec = _mm256_add_epi32(baseVec, add8); |
2563 | _mm256_storeu_si256((__m256i *)out, vecA); |
2564 | out += advanceA; |
2565 | _mm256_storeu_si256((__m256i *)out, vecB); |
2566 | out += advanceB; |
2567 | } |
2568 | } |
2569 | } |
2570 | base += i * 64; |
2571 | for (; (i < length) && (out < safeout); ++i) { |
2572 | uint64_t w = array[i]; |
2573 | while ((w != 0) && (out < safeout)) { |
2574 | uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) |
2575 | int r = __builtin_ctzll(w); // on x64, should compile to TZCNT |
2576 | uint32_t val = r + base; |
2577 | memcpy(out, &val, |
2578 | sizeof(uint32_t)); // should be compiled as a MOV on x64 |
2579 | out++; |
2580 | w ^= t; |
2581 | } |
2582 | base += 64; |
2583 | } |
2584 | return out - initout; |
2585 | } |
2586 | #endif // USEAVX |
2587 | |
/*
 * Writes the position of every set bit found in the "length" 64-bit words of
 * "bitset" to "vout" as 32-bit integers, in increasing order; positions are
 * numbered from "base".
 *
 * No capacity is checked: the buffer behind "vout" must have room for one
 * 32-bit value per set bit. Values are stored with memcpy.
 *
 * Returns how many values were written.
 */
size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout,
                              uint32_t base) {
    uint32_t *out = (uint32_t *)vout;
    size_t outpos = 0;  // size_t, like the return type
    for (size_t i = 0; i < length; ++i) {
        uint64_t w = bitset[i];
        while (w != 0) {
            // Index of the lowest set bit; should compile to TZCNT on x64.
            uint32_t val = (uint32_t)__builtin_ctzll(w) + base;
            memcpy(out + outpos, &val, sizeof(val));
            outpos++;
            w &= w - 1;  // clear the lowest set bit
        }
        base += 64;
    }
    return outpos;
}
2607 | |
/*
 * Writes the position of every bit set in both "bitset1" and "bitset2"
 * (each "length" 64-bit words long) to "out" as 16-bit integers, in
 * increasing order; positions are numbered from "base".
 *
 * No capacity is checked: "out" must have room for one value per bit of the
 * intersection.
 *
 * Returns how many values were written.
 */
size_t bitset_extract_intersection_setbits_uint16(
    const uint64_t *__restrict__ bitset1, const uint64_t *__restrict__ bitset2,
    size_t length, uint16_t *out, uint16_t base) {
    size_t outpos = 0;  // size_t, like the return type
    for (size_t i = 0; i < length; ++i) {
        uint64_t w = bitset1[i] & bitset2[i];
        while (w != 0) {
            out[outpos++] = (uint16_t)(__builtin_ctzll(w) + base);
            w &= w - 1;  // clear the lowest set bit
        }
        base += 64;
    }
    return outpos;
}
2625 | |
2626 | #ifdef IS_X64 |
2627 | /* |
2628 | * Given a bitset containing "length" 64-bit words, write out the position |
2629 | * of all the set bits to "out" as 16-bit integers, values start at "base" (can |
2630 | *be set to zero). |
2631 | * |
2632 | * The "out" pointer should be sufficient to store the actual number of bits |
2633 | *set. |
2634 | * |
2635 | * Returns how many values were actually decoded. |
2636 | * |
2637 | * This function uses SSE decoding. |
2638 | */ |
2639 | size_t (const uint64_t *bitset, size_t length, |
2640 | uint16_t *out, size_t outcapacity, |
2641 | uint16_t base) { |
2642 | uint16_t *initout = out; |
2643 | __m128i baseVec = _mm_set1_epi16(w: base - 1); |
2644 | __m128i incVec = _mm_set1_epi16(w: 64); |
2645 | __m128i add8 = _mm_set1_epi16(w: 8); |
2646 | uint16_t *safeout = out + outcapacity; |
2647 | const int numberofbytes = 2; // process two bytes at a time |
2648 | size_t i = 0; |
2649 | for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { |
2650 | uint64_t w = bitset[i]; |
2651 | if (w == 0) { |
2652 | baseVec = _mm_add_epi16(a: baseVec, b: incVec); |
2653 | } else { |
2654 | for (int k = 0; k < 4; ++k) { |
2655 | uint8_t byteA = (uint8_t)w; |
2656 | uint8_t byteB = (uint8_t)(w >> 8); |
2657 | w >>= 16; |
2658 | __m128i vecA = _mm_load_si128( |
2659 | p: (const __m128i *)vecDecodeTable_uint16[byteA]); |
2660 | __m128i vecB = _mm_load_si128( |
2661 | p: (const __m128i *)vecDecodeTable_uint16[byteB]); |
2662 | uint8_t advanceA = lengthTable[byteA]; |
2663 | uint8_t advanceB = lengthTable[byteB]; |
2664 | vecA = _mm_add_epi16(a: baseVec, b: vecA); |
2665 | baseVec = _mm_add_epi16(a: baseVec, b: add8); |
2666 | vecB = _mm_add_epi16(a: baseVec, b: vecB); |
2667 | baseVec = _mm_add_epi16(a: baseVec, b: add8); |
2668 | _mm_storeu_si128(p: (__m128i *)out, b: vecA); |
2669 | out += advanceA; |
2670 | _mm_storeu_si128(p: (__m128i *)out, b: vecB); |
2671 | out += advanceB; |
2672 | } |
2673 | } |
2674 | } |
2675 | base += (uint16_t)(i * 64); |
2676 | for (; (i < length) && (out < safeout); ++i) { |
2677 | uint64_t w = bitset[i]; |
2678 | while ((w != 0) && (out < safeout)) { |
2679 | uint64_t t = w & (~w + 1); |
2680 | int r = __builtin_ctzll(w); |
2681 | *out = r + base; |
2682 | out++; |
2683 | w ^= t; |
2684 | } |
2685 | base += 64; |
2686 | } |
2687 | return out - initout; |
2688 | } |
2689 | #endif |
2690 | |
/*
 * Given a bitset containing "length" 64-bit words, write out the position
 * of all the set bits to "out" as 16-bit integers, values start at "base"
 * (can be set to zero).
 *
 * "out" must be large enough to hold one value per set bit.
 *
 * Returns how many values were actually decoded.
 */
size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length,
                                     uint16_t *out, uint16_t base) {
    size_t outpos = 0;
    for (size_t i = 0; i < length; ++i) {
        uint64_t w = bitset[i];
        while (w != 0) {
            uint64_t t = w & (~w + 1);   // isolates the lowest set bit
            int r = __builtin_ctzll(w);  // index of that bit within the word
            out[outpos++] = (uint16_t)(r + base);
            w ^= t;
        }
        base += 64;
    }
    return outpos;
}
2715 | |
2716 | #if defined(ASMBITMANIPOPTIMIZATION) |
2717 | |
/*
 * Set, in the 64-bit-word array at "bitset", every bit whose position is
 * listed in "list" ("length" 16-bit values).
 *
 * Returns "card" plus the number of listed bits that were not already set.
 */
uint64_t bitset_set_list_withcard(void *bitset, uint64_t card,
                                  const uint16_t *list, uint64_t length) {
    uint64_t offset, load, pos;
    uint64_t shift = 6;
    const uint16_t *end = list + length;
    if (!length) return card;  // the asm loop always executes at least once
    // TODO: could unroll for performance, see bitset_set_list
    // bts is not available as an intrinsic in GCC
    __asm volatile(
        "1:\n"
        "movzwq (%[list]), %[pos]\n"
        "shrx %[shift], %[pos], %[offset]\n"      // offset = pos >> 6
        "mov (%[bitset],%[offset],8), %[load]\n"
        "bts %[pos], %[load]\n"                   // CF = previous bit value
        "mov %[load], (%[bitset],%[offset],8)\n"
        "sbb $-1, %[card]\n"                      // card += 1 - CF
        "add $2, %[list]\n"
        "cmp %[list], %[end]\n"
        "jnz 1b"
        : [card] "+&r" (card), [list] "+&r" (list), [load] "=&r" (load),
          [pos] "=&r" (pos), [offset] "=&r" (offset)
        : [end] "r" (end), [bitset] "r" (bitset), [shift] "r" (shift)
        : /* the asm stores into *bitset and changes the flags */
          "memory", "cc");
    return card;
}
2742 | |
/*
 * Set, in the 64-bit-word array at "bitset", every bit whose position is
 * listed in "list" ("length" 16-bit values). No cardinality is tracked.
 *
 * The main loop is unrolled four times; the trailing loop handles the
 * remaining 0-3 values.
 */
void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t pos;
    const uint16_t *end = list + length;

    uint64_t shift = 6;
    uint64_t offset;
    uint64_t load;
    for (; list + 3 < end; list += 4) {
        pos = list[0];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r" (load), [offset] "=&r" (offset)
            : [bitset] "r" (bitset), [shift] "r" (shift), [pos] "r" (pos)
            : "memory", "cc");  // the asm stores into *bitset
        pos = list[1];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r" (load), [offset] "=&r" (offset)
            : [bitset] "r" (bitset), [shift] "r" (shift), [pos] "r" (pos)
            : "memory", "cc");
        pos = list[2];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r" (load), [offset] "=&r" (offset)
            : [bitset] "r" (bitset), [shift] "r" (shift), [pos] "r" (pos)
            : "memory", "cc");
        pos = list[3];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r" (load), [offset] "=&r" (offset)
            : [bitset] "r" (bitset), [shift] "r" (shift), [pos] "r" (pos)
            : "memory", "cc");
    }

    while (list != end) {
        pos = list[0];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r" (load), [offset] "=&r" (offset)
            : [bitset] "r" (bitset), [shift] "r" (shift), [pos] "r" (pos)
            : "memory", "cc");
        list++;
    }
}
2797 | |
/*
 * Clear, in the 64-bit-word array at "bitset", every bit whose position is
 * listed in "list" ("length" 16-bit values).
 *
 * Returns "card" minus the number of listed bits that were set beforehand.
 */
uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list,
                           uint64_t length) {
    if (length == 0) {
        return card;  // the asm loop below always runs at least once
    }
    const uint16_t *const stop = list + length;
    const uint64_t word_shift = 6;
    uint64_t word_index, word, bit;
    // btr is not available as an intrinsic in GCC
    __asm volatile(
        "1:\n"
        "movzwq (%[list]), %[pos]\n"
        "shrx %[shift], %[pos], %[offset]\n"
        "mov (%[bitset],%[offset],8), %[load]\n"
        "btr %[pos], %[load]\n"
        "mov %[load], (%[bitset],%[offset],8)\n"
        "sbb $0, %[card]\n"
        "add $2, %[list]\n"
        "cmp %[list], %[end]\n"
        "jnz 1b"
        : [card] "+&r" (card), [list] "+&r" (list), [load] "=&r" (word),
          [pos] "=&r" (bit), [offset] "=&r" (word_index)
        : [end] "r" (stop), [bitset] "r" (bitset), [shift] "r" (word_shift)
        : /* clobbers */ "memory");
    return card;
}
2823 | |
2824 | #else |
/*
 * Portable version: clear every bit listed in "list" ("length" 16-bit
 * positions) in the 64-bit-word array at "bitset".
 *
 * Returns "card" minus the number of listed bits that were set beforehand.
 */
uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list,
                           uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t position = list[k];
        const uint64_t word_idx = position >> 6;
        const uint64_t bit_idx = position % 64;
        const uint64_t before = words[word_idx];
        const uint64_t after = before & ~(UINT64_C(1) << bit_idx);
        // (before ^ after) has at most the target bit set: 1 if it was cleared
        card -= (before ^ after) >> bit_idx;
        words[word_idx] = after;
    }
    return card;
}
2841 | |
/*
 * Portable version: set every bit listed in "list" ("length" 16-bit
 * positions) in the 64-bit-word array at "bitset".
 *
 * Returns "card" plus the number of listed bits that were not already set.
 */
uint64_t bitset_set_list_withcard(void *bitset, uint64_t card,
                                  const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t position = list[k];
        const uint64_t word_idx = position >> 6;
        const uint64_t bit_idx = position % 64;
        const uint64_t before = words[word_idx];
        const uint64_t after = before | (UINT64_C(1) << bit_idx);
        // (before ^ after) has at most the target bit set: 1 if newly set
        card += (before ^ after) >> bit_idx;
        words[word_idx] = after;
    }
    return card;
}
2858 | |
/*
 * Portable version: set every bit listed in "list" ("length" 16-bit
 * positions) in the 64-bit-word array at "bitset". No cardinality is tracked.
 */
void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t position = list[k];
        words[position >> 6] |= UINT64_C(1) << (position % 64);
    }
}
2872 | |
2873 | #endif |
2874 | |
2875 | /* flip specified bits */ |
2876 | /* TODO: consider whether worthwhile to make an asm version */ |
2877 | |
/*
 * Toggle every bit listed in "list" ("length" 16-bit positions) in the
 * 64-bit-word array at "bitset".
 *
 * Returns "card" adjusted by +1 for each bit that became set and -1 for each
 * bit that became clear.
 */
uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card,
                                   const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t position = list[k];
        const uint64_t word_idx = position >> 6;
        const uint64_t bit_idx = position % 64;
        const uint64_t before = words[word_idx];
        const uint64_t was_set = (before >> bit_idx) & UINT64_C(1);
        // branch-free +1 or -1 (the -1 is produced by unsigned wrap-around)
        card += 1 - 2 * was_set;
        words[word_idx] = before ^ (UINT64_C(1) << bit_idx);
    }
    return card;
}
2896 | |
/*
 * Toggle every bit listed in "list" ("length" 16-bit positions) in the
 * 64-bit-word array at "bitset". No cardinality is tracked.
 */
void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t position = list[k];
        words[position >> 6] ^= UINT64_C(1) << (position % 64);
    }
}
2910 | /* end file src/bitset_util.c */ |
2911 | /* begin file src/containers/array.c */ |
2912 | /* |
2913 | * array.c |
2914 | * |
2915 | */ |
2916 | |
2917 | #include <assert.h> |
2918 | #include <stdio.h> |
2919 | #include <stdlib.h> |
2920 | |
2921 | /* Create a new array with capacity size. Return NULL in case of failure. */ |
2922 | array_container_t *array_container_create_given_capacity(int32_t size) { |
2923 | array_container_t *container; |
2924 | |
2925 | container = (array_container_t *)malloc(size: sizeof(array_container_t)); |
2926 | assert (container); |
2927 | |
2928 | if( size <= 0 ) { // we don't want to rely on malloc(0) |
2929 | container->array = NULL; |
2930 | } else { |
2931 | container->array = (uint16_t *)malloc(size: sizeof(uint16_t) * size); |
2932 | assert (container->array); |
2933 | } |
2934 | |
2935 | container->capacity = size; |
2936 | container->cardinality = 0; |
2937 | |
2938 | return container; |
2939 | } |
2940 | |
2941 | /* Create a new array. Return NULL in case of failure. */ |
2942 | array_container_t *array_container_create(void) { |
2943 | return array_container_create_given_capacity(size: ARRAY_DEFAULT_INIT_SIZE); |
2944 | } |
2945 | |
2946 | /* Create a new array containing all values in [min,max). */ |
2947 | array_container_t * array_container_create_range(uint32_t min, uint32_t max) { |
2948 | array_container_t * answer = array_container_create_given_capacity(size: max - min + 1); |
2949 | if(answer == NULL) return answer; |
2950 | answer->cardinality = 0; |
2951 | for(uint32_t k = min; k < max; k++) { |
2952 | answer->array[answer->cardinality++] = k; |
2953 | } |
2954 | return answer; |
2955 | } |
2956 | |
2957 | /* Duplicate container */ |
2958 | array_container_t *array_container_clone(const array_container_t *src) { |
2959 | array_container_t *newcontainer = |
2960 | array_container_create_given_capacity(size: src->capacity); |
2961 | if (newcontainer == NULL) return NULL; |
2962 | |
2963 | newcontainer->cardinality = src->cardinality; |
2964 | |
2965 | memcpy(dest: newcontainer->array, src: src->array, |
2966 | n: src->cardinality * sizeof(uint16_t)); |
2967 | |
2968 | return newcontainer; |
2969 | } |
2970 | |
2971 | int array_container_shrink_to_fit(array_container_t *src) { |
2972 | if (src->cardinality == src->capacity) return 0; // nothing to do |
2973 | int savings = src->capacity - src->cardinality; |
2974 | src->capacity = src->cardinality; |
2975 | if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs |
2976 | free(ptr: src->array); |
2977 | src->array = NULL; |
2978 | } else { |
2979 | uint16_t *oldarray = src->array; |
2980 | src->array = |
2981 | (uint16_t *)realloc(ptr: oldarray, size: src->capacity * sizeof(uint16_t)); |
2982 | if (src->array == NULL) free(ptr: oldarray); // should never happen? |
2983 | } |
2984 | return savings; |
2985 | } |
2986 | |
2987 | /* Free memory. */ |
2988 | void array_container_free(array_container_t *arr) { |
2989 | if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise |
2990 | free(ptr: arr->array); |
2991 | arr->array = NULL; // pedantic |
2992 | } |
2993 | free(ptr: arr); |
2994 | } |
2995 | |
2996 | static inline int32_t grow_capacity(int32_t capacity) { |
2997 | return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE |
2998 | : capacity < 64 ? capacity * 2 |
2999 | : capacity < 1024 ? capacity * 3 / 2 |
3000 | : capacity * 5 / 4; |
3001 | } |
3002 | |
/* Limit "val" to [min,max]; the lower bound is checked first. */
static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
    if (val < min) {
        return min;
    }
    if (val > max) {
        return max;
    }
    return val;
}
3006 | |
3007 | void array_container_grow(array_container_t *container, int32_t min, |
3008 | bool preserve) { |
3009 | |
3010 | int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536); |
3011 | int32_t new_capacity = clamp(val: grow_capacity(capacity: container->capacity), min, max); |
3012 | |
3013 | container->capacity = new_capacity; |
3014 | uint16_t *array = container->array; |
3015 | |
3016 | if (preserve) { |
3017 | container->array = |
3018 | (uint16_t *)realloc(ptr: array, size: new_capacity * sizeof(uint16_t)); |
3019 | if (container->array == NULL) free(ptr: array); |
3020 | } else { |
3021 | // Jon Strabala reports that some tools complain otherwise |
3022 | if (array != NULL) { |
3023 | free(ptr: array); |
3024 | } |
3025 | container->array = (uint16_t *)malloc(size: new_capacity * sizeof(uint16_t)); |
3026 | } |
3027 | |
3028 | // handle the case where realloc fails |
3029 | if (container->array == NULL) { |
3030 | fprintf(stderr, format: "could not allocate memory\n" ); |
3031 | } |
3032 | assert(container->array != NULL); |
3033 | } |
3034 | |
3035 | /* Copy one container into another. We assume that they are distinct. */ |
3036 | void array_container_copy(const array_container_t *src, |
3037 | array_container_t *dst) { |
3038 | const int32_t cardinality = src->cardinality; |
3039 | if (cardinality > dst->capacity) { |
3040 | array_container_grow(container: dst, min: cardinality, false); |
3041 | } |
3042 | |
3043 | dst->cardinality = cardinality; |
3044 | memcpy(dest: dst->array, src: src->array, n: cardinality * sizeof(uint16_t)); |
3045 | } |
3046 | |
3047 | void array_container_add_from_range(array_container_t *arr, uint32_t min, |
3048 | uint32_t max, uint16_t step) { |
3049 | for (uint32_t value = min; value < max; value += step) { |
3050 | array_container_append(arr, pos: value); |
3051 | } |
3052 | } |
3053 | |
3054 | /* Computes the union of array1 and array2 and write the result to arrayout. |
3055 | * It is assumed that arrayout is distinct from both array1 and array2. |
3056 | */ |
3057 | void array_container_union(const array_container_t *array_1, |
3058 | const array_container_t *array_2, |
3059 | array_container_t *out) { |
3060 | const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; |
3061 | const int32_t max_cardinality = card_1 + card_2; |
3062 | |
3063 | if (out->capacity < max_cardinality) { |
3064 | array_container_grow(container: out, min: max_cardinality, false); |
3065 | } |
3066 | out->cardinality = (int32_t)fast_union_uint16(set_1: array_1->array, size_1: card_1, |
3067 | set_2: array_2->array, size_2: card_2, buffer: out->array); |
3068 | |
3069 | } |
3070 | |
3071 | /* Computes the difference of array1 and array2 and write the result |
3072 | * to array out. |
3073 | * Array out does not need to be distinct from array_1 |
3074 | */ |
3075 | void array_container_andnot(const array_container_t *array_1, |
3076 | const array_container_t *array_2, |
3077 | array_container_t *out) { |
3078 | if (out->capacity < array_1->cardinality) |
3079 | array_container_grow(container: out, min: array_1->cardinality, false); |
3080 | #ifdef ROARING_VECTOR_OPERATIONS_ENABLED |
3081 | if((out != array_1) && (out != array_2)) { |
3082 | out->cardinality = |
3083 | difference_vector16(array_1->array, array_1->cardinality, |
3084 | array_2->array, array_2->cardinality, out->array); |
3085 | } else { |
3086 | out->cardinality = |
3087 | difference_uint16(array_1->array, array_1->cardinality, array_2->array, |
3088 | array_2->cardinality, out->array); |
3089 | } |
3090 | #else |
3091 | out->cardinality = |
3092 | difference_uint16(a1: array_1->array, length1: array_1->cardinality, a2: array_2->array, |
3093 | length2: array_2->cardinality, a_out: out->array); |
3094 | #endif |
3095 | } |
3096 | |
3097 | /* Computes the symmetric difference of array1 and array2 and write the |
3098 | * result |
3099 | * to arrayout. |
3100 | * It is assumed that arrayout is distinct from both array1 and array2. |
3101 | */ |
3102 | void array_container_xor(const array_container_t *array_1, |
3103 | const array_container_t *array_2, |
3104 | array_container_t *out) { |
3105 | const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; |
3106 | const int32_t max_cardinality = card_1 + card_2; |
3107 | if (out->capacity < max_cardinality) { |
3108 | array_container_grow(container: out, min: max_cardinality, false); |
3109 | } |
3110 | |
3111 | #ifdef ROARING_VECTOR_OPERATIONS_ENABLED |
3112 | out->cardinality = |
3113 | xor_vector16(array_1->array, array_1->cardinality, array_2->array, |
3114 | array_2->cardinality, out->array); |
3115 | #else |
3116 | out->cardinality = |
3117 | xor_uint16(array_1: array_1->array, card_1: array_1->cardinality, array_2: array_2->array, |
3118 | card_2: array_2->cardinality, out: out->array); |
3119 | #endif |
3120 | } |
3121 | |
/* Smaller of two 32-bit signed integers. */
static inline int32_t minimum_int32(int32_t a, int32_t b) {
    if (a < b) {
        return a;
    }
    return b;
}
3125 | |
3126 | /* computes the intersection of array1 and array2 and write the result to |
3127 | * arrayout. |
3128 | * It is assumed that arrayout is distinct from both array1 and array2. |
3129 | * */ |
3130 | void array_container_intersection(const array_container_t *array1, |
3131 | const array_container_t *array2, |
3132 | array_container_t *out) { |
3133 | int32_t card_1 = array1->cardinality, card_2 = array2->cardinality, |
3134 | min_card = minimum_int32(a: card_1, b: card_2); |
3135 | const int threshold = 64; // subject to tuning |
3136 | #ifdef USEAVX |
3137 | if (out->capacity < min_card) { |
3138 | array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t), |
3139 | false); |
3140 | } |
3141 | #else |
3142 | if (out->capacity < min_card) { |
3143 | array_container_grow(container: out, min: min_card, false); |
3144 | } |
3145 | #endif |
3146 | |
3147 | if (card_1 * threshold < card_2) { |
3148 | out->cardinality = intersect_skewed_uint16( |
3149 | small: array1->array, size_s: card_1, large: array2->array, size_l: card_2, buffer: out->array); |
3150 | } else if (card_2 * threshold < card_1) { |
3151 | out->cardinality = intersect_skewed_uint16( |
3152 | small: array2->array, size_s: card_2, large: array1->array, size_l: card_1, buffer: out->array); |
3153 | } else { |
3154 | #ifdef USEAVX |
3155 | out->cardinality = intersect_vector16( |
3156 | array1->array, card_1, array2->array, card_2, out->array); |
3157 | #else |
3158 | out->cardinality = intersect_uint16(A: array1->array, lenA: card_1, |
3159 | B: array2->array, lenB: card_2, out: out->array); |
3160 | #endif |
3161 | } |
3162 | } |
3163 | |
3164 | /* computes the size of the intersection of array1 and array2 |
3165 | * */ |
3166 | int array_container_intersection_cardinality(const array_container_t *array1, |
3167 | const array_container_t *array2) { |
3168 | int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; |
3169 | const int threshold = 64; // subject to tuning |
3170 | if (card_1 * threshold < card_2) { |
3171 | return intersect_skewed_uint16_cardinality(small: array1->array, size_s: card_1, |
3172 | large: array2->array, size_l: card_2); |
3173 | } else if (card_2 * threshold < card_1) { |
3174 | return intersect_skewed_uint16_cardinality(small: array2->array, size_s: card_2, |
3175 | large: array1->array, size_l: card_1); |
3176 | } else { |
3177 | #ifdef USEAVX |
3178 | return intersect_vector16_cardinality(array1->array, card_1, |
3179 | array2->array, card_2); |
3180 | #else |
3181 | return intersect_uint16_cardinality(A: array1->array, lenA: card_1, |
3182 | B: array2->array, lenB: card_2); |
3183 | #endif |
3184 | } |
3185 | } |
3186 | |
3187 | bool array_container_intersect(const array_container_t *array1, |
3188 | const array_container_t *array2) { |
3189 | int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; |
3190 | const int threshold = 64; // subject to tuning |
3191 | if (card_1 * threshold < card_2) { |
3192 | return intersect_skewed_uint16_nonempty( |
3193 | small: array1->array, size_s: card_1, large: array2->array, size_l: card_2); |
3194 | } else if (card_2 * threshold < card_1) { |
3195 | return intersect_skewed_uint16_nonempty( |
3196 | small: array2->array, size_s: card_2, large: array1->array, size_l: card_1); |
3197 | } else { |
3198 | // we do not bother vectorizing |
3199 | return intersect_uint16_nonempty(A: array1->array, lenA: card_1, |
3200 | B: array2->array, lenB: card_2); |
3201 | } |
3202 | } |
3203 | |
3204 | /* computes the intersection of array1 and array2 and write the result to |
3205 | * array1. |
3206 | * */ |
3207 | void array_container_intersection_inplace(array_container_t *src_1, |
3208 | const array_container_t *src_2) { |
3209 | // todo: can any of this be vectorized? |
3210 | int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality; |
3211 | const int threshold = 64; // subject to tuning |
3212 | if (card_1 * threshold < card_2) { |
3213 | src_1->cardinality = intersect_skewed_uint16( |
3214 | small: src_1->array, size_s: card_1, large: src_2->array, size_l: card_2, buffer: src_1->array); |
3215 | } else if (card_2 * threshold < card_1) { |
3216 | src_1->cardinality = intersect_skewed_uint16( |
3217 | small: src_2->array, size_s: card_2, large: src_1->array, size_l: card_1, buffer: src_1->array); |
3218 | } else { |
3219 | src_1->cardinality = intersect_uint16( |
3220 | A: src_1->array, lenA: card_1, B: src_2->array, lenB: card_2, out: src_1->array); |
3221 | } |
3222 | } |
3223 | |
3224 | int array_container_to_uint32_array(void *vout, const array_container_t *cont, |
3225 | uint32_t base) { |
3226 | int outpos = 0; |
3227 | uint32_t *out = (uint32_t *)vout; |
3228 | for (int i = 0; i < cont->cardinality; ++i) { |
3229 | const uint32_t val = base + cont->array[i]; |
3230 | memcpy(dest: out + outpos, src: &val, |
3231 | n: sizeof(uint32_t)); // should be compiled as a MOV on x64 |
3232 | outpos++; |
3233 | } |
3234 | return outpos; |
3235 | } |
3236 | |
3237 | void array_container_printf(const array_container_t *v) { |
3238 | if (v->cardinality == 0) { |
3239 | printf(format: "{}" ); |
3240 | return; |
3241 | } |
3242 | printf(format: "{" ); |
3243 | printf(format: "%d" , v->array[0]); |
3244 | for (int i = 1; i < v->cardinality; ++i) { |
3245 | printf(format: ",%d" , v->array[i]); |
3246 | } |
3247 | printf(format: "}" ); |
3248 | } |
3249 | |
3250 | void array_container_printf_as_uint32_array(const array_container_t *v, |
3251 | uint32_t base) { |
3252 | if (v->cardinality == 0) { |
3253 | return; |
3254 | } |
3255 | printf(format: "%u" , v->array[0] + base); |
3256 | for (int i = 1; i < v->cardinality; ++i) { |
3257 | printf(format: ",%u" , v->array[i] + base); |
3258 | } |
3259 | } |
3260 | |
3261 | /* Compute the number of runs */ |
3262 | int32_t array_container_number_of_runs(const array_container_t *a) { |
3263 | // Can SIMD work here? |
3264 | int32_t nr_runs = 0; |
3265 | int32_t prev = -2; |
3266 | for (const uint16_t *p = a->array; p != a->array + a->cardinality; ++p) { |
3267 | if (*p != prev + 1) nr_runs++; |
3268 | prev = *p; |
3269 | } |
3270 | return nr_runs; |
3271 | } |
3272 | |
3273 | int32_t array_container_serialize(const array_container_t *container, char *buf) { |
3274 | int32_t l, off; |
3275 | uint16_t cardinality = (uint16_t)container->cardinality; |
3276 | |
3277 | memcpy(dest: buf, src: &cardinality, n: off = sizeof(cardinality)); |
3278 | l = sizeof(uint16_t) * container->cardinality; |
3279 | if (l) memcpy(dest: &buf[off], src: container->array, n: l); |
3280 | |
3281 | return (off + l); |
3282 | } |
3283 | |
3284 | /** |
3285 | * Writes the underlying array to buf, outputs how many bytes were written. |
3286 | * The number of bytes written should be |
3287 | * array_container_size_in_bytes(container). |
3288 | * |
3289 | */ |
3290 | int32_t array_container_write(const array_container_t *container, char *buf) { |
3291 | memcpy(dest: buf, src: container->array, n: container->cardinality * sizeof(uint16_t)); |
3292 | return array_container_size_in_bytes(container); |
3293 | } |
3294 | |
3295 | bool array_container_is_subset(const array_container_t *container1, |
3296 | const array_container_t *container2) { |
3297 | if (container1->cardinality > container2->cardinality) { |
3298 | return false; |
3299 | } |
3300 | int i1 = 0, i2 = 0; |
3301 | while (i1 < container1->cardinality && i2 < container2->cardinality) { |
3302 | if (container1->array[i1] == container2->array[i2]) { |
3303 | i1++; |
3304 | i2++; |
3305 | } else if (container1->array[i1] > container2->array[i2]) { |
3306 | i2++; |
3307 | } else { // container1->array[i1] < container2->array[i2] |
3308 | return false; |
3309 | } |
3310 | } |
3311 | if (i1 == container1->cardinality) { |
3312 | return true; |
3313 | } else { |
3314 | return false; |
3315 | } |
3316 | } |
3317 | |
3318 | int32_t array_container_read(int32_t cardinality, array_container_t *container, |
3319 | const char *buf) { |
3320 | if (container->capacity < cardinality) { |
3321 | array_container_grow(container, min: cardinality, false); |
3322 | } |
3323 | container->cardinality = cardinality; |
3324 | memcpy(dest: container->array, src: buf, n: container->cardinality * sizeof(uint16_t)); |
3325 | |
3326 | return array_container_size_in_bytes(container); |
3327 | } |
3328 | |
3329 | uint32_t array_container_serialization_len(const array_container_t *container) { |
3330 | return (sizeof(uint16_t) /* container->cardinality converted to 16 bit */ + |
3331 | (sizeof(uint16_t) * container->cardinality)); |
3332 | } |
3333 | |
3334 | void *array_container_deserialize(const char *buf, size_t buf_len) { |
3335 | array_container_t *ptr; |
3336 | |
3337 | if (buf_len < 2) /* capacity converted to 16 bit */ |
3338 | return (NULL); |
3339 | else |
3340 | buf_len -= 2; |
3341 | |
3342 | if ((ptr = (array_container_t *)malloc(size: sizeof(array_container_t))) != |
3343 | NULL) { |
3344 | size_t len; |
3345 | int32_t off; |
3346 | uint16_t cardinality; |
3347 | |
3348 | memcpy(dest: &cardinality, src: buf, n: off = sizeof(cardinality)); |
3349 | |
3350 | ptr->capacity = ptr->cardinality = (uint32_t)cardinality; |
3351 | len = sizeof(uint16_t) * ptr->cardinality; |
3352 | |
3353 | if (len != buf_len) { |
3354 | free(ptr: ptr); |
3355 | return (NULL); |
3356 | } |
3357 | |
3358 | if ((ptr->array = (uint16_t *)malloc(size: sizeof(uint16_t) * |
3359 | ptr->capacity)) == NULL) { |
3360 | free(ptr: ptr); |
3361 | return (NULL); |
3362 | } |
3363 | |
3364 | if (len) memcpy(dest: ptr->array, src: &buf[off], n: len); |
3365 | |
3366 | /* Check if returned values are monotonically increasing */ |
3367 | for (int32_t i = 0, j = 0; i < ptr->cardinality; i++) { |
3368 | if (ptr->array[i] < j) { |
3369 | free(ptr: ptr->array); |
3370 | free(ptr: ptr); |
3371 | return (NULL); |
3372 | } else |
3373 | j = ptr->array[i]; |
3374 | } |
3375 | } |
3376 | |
3377 | return (ptr); |
3378 | } |
3379 | |
3380 | bool array_container_iterate(const array_container_t *cont, uint32_t base, |
3381 | roaring_iterator iterator, void *ptr) { |
3382 | for (int i = 0; i < cont->cardinality; i++) |
3383 | if (!iterator(cont->array[i] + base, ptr)) return false; |
3384 | return true; |
3385 | } |
3386 | |
3387 | bool array_container_iterate64(const array_container_t *cont, uint32_t base, |
3388 | roaring_iterator64 iterator, uint64_t high_bits, |
3389 | void *ptr) { |
3390 | for (int i = 0; i < cont->cardinality; i++) |
3391 | if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr)) |
3392 | return false; |
3393 | return true; |
3394 | } |
3395 | /* end file src/containers/array.c */ |
3396 | /* begin file src/containers/bitset.c */ |
3397 | /* |
3398 | * bitset.c |
3399 | * |
3400 | */ |
3401 | #ifndef _POSIX_C_SOURCE |
3402 | #define _POSIX_C_SOURCE 200809L |
3403 | #endif |
3404 | #include <assert.h> |
3405 | #include <stdio.h> |
3406 | #include <stdlib.h> |
3407 | #include <string.h> |
3408 | |
3409 | |
3410 | void bitset_container_clear(bitset_container_t *bitset) { |
3411 | memset(s: bitset->array, c: 0, n: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3412 | bitset->cardinality = 0; |
3413 | } |
3414 | |
3415 | void bitset_container_set_all(bitset_container_t *bitset) { |
3416 | memset(s: bitset->array, INT64_C(-1), |
3417 | n: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3418 | bitset->cardinality = (1 << 16); |
3419 | } |
3420 | |
3421 | |
3422 | |
3423 | /* Create a new bitset. Return NULL in case of failure. */ |
3424 | bitset_container_t *bitset_container_create(void) { |
3425 | bitset_container_t *bitset = |
3426 | (bitset_container_t *)malloc(size: sizeof(bitset_container_t)); |
3427 | |
3428 | if (!bitset) { |
3429 | return NULL; |
3430 | } |
3431 | // sizeof(__m256i) == 32 |
3432 | bitset->array = (uint64_t *)roaring_bitmap_aligned_malloc( |
3433 | alignment: 32, size: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3434 | if (!bitset->array) { |
3435 | free(ptr: bitset); |
3436 | return NULL; |
3437 | } |
3438 | bitset_container_clear(bitset); |
3439 | return bitset; |
3440 | } |
3441 | |
3442 | /* Copy one container into another. We assume that they are distinct. */ |
3443 | void bitset_container_copy(const bitset_container_t *source, |
3444 | bitset_container_t *dest) { |
3445 | dest->cardinality = source->cardinality; |
3446 | memcpy(dest: dest->array, src: source->array, |
3447 | n: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3448 | } |
3449 | |
3450 | void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, |
3451 | uint32_t max, uint16_t step) { |
3452 | if (step == 0) return; // refuse to crash |
3453 | if ((64 % step) == 0) { // step divides 64 |
3454 | uint64_t mask = 0; // construct the repeated mask |
3455 | for (uint32_t value = (min % step); value < 64; value += step) { |
3456 | mask |= ((uint64_t)1 << value); |
3457 | } |
3458 | uint32_t firstword = min / 64; |
3459 | uint32_t endword = (max - 1) / 64; |
3460 | bitset->cardinality = (max - min + step - 1) / step; |
3461 | if (firstword == endword) { |
3462 | bitset->array[firstword] |= |
3463 | mask & (((~UINT64_C(0)) << (min % 64)) & |
3464 | ((~UINT64_C(0)) >> ((~max + 1) % 64))); |
3465 | return; |
3466 | } |
3467 | bitset->array[firstword] = mask & ((~UINT64_C(0)) << (min % 64)); |
3468 | for (uint32_t i = firstword + 1; i < endword; i++) |
3469 | bitset->array[i] = mask; |
3470 | bitset->array[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64)); |
3471 | } else { |
3472 | for (uint32_t value = min; value < max; value += step) { |
3473 | bitset_container_add(bitset, pos: value); |
3474 | } |
3475 | } |
3476 | } |
3477 | |
3478 | /* Free memory. */ |
3479 | void bitset_container_free(bitset_container_t *bitset) { |
3480 | if(bitset->array != NULL) {// Jon Strabala reports that some tools complain otherwise |
3481 | roaring_bitmap_aligned_free(memblock: bitset->array); |
3482 | bitset->array = NULL; // pedantic |
3483 | } |
3484 | free(ptr: bitset); |
3485 | } |
3486 | |
3487 | /* duplicate container. */ |
3488 | bitset_container_t *bitset_container_clone(const bitset_container_t *src) { |
3489 | bitset_container_t *bitset = |
3490 | (bitset_container_t *)malloc(size: sizeof(bitset_container_t)); |
3491 | assert(bitset); |
3492 | |
3493 | // sizeof(__m256i) == 32 |
3494 | bitset->array = (uint64_t *)roaring_bitmap_aligned_malloc( |
3495 | alignment: 32, size: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3496 | assert(bitset->array); |
3497 | bitset->cardinality = src->cardinality; |
3498 | memcpy(dest: bitset->array, src: src->array, |
3499 | n: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3500 | return bitset; |
3501 | } |
3502 | |
3503 | void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, |
3504 | uint32_t end) { |
3505 | bitset_set_range(bitmap: bitset->array, start: begin, end); |
3506 | bitset->cardinality = |
3507 | bitset_container_compute_cardinality(bitset); // could be smarter |
3508 | } |
3509 | |
3510 | |
3511 | bool bitset_container_intersect(const bitset_container_t *src_1, |
3512 | const bitset_container_t *src_2) { |
3513 | // could vectorize, but this is probably already quite fast in practice |
3514 | const uint64_t * __restrict__ array_1 = src_1->array; |
3515 | const uint64_t * __restrict__ array_2 = src_2->array; |
3516 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { |
3517 | if((array_1[i] & array_2[i]) != 0) return true; |
3518 | } |
3519 | return false; |
3520 | } |
3521 | |
3522 | |
3523 | #ifdef USEAVX |
3524 | #ifndef WORDS_IN_AVX2_REG |
3525 | #define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) |
3526 | #endif |
3527 | /* Get the number of bits set (force computation) */ |
3528 | int bitset_container_compute_cardinality(const bitset_container_t *bitset) { |
3529 | return (int) avx2_harley_seal_popcount256( |
3530 | (const __m256i *)bitset->array, |
3531 | BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); |
3532 | } |
3533 | |
3534 | #elif defined(USENEON) |
3535 | int bitset_container_compute_cardinality(const bitset_container_t *bitset) { |
3536 | uint16x8_t n0 = vdupq_n_u16(0); |
3537 | uint16x8_t n1 = vdupq_n_u16(0); |
3538 | uint16x8_t n2 = vdupq_n_u16(0); |
3539 | uint16x8_t n3 = vdupq_n_u16(0); |
3540 | for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { |
3541 | uint64x2_t c0 = vld1q_u64(&bitset->array[i + 0]); |
3542 | n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); |
3543 | uint64x2_t c1 = vld1q_u64(&bitset->array[i + 2]); |
3544 | n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); |
3545 | uint64x2_t c2 = vld1q_u64(&bitset->array[i + 4]); |
3546 | n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); |
3547 | uint64x2_t c3 = vld1q_u64(&bitset->array[i + 6]); |
3548 | n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); |
3549 | } |
3550 | uint64x2_t n = vdupq_n_u64(0); |
3551 | n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); |
3552 | n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); |
3553 | n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); |
3554 | n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); |
3555 | return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); |
3556 | } |
3557 | |
3558 | #else |
3559 | |
3560 | /* Get the number of bits set (force computation) */ |
3561 | int bitset_container_compute_cardinality(const bitset_container_t *bitset) { |
3562 | const uint64_t *array = bitset->array; |
3563 | int32_t sum = 0; |
3564 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { |
3565 | sum += hamming(x: array[i]); |
3566 | sum += hamming(x: array[i + 1]); |
3567 | sum += hamming(x: array[i + 2]); |
3568 | sum += hamming(x: array[i + 3]); |
3569 | } |
3570 | return sum; |
3571 | } |
3572 | |
3573 | #endif |
3574 | |
3575 | #ifdef USEAVX |
3576 | |
/* Repeat factor used in the LOOP_SIZE computation below. */
#define BITSET_CONTAINER_FN_REPEAT 8
#ifndef WORDS_IN_AVX2_REG
/* Number of 64-bit words in one 256-bit register (parenthesized so the
 * expansion cannot be re-associated by surrounding operators). */
#define WORDS_IN_AVX2_REG (sizeof(__m256i) / sizeof(uint64_t))
#endif
/* Container size in words divided by (words per register * repeat factor);
 * parenthesized for the same reason as above. */
#define LOOP_SIZE                                                 \
    (BITSET_CONTAINER_SIZE_IN_WORDS /                             \
     ((WORDS_IN_AVX2_REG) * BITSET_CONTAINER_FN_REPEAT))
3584 | |
/*
 * BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)
 * generates three functions that apply a binary operation (eg union) to the
 * words of src_1 and src_2:
 *   bitset_container_<opname>_nocard    writes dst, marks cardinality unknown
 *   bitset_container_<opname>           writes dst and stores its cardinality
 *   bitset_container_<opname>_justcard  only returns the cardinality
 * This AVX2 flavour uses only opname and avx_intrinsic.
 */
// clang-format off
#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)   \
int bitset_container_##opname##_nocard(const bitset_container_t *src_1,        \
                                       const bitset_container_t *src_2,        \
                                       bitset_container_t *dst) {              \
    const uint8_t * __restrict__ lhs = (const uint8_t *)src_1->array;          \
    const uint8_t * __restrict__ rhs = (const uint8_t *)src_2->array;          \
    /* not using the blocking optimization for some reason*/                   \
    uint8_t *result = (uint8_t *)dst->array;                                   \
    const size_t nbytes = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);   \
    /* one 256-bit register per step: load both inputs, combine, store */      \
    for (size_t off = 0; off < nbytes; off += sizeof(__m256i)) {               \
        const __m256i va = _mm256_lddqu_si256((const __m256i *)(lhs + off));   \
        const __m256i vb = _mm256_lddqu_si256((const __m256i *)(rhs + off));   \
        /* operand order (src_2, src_1) matters for andnot */                  \
        _mm256_storeu_si256((__m256i *)(result + off), avx_intrinsic(vb, va)); \
    }                                                                          \
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;                             \
    return dst->cardinality;                                                   \
}                                                                              \
/* next, a version that updates cardinality*/                                  \
int bitset_container_##opname(const bitset_container_t *src_1,                 \
                              const bitset_container_t *src_2,                 \
                              bitset_container_t *dst) {                       \
    const __m256i * __restrict__ lhs = (const __m256i *) src_1->array;         \
    const __m256i * __restrict__ rhs = (const __m256i *) src_2->array;         \
    __m256i *result = (__m256i *) dst->array;                                  \
    const int32_t card =                                                       \
        (int32_t)avx2_harley_seal_popcount256andstore_##opname(                \
            rhs, lhs, result,                                                  \
            BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));             \
    dst->cardinality = card;                                                   \
    return dst->cardinality;                                                   \
}                                                                              \
/* next, a version that just computes the cardinality*/                        \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1,      \
                                         const bitset_container_t *src_2) {    \
    const __m256i * __restrict__ lhs = (const __m256i *) src_1->array;         \
    const __m256i * __restrict__ rhs = (const __m256i *) src_2->array;         \
    return (int)avx2_harley_seal_popcount256_##opname(                         \
        rhs, lhs, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));       \
}
3659 | |
3660 | #elif defined(USENEON) |
3661 | |
/*
 * NEON flavour of BITSET_CONTAINER_FN: generates bitset_container_<opname>,
 * bitset_container_<opname>_nocard and bitset_container_<opname>_justcard.
 * Only opname and neon_intrinsic are used. Eight words are handled per
 * iteration; when a cardinality is needed, per-byte popcounts are gathered in
 * four 16-bit accumulators and reduced at the end.
 */
#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)   \
int bitset_container_##opname(const bitset_container_t *src_1,                 \
                              const bitset_container_t *src_2,                 \
                              bitset_container_t *dst) {                       \
    const uint64_t * __restrict__ lhs = src_1->array;                          \
    const uint64_t * __restrict__ rhs = src_2->array;                          \
    uint64_t *result = dst->array;                                             \
    uint16x8_t acc0 = vdupq_n_u16(0);                                          \
    uint16x8_t acc1 = vdupq_n_u16(0);                                          \
    uint16x8_t acc2 = vdupq_n_u16(0);                                          \
    uint16x8_t acc3 = vdupq_n_u16(0);                                          \
    for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {           \
        const uint64x2_t r0 = neon_intrinsic(vld1q_u64(lhs + i),               \
                                             vld1q_u64(rhs + i));              \
        acc0 = vaddq_u16(acc0,                                                 \
                         vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(r0))));      \
        vst1q_u64(result + i, r0);                                             \
        const uint64x2_t r1 = neon_intrinsic(vld1q_u64(lhs + i + 2),           \
                                             vld1q_u64(rhs + i + 2));          \
        acc1 = vaddq_u16(acc1,                                                 \
                         vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(r1))));      \
        vst1q_u64(result + i + 2, r1);                                         \
        const uint64x2_t r2 = neon_intrinsic(vld1q_u64(lhs + i + 4),           \
                                             vld1q_u64(rhs + i + 4));          \
        acc2 = vaddq_u16(acc2,                                                 \
                         vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(r2))));      \
        vst1q_u64(result + i + 4, r2);                                         \
        const uint64x2_t r3 = neon_intrinsic(vld1q_u64(lhs + i + 6),           \
                                             vld1q_u64(rhs + i + 6));          \
        acc3 = vaddq_u16(acc3,                                                 \
                         vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(r3))));      \
        vst1q_u64(result + i + 6, r3);                                         \
    }                                                                          \
    uint64x2_t total = vdupq_n_u64(0);                                         \
    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc0)));                  \
    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc1)));                  \
    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc2)));                  \
    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc3)));                  \
    dst->cardinality = vgetq_lane_u64(total, 0) + vgetq_lane_u64(total, 1);    \
    return dst->cardinality;                                                   \
}                                                                              \
int bitset_container_##opname##_nocard(const bitset_container_t *src_1,        \
                                       const bitset_container_t *src_2,        \
                                       bitset_container_t *dst) {              \
    const uint64_t * __restrict__ lhs = src_1->array;                          \
    const uint64_t * __restrict__ rhs = src_2->array;                          \
    uint64_t *result = dst->array;                                             \
    for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {           \
        vst1q_u64(result + i, neon_intrinsic(vld1q_u64(lhs + i),               \
                                             vld1q_u64(rhs + i)));             \
        vst1q_u64(result + i + 2, neon_intrinsic(vld1q_u64(lhs + i + 2),       \
                                                 vld1q_u64(rhs + i + 2)));     \
        vst1q_u64(result + i + 4, neon_intrinsic(vld1q_u64(lhs + i + 4),       \
                                                 vld1q_u64(rhs + i + 4)));     \
        vst1q_u64(result + i + 6, neon_intrinsic(vld1q_u64(lhs + i + 6),       \
                                                 vld1q_u64(rhs + i + 6)));     \
    }                                                                          \
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;                             \
    return dst->cardinality;                                                   \
}                                                                              \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1,      \
                                         const bitset_container_t *src_2) {    \
    const uint64_t * __restrict__ lhs = src_1->array;                          \
    const uint64_t * __restrict__ rhs = src_2->array;                          \
    uint16x8_t acc0 = vdupq_n_u16(0);                                          \
    uint16x8_t acc1 = vdupq_n_u16(0);                                          \
    uint16x8_t acc2 = vdupq_n_u16(0);                                          \
    uint16x8_t acc3 = vdupq_n_u16(0);                                          \
    for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {           \
        const uint64x2_t r0 = neon_intrinsic(vld1q_u64(lhs + i),               \
                                             vld1q_u64(rhs + i));              \
        acc0 = vaddq_u16(acc0,                                                 \
                         vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(r0))));      \
        const uint64x2_t r1 = neon_intrinsic(vld1q_u64(lhs + i + 2),           \
                                             vld1q_u64(rhs + i + 2));          \
        acc1 = vaddq_u16(acc1,                                                 \
                         vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(r1))));      \
        const uint64x2_t r2 = neon_intrinsic(vld1q_u64(lhs + i + 4),           \
                                             vld1q_u64(rhs + i + 4));          \
        acc2 = vaddq_u16(acc2,                                                 \
                         vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(r2))));      \
        const uint64x2_t r3 = neon_intrinsic(vld1q_u64(lhs + i + 6),           \
                                             vld1q_u64(rhs + i + 6));          \
        acc3 = vaddq_u16(acc3,                                                 \
                         vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(r3))));      \
    }                                                                          \
    uint64x2_t total = vdupq_n_u64(0);                                         \
    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc0)));                  \
    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc1)));                  \
    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc2)));                  \
    total = vaddq_u64(total, vpaddlq_u32(vpaddlq_u16(acc3)));                  \
    return vgetq_lane_u64(total, 0) + vgetq_lane_u64(total, 1);                \
}
3747 | |
3748 | #else /* not USEAVX */ |
3749 | |
/*
 * Portable flavour of BITSET_CONTAINER_FN: generates
 * bitset_container_<opname>, bitset_container_<opname>_nocard and
 * bitset_container_<opname>_justcard by applying `opsymbol` word by word.
 * The intrinsic arguments are unused here.
 */
#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)   \
int bitset_container_##opname(const bitset_container_t *src_1,                 \
                              const bitset_container_t *src_2,                 \
                              bitset_container_t *dst) {                       \
    const uint64_t * __restrict__ lhs = src_1->array;                          \
    const uint64_t * __restrict__ rhs = src_2->array;                          \
    uint64_t *result = dst->array;                                             \
    int32_t card = 0;                                                          \
    /* two words per step; both are computed before either is stored */        \
    for (size_t k = 0; k < BITSET_CONTAINER_SIZE_IN_WORDS; k += 2) {           \
        const uint64_t even = (lhs[k])opsymbol(rhs[k]);                        \
        const uint64_t odd = (lhs[k + 1])opsymbol(rhs[k + 1]);                 \
        result[k] = even;                                                      \
        result[k + 1] = odd;                                                   \
        card += hamming(even);                                                 \
        card += hamming(odd);                                                  \
    }                                                                          \
    dst->cardinality = card;                                                   \
    return dst->cardinality;                                                   \
}                                                                              \
int bitset_container_##opname##_nocard(const bitset_container_t *src_1,        \
                                       const bitset_container_t *src_2,        \
                                       bitset_container_t *dst) {              \
    const uint64_t * __restrict__ lhs = src_1->array;                          \
    const uint64_t * __restrict__ rhs = src_2->array;                          \
    uint64_t *result = dst->array;                                             \
    size_t k = 0;                                                              \
    while (k < BITSET_CONTAINER_SIZE_IN_WORDS) {                               \
        result[k] = (lhs[k])opsymbol(rhs[k]);                                  \
        ++k;                                                                   \
    }                                                                          \
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;                             \
    return dst->cardinality;                                                   \
}                                                                              \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1,      \
                                         const bitset_container_t *src_2) {    \
    const uint64_t * __restrict__ lhs = src_1->array;                          \
    const uint64_t * __restrict__ rhs = src_2->array;                          \
    int32_t card = 0;                                                          \
    for (size_t k = 0; k < BITSET_CONTAINER_SIZE_IN_WORDS; k += 2) {           \
        const uint64_t even = (lhs[k])opsymbol(rhs[k]);                        \
        const uint64_t odd = (lhs[k + 1])opsymbol(rhs[k + 1]);                 \
        card += hamming(even);                                                 \
        card += hamming(odd);                                                  \
    }                                                                          \
    return card;                                                               \
}
3794 | |
3795 | #endif |
3796 | |
3797 | // we duplicate the function because other containers use the "or" term, makes API more consistent |
3798 | BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64) |
3799 | BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64) |
3800 | |
3801 | // we duplicate the function because other containers use the "intersection" term, makes API more consistent |
3802 | BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64) |
3803 | BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64) |
3804 | |
3805 | BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64) |
3806 | BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) |
3807 | // clang-format On |
3808 | |
3809 | |
3810 | |
3811 | int bitset_container_to_uint32_array( void *vout, const bitset_container_t *cont, uint32_t base) { |
3812 | #ifdef USEAVX2FORDECODING |
3813 | if(cont->cardinality >= 8192)// heuristic |
3814 | return (int) bitset_extract_setbits_avx2(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,cont->cardinality,base); |
3815 | else |
3816 | return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); |
3817 | #else |
3818 | return (int) bitset_extract_setbits(bitset: cont->array, length: BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); |
3819 | #endif |
3820 | } |
3821 | |
3822 | /* |
3823 | * Print this container using printf (useful for debugging). |
3824 | */ |
3825 | void bitset_container_printf(const bitset_container_t * v) { |
3826 | printf(format: "{" ); |
3827 | uint32_t base = 0; |
3828 | bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable |
3829 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { |
3830 | uint64_t w = v->array[i]; |
3831 | while (w != 0) { |
3832 | uint64_t t = w & (~w + 1); |
3833 | int r = __builtin_ctzll(w); |
3834 | if(iamfirst) {// predicted to be false |
3835 | printf(format: "%u" ,base + r); |
3836 | iamfirst = false; |
3837 | } else { |
3838 | printf(format: ",%u" ,base + r); |
3839 | } |
3840 | w ^= t; |
3841 | } |
3842 | base += 64; |
3843 | } |
3844 | printf(format: "}" ); |
3845 | } |
3846 | |
3847 | |
3848 | /* |
3849 | * Print this container using printf as a comma-separated list of 32-bit integers starting at base. |
3850 | */ |
3851 | void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) { |
3852 | bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable |
3853 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { |
3854 | uint64_t w = v->array[i]; |
3855 | while (w != 0) { |
3856 | uint64_t t = w & (~w + 1); |
3857 | int r = __builtin_ctzll(w); |
3858 | if(iamfirst) {// predicted to be false |
3859 | printf(format: "%u" , r + base); |
3860 | iamfirst = false; |
3861 | } else { |
3862 | printf(format: ",%u" ,r + base); |
3863 | } |
3864 | w ^= t; |
3865 | } |
3866 | base += 64; |
3867 | } |
3868 | } |
3869 | |
3870 | |
3871 | // TODO: use the fast lower bound, also |
3872 | int bitset_container_number_of_runs(bitset_container_t *b) { |
3873 | int num_runs = 0; |
3874 | uint64_t next_word = b->array[0]; |
3875 | |
3876 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) { |
3877 | uint64_t word = next_word; |
3878 | next_word = b->array[i+1]; |
3879 | num_runs += hamming(x: (~word) & (word << 1)) + ( (word >> 63) & ~next_word); |
3880 | } |
3881 | |
3882 | uint64_t word = next_word; |
3883 | num_runs += hamming(x: (~word) & (word << 1)); |
3884 | if((word & 0x8000000000000000ULL) != 0) |
3885 | num_runs++; |
3886 | return num_runs; |
3887 | } |
3888 | |
3889 | int32_t bitset_container_serialize(const bitset_container_t *container, char *buf) { |
3890 | int32_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; |
3891 | memcpy(dest: buf, src: container->array, n: l); |
3892 | return(l); |
3893 | } |
3894 | |
3895 | |
3896 | |
3897 | int32_t bitset_container_write(const bitset_container_t *container, |
3898 | char *buf) { |
3899 | memcpy(dest: buf, src: container->array, n: BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); |
3900 | return bitset_container_size_in_bytes(container); |
3901 | } |
3902 | |
3903 | |
3904 | int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, |
3905 | const char *buf) { |
3906 | container->cardinality = cardinality; |
3907 | memcpy(dest: container->array, src: buf, n: BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); |
3908 | return bitset_container_size_in_bytes(container); |
3909 | } |
3910 | |
3911 | uint32_t bitset_container_serialization_len(void) { |
3912 | return(sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3913 | } |
3914 | |
3915 | void* bitset_container_deserialize(const char *buf, size_t buf_len) { |
3916 | bitset_container_t *ptr; |
3917 | size_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; |
3918 | |
3919 | if(l != buf_len) |
3920 | return(NULL); |
3921 | |
3922 | if((ptr = (bitset_container_t *)malloc(size: sizeof(bitset_container_t))) != NULL) { |
3923 | memcpy(dest: ptr, src: buf, n: sizeof(bitset_container_t)); |
3924 | // sizeof(__m256i) == 32 |
3925 | ptr->array = (uint64_t *) roaring_bitmap_aligned_malloc(alignment: 32, size: l); |
3926 | if (! ptr->array) { |
3927 | free(ptr: ptr); |
3928 | return NULL; |
3929 | } |
3930 | memcpy(dest: ptr->array, src: buf, n: l); |
3931 | ptr->cardinality = bitset_container_compute_cardinality(bitset: ptr); |
3932 | } |
3933 | |
3934 | return((void*)ptr); |
3935 | } |
3936 | |
3937 | bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { |
3938 | for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { |
3939 | uint64_t w = cont->array[i]; |
3940 | while (w != 0) { |
3941 | uint64_t t = w & (~w + 1); |
3942 | int r = __builtin_ctzll(w); |
3943 | if(!iterator(r + base, ptr)) return false; |
3944 | w ^= t; |
3945 | } |
3946 | base += 64; |
3947 | } |
3948 | return true; |
3949 | } |
3950 | |
3951 | bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { |
3952 | for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { |
3953 | uint64_t w = cont->array[i]; |
3954 | while (w != 0) { |
3955 | uint64_t t = w & (~w + 1); |
3956 | int r = __builtin_ctzll(w); |
3957 | if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false; |
3958 | w ^= t; |
3959 | } |
3960 | base += 64; |
3961 | } |
3962 | return true; |
3963 | } |
3964 | |
3965 | |
3966 | bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { |
3967 | if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { |
3968 | if(container1->cardinality != container2->cardinality) { |
3969 | return false; |
3970 | } |
3971 | if (container1->cardinality == INT32_C(0x10000)) { |
3972 | return true; |
3973 | } |
3974 | } |
3975 | #ifdef USEAVX |
3976 | const __m256i *ptr1 = (const __m256i*)container1->array; |
3977 | const __m256i *ptr2 = (const __m256i*)container2->array; |
3978 | for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) { |
3979 | __m256i r1 = _mm256_load_si256(ptr1+i); |
3980 | __m256i r2 = _mm256_load_si256(ptr2+i); |
3981 | int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); |
3982 | if ((uint32_t)mask != UINT32_MAX) { |
3983 | return false; |
3984 | } |
3985 | } |
3986 | #else |
3987 | return memcmp(s1: container1->array, |
3988 | s2: container2->array, |
3989 | n: BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0; |
3990 | #endif |
3991 | return true; |
3992 | } |
3993 | |
3994 | bool bitset_container_is_subset(const bitset_container_t *container1, |
3995 | const bitset_container_t *container2) { |
3996 | if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { |
3997 | if(container1->cardinality > container2->cardinality) { |
3998 | return false; |
3999 | } |
4000 | } |
4001 | for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { |
4002 | if((container1->array[i] & container2->array[i]) != container1->array[i]) { |
4003 | return false; |
4004 | } |
4005 | } |
4006 | return true; |
4007 | } |
4008 | |
4009 | bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { |
4010 | int card = bitset_container_cardinality(bitset: container); |
4011 | if(rank >= *start_rank + card) { |
4012 | *start_rank += card; |
4013 | return false; |
4014 | } |
4015 | const uint64_t *array = container->array; |
4016 | int32_t size; |
4017 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) { |
4018 | size = hamming(x: array[i]); |
4019 | if(rank <= *start_rank + size) { |
4020 | uint64_t w = container->array[i]; |
4021 | uint16_t base = i*64; |
4022 | while (w != 0) { |
4023 | uint64_t t = w & (~w + 1); |
4024 | int r = __builtin_ctzll(w); |
4025 | if(*start_rank == rank) { |
4026 | *element = r+base; |
4027 | return true; |
4028 | } |
4029 | w ^= t; |
4030 | *start_rank += 1; |
4031 | } |
4032 | } |
4033 | else |
4034 | *start_rank += size; |
4035 | } |
4036 | assert(false); |
4037 | __builtin_unreachable(); |
4038 | } |
4039 | |
4040 | |
4041 | /* Returns the smallest value (assumes not empty) */ |
4042 | uint16_t bitset_container_minimum(const bitset_container_t *container) { |
4043 | for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { |
4044 | uint64_t w = container->array[i]; |
4045 | if (w != 0) { |
4046 | int r = __builtin_ctzll(w); |
4047 | return r + i * 64; |
4048 | } |
4049 | } |
4050 | return UINT16_MAX; |
4051 | } |
4052 | |
4053 | /* Returns the largest value (assumes not empty) */ |
4054 | uint16_t bitset_container_maximum(const bitset_container_t *container) { |
4055 | for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) { |
4056 | uint64_t w = container->array[i]; |
4057 | if (w != 0) { |
4058 | int r = __builtin_clzll(w); |
4059 | return i * 64 + 63 - r; |
4060 | } |
4061 | } |
4062 | return 0; |
4063 | } |
4064 | |
4065 | /* Returns the number of values equal or smaller than x */ |
4066 | int bitset_container_rank(const bitset_container_t *container, uint16_t x) { |
4067 | // credit: aqrit |
4068 | int sum = 0; |
4069 | int i = 0; |
4070 | for (int end = x / 64; i < end; i++){ |
4071 | sum += hamming(x: container->array[i]); |
4072 | } |
4073 | uint64_t lastword = container->array[i]; |
4074 | uint64_t lastpos = UINT64_C(1) << (x % 64); |
4075 | uint64_t mask = lastpos + lastpos - 1; // smear right |
4076 | sum += hamming(x: lastword & mask); |
4077 | return sum; |
4078 | } |
4079 | |
4080 | /* Returns the index of the first value equal or larger than x, or -1 */ |
4081 | int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) { |
4082 | uint32_t x32 = x; |
4083 | uint32_t k = x32 / 64; |
4084 | uint64_t word = container->array[k]; |
4085 | const int diff = x32 - k * 64; // in [0,64) |
4086 | word = (word >> diff) << diff; // a mask is faster, but we don't care |
4087 | while(word == 0) { |
4088 | k++; |
4089 | if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1; |
4090 | word = container->array[k]; |
4091 | } |
4092 | return k * 64 + __builtin_ctzll(word); |
4093 | } |
4094 | /* end file src/containers/bitset.c */ |
4095 | /* begin file src/containers/containers.c */ |
4096 | |
4097 | |
4098 | void container_free(void *container, uint8_t typecode) { |
4099 | switch (typecode) { |
4100 | case BITSET_CONTAINER_TYPE_CODE: |
4101 | bitset_container_free(bitset: (bitset_container_t *)container); |
4102 | break; |
4103 | case ARRAY_CONTAINER_TYPE_CODE: |
4104 | array_container_free(arr: (array_container_t *)container); |
4105 | break; |
4106 | case RUN_CONTAINER_TYPE_CODE: |
4107 | run_container_free(run: (run_container_t *)container); |
4108 | break; |
4109 | case SHARED_CONTAINER_TYPE_CODE: |
4110 | shared_container_free(container: (shared_container_t *)container); |
4111 | break; |
4112 | default: |
4113 | assert(false); |
4114 | __builtin_unreachable(); |
4115 | } |
4116 | } |
4117 | |
4118 | void container_printf(const void *container, uint8_t typecode) { |
4119 | container = container_unwrap_shared(candidate_shared_container: container, type: &typecode); |
4120 | switch (typecode) { |
4121 | case BITSET_CONTAINER_TYPE_CODE: |
4122 | bitset_container_printf(v: (const bitset_container_t *)container); |
4123 | return; |
4124 | case ARRAY_CONTAINER_TYPE_CODE: |
4125 | array_container_printf(v: (const array_container_t *)container); |
4126 | return; |
4127 | case RUN_CONTAINER_TYPE_CODE: |
4128 | run_container_printf(v: (const run_container_t *)container); |
4129 | return; |
4130 | default: |
4131 | __builtin_unreachable(); |
4132 | } |
4133 | } |
4134 | |
4135 | void container_printf_as_uint32_array(const void *container, uint8_t typecode, |
4136 | uint32_t base) { |
4137 | container = container_unwrap_shared(candidate_shared_container: container, type: &typecode); |
4138 | switch (typecode) { |
4139 | case BITSET_CONTAINER_TYPE_CODE: |
4140 | bitset_container_printf_as_uint32_array( |
4141 | v: (const bitset_container_t *)container, base); |
4142 | return; |
4143 | case ARRAY_CONTAINER_TYPE_CODE: |
4144 | array_container_printf_as_uint32_array( |
4145 | v: (const array_container_t *)container, base); |
4146 | return; |
4147 | case RUN_CONTAINER_TYPE_CODE: |
4148 | run_container_printf_as_uint32_array( |
4149 | v: (const run_container_t *)container, base); |
4150 | return; |
4151 | return; |
4152 | default: |
4153 | __builtin_unreachable(); |
4154 | } |
4155 | } |
4156 | |
4157 | int32_t container_serialize(const void *container, uint8_t typecode, |
4158 | char *buf) { |
4159 | container = container_unwrap_shared(candidate_shared_container: container, type: &typecode); |
4160 | switch (typecode) { |
4161 | case BITSET_CONTAINER_TYPE_CODE: |
4162 | return (bitset_container_serialize(container: (const bitset_container_t *)container, |
4163 | buf)); |
4164 | case ARRAY_CONTAINER_TYPE_CODE: |
4165 | return ( |
4166 | array_container_serialize(container: (const array_container_t *)container, buf)); |
4167 | case RUN_CONTAINER_TYPE_CODE: |
4168 | return (run_container_serialize(container: (const run_container_t *)container, buf)); |
4169 | default: |
4170 | assert(0); |
4171 | __builtin_unreachable(); |
4172 | return (-1); |
4173 | } |
4174 | } |
4175 | |
4176 | uint32_t container_serialization_len(const void *container, uint8_t typecode) { |
4177 | container = container_unwrap_shared(candidate_shared_container: container, type: &typecode); |
4178 | switch (typecode) { |
4179 | case BITSET_CONTAINER_TYPE_CODE: |
4180 | return bitset_container_serialization_len(); |
4181 | case ARRAY_CONTAINER_TYPE_CODE: |
4182 | return array_container_serialization_len( |
4183 | container: (const array_container_t *)container); |
4184 | case RUN_CONTAINER_TYPE_CODE: |
4185 | return run_container_serialization_len( |
4186 | container: (const run_container_t *)container); |
4187 | default: |
4188 | assert(0); |
4189 | __builtin_unreachable(); |
4190 | return (0); |
4191 | } |
4192 | } |
4193 | |
4194 | void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) { |
4195 | switch (typecode) { |
4196 | case BITSET_CONTAINER_TYPE_CODE: |
4197 | return (bitset_container_deserialize(buf, buf_len)); |
4198 | case ARRAY_CONTAINER_TYPE_CODE: |
4199 | return (array_container_deserialize(buf, buf_len)); |
4200 | case RUN_CONTAINER_TYPE_CODE: |
4201 | return (run_container_deserialize(buf, buf_len)); |
4202 | case SHARED_CONTAINER_TYPE_CODE: |
4203 | printf(format: "this should never happen.\n" ); |
4204 | assert(0); |
4205 | __builtin_unreachable(); |
4206 | return (NULL); |
4207 | default: |
4208 | assert(0); |
4209 | __builtin_unreachable(); |
4210 | return (NULL); |
4211 | } |
4212 | } |
4213 | |
4214 | void *get_copy_of_container(void *container, uint8_t *typecode, |
4215 | bool copy_on_write) { |
4216 | if (copy_on_write) { |
4217 | shared_container_t *shared_container; |
4218 | if (*typecode == SHARED_CONTAINER_TYPE_CODE) { |
4219 | shared_container = (shared_container_t *)container; |
4220 | shared_container->counter += 1; |
4221 | return shared_container; |
4222 | } |
4223 | assert(*typecode != SHARED_CONTAINER_TYPE_CODE); |
4224 | |
4225 | if ((shared_container = (shared_container_t *)malloc( |
4226 | size: sizeof(shared_container_t))) == NULL) { |
4227 | return NULL; |
4228 | } |
4229 | |
4230 | shared_container->container = container; |
4231 | shared_container->typecode = *typecode; |
4232 | |
4233 | shared_container->counter = 2; |
4234 | *typecode = SHARED_CONTAINER_TYPE_CODE; |
4235 | |
4236 | return shared_container; |
4237 | } // copy_on_write |
4238 | // otherwise, no copy on write... |
4239 | const void *actualcontainer = |
4240 | container_unwrap_shared(candidate_shared_container: (const void *)container, type: typecode); |
4241 | assert(*typecode != SHARED_CONTAINER_TYPE_CODE); |
4242 | return container_clone(container: actualcontainer, typecode: *typecode); |
4243 | } |
4244 | /** |
4245 | * Copies a container, requires a typecode. This allocates new memory, caller |
4246 | * is responsible for deallocation. |
4247 | */ |
4248 | void *container_clone(const void *container, uint8_t typecode) { |
4249 | container = container_unwrap_shared(candidate_shared_container: container, type: &typecode); |
4250 | switch (typecode) { |
4251 | case BITSET_CONTAINER_TYPE_CODE: |
4252 | return bitset_container_clone(src: (const bitset_container_t *)container); |
4253 | case ARRAY_CONTAINER_TYPE_CODE: |
4254 | return array_container_clone(src: (const array_container_t *)container); |
4255 | case RUN_CONTAINER_TYPE_CODE: |
4256 | return run_container_clone(src: (const run_container_t *)container); |
4257 | case SHARED_CONTAINER_TYPE_CODE: |
4258 | printf(format: "shared containers are not clonable\n" ); |
4259 | assert(false); |
4260 | return NULL; |
4261 | default: |
4262 | assert(false); |
4263 | __builtin_unreachable(); |
4264 | return NULL; |
4265 | } |
4266 | } |
4267 | |
4268 | void *(shared_container_t *container, |
4269 | uint8_t *typecode) { |
4270 | assert(container->counter > 0); |
4271 | assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); |
4272 | container->counter--; |
4273 | *typecode = container->typecode; |
4274 | void *answer; |
4275 | if (container->counter == 0) { |
4276 | answer = container->container; |
4277 | container->container = NULL; // paranoid |
4278 | free(ptr: container); |
4279 | } else { |
4280 | answer = container_clone(container: container->container, typecode: *typecode); |
4281 | } |
4282 | assert(*typecode != SHARED_CONTAINER_TYPE_CODE); |
4283 | return answer; |
4284 | } |
4285 | |
4286 | void shared_container_free(shared_container_t *container) { |
4287 | assert(container->counter > 0); |
4288 | container->counter--; |
4289 | if (container->counter == 0) { |
4290 | assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); |
4291 | container_free(container: container->container, typecode: container->typecode); |
4292 | container->container = NULL; // paranoid |
4293 | free(ptr: container); |
4294 | } |
4295 | } |
4296 | |
4297 | /* end file src/containers/containers.c */ |
4298 | /* begin file src/containers/convert.c */ |
4299 | #include <stdio.h> |
4300 | |
4301 | |
4302 | // file contains grubby stuff that must know impl. details of all container |
4303 | // types. |
4304 | bitset_container_t *bitset_container_from_array(const array_container_t *a) { |
4305 | bitset_container_t *ans = bitset_container_create(); |
4306 | int limit = array_container_cardinality(array: a); |
4307 | for (int i = 0; i < limit; ++i) bitset_container_set(bitset: ans, pos: a->array[i]); |
4308 | return ans; |
4309 | } |
4310 | |
4311 | bitset_container_t *bitset_container_from_run(const run_container_t *arr) { |
4312 | int card = run_container_cardinality(run: arr); |
4313 | bitset_container_t *answer = bitset_container_create(); |
4314 | for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { |
4315 | rle16_t vl = arr->runs[rlepos]; |
4316 | bitset_set_lenrange(bitmap: answer->array, start: vl.value, lenminusone: vl.length); |
4317 | } |
4318 | answer->cardinality = card; |
4319 | return answer; |
4320 | } |
4321 | |
4322 | array_container_t *array_container_from_run(const run_container_t *arr) { |
4323 | array_container_t *answer = |
4324 | array_container_create_given_capacity(size: run_container_cardinality(run: arr)); |
4325 | answer->cardinality = 0; |
4326 | for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { |
4327 | int run_start = arr->runs[rlepos].value; |
4328 | int run_end = run_start + arr->runs[rlepos].length; |
4329 | |
4330 | for (int run_value = run_start; run_value <= run_end; ++run_value) { |
4331 | answer->array[answer->cardinality++] = (uint16_t)run_value; |
4332 | } |
4333 | } |
4334 | return answer; |
4335 | } |
4336 | |
4337 | array_container_t *array_container_from_bitset(const bitset_container_t *bits) { |
4338 | array_container_t *result = |
4339 | array_container_create_given_capacity(size: bits->cardinality); |
4340 | result->cardinality = bits->cardinality; |
4341 | // sse version ends up being slower here |
4342 | // (bitset_extract_setbits_sse_uint16) |
4343 | // because of the sparsity of the data |
4344 | bitset_extract_setbits_uint16(bitset: bits->array, length: BITSET_CONTAINER_SIZE_IN_WORDS, |
4345 | out: result->array, base: 0); |
4346 | return result; |
4347 | } |
4348 | |
4349 | /* assumes that container has adequate space. Run from [s,e] (inclusive) */ |
4350 | static void add_run(run_container_t *r, int s, int e) { |
4351 | r->runs[r->n_runs].value = s; |
4352 | r->runs[r->n_runs].length = e - s; |
4353 | r->n_runs++; |
4354 | } |
4355 | |
4356 | run_container_t *run_container_from_array(const array_container_t *c) { |
4357 | int32_t n_runs = array_container_number_of_runs(a: c); |
4358 | run_container_t *answer = run_container_create_given_capacity(size: n_runs); |
4359 | int prev = -2; |
4360 | int run_start = -1; |
4361 | int32_t card = c->cardinality; |
4362 | if (card == 0) return answer; |
4363 | for (int i = 0; i < card; ++i) { |
4364 | const uint16_t cur_val = c->array[i]; |
4365 | if (cur_val != prev + 1) { |
4366 | // new run starts; flush old one, if any |
4367 | if (run_start != -1) add_run(r: answer, s: run_start, e: prev); |
4368 | run_start = cur_val; |
4369 | } |
4370 | prev = c->array[i]; |
4371 | } |
4372 | // now prev is the last seen value |
4373 | add_run(r: answer, s: run_start, e: prev); |
4374 | // assert(run_container_cardinality(answer) == c->cardinality); |
4375 | return answer; |
4376 | } |
4377 | |
4378 | /** |
4379 | * Convert the runcontainer to either a Bitmap or an Array Container, depending |
4380 | * on the cardinality. Frees the container. |
4381 | * Allocates and returns new container, which caller is responsible for freeing. |
4382 | * It does not free the run container. |
4383 | */ |
4384 | |
4385 | void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card, |
4386 | uint8_t *resulttype) { |
4387 | if (card <= DEFAULT_MAX_SIZE) { |
4388 | array_container_t *answer = array_container_create_given_capacity(size: card); |
4389 | answer->cardinality = 0; |
4390 | for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { |
4391 | uint16_t run_start = r->runs[rlepos].value; |
4392 | uint16_t run_end = run_start + r->runs[rlepos].length; |
4393 | for (uint16_t run_value = run_start; run_value <= run_end; |
4394 | ++run_value) { |
4395 | answer->array[answer->cardinality++] = run_value; |
4396 | } |
4397 | } |
4398 | assert(card == answer->cardinality); |
4399 | *resulttype = ARRAY_CONTAINER_TYPE_CODE; |
4400 | //run_container_free(r); |
4401 | return answer; |
4402 | } |
4403 | bitset_container_t *answer = bitset_container_create(); |
4404 | for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { |
4405 | uint16_t run_start = r->runs[rlepos].value; |
4406 | bitset_set_lenrange(bitmap: answer->array, start: run_start, lenminusone: r->runs[rlepos].length); |
4407 | } |
4408 | answer->cardinality = card; |
4409 | *resulttype = BITSET_CONTAINER_TYPE_CODE; |
4410 | //run_container_free(r); |
4411 | return answer; |
4412 | } |
4413 | |
4414 | /* Converts a run container to either an array or a bitset, IF it saves space. |
4415 | */ |
4416 | /* If a conversion occurs, the caller is responsible to free the original |
4417 | * container and |
4418 | * he becomes responsible to free the new one. */ |
4419 | void *convert_run_to_efficient_container(run_container_t *c, |
4420 | uint8_t *typecode_after) { |
4421 | int32_t size_as_run_container = |
4422 | run_container_serialized_size_in_bytes(num_runs: c->n_runs); |
4423 | |
4424 | int32_t size_as_bitset_container = |
4425 | bitset_container_serialized_size_in_bytes(); |
4426 | int32_t card = run_container_cardinality(run: c); |
4427 | int32_t size_as_array_container = |
4428 | array_container_serialized_size_in_bytes(card); |
4429 | |
4430 | int32_t min_size_non_run = |
4431 | size_as_bitset_container < size_as_array_container |
4432 | ? size_as_bitset_container |
4433 | : size_as_array_container; |
4434 | if (size_as_run_container <= min_size_non_run) { // no conversion |
4435 | *typecode_after = RUN_CONTAINER_TYPE_CODE; |
4436 | return c; |
4437 | } |
4438 | if (card <= DEFAULT_MAX_SIZE) { |
4439 | // to array |
4440 | array_container_t *answer = array_container_create_given_capacity(size: card); |
4441 | answer->cardinality = 0; |
4442 | for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { |
4443 | int run_start = c->runs[rlepos].value; |
4444 | int run_end = run_start + c->runs[rlepos].length; |
4445 | |
4446 | for (int run_value = run_start; run_value <= run_end; ++run_value) { |
4447 | answer->array[answer->cardinality++] = (uint16_t)run_value; |
4448 | } |
4449 | } |
4450 | *typecode_after = ARRAY_CONTAINER_TYPE_CODE; |
4451 | return answer; |
4452 | } |
4453 | |
4454 | // else to bitset |
4455 | bitset_container_t *answer = bitset_container_create(); |
4456 | |
4457 | for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { |
4458 | int start = c->runs[rlepos].value; |
4459 | int end = start + c->runs[rlepos].length; |
4460 | bitset_set_range(bitmap: answer->array, start, end: end + 1); |
4461 | } |
4462 | answer->cardinality = card; |
4463 | *typecode_after = BITSET_CONTAINER_TYPE_CODE; |
4464 | return answer; |
4465 | } |
4466 | |
4467 | // like convert_run_to_efficient_container but frees the old result if needed |
4468 | void *convert_run_to_efficient_container_and_free(run_container_t *c, |
4469 | uint8_t *typecode_after) { |
4470 | void *answer = convert_run_to_efficient_container(c, typecode_after); |
4471 | if (answer != c) run_container_free(run: c); |
4472 | return answer; |
4473 | } |
4474 | |
4475 | /* once converted, the original container is disposed here, rather than |
4476 | in roaring_array |
4477 | */ |
4478 | |
4479 | // TODO: split into run- array- and bitset- subfunctions for sanity; |
4480 | // a few function calls won't really matter. |
4481 | |
4482 | void *convert_run_optimize(void *c, uint8_t typecode_original, |
4483 | uint8_t *typecode_after) { |
4484 | if (typecode_original == RUN_CONTAINER_TYPE_CODE) { |
4485 | void *newc = convert_run_to_efficient_container(c: (run_container_t *)c, |
4486 | typecode_after); |
4487 | if (newc != c) { |
4488 | container_free(container: c, typecode: typecode_original); |
4489 | } |
4490 | return newc; |
4491 | } else if (typecode_original == ARRAY_CONTAINER_TYPE_CODE) { |
4492 | // it might need to be converted to a run container. |
4493 | array_container_t *c_qua_array = (array_container_t *)c; |
4494 | int32_t n_runs = array_container_number_of_runs(a: c_qua_array); |
4495 | int32_t size_as_run_container = |
4496 | run_container_serialized_size_in_bytes(num_runs: n_runs); |
4497 | int32_t card = array_container_cardinality(array: c_qua_array); |
4498 | int32_t size_as_array_container = |
4499 | array_container_serialized_size_in_bytes(card); |
4500 | |
4501 | if (size_as_run_container >= size_as_array_container) { |
4502 | *typecode_after = ARRAY_CONTAINER_TYPE_CODE; |
4503 | return c; |
4504 | } |
4505 | // else convert array to run container |
4506 | run_container_t *answer = run_container_create_given_capacity(size: n_runs); |
4507 | int prev = -2; |
4508 | int run_start = -1; |
4509 | |
4510 | assert(card > 0); |
4511 | for (int i = 0; i < card; ++i) { |
4512 | uint16_t cur_val = c_qua_array->array[i]; |
4513 | if (cur_val != prev + 1) { |
4514 | // new run starts; flush old one, if any |
4515 | if (run_start != -1) add_run(r: answer, s: run_start, e: prev); |
4516 | run_start = cur_val; |
4517 | } |
4518 | prev = c_qua_array->array[i]; |
4519 | } |
4520 | assert(run_start >= 0); |
4521 | // now prev is the last seen value |
4522 | add_run(r: answer, s: run_start, e: prev); |
4523 | *typecode_after = RUN_CONTAINER_TYPE_CODE; |
4524 | array_container_free(arr: c_qua_array); |
4525 | return answer; |
4526 | } else if (typecode_original == |
4527 | BITSET_CONTAINER_TYPE_CODE) { // run conversions on bitset |
4528 | // does bitset need conversion to run? |
4529 | bitset_container_t *c_qua_bitset = (bitset_container_t *)c; |
4530 | int32_t n_runs = bitset_container_number_of_runs(b: c_qua_bitset); |
4531 | int32_t size_as_run_container = |
4532 | run_container_serialized_size_in_bytes(num_runs: n_runs); |
4533 | int32_t size_as_bitset_container = |
4534 | bitset_container_serialized_size_in_bytes(); |
4535 | |
4536 | if (size_as_bitset_container <= size_as_run_container) { |
4537 | // no conversion needed. |
4538 | *typecode_after = BITSET_CONTAINER_TYPE_CODE; |
4539 | return c; |
4540 | } |
4541 | // bitset to runcontainer (ported from Java RunContainer( |
4542 | // BitmapContainer bc, int nbrRuns)) |
4543 | assert(n_runs > 0); // no empty bitmaps |
4544 | run_container_t *answer = run_container_create_given_capacity(size: n_runs); |
4545 | |
4546 | int long_ctr = 0; |
4547 | uint64_t cur_word = c_qua_bitset->array[0]; |
4548 | int run_count = 0; |
4549 | while (true) { |
4550 | while (cur_word == UINT64_C(0) && |
4551 | long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) |
4552 | cur_word = c_qua_bitset->array[++long_ctr]; |
4553 | |
4554 | if (cur_word == UINT64_C(0)) { |
4555 | bitset_container_free(bitset: c_qua_bitset); |
4556 | *typecode_after = RUN_CONTAINER_TYPE_CODE; |
4557 | return answer; |
4558 | } |
4559 | |
4560 | int local_run_start = __builtin_ctzll(cur_word); |
4561 | int run_start = local_run_start + 64 * long_ctr; |
4562 | uint64_t cur_word_with_1s = cur_word | (cur_word - 1); |
4563 | |
4564 | int run_end = 0; |
4565 | while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) && |
4566 | long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) |
4567 | cur_word_with_1s = c_qua_bitset->array[++long_ctr]; |
4568 | |
4569 | if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) { |
4570 | run_end = 64 + long_ctr * 64; // exclusive, I guess |
4571 | add_run(r: answer, s: run_start, e: run_end - 1); |
4572 | bitset_container_free(bitset: c_qua_bitset); |
4573 | *typecode_after = RUN_CONTAINER_TYPE_CODE; |
4574 | return answer; |
4575 | } |
4576 | int local_run_end = __builtin_ctzll(~cur_word_with_1s); |
4577 | run_end = local_run_end + long_ctr * 64; |
4578 | add_run(r: answer, s: run_start, e: run_end - 1); |
4579 | run_count++; |
4580 | cur_word = cur_word_with_1s & (cur_word_with_1s + 1); |
4581 | } |
4582 | return answer; |
4583 | } else { |
4584 | assert(false); |
4585 | __builtin_unreachable(); |
4586 | return NULL; |
4587 | } |
4588 | } |
4589 | |
4590 | bitset_container_t *bitset_container_from_run_range(const run_container_t *run, |
4591 | uint32_t min, uint32_t max) { |
4592 | bitset_container_t *bitset = bitset_container_create(); |
4593 | int32_t union_cardinality = 0; |
4594 | for (int32_t i = 0; i < run->n_runs; ++i) { |
4595 | uint32_t rle_min = run->runs[i].value; |
4596 | uint32_t rle_max = rle_min + run->runs[i].length; |
4597 | bitset_set_lenrange(bitmap: bitset->array, start: rle_min, lenminusone: rle_max - rle_min); |
4598 | union_cardinality += run->runs[i].length + 1; |
4599 | } |
4600 | union_cardinality += max - min + 1; |
4601 | union_cardinality -= bitset_lenrange_cardinality(bitmap: bitset->array, start: min, lenminusone: max-min); |
4602 | bitset_set_lenrange(bitmap: bitset->array, start: min, lenminusone: max - min); |
4603 | bitset->cardinality = union_cardinality; |
4604 | return bitset; |
4605 | } |
4606 | /* end file src/containers/convert.c */ |
4607 | /* begin file src/containers/mixed_andnot.c */ |
4608 | /* |
4609 | * mixed_andnot.c. More methods since operation is not symmetric, |
4610 | * except no "wide" andnot , so no lazy options motivated. |
4611 | */ |
4612 | |
4613 | #include <assert.h> |
4614 | #include <string.h> |
4615 | |
4616 | |
4617 | /* Compute the andnot of src_1 and src_2 and write the result to |
4618 | * dst, a valid array container that could be the same as dst.*/ |
4619 | void array_bitset_container_andnot(const array_container_t *src_1, |
4620 | const bitset_container_t *src_2, |
4621 | array_container_t *dst) { |
4622 | // follows Java implementation as of June 2016 |
4623 | if (dst->capacity < src_1->cardinality) { |
4624 | array_container_grow(container: dst, min: src_1->cardinality, false); |
4625 | } |
4626 | int32_t newcard = 0; |
4627 | const int32_t origcard = src_1->cardinality; |
4628 | for (int i = 0; i < origcard; ++i) { |
4629 | uint16_t key = src_1->array[i]; |
4630 | dst->array[newcard] = key; |
4631 | newcard += 1 - bitset_container_contains(bitset: src_2, pos: key); |
4632 | } |
4633 | dst->cardinality = newcard; |
4634 | } |
4635 | |
4636 | /* Compute the andnot of src_1 and src_2 and write the result to |
4637 | * src_1 */ |
4638 | |
4639 | void array_bitset_container_iandnot(array_container_t *src_1, |
4640 | const bitset_container_t *src_2) { |
4641 | array_bitset_container_andnot(src_1, src_2, dst: src_1); |
4642 | } |
4643 | |
4644 | /* Compute the andnot of src_1 and src_2 and write the result to |
4645 | * dst, which does not initially have a valid container. |
4646 | * Return true for a bitset result; false for array |
4647 | */ |
4648 | |
4649 | bool bitset_array_container_andnot(const bitset_container_t *src_1, |
4650 | const array_container_t *src_2, void **dst) { |
4651 | // Java did this directly, but we have option of asm or avx |
4652 | bitset_container_t *result = bitset_container_create(); |
4653 | bitset_container_copy(source: src_1, dest: result); |
4654 | result->cardinality = |
4655 | (int32_t)bitset_clear_list(bitset: result->array, card: (uint64_t)result->cardinality, |
4656 | list: src_2->array, length: (uint64_t)src_2->cardinality); |
4657 | |
4658 | // do required type conversions. |
4659 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
4660 | *dst = array_container_from_bitset(bits: result); |
4661 | bitset_container_free(bitset: result); |
4662 | return false; |
4663 | } |
4664 | *dst = result; |
4665 | return true; |
4666 | } |
4667 | |
4668 | /* Compute the andnot of src_1 and src_2 and write the result to |
4669 | * dst (which has no container initially). It will modify src_1 |
4670 | * to be dst if the result is a bitset. Otherwise, it will |
4671 | * free src_1 and dst will be a new array container. In both |
4672 | * cases, the caller is responsible for deallocating dst. |
4673 | * Returns true iff dst is a bitset */ |
4674 | |
4675 | bool bitset_array_container_iandnot(bitset_container_t *src_1, |
4676 | const array_container_t *src_2, |
4677 | void **dst) { |
4678 | *dst = src_1; |
4679 | src_1->cardinality = |
4680 | (int32_t)bitset_clear_list(bitset: src_1->array, card: (uint64_t)src_1->cardinality, |
4681 | list: src_2->array, length: (uint64_t)src_2->cardinality); |
4682 | |
4683 | if (src_1->cardinality <= DEFAULT_MAX_SIZE) { |
4684 | *dst = array_container_from_bitset(bits: src_1); |
4685 | bitset_container_free(bitset: src_1); |
4686 | return false; // not bitset |
4687 | } else |
4688 | return true; |
4689 | } |
4690 | |
4691 | /* Compute the andnot of src_1 and src_2 and write the result to |
4692 | * dst. Result may be either a bitset or an array container |
4693 | * (returns "result is bitset"). dst does not initially have |
4694 | * any container, but becomes either a bitset container (return |
4695 | * result true) or an array container. |
4696 | */ |
4697 | |
4698 | bool run_bitset_container_andnot(const run_container_t *src_1, |
4699 | const bitset_container_t *src_2, void **dst) { |
4700 | // follows the Java implementation as of June 2016 |
4701 | int card = run_container_cardinality(run: src_1); |
4702 | if (card <= DEFAULT_MAX_SIZE) { |
4703 | // must be an array |
4704 | array_container_t *answer = array_container_create_given_capacity(size: card); |
4705 | answer->cardinality = 0; |
4706 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
4707 | rle16_t rle = src_1->runs[rlepos]; |
4708 | for (int run_value = rle.value; run_value <= rle.value + rle.length; |
4709 | ++run_value) { |
4710 | if (!bitset_container_get(bitset: src_2, pos: (uint16_t)run_value)) { |
4711 | answer->array[answer->cardinality++] = (uint16_t)run_value; |
4712 | } |
4713 | } |
4714 | } |
4715 | *dst = answer; |
4716 | return false; |
4717 | } else { // we guess it will be a bitset, though have to check guess when |
4718 | // done |
4719 | bitset_container_t *answer = bitset_container_clone(src: src_2); |
4720 | |
4721 | uint32_t last_pos = 0; |
4722 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
4723 | rle16_t rle = src_1->runs[rlepos]; |
4724 | |
4725 | uint32_t start = rle.value; |
4726 | uint32_t end = start + rle.length + 1; |
4727 | bitset_reset_range(bitmap: answer->array, start: last_pos, end: start); |
4728 | bitset_flip_range(bitmap: answer->array, start, end); |
4729 | last_pos = end; |
4730 | } |
4731 | bitset_reset_range(bitmap: answer->array, start: last_pos, end: (uint32_t)(1 << 16)); |
4732 | |
4733 | answer->cardinality = bitset_container_compute_cardinality(bitset: answer); |
4734 | |
4735 | if (answer->cardinality <= DEFAULT_MAX_SIZE) { |
4736 | *dst = array_container_from_bitset(bits: answer); |
4737 | bitset_container_free(bitset: answer); |
4738 | return false; // not bitset |
4739 | } |
4740 | *dst = answer; |
4741 | return true; // bitset |
4742 | } |
4743 | } |
4744 | |
4745 | /* Compute the andnot of src_1 and src_2 and write the result to |
4746 | * dst. Result may be either a bitset or an array container |
4747 | * (returns "result is bitset"). dst does not initially have |
4748 | * any container, but becomes either a bitset container (return |
4749 | * result true) or an array container. |
4750 | */ |
4751 | |
4752 | bool run_bitset_container_iandnot(run_container_t *src_1, |
4753 | const bitset_container_t *src_2, void **dst) { |
4754 | // dummy implementation |
4755 | bool ans = run_bitset_container_andnot(src_1, src_2, dst); |
4756 | run_container_free(run: src_1); |
4757 | return ans; |
4758 | } |
4759 | |
4760 | /* Compute the andnot of src_1 and src_2 and write the result to |
4761 | * dst. Result may be either a bitset or an array container |
4762 | * (returns "result is bitset"). dst does not initially have |
4763 | * any container, but becomes either a bitset container (return |
4764 | * result true) or an array container. |
4765 | */ |
4766 | |
4767 | bool bitset_run_container_andnot(const bitset_container_t *src_1, |
4768 | const run_container_t *src_2, void **dst) { |
4769 | // follows Java implementation |
4770 | bitset_container_t *result = bitset_container_create(); |
4771 | |
4772 | bitset_container_copy(source: src_1, dest: result); |
4773 | for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { |
4774 | rle16_t rle = src_2->runs[rlepos]; |
4775 | bitset_reset_range(bitmap: result->array, start: rle.value, |
4776 | end: rle.value + rle.length + UINT32_C(1)); |
4777 | } |
4778 | result->cardinality = bitset_container_compute_cardinality(bitset: result); |
4779 | |
4780 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
4781 | *dst = array_container_from_bitset(bits: result); |
4782 | bitset_container_free(bitset: result); |
4783 | return false; // not bitset |
4784 | } |
4785 | *dst = result; |
4786 | return true; // bitset |
4787 | } |
4788 | |
4789 | /* Compute the andnot of src_1 and src_2 and write the result to |
4790 | * dst (which has no container initially). It will modify src_1 |
4791 | * to be dst if the result is a bitset. Otherwise, it will |
4792 | * free src_1 and dst will be a new array container. In both |
4793 | * cases, the caller is responsible for deallocating dst. |
4794 | * Returns true iff dst is a bitset */ |
4795 | |
4796 | bool bitset_run_container_iandnot(bitset_container_t *src_1, |
4797 | const run_container_t *src_2, void **dst) { |
4798 | *dst = src_1; |
4799 | |
4800 | for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { |
4801 | rle16_t rle = src_2->runs[rlepos]; |
4802 | bitset_reset_range(bitmap: src_1->array, start: rle.value, |
4803 | end: rle.value + rle.length + UINT32_C(1)); |
4804 | } |
4805 | src_1->cardinality = bitset_container_compute_cardinality(bitset: src_1); |
4806 | |
4807 | if (src_1->cardinality <= DEFAULT_MAX_SIZE) { |
4808 | *dst = array_container_from_bitset(bits: src_1); |
4809 | bitset_container_free(bitset: src_1); |
4810 | return false; // not bitset |
4811 | } else |
4812 | return true; |
4813 | } |
4814 | |
4815 | /* helper. a_out must be a valid array container with adequate capacity. |
4816 | * Returns the cardinality of the output container. Partly Based on Java |
4817 | * implementation Util.unsignedDifference. |
4818 | * |
4819 | * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper |
4820 | * to avoid advanceUntil? |
4821 | */ |
4822 | |
4823 | static int run_array_array_subtract(const run_container_t *r, |
4824 | const array_container_t *a_in, |
4825 | array_container_t *a_out) { |
4826 | int out_card = 0; |
4827 | int32_t in_array_pos = |
4828 | -1; // since advanceUntil always assumes we start the search AFTER this |
4829 | |
4830 | for (int rlepos = 0; rlepos < r->n_runs; rlepos++) { |
4831 | int32_t start = r->runs[rlepos].value; |
4832 | int32_t end = start + r->runs[rlepos].length + 1; |
4833 | |
4834 | in_array_pos = advanceUntil(array: a_in->array, pos: in_array_pos, |
4835 | length: a_in->cardinality, min: (uint16_t)start); |
4836 | |
4837 | if (in_array_pos >= a_in->cardinality) { // run has no items subtracted |
4838 | for (int32_t i = start; i < end; ++i) |
4839 | a_out->array[out_card++] = (uint16_t)i; |
4840 | } else { |
4841 | uint16_t next_nonincluded = a_in->array[in_array_pos]; |
4842 | if (next_nonincluded >= end) { |
4843 | // another case when run goes unaltered |
4844 | for (int32_t i = start; i < end; ++i) |
4845 | a_out->array[out_card++] = (uint16_t)i; |
4846 | in_array_pos--; // ensure we see this item again if necessary |
4847 | } else { |
4848 | for (int32_t i = start; i < end; ++i) |
4849 | if (i != next_nonincluded) |
4850 | a_out->array[out_card++] = (uint16_t)i; |
4851 | else // 0 should ensure we don't match |
4852 | next_nonincluded = |
4853 | (in_array_pos + 1 >= a_in->cardinality) |
4854 | ? 0 |
4855 | : a_in->array[++in_array_pos]; |
4856 | in_array_pos--; // see again |
4857 | } |
4858 | } |
4859 | } |
4860 | return out_card; |
4861 | } |
4862 | |
4863 | /* dst does not indicate a valid container initially. Eventually it |
4864 | * can become any type of container. |
4865 | */ |
4866 | |
4867 | int run_array_container_andnot(const run_container_t *src_1, |
4868 | const array_container_t *src_2, void **dst) { |
4869 | // follows the Java impl as of June 2016 |
4870 | |
4871 | int card = run_container_cardinality(run: src_1); |
4872 | const int arbitrary_threshold = 32; |
4873 | |
4874 | if (card <= arbitrary_threshold) { |
4875 | if (src_2->cardinality == 0) { |
4876 | *dst = run_container_clone(src: src_1); |
4877 | return RUN_CONTAINER_TYPE_CODE; |
4878 | } |
4879 | // Java's "lazyandNot.toEfficientContainer" thing |
4880 | run_container_t *answer = run_container_create_given_capacity( |
4881 | size: card + array_container_cardinality(array: src_2)); |
4882 | |
4883 | int rlepos = 0; |
4884 | int xrlepos = 0; // "x" is src_2 |
4885 | rle16_t rle = src_1->runs[rlepos]; |
4886 | int32_t start = rle.value; |
4887 | int32_t end = start + rle.length + 1; |
4888 | int32_t xstart = src_2->array[xrlepos]; |
4889 | |
4890 | while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) { |
4891 | if (end <= xstart) { |
4892 | // output the first run |
4893 | answer->runs[answer->n_runs++] = |
4894 | (rle16_t){.value = (uint16_t)start, |
4895 | .length = (uint16_t)(end - start - 1)}; |
4896 | rlepos++; |
4897 | if (rlepos < src_1->n_runs) { |
4898 | start = src_1->runs[rlepos].value; |
4899 | end = start + src_1->runs[rlepos].length + 1; |
4900 | } |
4901 | } else if (xstart + 1 <= start) { |
4902 | // exit the second run |
4903 | xrlepos++; |
4904 | if (xrlepos < src_2->cardinality) { |
4905 | xstart = src_2->array[xrlepos]; |
4906 | } |
4907 | } else { |
4908 | if (start < xstart) { |
4909 | answer->runs[answer->n_runs++] = |
4910 | (rle16_t){.value = (uint16_t)start, |
4911 | .length = (uint16_t)(xstart - start - 1)}; |
4912 | } |
4913 | if (xstart + 1 < end) { |
4914 | start = xstart + 1; |
4915 | } else { |
4916 | rlepos++; |
4917 | if (rlepos < src_1->n_runs) { |
4918 | start = src_1->runs[rlepos].value; |
4919 | end = start + src_1->runs[rlepos].length + 1; |
4920 | } |
4921 | } |
4922 | } |
4923 | } |
4924 | if (rlepos < src_1->n_runs) { |
4925 | answer->runs[answer->n_runs++] = |
4926 | (rle16_t){.value = (uint16_t)start, |
4927 | .length = (uint16_t)(end - start - 1)}; |
4928 | rlepos++; |
4929 | if (rlepos < src_1->n_runs) { |
4930 | memcpy(dest: answer->runs + answer->n_runs, src: src_1->runs + rlepos, |
4931 | n: (src_1->n_runs - rlepos) * sizeof(rle16_t)); |
4932 | answer->n_runs += (src_1->n_runs - rlepos); |
4933 | } |
4934 | } |
4935 | uint8_t return_type; |
4936 | *dst = convert_run_to_efficient_container(c: answer, typecode_after: &return_type); |
4937 | if (answer != *dst) run_container_free(run: answer); |
4938 | return return_type; |
4939 | } |
4940 | // else it's a bitmap or array |
4941 | |
4942 | if (card <= DEFAULT_MAX_SIZE) { |
4943 | array_container_t *ac = array_container_create_given_capacity(size: card); |
4944 | // nb Java code used a generic iterator-based merge to compute |
4945 | // difference |
4946 | ac->cardinality = run_array_array_subtract(r: src_1, a_in: src_2, a_out: ac); |
4947 | *dst = ac; |
4948 | return ARRAY_CONTAINER_TYPE_CODE; |
4949 | } |
4950 | bitset_container_t *ans = bitset_container_from_run(arr: src_1); |
4951 | bool result_is_bitset = bitset_array_container_iandnot(src_1: ans, src_2, dst); |
4952 | return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE |
4953 | : ARRAY_CONTAINER_TYPE_CODE); |
4954 | } |
4955 | |
4956 | /* Compute the andnot of src_1 and src_2 and write the result to |
4957 | * dst (which has no container initially). It will modify src_1 |
4958 | * to be dst if the result is a bitset. Otherwise, it will |
4959 | * free src_1 and dst will be a new array container. In both |
4960 | * cases, the caller is responsible for deallocating dst. |
4961 | * Returns true iff dst is a bitset */ |
4962 | |
4963 | int run_array_container_iandnot(run_container_t *src_1, |
4964 | const array_container_t *src_2, void **dst) { |
4965 | // dummy implementation same as June 2016 Java |
4966 | int ans = run_array_container_andnot(src_1, src_2, dst); |
4967 | run_container_free(run: src_1); |
4968 | return ans; |
4969 | } |
4970 | |
4971 | /* dst must be a valid array container, allowed to be src_1 */ |
4972 | |
4973 | void array_run_container_andnot(const array_container_t *src_1, |
4974 | const run_container_t *src_2, |
4975 | array_container_t *dst) { |
4976 | // basically following Java impl as of June 2016 |
4977 | if (src_1->cardinality > dst->capacity) { |
4978 | array_container_grow(container: dst, min: src_1->cardinality, false); |
4979 | } |
4980 | |
4981 | if (src_2->n_runs == 0) { |
4982 | memmove(dest: dst->array, src: src_1->array, |
4983 | n: sizeof(uint16_t) * src_1->cardinality); |
4984 | dst->cardinality = src_1->cardinality; |
4985 | return; |
4986 | } |
4987 | int32_t run_start = src_2->runs[0].value; |
4988 | int32_t run_end = run_start + src_2->runs[0].length; |
4989 | int which_run = 0; |
4990 | |
4991 | uint16_t val = 0; |
4992 | int dest_card = 0; |
4993 | for (int i = 0; i < src_1->cardinality; ++i) { |
4994 | val = src_1->array[i]; |
4995 | if (val < run_start) |
4996 | dst->array[dest_card++] = val; |
4997 | else if (val <= run_end) { |
4998 | ; // omitted item |
4999 | } else { |
5000 | do { |
5001 | if (which_run + 1 < src_2->n_runs) { |
5002 | ++which_run; |
5003 | run_start = src_2->runs[which_run].value; |
5004 | run_end = run_start + src_2->runs[which_run].length; |
5005 | |
5006 | } else |
5007 | run_start = run_end = (1 << 16) + 1; |
5008 | } while (val > run_end); |
5009 | --i; |
5010 | } |
5011 | } |
5012 | dst->cardinality = dest_card; |
5013 | } |
5014 | |
5015 | /* dst does not indicate a valid container initially. Eventually it |
5016 | * can become any kind of container. |
5017 | */ |
5018 | |
5019 | void array_run_container_iandnot(array_container_t *src_1, |
5020 | const run_container_t *src_2) { |
5021 | array_run_container_andnot(src_1, src_2, dst: src_1); |
5022 | } |
5023 | |
5024 | /* dst does not indicate a valid container initially. Eventually it |
5025 | * can become any kind of container. |
5026 | */ |
5027 | |
5028 | int run_run_container_andnot(const run_container_t *src_1, |
5029 | const run_container_t *src_2, void **dst) { |
5030 | run_container_t *ans = run_container_create(); |
5031 | run_container_andnot(src_1, src_2, dst: ans); |
5032 | uint8_t typecode_after; |
5033 | *dst = convert_run_to_efficient_container_and_free(c: ans, typecode_after: &typecode_after); |
5034 | return typecode_after; |
5035 | } |
5036 | |
5037 | /* Compute the andnot of src_1 and src_2 and write the result to |
5038 | * dst (which has no container initially). It will modify src_1 |
5039 | * to be dst if the result is a bitset. Otherwise, it will |
5040 | * free src_1 and dst will be a new array container. In both |
5041 | * cases, the caller is responsible for deallocating dst. |
5042 | * Returns true iff dst is a bitset */ |
5043 | |
5044 | int run_run_container_iandnot(run_container_t *src_1, |
5045 | const run_container_t *src_2, void **dst) { |
5046 | // following Java impl as of June 2016 (dummy) |
5047 | int ans = run_run_container_andnot(src_1, src_2, dst); |
5048 | run_container_free(run: src_1); |
5049 | return ans; |
5050 | } |
5051 | |
5052 | /* |
5053 | * dst is a valid array container and may be the same as src_1 |
5054 | */ |
5055 | |
5056 | void array_array_container_andnot(const array_container_t *src_1, |
5057 | const array_container_t *src_2, |
5058 | array_container_t *dst) { |
5059 | array_container_andnot(array_1: src_1, array_2: src_2, out: dst); |
5060 | } |
5061 | |
5062 | /* inplace array-array andnot will always be able to reuse the space of |
5063 | * src_1 */ |
5064 | void array_array_container_iandnot(array_container_t *src_1, |
5065 | const array_container_t *src_2) { |
5066 | array_container_andnot(array_1: src_1, array_2: src_2, out: src_1); |
5067 | } |
5068 | |
5069 | /* Compute the andnot of src_1 and src_2 and write the result to |
5070 | * dst (which has no container initially). Return value is |
5071 | * "dst is a bitset" |
5072 | */ |
5073 | |
5074 | bool bitset_bitset_container_andnot(const bitset_container_t *src_1, |
5075 | const bitset_container_t *src_2, |
5076 | void **dst) { |
5077 | bitset_container_t *ans = bitset_container_create(); |
5078 | int card = bitset_container_andnot(src_1, src_2, dst: ans); |
5079 | if (card <= DEFAULT_MAX_SIZE) { |
5080 | *dst = array_container_from_bitset(bits: ans); |
5081 | bitset_container_free(bitset: ans); |
5082 | return false; // not bitset |
5083 | } else { |
5084 | *dst = ans; |
5085 | return true; |
5086 | } |
5087 | } |
5088 | |
5089 | /* Compute the andnot of src_1 and src_2 and write the result to |
5090 | * dst (which has no container initially). It will modify src_1 |
5091 | * to be dst if the result is a bitset. Otherwise, it will |
5092 | * free src_1 and dst will be a new array container. In both |
5093 | * cases, the caller is responsible for deallocating dst. |
5094 | * Returns true iff dst is a bitset */ |
5095 | |
5096 | bool bitset_bitset_container_iandnot(bitset_container_t *src_1, |
5097 | const bitset_container_t *src_2, |
5098 | void **dst) { |
5099 | int card = bitset_container_andnot(src_1, src_2, dst: src_1); |
5100 | if (card <= DEFAULT_MAX_SIZE) { |
5101 | *dst = array_container_from_bitset(bits: src_1); |
5102 | bitset_container_free(bitset: src_1); |
5103 | return false; // not bitset |
5104 | } else { |
5105 | *dst = src_1; |
5106 | return true; |
5107 | } |
5108 | } |
5109 | /* end file src/containers/mixed_andnot.c */ |
5110 | /* begin file src/containers/mixed_equal.c */ |
5111 | |
5112 | bool array_container_equal_bitset(const array_container_t* container1, |
5113 | const bitset_container_t* container2) { |
5114 | if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
5115 | if (container2->cardinality != container1->cardinality) { |
5116 | return false; |
5117 | } |
5118 | } |
5119 | int32_t pos = 0; |
5120 | for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { |
5121 | uint64_t w = container2->array[i]; |
5122 | while (w != 0) { |
5123 | uint64_t t = w & (~w + 1); |
5124 | uint16_t r = i * 64 + __builtin_ctzll(w); |
5125 | if (pos >= container1->cardinality) { |
5126 | return false; |
5127 | } |
5128 | if (container1->array[pos] != r) { |
5129 | return false; |
5130 | } |
5131 | ++pos; |
5132 | w ^= t; |
5133 | } |
5134 | } |
5135 | return (pos == container1->cardinality); |
5136 | } |
5137 | |
5138 | bool run_container_equals_array(const run_container_t* container1, |
5139 | const array_container_t* container2) { |
5140 | if (run_container_cardinality(run: container1) != container2->cardinality) |
5141 | return false; |
5142 | int32_t pos = 0; |
5143 | for (int i = 0; i < container1->n_runs; ++i) { |
5144 | const uint32_t run_start = container1->runs[i].value; |
5145 | const uint32_t le = container1->runs[i].length; |
5146 | |
5147 | if (container2->array[pos] != run_start) { |
5148 | return false; |
5149 | } |
5150 | |
5151 | if (container2->array[pos + le] != run_start + le) { |
5152 | return false; |
5153 | } |
5154 | |
5155 | pos += le + 1; |
5156 | } |
5157 | return true; |
5158 | } |
5159 | |
5160 | bool run_container_equals_bitset(const run_container_t* container1, |
5161 | const bitset_container_t* container2) { |
5162 | |
5163 | int run_card = run_container_cardinality(run: container1); |
5164 | int bitset_card = (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) ? |
5165 | container2->cardinality : |
5166 | bitset_container_compute_cardinality(bitset: container2); |
5167 | if (bitset_card != run_card) { |
5168 | return false; |
5169 | } |
5170 | |
5171 | for (int32_t i = 0; i < container1->n_runs; i++) { |
5172 | uint32_t begin = container1->runs[i].value; |
5173 | if (container1->runs[i].length) { |
5174 | uint32_t end = begin + container1->runs[i].length + 1; |
5175 | if (!bitset_container_contains_range(bitset: container2, pos_start: begin, pos_end: end)) { |
5176 | return false; |
5177 | } |
5178 | } else { |
5179 | if (!bitset_container_contains(bitset: container2, pos: begin)) { |
5180 | return false; |
5181 | } |
5182 | } |
5183 | } |
5184 | |
5185 | return true; |
5186 | } |
5187 | /* end file src/containers/mixed_equal.c */ |
5188 | /* begin file src/containers/mixed_intersection.c */ |
5189 | /* |
5190 | * mixed_intersection.c |
5191 | * |
5192 | */ |
5193 | |
5194 | |
5195 | /* Compute the intersection of src_1 and src_2 and write the result to |
5196 | * dst. */ |
5197 | void array_bitset_container_intersection(const array_container_t *src_1, |
5198 | const bitset_container_t *src_2, |
5199 | array_container_t *dst) { |
5200 | if (dst->capacity < src_1->cardinality) { |
5201 | array_container_grow(container: dst, min: src_1->cardinality, false); |
5202 | } |
5203 | int32_t newcard = 0; // dst could be src_1 |
5204 | const int32_t origcard = src_1->cardinality; |
5205 | for (int i = 0; i < origcard; ++i) { |
5206 | uint16_t key = src_1->array[i]; |
5207 | // this branchless approach is much faster... |
5208 | dst->array[newcard] = key; |
5209 | newcard += bitset_container_contains(bitset: src_2, pos: key); |
5210 | /** |
5211 | * we could do it this way instead... |
5212 | * if (bitset_container_contains(src_2, key)) { |
5213 | * dst->array[newcard++] = key; |
5214 | * } |
5215 | * but if the result is unpredictable, the processor generates |
5216 | * many mispredicted branches. |
5217 | * Difference can be huge (from 3 cycles when predictable all the way |
5218 | * to 16 cycles when unpredictable. |
5219 | * See |
5220 | * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c |
5221 | */ |
5222 | } |
5223 | dst->cardinality = newcard; |
5224 | } |
5225 | |
5226 | /* Compute the size of the intersection of src_1 and src_2. */ |
5227 | int array_bitset_container_intersection_cardinality( |
5228 | const array_container_t *src_1, const bitset_container_t *src_2) { |
5229 | int32_t newcard = 0; |
5230 | const int32_t origcard = src_1->cardinality; |
5231 | for (int i = 0; i < origcard; ++i) { |
5232 | uint16_t key = src_1->array[i]; |
5233 | newcard += bitset_container_contains(bitset: src_2, pos: key); |
5234 | } |
5235 | return newcard; |
5236 | } |
5237 | |
5238 | |
5239 | bool array_bitset_container_intersect(const array_container_t *src_1, |
5240 | const bitset_container_t *src_2) { |
5241 | const int32_t origcard = src_1->cardinality; |
5242 | for (int i = 0; i < origcard; ++i) { |
5243 | uint16_t key = src_1->array[i]; |
5244 | if(bitset_container_contains(bitset: src_2, pos: key)) return true; |
5245 | } |
5246 | return false; |
5247 | } |
5248 | |
5249 | /* Compute the intersection of src_1 and src_2 and write the result to |
5250 | * dst. It is allowed for dst to be equal to src_1. We assume that dst is a |
5251 | * valid container. */ |
5252 | void array_run_container_intersection(const array_container_t *src_1, |
5253 | const run_container_t *src_2, |
5254 | array_container_t *dst) { |
5255 | if (run_container_is_full(run: src_2)) { |
5256 | if (dst != src_1) array_container_copy(src: src_1, dst); |
5257 | return; |
5258 | } |
5259 | if (dst->capacity < src_1->cardinality) { |
5260 | array_container_grow(container: dst, min: src_1->cardinality, false); |
5261 | } |
5262 | if (src_2->n_runs == 0) { |
5263 | return; |
5264 | } |
5265 | int32_t rlepos = 0; |
5266 | int32_t arraypos = 0; |
5267 | rle16_t rle = src_2->runs[rlepos]; |
5268 | int32_t newcard = 0; |
5269 | while (arraypos < src_1->cardinality) { |
5270 | const uint16_t arrayval = src_1->array[arraypos]; |
5271 | while (rle.value + rle.length < |
5272 | arrayval) { // this will frequently be false |
5273 | ++rlepos; |
5274 | if (rlepos == src_2->n_runs) { |
5275 | dst->cardinality = newcard; |
5276 | return; // we are done |
5277 | } |
5278 | rle = src_2->runs[rlepos]; |
5279 | } |
5280 | if (rle.value > arrayval) { |
5281 | arraypos = advanceUntil(array: src_1->array, pos: arraypos, length: src_1->cardinality, |
5282 | min: rle.value); |
5283 | } else { |
5284 | dst->array[newcard] = arrayval; |
5285 | newcard++; |
5286 | arraypos++; |
5287 | } |
5288 | } |
5289 | dst->cardinality = newcard; |
5290 | } |
5291 | |
5292 | /* Compute the intersection of src_1 and src_2 and write the result to |
5293 | * *dst. If the result is true then the result is a bitset_container_t |
5294 | * otherwise is a array_container_t. If *dst == src_2, an in-place processing |
5295 | * is attempted.*/ |
5296 | bool run_bitset_container_intersection(const run_container_t *src_1, |
5297 | const bitset_container_t *src_2, |
5298 | void **dst) { |
5299 | if (run_container_is_full(run: src_1)) { |
5300 | if (*dst != src_2) *dst = bitset_container_clone(src: src_2); |
5301 | return true; |
5302 | } |
5303 | int32_t card = run_container_cardinality(run: src_1); |
5304 | if (card <= DEFAULT_MAX_SIZE) { |
5305 | // result can only be an array (assuming that we never make a |
5306 | // RunContainer) |
5307 | if (card > src_2->cardinality) { |
5308 | card = src_2->cardinality; |
5309 | } |
5310 | array_container_t *answer = array_container_create_given_capacity(size: card); |
5311 | *dst = answer; |
5312 | if (*dst == NULL) { |
5313 | return false; |
5314 | } |
5315 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5316 | rle16_t rle = src_1->runs[rlepos]; |
5317 | uint32_t endofrun = (uint32_t)rle.value + rle.length; |
5318 | for (uint32_t runValue = rle.value; runValue <= endofrun; |
5319 | ++runValue) { |
5320 | answer->array[answer->cardinality] = (uint16_t)runValue; |
5321 | answer->cardinality += |
5322 | bitset_container_contains(bitset: src_2, pos: runValue); |
5323 | } |
5324 | } |
5325 | return false; |
5326 | } |
5327 | if (*dst == src_2) { // we attempt in-place |
5328 | bitset_container_t *answer = (bitset_container_t *)*dst; |
5329 | uint32_t start = 0; |
5330 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5331 | const rle16_t rle = src_1->runs[rlepos]; |
5332 | uint32_t end = rle.value; |
5333 | bitset_reset_range(bitmap: src_2->array, start, end); |
5334 | |
5335 | start = end + rle.length + 1; |
5336 | } |
5337 | bitset_reset_range(bitmap: src_2->array, start, UINT32_C(1) << 16); |
5338 | answer->cardinality = bitset_container_compute_cardinality(bitset: answer); |
5339 | if (src_2->cardinality > DEFAULT_MAX_SIZE) { |
5340 | return true; |
5341 | } else { |
5342 | array_container_t *newanswer = array_container_from_bitset(bits: src_2); |
5343 | if (newanswer == NULL) { |
5344 | *dst = NULL; |
5345 | return false; |
5346 | } |
5347 | *dst = newanswer; |
5348 | return false; |
5349 | } |
5350 | } else { // no inplace |
5351 | // we expect the answer to be a bitmap (if we are lucky) |
5352 | bitset_container_t *answer = bitset_container_clone(src: src_2); |
5353 | |
5354 | *dst = answer; |
5355 | if (answer == NULL) { |
5356 | return true; |
5357 | } |
5358 | uint32_t start = 0; |
5359 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5360 | const rle16_t rle = src_1->runs[rlepos]; |
5361 | uint32_t end = rle.value; |
5362 | bitset_reset_range(bitmap: answer->array, start, end); |
5363 | start = end + rle.length + 1; |
5364 | } |
5365 | bitset_reset_range(bitmap: answer->array, start, UINT32_C(1) << 16); |
5366 | answer->cardinality = bitset_container_compute_cardinality(bitset: answer); |
5367 | |
5368 | if (answer->cardinality > DEFAULT_MAX_SIZE) { |
5369 | return true; |
5370 | } else { |
5371 | array_container_t *newanswer = array_container_from_bitset(bits: answer); |
5372 | bitset_container_free(bitset: (bitset_container_t *)*dst); |
5373 | if (newanswer == NULL) { |
5374 | *dst = NULL; |
5375 | return false; |
5376 | } |
5377 | *dst = newanswer; |
5378 | return false; |
5379 | } |
5380 | } |
5381 | } |
5382 | |
5383 | /* Compute the size of the intersection between src_1 and src_2 . */ |
5384 | int array_run_container_intersection_cardinality(const array_container_t *src_1, |
5385 | const run_container_t *src_2) { |
5386 | if (run_container_is_full(run: src_2)) { |
5387 | return src_1->cardinality; |
5388 | } |
5389 | if (src_2->n_runs == 0) { |
5390 | return 0; |
5391 | } |
5392 | int32_t rlepos = 0; |
5393 | int32_t arraypos = 0; |
5394 | rle16_t rle = src_2->runs[rlepos]; |
5395 | int32_t newcard = 0; |
5396 | while (arraypos < src_1->cardinality) { |
5397 | const uint16_t arrayval = src_1->array[arraypos]; |
5398 | while (rle.value + rle.length < |
5399 | arrayval) { // this will frequently be false |
5400 | ++rlepos; |
5401 | if (rlepos == src_2->n_runs) { |
5402 | return newcard; // we are done |
5403 | } |
5404 | rle = src_2->runs[rlepos]; |
5405 | } |
5406 | if (rle.value > arrayval) { |
5407 | arraypos = advanceUntil(array: src_1->array, pos: arraypos, length: src_1->cardinality, |
5408 | min: rle.value); |
5409 | } else { |
5410 | newcard++; |
5411 | arraypos++; |
5412 | } |
5413 | } |
5414 | return newcard; |
5415 | } |
5416 | |
5417 | /* Compute the intersection between src_1 and src_2 |
5418 | **/ |
5419 | int run_bitset_container_intersection_cardinality( |
5420 | const run_container_t *src_1, const bitset_container_t *src_2) { |
5421 | if (run_container_is_full(run: src_1)) { |
5422 | return bitset_container_cardinality(bitset: src_2); |
5423 | } |
5424 | int answer = 0; |
5425 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5426 | rle16_t rle = src_1->runs[rlepos]; |
5427 | answer += |
5428 | bitset_lenrange_cardinality(bitmap: src_2->array, start: rle.value, lenminusone: rle.length); |
5429 | } |
5430 | return answer; |
5431 | } |
5432 | |
5433 | |
5434 | bool array_run_container_intersect(const array_container_t *src_1, |
5435 | const run_container_t *src_2) { |
5436 | if( run_container_is_full(run: src_2) ) { |
5437 | return !array_container_empty(array: src_1); |
5438 | } |
5439 | if (src_2->n_runs == 0) { |
5440 | return false; |
5441 | } |
5442 | int32_t rlepos = 0; |
5443 | int32_t arraypos = 0; |
5444 | rle16_t rle = src_2->runs[rlepos]; |
5445 | while (arraypos < src_1->cardinality) { |
5446 | const uint16_t arrayval = src_1->array[arraypos]; |
5447 | while (rle.value + rle.length < |
5448 | arrayval) { // this will frequently be false |
5449 | ++rlepos; |
5450 | if (rlepos == src_2->n_runs) { |
5451 | return false; // we are done |
5452 | } |
5453 | rle = src_2->runs[rlepos]; |
5454 | } |
5455 | if (rle.value > arrayval) { |
5456 | arraypos = advanceUntil(array: src_1->array, pos: arraypos, length: src_1->cardinality, |
5457 | min: rle.value); |
5458 | } else { |
5459 | return true; |
5460 | } |
5461 | } |
5462 | return false; |
5463 | } |
5464 | |
5465 | /* Compute the intersection between src_1 and src_2 |
5466 | **/ |
5467 | bool run_bitset_container_intersect(const run_container_t *src_1, |
5468 | const bitset_container_t *src_2) { |
5469 | if( run_container_is_full(run: src_1) ) { |
5470 | return !bitset_container_empty(bitset: src_2); |
5471 | } |
5472 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5473 | rle16_t rle = src_1->runs[rlepos]; |
5474 | if(!bitset_lenrange_empty(bitmap: src_2->array, start: rle.value,lenminusone: rle.length)) return true; |
5475 | } |
5476 | return false; |
5477 | } |
5478 | |
5479 | /* |
5480 | * Compute the intersection between src_1 and src_2 and write the result |
5481 | * to *dst. If the return function is true, the result is a bitset_container_t |
5482 | * otherwise is a array_container_t. |
5483 | */ |
5484 | bool bitset_bitset_container_intersection(const bitset_container_t *src_1, |
5485 | const bitset_container_t *src_2, |
5486 | void **dst) { |
5487 | const int newCardinality = bitset_container_and_justcard(src_1, src_2); |
5488 | if (newCardinality > DEFAULT_MAX_SIZE) { |
5489 | *dst = bitset_container_create(); |
5490 | if (*dst != NULL) { |
5491 | bitset_container_and_nocard(src_1, src_2, |
5492 | dst: (bitset_container_t *)*dst); |
5493 | ((bitset_container_t *)*dst)->cardinality = newCardinality; |
5494 | } |
5495 | return true; // it is a bitset |
5496 | } |
5497 | *dst = array_container_create_given_capacity(size: newCardinality); |
5498 | if (*dst != NULL) { |
5499 | ((array_container_t *)*dst)->cardinality = newCardinality; |
5500 | bitset_extract_intersection_setbits_uint16( |
5501 | bitset1: ((const bitset_container_t *)src_1)->array, |
5502 | bitset2: ((const bitset_container_t *)src_2)->array, |
5503 | length: BITSET_CONTAINER_SIZE_IN_WORDS, out: ((array_container_t *)*dst)->array, |
5504 | base: 0); |
5505 | } |
5506 | return false; // not a bitset |
5507 | } |
5508 | |
5509 | bool bitset_bitset_container_intersection_inplace( |
5510 | bitset_container_t *src_1, const bitset_container_t *src_2, void **dst) { |
5511 | const int newCardinality = bitset_container_and_justcard(src_1, src_2); |
5512 | if (newCardinality > DEFAULT_MAX_SIZE) { |
5513 | *dst = src_1; |
5514 | bitset_container_and_nocard(src_1, src_2, dst: src_1); |
5515 | ((bitset_container_t *)*dst)->cardinality = newCardinality; |
5516 | return true; // it is a bitset |
5517 | } |
5518 | *dst = array_container_create_given_capacity(size: newCardinality); |
5519 | if (*dst != NULL) { |
5520 | ((array_container_t *)*dst)->cardinality = newCardinality; |
5521 | bitset_extract_intersection_setbits_uint16( |
5522 | bitset1: ((const bitset_container_t *)src_1)->array, |
5523 | bitset2: ((const bitset_container_t *)src_2)->array, |
5524 | length: BITSET_CONTAINER_SIZE_IN_WORDS, out: ((array_container_t *)*dst)->array, |
5525 | base: 0); |
5526 | } |
5527 | return false; // not a bitset |
5528 | } |
5529 | /* end file src/containers/mixed_intersection.c */ |
5530 | /* begin file src/containers/mixed_negation.c */ |
5531 | /* |
5532 | * mixed_negation.c |
5533 | * |
5534 | */ |
5535 | |
5536 | #include <assert.h> |
5537 | #include <string.h> |
5538 | |
5539 | |
5540 | // TODO: make simplified and optimized negation code across |
5541 | // the full range. |
5542 | |
5543 | /* Negation across the entire range of the container. |
5544 | * Compute the negation of src and write the result |
5545 | * to *dst. The complement of a |
5546 | * sufficiently sparse set will always be dense and a hence a bitmap |
5547 | ' * We assume that dst is pre-allocated and a valid bitset container |
5548 | * There can be no in-place version. |
5549 | */ |
5550 | void array_container_negation(const array_container_t *src, |
5551 | bitset_container_t *dst) { |
5552 | uint64_t card = UINT64_C(1 << 16); |
5553 | bitset_container_set_all(bitset: dst); |
5554 | |
5555 | dst->cardinality = (int32_t)bitset_clear_list(bitset: dst->array, card, list: src->array, |
5556 | length: (uint64_t)src->cardinality); |
5557 | } |
5558 | |
5559 | /* Negation across the entire range of the container |
5560 | * Compute the negation of src and write the result |
5561 | * to *dst. A true return value indicates a bitset result, |
5562 | * otherwise the result is an array container. |
5563 | * We assume that dst is not pre-allocated. In |
5564 | * case of failure, *dst will be NULL. |
5565 | */ |
5566 | bool bitset_container_negation(const bitset_container_t *src, void **dst) { |
5567 | return bitset_container_negation_range(src, range_start: 0, range_end: (1 << 16), dst); |
5568 | } |
5569 | |
5570 | /* inplace version */ |
5571 | /* |
5572 | * Same as bitset_container_negation except that if the output is to |
5573 | * be a |
5574 | * bitset_container_t, then src is modified and no allocation is made. |
5575 | * If the output is to be an array_container_t, then caller is responsible |
5576 | * to free the container. |
5577 | * In all cases, the result is in *dst. |
5578 | */ |
5579 | bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) { |
5580 | return bitset_container_negation_range_inplace(src, range_start: 0, range_end: (1 << 16), dst); |
5581 | } |
5582 | |
5583 | /* Negation across the entire range of container |
5584 | * Compute the negation of src and write the result |
5585 | * to *dst. Return values are the *_TYPECODES as defined * in containers.h |
5586 | * We assume that dst is not pre-allocated. In |
5587 | * case of failure, *dst will be NULL. |
5588 | */ |
5589 | int run_container_negation(const run_container_t *src, void **dst) { |
5590 | return run_container_negation_range(src, range_start: 0, range_end: (1 << 16), dst); |
5591 | } |
5592 | |
5593 | /* |
5594 | * Same as run_container_negation except that if the output is to |
5595 | * be a |
5596 | * run_container_t, and has the capacity to hold the result, |
5597 | * then src is modified and no allocation is made. |
5598 | * In all cases, the result is in *dst. |
5599 | */ |
5600 | int run_container_negation_inplace(run_container_t *src, void **dst) { |
5601 | return run_container_negation_range_inplace(src, range_start: 0, range_end: (1 << 16), dst); |
5602 | } |
5603 | |
5604 | /* Negation across a range of the container. |
5605 | * Compute the negation of src and write the result |
5606 | * to *dst. Returns true if the result is a bitset container |
5607 | * and false for an array container. *dst is not preallocated. |
5608 | */ |
5609 | bool array_container_negation_range(const array_container_t *src, |
5610 | const int range_start, const int range_end, |
5611 | void **dst) { |
5612 | /* close port of the Java implementation */ |
5613 | if (range_start >= range_end) { |
5614 | *dst = array_container_clone(src); |
5615 | return false; |
5616 | } |
5617 | |
5618 | int32_t start_index = |
5619 | binarySearch(array: src->array, lenarray: src->cardinality, ikey: (uint16_t)range_start); |
5620 | if (start_index < 0) start_index = -start_index - 1; |
5621 | |
5622 | int32_t last_index = |
5623 | binarySearch(array: src->array, lenarray: src->cardinality, ikey: (uint16_t)(range_end - 1)); |
5624 | if (last_index < 0) last_index = -last_index - 2; |
5625 | |
5626 | const int32_t current_values_in_range = last_index - start_index + 1; |
5627 | const int32_t span_to_be_flipped = range_end - range_start; |
5628 | const int32_t new_values_in_range = |
5629 | span_to_be_flipped - current_values_in_range; |
5630 | const int32_t cardinality_change = |
5631 | new_values_in_range - current_values_in_range; |
5632 | const int32_t new_cardinality = src->cardinality + cardinality_change; |
5633 | |
5634 | if (new_cardinality > DEFAULT_MAX_SIZE) { |
5635 | bitset_container_t *temp = bitset_container_from_array(a: src); |
5636 | bitset_flip_range(bitmap: temp->array, start: (uint32_t)range_start, |
5637 | end: (uint32_t)range_end); |
5638 | temp->cardinality = new_cardinality; |
5639 | *dst = temp; |
5640 | return true; |
5641 | } |
5642 | |
5643 | array_container_t *arr = |
5644 | array_container_create_given_capacity(size: new_cardinality); |
5645 | *dst = (void *)arr; |
5646 | if(new_cardinality == 0) { |
5647 | arr->cardinality = new_cardinality; |
5648 | return false; // we are done. |
5649 | } |
5650 | // copy stuff before the active area |
5651 | memcpy(dest: arr->array, src: src->array, n: start_index * sizeof(uint16_t)); |
5652 | |
5653 | // work on the range |
5654 | int32_t out_pos = start_index, in_pos = start_index; |
5655 | int32_t val_in_range = range_start; |
5656 | for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { |
5657 | if ((uint16_t)val_in_range != src->array[in_pos]) { |
5658 | arr->array[out_pos++] = (uint16_t)val_in_range; |
5659 | } else { |
5660 | ++in_pos; |
5661 | } |
5662 | } |
5663 | for (; val_in_range < range_end; ++val_in_range) |
5664 | arr->array[out_pos++] = (uint16_t)val_in_range; |
5665 | |
5666 | // content after the active range |
5667 | memcpy(dest: arr->array + out_pos, src: src->array + (last_index + 1), |
5668 | n: (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); |
5669 | arr->cardinality = new_cardinality; |
5670 | return false; |
5671 | } |
5672 | |
5673 | /* Even when the result would fit, it is unclear how to make an |
5674 | * inplace version without inefficient copying. |
5675 | */ |
5676 | |
5677 | bool array_container_negation_range_inplace(array_container_t *src, |
5678 | const int range_start, |
5679 | const int range_end, void **dst) { |
5680 | bool ans = array_container_negation_range(src, range_start, range_end, dst); |
5681 | // TODO : try a real inplace version |
5682 | array_container_free(arr: src); |
5683 | return ans; |
5684 | } |
5685 | |
5686 | /* Negation across a range of the container |
5687 | * Compute the negation of src and write the result |
5688 | * to *dst. A true return value indicates a bitset result, |
5689 | * otherwise the result is an array container. |
5690 | * We assume that dst is not pre-allocated. In |
5691 | * case of failure, *dst will be NULL. |
5692 | */ |
5693 | bool bitset_container_negation_range(const bitset_container_t *src, |
5694 | const int range_start, const int range_end, |
5695 | void **dst) { |
5696 | // TODO maybe consider density-based estimate |
5697 | // and sometimes build result directly as array, with |
5698 | // conversion back to bitset if wrong. Or determine |
5699 | // actual result cardinality, then go directly for the known final cont. |
5700 | |
5701 | // keep computation using bitsets as long as possible. |
5702 | bitset_container_t *t = bitset_container_clone(src); |
5703 | bitset_flip_range(bitmap: t->array, start: (uint32_t)range_start, end: (uint32_t)range_end); |
5704 | t->cardinality = bitset_container_compute_cardinality(bitset: t); |
5705 | |
5706 | if (t->cardinality > DEFAULT_MAX_SIZE) { |
5707 | *dst = t; |
5708 | return true; |
5709 | } else { |
5710 | *dst = array_container_from_bitset(bits: t); |
5711 | bitset_container_free(bitset: t); |
5712 | return false; |
5713 | } |
5714 | } |
5715 | |
5716 | /* inplace version */ |
5717 | /* |
5718 | * Same as bitset_container_negation except that if the output is to |
5719 | * be a |
5720 | * bitset_container_t, then src is modified and no allocation is made. |
5721 | * If the output is to be an array_container_t, then caller is responsible |
5722 | * to free the container. |
5723 | * In all cases, the result is in *dst. |
5724 | */ |
5725 | bool bitset_container_negation_range_inplace(bitset_container_t *src, |
5726 | const int range_start, |
5727 | const int range_end, void **dst) { |
5728 | bitset_flip_range(bitmap: src->array, start: (uint32_t)range_start, end: (uint32_t)range_end); |
5729 | src->cardinality = bitset_container_compute_cardinality(bitset: src); |
5730 | if (src->cardinality > DEFAULT_MAX_SIZE) { |
5731 | *dst = src; |
5732 | return true; |
5733 | } |
5734 | *dst = array_container_from_bitset(bits: src); |
5735 | bitset_container_free(bitset: src); |
5736 | return false; |
5737 | } |
5738 | |
5739 | /* Negation across a range of container |
5740 | * Compute the negation of src and write the result |
5741 | * to *dst. Return values are the *_TYPECODES as defined * in containers.h |
5742 | * We assume that dst is not pre-allocated. In |
5743 | * case of failure, *dst will be NULL. |
5744 | */ |
5745 | int run_container_negation_range(const run_container_t *src, |
5746 | const int range_start, const int range_end, |
5747 | void **dst) { |
5748 | uint8_t return_typecode; |
5749 | |
5750 | // follows the Java implementation |
5751 | if (range_end <= range_start) { |
5752 | *dst = run_container_clone(src); |
5753 | return RUN_CONTAINER_TYPE_CODE; |
5754 | } |
5755 | |
5756 | run_container_t *ans = run_container_create_given_capacity( |
5757 | size: src->n_runs + 1); // src->n_runs + 1); |
5758 | int k = 0; |
5759 | for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { |
5760 | ans->runs[k] = src->runs[k]; |
5761 | ans->n_runs++; |
5762 | } |
5763 | |
5764 | run_container_smart_append_exclusive( |
5765 | src: ans, start: (uint16_t)range_start, length: (uint16_t)(range_end - range_start - 1)); |
5766 | |
5767 | for (; k < src->n_runs; ++k) { |
5768 | run_container_smart_append_exclusive(src: ans, start: src->runs[k].value, |
5769 | length: src->runs[k].length); |
5770 | } |
5771 | |
5772 | *dst = convert_run_to_efficient_container(c: ans, typecode_after: &return_typecode); |
5773 | if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(run: ans); |
5774 | |
5775 | return return_typecode; |
5776 | } |
5777 | |
5778 | /* |
5779 | * Same as run_container_negation except that if the output is to |
5780 | * be a |
5781 | * run_container_t, and has the capacity to hold the result, |
5782 | * then src is modified and no allocation is made. |
5783 | * In all cases, the result is in *dst. |
5784 | */ |
5785 | int run_container_negation_range_inplace(run_container_t *src, |
5786 | const int range_start, |
5787 | const int range_end, void **dst) { |
5788 | uint8_t return_typecode; |
5789 | |
5790 | if (range_end <= range_start) { |
5791 | *dst = src; |
5792 | return RUN_CONTAINER_TYPE_CODE; |
5793 | } |
5794 | |
5795 | // TODO: efficient special case when range is 0 to 65535 inclusive |
5796 | |
5797 | if (src->capacity == src->n_runs) { |
5798 | // no excess room. More checking to see if result can fit |
5799 | bool last_val_before_range = false; |
5800 | bool first_val_in_range = false; |
5801 | bool last_val_in_range = false; |
5802 | bool first_val_past_range = false; |
5803 | |
5804 | if (range_start > 0) |
5805 | last_val_before_range = |
5806 | run_container_contains(run: src, pos: (uint16_t)(range_start - 1)); |
5807 | first_val_in_range = run_container_contains(run: src, pos: (uint16_t)range_start); |
5808 | |
5809 | if (last_val_before_range == first_val_in_range) { |
5810 | last_val_in_range = |
5811 | run_container_contains(run: src, pos: (uint16_t)(range_end - 1)); |
5812 | if (range_end != 0x10000) |
5813 | first_val_past_range = |
5814 | run_container_contains(run: src, pos: (uint16_t)range_end); |
5815 | |
5816 | if (last_val_in_range == |
5817 | first_val_past_range) { // no space for inplace |
5818 | int ans = run_container_negation_range(src, range_start, |
5819 | range_end, dst); |
5820 | run_container_free(run: src); |
5821 | return ans; |
5822 | } |
5823 | } |
5824 | } |
5825 | // all other cases: result will fit |
5826 | |
5827 | run_container_t *ans = src; |
5828 | int my_nbr_runs = src->n_runs; |
5829 | |
5830 | ans->n_runs = 0; |
5831 | int k = 0; |
5832 | for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { |
5833 | // ans->runs[k] = src->runs[k]; (would be self-copy) |
5834 | ans->n_runs++; |
5835 | } |
5836 | |
5837 | // as with Java implementation, use locals to give self a buffer of depth 1 |
5838 | rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0}; |
5839 | rle16_t next = buffered; |
5840 | if (k < my_nbr_runs) buffered = src->runs[k]; |
5841 | |
5842 | run_container_smart_append_exclusive( |
5843 | src: ans, start: (uint16_t)range_start, length: (uint16_t)(range_end - range_start - 1)); |
5844 | |
5845 | for (; k < my_nbr_runs; ++k) { |
5846 | if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; |
5847 | |
5848 | run_container_smart_append_exclusive(src: ans, start: buffered.value, |
5849 | length: buffered.length); |
5850 | buffered = next; |
5851 | } |
5852 | |
5853 | *dst = convert_run_to_efficient_container(c: ans, typecode_after: &return_typecode); |
5854 | if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(run: ans); |
5855 | |
5856 | return return_typecode; |
5857 | } |
5858 | /* end file src/containers/mixed_negation.c */ |
5859 | /* begin file src/containers/mixed_subset.c */ |
5860 | |
5861 | bool array_container_is_subset_bitset(const array_container_t* container1, |
5862 | const bitset_container_t* container2) { |
5863 | if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
5864 | if (container2->cardinality < container1->cardinality) { |
5865 | return false; |
5866 | } |
5867 | } |
5868 | for (int i = 0; i < container1->cardinality; ++i) { |
5869 | if (!bitset_container_contains(bitset: container2, pos: container1->array[i])) { |
5870 | return false; |
5871 | } |
5872 | } |
5873 | return true; |
5874 | } |
5875 | |
5876 | bool run_container_is_subset_array(const run_container_t* container1, |
5877 | const array_container_t* container2) { |
5878 | if (run_container_cardinality(run: container1) > container2->cardinality) |
5879 | return false; |
5880 | int32_t start_pos = -1, stop_pos = -1; |
5881 | for (int i = 0; i < container1->n_runs; ++i) { |
5882 | int32_t start = container1->runs[i].value; |
5883 | int32_t stop = start + container1->runs[i].length; |
5884 | start_pos = advanceUntil(array: container2->array, pos: stop_pos, |
5885 | length: container2->cardinality, min: start); |
5886 | stop_pos = advanceUntil(array: container2->array, pos: stop_pos, |
5887 | length: container2->cardinality, min: stop); |
5888 | if (start_pos == container2->cardinality) { |
5889 | return false; |
5890 | } else if (stop_pos - start_pos != stop - start || |
5891 | container2->array[start_pos] != start || |
5892 | container2->array[stop_pos] != stop) { |
5893 | return false; |
5894 | } |
5895 | } |
5896 | return true; |
5897 | } |
5898 | |
5899 | bool array_container_is_subset_run(const array_container_t* container1, |
5900 | const run_container_t* container2) { |
5901 | if (container1->cardinality > run_container_cardinality(run: container2)) |
5902 | return false; |
5903 | int i_array = 0, i_run = 0; |
5904 | while (i_array < container1->cardinality && i_run < container2->n_runs) { |
5905 | uint32_t start = container2->runs[i_run].value; |
5906 | uint32_t stop = start + container2->runs[i_run].length; |
5907 | if (container1->array[i_array] < start) { |
5908 | return false; |
5909 | } else if (container1->array[i_array] > stop) { |
5910 | i_run++; |
5911 | } else { // the value of the array is in the run |
5912 | i_array++; |
5913 | } |
5914 | } |
5915 | if (i_array == container1->cardinality) { |
5916 | return true; |
5917 | } else { |
5918 | return false; |
5919 | } |
5920 | } |
5921 | |
5922 | bool run_container_is_subset_bitset(const run_container_t* container1, |
5923 | const bitset_container_t* container2) { |
5924 | // todo: this code could be much faster |
5925 | if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
5926 | if (container2->cardinality < run_container_cardinality(run: container1)) { |
5927 | return false; |
5928 | } |
5929 | } else { |
5930 | int32_t card = bitset_container_compute_cardinality( |
5931 | bitset: container2); // modify container2? |
5932 | if (card < run_container_cardinality(run: container1)) { |
5933 | return false; |
5934 | } |
5935 | } |
5936 | for (int i = 0; i < container1->n_runs; ++i) { |
5937 | uint32_t run_start = container1->runs[i].value; |
5938 | uint32_t le = container1->runs[i].length; |
5939 | for (uint32_t j = run_start; j <= run_start + le; ++j) { |
5940 | if (!bitset_container_contains(bitset: container2, pos: j)) { |
5941 | return false; |
5942 | } |
5943 | } |
5944 | } |
5945 | return true; |
5946 | } |
5947 | |
5948 | bool bitset_container_is_subset_run(const bitset_container_t* container1, |
5949 | const run_container_t* container2) { |
5950 | // todo: this code could be much faster |
5951 | if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
5952 | if (container1->cardinality > run_container_cardinality(run: container2)) { |
5953 | return false; |
5954 | } |
5955 | } |
5956 | int32_t i_bitset = 0, i_run = 0; |
5957 | while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS && |
5958 | i_run < container2->n_runs) { |
5959 | uint64_t w = container1->array[i_bitset]; |
5960 | while (w != 0 && i_run < container2->n_runs) { |
5961 | uint32_t start = container2->runs[i_run].value; |
5962 | uint32_t stop = start + container2->runs[i_run].length; |
5963 | uint64_t t = w & (~w + 1); |
5964 | uint16_t r = i_bitset * 64 + __builtin_ctzll(w); |
5965 | if (r < start) { |
5966 | return false; |
5967 | } else if (r > stop) { |
5968 | i_run++; |
5969 | continue; |
5970 | } else { |
5971 | w ^= t; |
5972 | } |
5973 | } |
5974 | if (w == 0) { |
5975 | i_bitset++; |
5976 | } else { |
5977 | return false; |
5978 | } |
5979 | } |
5980 | if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) { |
5981 | // terminated iterating on the run containers, check that rest of bitset |
5982 | // is empty |
5983 | for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) { |
5984 | if (container1->array[i_bitset] != 0) { |
5985 | return false; |
5986 | } |
5987 | } |
5988 | } |
5989 | return true; |
5990 | } |
5991 | /* end file src/containers/mixed_subset.c */ |
5992 | /* begin file src/containers/mixed_union.c */ |
5993 | /* |
5994 | * mixed_union.c |
5995 | * |
5996 | */ |
5997 | |
5998 | #include <assert.h> |
5999 | #include <string.h> |
6000 | |
6001 | |
6002 | /* Compute the union of src_1 and src_2 and write the result to |
6003 | * dst. */ |
6004 | void array_bitset_container_union(const array_container_t *src_1, |
6005 | const bitset_container_t *src_2, |
6006 | bitset_container_t *dst) { |
6007 | if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst); |
6008 | dst->cardinality = (int32_t)bitset_set_list_withcard( |
6009 | bitset: dst->array, card: dst->cardinality, list: src_1->array, length: src_1->cardinality); |
6010 | } |
6011 | |
6012 | /* Compute the union of src_1 and src_2 and write the result to |
6013 | * dst. It is allowed for src_2 to be dst. This version does not |
6014 | * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ |
6015 | void array_bitset_container_lazy_union(const array_container_t *src_1, |
6016 | const bitset_container_t *src_2, |
6017 | bitset_container_t *dst) { |
6018 | if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst); |
6019 | bitset_set_list(bitset: dst->array, list: src_1->array, length: src_1->cardinality); |
6020 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6021 | } |
6022 | |
6023 | void run_bitset_container_union(const run_container_t *src_1, |
6024 | const bitset_container_t *src_2, |
6025 | bitset_container_t *dst) { |
6026 | assert(!run_container_is_full(src_1)); // catch this case upstream |
6027 | if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst); |
6028 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
6029 | rle16_t rle = src_1->runs[rlepos]; |
6030 | bitset_set_lenrange(bitmap: dst->array, start: rle.value, lenminusone: rle.length); |
6031 | } |
6032 | dst->cardinality = bitset_container_compute_cardinality(bitset: dst); |
6033 | } |
6034 | |
6035 | void run_bitset_container_lazy_union(const run_container_t *src_1, |
6036 | const bitset_container_t *src_2, |
6037 | bitset_container_t *dst) { |
6038 | assert(!run_container_is_full(src_1)); // catch this case upstream |
6039 | if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst); |
6040 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
6041 | rle16_t rle = src_1->runs[rlepos]; |
6042 | bitset_set_lenrange(bitmap: dst->array, start: rle.value, lenminusone: rle.length); |
6043 | } |
6044 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6045 | } |
6046 | |
6047 | // why do we leave the result as a run container?? |
6048 | void array_run_container_union(const array_container_t *src_1, |
6049 | const run_container_t *src_2, |
6050 | run_container_t *dst) { |
6051 | if (run_container_is_full(run: src_2)) { |
6052 | run_container_copy(src: src_2, dst); |
6053 | return; |
6054 | } |
6055 | // TODO: see whether the "2*" is spurious |
6056 | run_container_grow(run: dst, min: 2 * (src_1->cardinality + src_2->n_runs), false); |
6057 | int32_t rlepos = 0; |
6058 | int32_t arraypos = 0; |
6059 | rle16_t previousrle; |
6060 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
6061 | previousrle = run_container_append_first(run: dst, vl: src_2->runs[rlepos]); |
6062 | rlepos++; |
6063 | } else { |
6064 | previousrle = |
6065 | run_container_append_value_first(run: dst, val: src_1->array[arraypos]); |
6066 | arraypos++; |
6067 | } |
6068 | while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { |
6069 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
6070 | run_container_append(run: dst, vl: src_2->runs[rlepos], previousrl: &previousrle); |
6071 | rlepos++; |
6072 | } else { |
6073 | run_container_append_value(run: dst, val: src_1->array[arraypos], |
6074 | previousrl: &previousrle); |
6075 | arraypos++; |
6076 | } |
6077 | } |
6078 | if (arraypos < src_1->cardinality) { |
6079 | while (arraypos < src_1->cardinality) { |
6080 | run_container_append_value(run: dst, val: src_1->array[arraypos], |
6081 | previousrl: &previousrle); |
6082 | arraypos++; |
6083 | } |
6084 | } else { |
6085 | while (rlepos < src_2->n_runs) { |
6086 | run_container_append(run: dst, vl: src_2->runs[rlepos], previousrl: &previousrle); |
6087 | rlepos++; |
6088 | } |
6089 | } |
6090 | } |
6091 | |
6092 | void array_run_container_inplace_union(const array_container_t *src_1, |
6093 | run_container_t *src_2) { |
6094 | if (run_container_is_full(run: src_2)) { |
6095 | return; |
6096 | } |
6097 | const int32_t maxoutput = src_1->cardinality + src_2->n_runs; |
6098 | const int32_t neededcapacity = maxoutput + src_2->n_runs; |
6099 | if (src_2->capacity < neededcapacity) |
6100 | run_container_grow(run: src_2, min: neededcapacity, true); |
6101 | memmove(dest: src_2->runs + maxoutput, src: src_2->runs, |
6102 | n: src_2->n_runs * sizeof(rle16_t)); |
6103 | rle16_t *inputsrc2 = src_2->runs + maxoutput; |
6104 | int32_t rlepos = 0; |
6105 | int32_t arraypos = 0; |
6106 | int src2nruns = src_2->n_runs; |
6107 | src_2->n_runs = 0; |
6108 | |
6109 | rle16_t previousrle; |
6110 | |
6111 | if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { |
6112 | previousrle = run_container_append_first(run: src_2, vl: inputsrc2[rlepos]); |
6113 | rlepos++; |
6114 | } else { |
6115 | previousrle = |
6116 | run_container_append_value_first(run: src_2, val: src_1->array[arraypos]); |
6117 | arraypos++; |
6118 | } |
6119 | |
6120 | while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { |
6121 | if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { |
6122 | run_container_append(run: src_2, vl: inputsrc2[rlepos], previousrl: &previousrle); |
6123 | rlepos++; |
6124 | } else { |
6125 | run_container_append_value(run: src_2, val: src_1->array[arraypos], |
6126 | previousrl: &previousrle); |
6127 | arraypos++; |
6128 | } |
6129 | } |
6130 | if (arraypos < src_1->cardinality) { |
6131 | while (arraypos < src_1->cardinality) { |
6132 | run_container_append_value(run: src_2, val: src_1->array[arraypos], |
6133 | previousrl: &previousrle); |
6134 | arraypos++; |
6135 | } |
6136 | } else { |
6137 | while (rlepos < src2nruns) { |
6138 | run_container_append(run: src_2, vl: inputsrc2[rlepos], previousrl: &previousrle); |
6139 | rlepos++; |
6140 | } |
6141 | } |
6142 | } |
6143 | |
6144 | bool array_array_container_union(const array_container_t *src_1, |
6145 | const array_container_t *src_2, void **dst) { |
6146 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6147 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
6148 | *dst = array_container_create_given_capacity(size: totalCardinality); |
6149 | if (*dst != NULL) { |
6150 | array_container_union(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst); |
6151 | } else { |
6152 | return true; // otherwise failure won't be caught |
6153 | } |
6154 | return false; // not a bitset |
6155 | } |
6156 | *dst = bitset_container_create(); |
6157 | bool returnval = true; // expect a bitset |
6158 | if (*dst != NULL) { |
6159 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6160 | bitset_set_list(bitset: ourbitset->array, list: src_1->array, length: src_1->cardinality); |
6161 | ourbitset->cardinality = (int32_t)bitset_set_list_withcard( |
6162 | bitset: ourbitset->array, card: src_1->cardinality, list: src_2->array, |
6163 | length: src_2->cardinality); |
6164 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
6165 | // need to convert! |
6166 | *dst = array_container_from_bitset(bits: ourbitset); |
6167 | bitset_container_free(bitset: ourbitset); |
6168 | returnval = false; // not going to be a bitset |
6169 | } |
6170 | } |
6171 | return returnval; |
6172 | } |
6173 | |
6174 | bool array_array_container_inplace_union(array_container_t *src_1, |
6175 | const array_container_t *src_2, void **dst) { |
6176 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6177 | *dst = NULL; |
6178 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
6179 | if(src_1->capacity < totalCardinality) { |
6180 | *dst = array_container_create_given_capacity(size: 2 * totalCardinality); // be purposefully generous |
6181 | if (*dst != NULL) { |
6182 | array_container_union(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst); |
6183 | } else { |
6184 | return true; // otherwise failure won't be caught |
6185 | } |
6186 | return false; // not a bitset |
6187 | } else { |
6188 | memmove(dest: src_1->array + src_2->cardinality, src: src_1->array, n: src_1->cardinality * sizeof(uint16_t)); |
6189 | src_1->cardinality = (int32_t)union_uint16(set_1: src_1->array + src_2->cardinality, size_1: src_1->cardinality, |
6190 | set_2: src_2->array, size_2: src_2->cardinality, buffer: src_1->array); |
6191 | return false; // not a bitset |
6192 | } |
6193 | } |
6194 | *dst = bitset_container_create(); |
6195 | bool returnval = true; // expect a bitset |
6196 | if (*dst != NULL) { |
6197 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6198 | bitset_set_list(bitset: ourbitset->array, list: src_1->array, length: src_1->cardinality); |
6199 | ourbitset->cardinality = (int32_t)bitset_set_list_withcard( |
6200 | bitset: ourbitset->array, card: src_1->cardinality, list: src_2->array, |
6201 | length: src_2->cardinality); |
6202 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
6203 | // need to convert! |
6204 | if(src_1->capacity < ourbitset->cardinality) { |
6205 | array_container_grow(container: src_1, min: ourbitset->cardinality, false); |
6206 | } |
6207 | |
6208 | bitset_extract_setbits_uint16(bitset: ourbitset->array, length: BITSET_CONTAINER_SIZE_IN_WORDS, |
6209 | out: src_1->array, base: 0); |
6210 | src_1->cardinality = ourbitset->cardinality; |
6211 | *dst = src_1; |
6212 | bitset_container_free(bitset: ourbitset); |
6213 | returnval = false; // not going to be a bitset |
6214 | } |
6215 | } |
6216 | return returnval; |
6217 | } |
6218 | |
6219 | |
6220 | bool array_array_container_lazy_union(const array_container_t *src_1, |
6221 | const array_container_t *src_2, |
6222 | void **dst) { |
6223 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6224 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
6225 | *dst = array_container_create_given_capacity(size: totalCardinality); |
6226 | if (*dst != NULL) { |
6227 | array_container_union(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst); |
6228 | } else { |
6229 | return true; // otherwise failure won't be caught |
6230 | } |
6231 | return false; // not a bitset |
6232 | } |
6233 | *dst = bitset_container_create(); |
6234 | bool returnval = true; // expect a bitset |
6235 | if (*dst != NULL) { |
6236 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6237 | bitset_set_list(bitset: ourbitset->array, list: src_1->array, length: src_1->cardinality); |
6238 | bitset_set_list(bitset: ourbitset->array, list: src_2->array, length: src_2->cardinality); |
6239 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6240 | } |
6241 | return returnval; |
6242 | } |
6243 | |
6244 | |
6245 | bool array_array_container_lazy_inplace_union(array_container_t *src_1, |
6246 | const array_container_t *src_2, |
6247 | void **dst) { |
6248 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6249 | *dst = NULL; |
6250 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
6251 | if(src_1->capacity < totalCardinality) { |
6252 | *dst = array_container_create_given_capacity(size: 2 * totalCardinality); // be purposefully generous |
6253 | if (*dst != NULL) { |
6254 | array_container_union(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst); |
6255 | } else { |
6256 | return true; // otherwise failure won't be caught |
6257 | } |
6258 | return false; // not a bitset |
6259 | } else { |
6260 | memmove(dest: src_1->array + src_2->cardinality, src: src_1->array, n: src_1->cardinality * sizeof(uint16_t)); |
6261 | src_1->cardinality = (int32_t)union_uint16(set_1: src_1->array + src_2->cardinality, size_1: src_1->cardinality, |
6262 | set_2: src_2->array, size_2: src_2->cardinality, buffer: src_1->array); |
6263 | return false; // not a bitset |
6264 | } |
6265 | } |
6266 | *dst = bitset_container_create(); |
6267 | bool returnval = true; // expect a bitset |
6268 | if (*dst != NULL) { |
6269 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6270 | bitset_set_list(bitset: ourbitset->array, list: src_1->array, length: src_1->cardinality); |
6271 | bitset_set_list(bitset: ourbitset->array, list: src_2->array, length: src_2->cardinality); |
6272 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6273 | } |
6274 | return returnval; |
6275 | } |
6276 | /* end file src/containers/mixed_union.c */ |
6277 | /* begin file src/containers/mixed_xor.c */ |
6278 | /* |
6279 | * mixed_xor.c |
6280 | */ |
6281 | |
6282 | #include <assert.h> |
6283 | #include <string.h> |
6284 | |
6285 | |
6286 | /* Compute the xor of src_1 and src_2 and write the result to |
6287 | * dst (which has no container initially). |
6288 | * Result is true iff dst is a bitset */ |
6289 | bool array_bitset_container_xor(const array_container_t *src_1, |
6290 | const bitset_container_t *src_2, void **dst) { |
6291 | bitset_container_t *result = bitset_container_create(); |
6292 | bitset_container_copy(source: src_2, dest: result); |
6293 | result->cardinality = (int32_t)bitset_flip_list_withcard( |
6294 | bitset: result->array, card: result->cardinality, list: src_1->array, length: src_1->cardinality); |
6295 | |
6296 | // do required type conversions. |
6297 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
6298 | *dst = array_container_from_bitset(bits: result); |
6299 | bitset_container_free(bitset: result); |
6300 | return false; // not bitset |
6301 | } |
6302 | *dst = result; |
6303 | return true; // bitset |
6304 | } |
6305 | |
6306 | /* Compute the xor of src_1 and src_2 and write the result to |
6307 | * dst. It is allowed for src_2 to be dst. This version does not |
6308 | * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). |
6309 | */ |
6310 | |
6311 | void array_bitset_container_lazy_xor(const array_container_t *src_1, |
6312 | const bitset_container_t *src_2, |
6313 | bitset_container_t *dst) { |
6314 | if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst); |
6315 | bitset_flip_list(bitset: dst->array, list: src_1->array, length: src_1->cardinality); |
6316 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6317 | } |
6318 | |
6319 | /* Compute the xor of src_1 and src_2 and write the result to |
6320 | * dst. Result may be either a bitset or an array container |
6321 | * (returns "result is bitset"). dst does not initially have |
6322 | * any container, but becomes either a bitset container (return |
6323 | * result true) or an array container. |
6324 | */ |
6325 | |
6326 | bool run_bitset_container_xor(const run_container_t *src_1, |
6327 | const bitset_container_t *src_2, void **dst) { |
6328 | bitset_container_t *result = bitset_container_create(); |
6329 | |
6330 | bitset_container_copy(source: src_2, dest: result); |
6331 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
6332 | rle16_t rle = src_1->runs[rlepos]; |
6333 | bitset_flip_range(bitmap: result->array, start: rle.value, |
6334 | end: rle.value + rle.length + UINT32_C(1)); |
6335 | } |
6336 | result->cardinality = bitset_container_compute_cardinality(bitset: result); |
6337 | |
6338 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
6339 | *dst = array_container_from_bitset(bits: result); |
6340 | bitset_container_free(bitset: result); |
6341 | return false; // not bitset |
6342 | } |
6343 | *dst = result; |
6344 | return true; // bitset |
6345 | } |
6346 | |
6347 | /* lazy xor. Dst is initialized and may be equal to src_2. |
6348 | * Result is left as a bitset container, even if actual |
6349 | * cardinality would dictate an array container. |
6350 | */ |
6351 | |
6352 | void run_bitset_container_lazy_xor(const run_container_t *src_1, |
6353 | const bitset_container_t *src_2, |
6354 | bitset_container_t *dst) { |
6355 | if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst); |
6356 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
6357 | rle16_t rle = src_1->runs[rlepos]; |
6358 | bitset_flip_range(bitmap: dst->array, start: rle.value, |
6359 | end: rle.value + rle.length + UINT32_C(1)); |
6360 | } |
6361 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6362 | } |
6363 | |
6364 | /* dst does not indicate a valid container initially. Eventually it |
6365 | * can become any kind of container. |
6366 | */ |
6367 | |
6368 | int array_run_container_xor(const array_container_t *src_1, |
6369 | const run_container_t *src_2, void **dst) { |
6370 | // semi following Java XOR implementation as of May 2016 |
6371 | // the C OR implementation works quite differently and can return a run |
6372 | // container |
6373 | // TODO could optimize for full run containers. |
6374 | |
6375 | // use of lazy following Java impl. |
6376 | const int arbitrary_threshold = 32; |
6377 | if (src_1->cardinality < arbitrary_threshold) { |
6378 | run_container_t *ans = run_container_create(); |
6379 | array_run_container_lazy_xor(src_1, src_2, dst: ans); // keeps runs. |
6380 | uint8_t typecode_after; |
6381 | *dst = |
6382 | convert_run_to_efficient_container_and_free(c: ans, typecode_after: &typecode_after); |
6383 | return typecode_after; |
6384 | } |
6385 | |
6386 | int card = run_container_cardinality(run: src_2); |
6387 | if (card <= DEFAULT_MAX_SIZE) { |
6388 | // Java implementation works with the array, xoring the run elements via |
6389 | // iterator |
6390 | array_container_t *temp = array_container_from_run(arr: src_2); |
6391 | bool ret_is_bitset = array_array_container_xor(src_1: temp, src_2: src_1, dst); |
6392 | array_container_free(arr: temp); |
6393 | return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE |
6394 | : ARRAY_CONTAINER_TYPE_CODE; |
6395 | |
6396 | } else { // guess that it will end up as a bitset |
6397 | bitset_container_t *result = bitset_container_from_run(arr: src_2); |
6398 | bool is_bitset = bitset_array_container_ixor(src_1: result, src_2: src_1, dst); |
6399 | // any necessary type conversion has been done by the ixor |
6400 | int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE |
6401 | : ARRAY_CONTAINER_TYPE_CODE); |
6402 | return retval; |
6403 | } |
6404 | } |
6405 | |
6406 | /* Dst is a valid run container. (Can it be src_2? Let's say not.) |
6407 | * Leaves result as run container, even if other options are |
6408 | * smaller. |
6409 | */ |
6410 | |
6411 | void array_run_container_lazy_xor(const array_container_t *src_1, |
6412 | const run_container_t *src_2, |
6413 | run_container_t *dst) { |
6414 | run_container_grow(run: dst, min: src_1->cardinality + src_2->n_runs, false); |
6415 | int32_t rlepos = 0; |
6416 | int32_t arraypos = 0; |
6417 | dst->n_runs = 0; |
6418 | |
6419 | while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { |
6420 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
6421 | run_container_smart_append_exclusive(src: dst, start: src_2->runs[rlepos].value, |
6422 | length: src_2->runs[rlepos].length); |
6423 | rlepos++; |
6424 | } else { |
6425 | run_container_smart_append_exclusive(src: dst, start: src_1->array[arraypos], |
6426 | length: 0); |
6427 | arraypos++; |
6428 | } |
6429 | } |
6430 | while (arraypos < src_1->cardinality) { |
6431 | run_container_smart_append_exclusive(src: dst, start: src_1->array[arraypos], length: 0); |
6432 | arraypos++; |
6433 | } |
6434 | while (rlepos < src_2->n_runs) { |
6435 | run_container_smart_append_exclusive(src: dst, start: src_2->runs[rlepos].value, |
6436 | length: src_2->runs[rlepos].length); |
6437 | rlepos++; |
6438 | } |
6439 | } |
6440 | |
6441 | /* dst does not indicate a valid container initially. Eventually it |
6442 | * can become any kind of container. |
6443 | */ |
6444 | |
6445 | int run_run_container_xor(const run_container_t *src_1, |
6446 | const run_container_t *src_2, void **dst) { |
6447 | run_container_t *ans = run_container_create(); |
6448 | run_container_xor(src_1, src_2, dst: ans); |
6449 | uint8_t typecode_after; |
6450 | *dst = convert_run_to_efficient_container_and_free(c: ans, typecode_after: &typecode_after); |
6451 | return typecode_after; |
6452 | } |
6453 | |
6454 | /* |
6455 | * Java implementation (as of May 2016) for array_run, run_run |
6456 | * and bitset_run don't do anything different for inplace. |
6457 | * Could adopt the mixed_union.c approach instead (ie, using |
6458 | * smart_append_exclusive) |
6459 | * |
6460 | */ |
6461 | |
6462 | bool array_array_container_xor(const array_container_t *src_1, |
6463 | const array_container_t *src_2, void **dst) { |
6464 | int totalCardinality = |
6465 | src_1->cardinality + src_2->cardinality; // upper bound |
6466 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
6467 | *dst = array_container_create_given_capacity(size: totalCardinality); |
6468 | array_container_xor(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst); |
6469 | return false; // not a bitset |
6470 | } |
6471 | *dst = bitset_container_from_array(a: src_1); |
6472 | bool returnval = true; // expect a bitset |
6473 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6474 | ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( |
6475 | bitset: ourbitset->array, card: src_1->cardinality, list: src_2->array, length: src_2->cardinality); |
6476 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
6477 | // need to convert! |
6478 | *dst = array_container_from_bitset(bits: ourbitset); |
6479 | bitset_container_free(bitset: ourbitset); |
6480 | returnval = false; // not going to be a bitset |
6481 | } |
6482 | |
6483 | return returnval; |
6484 | } |
6485 | |
6486 | bool array_array_container_lazy_xor(const array_container_t *src_1, |
6487 | const array_container_t *src_2, |
6488 | void **dst) { |
6489 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6490 | // upper bound, but probably poor estimate for xor |
6491 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
6492 | *dst = array_container_create_given_capacity(size: totalCardinality); |
6493 | if (*dst != NULL) |
6494 | array_container_xor(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst); |
6495 | return false; // not a bitset |
6496 | } |
6497 | *dst = bitset_container_from_array(a: src_1); |
6498 | bool returnval = true; // expect a bitset (maybe, for XOR??) |
6499 | if (*dst != NULL) { |
6500 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6501 | bitset_flip_list(bitset: ourbitset->array, list: src_2->array, length: src_2->cardinality); |
6502 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6503 | } |
6504 | return returnval; |
6505 | } |
6506 | |
6507 | /* Compute the xor of src_1 and src_2 and write the result to |
6508 | * dst (which has no container initially). Return value is |
6509 | * "dst is a bitset" |
6510 | */ |
6511 | |
6512 | bool bitset_bitset_container_xor(const bitset_container_t *src_1, |
6513 | const bitset_container_t *src_2, void **dst) { |
6514 | bitset_container_t *ans = bitset_container_create(); |
6515 | int card = bitset_container_xor(src_1, src_2, dst: ans); |
6516 | if (card <= DEFAULT_MAX_SIZE) { |
6517 | *dst = array_container_from_bitset(bits: ans); |
6518 | bitset_container_free(bitset: ans); |
6519 | return false; // not bitset |
6520 | } else { |
6521 | *dst = ans; |
6522 | return true; |
6523 | } |
6524 | } |
6525 | |
6526 | /* Compute the xor of src_1 and src_2 and write the result to |
6527 | * dst (which has no container initially). It will modify src_1 |
6528 | * to be dst if the result is a bitset. Otherwise, it will |
6529 | * free src_1 and dst will be a new array container. In both |
6530 | * cases, the caller is responsible for deallocating dst. |
6531 | * Returns true iff dst is a bitset */ |
6532 | |
6533 | bool bitset_array_container_ixor(bitset_container_t *src_1, |
6534 | const array_container_t *src_2, void **dst) { |
6535 | *dst = src_1; |
6536 | src_1->cardinality = (uint32_t)bitset_flip_list_withcard( |
6537 | bitset: src_1->array, card: src_1->cardinality, list: src_2->array, length: src_2->cardinality); |
6538 | |
6539 | if (src_1->cardinality <= DEFAULT_MAX_SIZE) { |
6540 | *dst = array_container_from_bitset(bits: src_1); |
6541 | bitset_container_free(bitset: src_1); |
6542 | return false; // not bitset |
6543 | } else |
6544 | return true; |
6545 | } |
6546 | |
/* A bunch of in-place operations, some of which may not *really* be in-place.
 * TODO: write actual in-place routines if efficiency warrants it.
 * Anything in-place with a bitset is a good candidate.
 */
6551 | |
6552 | bool bitset_bitset_container_ixor(bitset_container_t *src_1, |
6553 | const bitset_container_t *src_2, void **dst) { |
6554 | bool ans = bitset_bitset_container_xor(src_1, src_2, dst); |
6555 | bitset_container_free(bitset: src_1); |
6556 | return ans; |
6557 | } |
6558 | |
6559 | bool array_bitset_container_ixor(array_container_t *src_1, |
6560 | const bitset_container_t *src_2, void **dst) { |
6561 | bool ans = array_bitset_container_xor(src_1, src_2, dst); |
6562 | array_container_free(arr: src_1); |
6563 | return ans; |
6564 | } |
6565 | |
6566 | /* Compute the xor of src_1 and src_2 and write the result to |
6567 | * dst. Result may be either a bitset or an array container |
6568 | * (returns "result is bitset"). dst does not initially have |
6569 | * any container, but becomes either a bitset container (return |
6570 | * result true) or an array container. |
6571 | */ |
6572 | |
6573 | bool run_bitset_container_ixor(run_container_t *src_1, |
6574 | const bitset_container_t *src_2, void **dst) { |
6575 | bool ans = run_bitset_container_xor(src_1, src_2, dst); |
6576 | run_container_free(run: src_1); |
6577 | return ans; |
6578 | } |
6579 | |
6580 | bool bitset_run_container_ixor(bitset_container_t *src_1, |
6581 | const run_container_t *src_2, void **dst) { |
6582 | bool ans = run_bitset_container_xor(src_1: src_2, src_2: src_1, dst); |
6583 | bitset_container_free(bitset: src_1); |
6584 | return ans; |
6585 | } |
6586 | |
6587 | /* dst does not indicate a valid container initially. Eventually it |
6588 | * can become any kind of container. |
6589 | */ |
6590 | |
6591 | int array_run_container_ixor(array_container_t *src_1, |
6592 | const run_container_t *src_2, void **dst) { |
6593 | int ans = array_run_container_xor(src_1, src_2, dst); |
6594 | array_container_free(arr: src_1); |
6595 | return ans; |
6596 | } |
6597 | |
6598 | int run_array_container_ixor(run_container_t *src_1, |
6599 | const array_container_t *src_2, void **dst) { |
6600 | int ans = array_run_container_xor(src_1: src_2, src_2: src_1, dst); |
6601 | run_container_free(run: src_1); |
6602 | return ans; |
6603 | } |
6604 | |
6605 | bool array_array_container_ixor(array_container_t *src_1, |
6606 | const array_container_t *src_2, void **dst) { |
6607 | bool ans = array_array_container_xor(src_1, src_2, dst); |
6608 | array_container_free(arr: src_1); |
6609 | return ans; |
6610 | } |
6611 | |
6612 | int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, |
6613 | void **dst) { |
6614 | int ans = run_run_container_xor(src_1, src_2, dst); |
6615 | run_container_free(run: src_1); |
6616 | return ans; |
6617 | } |
6618 | /* end file src/containers/mixed_xor.c */ |
6619 | /* begin file src/containers/run.c */ |
6620 | #include <stdio.h> |
6621 | #include <stdlib.h> |
6622 | |
6623 | |
6624 | bool run_container_add(run_container_t *run, uint16_t pos) { |
6625 | int32_t index = interleavedBinarySearch(array: run->runs, lenarray: run->n_runs, ikey: pos); |
6626 | if (index >= 0) return false; // already there |
6627 | index = -index - 2; // points to preceding value, possibly -1 |
6628 | if (index >= 0) { // possible match |
6629 | int32_t offset = pos - run->runs[index].value; |
6630 | int32_t le = run->runs[index].length; |
6631 | if (offset <= le) return false; // already there |
6632 | if (offset == le + 1) { |
6633 | // we may need to fuse |
6634 | if (index + 1 < run->n_runs) { |
6635 | if (run->runs[index + 1].value == pos + 1) { |
6636 | // indeed fusion is needed |
6637 | run->runs[index].length = run->runs[index + 1].value + |
6638 | run->runs[index + 1].length - |
6639 | run->runs[index].value; |
6640 | recoverRoomAtIndex(run, index: (uint16_t)(index + 1)); |
6641 | return true; |
6642 | } |
6643 | } |
6644 | run->runs[index].length++; |
6645 | return true; |
6646 | } |
6647 | if (index + 1 < run->n_runs) { |
6648 | // we may need to fuse |
6649 | if (run->runs[index + 1].value == pos + 1) { |
6650 | // indeed fusion is needed |
6651 | run->runs[index + 1].value = pos; |
6652 | run->runs[index + 1].length = run->runs[index + 1].length + 1; |
6653 | return true; |
6654 | } |
6655 | } |
6656 | } |
6657 | if (index == -1) { |
6658 | // we may need to extend the first run |
6659 | if (0 < run->n_runs) { |
6660 | if (run->runs[0].value == pos + 1) { |
6661 | run->runs[0].length++; |
6662 | run->runs[0].value--; |
6663 | return true; |
6664 | } |
6665 | } |
6666 | } |
6667 | makeRoomAtIndex(run, index: (uint16_t)(index + 1)); |
6668 | run->runs[index + 1].value = pos; |
6669 | run->runs[index + 1].length = 0; |
6670 | return true; |
6671 | } |
6672 | |
6673 | /* Create a new run container. Return NULL in case of failure. */ |
6674 | run_container_t *run_container_create_given_capacity(int32_t size) { |
6675 | run_container_t *run; |
6676 | /* Allocate the run container itself. */ |
6677 | run = (run_container_t *)malloc(size: sizeof(run_container_t)); |
6678 | assert (run); |
6679 | if (size <= 0) // we don't want to rely on malloc(0) |
6680 | run->runs = NULL; |
6681 | run->runs = (rle16_t *)malloc(size: sizeof(rle16_t) * size); |
6682 | assert (run->runs); |
6683 | run->capacity = size; |
6684 | run->n_runs = 0; |
6685 | return run; |
6686 | } |
6687 | |
6688 | int run_container_shrink_to_fit(run_container_t *src) { |
6689 | if (src->n_runs == src->capacity) return 0; // nothing to do |
6690 | int savings = src->capacity - src->n_runs; |
6691 | src->capacity = src->n_runs; |
6692 | rle16_t *oldruns = src->runs; |
6693 | src->runs = (rle16_t *)realloc(ptr: oldruns, size: src->capacity * sizeof(rle16_t)); |
6694 | if (src->runs == NULL) free(ptr: oldruns); // should never happen? |
6695 | return savings; |
6696 | } |
6697 | /* Create a new run container. Return NULL in case of failure. */ |
6698 | run_container_t *run_container_create(void) { |
6699 | return run_container_create_given_capacity(size: RUN_DEFAULT_INIT_SIZE); |
6700 | } |
6701 | |
6702 | run_container_t *run_container_clone(const run_container_t *src) { |
6703 | run_container_t *run = run_container_create_given_capacity(size: src->capacity); |
6704 | if (run == NULL) return NULL; |
6705 | run->capacity = src->capacity; |
6706 | run->n_runs = src->n_runs; |
6707 | memcpy(dest: run->runs, src: src->runs, n: src->n_runs * sizeof(rle16_t)); |
6708 | return run; |
6709 | } |
6710 | |
6711 | /* Free memory. */ |
6712 | void run_container_free(run_container_t *run) { |
6713 | if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise |
6714 | free(ptr: run->runs); |
6715 | run->runs = NULL; // pedantic |
6716 | } |
6717 | free(ptr: run); |
6718 | } |
6719 | |
6720 | void run_container_grow(run_container_t *run, int32_t min, bool copy) { |
6721 | int32_t newCapacity = |
6722 | (run->capacity == 0) |
6723 | ? RUN_DEFAULT_INIT_SIZE |
6724 | : run->capacity < 64 ? run->capacity * 2 |
6725 | : run->capacity < 1024 ? run->capacity * 3 / 2 |
6726 | : run->capacity * 5 / 4; |
6727 | if (newCapacity < min) newCapacity = min; |
6728 | run->capacity = newCapacity; |
6729 | assert(run->capacity >= min); |
6730 | if (copy) { |
6731 | rle16_t *oldruns = run->runs; |
6732 | run->runs = |
6733 | (rle16_t *)realloc(ptr: oldruns, size: run->capacity * sizeof(rle16_t)); |
6734 | if (run->runs == NULL) free(ptr: oldruns); |
6735 | } else { |
6736 | // Jon Strabala reports that some tools complain otherwise |
6737 | if (run->runs != NULL) { |
6738 | free(ptr: run->runs); |
6739 | } |
6740 | run->runs = (rle16_t *)malloc(size: run->capacity * sizeof(rle16_t)); |
6741 | } |
6742 | // handle the case where realloc fails |
6743 | if (run->runs == NULL) { |
6744 | fprintf(stderr, format: "could not allocate memory\n" ); |
6745 | } |
6746 | assert(run->runs != NULL); |
6747 | } |
6748 | |
6749 | /* copy one container into another */ |
6750 | void run_container_copy(const run_container_t *src, run_container_t *dst) { |
6751 | const int32_t n_runs = src->n_runs; |
6752 | if (src->n_runs > dst->capacity) { |
6753 | run_container_grow(run: dst, min: n_runs, false); |
6754 | } |
6755 | dst->n_runs = n_runs; |
6756 | memcpy(dest: dst->runs, src: src->runs, n: sizeof(rle16_t) * n_runs); |
6757 | } |
6758 | |
6759 | /* Compute the union of `src_1' and `src_2' and write the result to `dst' |
6760 | * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ |
6761 | void run_container_union(const run_container_t *src_1, |
6762 | const run_container_t *src_2, run_container_t *dst) { |
6763 | // TODO: this could be a lot more efficient |
6764 | |
6765 | // we start out with inexpensive checks |
6766 | const bool if1 = run_container_is_full(run: src_1); |
6767 | const bool if2 = run_container_is_full(run: src_2); |
6768 | if (if1 || if2) { |
6769 | if (if1) { |
6770 | run_container_copy(src: src_1, dst); |
6771 | return; |
6772 | } |
6773 | if (if2) { |
6774 | run_container_copy(src: src_2, dst); |
6775 | return; |
6776 | } |
6777 | } |
6778 | const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; |
6779 | if (dst->capacity < neededcapacity) |
6780 | run_container_grow(run: dst, min: neededcapacity, false); |
6781 | dst->n_runs = 0; |
6782 | int32_t rlepos = 0; |
6783 | int32_t xrlepos = 0; |
6784 | |
6785 | rle16_t previousrle; |
6786 | if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { |
6787 | previousrle = run_container_append_first(run: dst, vl: src_1->runs[rlepos]); |
6788 | rlepos++; |
6789 | } else { |
6790 | previousrle = run_container_append_first(run: dst, vl: src_2->runs[xrlepos]); |
6791 | xrlepos++; |
6792 | } |
6793 | |
6794 | while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { |
6795 | rle16_t newrl; |
6796 | if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { |
6797 | newrl = src_1->runs[rlepos]; |
6798 | rlepos++; |
6799 | } else { |
6800 | newrl = src_2->runs[xrlepos]; |
6801 | xrlepos++; |
6802 | } |
6803 | run_container_append(run: dst, vl: newrl, previousrl: &previousrle); |
6804 | } |
6805 | while (xrlepos < src_2->n_runs) { |
6806 | run_container_append(run: dst, vl: src_2->runs[xrlepos], previousrl: &previousrle); |
6807 | xrlepos++; |
6808 | } |
6809 | while (rlepos < src_1->n_runs) { |
6810 | run_container_append(run: dst, vl: src_1->runs[rlepos], previousrl: &previousrle); |
6811 | rlepos++; |
6812 | } |
6813 | } |
6814 | |
6815 | /* Compute the union of `src_1' and `src_2' and write the result to `src_1' |
6816 | */ |
6817 | void run_container_union_inplace(run_container_t *src_1, |
6818 | const run_container_t *src_2) { |
6819 | // TODO: this could be a lot more efficient |
6820 | |
6821 | // we start out with inexpensive checks |
6822 | const bool if1 = run_container_is_full(run: src_1); |
6823 | const bool if2 = run_container_is_full(run: src_2); |
6824 | if (if1 || if2) { |
6825 | if (if1) { |
6826 | return; |
6827 | } |
6828 | if (if2) { |
6829 | run_container_copy(src: src_2, dst: src_1); |
6830 | return; |
6831 | } |
6832 | } |
6833 | // we move the data to the end of the current array |
6834 | const int32_t maxoutput = src_1->n_runs + src_2->n_runs; |
6835 | const int32_t neededcapacity = maxoutput + src_1->n_runs; |
6836 | if (src_1->capacity < neededcapacity) |
6837 | run_container_grow(run: src_1, min: neededcapacity, true); |
6838 | memmove(dest: src_1->runs + maxoutput, src: src_1->runs, |
6839 | n: src_1->n_runs * sizeof(rle16_t)); |
6840 | rle16_t *inputsrc1 = src_1->runs + maxoutput; |
6841 | const int32_t input1nruns = src_1->n_runs; |
6842 | src_1->n_runs = 0; |
6843 | int32_t rlepos = 0; |
6844 | int32_t xrlepos = 0; |
6845 | |
6846 | rle16_t previousrle; |
6847 | if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { |
6848 | previousrle = run_container_append_first(run: src_1, vl: inputsrc1[rlepos]); |
6849 | rlepos++; |
6850 | } else { |
6851 | previousrle = run_container_append_first(run: src_1, vl: src_2->runs[xrlepos]); |
6852 | xrlepos++; |
6853 | } |
6854 | while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { |
6855 | rle16_t newrl; |
6856 | if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { |
6857 | newrl = inputsrc1[rlepos]; |
6858 | rlepos++; |
6859 | } else { |
6860 | newrl = src_2->runs[xrlepos]; |
6861 | xrlepos++; |
6862 | } |
6863 | run_container_append(run: src_1, vl: newrl, previousrl: &previousrle); |
6864 | } |
6865 | while (xrlepos < src_2->n_runs) { |
6866 | run_container_append(run: src_1, vl: src_2->runs[xrlepos], previousrl: &previousrle); |
6867 | xrlepos++; |
6868 | } |
6869 | while (rlepos < input1nruns) { |
6870 | run_container_append(run: src_1, vl: inputsrc1[rlepos], previousrl: &previousrle); |
6871 | rlepos++; |
6872 | } |
6873 | } |
6874 | |
6875 | /* Compute the symmetric difference of `src_1' and `src_2' and write the result |
6876 | * to `dst' |
6877 | * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ |
6878 | void run_container_xor(const run_container_t *src_1, |
6879 | const run_container_t *src_2, run_container_t *dst) { |
6880 | // don't bother to convert xor with full range into negation |
6881 | // since negation is implemented similarly |
6882 | |
6883 | const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; |
6884 | if (dst->capacity < neededcapacity) |
6885 | run_container_grow(run: dst, min: neededcapacity, false); |
6886 | |
6887 | int32_t pos1 = 0; |
6888 | int32_t pos2 = 0; |
6889 | dst->n_runs = 0; |
6890 | |
6891 | while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { |
6892 | if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { |
6893 | run_container_smart_append_exclusive(src: dst, start: src_1->runs[pos1].value, |
6894 | length: src_1->runs[pos1].length); |
6895 | pos1++; |
6896 | } else { |
6897 | run_container_smart_append_exclusive(src: dst, start: src_2->runs[pos2].value, |
6898 | length: src_2->runs[pos2].length); |
6899 | pos2++; |
6900 | } |
6901 | } |
6902 | while (pos1 < src_1->n_runs) { |
6903 | run_container_smart_append_exclusive(src: dst, start: src_1->runs[pos1].value, |
6904 | length: src_1->runs[pos1].length); |
6905 | pos1++; |
6906 | } |
6907 | |
6908 | while (pos2 < src_2->n_runs) { |
6909 | run_container_smart_append_exclusive(src: dst, start: src_2->runs[pos2].value, |
6910 | length: src_2->runs[pos2].length); |
6911 | pos2++; |
6912 | } |
6913 | } |
6914 | |
6915 | /* Compute the intersection of src_1 and src_2 and write the result to |
6916 | * dst. It is assumed that dst is distinct from both src_1 and src_2. */ |
6917 | void run_container_intersection(const run_container_t *src_1, |
6918 | const run_container_t *src_2, |
6919 | run_container_t *dst) { |
6920 | const bool if1 = run_container_is_full(run: src_1); |
6921 | const bool if2 = run_container_is_full(run: src_2); |
6922 | if (if1 || if2) { |
6923 | if (if1) { |
6924 | run_container_copy(src: src_2, dst); |
6925 | return; |
6926 | } |
6927 | if (if2) { |
6928 | run_container_copy(src: src_1, dst); |
6929 | return; |
6930 | } |
6931 | } |
6932 | // TODO: this could be a lot more efficient, could use SIMD optimizations |
6933 | const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; |
6934 | if (dst->capacity < neededcapacity) |
6935 | run_container_grow(run: dst, min: neededcapacity, false); |
6936 | dst->n_runs = 0; |
6937 | int32_t rlepos = 0; |
6938 | int32_t xrlepos = 0; |
6939 | int32_t start = src_1->runs[rlepos].value; |
6940 | int32_t end = start + src_1->runs[rlepos].length + 1; |
6941 | int32_t xstart = src_2->runs[xrlepos].value; |
6942 | int32_t xend = xstart + src_2->runs[xrlepos].length + 1; |
6943 | while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { |
6944 | if (end <= xstart) { |
6945 | ++rlepos; |
6946 | if (rlepos < src_1->n_runs) { |
6947 | start = src_1->runs[rlepos].value; |
6948 | end = start + src_1->runs[rlepos].length + 1; |
6949 | } |
6950 | } else if (xend <= start) { |
6951 | ++xrlepos; |
6952 | if (xrlepos < src_2->n_runs) { |
6953 | xstart = src_2->runs[xrlepos].value; |
6954 | xend = xstart + src_2->runs[xrlepos].length + 1; |
6955 | } |
6956 | } else { // they overlap |
6957 | const int32_t lateststart = start > xstart ? start : xstart; |
6958 | int32_t earliestend; |
6959 | if (end == xend) { // improbable |
6960 | earliestend = end; |
6961 | rlepos++; |
6962 | xrlepos++; |
6963 | if (rlepos < src_1->n_runs) { |
6964 | start = src_1->runs[rlepos].value; |
6965 | end = start + src_1->runs[rlepos].length + 1; |
6966 | } |
6967 | if (xrlepos < src_2->n_runs) { |
6968 | xstart = src_2->runs[xrlepos].value; |
6969 | xend = xstart + src_2->runs[xrlepos].length + 1; |
6970 | } |
6971 | } else if (end < xend) { |
6972 | earliestend = end; |
6973 | rlepos++; |
6974 | if (rlepos < src_1->n_runs) { |
6975 | start = src_1->runs[rlepos].value; |
6976 | end = start + src_1->runs[rlepos].length + 1; |
6977 | } |
6978 | |
6979 | } else { // end > xend |
6980 | earliestend = xend; |
6981 | xrlepos++; |
6982 | if (xrlepos < src_2->n_runs) { |
6983 | xstart = src_2->runs[xrlepos].value; |
6984 | xend = xstart + src_2->runs[xrlepos].length + 1; |
6985 | } |
6986 | } |
6987 | dst->runs[dst->n_runs].value = (uint16_t)lateststart; |
6988 | dst->runs[dst->n_runs].length = |
6989 | (uint16_t)(earliestend - lateststart - 1); |
6990 | dst->n_runs++; |
6991 | } |
6992 | } |
6993 | } |
6994 | |
6995 | /* Compute the size of the intersection of src_1 and src_2 . */ |
6996 | int run_container_intersection_cardinality(const run_container_t *src_1, |
6997 | const run_container_t *src_2) { |
6998 | const bool if1 = run_container_is_full(run: src_1); |
6999 | const bool if2 = run_container_is_full(run: src_2); |
7000 | if (if1 || if2) { |
7001 | if (if1) { |
7002 | return run_container_cardinality(run: src_2); |
7003 | } |
7004 | if (if2) { |
7005 | return run_container_cardinality(run: src_1); |
7006 | } |
7007 | } |
7008 | int answer = 0; |
7009 | int32_t rlepos = 0; |
7010 | int32_t xrlepos = 0; |
7011 | int32_t start = src_1->runs[rlepos].value; |
7012 | int32_t end = start + src_1->runs[rlepos].length + 1; |
7013 | int32_t xstart = src_2->runs[xrlepos].value; |
7014 | int32_t xend = xstart + src_2->runs[xrlepos].length + 1; |
7015 | while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { |
7016 | if (end <= xstart) { |
7017 | ++rlepos; |
7018 | if (rlepos < src_1->n_runs) { |
7019 | start = src_1->runs[rlepos].value; |
7020 | end = start + src_1->runs[rlepos].length + 1; |
7021 | } |
7022 | } else if (xend <= start) { |
7023 | ++xrlepos; |
7024 | if (xrlepos < src_2->n_runs) { |
7025 | xstart = src_2->runs[xrlepos].value; |
7026 | xend = xstart + src_2->runs[xrlepos].length + 1; |
7027 | } |
7028 | } else { // they overlap |
7029 | const int32_t lateststart = start > xstart ? start : xstart; |
7030 | int32_t earliestend; |
7031 | if (end == xend) { // improbable |
7032 | earliestend = end; |
7033 | rlepos++; |
7034 | xrlepos++; |
7035 | if (rlepos < src_1->n_runs) { |
7036 | start = src_1->runs[rlepos].value; |
7037 | end = start + src_1->runs[rlepos].length + 1; |
7038 | } |
7039 | if (xrlepos < src_2->n_runs) { |
7040 | xstart = src_2->runs[xrlepos].value; |
7041 | xend = xstart + src_2->runs[xrlepos].length + 1; |
7042 | } |
7043 | } else if (end < xend) { |
7044 | earliestend = end; |
7045 | rlepos++; |
7046 | if (rlepos < src_1->n_runs) { |
7047 | start = src_1->runs[rlepos].value; |
7048 | end = start + src_1->runs[rlepos].length + 1; |
7049 | } |
7050 | |
7051 | } else { // end > xend |
7052 | earliestend = xend; |
7053 | xrlepos++; |
7054 | if (xrlepos < src_2->n_runs) { |
7055 | xstart = src_2->runs[xrlepos].value; |
7056 | xend = xstart + src_2->runs[xrlepos].length + 1; |
7057 | } |
7058 | } |
7059 | answer += earliestend - lateststart; |
7060 | } |
7061 | } |
7062 | return answer; |
7063 | } |
7064 | |
7065 | bool run_container_intersect(const run_container_t *src_1, |
7066 | const run_container_t *src_2) { |
7067 | const bool if1 = run_container_is_full(run: src_1); |
7068 | const bool if2 = run_container_is_full(run: src_2); |
7069 | if (if1 || if2) { |
7070 | if (if1) { |
7071 | return !run_container_empty(run: src_2); |
7072 | } |
7073 | if (if2) { |
7074 | return !run_container_empty(run: src_1); |
7075 | } |
7076 | } |
7077 | int32_t rlepos = 0; |
7078 | int32_t xrlepos = 0; |
7079 | int32_t start = src_1->runs[rlepos].value; |
7080 | int32_t end = start + src_1->runs[rlepos].length + 1; |
7081 | int32_t xstart = src_2->runs[xrlepos].value; |
7082 | int32_t xend = xstart + src_2->runs[xrlepos].length + 1; |
7083 | while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { |
7084 | if (end <= xstart) { |
7085 | ++rlepos; |
7086 | if (rlepos < src_1->n_runs) { |
7087 | start = src_1->runs[rlepos].value; |
7088 | end = start + src_1->runs[rlepos].length + 1; |
7089 | } |
7090 | } else if (xend <= start) { |
7091 | ++xrlepos; |
7092 | if (xrlepos < src_2->n_runs) { |
7093 | xstart = src_2->runs[xrlepos].value; |
7094 | xend = xstart + src_2->runs[xrlepos].length + 1; |
7095 | } |
7096 | } else { // they overlap |
7097 | return true; |
7098 | } |
7099 | } |
7100 | return false; |
7101 | } |
7102 | |
7103 | |
7104 | /* Compute the difference of src_1 and src_2 and write the result to |
7105 | * dst. It is assumed that dst is distinct from both src_1 and src_2. */ |
7106 | void run_container_andnot(const run_container_t *src_1, |
7107 | const run_container_t *src_2, run_container_t *dst) { |
7108 | // following Java implementation as of June 2016 |
7109 | |
7110 | if (dst->capacity < src_1->n_runs + src_2->n_runs) |
7111 | run_container_grow(run: dst, min: src_1->n_runs + src_2->n_runs, false); |
7112 | |
7113 | dst->n_runs = 0; |
7114 | |
7115 | int rlepos1 = 0; |
7116 | int rlepos2 = 0; |
7117 | int32_t start = src_1->runs[rlepos1].value; |
7118 | int32_t end = start + src_1->runs[rlepos1].length + 1; |
7119 | int32_t start2 = src_2->runs[rlepos2].value; |
7120 | int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; |
7121 | |
7122 | while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { |
7123 | if (end <= start2) { |
7124 | // output the first run |
7125 | dst->runs[dst->n_runs++] = |
7126 | (rle16_t){.value = (uint16_t)start, |
7127 | .length = (uint16_t)(end - start - 1)}; |
7128 | rlepos1++; |
7129 | if (rlepos1 < src_1->n_runs) { |
7130 | start = src_1->runs[rlepos1].value; |
7131 | end = start + src_1->runs[rlepos1].length + 1; |
7132 | } |
7133 | } else if (end2 <= start) { |
7134 | // exit the second run |
7135 | rlepos2++; |
7136 | if (rlepos2 < src_2->n_runs) { |
7137 | start2 = src_2->runs[rlepos2].value; |
7138 | end2 = start2 + src_2->runs[rlepos2].length + 1; |
7139 | } |
7140 | } else { |
7141 | if (start < start2) { |
7142 | dst->runs[dst->n_runs++] = |
7143 | (rle16_t){.value = (uint16_t)start, |
7144 | .length = (uint16_t)(start2 - start - 1)}; |
7145 | } |
7146 | if (end2 < end) { |
7147 | start = end2; |
7148 | } else { |
7149 | rlepos1++; |
7150 | if (rlepos1 < src_1->n_runs) { |
7151 | start = src_1->runs[rlepos1].value; |
7152 | end = start + src_1->runs[rlepos1].length + 1; |
7153 | } |
7154 | } |
7155 | } |
7156 | } |
7157 | if (rlepos1 < src_1->n_runs) { |
7158 | dst->runs[dst->n_runs++] = (rle16_t){ |
7159 | .value = (uint16_t)start, .length = (uint16_t)(end - start - 1)}; |
7160 | rlepos1++; |
7161 | if (rlepos1 < src_1->n_runs) { |
7162 | memcpy(dest: dst->runs + dst->n_runs, src: src_1->runs + rlepos1, |
7163 | n: sizeof(rle16_t) * (src_1->n_runs - rlepos1)); |
7164 | dst->n_runs += src_1->n_runs - rlepos1; |
7165 | } |
7166 | } |
7167 | } |
7168 | |
7169 | int run_container_to_uint32_array(void *vout, const run_container_t *cont, |
7170 | uint32_t base) { |
7171 | int outpos = 0; |
7172 | uint32_t *out = (uint32_t *)vout; |
7173 | for (int i = 0; i < cont->n_runs; ++i) { |
7174 | uint32_t run_start = base + cont->runs[i].value; |
7175 | uint16_t le = cont->runs[i].length; |
7176 | for (int j = 0; j <= le; ++j) { |
7177 | uint32_t val = run_start + j; |
7178 | memcpy(dest: out + outpos, src: &val, |
7179 | n: sizeof(uint32_t)); // should be compiled as a MOV on x64 |
7180 | outpos++; |
7181 | } |
7182 | } |
7183 | return outpos; |
7184 | } |
7185 | |
7186 | /* |
7187 | * Print this container using printf (useful for debugging). |
7188 | */ |
7189 | void run_container_printf(const run_container_t *cont) { |
7190 | for (int i = 0; i < cont->n_runs; ++i) { |
7191 | uint16_t run_start = cont->runs[i].value; |
7192 | uint16_t le = cont->runs[i].length; |
7193 | printf(format: "[%d,%d]" , run_start, run_start + le); |
7194 | } |
7195 | } |
7196 | |
7197 | /* |
7198 | * Print this container using printf as a comma-separated list of 32-bit |
7199 | * integers starting at base. |
7200 | */ |
7201 | void run_container_printf_as_uint32_array(const run_container_t *cont, |
7202 | uint32_t base) { |
7203 | if (cont->n_runs == 0) return; |
7204 | { |
7205 | uint32_t run_start = base + cont->runs[0].value; |
7206 | uint16_t le = cont->runs[0].length; |
7207 | printf(format: "%u" , run_start); |
7208 | for (uint32_t j = 1; j <= le; ++j) printf(format: ",%u" , run_start + j); |
7209 | } |
7210 | for (int32_t i = 1; i < cont->n_runs; ++i) { |
7211 | uint32_t run_start = base + cont->runs[i].value; |
7212 | uint16_t le = cont->runs[i].length; |
7213 | for (uint32_t j = 0; j <= le; ++j) printf(format: ",%u" , run_start + j); |
7214 | } |
7215 | } |
7216 | |
7217 | int32_t run_container_serialize(const run_container_t *container, char *buf) { |
7218 | int32_t l, off; |
7219 | |
7220 | memcpy(dest: buf, src: &container->n_runs, n: off = sizeof(container->n_runs)); |
7221 | memcpy(dest: &buf[off], src: &container->capacity, n: sizeof(container->capacity)); |
7222 | off += sizeof(container->capacity); |
7223 | |
7224 | l = sizeof(rle16_t) * container->n_runs; |
7225 | memcpy(dest: &buf[off], src: container->runs, n: l); |
7226 | return (off + l); |
7227 | } |
7228 | |
7229 | int32_t run_container_write(const run_container_t *container, char *buf) { |
7230 | memcpy(dest: buf, src: &container->n_runs, n: sizeof(uint16_t)); |
7231 | memcpy(dest: buf + sizeof(uint16_t), src: container->runs, |
7232 | n: container->n_runs * sizeof(rle16_t)); |
7233 | return run_container_size_in_bytes(container); |
7234 | } |
7235 | |
7236 | int32_t run_container_read(int32_t cardinality, run_container_t *container, |
7237 | const char *buf) { |
7238 | (void)cardinality; |
7239 | memcpy(dest: &container->n_runs, src: buf, n: sizeof(uint16_t)); |
7240 | if (container->n_runs > container->capacity) |
7241 | run_container_grow(run: container, min: container->n_runs, false); |
7242 | if(container->n_runs > 0) { |
7243 | memcpy(dest: container->runs, src: buf + sizeof(uint16_t), |
7244 | n: container->n_runs * sizeof(rle16_t)); |
7245 | } |
7246 | return run_container_size_in_bytes(container); |
7247 | } |
7248 | |
7249 | uint32_t run_container_serialization_len(const run_container_t *container) { |
7250 | return (sizeof(container->n_runs) + sizeof(container->capacity) + |
7251 | sizeof(rle16_t) * container->n_runs); |
7252 | } |
7253 | |
7254 | void *run_container_deserialize(const char *buf, size_t buf_len) { |
7255 | run_container_t *ptr; |
7256 | |
7257 | if (buf_len < 8 /* n_runs + capacity */) |
7258 | return (NULL); |
7259 | else |
7260 | buf_len -= 8; |
7261 | |
7262 | if ((ptr = (run_container_t *)malloc(size: sizeof(run_container_t))) != NULL) { |
7263 | size_t len; |
7264 | int32_t off; |
7265 | |
7266 | memcpy(dest: &ptr->n_runs, src: buf, n: off = 4); |
7267 | memcpy(dest: &ptr->capacity, src: &buf[off], n: 4); |
7268 | off += 4; |
7269 | |
7270 | len = sizeof(rle16_t) * ptr->n_runs; |
7271 | |
7272 | if (len != buf_len) { |
7273 | free(ptr: ptr); |
7274 | return (NULL); |
7275 | } |
7276 | |
7277 | if ((ptr->runs = (rle16_t *)malloc(size: len)) == NULL) { |
7278 | free(ptr: ptr); |
7279 | return (NULL); |
7280 | } |
7281 | |
7282 | memcpy(dest: ptr->runs, src: &buf[off], n: len); |
7283 | |
7284 | /* Check if returned values are monotonically increasing */ |
7285 | for (int32_t i = 0, j = 0; i < ptr->n_runs; i++) { |
7286 | if (ptr->runs[i].value < j) { |
7287 | free(ptr: ptr->runs); |
7288 | free(ptr: ptr); |
7289 | return (NULL); |
7290 | } else |
7291 | j = ptr->runs[i].value; |
7292 | } |
7293 | } |
7294 | |
7295 | return (ptr); |
7296 | } |
7297 | |
7298 | bool run_container_iterate(const run_container_t *cont, uint32_t base, |
7299 | roaring_iterator iterator, void *ptr) { |
7300 | for (int i = 0; i < cont->n_runs; ++i) { |
7301 | uint32_t run_start = base + cont->runs[i].value; |
7302 | uint16_t le = cont->runs[i].length; |
7303 | |
7304 | for (int j = 0; j <= le; ++j) |
7305 | if (!iterator(run_start + j, ptr)) return false; |
7306 | } |
7307 | return true; |
7308 | } |
7309 | |
7310 | bool run_container_iterate64(const run_container_t *cont, uint32_t base, |
7311 | roaring_iterator64 iterator, uint64_t high_bits, |
7312 | void *ptr) { |
7313 | for (int i = 0; i < cont->n_runs; ++i) { |
7314 | uint32_t run_start = base + cont->runs[i].value; |
7315 | uint16_t le = cont->runs[i].length; |
7316 | |
7317 | for (int j = 0; j <= le; ++j) |
7318 | if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) |
7319 | return false; |
7320 | } |
7321 | return true; |
7322 | } |
7323 | |
7324 | bool run_container_is_subset(const run_container_t *container1, |
7325 | const run_container_t *container2) { |
7326 | int i1 = 0, i2 = 0; |
7327 | while (i1 < container1->n_runs && i2 < container2->n_runs) { |
7328 | int start1 = container1->runs[i1].value; |
7329 | int stop1 = start1 + container1->runs[i1].length; |
7330 | int start2 = container2->runs[i2].value; |
7331 | int stop2 = start2 + container2->runs[i2].length; |
7332 | if (start1 < start2) { |
7333 | return false; |
7334 | } else { // start1 >= start2 |
7335 | if (stop1 < stop2) { |
7336 | i1++; |
7337 | } else if (stop1 == stop2) { |
7338 | i1++; |
7339 | i2++; |
7340 | } else { // stop1 > stop2 |
7341 | i2++; |
7342 | } |
7343 | } |
7344 | } |
7345 | if (i1 == container1->n_runs) { |
7346 | return true; |
7347 | } else { |
7348 | return false; |
7349 | } |
7350 | } |
7351 | |
7352 | // TODO: write smart_append_exclusive version to match the overloaded 1 param |
7353 | // Java version (or is it even used?) |
7354 | |
7355 | // follows the Java implementation closely |
7356 | // length is the rle-value. Ie, run [10,12) uses a length value 1. |
7357 | void run_container_smart_append_exclusive(run_container_t *src, |
7358 | const uint16_t start, |
7359 | const uint16_t length) { |
7360 | int old_end; |
7361 | rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL; |
7362 | rle16_t *appended_last_run = src->runs + src->n_runs; |
7363 | |
7364 | if (!src->n_runs || |
7365 | (start > (old_end = last_run->value + last_run->length + 1))) { |
7366 | *appended_last_run = (rle16_t){.value = start, .length = length}; |
7367 | src->n_runs++; |
7368 | return; |
7369 | } |
7370 | if (old_end == start) { |
7371 | // we merge |
7372 | last_run->length += (length + 1); |
7373 | return; |
7374 | } |
7375 | int new_end = start + length + 1; |
7376 | |
7377 | if (start == last_run->value) { |
7378 | // wipe out previous |
7379 | if (new_end < old_end) { |
7380 | *last_run = (rle16_t){.value = (uint16_t)new_end, |
7381 | .length = (uint16_t)(old_end - new_end - 1)}; |
7382 | return; |
7383 | } else if (new_end > old_end) { |
7384 | *last_run = (rle16_t){.value = (uint16_t)old_end, |
7385 | .length = (uint16_t)(new_end - old_end - 1)}; |
7386 | return; |
7387 | } else { |
7388 | src->n_runs--; |
7389 | return; |
7390 | } |
7391 | } |
7392 | last_run->length = start - last_run->value - 1; |
7393 | if (new_end < old_end) { |
7394 | *appended_last_run = |
7395 | (rle16_t){.value = (uint16_t)new_end, |
7396 | .length = (uint16_t)(old_end - new_end - 1)}; |
7397 | src->n_runs++; |
7398 | } else if (new_end > old_end) { |
7399 | *appended_last_run = |
7400 | (rle16_t){.value = (uint16_t)old_end, |
7401 | .length = (uint16_t)(new_end - old_end - 1)}; |
7402 | src->n_runs++; |
7403 | } |
7404 | } |
7405 | |
7406 | bool run_container_select(const run_container_t *container, |
7407 | uint32_t *start_rank, uint32_t rank, |
7408 | uint32_t *element) { |
7409 | for (int i = 0; i < container->n_runs; i++) { |
7410 | uint16_t length = container->runs[i].length; |
7411 | if (rank <= *start_rank + length) { |
7412 | uint16_t value = container->runs[i].value; |
7413 | *element = value + rank - (*start_rank); |
7414 | return true; |
7415 | } else |
7416 | *start_rank += length + 1; |
7417 | } |
7418 | return false; |
7419 | } |
7420 | |
7421 | int run_container_rank(const run_container_t *container, uint16_t x) { |
7422 | int sum = 0; |
7423 | uint32_t x32 = x; |
7424 | for (int i = 0; i < container->n_runs; i++) { |
7425 | uint32_t startpoint = container->runs[i].value; |
7426 | uint32_t length = container->runs[i].length; |
7427 | uint32_t endpoint = length + startpoint; |
7428 | if (x <= endpoint) { |
7429 | if (x < startpoint) break; |
7430 | return sum + (x32 - startpoint) + 1; |
7431 | } else { |
7432 | sum += length + 1; |
7433 | } |
7434 | } |
7435 | return sum; |
7436 | } |
7437 | /* end file src/containers/run.c */ |
7438 | /* begin file src/roaring.c */ |
7439 | #include <assert.h> |
7440 | #include <stdarg.h> |
7441 | #include <stdint.h> |
7442 | #include <stdio.h> |
7443 | #include <string.h> |
7444 | #include <inttypes.h> |
7445 | |
7446 | static inline bool is_cow(const roaring_bitmap_t *r) { |
7447 | return r->high_low_container.flags & ROARING_FLAG_COW; |
7448 | } |
7449 | static inline bool is_frozen(const roaring_bitmap_t *r) { |
7450 | return r->high_low_container.flags & ROARING_FLAG_FROZEN; |
7451 | } |
7452 | |
7453 | // this is like roaring_bitmap_add, but it populates pointer arguments in such a |
7454 | // way |
7455 | // that we can recover the container touched, which, in turn can be used to |
7456 | // accelerate some functions (when you repeatedly need to add to the same |
7457 | // container) |
7458 | static inline void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, |
7459 | uint32_t val, |
7460 | uint8_t *typecode, |
7461 | int *index) { |
7462 | uint16_t hb = val >> 16; |
7463 | const int i = ra_get_index(ra: &r->high_low_container, x: hb); |
7464 | if (i >= 0) { |
7465 | ra_unshare_container_at_index(ra: &r->high_low_container, i); |
7466 | void *container = |
7467 | ra_get_container_at_index(ra: &r->high_low_container, i, typecode); |
7468 | uint8_t newtypecode = *typecode; |
7469 | void *container2 = |
7470 | container_add(container, val: val & 0xFFFF, typecode: *typecode, new_typecode: &newtypecode); |
7471 | *index = i; |
7472 | if (container2 != container) { |
7473 | container_free(container, typecode: *typecode); |
7474 | ra_set_container_at_index(ra: &r->high_low_container, i, c: container2, |
7475 | typecode: newtypecode); |
7476 | *typecode = newtypecode; |
7477 | return container2; |
7478 | } else { |
7479 | return container; |
7480 | } |
7481 | } else { |
7482 | array_container_t *newac = array_container_create(); |
7483 | void *container = container_add(container: newac, val: val & 0xFFFF, |
7484 | ARRAY_CONTAINER_TYPE_CODE, new_typecode: typecode); |
7485 | // we could just assume that it stays an array container |
7486 | ra_insert_new_key_value_at(ra: &r->high_low_container, i: -i - 1, key: hb, |
7487 | container, typecode: *typecode); |
7488 | *index = -i - 1; |
7489 | return container; |
7490 | } |
7491 | } |
7492 | |
7493 | roaring_bitmap_t *roaring_bitmap_create(void) { |
7494 | roaring_bitmap_t *ans = |
7495 | (roaring_bitmap_t *)malloc(size: sizeof(roaring_bitmap_t)); |
7496 | if (!ans) { |
7497 | return NULL; |
7498 | } |
7499 | ra_init(t: &ans->high_low_container); |
7500 | return ans; |
7501 | } |
7502 | |
7503 | roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { |
7504 | roaring_bitmap_t *ans = |
7505 | (roaring_bitmap_t *)malloc(size: sizeof(roaring_bitmap_t)); |
7506 | if (!ans) { |
7507 | return NULL; |
7508 | } |
7509 | bool is_ok = ra_init_with_capacity(new_ra: &ans->high_low_container, cap); |
7510 | if (!is_ok) { |
7511 | free(ptr: ans); |
7512 | return NULL; |
7513 | } |
7514 | return ans; |
7515 | } |
7516 | |
7517 | void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, |
7518 | const uint32_t *vals) { |
7519 | void *container = NULL; // hold value of last container touched |
7520 | uint8_t typecode = 0; // typecode of last container touched |
7521 | uint32_t prev = 0; // previous valued inserted |
7522 | size_t i = 0; // index of value |
7523 | int containerindex = 0; |
7524 | if (n_args == 0) return; |
7525 | uint32_t val; |
7526 | memcpy(dest: &val, src: vals + i, n: sizeof(val)); |
7527 | container = |
7528 | containerptr_roaring_bitmap_add(r, val, typecode: &typecode, index: &containerindex); |
7529 | prev = val; |
7530 | i++; |
7531 | for (; i < n_args; i++) { |
7532 | memcpy(dest: &val, src: vals + i, n: sizeof(val)); |
7533 | if (((prev ^ val) >> 16) == |
7534 | 0) { // no need to seek the container, it is at hand |
7535 | // because we already have the container at hand, we can do the |
7536 | // insertion |
7537 | // automatically, bypassing the roaring_bitmap_add call |
7538 | uint8_t newtypecode = typecode; |
7539 | void *container2 = |
7540 | container_add(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode); |
7541 | if (container2 != container) { // rare instance when we need to |
7542 | // change the container type |
7543 | container_free(container, typecode); |
7544 | ra_set_container_at_index(ra: &r->high_low_container, |
7545 | i: containerindex, c: container2, |
7546 | typecode: newtypecode); |
7547 | typecode = newtypecode; |
7548 | container = container2; |
7549 | } |
7550 | } else { |
7551 | container = containerptr_roaring_bitmap_add(r, val, typecode: &typecode, |
7552 | index: &containerindex); |
7553 | } |
7554 | prev = val; |
7555 | } |
7556 | } |
7557 | |
7558 | roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { |
7559 | roaring_bitmap_t *answer = roaring_bitmap_create(); |
7560 | roaring_bitmap_add_many(r: answer, n_args, vals); |
7561 | return answer; |
7562 | } |
7563 | |
7564 | roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { |
7565 | // todo: could be greatly optimized but we do not expect this call to ever |
7566 | // include long lists |
7567 | roaring_bitmap_t *answer = roaring_bitmap_create(); |
7568 | va_list ap; |
7569 | va_start(ap, n_args); |
7570 | for (size_t i = 1; i <= n_args; i++) { |
7571 | uint32_t val = va_arg(ap, uint32_t); |
7572 | roaring_bitmap_add(r: answer, x: val); |
7573 | } |
7574 | va_end(ap); |
7575 | return answer; |
7576 | } |
7577 | |
// Returns the smaller of two 32-bit unsigned values.
static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
    if (a < b) {
        return a;
    }
    return b;
}
7581 | |
// Returns the smaller of two 64-bit unsigned values.
static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
    if (a < b) {
        return a;
    }
    return b;
}
7585 | |
7586 | roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, |
7587 | uint32_t step) { |
7588 | if(max >= UINT64_C(0x100000000)) { |
7589 | max = UINT64_C(0x100000000); |
7590 | } |
7591 | if (step == 0) return NULL; |
7592 | if (max <= min) return NULL; |
7593 | roaring_bitmap_t *answer = roaring_bitmap_create(); |
7594 | if (step >= (1 << 16)) { |
7595 | for (uint32_t value = (uint32_t)min; value < max; value += step) { |
7596 | roaring_bitmap_add(r: answer, x: value); |
7597 | } |
7598 | return answer; |
7599 | } |
7600 | uint64_t min_tmp = min; |
7601 | do { |
7602 | uint32_t key = (uint32_t)min_tmp >> 16; |
7603 | uint32_t container_min = min_tmp & 0xFFFF; |
7604 | uint32_t container_max = (uint32_t)minimum_uint64(a: max - (key << 16), b: 1 << 16); |
7605 | uint8_t type; |
7606 | void *container = container_from_range(type: &type, min: container_min, |
7607 | max: container_max, step: (uint16_t)step); |
7608 | ra_append(ra: &answer->high_low_container, s: key, c: container, typecode: type); |
7609 | uint32_t gap = container_max - container_min + step - 1; |
7610 | min_tmp += gap - (gap % step); |
7611 | } while (min_tmp < max); |
7612 | // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step |
7613 | return answer; |
7614 | } |
7615 | |
7616 | void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { |
7617 | if (min > max) { |
7618 | return; |
7619 | } |
7620 | |
7621 | uint32_t min_key = min >> 16; |
7622 | uint32_t max_key = max >> 16; |
7623 | |
7624 | int32_t num_required_containers = max_key - min_key + 1; |
7625 | int32_t suffix_length = count_greater(array: ra->high_low_container.keys, |
7626 | lenarray: ra->high_low_container.size, |
7627 | ikey: max_key); |
7628 | int32_t prefix_length = count_less(array: ra->high_low_container.keys, |
7629 | lenarray: ra->high_low_container.size - suffix_length, |
7630 | ikey: min_key); |
7631 | int32_t common_length = ra->high_low_container.size - prefix_length - suffix_length; |
7632 | |
7633 | if (num_required_containers > common_length) { |
7634 | ra_shift_tail(ra: &ra->high_low_container, count: suffix_length, |
7635 | distance: num_required_containers - common_length); |
7636 | } |
7637 | |
7638 | int32_t src = prefix_length + common_length - 1; |
7639 | int32_t dst = ra->high_low_container.size - suffix_length - 1; |
7640 | for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0 |
7641 | uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; |
7642 | uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; |
7643 | void* new_container; |
7644 | uint8_t new_type; |
7645 | |
7646 | if (src >= 0 && ra->high_low_container.keys[src] == key) { |
7647 | ra_unshare_container_at_index(ra: &ra->high_low_container, i: src); |
7648 | new_container = container_add_range(container: ra->high_low_container.containers[src], |
7649 | type: ra->high_low_container.typecodes[src], |
7650 | min: container_min, max: container_max, result_type: &new_type); |
7651 | if (new_container != ra->high_low_container.containers[src]) { |
7652 | container_free(container: ra->high_low_container.containers[src], |
7653 | typecode: ra->high_low_container.typecodes[src]); |
7654 | } |
7655 | src--; |
7656 | } else { |
7657 | new_container = container_from_range(type: &new_type, min: container_min, |
7658 | max: container_max+1, step: 1); |
7659 | } |
7660 | ra_replace_key_and_container_at_index(ra: &ra->high_low_container, i: dst, |
7661 | key, c: new_container, typecode: new_type); |
7662 | dst--; |
7663 | } |
7664 | } |
7665 | |
7666 | void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { |
7667 | if (min > max) { |
7668 | return; |
7669 | } |
7670 | |
7671 | uint32_t min_key = min >> 16; |
7672 | uint32_t max_key = max >> 16; |
7673 | |
7674 | int32_t src = count_less(array: ra->high_low_container.keys, lenarray: ra->high_low_container.size, ikey: min_key); |
7675 | int32_t dst = src; |
7676 | while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) { |
7677 | uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0; |
7678 | uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? (max & 0xffff) : 0xffff; |
7679 | ra_unshare_container_at_index(ra: &ra->high_low_container, i: src); |
7680 | void *new_container; |
7681 | uint8_t new_type; |
7682 | new_container = container_remove_range(container: ra->high_low_container.containers[src], |
7683 | type: ra->high_low_container.typecodes[src], |
7684 | min: container_min, max: container_max, |
7685 | result_type: &new_type); |
7686 | if (new_container != ra->high_low_container.containers[src]) { |
7687 | container_free(container: ra->high_low_container.containers[src], |
7688 | typecode: ra->high_low_container.typecodes[src]); |
7689 | } |
7690 | if (new_container) { |
7691 | ra_replace_key_and_container_at_index(ra: &ra->high_low_container, i: dst, |
7692 | key: ra->high_low_container.keys[src], |
7693 | c: new_container, typecode: new_type); |
7694 | dst++; |
7695 | } |
7696 | src++; |
7697 | } |
7698 | if (src > dst) { |
7699 | ra_shift_tail(ra: &ra->high_low_container, count: ra->high_low_container.size - src, distance: dst - src); |
7700 | } |
7701 | } |
7702 | |
7703 | void roaring_bitmap_printf(const roaring_bitmap_t *ra) { |
7704 | printf(format: "{" ); |
7705 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
7706 | container_printf_as_uint32_array( |
7707 | container: ra->high_low_container.containers[i], |
7708 | typecode: ra->high_low_container.typecodes[i], |
7709 | base: ((uint32_t)ra->high_low_container.keys[i]) << 16); |
7710 | if (i + 1 < ra->high_low_container.size) printf(format: "," ); |
7711 | } |
7712 | printf(format: "}" ); |
7713 | } |
7714 | |
7715 | void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) { |
7716 | printf(format: "{" ); |
7717 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
7718 | printf(format: "%d: %s (%d)" , ra->high_low_container.keys[i], |
7719 | get_full_container_name(container: ra->high_low_container.containers[i], |
7720 | typecode: ra->high_low_container.typecodes[i]), |
7721 | container_get_cardinality(container: ra->high_low_container.containers[i], |
7722 | typecode: ra->high_low_container.typecodes[i])); |
7723 | if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) { |
7724 | printf( |
7725 | format: "(shared count = %" PRIu32 " )" , |
7726 | ((shared_container_t *)(ra->high_low_container.containers[i])) |
7727 | ->counter); |
7728 | } |
7729 | |
7730 | if (i + 1 < ra->high_low_container.size) printf(format: ", " ); |
7731 | } |
7732 | printf(format: "}" ); |
7733 | } |
7734 | |
// Accumulator used while iterating over a bitmap: smallest value seen,
// largest value seen, and the sum of all values seen.
typedef struct min_max_sum_s {
    uint32_t min;
    uint32_t max;
    uint64_t sum;
} min_max_sum_t;

// Iteration callback: folds `value` into the min_max_sum_t passed through
// `param`. Always returns true, so every data point gets processed.
static bool min_max_sum_fnc(uint32_t value, void *param) {
    min_max_sum_t *acc = (min_max_sum_t *)param;
    acc->sum += value;
    if (acc->min > value) {
        acc->min = value;
    }
    if (acc->max < value) {
        acc->max = value;
    }
    return true;
}
7748 | |
7749 | /** |
7750 | * (For advanced users.) |
7751 | * Collect statistics about the bitmap |
7752 | */ |
7753 | void roaring_bitmap_statistics(const roaring_bitmap_t *ra, |
7754 | roaring_statistics_t *stat) { |
7755 | memset(s: stat, c: 0, n: sizeof(*stat)); |
7756 | stat->n_containers = ra->high_low_container.size; |
7757 | stat->cardinality = roaring_bitmap_get_cardinality(ra); |
7758 | min_max_sum_t mms; |
7759 | mms.min = UINT32_C(0xFFFFFFFF); |
7760 | mms.max = UINT32_C(0); |
7761 | mms.sum = 0; |
7762 | roaring_iterate(ra, iterator: &min_max_sum_fnc, ptr: &mms); |
7763 | stat->min_value = mms.min; |
7764 | stat->max_value = mms.max; |
7765 | stat->sum_value = mms.sum; |
7766 | |
7767 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
7768 | uint8_t truetype = |
7769 | get_container_type(container: ra->high_low_container.containers[i], |
7770 | type: ra->high_low_container.typecodes[i]); |
7771 | uint32_t card = |
7772 | container_get_cardinality(container: ra->high_low_container.containers[i], |
7773 | typecode: ra->high_low_container.typecodes[i]); |
7774 | uint32_t sbytes = |
7775 | container_size_in_bytes(container: ra->high_low_container.containers[i], |
7776 | typecode: ra->high_low_container.typecodes[i]); |
7777 | switch (truetype) { |
7778 | case BITSET_CONTAINER_TYPE_CODE: |
7779 | stat->n_bitset_containers++; |
7780 | stat->n_values_bitset_containers += card; |
7781 | stat->n_bytes_bitset_containers += sbytes; |
7782 | break; |
7783 | case ARRAY_CONTAINER_TYPE_CODE: |
7784 | stat->n_array_containers++; |
7785 | stat->n_values_array_containers += card; |
7786 | stat->n_bytes_array_containers += sbytes; |
7787 | break; |
7788 | case RUN_CONTAINER_TYPE_CODE: |
7789 | stat->n_run_containers++; |
7790 | stat->n_values_run_containers += card; |
7791 | stat->n_bytes_run_containers += sbytes; |
7792 | break; |
7793 | default: |
7794 | assert(false); |
7795 | __builtin_unreachable(); |
7796 | } |
7797 | } |
7798 | } |
7799 | |
7800 | roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { |
7801 | roaring_bitmap_t *ans = |
7802 | (roaring_bitmap_t *)malloc(size: sizeof(roaring_bitmap_t)); |
7803 | if (!ans) { |
7804 | return NULL; |
7805 | } |
7806 | bool is_ok = ra_copy(source: &r->high_low_container, dest: &ans->high_low_container, |
7807 | copy_on_write: is_cow(r)); |
7808 | if (!is_ok) { |
7809 | free(ptr: ans); |
7810 | return NULL; |
7811 | } |
7812 | roaring_bitmap_set_copy_on_write(r: ans, cow: is_cow(r)); |
7813 | return ans; |
7814 | } |
7815 | |
7816 | bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, |
7817 | const roaring_bitmap_t *src) { |
7818 | return ra_overwrite(source: &src->high_low_container, dest: &dest->high_low_container, |
7819 | copy_on_write: is_cow(r: src)); |
7820 | } |
7821 | |
7822 | void roaring_bitmap_free(const roaring_bitmap_t *r) { |
7823 | if (!is_frozen(r)) { |
7824 | ra_clear(r: (roaring_array_t*)&r->high_low_container); |
7825 | } |
7826 | free(ptr: (roaring_bitmap_t*)r); |
7827 | } |
7828 | |
7829 | void roaring_bitmap_clear(roaring_bitmap_t *r) { |
7830 | ra_reset(ra: &r->high_low_container); |
7831 | } |
7832 | |
7833 | void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { |
7834 | const uint16_t hb = val >> 16; |
7835 | const int i = ra_get_index(ra: &r->high_low_container, x: hb); |
7836 | uint8_t typecode; |
7837 | if (i >= 0) { |
7838 | ra_unshare_container_at_index(ra: &r->high_low_container, i); |
7839 | void *container = |
7840 | ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode); |
7841 | uint8_t newtypecode = typecode; |
7842 | void *container2 = |
7843 | container_add(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode); |
7844 | if (container2 != container) { |
7845 | container_free(container, typecode); |
7846 | ra_set_container_at_index(ra: &r->high_low_container, i, c: container2, |
7847 | typecode: newtypecode); |
7848 | } |
7849 | } else { |
7850 | array_container_t *newac = array_container_create(); |
7851 | void *container = container_add(container: newac, val: val & 0xFFFF, |
7852 | ARRAY_CONTAINER_TYPE_CODE, new_typecode: &typecode); |
7853 | // we could just assume that it stays an array container |
7854 | ra_insert_new_key_value_at(ra: &r->high_low_container, i: -i - 1, key: hb, |
7855 | container, typecode); |
7856 | } |
7857 | } |
7858 | |
7859 | bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { |
7860 | const uint16_t hb = val >> 16; |
7861 | const int i = ra_get_index(ra: &r->high_low_container, x: hb); |
7862 | uint8_t typecode; |
7863 | bool result = false; |
7864 | if (i >= 0) { |
7865 | ra_unshare_container_at_index(ra: &r->high_low_container, i); |
7866 | void *container = |
7867 | ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode); |
7868 | |
7869 | const int oldCardinality = |
7870 | container_get_cardinality(container, typecode); |
7871 | |
7872 | uint8_t newtypecode = typecode; |
7873 | void *container2 = |
7874 | container_add(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode); |
7875 | if (container2 != container) { |
7876 | container_free(container, typecode); |
7877 | ra_set_container_at_index(ra: &r->high_low_container, i, c: container2, |
7878 | typecode: newtypecode); |
7879 | result = true; |
7880 | } else { |
7881 | const int newCardinality = |
7882 | container_get_cardinality(container, typecode: newtypecode); |
7883 | |
7884 | result = oldCardinality != newCardinality; |
7885 | } |
7886 | } else { |
7887 | array_container_t *newac = array_container_create(); |
7888 | void *container = container_add(container: newac, val: val & 0xFFFF, |
7889 | ARRAY_CONTAINER_TYPE_CODE, new_typecode: &typecode); |
7890 | // we could just assume that it stays an array container |
7891 | ra_insert_new_key_value_at(ra: &r->high_low_container, i: -i - 1, key: hb, |
7892 | container, typecode); |
7893 | result = true; |
7894 | } |
7895 | |
7896 | return result; |
7897 | } |
7898 | |
7899 | void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { |
7900 | const uint16_t hb = val >> 16; |
7901 | const int i = ra_get_index(ra: &r->high_low_container, x: hb); |
7902 | uint8_t typecode; |
7903 | if (i >= 0) { |
7904 | ra_unshare_container_at_index(ra: &r->high_low_container, i); |
7905 | void *container = |
7906 | ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode); |
7907 | uint8_t newtypecode = typecode; |
7908 | void *container2 = |
7909 | container_remove(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode); |
7910 | if (container2 != container) { |
7911 | container_free(container, typecode); |
7912 | ra_set_container_at_index(ra: &r->high_low_container, i, c: container2, |
7913 | typecode: newtypecode); |
7914 | } |
7915 | if (container_get_cardinality(container: container2, typecode: newtypecode) != 0) { |
7916 | ra_set_container_at_index(ra: &r->high_low_container, i, c: container2, |
7917 | typecode: newtypecode); |
7918 | } else { |
7919 | ra_remove_at_index_and_free(ra: &r->high_low_container, i); |
7920 | } |
7921 | } |
7922 | } |
7923 | |
7924 | bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { |
7925 | const uint16_t hb = val >> 16; |
7926 | const int i = ra_get_index(ra: &r->high_low_container, x: hb); |
7927 | uint8_t typecode; |
7928 | bool result = false; |
7929 | if (i >= 0) { |
7930 | ra_unshare_container_at_index(ra: &r->high_low_container, i); |
7931 | void *container = |
7932 | ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode); |
7933 | |
7934 | const int oldCardinality = |
7935 | container_get_cardinality(container, typecode); |
7936 | |
7937 | uint8_t newtypecode = typecode; |
7938 | void *container2 = |
7939 | container_remove(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode); |
7940 | if (container2 != container) { |
7941 | container_free(container, typecode); |
7942 | ra_set_container_at_index(ra: &r->high_low_container, i, c: container2, |
7943 | typecode: newtypecode); |
7944 | } |
7945 | |
7946 | const int newCardinality = |
7947 | container_get_cardinality(container: container2, typecode: newtypecode); |
7948 | |
7949 | if (newCardinality != 0) { |
7950 | ra_set_container_at_index(ra: &r->high_low_container, i, c: container2, |
7951 | typecode: newtypecode); |
7952 | } else { |
7953 | ra_remove_at_index_and_free(ra: &r->high_low_container, i); |
7954 | } |
7955 | |
7956 | result = oldCardinality != newCardinality; |
7957 | } |
7958 | return result; |
7959 | } |
7960 | |
7961 | void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, |
7962 | const uint32_t *vals) { |
7963 | if (n_args == 0 || r->high_low_container.size == 0) { |
7964 | return; |
7965 | } |
7966 | int32_t pos = -1; // position of the container used in the previous iteration |
7967 | for (size_t i = 0; i < n_args; i++) { |
7968 | uint16_t key = (uint16_t)(vals[i] >> 16); |
7969 | if (pos < 0 || key != r->high_low_container.keys[pos]) { |
7970 | pos = ra_get_index(ra: &r->high_low_container, x: key); |
7971 | } |
7972 | if (pos >= 0) { |
7973 | uint8_t new_typecode; |
7974 | void *new_container; |
7975 | new_container = container_remove(container: r->high_low_container.containers[pos], |
7976 | val: vals[i] & 0xffff, |
7977 | typecode: r->high_low_container.typecodes[pos], |
7978 | new_typecode: &new_typecode); |
7979 | if (new_container != r->high_low_container.containers[pos]) { |
7980 | container_free(container: r->high_low_container.containers[pos], |
7981 | typecode: r->high_low_container.typecodes[pos]); |
7982 | ra_replace_key_and_container_at_index(ra: &r->high_low_container, |
7983 | i: pos, key, c: new_container, |
7984 | typecode: new_typecode); |
7985 | } |
7986 | if (!container_nonzero_cardinality(container: new_container, typecode: new_typecode)) { |
7987 | container_free(container: new_container, typecode: new_typecode); |
7988 | ra_remove_at_index(ra: &r->high_low_container, i: pos); |
7989 | pos = -1; |
7990 | } |
7991 | } |
7992 | } |
7993 | } |
7994 | |
7995 | // there should be some SIMD optimizations possible here |
7996 | roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, |
7997 | const roaring_bitmap_t *x2) { |
7998 | uint8_t container_result_type = 0; |
7999 | const int length1 = x1->high_low_container.size, |
8000 | length2 = x2->high_low_container.size; |
8001 | uint32_t neededcap = length1 > length2 ? length2 : length1; |
8002 | roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(cap: neededcap); |
8003 | roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2)); |
8004 | |
8005 | int pos1 = 0, pos2 = 0; |
8006 | |
8007 | while (pos1 < length1 && pos2 < length2) { |
8008 | const uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8009 | const uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8010 | |
8011 | if (s1 == s2) { |
8012 | uint8_t container_type_1, container_type_2; |
8013 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8014 | typecode: &container_type_1); |
8015 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8016 | typecode: &container_type_2); |
8017 | void *c = container_and(c1, type1: container_type_1, c2, type2: container_type_2, |
8018 | result_type: &container_result_type); |
8019 | if (container_nonzero_cardinality(container: c, typecode: container_result_type)) { |
8020 | ra_append(ra: &answer->high_low_container, s: s1, c, |
8021 | typecode: container_result_type); |
8022 | } else { |
8023 | container_free( |
8024 | container: c, typecode: container_result_type); // otherwise:memory leak! |
8025 | } |
8026 | ++pos1; |
8027 | ++pos2; |
8028 | } else if (s1 < s2) { // s1 < s2 |
8029 | pos1 = ra_advance_until(ra: &x1->high_low_container, x: s2, pos: pos1); |
8030 | } else { // s1 > s2 |
8031 | pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2); |
8032 | } |
8033 | } |
8034 | return answer; |
8035 | } |
8036 | |
8037 | /** |
8038 | * Compute the union of 'number' bitmaps. |
8039 | */ |
8040 | roaring_bitmap_t *roaring_bitmap_or_many(size_t number, |
8041 | const roaring_bitmap_t **x) { |
8042 | if (number == 0) { |
8043 | return roaring_bitmap_create(); |
8044 | } |
8045 | if (number == 1) { |
8046 | return roaring_bitmap_copy(r: x[0]); |
8047 | } |
8048 | roaring_bitmap_t *answer = |
8049 | roaring_bitmap_lazy_or(x1: x[0], x2: x[1], LAZY_OR_BITSET_CONVERSION); |
8050 | for (size_t i = 2; i < number; i++) { |
8051 | roaring_bitmap_lazy_or_inplace(x1: answer, x2: x[i], LAZY_OR_BITSET_CONVERSION); |
8052 | } |
8053 | roaring_bitmap_repair_after_lazy(x1: answer); |
8054 | return answer; |
8055 | } |
8056 | |
8057 | /** |
8058 | * Compute the xor of 'number' bitmaps. |
8059 | */ |
8060 | roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, |
8061 | const roaring_bitmap_t **x) { |
8062 | if (number == 0) { |
8063 | return roaring_bitmap_create(); |
8064 | } |
8065 | if (number == 1) { |
8066 | return roaring_bitmap_copy(r: x[0]); |
8067 | } |
8068 | roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x1: x[0], x2: x[1]); |
8069 | for (size_t i = 2; i < number; i++) { |
8070 | roaring_bitmap_lazy_xor_inplace(x1: answer, x2: x[i]); |
8071 | } |
8072 | roaring_bitmap_repair_after_lazy(x1: answer); |
8073 | return answer; |
8074 | } |
8075 | |
8076 | // inplace and (modifies its first argument). |
8077 | void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, |
8078 | const roaring_bitmap_t *x2) { |
8079 | if (x1 == x2) return; |
8080 | int pos1 = 0, pos2 = 0, intersection_size = 0; |
8081 | const int length1 = ra_get_size(ra: &x1->high_low_container); |
8082 | const int length2 = ra_get_size(ra: &x2->high_low_container); |
8083 | |
8084 | // any skipped-over or newly emptied containers in x1 |
8085 | // have to be freed. |
8086 | while (pos1 < length1 && pos2 < length2) { |
8087 | const uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8088 | const uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8089 | |
8090 | if (s1 == s2) { |
8091 | uint8_t typecode1, typecode2, typecode_result; |
8092 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8093 | typecode: &typecode1); |
8094 | c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &typecode1); |
8095 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8096 | typecode: &typecode2); |
8097 | void *c = |
8098 | container_iand(c1, type1: typecode1, c2, type2: typecode2, result_type: &typecode_result); |
8099 | if (c != c1) { // in this instance a new container was created, and |
8100 | // we need to free the old one |
8101 | container_free(container: c1, typecode: typecode1); |
8102 | } |
8103 | if (container_nonzero_cardinality(container: c, typecode: typecode_result)) { |
8104 | ra_replace_key_and_container_at_index(ra: &x1->high_low_container, |
8105 | i: intersection_size, key: s1, c, |
8106 | typecode: typecode_result); |
8107 | intersection_size++; |
8108 | } else { |
8109 | container_free(container: c, typecode: typecode_result); |
8110 | } |
8111 | ++pos1; |
8112 | ++pos2; |
8113 | } else if (s1 < s2) { |
8114 | pos1 = ra_advance_until_freeing(ra: &x1->high_low_container, x: s2, pos: pos1); |
8115 | } else { // s1 > s2 |
8116 | pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2); |
8117 | } |
8118 | } |
8119 | |
8120 | // if we ended early because x2 ran out, then all remaining in x1 should be |
8121 | // freed |
8122 | while (pos1 < length1) { |
8123 | container_free(container: x1->high_low_container.containers[pos1], |
8124 | typecode: x1->high_low_container.typecodes[pos1]); |
8125 | ++pos1; |
8126 | } |
8127 | |
8128 | // all containers after this have either been copied or freed |
8129 | ra_downsize(ra: &x1->high_low_container, new_length: intersection_size); |
8130 | } |
8131 | |
8132 | roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, |
8133 | const roaring_bitmap_t *x2) { |
8134 | uint8_t container_result_type = 0; |
8135 | const int length1 = x1->high_low_container.size, |
8136 | length2 = x2->high_low_container.size; |
8137 | if (0 == length1) { |
8138 | return roaring_bitmap_copy(r: x2); |
8139 | } |
8140 | if (0 == length2) { |
8141 | return roaring_bitmap_copy(r: x1); |
8142 | } |
8143 | roaring_bitmap_t *answer = |
8144 | roaring_bitmap_create_with_capacity(cap: length1 + length2); |
8145 | roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2)); |
8146 | int pos1 = 0, pos2 = 0; |
8147 | uint8_t container_type_1, container_type_2; |
8148 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8149 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8150 | while (true) { |
8151 | if (s1 == s2) { |
8152 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8153 | typecode: &container_type_1); |
8154 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8155 | typecode: &container_type_2); |
8156 | void *c = container_or(c1, type1: container_type_1, c2, type2: container_type_2, |
8157 | result_type: &container_result_type); |
8158 | // since we assume that the initial containers are non-empty, the |
8159 | // result here |
8160 | // can only be non-empty |
8161 | ra_append(ra: &answer->high_low_container, s: s1, c, |
8162 | typecode: container_result_type); |
8163 | ++pos1; |
8164 | ++pos2; |
8165 | if (pos1 == length1) break; |
8166 | if (pos2 == length2) break; |
8167 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8168 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8169 | |
8170 | } else if (s1 < s2) { // s1 < s2 |
8171 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8172 | typecode: &container_type_1); |
8173 | // c1 = container_clone(c1, container_type_1); |
8174 | c1 = |
8175 | get_copy_of_container(container: c1, typecode: &container_type_1, copy_on_write: is_cow(r: x1)); |
8176 | if (is_cow(r: x1)) { |
8177 | ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c: c1, |
8178 | typecode: container_type_1); |
8179 | } |
8180 | ra_append(ra: &answer->high_low_container, s: s1, c: c1, typecode: container_type_1); |
8181 | pos1++; |
8182 | if (pos1 == length1) break; |
8183 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8184 | |
8185 | } else { // s1 > s2 |
8186 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8187 | typecode: &container_type_2); |
8188 | // c2 = container_clone(c2, container_type_2); |
8189 | c2 = |
8190 | get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2)); |
8191 | if (is_cow(r: x2)) { |
8192 | ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2, |
8193 | typecode: container_type_2); |
8194 | } |
8195 | ra_append(ra: &answer->high_low_container, s: s2, c: c2, typecode: container_type_2); |
8196 | pos2++; |
8197 | if (pos2 == length2) break; |
8198 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8199 | } |
8200 | } |
8201 | if (pos1 == length1) { |
8202 | ra_append_copy_range(ra: &answer->high_low_container, |
8203 | sa: &x2->high_low_container, start_index: pos2, end_index: length2, |
8204 | copy_on_write: is_cow(r: x2)); |
8205 | } else if (pos2 == length2) { |
8206 | ra_append_copy_range(ra: &answer->high_low_container, |
8207 | sa: &x1->high_low_container, start_index: pos1, end_index: length1, |
8208 | copy_on_write: is_cow(r: x1)); |
8209 | } |
8210 | return answer; |
8211 | } |
8212 | |
8213 | // inplace or (modifies its first argument). |
8214 | void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, |
8215 | const roaring_bitmap_t *x2) { |
8216 | uint8_t container_result_type = 0; |
8217 | int length1 = x1->high_low_container.size; |
8218 | const int length2 = x2->high_low_container.size; |
8219 | |
8220 | if (0 == length2) return; |
8221 | |
8222 | if (0 == length1) { |
8223 | roaring_bitmap_overwrite(dest: x1, src: x2); |
8224 | return; |
8225 | } |
8226 | int pos1 = 0, pos2 = 0; |
8227 | uint8_t container_type_1, container_type_2; |
8228 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8229 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8230 | while (true) { |
8231 | if (s1 == s2) { |
8232 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8233 | typecode: &container_type_1); |
8234 | if (!container_is_full(container: c1, typecode: container_type_1)) { |
8235 | c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1); |
8236 | |
8237 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, |
8238 | i: pos2, typecode: &container_type_2); |
8239 | void *c = |
8240 | container_ior(c1, type1: container_type_1, c2, type2: container_type_2, |
8241 | result_type: &container_result_type); |
8242 | if (c != |
8243 | c1) { // in this instance a new container was created, and |
8244 | // we need to free the old one |
8245 | container_free(container: c1, typecode: container_type_1); |
8246 | } |
8247 | |
8248 | ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c, |
8249 | typecode: container_result_type); |
8250 | } |
8251 | ++pos1; |
8252 | ++pos2; |
8253 | if (pos1 == length1) break; |
8254 | if (pos2 == length2) break; |
8255 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8256 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8257 | |
8258 | } else if (s1 < s2) { // s1 < s2 |
8259 | pos1++; |
8260 | if (pos1 == length1) break; |
8261 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8262 | |
8263 | } else { // s1 > s2 |
8264 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8265 | typecode: &container_type_2); |
8266 | c2 = |
8267 | get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2)); |
8268 | if (is_cow(r: x2)) { |
8269 | ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2, |
8270 | typecode: container_type_2); |
8271 | } |
8272 | |
8273 | // void *c2_clone = container_clone(c2, container_type_2); |
8274 | ra_insert_new_key_value_at(ra: &x1->high_low_container, i: pos1, key: s2, container: c2, |
8275 | typecode: container_type_2); |
8276 | pos1++; |
8277 | length1++; |
8278 | pos2++; |
8279 | if (pos2 == length2) break; |
8280 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8281 | } |
8282 | } |
8283 | if (pos1 == length1) { |
8284 | ra_append_copy_range(ra: &x1->high_low_container, sa: &x2->high_low_container, |
8285 | start_index: pos2, end_index: length2, copy_on_write: is_cow(r: x2)); |
8286 | } |
8287 | } |
8288 | |
8289 | roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, |
8290 | const roaring_bitmap_t *x2) { |
8291 | uint8_t container_result_type = 0; |
8292 | const int length1 = x1->high_low_container.size, |
8293 | length2 = x2->high_low_container.size; |
8294 | if (0 == length1) { |
8295 | return roaring_bitmap_copy(r: x2); |
8296 | } |
8297 | if (0 == length2) { |
8298 | return roaring_bitmap_copy(r: x1); |
8299 | } |
8300 | roaring_bitmap_t *answer = |
8301 | roaring_bitmap_create_with_capacity(cap: length1 + length2); |
8302 | roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2)); |
8303 | int pos1 = 0, pos2 = 0; |
8304 | uint8_t container_type_1, container_type_2; |
8305 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8306 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8307 | while (true) { |
8308 | if (s1 == s2) { |
8309 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8310 | typecode: &container_type_1); |
8311 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8312 | typecode: &container_type_2); |
8313 | void *c = container_xor(c1, type1: container_type_1, c2, type2: container_type_2, |
8314 | result_type: &container_result_type); |
8315 | |
8316 | if (container_nonzero_cardinality(container: c, typecode: container_result_type)) { |
8317 | ra_append(ra: &answer->high_low_container, s: s1, c, |
8318 | typecode: container_result_type); |
8319 | } else { |
8320 | container_free(container: c, typecode: container_result_type); |
8321 | } |
8322 | ++pos1; |
8323 | ++pos2; |
8324 | if (pos1 == length1) break; |
8325 | if (pos2 == length2) break; |
8326 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8327 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8328 | |
8329 | } else if (s1 < s2) { // s1 < s2 |
8330 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8331 | typecode: &container_type_1); |
8332 | c1 = |
8333 | get_copy_of_container(container: c1, typecode: &container_type_1, copy_on_write: is_cow(r: x1)); |
8334 | if (is_cow(r: x1)) { |
8335 | ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c: c1, |
8336 | typecode: container_type_1); |
8337 | } |
8338 | ra_append(ra: &answer->high_low_container, s: s1, c: c1, typecode: container_type_1); |
8339 | pos1++; |
8340 | if (pos1 == length1) break; |
8341 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8342 | |
8343 | } else { // s1 > s2 |
8344 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8345 | typecode: &container_type_2); |
8346 | c2 = |
8347 | get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2)); |
8348 | if (is_cow(r: x2)) { |
8349 | ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2, |
8350 | typecode: container_type_2); |
8351 | } |
8352 | ra_append(ra: &answer->high_low_container, s: s2, c: c2, typecode: container_type_2); |
8353 | pos2++; |
8354 | if (pos2 == length2) break; |
8355 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8356 | } |
8357 | } |
8358 | if (pos1 == length1) { |
8359 | ra_append_copy_range(ra: &answer->high_low_container, |
8360 | sa: &x2->high_low_container, start_index: pos2, end_index: length2, |
8361 | copy_on_write: is_cow(r: x2)); |
8362 | } else if (pos2 == length2) { |
8363 | ra_append_copy_range(ra: &answer->high_low_container, |
8364 | sa: &x1->high_low_container, start_index: pos1, end_index: length1, |
8365 | copy_on_write: is_cow(r: x1)); |
8366 | } |
8367 | return answer; |
8368 | } |
8369 | |
8370 | // inplace xor (modifies its first argument). |
8371 | |
8372 | void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, |
8373 | const roaring_bitmap_t *x2) { |
8374 | assert(x1 != x2); |
8375 | uint8_t container_result_type = 0; |
8376 | int length1 = x1->high_low_container.size; |
8377 | const int length2 = x2->high_low_container.size; |
8378 | |
8379 | if (0 == length2) return; |
8380 | |
8381 | if (0 == length1) { |
8382 | roaring_bitmap_overwrite(dest: x1, src: x2); |
8383 | return; |
8384 | } |
8385 | |
8386 | // XOR can have new containers inserted from x2, but can also |
8387 | // lose containers when x1 and x2 are nonempty and identical. |
8388 | |
8389 | int pos1 = 0, pos2 = 0; |
8390 | uint8_t container_type_1, container_type_2; |
8391 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8392 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8393 | while (true) { |
8394 | if (s1 == s2) { |
8395 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8396 | typecode: &container_type_1); |
8397 | c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1); |
8398 | |
8399 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8400 | typecode: &container_type_2); |
8401 | void *c = container_ixor(c1, type1: container_type_1, c2, type2: container_type_2, |
8402 | result_type: &container_result_type); |
8403 | |
8404 | if (container_nonzero_cardinality(container: c, typecode: container_result_type)) { |
8405 | ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c, |
8406 | typecode: container_result_type); |
8407 | ++pos1; |
8408 | } else { |
8409 | container_free(container: c, typecode: container_result_type); |
8410 | ra_remove_at_index(ra: &x1->high_low_container, i: pos1); |
8411 | --length1; |
8412 | } |
8413 | |
8414 | ++pos2; |
8415 | if (pos1 == length1) break; |
8416 | if (pos2 == length2) break; |
8417 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8418 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8419 | |
8420 | } else if (s1 < s2) { // s1 < s2 |
8421 | pos1++; |
8422 | if (pos1 == length1) break; |
8423 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8424 | |
8425 | } else { // s1 > s2 |
8426 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8427 | typecode: &container_type_2); |
8428 | c2 = |
8429 | get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2)); |
8430 | if (is_cow(r: x2)) { |
8431 | ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2, |
8432 | typecode: container_type_2); |
8433 | } |
8434 | |
8435 | ra_insert_new_key_value_at(ra: &x1->high_low_container, i: pos1, key: s2, container: c2, |
8436 | typecode: container_type_2); |
8437 | pos1++; |
8438 | length1++; |
8439 | pos2++; |
8440 | if (pos2 == length2) break; |
8441 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8442 | } |
8443 | } |
8444 | if (pos1 == length1) { |
8445 | ra_append_copy_range(ra: &x1->high_low_container, sa: &x2->high_low_container, |
8446 | start_index: pos2, end_index: length2, copy_on_write: is_cow(r: x2)); |
8447 | } |
8448 | } |
8449 | |
8450 | roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, |
8451 | const roaring_bitmap_t *x2) { |
8452 | uint8_t container_result_type = 0; |
8453 | const int length1 = x1->high_low_container.size, |
8454 | length2 = x2->high_low_container.size; |
8455 | if (0 == length1) { |
8456 | roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); |
8457 | roaring_bitmap_set_copy_on_write(r: empty_bitmap, cow: is_cow(r: x1) && is_cow(r: x2)); |
8458 | return empty_bitmap; |
8459 | } |
8460 | if (0 == length2) { |
8461 | return roaring_bitmap_copy(r: x1); |
8462 | } |
8463 | roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(cap: length1); |
8464 | roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2)); |
8465 | |
8466 | int pos1 = 0, pos2 = 0; |
8467 | uint8_t container_type_1, container_type_2; |
8468 | uint16_t s1 = 0; |
8469 | uint16_t s2 = 0; |
8470 | while (true) { |
8471 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8472 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8473 | |
8474 | if (s1 == s2) { |
8475 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8476 | typecode: &container_type_1); |
8477 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8478 | typecode: &container_type_2); |
8479 | void *c = |
8480 | container_andnot(c1, type1: container_type_1, c2, type2: container_type_2, |
8481 | result_type: &container_result_type); |
8482 | |
8483 | if (container_nonzero_cardinality(container: c, typecode: container_result_type)) { |
8484 | ra_append(ra: &answer->high_low_container, s: s1, c, |
8485 | typecode: container_result_type); |
8486 | } else { |
8487 | container_free(container: c, typecode: container_result_type); |
8488 | } |
8489 | ++pos1; |
8490 | ++pos2; |
8491 | if (pos1 == length1) break; |
8492 | if (pos2 == length2) break; |
8493 | } else if (s1 < s2) { // s1 < s2 |
8494 | const int next_pos1 = |
8495 | ra_advance_until(ra: &x1->high_low_container, x: s2, pos: pos1); |
8496 | ra_append_copy_range(ra: &answer->high_low_container, |
8497 | sa: &x1->high_low_container, start_index: pos1, end_index: next_pos1, |
8498 | copy_on_write: is_cow(r: x1)); |
8499 | // TODO : perhaps some of the copy_on_write should be based on |
8500 | // answer rather than x1 (more stringent?). Many similar cases |
8501 | pos1 = next_pos1; |
8502 | if (pos1 == length1) break; |
8503 | } else { // s1 > s2 |
8504 | pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2); |
8505 | if (pos2 == length2) break; |
8506 | } |
8507 | } |
8508 | if (pos2 == length2) { |
8509 | ra_append_copy_range(ra: &answer->high_low_container, |
8510 | sa: &x1->high_low_container, start_index: pos1, end_index: length1, |
8511 | copy_on_write: is_cow(r: x1)); |
8512 | } |
8513 | return answer; |
8514 | } |
8515 | |
8516 | // inplace andnot (modifies its first argument). |
8517 | |
8518 | void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, |
8519 | const roaring_bitmap_t *x2) { |
8520 | assert(x1 != x2); |
8521 | |
8522 | uint8_t container_result_type = 0; |
8523 | int length1 = x1->high_low_container.size; |
8524 | const int length2 = x2->high_low_container.size; |
8525 | int intersection_size = 0; |
8526 | |
8527 | if (0 == length2) return; |
8528 | |
8529 | if (0 == length1) { |
8530 | roaring_bitmap_clear(r: x1); |
8531 | return; |
8532 | } |
8533 | |
8534 | int pos1 = 0, pos2 = 0; |
8535 | uint8_t container_type_1, container_type_2; |
8536 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8537 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8538 | while (true) { |
8539 | if (s1 == s2) { |
8540 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
8541 | typecode: &container_type_1); |
8542 | c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1); |
8543 | |
8544 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
8545 | typecode: &container_type_2); |
8546 | void *c = |
8547 | container_iandnot(c1, type1: container_type_1, c2, type2: container_type_2, |
8548 | result_type: &container_result_type); |
8549 | |
8550 | if (container_nonzero_cardinality(container: c, typecode: container_result_type)) { |
8551 | ra_replace_key_and_container_at_index(ra: &x1->high_low_container, |
8552 | i: intersection_size++, key: s1, |
8553 | c, typecode: container_result_type); |
8554 | } else { |
8555 | container_free(container: c, typecode: container_result_type); |
8556 | } |
8557 | |
8558 | ++pos1; |
8559 | ++pos2; |
8560 | if (pos1 == length1) break; |
8561 | if (pos2 == length2) break; |
8562 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8563 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8564 | |
8565 | } else if (s1 < s2) { // s1 < s2 |
8566 | if (pos1 != intersection_size) { |
8567 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, |
8568 | i: pos1, typecode: &container_type_1); |
8569 | |
8570 | ra_replace_key_and_container_at_index(ra: &x1->high_low_container, |
8571 | i: intersection_size, key: s1, c: c1, |
8572 | typecode: container_type_1); |
8573 | } |
8574 | intersection_size++; |
8575 | pos1++; |
8576 | if (pos1 == length1) break; |
8577 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
8578 | |
8579 | } else { // s1 > s2 |
8580 | pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2); |
8581 | if (pos2 == length2) break; |
8582 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
8583 | } |
8584 | } |
8585 | |
8586 | if (pos1 < length1) { |
8587 | // all containers between intersection_size and |
8588 | // pos1 are junk. However, they have either been moved |
8589 | // (thus still referenced) or involved in an iandnot |
8590 | // that will clean up all containers that could not be reused. |
8591 | // Thus we should not free the junk containers between |
8592 | // intersection_size and pos1. |
8593 | if (pos1 > intersection_size) { |
8594 | // left slide of remaining items |
8595 | ra_copy_range(ra: &x1->high_low_container, begin: pos1, end: length1, |
8596 | new_begin: intersection_size); |
8597 | } |
8598 | // else current placement is fine |
8599 | intersection_size += (length1 - pos1); |
8600 | } |
8601 | ra_downsize(ra: &x1->high_low_container, new_length: intersection_size); |
8602 | } |
8603 | |
8604 | uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) { |
8605 | uint64_t card = 0; |
8606 | for (int i = 0; i < ra->high_low_container.size; ++i) |
8607 | card += container_get_cardinality(container: ra->high_low_container.containers[i], |
8608 | typecode: ra->high_low_container.typecodes[i]); |
8609 | return card; |
8610 | } |
8611 | |
8612 | uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, |
8613 | uint64_t range_start, |
8614 | uint64_t range_end) { |
8615 | if (range_end > UINT32_MAX) { |
8616 | range_end = UINT32_MAX + UINT64_C(1); |
8617 | } |
8618 | if (range_start >= range_end) { |
8619 | return 0; |
8620 | } |
8621 | range_end--; // make range_end inclusive |
8622 | // now we have: 0 <= range_start <= range_end <= UINT32_MAX |
8623 | |
8624 | uint16_t minhb = range_start >> 16; |
8625 | uint16_t maxhb = range_end >> 16; |
8626 | |
8627 | uint64_t card = 0; |
8628 | |
8629 | int i = ra_get_index(ra: &ra->high_low_container, x: minhb); |
8630 | if (i >= 0) { |
8631 | if (minhb == maxhb) { |
8632 | card += container_rank(container: ra->high_low_container.containers[i], |
8633 | typecode: ra->high_low_container.typecodes[i], |
8634 | x: range_end & 0xffff); |
8635 | } else { |
8636 | card += container_get_cardinality(container: ra->high_low_container.containers[i], |
8637 | typecode: ra->high_low_container.typecodes[i]); |
8638 | } |
8639 | if ((range_start & 0xffff) != 0) { |
8640 | card -= container_rank(container: ra->high_low_container.containers[i], |
8641 | typecode: ra->high_low_container.typecodes[i], |
8642 | x: (range_start & 0xffff) - 1); |
8643 | } |
8644 | i++; |
8645 | } else { |
8646 | i = -i - 1; |
8647 | } |
8648 | |
8649 | for (; i < ra->high_low_container.size; i++) { |
8650 | uint16_t key = ra->high_low_container.keys[i]; |
8651 | if (key < maxhb) { |
8652 | card += container_get_cardinality(container: ra->high_low_container.containers[i], |
8653 | typecode: ra->high_low_container.typecodes[i]); |
8654 | } else if (key == maxhb) { |
8655 | card += container_rank(container: ra->high_low_container.containers[i], |
8656 | typecode: ra->high_low_container.typecodes[i], |
8657 | x: range_end & 0xffff); |
8658 | break; |
8659 | } else { |
8660 | break; |
8661 | } |
8662 | } |
8663 | |
8664 | return card; |
8665 | } |
8666 | |
8667 | |
8668 | bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) { |
8669 | return ra->high_low_container.size == 0; |
8670 | } |
8671 | |
8672 | void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) { |
8673 | ra_to_uint32_array(ra: &ra->high_low_container, ans); |
8674 | } |
8675 | |
8676 | bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) { |
8677 | return ra_range_uint32_array(ra: &ra->high_low_container, offset, limit, ans); |
8678 | } |
8679 | |
8680 | /** convert array and bitmap containers to run containers when it is more |
8681 | * efficient; |
8682 | * also convert from run containers when more space efficient. Returns |
8683 | * true if the result has at least one run container. |
8684 | */ |
8685 | bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { |
8686 | bool answer = false; |
8687 | for (int i = 0; i < r->high_low_container.size; i++) { |
8688 | uint8_t typecode_original, typecode_after; |
8689 | ra_unshare_container_at_index( |
8690 | ra: &r->high_low_container, i); // TODO: this introduces extra cloning! |
8691 | void *c = ra_get_container_at_index(ra: &r->high_low_container, i, |
8692 | typecode: &typecode_original); |
8693 | void *c1 = convert_run_optimize(c, typecode_original, typecode_after: &typecode_after); |
8694 | if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true; |
8695 | ra_set_container_at_index(ra: &r->high_low_container, i, c: c1, |
8696 | typecode: typecode_after); |
8697 | } |
8698 | return answer; |
8699 | } |
8700 | |
8701 | size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { |
8702 | size_t answer = 0; |
8703 | for (int i = 0; i < r->high_low_container.size; i++) { |
8704 | uint8_t typecode_original; |
8705 | void *c = ra_get_container_at_index(ra: &r->high_low_container, i, |
8706 | typecode: &typecode_original); |
8707 | answer += container_shrink_to_fit(container: c, typecode: typecode_original); |
8708 | } |
8709 | answer += ra_shrink_to_fit(ra: &r->high_low_container); |
8710 | return answer; |
8711 | } |
8712 | |
8713 | /** |
8714 | * Remove run-length encoding even when it is more space efficient |
8715 | * return whether a change was applied |
8716 | */ |
8717 | bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { |
8718 | bool answer = false; |
8719 | for (int i = 0; i < r->high_low_container.size; i++) { |
8720 | uint8_t typecode_original, typecode_after; |
8721 | void *c = ra_get_container_at_index(ra: &r->high_low_container, i, |
8722 | typecode: &typecode_original); |
8723 | if (get_container_type(container: c, type: typecode_original) == |
8724 | RUN_CONTAINER_TYPE_CODE) { |
8725 | answer = true; |
8726 | if (typecode_original == SHARED_CONTAINER_TYPE_CODE) { |
8727 | run_container_t *truec = |
8728 | (run_container_t *)((shared_container_t *)c)->container; |
8729 | int32_t card = run_container_cardinality(run: truec); |
8730 | void *c1 = convert_to_bitset_or_array_container( |
8731 | r: truec, card, resulttype: &typecode_after); |
8732 | shared_container_free(container: (shared_container_t *)c);// will free the run container as needed |
8733 | ra_set_container_at_index(ra: &r->high_low_container, i, c: c1, |
8734 | typecode: typecode_after); |
8735 | |
8736 | } else { |
8737 | int32_t card = run_container_cardinality(run: (run_container_t *)c); |
8738 | void *c1 = convert_to_bitset_or_array_container( |
8739 | r: (run_container_t *)c, card, resulttype: &typecode_after); |
8740 | run_container_free(run: (run_container_t *)c); |
8741 | ra_set_container_at_index(ra: &r->high_low_container, i, c: c1, |
8742 | typecode: typecode_after); |
8743 | } |
8744 | } |
8745 | } |
8746 | return answer; |
8747 | } |
8748 | |
8749 | size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) { |
8750 | size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); |
8751 | uint64_t cardinality = roaring_bitmap_get_cardinality(ra); |
8752 | uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); |
8753 | if (portablesize < sizeasarray) { |
8754 | buf[0] = SERIALIZATION_CONTAINER; |
8755 | return roaring_bitmap_portable_serialize(ra, buf: buf + 1) + 1; |
8756 | } else { |
8757 | buf[0] = SERIALIZATION_ARRAY_UINT32; |
8758 | memcpy(dest: buf + 1, src: &cardinality, n: sizeof(uint32_t)); |
8759 | roaring_bitmap_to_uint32_array( |
8760 | ra, ans: (uint32_t *)(buf + 1 + sizeof(uint32_t))); |
8761 | return 1 + (size_t)sizeasarray; |
8762 | } |
8763 | } |
8764 | |
8765 | size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) { |
8766 | size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); |
8767 | uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) + |
8768 | sizeof(uint32_t); |
8769 | return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1; |
8770 | } |
8771 | |
8772 | size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) { |
8773 | return ra_portable_size_in_bytes(ra: &ra->high_low_container); |
8774 | } |
8775 | |
8776 | |
8777 | roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) { |
8778 | roaring_bitmap_t *ans = |
8779 | (roaring_bitmap_t *)malloc(size: sizeof(roaring_bitmap_t)); |
8780 | if (ans == NULL) { |
8781 | return NULL; |
8782 | } |
8783 | size_t bytesread; |
8784 | bool is_ok = ra_portable_deserialize(ra: &ans->high_low_container, buf, maxbytes, readbytes: &bytesread); |
8785 | if(is_ok) assert(bytesread <= maxbytes); |
8786 | roaring_bitmap_set_copy_on_write(r: ans, false); |
8787 | if (!is_ok) { |
8788 | free(ptr: ans); |
8789 | return NULL; |
8790 | } |
8791 | return ans; |
8792 | } |
8793 | |
8794 | roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { |
8795 | return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); |
8796 | } |
8797 | |
8798 | |
/**
 * Forwards `buf` and `maxbytes` to ra_portable_deserialize_size() and
 * returns its result unchanged.
 */
size_t roaring_bitmap_portable_deserialize_size(const char *buf,
                                                size_t maxbytes) {
    const size_t nbytes = ra_portable_deserialize_size(buf, maxbytes);
    return nbytes;
}
8802 | |
8803 | |
8804 | size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, |
8805 | char *buf) { |
8806 | return ra_portable_serialize(ra: &ra->high_low_container, buf); |
8807 | } |
8808 | |
8809 | roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { |
8810 | const char *bufaschar = (const char *)buf; |
8811 | if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) { |
8812 | /* This looks like a compressed set of uint32_t elements */ |
8813 | uint32_t card; |
8814 | memcpy(dest: &card, src: bufaschar + 1, n: sizeof(uint32_t)); |
8815 | const uint32_t *elems = |
8816 | (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); |
8817 | |
8818 | return roaring_bitmap_of_ptr(n_args: card, vals: elems); |
8819 | } else if (bufaschar[0] == SERIALIZATION_CONTAINER) { |
8820 | return roaring_bitmap_portable_deserialize(buf: bufaschar + 1); |
8821 | } else |
8822 | return (NULL); |
8823 | } |
8824 | |
8825 | bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, |
8826 | void *ptr) { |
8827 | for (int i = 0; i < ra->high_low_container.size; ++i) |
8828 | if (!container_iterate(container: ra->high_low_container.containers[i], |
8829 | typecode: ra->high_low_container.typecodes[i], |
8830 | base: ((uint32_t)ra->high_low_container.keys[i]) << 16, |
8831 | iterator, ptr)) { |
8832 | return false; |
8833 | } |
8834 | return true; |
8835 | } |
8836 | |
8837 | bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, |
8838 | uint64_t high_bits, void *ptr) { |
8839 | for (int i = 0; i < ra->high_low_container.size; ++i) |
8840 | if (!container_iterate64( |
8841 | container: ra->high_low_container.containers[i], |
8842 | typecode: ra->high_low_container.typecodes[i], |
8843 | base: ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator, |
8844 | high_bits, ptr)) { |
8845 | return false; |
8846 | } |
8847 | return true; |
8848 | } |
8849 | |
8850 | /**** |
8851 | * begin roaring_uint32_iterator_t |
8852 | *****/ |
8853 | |
8854 | // Partially initializes the roaring iterator when it begins looking at |
8855 | // a new container. |
8856 | static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) { |
8857 | newit->in_container_index = 0; |
8858 | newit->run_index = 0; |
8859 | newit->current_value = 0; |
8860 | if (newit->container_index >= newit->parent->high_low_container.size || |
8861 | newit->container_index < 0) { |
8862 | newit->current_value = UINT32_MAX; |
8863 | return (newit->has_value = false); |
8864 | } |
8865 | // assume not empty |
8866 | newit->has_value = true; |
8867 | // we precompute container, typecode and highbits so that successive |
8868 | // iterators do not have to grab them from odd memory locations |
8869 | // and have to worry about the (easily predicted) container_unwrap_shared |
8870 | // call. |
8871 | newit->container = |
8872 | newit->parent->high_low_container.containers[newit->container_index]; |
8873 | newit->typecode = |
8874 | newit->parent->high_low_container.typecodes[newit->container_index]; |
8875 | newit->highbits = |
8876 | ((uint32_t) |
8877 | newit->parent->high_low_container.keys[newit->container_index]) |
8878 | << 16; |
8879 | newit->container = |
8880 | container_unwrap_shared(candidate_shared_container: newit->container, type: &(newit->typecode)); |
8881 | return newit->has_value; |
8882 | } |
8883 | |
8884 | static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { |
8885 | if (!iter_new_container_partial_init(newit)) |
8886 | return newit->has_value; |
8887 | |
8888 | uint32_t wordindex; |
8889 | uint64_t word; // used for bitsets |
8890 | switch (newit->typecode) { |
8891 | case BITSET_CONTAINER_TYPE_CODE: |
8892 | wordindex = 0; |
8893 | while ((word = ((const bitset_container_t *)(newit->container)) |
8894 | ->array[wordindex]) == 0) |
8895 | wordindex++; // advance |
8896 | // here "word" is non-zero |
8897 | newit->in_container_index = wordindex * 64 + __builtin_ctzll(word); |
8898 | newit->current_value = newit->highbits | newit->in_container_index; |
8899 | break; |
8900 | case ARRAY_CONTAINER_TYPE_CODE: |
8901 | newit->current_value = |
8902 | newit->highbits | |
8903 | ((const array_container_t *)(newit->container))->array[0]; |
8904 | break; |
8905 | case RUN_CONTAINER_TYPE_CODE: |
8906 | newit->current_value = |
8907 | newit->highbits | |
8908 | (((const run_container_t *)(newit->container))->runs[0].value); |
8909 | break; |
8910 | default: |
8911 | // if this ever happens, bug! |
8912 | assert(false); |
8913 | } // switch (typecode) |
8914 | return true; |
8915 | } |
8916 | |
8917 | static bool loadlastvalue(roaring_uint32_iterator_t* newit) { |
8918 | if (!iter_new_container_partial_init(newit)) |
8919 | return newit->has_value; |
8920 | |
8921 | switch(newit->typecode) { |
8922 | case BITSET_CONTAINER_TYPE_CODE: { |
8923 | uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1; |
8924 | uint64_t word; |
8925 | const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container; |
8926 | while ((word = bitset_container->array[wordindex]) == 0) |
8927 | --wordindex; |
8928 | |
8929 | int num_leading_zeros = __builtin_clzll(word); |
8930 | newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); |
8931 | newit->current_value = newit->highbits | newit->in_container_index; |
8932 | break; |
8933 | } |
8934 | case ARRAY_CONTAINER_TYPE_CODE: { |
8935 | const array_container_t* array_container = (const array_container_t*)newit->container; |
8936 | newit->in_container_index = array_container->cardinality - 1; |
8937 | newit->current_value = newit->highbits | array_container->array[newit->in_container_index]; |
8938 | break; |
8939 | } |
8940 | case RUN_CONTAINER_TYPE_CODE: { |
8941 | const run_container_t* run_container = (const run_container_t*)newit->container; |
8942 | newit->run_index = run_container->n_runs - 1; |
8943 | const rle16_t* last_run = &run_container->runs[newit->run_index]; |
8944 | newit->current_value = newit->highbits | (last_run->value + last_run->length); |
8945 | break; |
8946 | } |
8947 | default: |
8948 | // if this ever happens, bug! |
8949 | assert(false); |
8950 | } |
8951 | return true; |
8952 | } |
8953 | |
8954 | // prerequesite: the value should be in range of the container |
8955 | static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { |
8956 | // Don't have to check return value because of prerequisite |
8957 | iter_new_container_partial_init(newit); |
8958 | uint16_t lb = val & 0xFFFF; |
8959 | |
8960 | switch (newit->typecode) { |
8961 | case BITSET_CONTAINER_TYPE_CODE: |
8962 | newit->in_container_index = bitset_container_index_equalorlarger(container: (const bitset_container_t *)(newit->container), x: lb); |
8963 | newit->current_value = newit->highbits | newit->in_container_index; |
8964 | break; |
8965 | case ARRAY_CONTAINER_TYPE_CODE: |
8966 | newit->in_container_index = array_container_index_equalorlarger(arr: (const array_container_t *)(newit->container), x: lb); |
8967 | newit->current_value = |
8968 | newit->highbits | |
8969 | ((const array_container_t *)(newit->container))->array[newit->in_container_index]; |
8970 | break; |
8971 | case RUN_CONTAINER_TYPE_CODE: |
8972 | newit->run_index = run_container_index_equalorlarger(arr: (const run_container_t *)(newit->container), x: lb); |
8973 | if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) { |
8974 | newit->current_value = val; |
8975 | } else { |
8976 | newit->current_value = |
8977 | newit->highbits | |
8978 | (((const run_container_t *)(newit->container))->runs[newit->run_index].value); |
8979 | } |
8980 | break; |
8981 | default: |
8982 | // if this ever happens, bug! |
8983 | assert(false); |
8984 | } // switch (typecode) |
8985 | return true; |
8986 | } |
8987 | |
8988 | void roaring_init_iterator(const roaring_bitmap_t *ra, |
8989 | roaring_uint32_iterator_t *newit) { |
8990 | newit->parent = ra; |
8991 | newit->container_index = 0; |
8992 | newit->has_value = loadfirstvalue(newit); |
8993 | } |
8994 | |
8995 | void roaring_init_iterator_last(const roaring_bitmap_t *ra, |
8996 | roaring_uint32_iterator_t *newit) { |
8997 | newit->parent = ra; |
8998 | newit->container_index = newit->parent->high_low_container.size - 1; |
8999 | newit->has_value = loadlastvalue(newit); |
9000 | } |
9001 | |
9002 | roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) { |
9003 | roaring_uint32_iterator_t *newit = |
9004 | (roaring_uint32_iterator_t *)malloc(size: sizeof(roaring_uint32_iterator_t)); |
9005 | if (newit == NULL) return NULL; |
9006 | roaring_init_iterator(ra, newit); |
9007 | return newit; |
9008 | } |
9009 | |
9010 | roaring_uint32_iterator_t *roaring_copy_uint32_iterator( |
9011 | const roaring_uint32_iterator_t *it) { |
9012 | roaring_uint32_iterator_t *newit = |
9013 | (roaring_uint32_iterator_t *)malloc(size: sizeof(roaring_uint32_iterator_t)); |
9014 | memcpy(dest: newit, src: it, n: sizeof(roaring_uint32_iterator_t)); |
9015 | return newit; |
9016 | } |
9017 | |
9018 | bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { |
9019 | uint16_t hb = val >> 16; |
9020 | const int i = ra_get_index(ra: & it->parent->high_low_container, x: hb); |
9021 | if (i >= 0) { |
9022 | uint32_t lowvalue = container_maximum(container: it->parent->high_low_container.containers[i], typecode: it->parent->high_low_container.typecodes[i]); |
9023 | uint16_t lb = val & 0xFFFF; |
9024 | if(lowvalue < lb ) { |
9025 | it->container_index = i+1; // will have to load first value of next container |
9026 | } else {// the value is necessarily within the range of the container |
9027 | it->container_index = i; |
9028 | it->has_value = loadfirstvalue_largeorequal(newit: it, val); |
9029 | return it->has_value; |
9030 | } |
9031 | } else { |
9032 | // there is no matching, so we are going for the next container |
9033 | it->container_index = -i-1; |
9034 | } |
9035 | it->has_value = loadfirstvalue(newit: it); |
9036 | return it->has_value; |
9037 | } |
9038 | |
9039 | |
9040 | bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { |
9041 | if (it->container_index >= it->parent->high_low_container.size) { |
9042 | return (it->has_value = false); |
9043 | } |
9044 | if (it->container_index < 0) { |
9045 | it->container_index = 0; |
9046 | return (it->has_value = loadfirstvalue(newit: it)); |
9047 | } |
9048 | |
9049 | uint32_t wordindex; // used for bitsets |
9050 | uint64_t word; // used for bitsets |
9051 | switch (it->typecode) { |
9052 | case BITSET_CONTAINER_TYPE_CODE: |
9053 | it->in_container_index++; |
9054 | wordindex = it->in_container_index / 64; |
9055 | if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; |
9056 | word = ((const bitset_container_t *)(it->container)) |
9057 | ->array[wordindex] & |
9058 | (UINT64_MAX << (it->in_container_index % 64)); |
9059 | // next part could be optimized/simplified |
9060 | while ((word == 0) && |
9061 | (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { |
9062 | wordindex++; |
9063 | word = ((const bitset_container_t *)(it->container)) |
9064 | ->array[wordindex]; |
9065 | } |
9066 | if (word != 0) { |
9067 | it->in_container_index = wordindex * 64 + __builtin_ctzll(word); |
9068 | it->current_value = it->highbits | it->in_container_index; |
9069 | return (it->has_value = true); |
9070 | } |
9071 | break; |
9072 | case ARRAY_CONTAINER_TYPE_CODE: |
9073 | it->in_container_index++; |
9074 | if (it->in_container_index < |
9075 | ((const array_container_t *)(it->container))->cardinality) { |
9076 | it->current_value = it->highbits | |
9077 | ((const array_container_t *)(it->container)) |
9078 | ->array[it->in_container_index]; |
9079 | return (it->has_value = true); |
9080 | } |
9081 | break; |
9082 | case RUN_CONTAINER_TYPE_CODE: { |
9083 | if(it->current_value == UINT32_MAX) { |
9084 | return (it->has_value = false); // without this, we risk an overflow to zero |
9085 | } |
9086 | |
9087 | const run_container_t* run_container = (const run_container_t*)it->container; |
9088 | if (++it->current_value <= (it->highbits | (run_container->runs[it->run_index].value + |
9089 | run_container->runs[it->run_index].length))) { |
9090 | return (it->has_value = true); |
9091 | } |
9092 | |
9093 | if (++it->run_index < run_container->n_runs) { |
9094 | // Assume the run has a value |
9095 | it->current_value = it->highbits | run_container->runs[it->run_index].value; |
9096 | return (it->has_value = true); |
9097 | } |
9098 | break; |
9099 | } |
9100 | default: |
9101 | // if this ever happens, bug! |
9102 | assert(false); |
9103 | } // switch (typecode) |
9104 | // moving to next container |
9105 | it->container_index++; |
9106 | return (it->has_value = loadfirstvalue(newit: it)); |
9107 | } |
9108 | |
9109 | bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) { |
9110 | if (it->container_index < 0) { |
9111 | return (it->has_value = false); |
9112 | } |
9113 | if (it->container_index >= it->parent->high_low_container.size) { |
9114 | it->container_index = it->parent->high_low_container.size - 1; |
9115 | return (it->has_value = loadlastvalue(newit: it)); |
9116 | } |
9117 | |
9118 | switch (it->typecode) { |
9119 | case BITSET_CONTAINER_TYPE_CODE: { |
9120 | if (--it->in_container_index < 0) |
9121 | break; |
9122 | |
9123 | const bitset_container_t* bitset_container = (const bitset_container_t*)it->container; |
9124 | int32_t wordindex = it->in_container_index / 64; |
9125 | uint64_t word = bitset_container->array[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64))); |
9126 | |
9127 | while (word == 0 && --wordindex >= 0) { |
9128 | word = bitset_container->array[wordindex]; |
9129 | } |
9130 | if (word == 0) |
9131 | break; |
9132 | |
9133 | int num_leading_zeros = __builtin_clzll(word); |
9134 | it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); |
9135 | it->current_value = it->highbits | it->in_container_index; |
9136 | return (it->has_value = true); |
9137 | } |
9138 | case ARRAY_CONTAINER_TYPE_CODE: { |
9139 | if (--it->in_container_index < 0) |
9140 | break; |
9141 | |
9142 | const array_container_t* array_container = (const array_container_t*)it->container; |
9143 | it->current_value = it->highbits | array_container->array[it->in_container_index]; |
9144 | return (it->has_value = true); |
9145 | } |
9146 | case RUN_CONTAINER_TYPE_CODE: { |
9147 | if(it->current_value == 0) |
9148 | return (it->has_value = false); |
9149 | |
9150 | const run_container_t* run_container = (const run_container_t*)it->container; |
9151 | if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) { |
9152 | return (it->has_value = true); |
9153 | } |
9154 | |
9155 | if (--it->run_index < 0) |
9156 | break; |
9157 | |
9158 | it->current_value = it->highbits | (run_container->runs[it->run_index].value + |
9159 | run_container->runs[it->run_index].length); |
9160 | return (it->has_value = true); |
9161 | } |
9162 | default: |
9163 | // if this ever happens, bug! |
9164 | assert(false); |
9165 | } // switch (typecode) |
9166 | |
9167 | // moving to previous container |
9168 | it->container_index--; |
9169 | return (it->has_value = loadlastvalue(newit: it)); |
9170 | } |
9171 | |
9172 | uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { |
9173 | uint32_t ret = 0; |
9174 | uint32_t num_values; |
9175 | uint32_t wordindex; // used for bitsets |
9176 | uint64_t word; // used for bitsets |
9177 | const array_container_t* acont; //TODO remove |
9178 | const run_container_t* rcont; //TODO remove |
9179 | const bitset_container_t* bcont; //TODO remove |
9180 | |
9181 | while (it->has_value && ret < count) { |
9182 | switch (it->typecode) { |
9183 | case BITSET_CONTAINER_TYPE_CODE: |
9184 | bcont = (const bitset_container_t*)(it->container); |
9185 | wordindex = it->in_container_index / 64; |
9186 | word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); |
9187 | do { |
9188 | while (word != 0 && ret < count) { |
9189 | buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word)); |
9190 | word = word & (word - 1); |
9191 | buf++; |
9192 | ret++; |
9193 | } |
9194 | while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { |
9195 | wordindex++; |
9196 | word = bcont->array[wordindex]; |
9197 | } |
9198 | } while (word != 0 && ret < count); |
9199 | it->has_value = (word != 0); |
9200 | if (it->has_value) { |
9201 | it->in_container_index = wordindex * 64 + __builtin_ctzll(word); |
9202 | it->current_value = it->highbits | it->in_container_index; |
9203 | } |
9204 | break; |
9205 | case ARRAY_CONTAINER_TYPE_CODE: |
9206 | acont = (const array_container_t *)(it->container); |
9207 | num_values = minimum_uint32(a: acont->cardinality - it->in_container_index, b: count - ret); |
9208 | for (uint32_t i = 0; i < num_values; i++) { |
9209 | buf[i] = it->highbits | acont->array[it->in_container_index + i]; |
9210 | } |
9211 | buf += num_values; |
9212 | ret += num_values; |
9213 | it->in_container_index += num_values; |
9214 | it->has_value = (it->in_container_index < acont->cardinality); |
9215 | if (it->has_value) { |
9216 | it->current_value = it->highbits | acont->array[it->in_container_index]; |
9217 | } |
9218 | break; |
9219 | case RUN_CONTAINER_TYPE_CODE: |
9220 | rcont = (const run_container_t*)(it->container); |
9221 | //"in_run_index" name is misleading, read it as "max_value_in_current_run" |
9222 | do { |
9223 | uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length); |
9224 | num_values = minimum_uint32(a: largest_run_value - it->current_value + 1, b: count - ret); |
9225 | for (uint32_t i = 0; i < num_values; i++) { |
9226 | buf[i] = it->current_value + i; |
9227 | } |
9228 | it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 |
9229 | buf += num_values; |
9230 | ret += num_values; |
9231 | |
9232 | if (it->current_value > largest_run_value || it->current_value == 0) { |
9233 | it->run_index++; |
9234 | if (it->run_index < rcont->n_runs) { |
9235 | it->current_value = it->highbits | rcont->runs[it->run_index].value; |
9236 | } else { |
9237 | it->has_value = false; |
9238 | } |
9239 | } |
9240 | } while ((ret < count) && it->has_value); |
9241 | break; |
9242 | default: |
9243 | assert(false); |
9244 | } |
9245 | if (it->has_value) { |
9246 | assert(ret == count); |
9247 | return ret; |
9248 | } |
9249 | it->container_index++; |
9250 | it->has_value = loadfirstvalue(newit: it); |
9251 | } |
9252 | return ret; |
9253 | } |
9254 | |
9255 | |
9256 | |
9257 | void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(ptr: it); } |
9258 | |
9259 | /**** |
9260 | * end of roaring_uint32_iterator_t |
9261 | *****/ |
9262 | |
9263 | bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, |
9264 | const roaring_bitmap_t *ra2) { |
9265 | if (ra1->high_low_container.size != ra2->high_low_container.size) { |
9266 | return false; |
9267 | } |
9268 | for (int i = 0; i < ra1->high_low_container.size; ++i) { |
9269 | if (ra1->high_low_container.keys[i] != |
9270 | ra2->high_low_container.keys[i]) { |
9271 | return false; |
9272 | } |
9273 | } |
9274 | for (int i = 0; i < ra1->high_low_container.size; ++i) { |
9275 | bool areequal = container_equals(c1: ra1->high_low_container.containers[i], |
9276 | type1: ra1->high_low_container.typecodes[i], |
9277 | c2: ra2->high_low_container.containers[i], |
9278 | type2: ra2->high_low_container.typecodes[i]); |
9279 | if (!areequal) { |
9280 | return false; |
9281 | } |
9282 | } |
9283 | return true; |
9284 | } |
9285 | |
9286 | bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, |
9287 | const roaring_bitmap_t *ra2) { |
9288 | const int length1 = ra1->high_low_container.size, |
9289 | length2 = ra2->high_low_container.size; |
9290 | |
9291 | int pos1 = 0, pos2 = 0; |
9292 | |
9293 | while (pos1 < length1 && pos2 < length2) { |
9294 | const uint16_t s1 = ra_get_key_at_index(ra: &ra1->high_low_container, i: pos1); |
9295 | const uint16_t s2 = ra_get_key_at_index(ra: &ra2->high_low_container, i: pos2); |
9296 | |
9297 | if (s1 == s2) { |
9298 | uint8_t container_type_1, container_type_2; |
9299 | void *c1 = ra_get_container_at_index(ra: &ra1->high_low_container, i: pos1, |
9300 | typecode: &container_type_1); |
9301 | void *c2 = ra_get_container_at_index(ra: &ra2->high_low_container, i: pos2, |
9302 | typecode: &container_type_2); |
9303 | bool subset = |
9304 | container_is_subset(c1, type1: container_type_1, c2, type2: container_type_2); |
9305 | if (!subset) return false; |
9306 | ++pos1; |
9307 | ++pos2; |
9308 | } else if (s1 < s2) { // s1 < s2 |
9309 | return false; |
9310 | } else { // s1 > s2 |
9311 | pos2 = ra_advance_until(ra: &ra2->high_low_container, x: s1, pos: pos2); |
9312 | } |
9313 | } |
9314 | if (pos1 == length1) |
9315 | return true; |
9316 | else |
9317 | return false; |
9318 | } |
9319 | |
9320 | static void insert_flipped_container(roaring_array_t *ans_arr, |
9321 | const roaring_array_t *x1_arr, uint16_t hb, |
9322 | uint16_t lb_start, uint16_t lb_end) { |
9323 | const int i = ra_get_index(ra: x1_arr, x: hb); |
9324 | const int j = ra_get_index(ra: ans_arr, x: hb); |
9325 | uint8_t ctype_in, ctype_out; |
9326 | void *flipped_container = NULL; |
9327 | if (i >= 0) { |
9328 | void *container_to_flip = |
9329 | ra_get_container_at_index(ra: x1_arr, i, typecode: &ctype_in); |
9330 | flipped_container = |
9331 | container_not_range(c: container_to_flip, typ: ctype_in, range_start: (uint32_t)lb_start, |
9332 | range_end: (uint32_t)(lb_end + 1), result_type: &ctype_out); |
9333 | |
9334 | if (container_get_cardinality(container: flipped_container, typecode: ctype_out)) |
9335 | ra_insert_new_key_value_at(ra: ans_arr, i: -j - 1, key: hb, container: flipped_container, |
9336 | typecode: ctype_out); |
9337 | else { |
9338 | container_free(container: flipped_container, typecode: ctype_out); |
9339 | } |
9340 | } else { |
9341 | flipped_container = container_range_of_ones( |
9342 | range_start: (uint32_t)lb_start, range_end: (uint32_t)(lb_end + 1), result_type: &ctype_out); |
9343 | ra_insert_new_key_value_at(ra: ans_arr, i: -j - 1, key: hb, container: flipped_container, |
9344 | typecode: ctype_out); |
9345 | } |
9346 | } |
9347 | |
9348 | static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, |
9349 | uint16_t lb_start, uint16_t lb_end) { |
9350 | const int i = ra_get_index(ra: x1_arr, x: hb); |
9351 | uint8_t ctype_in, ctype_out; |
9352 | void *flipped_container = NULL; |
9353 | if (i >= 0) { |
9354 | void *container_to_flip = |
9355 | ra_get_container_at_index(ra: x1_arr, i, typecode: &ctype_in); |
9356 | flipped_container = container_inot_range( |
9357 | c: container_to_flip, typ: ctype_in, range_start: (uint32_t)lb_start, |
9358 | range_end: (uint32_t)(lb_end + 1), result_type: &ctype_out); |
9359 | // if a new container was created, the old one was already freed |
9360 | if (container_get_cardinality(container: flipped_container, typecode: ctype_out)) { |
9361 | ra_set_container_at_index(ra: x1_arr, i, c: flipped_container, typecode: ctype_out); |
9362 | } else { |
9363 | container_free(container: flipped_container, typecode: ctype_out); |
9364 | ra_remove_at_index(ra: x1_arr, i); |
9365 | } |
9366 | |
9367 | } else { |
9368 | flipped_container = container_range_of_ones( |
9369 | range_start: (uint32_t)lb_start, range_end: (uint32_t)(lb_end + 1), result_type: &ctype_out); |
9370 | ra_insert_new_key_value_at(ra: x1_arr, i: -i - 1, key: hb, container: flipped_container, |
9371 | typecode: ctype_out); |
9372 | } |
9373 | } |
9374 | |
9375 | static void insert_fully_flipped_container(roaring_array_t *ans_arr, |
9376 | const roaring_array_t *x1_arr, |
9377 | uint16_t hb) { |
9378 | const int i = ra_get_index(ra: x1_arr, x: hb); |
9379 | const int j = ra_get_index(ra: ans_arr, x: hb); |
9380 | uint8_t ctype_in, ctype_out; |
9381 | void *flipped_container = NULL; |
9382 | if (i >= 0) { |
9383 | void *container_to_flip = |
9384 | ra_get_container_at_index(ra: x1_arr, i, typecode: &ctype_in); |
9385 | flipped_container = |
9386 | container_not(c: container_to_flip, typ: ctype_in, result_type: &ctype_out); |
9387 | if (container_get_cardinality(container: flipped_container, typecode: ctype_out)) |
9388 | ra_insert_new_key_value_at(ra: ans_arr, i: -j - 1, key: hb, container: flipped_container, |
9389 | typecode: ctype_out); |
9390 | else { |
9391 | container_free(container: flipped_container, typecode: ctype_out); |
9392 | } |
9393 | } else { |
9394 | flipped_container = container_range_of_ones(range_start: 0U, range_end: 0x10000U, result_type: &ctype_out); |
9395 | ra_insert_new_key_value_at(ra: ans_arr, i: -j - 1, key: hb, container: flipped_container, |
9396 | typecode: ctype_out); |
9397 | } |
9398 | } |
9399 | |
9400 | static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { |
9401 | const int i = ra_get_index(ra: x1_arr, x: hb); |
9402 | uint8_t ctype_in, ctype_out; |
9403 | void *flipped_container = NULL; |
9404 | if (i >= 0) { |
9405 | void *container_to_flip = |
9406 | ra_get_container_at_index(ra: x1_arr, i, typecode: &ctype_in); |
9407 | flipped_container = |
9408 | container_inot(c: container_to_flip, typ: ctype_in, result_type: &ctype_out); |
9409 | |
9410 | if (container_get_cardinality(container: flipped_container, typecode: ctype_out)) { |
9411 | ra_set_container_at_index(ra: x1_arr, i, c: flipped_container, typecode: ctype_out); |
9412 | } else { |
9413 | container_free(container: flipped_container, typecode: ctype_out); |
9414 | ra_remove_at_index(ra: x1_arr, i); |
9415 | } |
9416 | |
9417 | } else { |
9418 | flipped_container = container_range_of_ones(range_start: 0U, range_end: 0x10000U, result_type: &ctype_out); |
9419 | ra_insert_new_key_value_at(ra: x1_arr, i: -i - 1, key: hb, container: flipped_container, |
9420 | typecode: ctype_out); |
9421 | } |
9422 | } |
9423 | |
9424 | roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, |
9425 | uint64_t range_start, |
9426 | uint64_t range_end) { |
9427 | if (range_start >= range_end) { |
9428 | return roaring_bitmap_copy(r: x1); |
9429 | } |
9430 | if(range_end >= UINT64_C(0x100000000)) { |
9431 | range_end = UINT64_C(0x100000000); |
9432 | } |
9433 | |
9434 | roaring_bitmap_t *ans = roaring_bitmap_create(); |
9435 | roaring_bitmap_set_copy_on_write(r: ans, cow: is_cow(r: x1)); |
9436 | |
9437 | uint16_t hb_start = (uint16_t)(range_start >> 16); |
9438 | const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; |
9439 | uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); |
9440 | const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; |
9441 | |
9442 | ra_append_copies_until(ra: &ans->high_low_container, sa: &x1->high_low_container, |
9443 | stopping_key: hb_start, copy_on_write: is_cow(r: x1)); |
9444 | if (hb_start == hb_end) { |
9445 | insert_flipped_container(ans_arr: &ans->high_low_container, |
9446 | x1_arr: &x1->high_low_container, hb: hb_start, lb_start, |
9447 | lb_end); |
9448 | } else { |
9449 | // start and end containers are distinct |
9450 | if (lb_start > 0) { |
9451 | // handle first (partial) container |
9452 | insert_flipped_container(ans_arr: &ans->high_low_container, |
9453 | x1_arr: &x1->high_low_container, hb: hb_start, |
9454 | lb_start, lb_end: 0xFFFF); |
9455 | ++hb_start; // for the full containers. Can't wrap. |
9456 | } |
9457 | |
9458 | if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block |
9459 | |
9460 | for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { |
9461 | insert_fully_flipped_container(ans_arr: &ans->high_low_container, |
9462 | x1_arr: &x1->high_low_container, hb); |
9463 | } |
9464 | |
9465 | // handle a partial final container |
9466 | if (lb_end != 0xFFFF) { |
9467 | insert_flipped_container(ans_arr: &ans->high_low_container, |
9468 | x1_arr: &x1->high_low_container, hb: hb_end + 1, lb_start: 0, |
9469 | lb_end); |
9470 | ++hb_end; |
9471 | } |
9472 | } |
9473 | ra_append_copies_after(ra: &ans->high_low_container, sa: &x1->high_low_container, |
9474 | before_start: hb_end, copy_on_write: is_cow(r: x1)); |
9475 | return ans; |
9476 | } |
9477 | |
9478 | void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, |
9479 | uint64_t range_end) { |
9480 | if (range_start >= range_end) { |
9481 | return; // empty range |
9482 | } |
9483 | if(range_end >= UINT64_C(0x100000000)) { |
9484 | range_end = UINT64_C(0x100000000); |
9485 | } |
9486 | |
9487 | uint16_t hb_start = (uint16_t)(range_start >> 16); |
9488 | const uint16_t lb_start = (uint16_t)range_start; |
9489 | uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); |
9490 | const uint16_t lb_end = (uint16_t)(range_end - 1); |
9491 | |
9492 | if (hb_start == hb_end) { |
9493 | inplace_flip_container(x1_arr: &x1->high_low_container, hb: hb_start, lb_start, |
9494 | lb_end); |
9495 | } else { |
9496 | // start and end containers are distinct |
9497 | if (lb_start > 0) { |
9498 | // handle first (partial) container |
9499 | inplace_flip_container(x1_arr: &x1->high_low_container, hb: hb_start, lb_start, |
9500 | lb_end: 0xFFFF); |
9501 | ++hb_start; // for the full containers. Can't wrap. |
9502 | } |
9503 | |
9504 | if (lb_end != 0xFFFF) --hb_end; |
9505 | |
9506 | for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { |
9507 | inplace_fully_flip_container(x1_arr: &x1->high_low_container, hb); |
9508 | } |
9509 | // handle a partial final container |
9510 | if (lb_end != 0xFFFF) { |
9511 | inplace_flip_container(x1_arr: &x1->high_low_container, hb: hb_end + 1, lb_start: 0, |
9512 | lb_end); |
9513 | ++hb_end; |
9514 | } |
9515 | } |
9516 | } |
9517 | |
9518 | roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, |
9519 | const roaring_bitmap_t *x2, |
9520 | const bool bitsetconversion) { |
9521 | uint8_t container_result_type = 0; |
9522 | const int length1 = x1->high_low_container.size, |
9523 | length2 = x2->high_low_container.size; |
9524 | if (0 == length1) { |
9525 | return roaring_bitmap_copy(r: x2); |
9526 | } |
9527 | if (0 == length2) { |
9528 | return roaring_bitmap_copy(r: x1); |
9529 | } |
9530 | roaring_bitmap_t *answer = |
9531 | roaring_bitmap_create_with_capacity(cap: length1 + length2); |
9532 | roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2)); |
9533 | int pos1 = 0, pos2 = 0; |
9534 | uint8_t container_type_1, container_type_2; |
9535 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9536 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9537 | while (true) { |
9538 | if (s1 == s2) { |
9539 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
9540 | typecode: &container_type_1); |
9541 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
9542 | typecode: &container_type_2); |
9543 | void *c; |
9544 | if (bitsetconversion && (get_container_type(container: c1, type: container_type_1) != |
9545 | BITSET_CONTAINER_TYPE_CODE) && |
9546 | (get_container_type(container: c2, type: container_type_2) != |
9547 | BITSET_CONTAINER_TYPE_CODE)) { |
9548 | void *newc1 = |
9549 | container_mutable_unwrap_shared(candidate_shared_container: c1, type: &container_type_1); |
9550 | newc1 = container_to_bitset(container: newc1, typecode: container_type_1); |
9551 | container_type_1 = BITSET_CONTAINER_TYPE_CODE; |
9552 | c = container_lazy_ior(c1: newc1, type1: container_type_1, c2, |
9553 | type2: container_type_2, |
9554 | result_type: &container_result_type); |
9555 | if (c != newc1) { // should not happen |
9556 | container_free(container: newc1, typecode: container_type_1); |
9557 | } |
9558 | } else { |
9559 | c = container_lazy_or(c1, type1: container_type_1, c2, |
9560 | type2: container_type_2, result_type: &container_result_type); |
9561 | } |
9562 | // since we assume that the initial containers are non-empty, |
9563 | // the |
9564 | // result here |
9565 | // can only be non-empty |
9566 | ra_append(ra: &answer->high_low_container, s: s1, c, |
9567 | typecode: container_result_type); |
9568 | ++pos1; |
9569 | ++pos2; |
9570 | if (pos1 == length1) break; |
9571 | if (pos2 == length2) break; |
9572 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9573 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9574 | |
9575 | } else if (s1 < s2) { // s1 < s2 |
9576 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
9577 | typecode: &container_type_1); |
9578 | c1 = |
9579 | get_copy_of_container(container: c1, typecode: &container_type_1, copy_on_write: is_cow(r: x1)); |
9580 | if (is_cow(r: x1)) { |
9581 | ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c: c1, |
9582 | typecode: container_type_1); |
9583 | } |
9584 | ra_append(ra: &answer->high_low_container, s: s1, c: c1, typecode: container_type_1); |
9585 | pos1++; |
9586 | if (pos1 == length1) break; |
9587 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9588 | |
9589 | } else { // s1 > s2 |
9590 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
9591 | typecode: &container_type_2); |
9592 | c2 = |
9593 | get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2)); |
9594 | if (is_cow(r: x2)) { |
9595 | ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2, |
9596 | typecode: container_type_2); |
9597 | } |
9598 | ra_append(ra: &answer->high_low_container, s: s2, c: c2, typecode: container_type_2); |
9599 | pos2++; |
9600 | if (pos2 == length2) break; |
9601 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9602 | } |
9603 | } |
9604 | if (pos1 == length1) { |
9605 | ra_append_copy_range(ra: &answer->high_low_container, |
9606 | sa: &x2->high_low_container, start_index: pos2, end_index: length2, |
9607 | copy_on_write: is_cow(r: x2)); |
9608 | } else if (pos2 == length2) { |
9609 | ra_append_copy_range(ra: &answer->high_low_container, |
9610 | sa: &x1->high_low_container, start_index: pos1, end_index: length1, |
9611 | copy_on_write: is_cow(r: x1)); |
9612 | } |
9613 | return answer; |
9614 | } |
9615 | |
9616 | void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, |
9617 | const roaring_bitmap_t *x2, |
9618 | const bool bitsetconversion) { |
9619 | uint8_t container_result_type = 0; |
9620 | int length1 = x1->high_low_container.size; |
9621 | const int length2 = x2->high_low_container.size; |
9622 | |
9623 | if (0 == length2) return; |
9624 | |
9625 | if (0 == length1) { |
9626 | roaring_bitmap_overwrite(dest: x1, src: x2); |
9627 | return; |
9628 | } |
9629 | int pos1 = 0, pos2 = 0; |
9630 | uint8_t container_type_1, container_type_2; |
9631 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9632 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9633 | while (true) { |
9634 | if (s1 == s2) { |
9635 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
9636 | typecode: &container_type_1); |
9637 | if (!container_is_full(container: c1, typecode: container_type_1)) { |
9638 | if ((bitsetconversion == false) || |
9639 | (get_container_type(container: c1, type: container_type_1) == |
9640 | BITSET_CONTAINER_TYPE_CODE)) { |
9641 | c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1); |
9642 | } else { |
9643 | // convert to bitset |
9644 | void *oldc1 = c1; |
9645 | uint8_t oldt1 = container_type_1; |
9646 | c1 = container_mutable_unwrap_shared(candidate_shared_container: c1, type: &container_type_1); |
9647 | c1 = container_to_bitset(container: c1, typecode: container_type_1); |
9648 | container_free(container: oldc1, typecode: oldt1); |
9649 | container_type_1 = BITSET_CONTAINER_TYPE_CODE; |
9650 | } |
9651 | |
9652 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, |
9653 | i: pos2, typecode: &container_type_2); |
9654 | void *c = container_lazy_ior(c1, type1: container_type_1, c2, |
9655 | type2: container_type_2, |
9656 | result_type: &container_result_type); |
9657 | if (c != |
9658 | c1) { // in this instance a new container was created, and |
9659 | // we need to free the old one |
9660 | container_free(container: c1, typecode: container_type_1); |
9661 | } |
9662 | |
9663 | ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c, |
9664 | typecode: container_result_type); |
9665 | } |
9666 | ++pos1; |
9667 | ++pos2; |
9668 | if (pos1 == length1) break; |
9669 | if (pos2 == length2) break; |
9670 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9671 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9672 | |
9673 | } else if (s1 < s2) { // s1 < s2 |
9674 | pos1++; |
9675 | if (pos1 == length1) break; |
9676 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9677 | |
9678 | } else { // s1 > s2 |
9679 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
9680 | typecode: &container_type_2); |
9681 | // void *c2_clone = container_clone(c2, container_type_2); |
9682 | c2 = |
9683 | get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2)); |
9684 | if (is_cow(r: x2)) { |
9685 | ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2, |
9686 | typecode: container_type_2); |
9687 | } |
9688 | ra_insert_new_key_value_at(ra: &x1->high_low_container, i: pos1, key: s2, container: c2, |
9689 | typecode: container_type_2); |
9690 | pos1++; |
9691 | length1++; |
9692 | pos2++; |
9693 | if (pos2 == length2) break; |
9694 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9695 | } |
9696 | } |
9697 | if (pos1 == length1) { |
9698 | ra_append_copy_range(ra: &x1->high_low_container, sa: &x2->high_low_container, |
9699 | start_index: pos2, end_index: length2, copy_on_write: is_cow(r: x2)); |
9700 | } |
9701 | } |
9702 | |
9703 | roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, |
9704 | const roaring_bitmap_t *x2) { |
9705 | uint8_t container_result_type = 0; |
9706 | const int length1 = x1->high_low_container.size, |
9707 | length2 = x2->high_low_container.size; |
9708 | if (0 == length1) { |
9709 | return roaring_bitmap_copy(r: x2); |
9710 | } |
9711 | if (0 == length2) { |
9712 | return roaring_bitmap_copy(r: x1); |
9713 | } |
9714 | roaring_bitmap_t *answer = |
9715 | roaring_bitmap_create_with_capacity(cap: length1 + length2); |
9716 | roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2)); |
9717 | int pos1 = 0, pos2 = 0; |
9718 | uint8_t container_type_1, container_type_2; |
9719 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9720 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9721 | while (true) { |
9722 | if (s1 == s2) { |
9723 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
9724 | typecode: &container_type_1); |
9725 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
9726 | typecode: &container_type_2); |
9727 | void *c = |
9728 | container_lazy_xor(c1, type1: container_type_1, c2, type2: container_type_2, |
9729 | result_type: &container_result_type); |
9730 | |
9731 | if (container_nonzero_cardinality(container: c, typecode: container_result_type)) { |
9732 | ra_append(ra: &answer->high_low_container, s: s1, c, |
9733 | typecode: container_result_type); |
9734 | } else { |
9735 | container_free(container: c, typecode: container_result_type); |
9736 | } |
9737 | |
9738 | ++pos1; |
9739 | ++pos2; |
9740 | if (pos1 == length1) break; |
9741 | if (pos2 == length2) break; |
9742 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9743 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9744 | |
9745 | } else if (s1 < s2) { // s1 < s2 |
9746 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
9747 | typecode: &container_type_1); |
9748 | c1 = |
9749 | get_copy_of_container(container: c1, typecode: &container_type_1, copy_on_write: is_cow(r: x1)); |
9750 | if (is_cow(r: x1)) { |
9751 | ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c: c1, |
9752 | typecode: container_type_1); |
9753 | } |
9754 | ra_append(ra: &answer->high_low_container, s: s1, c: c1, typecode: container_type_1); |
9755 | pos1++; |
9756 | if (pos1 == length1) break; |
9757 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9758 | |
9759 | } else { // s1 > s2 |
9760 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
9761 | typecode: &container_type_2); |
9762 | c2 = |
9763 | get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2)); |
9764 | if (is_cow(r: x2)) { |
9765 | ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2, |
9766 | typecode: container_type_2); |
9767 | } |
9768 | ra_append(ra: &answer->high_low_container, s: s2, c: c2, typecode: container_type_2); |
9769 | pos2++; |
9770 | if (pos2 == length2) break; |
9771 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9772 | } |
9773 | } |
9774 | if (pos1 == length1) { |
9775 | ra_append_copy_range(ra: &answer->high_low_container, |
9776 | sa: &x2->high_low_container, start_index: pos2, end_index: length2, |
9777 | copy_on_write: is_cow(r: x2)); |
9778 | } else if (pos2 == length2) { |
9779 | ra_append_copy_range(ra: &answer->high_low_container, |
9780 | sa: &x1->high_low_container, start_index: pos1, end_index: length1, |
9781 | copy_on_write: is_cow(r: x1)); |
9782 | } |
9783 | return answer; |
9784 | } |
9785 | |
9786 | void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, |
9787 | const roaring_bitmap_t *x2) { |
9788 | assert(x1 != x2); |
9789 | uint8_t container_result_type = 0; |
9790 | int length1 = x1->high_low_container.size; |
9791 | const int length2 = x2->high_low_container.size; |
9792 | |
9793 | if (0 == length2) return; |
9794 | |
9795 | if (0 == length1) { |
9796 | roaring_bitmap_overwrite(dest: x1, src: x2); |
9797 | return; |
9798 | } |
9799 | int pos1 = 0, pos2 = 0; |
9800 | uint8_t container_type_1, container_type_2; |
9801 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9802 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9803 | while (true) { |
9804 | if (s1 == s2) { |
9805 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
9806 | typecode: &container_type_1); |
9807 | c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1); |
9808 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
9809 | typecode: &container_type_2); |
9810 | void *c = |
9811 | container_lazy_ixor(c1, type1: container_type_1, c2, type2: container_type_2, |
9812 | result_type: &container_result_type); |
9813 | if (container_nonzero_cardinality(container: c, typecode: container_result_type)) { |
9814 | ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c, |
9815 | typecode: container_result_type); |
9816 | ++pos1; |
9817 | } else { |
9818 | container_free(container: c, typecode: container_result_type); |
9819 | ra_remove_at_index(ra: &x1->high_low_container, i: pos1); |
9820 | --length1; |
9821 | } |
9822 | ++pos2; |
9823 | if (pos1 == length1) break; |
9824 | if (pos2 == length2) break; |
9825 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9826 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9827 | |
9828 | } else if (s1 < s2) { // s1 < s2 |
9829 | pos1++; |
9830 | if (pos1 == length1) break; |
9831 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9832 | |
9833 | } else { // s1 > s2 |
9834 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
9835 | typecode: &container_type_2); |
9836 | // void *c2_clone = container_clone(c2, container_type_2); |
9837 | c2 = |
9838 | get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2)); |
9839 | if (is_cow(r: x2)) { |
9840 | ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2, |
9841 | typecode: container_type_2); |
9842 | } |
9843 | ra_insert_new_key_value_at(ra: &x1->high_low_container, i: pos1, key: s2, container: c2, |
9844 | typecode: container_type_2); |
9845 | pos1++; |
9846 | length1++; |
9847 | pos2++; |
9848 | if (pos2 == length2) break; |
9849 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9850 | } |
9851 | } |
9852 | if (pos1 == length1) { |
9853 | ra_append_copy_range(ra: &x1->high_low_container, sa: &x2->high_low_container, |
9854 | start_index: pos2, end_index: length2, copy_on_write: is_cow(r: x2)); |
9855 | } |
9856 | } |
9857 | |
9858 | void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) { |
9859 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
9860 | const uint8_t original_typecode = ra->high_low_container.typecodes[i]; |
9861 | void *container = ra->high_low_container.containers[i]; |
9862 | uint8_t new_typecode = original_typecode; |
9863 | void *newcontainer = |
9864 | container_repair_after_lazy(container, typecode: &new_typecode); |
9865 | ra->high_low_container.containers[i] = newcontainer; |
9866 | ra->high_low_container.typecodes[i] = new_typecode; |
9867 | } |
9868 | } |
9869 | |
9870 | |
9871 | |
9872 | /** |
9873 | * roaring_bitmap_rank returns the number of integers that are smaller or equal |
9874 | * to x. |
9875 | */ |
9876 | uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { |
9877 | uint64_t size = 0; |
9878 | uint32_t xhigh = x >> 16; |
9879 | for (int i = 0; i < bm->high_low_container.size; i++) { |
9880 | uint32_t key = bm->high_low_container.keys[i]; |
9881 | if (xhigh > key) { |
9882 | size += |
9883 | container_get_cardinality(container: bm->high_low_container.containers[i], |
9884 | typecode: bm->high_low_container.typecodes[i]); |
9885 | } else if (xhigh == key) { |
9886 | return size + container_rank(container: bm->high_low_container.containers[i], |
9887 | typecode: bm->high_low_container.typecodes[i], |
9888 | x: x & 0xFFFF); |
9889 | } else { |
9890 | return size; |
9891 | } |
9892 | } |
9893 | return size; |
9894 | } |
9895 | |
9896 | /** |
9897 | * roaring_bitmap_smallest returns the smallest value in the set. |
9898 | * Returns UINT32_MAX if the set is empty. |
9899 | */ |
9900 | uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { |
9901 | if (bm->high_low_container.size > 0) { |
9902 | void *container = bm->high_low_container.containers[0]; |
9903 | uint8_t typecode = bm->high_low_container.typecodes[0]; |
9904 | uint32_t key = bm->high_low_container.keys[0]; |
9905 | uint32_t lowvalue = container_minimum(container, typecode); |
9906 | return lowvalue | (key << 16); |
9907 | } |
9908 | return UINT32_MAX; |
9909 | } |
9910 | |
9911 | /** |
9912 | * roaring_bitmap_smallest returns the greatest value in the set. |
9913 | * Returns 0 if the set is empty. |
9914 | */ |
9915 | uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { |
9916 | if (bm->high_low_container.size > 0) { |
9917 | void *container = |
9918 | bm->high_low_container.containers[bm->high_low_container.size - 1]; |
9919 | uint8_t typecode = |
9920 | bm->high_low_container.typecodes[bm->high_low_container.size - 1]; |
9921 | uint32_t key = |
9922 | bm->high_low_container.keys[bm->high_low_container.size - 1]; |
9923 | uint32_t lowvalue = container_maximum(container, typecode); |
9924 | return lowvalue | (key << 16); |
9925 | } |
9926 | return 0; |
9927 | } |
9928 | |
9929 | bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, |
9930 | uint32_t *element) { |
9931 | void *container; |
9932 | uint8_t typecode; |
9933 | uint16_t key; |
9934 | uint32_t start_rank = 0; |
9935 | int i = 0; |
9936 | bool valid = false; |
9937 | while (!valid && i < bm->high_low_container.size) { |
9938 | container = bm->high_low_container.containers[i]; |
9939 | typecode = bm->high_low_container.typecodes[i]; |
9940 | valid = |
9941 | container_select(container, typecode, start_rank: &start_rank, rank, element); |
9942 | i++; |
9943 | } |
9944 | |
9945 | if (valid) { |
9946 | key = bm->high_low_container.keys[i - 1]; |
9947 | *element |= (key << 16); |
9948 | return true; |
9949 | } else |
9950 | return false; |
9951 | } |
9952 | |
9953 | bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, |
9954 | const roaring_bitmap_t *x2) { |
9955 | const int length1 = x1->high_low_container.size, |
9956 | length2 = x2->high_low_container.size; |
9957 | uint64_t answer = 0; |
9958 | int pos1 = 0, pos2 = 0; |
9959 | |
9960 | while (pos1 < length1 && pos2 < length2) { |
9961 | const uint16_t s1 = ra_get_key_at_index(ra: & x1->high_low_container, i: pos1); |
9962 | const uint16_t s2 = ra_get_key_at_index(ra: & x2->high_low_container, i: pos2); |
9963 | |
9964 | if (s1 == s2) { |
9965 | uint8_t container_type_1, container_type_2; |
9966 | void *c1 = ra_get_container_at_index(ra: & x1->high_low_container, i: pos1, |
9967 | typecode: &container_type_1); |
9968 | void *c2 = ra_get_container_at_index(ra: & x2->high_low_container, i: pos2, |
9969 | typecode: &container_type_2); |
9970 | if( container_intersect(c1, type1: container_type_1, c2, type2: container_type_2) ) return true; |
9971 | ++pos1; |
9972 | ++pos2; |
9973 | } else if (s1 < s2) { // s1 < s2 |
9974 | pos1 = ra_advance_until(ra: & x1->high_low_container, x: s2, pos: pos1); |
9975 | } else { // s1 > s2 |
9976 | pos2 = ra_advance_until(ra: & x2->high_low_container, x: s1, pos: pos2); |
9977 | } |
9978 | } |
9979 | return answer; |
9980 | } |
9981 | |
9982 | |
9983 | uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, |
9984 | const roaring_bitmap_t *x2) { |
9985 | const int length1 = x1->high_low_container.size, |
9986 | length2 = x2->high_low_container.size; |
9987 | uint64_t answer = 0; |
9988 | int pos1 = 0, pos2 = 0; |
9989 | |
9990 | while (pos1 < length1 && pos2 < length2) { |
9991 | const uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
9992 | const uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
9993 | |
9994 | if (s1 == s2) { |
9995 | uint8_t container_type_1, container_type_2; |
9996 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
9997 | typecode: &container_type_1); |
9998 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
9999 | typecode: &container_type_2); |
10000 | answer += container_and_cardinality(c1, type1: container_type_1, c2, |
10001 | type2: container_type_2); |
10002 | ++pos1; |
10003 | ++pos2; |
10004 | } else if (s1 < s2) { // s1 < s2 |
10005 | pos1 = ra_advance_until(ra: &x1->high_low_container, x: s2, pos: pos1); |
10006 | } else { // s1 > s2 |
10007 | pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2); |
10008 | } |
10009 | } |
10010 | return answer; |
10011 | } |
10012 | |
10013 | double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, |
10014 | const roaring_bitmap_t *x2) { |
10015 | const uint64_t c1 = roaring_bitmap_get_cardinality(ra: x1); |
10016 | const uint64_t c2 = roaring_bitmap_get_cardinality(ra: x2); |
10017 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
10018 | return (double)inter / (double)(c1 + c2 - inter); |
10019 | } |
10020 | |
10021 | uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, |
10022 | const roaring_bitmap_t *x2) { |
10023 | const uint64_t c1 = roaring_bitmap_get_cardinality(ra: x1); |
10024 | const uint64_t c2 = roaring_bitmap_get_cardinality(ra: x2); |
10025 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
10026 | return c1 + c2 - inter; |
10027 | } |
10028 | |
10029 | uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, |
10030 | const roaring_bitmap_t *x2) { |
10031 | const uint64_t c1 = roaring_bitmap_get_cardinality(ra: x1); |
10032 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
10033 | return c1 - inter; |
10034 | } |
10035 | |
10036 | uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, |
10037 | const roaring_bitmap_t *x2) { |
10038 | const uint64_t c1 = roaring_bitmap_get_cardinality(ra: x1); |
10039 | const uint64_t c2 = roaring_bitmap_get_cardinality(ra: x2); |
10040 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
10041 | return c1 + c2 - 2 * inter; |
10042 | } |
10043 | |
10044 | |
10045 | /** |
10046 | * Check whether a range of values from range_start (included) to range_end (excluded) is present |
10047 | */ |
10048 | bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { |
10049 | if(range_end >= UINT64_C(0x100000000)) { |
10050 | range_end = UINT64_C(0x100000000); |
10051 | } |
10052 | if (range_start >= range_end) return true; // empty range are always contained! |
10053 | if (range_end - range_start == 1) return roaring_bitmap_contains(r, val: (uint32_t)range_start); |
10054 | uint16_t hb_rs = (uint16_t)(range_start >> 16); |
10055 | uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); |
10056 | const int32_t span = hb_re - hb_rs; |
10057 | const int32_t hlc_sz = ra_get_size(ra: &r->high_low_container); |
10058 | if (hlc_sz < span + 1) { |
10059 | return false; |
10060 | } |
10061 | int32_t is = ra_get_index(ra: &r->high_low_container, x: hb_rs); |
10062 | int32_t ie = ra_get_index(ra: &r->high_low_container, x: hb_re); |
10063 | ie = (ie < 0 ? -ie - 1 : ie); |
10064 | if ((is < 0) || ((ie - is) != span)) { |
10065 | return false; |
10066 | } |
10067 | const uint32_t lb_rs = range_start & 0xFFFF; |
10068 | const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; |
10069 | uint8_t typecode; |
10070 | void *container = ra_get_container_at_index(ra: &r->high_low_container, i: is, typecode: &typecode); |
10071 | if (hb_rs == hb_re) { |
10072 | return container_contains_range(container, range_start: lb_rs, range_end: lb_re, typecode); |
10073 | } |
10074 | if (!container_contains_range(container, range_start: lb_rs, range_end: 1 << 16, typecode)) { |
10075 | return false; |
10076 | } |
10077 | assert(ie < hlc_sz); // would indicate an algorithmic bug |
10078 | container = ra_get_container_at_index(ra: &r->high_low_container, i: ie, typecode: &typecode); |
10079 | if (!container_contains_range(container, range_start: 0, range_end: lb_re, typecode)) { |
10080 | return false; |
10081 | } |
10082 | for (int32_t i = is + 1; i < ie; ++i) { |
10083 | container = ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode); |
10084 | if (!container_is_full(container, typecode) ) { |
10085 | return false; |
10086 | } |
10087 | } |
10088 | return true; |
10089 | } |
10090 | |
10091 | |
10092 | bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, |
10093 | const roaring_bitmap_t *ra2) { |
10094 | return (roaring_bitmap_get_cardinality(ra: ra2) > |
10095 | roaring_bitmap_get_cardinality(ra: ra1) && |
10096 | roaring_bitmap_is_subset(ra1, ra2)); |
10097 | } |
10098 | |
10099 | |
10100 | /* |
10101 | * FROZEN SERIALIZATION FORMAT DESCRIPTION |
10102 | * |
10103 | * -- (beginning must be aligned by 32 bytes) -- |
10104 | * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers] |
10105 | * <run_data> rle16_t[total number of rle elements in all run containers] |
10106 | * <array_data> uint16_t[total number of array elements in all array containers] |
10107 | * <keys> uint16_t[num_containers] |
10108 | * <counts> uint16_t[num_containers] |
10109 | * <typecodes> uint8_t[num_containers] |
10110 | * <header> uint32_t |
10111 | * |
10112 | * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits) |
10113 | * and the number of containers (17 bits). |
10114 | * |
10115 | * <counts> stores number of elements for every container. |
10116 | * Its meaning depends on container type. |
10117 | * For array and bitset containers, this value is the container cardinality minus one. |
10118 | * For run container, it is the number of rle_t elements (n_runs). |
10119 | * |
10120 | * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of |
10121 | * all containers of respective type. |
10122 | * |
* <*_data> and <keys> are kept close together because they are not accessed
* during deserialization. This may reduce IO in case of large mapped bitmaps.
* All members have their native alignments during deserialization except <header>,
* which is not guaranteed to be aligned by 4 bytes.
10127 | */ |
10128 | |
10129 | size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { |
10130 | const roaring_array_t *ra = &rb->high_low_container; |
10131 | size_t num_bytes = 0; |
10132 | for (int32_t i = 0; i < ra->size; i++) { |
10133 | switch (ra->typecodes[i]) { |
10134 | case BITSET_CONTAINER_TYPE_CODE: { |
10135 | num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
10136 | break; |
10137 | } |
10138 | case RUN_CONTAINER_TYPE_CODE: { |
10139 | const run_container_t *run = |
10140 | (const run_container_t *) ra->containers[i]; |
10141 | num_bytes += run->n_runs * sizeof(rle16_t); |
10142 | break; |
10143 | } |
10144 | case ARRAY_CONTAINER_TYPE_CODE: { |
10145 | const array_container_t *array = |
10146 | (const array_container_t *) ra->containers[i]; |
10147 | num_bytes += array->cardinality * sizeof(uint16_t); |
10148 | break; |
10149 | } |
10150 | default: |
10151 | __builtin_unreachable(); |
10152 | } |
10153 | } |
10154 | num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes |
10155 | num_bytes += 4; // header |
10156 | return num_bytes; |
10157 | } |
10158 | |
/**
 * Bump allocator step: hands out the current position of *arena and moves
 * *arena forward by num_bytes. No bounds or alignment checks are performed.
 */
inline static void *arena_alloc(char **arena, size_t num_bytes) {
    char *const chunk_start = *arena;
    *arena = chunk_start + num_bytes;
    return chunk_start;
}
10164 | |
10165 | void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { |
10166 | /* |
10167 | * Note: we do not require user to supply spicificly aligned buffer. |
10168 | * Thus we have to use memcpy() everywhere. |
10169 | */ |
10170 | |
10171 | const roaring_array_t *ra = &rb->high_low_container; |
10172 | |
10173 | size_t bitset_zone_size = 0; |
10174 | size_t run_zone_size = 0; |
10175 | size_t array_zone_size = 0; |
10176 | for (int32_t i = 0; i < ra->size; i++) { |
10177 | switch (ra->typecodes[i]) { |
10178 | case BITSET_CONTAINER_TYPE_CODE: { |
10179 | bitset_zone_size += |
10180 | BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
10181 | break; |
10182 | } |
10183 | case RUN_CONTAINER_TYPE_CODE: { |
10184 | const run_container_t *run = |
10185 | (const run_container_t *) ra->containers[i]; |
10186 | run_zone_size += run->n_runs * sizeof(rle16_t); |
10187 | break; |
10188 | } |
10189 | case ARRAY_CONTAINER_TYPE_CODE: { |
10190 | const array_container_t *array = |
10191 | (const array_container_t *) ra->containers[i]; |
10192 | array_zone_size += array->cardinality * sizeof(uint16_t); |
10193 | break; |
10194 | } |
10195 | default: |
10196 | __builtin_unreachable(); |
10197 | } |
10198 | } |
10199 | |
10200 | uint64_t *bitset_zone = (uint64_t *)arena_alloc(arena: &buf, num_bytes: bitset_zone_size); |
10201 | rle16_t *run_zone = (rle16_t *)arena_alloc(arena: &buf, num_bytes: run_zone_size); |
10202 | uint16_t *array_zone = (uint16_t *)arena_alloc(arena: &buf, num_bytes: array_zone_size); |
10203 | uint16_t *key_zone = (uint16_t *)arena_alloc(arena: &buf, num_bytes: 2*ra->size); |
10204 | uint16_t *count_zone = (uint16_t *)arena_alloc(arena: &buf, num_bytes: 2*ra->size); |
10205 | uint8_t *typecode_zone = (uint8_t *)arena_alloc(arena: &buf, num_bytes: ra->size); |
10206 | uint32_t * = (uint32_t *)arena_alloc(arena: &buf, num_bytes: 4); |
10207 | |
10208 | for (int32_t i = 0; i < ra->size; i++) { |
10209 | uint16_t count; |
10210 | switch (ra->typecodes[i]) { |
10211 | case BITSET_CONTAINER_TYPE_CODE: { |
10212 | const bitset_container_t *bitset = |
10213 | (const bitset_container_t *) ra->containers[i]; |
10214 | memcpy(dest: bitset_zone, src: bitset->array, |
10215 | n: BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); |
10216 | bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; |
10217 | if (bitset->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
10218 | count = bitset->cardinality - 1; |
10219 | } else { |
10220 | count = bitset_container_compute_cardinality(bitset) - 1; |
10221 | } |
10222 | break; |
10223 | } |
10224 | case RUN_CONTAINER_TYPE_CODE: { |
10225 | const run_container_t *run = |
10226 | (const run_container_t *) ra->containers[i]; |
10227 | size_t num_bytes = run->n_runs * sizeof(rle16_t); |
10228 | memcpy(dest: run_zone, src: run->runs, n: num_bytes); |
10229 | run_zone += run->n_runs; |
10230 | count = run->n_runs; |
10231 | break; |
10232 | } |
10233 | case ARRAY_CONTAINER_TYPE_CODE: { |
10234 | const array_container_t *array = |
10235 | (const array_container_t *) ra->containers[i]; |
10236 | size_t num_bytes = array->cardinality * sizeof(uint16_t); |
10237 | memcpy(dest: array_zone, src: array->array, n: num_bytes); |
10238 | array_zone += array->cardinality; |
10239 | count = array->cardinality - 1; |
10240 | break; |
10241 | } |
10242 | default: |
10243 | __builtin_unreachable(); |
10244 | } |
10245 | memcpy(dest: &count_zone[i], src: &count, n: 2); |
10246 | } |
10247 | memcpy(dest: key_zone, src: ra->keys, n: ra->size * sizeof(uint16_t)); |
10248 | memcpy(dest: typecode_zone, src: ra->typecodes, n: ra->size * sizeof(uint8_t)); |
10249 | uint32_t = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; |
10250 | memcpy(dest: header_zone, src: &header, n: 4); |
10251 | } |
10252 | |
10253 | const roaring_bitmap_t * |
10254 | roaring_bitmap_frozen_view(const char *buf, size_t length) { |
10255 | if ((uintptr_t)buf % 32 != 0) { |
10256 | return NULL; |
10257 | } |
10258 | |
10259 | // cookie and num_containers |
10260 | if (length < 4) { |
10261 | return NULL; |
10262 | } |
10263 | uint32_t ; |
10264 | memcpy(dest: &header, src: buf + length - 4, n: 4); // header may be misaligned |
10265 | if ((header & 0x7FFF) != FROZEN_COOKIE) { |
10266 | return NULL; |
10267 | } |
10268 | int32_t num_containers = (header >> 15); |
10269 | |
10270 | // typecodes, counts and keys |
10271 | if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { |
10272 | return NULL; |
10273 | } |
10274 | uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); |
10275 | uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); |
10276 | uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); |
10277 | |
10278 | // {bitset,array,run}_zone |
10279 | int32_t num_bitset_containers = 0; |
10280 | int32_t num_run_containers = 0; |
10281 | int32_t num_array_containers = 0; |
10282 | size_t bitset_zone_size = 0; |
10283 | size_t run_zone_size = 0; |
10284 | size_t array_zone_size = 0; |
10285 | for (int32_t i = 0; i < num_containers; i++) { |
10286 | switch (typecodes[i]) { |
10287 | case BITSET_CONTAINER_TYPE_CODE: |
10288 | num_bitset_containers++; |
10289 | bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
10290 | break; |
10291 | case RUN_CONTAINER_TYPE_CODE: |
10292 | num_run_containers++; |
10293 | run_zone_size += counts[i] * sizeof(rle16_t); |
10294 | break; |
10295 | case ARRAY_CONTAINER_TYPE_CODE: |
10296 | num_array_containers++; |
10297 | array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); |
10298 | break; |
10299 | default: |
10300 | return NULL; |
10301 | } |
10302 | } |
10303 | if (length != bitset_zone_size + run_zone_size + array_zone_size + |
10304 | 5 * num_containers + 4) { |
10305 | return NULL; |
10306 | } |
10307 | uint64_t *bitset_zone = (uint64_t*) (buf); |
10308 | rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size); |
10309 | uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size); |
10310 | |
10311 | size_t alloc_size = 0; |
10312 | alloc_size += sizeof(roaring_bitmap_t); |
10313 | alloc_size += num_containers * sizeof(void *); |
10314 | alloc_size += num_bitset_containers * sizeof(bitset_container_t); |
10315 | alloc_size += num_run_containers * sizeof(run_container_t); |
10316 | alloc_size += num_array_containers * sizeof(array_container_t); |
10317 | |
10318 | char *arena = (char *)malloc(size: alloc_size); |
10319 | if (arena == NULL) { |
10320 | return NULL; |
10321 | } |
10322 | |
10323 | roaring_bitmap_t *rb = (roaring_bitmap_t *) |
10324 | arena_alloc(arena: &arena, num_bytes: sizeof(roaring_bitmap_t)); |
10325 | rb->high_low_container.flags = ROARING_FLAG_FROZEN; |
10326 | rb->high_low_container.allocation_size = num_containers; |
10327 | rb->high_low_container.size = num_containers; |
10328 | rb->high_low_container.keys = (uint16_t *)keys; |
10329 | rb->high_low_container.typecodes = (uint8_t *)typecodes; |
10330 | rb->high_low_container.containers = |
10331 | (void **)arena_alloc(arena: &arena, num_bytes: sizeof(void*) * num_containers); |
10332 | for (int32_t i = 0; i < num_containers; i++) { |
10333 | switch (typecodes[i]) { |
10334 | case BITSET_CONTAINER_TYPE_CODE: { |
10335 | bitset_container_t *bitset = (bitset_container_t *) |
10336 | arena_alloc(arena: &arena, num_bytes: sizeof(bitset_container_t)); |
10337 | bitset->array = bitset_zone; |
10338 | bitset->cardinality = counts[i] + UINT32_C(1); |
10339 | rb->high_low_container.containers[i] = bitset; |
10340 | bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; |
10341 | break; |
10342 | } |
10343 | case RUN_CONTAINER_TYPE_CODE: { |
10344 | run_container_t *run = (run_container_t *) |
10345 | arena_alloc(arena: &arena, num_bytes: sizeof(run_container_t)); |
10346 | run->capacity = counts[i]; |
10347 | run->n_runs = counts[i]; |
10348 | run->runs = run_zone; |
10349 | rb->high_low_container.containers[i] = run; |
10350 | run_zone += run->n_runs; |
10351 | break; |
10352 | } |
10353 | case ARRAY_CONTAINER_TYPE_CODE: { |
10354 | array_container_t *array = (array_container_t *) |
10355 | arena_alloc(arena: &arena, num_bytes: sizeof(array_container_t)); |
10356 | array->capacity = counts[i] + UINT32_C(1); |
10357 | array->cardinality = counts[i] + UINT32_C(1); |
10358 | array->array = array_zone; |
10359 | rb->high_low_container.containers[i] = array; |
10360 | array_zone += counts[i] + UINT32_C(1); |
10361 | break; |
10362 | } |
10363 | default: |
10364 | free(ptr: arena); |
10365 | return NULL; |
10366 | } |
10367 | } |
10368 | |
10369 | return rb; |
10370 | } |
10371 | /* end file src/roaring.c */ |
10372 | /* begin file src/roaring_array.c */ |
10373 | #include <assert.h> |
10374 | #include <stdbool.h> |
10375 | #include <stdio.h> |
10376 | #include <stdlib.h> |
10377 | #include <string.h> |
10378 | #include <inttypes.h> |
10379 | |
10380 | |
10381 | // Convention: [0,ra->size) all elements are initialized |
10382 | // [ra->size, ra->allocation_size) is junk and contains nothing needing freeing |
10383 | |
10384 | static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { |
10385 | // because we combine the allocations, it is not possible to use realloc |
10386 | /*ra->keys = |
10387 | (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity); |
10388 | ra->containers = |
10389 | (void **)realloc(ra->containers, sizeof(void *) * new_capacity); |
10390 | ra->typecodes = |
10391 | (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity); |
10392 | if (!ra->keys || !ra->containers || !ra->typecodes) { |
10393 | free(ra->keys); |
10394 | free(ra->containers); |
10395 | free(ra->typecodes); |
10396 | return false; |
10397 | }*/ |
10398 | |
10399 | if ( new_capacity == 0 ) { |
10400 | free(ptr: ra->containers); |
10401 | ra->containers = NULL; |
10402 | ra->keys = NULL; |
10403 | ra->typecodes = NULL; |
10404 | ra->allocation_size = 0; |
10405 | return true; |
10406 | } |
10407 | const size_t memoryneeded = |
10408 | new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); |
10409 | void *bigalloc = malloc(size: memoryneeded); |
10410 | if (!bigalloc) return false; |
10411 | void *oldbigalloc = ra->containers; |
10412 | void **newcontainers = (void **)bigalloc; |
10413 | uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); |
10414 | uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); |
10415 | assert((char *)(newtypecodes + new_capacity) == |
10416 | (char *)bigalloc + memoryneeded); |
10417 | if(ra->size > 0) { |
10418 | memcpy(dest: newcontainers, src: ra->containers, n: sizeof(void *) * ra->size); |
10419 | memcpy(dest: newkeys, src: ra->keys, n: sizeof(uint16_t) * ra->size); |
10420 | memcpy(dest: newtypecodes, src: ra->typecodes, n: sizeof(uint8_t) * ra->size); |
10421 | } |
10422 | ra->containers = newcontainers; |
10423 | ra->keys = newkeys; |
10424 | ra->typecodes = newtypecodes; |
10425 | ra->allocation_size = new_capacity; |
10426 | free(ptr: oldbigalloc); |
10427 | return true; |
10428 | } |
10429 | |
10430 | bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { |
10431 | if (!new_ra) return false; |
10432 | ra_init(t: new_ra); |
10433 | |
10434 | if (cap > INT32_MAX) { return false; } |
10435 | |
10436 | if(cap > 0) { |
10437 | void *bigalloc = |
10438 | malloc(size: cap * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t))); |
10439 | if( bigalloc == NULL ) return false; |
10440 | new_ra->containers = (void **)bigalloc; |
10441 | new_ra->keys = (uint16_t *)(new_ra->containers + cap); |
10442 | new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); |
10443 | // Narrowing is safe because of above check |
10444 | new_ra->allocation_size = (int32_t)cap; |
10445 | } |
10446 | return true; |
10447 | } |
10448 | |
10449 | int ra_shrink_to_fit(roaring_array_t *ra) { |
10450 | int savings = (ra->allocation_size - ra->size) * |
10451 | (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); |
10452 | if (!realloc_array(ra, new_capacity: ra->size)) { |
10453 | return 0; |
10454 | } |
10455 | ra->allocation_size = ra->size; |
10456 | return savings; |
10457 | } |
10458 | |
10459 | void ra_init(roaring_array_t *new_ra) { |
10460 | if (!new_ra) { return; } |
10461 | new_ra->keys = NULL; |
10462 | new_ra->containers = NULL; |
10463 | new_ra->typecodes = NULL; |
10464 | |
10465 | new_ra->allocation_size = 0; |
10466 | new_ra->size = 0; |
10467 | new_ra->flags = 0; |
10468 | } |
10469 | |
10470 | bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, |
10471 | bool copy_on_write) { |
10472 | if (!ra_init_with_capacity(new_ra: dest, cap: source->size)) return false; |
10473 | dest->size = source->size; |
10474 | dest->allocation_size = source->size; |
10475 | if(dest->size > 0) { |
10476 | memcpy(dest: dest->keys, src: source->keys, n: dest->size * sizeof(uint16_t)); |
10477 | } |
10478 | // we go through the containers, turning them into shared containers... |
10479 | if (copy_on_write) { |
10480 | for (int32_t i = 0; i < dest->size; ++i) { |
10481 | source->containers[i] = get_copy_of_container( |
10482 | container: source->containers[i], typecode: &source->typecodes[i], copy_on_write); |
10483 | } |
10484 | // we do a shallow copy to the other bitmap |
10485 | if(dest->size > 0) { |
10486 | memcpy(dest: dest->containers, src: source->containers, |
10487 | n: dest->size * sizeof(void *)); |
10488 | memcpy(dest: dest->typecodes, src: source->typecodes, |
10489 | n: dest->size * sizeof(uint8_t)); |
10490 | } |
10491 | } else { |
10492 | if(dest->size > 0) { |
10493 | memcpy(dest: dest->typecodes, src: source->typecodes, |
10494 | n: dest->size * sizeof(uint8_t)); |
10495 | } |
10496 | for (int32_t i = 0; i < dest->size; i++) { |
10497 | dest->containers[i] = |
10498 | container_clone(container: source->containers[i], typecode: source->typecodes[i]); |
10499 | if (dest->containers[i] == NULL) { |
10500 | for (int32_t j = 0; j < i; j++) { |
10501 | container_free(container: dest->containers[j], typecode: dest->typecodes[j]); |
10502 | } |
10503 | ra_clear_without_containers(r: dest); |
10504 | return false; |
10505 | } |
10506 | } |
10507 | } |
10508 | return true; |
10509 | } |
10510 | |
10511 | bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, |
10512 | bool copy_on_write) { |
10513 | ra_clear_containers(ra: dest); // we are going to overwrite them |
10514 | if (dest->allocation_size < source->size) { |
10515 | if (!realloc_array(ra: dest, new_capacity: source->size)) { |
10516 | return false; |
10517 | } |
10518 | } |
10519 | dest->size = source->size; |
10520 | memcpy(dest: dest->keys, src: source->keys, n: dest->size * sizeof(uint16_t)); |
10521 | // we go through the containers, turning them into shared containers... |
10522 | if (copy_on_write) { |
10523 | for (int32_t i = 0; i < dest->size; ++i) { |
10524 | source->containers[i] = get_copy_of_container( |
10525 | container: source->containers[i], typecode: &source->typecodes[i], copy_on_write); |
10526 | } |
10527 | // we do a shallow copy to the other bitmap |
10528 | memcpy(dest: dest->containers, src: source->containers, |
10529 | n: dest->size * sizeof(void *)); |
10530 | memcpy(dest: dest->typecodes, src: source->typecodes, |
10531 | n: dest->size * sizeof(uint8_t)); |
10532 | } else { |
10533 | memcpy(dest: dest->typecodes, src: source->typecodes, |
10534 | n: dest->size * sizeof(uint8_t)); |
10535 | for (int32_t i = 0; i < dest->size; i++) { |
10536 | dest->containers[i] = |
10537 | container_clone(container: source->containers[i], typecode: source->typecodes[i]); |
10538 | if (dest->containers[i] == NULL) { |
10539 | for (int32_t j = 0; j < i; j++) { |
10540 | container_free(container: dest->containers[j], typecode: dest->typecodes[j]); |
10541 | } |
10542 | ra_clear_without_containers(r: dest); |
10543 | return false; |
10544 | } |
10545 | } |
10546 | } |
10547 | return true; |
10548 | } |
10549 | |
10550 | void ra_clear_containers(roaring_array_t *ra) { |
10551 | for (int32_t i = 0; i < ra->size; ++i) { |
10552 | container_free(container: ra->containers[i], typecode: ra->typecodes[i]); |
10553 | } |
10554 | } |
10555 | |
10556 | void ra_reset(roaring_array_t *ra) { |
10557 | ra_clear_containers(ra); |
10558 | ra->size = 0; |
10559 | ra_shrink_to_fit(ra); |
10560 | } |
10561 | |
10562 | void ra_clear_without_containers(roaring_array_t *ra) { |
10563 | free(ptr: ra->containers); // keys and typecodes are allocated with containers |
10564 | ra->size = 0; |
10565 | ra->allocation_size = 0; |
10566 | ra->containers = NULL; |
10567 | ra->keys = NULL; |
10568 | ra->typecodes = NULL; |
10569 | } |
10570 | |
10571 | void ra_clear(roaring_array_t *ra) { |
10572 | ra_clear_containers(ra); |
10573 | ra_clear_without_containers(ra); |
10574 | } |
10575 | |
10576 | bool extend_array(roaring_array_t *ra, int32_t k) { |
10577 | int32_t desired_size = ra->size + k; |
10578 | assert(desired_size <= MAX_CONTAINERS); |
10579 | if (desired_size > ra->allocation_size) { |
10580 | int32_t new_capacity = |
10581 | (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4; |
10582 | if (new_capacity > MAX_CONTAINERS) { |
10583 | new_capacity = MAX_CONTAINERS; |
10584 | } |
10585 | |
10586 | return realloc_array(ra, new_capacity); |
10587 | } |
10588 | return true; |
10589 | } |
10590 | |
10591 | void ra_append(roaring_array_t *ra, uint16_t key, void *container, |
10592 | uint8_t typecode) { |
10593 | extend_array(ra, k: 1); |
10594 | const int32_t pos = ra->size; |
10595 | |
10596 | ra->keys[pos] = key; |
10597 | ra->containers[pos] = container; |
10598 | ra->typecodes[pos] = typecode; |
10599 | ra->size++; |
10600 | } |
10601 | |
10602 | void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, |
10603 | uint16_t index, bool copy_on_write) { |
10604 | extend_array(ra, k: 1); |
10605 | const int32_t pos = ra->size; |
10606 | |
10607 | // old contents is junk not needing freeing |
10608 | ra->keys[pos] = sa->keys[index]; |
10609 | // the shared container will be in two bitmaps |
10610 | if (copy_on_write) { |
10611 | sa->containers[index] = get_copy_of_container( |
10612 | container: sa->containers[index], typecode: &sa->typecodes[index], copy_on_write); |
10613 | ra->containers[pos] = sa->containers[index]; |
10614 | ra->typecodes[pos] = sa->typecodes[index]; |
10615 | } else { |
10616 | ra->containers[pos] = |
10617 | container_clone(container: sa->containers[index], typecode: sa->typecodes[index]); |
10618 | ra->typecodes[pos] = sa->typecodes[index]; |
10619 | } |
10620 | ra->size++; |
10621 | } |
10622 | |
10623 | void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, |
10624 | uint16_t stopping_key, bool copy_on_write) { |
10625 | for (int32_t i = 0; i < sa->size; ++i) { |
10626 | if (sa->keys[i] >= stopping_key) break; |
10627 | ra_append_copy(ra, sa, index: i, copy_on_write); |
10628 | } |
10629 | } |
10630 | |
10631 | void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, |
10632 | int32_t start_index, int32_t end_index, |
10633 | bool copy_on_write) { |
10634 | extend_array(ra, k: end_index - start_index); |
10635 | for (int32_t i = start_index; i < end_index; ++i) { |
10636 | const int32_t pos = ra->size; |
10637 | ra->keys[pos] = sa->keys[i]; |
10638 | if (copy_on_write) { |
10639 | sa->containers[i] = get_copy_of_container( |
10640 | container: sa->containers[i], typecode: &sa->typecodes[i], copy_on_write); |
10641 | ra->containers[pos] = sa->containers[i]; |
10642 | ra->typecodes[pos] = sa->typecodes[i]; |
10643 | } else { |
10644 | ra->containers[pos] = |
10645 | container_clone(container: sa->containers[i], typecode: sa->typecodes[i]); |
10646 | ra->typecodes[pos] = sa->typecodes[i]; |
10647 | } |
10648 | ra->size++; |
10649 | } |
10650 | } |
10651 | |
10652 | void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, |
10653 | uint16_t before_start, bool copy_on_write) { |
10654 | int start_location = ra_get_index(ra: sa, x: before_start); |
10655 | if (start_location >= 0) |
10656 | ++start_location; |
10657 | else |
10658 | start_location = -start_location - 1; |
10659 | ra_append_copy_range(ra, sa, start_index: start_location, end_index: sa->size, copy_on_write); |
10660 | } |
10661 | |
10662 | void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, |
10663 | int32_t start_index, int32_t end_index) { |
10664 | extend_array(ra, k: end_index - start_index); |
10665 | |
10666 | for (int32_t i = start_index; i < end_index; ++i) { |
10667 | const int32_t pos = ra->size; |
10668 | |
10669 | ra->keys[pos] = sa->keys[i]; |
10670 | ra->containers[pos] = sa->containers[i]; |
10671 | ra->typecodes[pos] = sa->typecodes[i]; |
10672 | ra->size++; |
10673 | } |
10674 | } |
10675 | |
10676 | void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, |
10677 | int32_t start_index, int32_t end_index, |
10678 | bool copy_on_write) { |
10679 | extend_array(ra, k: end_index - start_index); |
10680 | |
10681 | for (int32_t i = start_index; i < end_index; ++i) { |
10682 | const int32_t pos = ra->size; |
10683 | ra->keys[pos] = sa->keys[i]; |
10684 | if (copy_on_write) { |
10685 | sa->containers[i] = get_copy_of_container( |
10686 | container: sa->containers[i], typecode: &sa->typecodes[i], copy_on_write); |
10687 | ra->containers[pos] = sa->containers[i]; |
10688 | ra->typecodes[pos] = sa->typecodes[i]; |
10689 | } else { |
10690 | ra->containers[pos] = |
10691 | container_clone(container: sa->containers[i], typecode: sa->typecodes[i]); |
10692 | ra->typecodes[pos] = sa->typecodes[i]; |
10693 | } |
10694 | ra->size++; |
10695 | } |
10696 | } |
10697 | |
10698 | uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { |
10699 | return ra->keys[i]; |
10700 | } |
10701 | |
10702 | // everything skipped over is freed |
10703 | int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { |
10704 | while (pos < ra->size && ra->keys[pos] < x) { |
10705 | container_free(container: ra->containers[pos], typecode: ra->typecodes[pos]); |
10706 | ++pos; |
10707 | } |
10708 | return pos; |
10709 | } |
10710 | |
10711 | void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, |
10712 | void *container, uint8_t typecode) { |
10713 | extend_array(ra, k: 1); |
10714 | // May be an optimization opportunity with DIY memmove |
10715 | memmove(dest: &(ra->keys[i + 1]), src: &(ra->keys[i]), |
10716 | n: sizeof(uint16_t) * (ra->size - i)); |
10717 | memmove(dest: &(ra->containers[i + 1]), src: &(ra->containers[i]), |
10718 | n: sizeof(void *) * (ra->size - i)); |
10719 | memmove(dest: &(ra->typecodes[i + 1]), src: &(ra->typecodes[i]), |
10720 | n: sizeof(uint8_t) * (ra->size - i)); |
10721 | ra->keys[i] = key; |
10722 | ra->containers[i] = container; |
10723 | ra->typecodes[i] = typecode; |
10724 | ra->size++; |
10725 | } |
10726 | |
10727 | // note: Java routine set things to 0, enabling GC. |
10728 | // Java called it "resize" but it was always used to downsize. |
10729 | // Allowing upsize would break the conventions about |
10730 | // valid containers below ra->size. |
10731 | |
10732 | void ra_downsize(roaring_array_t *ra, int32_t new_length) { |
10733 | assert(new_length <= ra->size); |
10734 | ra->size = new_length; |
10735 | } |
10736 | |
10737 | void ra_remove_at_index(roaring_array_t *ra, int32_t i) { |
10738 | memmove(dest: &(ra->containers[i]), src: &(ra->containers[i + 1]), |
10739 | n: sizeof(void *) * (ra->size - i - 1)); |
10740 | memmove(dest: &(ra->keys[i]), src: &(ra->keys[i + 1]), |
10741 | n: sizeof(uint16_t) * (ra->size - i - 1)); |
10742 | memmove(dest: &(ra->typecodes[i]), src: &(ra->typecodes[i + 1]), |
10743 | n: sizeof(uint8_t) * (ra->size - i - 1)); |
10744 | ra->size--; |
10745 | } |
10746 | |
10747 | void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { |
10748 | container_free(container: ra->containers[i], typecode: ra->typecodes[i]); |
10749 | ra_remove_at_index(ra, i); |
10750 | } |
10751 | |
10752 | // used in inplace andNot only, to slide left the containers from |
10753 | // the mutated RoaringBitmap that are after the largest container of |
10754 | // the argument RoaringBitmap. In use it should be followed by a call to |
10755 | // downsize. |
10756 | // |
10757 | void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, |
10758 | uint32_t new_begin) { |
10759 | assert(begin <= end); |
10760 | assert(new_begin < begin); |
10761 | |
10762 | const int range = end - begin; |
10763 | |
10764 | // We ensure to previously have freed overwritten containers |
10765 | // that are not copied elsewhere |
10766 | |
10767 | memmove(dest: &(ra->containers[new_begin]), src: &(ra->containers[begin]), |
10768 | n: sizeof(void *) * range); |
10769 | memmove(dest: &(ra->keys[new_begin]), src: &(ra->keys[begin]), |
10770 | n: sizeof(uint16_t) * range); |
10771 | memmove(dest: &(ra->typecodes[new_begin]), src: &(ra->typecodes[begin]), |
10772 | n: sizeof(uint8_t) * range); |
10773 | } |
10774 | |
10775 | void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { |
10776 | if (distance > 0) { |
10777 | extend_array(ra, k: distance); |
10778 | } |
10779 | int32_t srcpos = ra->size - count; |
10780 | int32_t dstpos = srcpos + distance; |
10781 | memmove(dest: &(ra->keys[dstpos]), src: &(ra->keys[srcpos]), |
10782 | n: sizeof(uint16_t) * count); |
10783 | memmove(dest: &(ra->containers[dstpos]), src: &(ra->containers[srcpos]), |
10784 | n: sizeof(void *) * count); |
10785 | memmove(dest: &(ra->typecodes[dstpos]), src: &(ra->typecodes[srcpos]), |
10786 | n: sizeof(uint8_t) * count); |
10787 | ra->size += distance; |
10788 | } |
10789 | |
10790 | |
10791 | void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { |
10792 | size_t ctr = 0; |
10793 | for (int32_t i = 0; i < ra->size; ++i) { |
10794 | int num_added = container_to_uint32_array( |
10795 | output: ans + ctr, container: ra->containers[i], typecode: ra->typecodes[i], |
10796 | base: ((uint32_t)ra->keys[i]) << 16); |
10797 | ctr += num_added; |
10798 | } |
10799 | } |
10800 | |
10801 | bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) { |
10802 | size_t ctr = 0; |
10803 | size_t dtr = 0; |
10804 | |
10805 | size_t t_limit = 0; |
10806 | |
10807 | bool first = false; |
10808 | size_t first_skip = 0; |
10809 | |
10810 | uint32_t *t_ans = NULL; |
10811 | size_t cur_len = 0; |
10812 | |
10813 | for (int i = 0; i < ra->size; ++i) { |
10814 | |
10815 | const void *container = container_unwrap_shared(candidate_shared_container: ra->containers[i], type: &ra->typecodes[i]); |
10816 | switch (ra->typecodes[i]) { |
10817 | case BITSET_CONTAINER_TYPE_CODE: |
10818 | t_limit = ((const bitset_container_t *)container)->cardinality; |
10819 | break; |
10820 | case ARRAY_CONTAINER_TYPE_CODE: |
10821 | t_limit = ((const array_container_t *)container)->cardinality; |
10822 | break; |
10823 | case RUN_CONTAINER_TYPE_CODE: |
10824 | t_limit = run_container_cardinality(run: (const run_container_t *)container); |
10825 | break; |
10826 | case SHARED_CONTAINER_TYPE_CODE: |
10827 | default: |
10828 | __builtin_unreachable(); |
10829 | } |
10830 | if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ |
10831 | if (!first){ |
10832 | //first_skip = t_limit - (ctr + t_limit - offset); |
10833 | first_skip = offset - ctr; |
10834 | first = true; |
10835 | t_ans = (uint32_t *)malloc(size: sizeof(*t_ans) * (first_skip + limit)); |
10836 | if(t_ans == NULL) { |
10837 | return false; |
10838 | } |
10839 | memset(s: t_ans, c: 0, n: sizeof(*t_ans) * (first_skip + limit)) ; |
10840 | cur_len = first_skip + limit; |
10841 | } |
10842 | if (dtr + t_limit > cur_len){ |
10843 | uint32_t * append_ans = (uint32_t *)malloc(size: sizeof(*append_ans) * (cur_len + t_limit)); |
10844 | if(append_ans == NULL) { |
10845 | if(t_ans != NULL) free(ptr: t_ans); |
10846 | return false; |
10847 | } |
10848 | memset(s: append_ans, c: 0, n: sizeof(*append_ans) * (cur_len + t_limit)); |
10849 | cur_len = cur_len + t_limit; |
10850 | memcpy(dest: append_ans, src: t_ans, n: dtr * sizeof(uint32_t)); |
10851 | free(ptr: t_ans); |
10852 | t_ans = append_ans; |
10853 | } |
10854 | switch (ra->typecodes[i]) { |
10855 | case BITSET_CONTAINER_TYPE_CODE: |
10856 | container_to_uint32_array( |
10857 | output: t_ans + dtr, container: (const bitset_container_t *)container, typecode: ra->typecodes[i], |
10858 | base: ((uint32_t)ra->keys[i]) << 16); |
10859 | break; |
10860 | case ARRAY_CONTAINER_TYPE_CODE: |
10861 | container_to_uint32_array( |
10862 | output: t_ans + dtr, container: (const array_container_t *)container, typecode: ra->typecodes[i], |
10863 | base: ((uint32_t)ra->keys[i]) << 16); |
10864 | break; |
10865 | case RUN_CONTAINER_TYPE_CODE: |
10866 | container_to_uint32_array( |
10867 | output: t_ans + dtr, container: (const run_container_t *)container, typecode: ra->typecodes[i], |
10868 | base: ((uint32_t)ra->keys[i]) << 16); |
10869 | break; |
10870 | case SHARED_CONTAINER_TYPE_CODE: |
10871 | default: |
10872 | __builtin_unreachable(); |
10873 | } |
10874 | dtr += t_limit; |
10875 | } |
10876 | ctr += t_limit; |
10877 | if (dtr-first_skip >= limit) break; |
10878 | } |
10879 | if(t_ans != NULL) { |
10880 | memcpy(dest: ans, src: t_ans+first_skip, n: limit * sizeof(uint32_t)); |
10881 | free(ptr: t_ans); |
10882 | } |
10883 | return true; |
10884 | } |
10885 | |
10886 | bool ra_has_run_container(const roaring_array_t *ra) { |
10887 | for (int32_t k = 0; k < ra->size; ++k) { |
10888 | if (get_container_type(container: ra->containers[k], type: ra->typecodes[k]) == |
10889 | RUN_CONTAINER_TYPE_CODE) |
10890 | return true; |
10891 | } |
10892 | return false; |
10893 | } |
10894 | |
10895 | uint32_t (const roaring_array_t *ra) { |
10896 | if (ra_has_run_container(ra)) { |
10897 | if (ra->size < |
10898 | NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets |
10899 | return 4 + (ra->size + 7) / 8 + 4 * ra->size; |
10900 | } |
10901 | return 4 + (ra->size + 7) / 8 + |
10902 | 8 * ra->size; // - 4 because we pack the size with the cookie |
10903 | } else { |
10904 | return 4 + 4 + 8 * ra->size; |
10905 | } |
10906 | } |
10907 | |
10908 | size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { |
10909 | size_t count = ra_portable_header_size(ra); |
10910 | |
10911 | for (int32_t k = 0; k < ra->size; ++k) { |
10912 | count += container_size_in_bytes(container: ra->containers[k], typecode: ra->typecodes[k]); |
10913 | } |
10914 | return count; |
10915 | } |
10916 | |
10917 | size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { |
10918 | char *initbuf = buf; |
10919 | uint32_t startOffset = 0; |
10920 | bool hasrun = ra_has_run_container(ra); |
10921 | if (hasrun) { |
10922 | uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); |
10923 | memcpy(dest: buf, src: &cookie, n: sizeof(cookie)); |
10924 | buf += sizeof(cookie); |
10925 | uint32_t s = (ra->size + 7) / 8; |
10926 | uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(nmemb: s, size: 1); |
10927 | assert(bitmapOfRunContainers != NULL); // todo: handle |
10928 | for (int32_t i = 0; i < ra->size; ++i) { |
10929 | if (get_container_type(container: ra->containers[i], type: ra->typecodes[i]) == |
10930 | RUN_CONTAINER_TYPE_CODE) { |
10931 | bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); |
10932 | } |
10933 | } |
10934 | memcpy(dest: buf, src: bitmapOfRunContainers, n: s); |
10935 | buf += s; |
10936 | free(ptr: bitmapOfRunContainers); |
10937 | if (ra->size < NO_OFFSET_THRESHOLD) { |
10938 | startOffset = 4 + 4 * ra->size + s; |
10939 | } else { |
10940 | startOffset = 4 + 8 * ra->size + s; |
10941 | } |
10942 | } else { // backwards compatibility |
10943 | uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; |
10944 | |
10945 | memcpy(dest: buf, src: &cookie, n: sizeof(cookie)); |
10946 | buf += sizeof(cookie); |
10947 | memcpy(dest: buf, src: &ra->size, n: sizeof(ra->size)); |
10948 | buf += sizeof(ra->size); |
10949 | |
10950 | startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; |
10951 | } |
10952 | for (int32_t k = 0; k < ra->size; ++k) { |
10953 | memcpy(dest: buf, src: &ra->keys[k], n: sizeof(ra->keys[k])); |
10954 | buf += sizeof(ra->keys[k]); |
10955 | // get_cardinality returns a value in [1,1<<16], subtracting one |
10956 | // we get [0,1<<16 - 1] which fits in 16 bits |
10957 | uint16_t card = (uint16_t)( |
10958 | container_get_cardinality(container: ra->containers[k], typecode: ra->typecodes[k]) - 1); |
10959 | memcpy(dest: buf, src: &card, n: sizeof(card)); |
10960 | buf += sizeof(card); |
10961 | } |
10962 | if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { |
10963 | // writing the containers offsets |
10964 | for (int32_t k = 0; k < ra->size; k++) { |
10965 | memcpy(dest: buf, src: &startOffset, n: sizeof(startOffset)); |
10966 | buf += sizeof(startOffset); |
10967 | startOffset = |
10968 | startOffset + |
10969 | container_size_in_bytes(container: ra->containers[k], typecode: ra->typecodes[k]); |
10970 | } |
10971 | } |
10972 | for (int32_t k = 0; k < ra->size; ++k) { |
10973 | buf += container_write(container: ra->containers[k], typecode: ra->typecodes[k], buf); |
10974 | } |
10975 | return buf - initbuf; |
10976 | } |
10977 | |
10978 | // Quickly checks whether there is a serialized bitmap at the pointer, |
10979 | // not exceeding size "maxbytes" in bytes. This function does not allocate |
10980 | // memory dynamically. |
10981 | // |
10982 | // This function returns 0 if and only if no valid bitmap is found. |
10983 | // Otherwise, it returns how many bytes are occupied. |
10984 | // |
10985 | size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { |
10986 | size_t bytestotal = sizeof(int32_t);// for cookie |
10987 | if(bytestotal > maxbytes) return 0; |
10988 | uint32_t cookie; |
10989 | memcpy(dest: &cookie, src: buf, n: sizeof(int32_t)); |
10990 | buf += sizeof(uint32_t); |
10991 | if ((cookie & 0xFFFF) != SERIAL_COOKIE && |
10992 | cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { |
10993 | return 0; |
10994 | } |
10995 | int32_t size; |
10996 | |
10997 | if ((cookie & 0xFFFF) == SERIAL_COOKIE) |
10998 | size = (cookie >> 16) + 1; |
10999 | else { |
11000 | bytestotal += sizeof(int32_t); |
11001 | if(bytestotal > maxbytes) return 0; |
11002 | memcpy(dest: &size, src: buf, n: sizeof(int32_t)); |
11003 | buf += sizeof(uint32_t); |
11004 | } |
11005 | if (size > (1<<16)) { |
11006 | return 0; // logically impossible |
11007 | } |
11008 | char *bitmapOfRunContainers = NULL; |
11009 | bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; |
11010 | if (hasrun) { |
11011 | int32_t s = (size + 7) / 8; |
11012 | bytestotal += s; |
11013 | if(bytestotal > maxbytes) return 0; |
11014 | bitmapOfRunContainers = (char *)buf; |
11015 | buf += s; |
11016 | } |
11017 | bytestotal += size * 2 * sizeof(uint16_t); |
11018 | if(bytestotal > maxbytes) return 0; |
11019 | uint16_t *keyscards = (uint16_t *)buf; |
11020 | buf += size * 2 * sizeof(uint16_t); |
11021 | if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { |
11022 | // skipping the offsets |
11023 | bytestotal += size * 4; |
11024 | if(bytestotal > maxbytes) return 0; |
11025 | buf += size * 4; |
11026 | } |
11027 | // Reading the containers |
11028 | for (int32_t k = 0; k < size; ++k) { |
11029 | uint16_t tmp; |
11030 | memcpy(dest: &tmp, src: keyscards + 2*k+1, n: sizeof(tmp)); |
11031 | uint32_t thiscard = tmp + 1; |
11032 | bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); |
11033 | bool isrun = false; |
11034 | if(hasrun) { |
11035 | if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { |
11036 | isbitmap = false; |
11037 | isrun = true; |
11038 | } |
11039 | } |
11040 | if (isbitmap) { |
11041 | size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
11042 | bytestotal += containersize; |
11043 | if(bytestotal > maxbytes) return 0; |
11044 | buf += containersize; |
11045 | } else if (isrun) { |
11046 | bytestotal += sizeof(uint16_t); |
11047 | if(bytestotal > maxbytes) return 0; |
11048 | uint16_t n_runs; |
11049 | memcpy(dest: &n_runs, src: buf, n: sizeof(uint16_t)); |
11050 | buf += sizeof(uint16_t); |
11051 | size_t containersize = n_runs * sizeof(rle16_t); |
11052 | bytestotal += containersize; |
11053 | if(bytestotal > maxbytes) return 0; |
11054 | buf += containersize; |
11055 | } else { |
11056 | size_t containersize = thiscard * sizeof(uint16_t); |
11057 | bytestotal += containersize; |
11058 | if(bytestotal > maxbytes) return 0; |
11059 | buf += containersize; |
11060 | } |
11061 | } |
11062 | return bytestotal; |
11063 | } |
11064 | |
11065 | |
11066 | // this function populates answer from the content of buf (reading up to maxbytes bytes). |
11067 | // The function returns false if a properly serialized bitmap cannot be found. |
11068 | // if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. |
11069 | bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { |
11070 | *readbytes = sizeof(int32_t);// for cookie |
11071 | if(*readbytes > maxbytes) { |
11072 | fprintf(stderr, format: "Ran out of bytes while reading first 4 bytes.\n" ); |
11073 | return false; |
11074 | } |
11075 | uint32_t cookie; |
11076 | memcpy(dest: &cookie, src: buf, n: sizeof(int32_t)); |
11077 | buf += sizeof(uint32_t); |
11078 | if ((cookie & 0xFFFF) != SERIAL_COOKIE && |
11079 | cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { |
11080 | fprintf(stderr, format: "I failed to find one of the right cookies. Found %" PRIu32 "\n" , |
11081 | cookie); |
11082 | return false; |
11083 | } |
11084 | int32_t size; |
11085 | |
11086 | if ((cookie & 0xFFFF) == SERIAL_COOKIE) |
11087 | size = (cookie >> 16) + 1; |
11088 | else { |
11089 | *readbytes += sizeof(int32_t); |
11090 | if(*readbytes > maxbytes) { |
11091 | fprintf(stderr, format: "Ran out of bytes while reading second part of the cookie.\n" ); |
11092 | return false; |
11093 | } |
11094 | memcpy(dest: &size, src: buf, n: sizeof(int32_t)); |
11095 | buf += sizeof(uint32_t); |
11096 | } |
11097 | if (size > (1<<16)) { |
11098 | fprintf(stderr, format: "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n" , |
11099 | size); |
11100 | return false; // logically impossible |
11101 | } |
11102 | const char *bitmapOfRunContainers = NULL; |
11103 | bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; |
11104 | if (hasrun) { |
11105 | int32_t s = (size + 7) / 8; |
11106 | *readbytes += s; |
11107 | if(*readbytes > maxbytes) {// data is corrupted? |
11108 | fprintf(stderr, format: "Ran out of bytes while reading run bitmap.\n" ); |
11109 | return false; |
11110 | } |
11111 | bitmapOfRunContainers = buf; |
11112 | buf += s; |
11113 | } |
11114 | uint16_t *keyscards = (uint16_t *)buf; |
11115 | |
11116 | *readbytes += size * 2 * sizeof(uint16_t); |
11117 | if(*readbytes > maxbytes) { |
11118 | fprintf(stderr, format: "Ran out of bytes while reading key-cardinality array.\n" ); |
11119 | return false; |
11120 | } |
11121 | buf += size * 2 * sizeof(uint16_t); |
11122 | |
11123 | bool is_ok = ra_init_with_capacity(new_ra: answer, cap: size); |
11124 | if (!is_ok) { |
11125 | fprintf(stderr, format: "Failed to allocate memory for roaring array. Bailing out.\n" ); |
11126 | return false; |
11127 | } |
11128 | |
11129 | for (int32_t k = 0; k < size; ++k) { |
11130 | uint16_t tmp; |
11131 | memcpy(dest: &tmp, src: keyscards + 2*k, n: sizeof(tmp)); |
11132 | answer->keys[k] = tmp; |
11133 | } |
11134 | if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { |
11135 | *readbytes += size * 4; |
11136 | if(*readbytes > maxbytes) {// data is corrupted? |
11137 | fprintf(stderr, format: "Ran out of bytes while reading offsets.\n" ); |
11138 | ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array |
11139 | return false; |
11140 | } |
11141 | |
11142 | // skipping the offsets |
11143 | buf += size * 4; |
11144 | } |
11145 | // Reading the containers |
11146 | for (int32_t k = 0; k < size; ++k) { |
11147 | uint16_t tmp; |
11148 | memcpy(dest: &tmp, src: keyscards + 2*k+1, n: sizeof(tmp)); |
11149 | uint32_t thiscard = tmp + 1; |
11150 | bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); |
11151 | bool isrun = false; |
11152 | if(hasrun) { |
11153 | if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { |
11154 | isbitmap = false; |
11155 | isrun = true; |
11156 | } |
11157 | } |
11158 | if (isbitmap) { |
11159 | // we check that the read is allowed |
11160 | size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
11161 | *readbytes += containersize; |
11162 | if(*readbytes > maxbytes) { |
11163 | fprintf(stderr, format: "Running out of bytes while reading a bitset container.\n" ); |
11164 | ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array |
11165 | return false; |
11166 | } |
11167 | // it is now safe to read |
11168 | bitset_container_t *c = bitset_container_create(); |
11169 | if(c == NULL) {// memory allocation failure |
11170 | fprintf(stderr, format: "Failed to allocate memory for a bitset container.\n" ); |
11171 | ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array |
11172 | return false; |
11173 | } |
11174 | answer->size++; |
11175 | buf += bitset_container_read(cardinality: thiscard, container: c, buf); |
11176 | answer->containers[k] = c; |
11177 | answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE; |
11178 | } else if (isrun) { |
11179 | // we check that the read is allowed |
11180 | *readbytes += sizeof(uint16_t); |
11181 | if(*readbytes > maxbytes) { |
11182 | fprintf(stderr, format: "Running out of bytes while reading a run container (header).\n" ); |
11183 | ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array |
11184 | return false; |
11185 | } |
11186 | uint16_t n_runs; |
11187 | memcpy(dest: &n_runs, src: buf, n: sizeof(uint16_t)); |
11188 | size_t containersize = n_runs * sizeof(rle16_t); |
11189 | *readbytes += containersize; |
11190 | if(*readbytes > maxbytes) {// data is corrupted? |
11191 | fprintf(stderr, format: "Running out of bytes while reading a run container.\n" ); |
11192 | ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array |
11193 | return false; |
11194 | } |
11195 | // it is now safe to read |
11196 | |
11197 | run_container_t *c = run_container_create(); |
11198 | if(c == NULL) {// memory allocation failure |
11199 | fprintf(stderr, format: "Failed to allocate memory for a run container.\n" ); |
11200 | ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array |
11201 | return false; |
11202 | } |
11203 | answer->size++; |
11204 | buf += run_container_read(cardinality: thiscard, container: c, buf); |
11205 | answer->containers[k] = c; |
11206 | answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE; |
11207 | } else { |
11208 | // we check that the read is allowed |
11209 | size_t containersize = thiscard * sizeof(uint16_t); |
11210 | *readbytes += containersize; |
11211 | if(*readbytes > maxbytes) {// data is corrupted? |
11212 | fprintf(stderr, format: "Running out of bytes while reading an array container.\n" ); |
11213 | ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array |
11214 | return false; |
11215 | } |
11216 | // it is now safe to read |
11217 | array_container_t *c = |
11218 | array_container_create_given_capacity(size: thiscard); |
11219 | if(c == NULL) {// memory allocation failure |
11220 | fprintf(stderr, format: "Failed to allocate memory for an array container.\n" ); |
11221 | ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array |
11222 | return false; |
11223 | } |
11224 | answer->size++; |
11225 | buf += array_container_read(cardinality: thiscard, container: c, buf); |
11226 | answer->containers[k] = c; |
11227 | answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE; |
11228 | } |
11229 | } |
11230 | return true; |
11231 | } |
11232 | /* end file src/roaring_array.c */ |
11233 | /* begin file src/roaring_priority_queue.c */ |
11234 | |
11235 | struct roaring_pq_element_s { |
11236 | uint64_t size; |
11237 | bool is_temporary; |
11238 | roaring_bitmap_t *bitmap; |
11239 | }; |
11240 | |
11241 | typedef struct roaring_pq_element_s roaring_pq_element_t; |
11242 | |
11243 | struct roaring_pq_s { |
11244 | roaring_pq_element_t *elements; |
11245 | uint64_t size; |
11246 | }; |
11247 | |
11248 | typedef struct roaring_pq_s roaring_pq_t; |
11249 | |
11250 | static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { |
11251 | return t1->size < t2->size; |
11252 | } |
11253 | |
11254 | static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { |
11255 | uint64_t i = pq->size; |
11256 | pq->elements[pq->size++] = *t; |
11257 | while (i > 0) { |
11258 | uint64_t p = (i - 1) >> 1; |
11259 | roaring_pq_element_t ap = pq->elements[p]; |
11260 | if (!compare(t1: t, t2: &ap)) break; |
11261 | pq->elements[i] = ap; |
11262 | i = p; |
11263 | } |
11264 | pq->elements[i] = *t; |
11265 | } |
11266 | |
11267 | static void pq_free(roaring_pq_t *pq) { |
11268 | free(ptr: pq->elements); |
11269 | pq->elements = NULL; // paranoid |
11270 | free(ptr: pq); |
11271 | } |
11272 | |
11273 | static void percolate_down(roaring_pq_t *pq, uint32_t i) { |
11274 | uint32_t size = (uint32_t)pq->size; |
11275 | uint32_t hsize = size >> 1; |
11276 | roaring_pq_element_t ai = pq->elements[i]; |
11277 | while (i < hsize) { |
11278 | uint32_t l = (i << 1) + 1; |
11279 | uint32_t r = l + 1; |
11280 | roaring_pq_element_t bestc = pq->elements[l]; |
11281 | if (r < size) { |
11282 | if (compare(t1: pq->elements + r, t2: &bestc)) { |
11283 | l = r; |
11284 | bestc = pq->elements[r]; |
11285 | } |
11286 | } |
11287 | if (!compare(t1: &bestc, t2: &ai)) { |
11288 | break; |
11289 | } |
11290 | pq->elements[i] = bestc; |
11291 | i = l; |
11292 | } |
11293 | pq->elements[i] = ai; |
11294 | } |
11295 | |
11296 | static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { |
11297 | roaring_pq_t *answer = (roaring_pq_t *)malloc(size: sizeof(roaring_pq_t)); |
11298 | answer->elements = |
11299 | (roaring_pq_element_t *)malloc(size: sizeof(roaring_pq_element_t) * length); |
11300 | answer->size = length; |
11301 | for (uint32_t i = 0; i < length; i++) { |
11302 | answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; |
11303 | answer->elements[i].is_temporary = false; |
11304 | answer->elements[i].size = |
11305 | roaring_bitmap_portable_size_in_bytes(ra: arr[i]); |
11306 | } |
11307 | for (int32_t i = (length >> 1); i >= 0; i--) { |
11308 | percolate_down(pq: answer, i); |
11309 | } |
11310 | return answer; |
11311 | } |
11312 | |
11313 | static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { |
11314 | roaring_pq_element_t ans = *pq->elements; |
11315 | if (pq->size > 1) { |
11316 | pq->elements[0] = pq->elements[--pq->size]; |
11317 | percolate_down(pq, i: 0); |
11318 | } else |
11319 | --pq->size; |
11320 | // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; |
11321 | return ans; |
11322 | } |
11323 | |
11324 | // this function consumes and frees the inputs |
11325 | static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, |
11326 | roaring_bitmap_t *x2) { |
11327 | uint8_t container_result_type = 0; |
11328 | const int length1 = ra_get_size(ra: &x1->high_low_container), |
11329 | length2 = ra_get_size(ra: &x2->high_low_container); |
11330 | if (0 == length1) { |
11331 | roaring_bitmap_free(r: x1); |
11332 | return x2; |
11333 | } |
11334 | if (0 == length2) { |
11335 | roaring_bitmap_free(r: x2); |
11336 | return x1; |
11337 | } |
11338 | uint32_t neededcap = length1 > length2 ? length2 : length1; |
11339 | roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(cap: neededcap); |
11340 | int pos1 = 0, pos2 = 0; |
11341 | uint8_t container_type_1, container_type_2; |
11342 | uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
11343 | uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
11344 | while (true) { |
11345 | if (s1 == s2) { |
11346 | // todo: unsharing can be inefficient as it may create a clone where |
11347 | // none |
11348 | // is needed, but it has the benefit of being easy to reason about. |
11349 | ra_unshare_container_at_index(ra: &x1->high_low_container, i: pos1); |
11350 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
11351 | typecode: &container_type_1); |
11352 | assert(container_type_1 != SHARED_CONTAINER_TYPE_CODE); |
11353 | ra_unshare_container_at_index(ra: &x2->high_low_container, i: pos2); |
11354 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
11355 | typecode: &container_type_2); |
11356 | assert(container_type_2 != SHARED_CONTAINER_TYPE_CODE); |
11357 | void *c; |
11358 | |
11359 | if ((container_type_2 == BITSET_CONTAINER_TYPE_CODE) && |
11360 | (container_type_1 != BITSET_CONTAINER_TYPE_CODE)) { |
11361 | c = container_lazy_ior(c1: c2, type1: container_type_2, c2: c1, |
11362 | type2: container_type_1, |
11363 | result_type: &container_result_type); |
11364 | container_free(container: c1, typecode: container_type_1); |
11365 | if (c != c2) { |
11366 | container_free(container: c2, typecode: container_type_2); |
11367 | } |
11368 | } else { |
11369 | c = container_lazy_ior(c1, type1: container_type_1, c2, |
11370 | type2: container_type_2, |
11371 | result_type: &container_result_type); |
11372 | container_free(container: c2, typecode: container_type_2); |
11373 | if (c != c1) { |
11374 | container_free(container: c1, typecode: container_type_1); |
11375 | } |
11376 | } |
11377 | // since we assume that the initial containers are non-empty, the |
11378 | // result here |
11379 | // can only be non-empty |
11380 | ra_append(ra: &answer->high_low_container, key: s1, container: c, |
11381 | typecode: container_result_type); |
11382 | ++pos1; |
11383 | ++pos2; |
11384 | if (pos1 == length1) break; |
11385 | if (pos2 == length2) break; |
11386 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
11387 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
11388 | |
11389 | } else if (s1 < s2) { // s1 < s2 |
11390 | void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1, |
11391 | typecode: &container_type_1); |
11392 | ra_append(ra: &answer->high_low_container, key: s1, container: c1, typecode: container_type_1); |
11393 | pos1++; |
11394 | if (pos1 == length1) break; |
11395 | s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1); |
11396 | |
11397 | } else { // s1 > s2 |
11398 | void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2, |
11399 | typecode: &container_type_2); |
11400 | ra_append(ra: &answer->high_low_container, key: s2, container: c2, typecode: container_type_2); |
11401 | pos2++; |
11402 | if (pos2 == length2) break; |
11403 | s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2); |
11404 | } |
11405 | } |
11406 | if (pos1 == length1) { |
11407 | ra_append_move_range(ra: &answer->high_low_container, |
11408 | sa: &x2->high_low_container, start_index: pos2, end_index: length2); |
11409 | } else if (pos2 == length2) { |
11410 | ra_append_move_range(ra: &answer->high_low_container, |
11411 | sa: &x1->high_low_container, start_index: pos1, end_index: length1); |
11412 | } |
11413 | ra_clear_without_containers(ra: &x1->high_low_container); |
11414 | ra_clear_without_containers(ra: &x2->high_low_container); |
11415 | free(ptr: x1); |
11416 | free(ptr: x2); |
11417 | return answer; |
11418 | } |
11419 | |
11420 | /** |
11421 | * Compute the union of 'number' bitmaps using a heap. This can |
11422 | * sometimes be faster than roaring_bitmap_or_many which uses |
11423 | * a naive algorithm. Caller is responsible for freeing the |
11424 | * result. |
11425 | */ |
11426 | roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, |
11427 | const roaring_bitmap_t **x) { |
11428 | if (number == 0) { |
11429 | return roaring_bitmap_create(); |
11430 | } |
11431 | if (number == 1) { |
11432 | return roaring_bitmap_copy(r: x[0]); |
11433 | } |
11434 | roaring_pq_t *pq = create_pq(arr: x, length: number); |
11435 | while (pq->size > 1) { |
11436 | roaring_pq_element_t x1 = pq_poll(pq); |
11437 | roaring_pq_element_t x2 = pq_poll(pq); |
11438 | |
11439 | if (x1.is_temporary && x2.is_temporary) { |
11440 | roaring_bitmap_t *newb = |
11441 | lazy_or_from_lazy_inputs(x1: x1.bitmap, x2: x2.bitmap); |
11442 | // should normally return a fresh new bitmap *except* that |
11443 | // it can return x1.bitmap or x2.bitmap in degenerate cases |
11444 | bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); |
11445 | uint64_t bsize = roaring_bitmap_portable_size_in_bytes(ra: newb); |
11446 | roaring_pq_element_t newelement = { |
11447 | .size = bsize, .is_temporary = temporary, .bitmap = newb}; |
11448 | pq_add(pq, t: &newelement); |
11449 | } else if (x2.is_temporary) { |
11450 | roaring_bitmap_lazy_or_inplace(x1: x2.bitmap, x2: x1.bitmap, false); |
11451 | x2.size = roaring_bitmap_portable_size_in_bytes(ra: x2.bitmap); |
11452 | pq_add(pq, t: &x2); |
11453 | } else if (x1.is_temporary) { |
11454 | roaring_bitmap_lazy_or_inplace(x1: x1.bitmap, x2: x2.bitmap, false); |
11455 | x1.size = roaring_bitmap_portable_size_in_bytes(ra: x1.bitmap); |
11456 | |
11457 | pq_add(pq, t: &x1); |
11458 | } else { |
11459 | roaring_bitmap_t *newb = |
11460 | roaring_bitmap_lazy_or(x1: x1.bitmap, x2: x2.bitmap, false); |
11461 | uint64_t bsize = roaring_bitmap_portable_size_in_bytes(ra: newb); |
11462 | roaring_pq_element_t newelement = { |
11463 | .size = bsize, .is_temporary = true, .bitmap = newb}; |
11464 | |
11465 | pq_add(pq, t: &newelement); |
11466 | } |
11467 | } |
11468 | roaring_pq_element_t X = pq_poll(pq); |
11469 | roaring_bitmap_t *answer = X.bitmap; |
11470 | roaring_bitmap_repair_after_lazy(ra: answer); |
11471 | pq_free(pq); |
11472 | return answer; |
11473 | } |
11474 | /* end file src/roaring_priority_queue.c */ |
11475 | |