1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtNetwork module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "bitstreams_p.h"
41#include "huffman_p.h"
42
43#include <QtCore/qbytearray.h>
44
45#include <algorithm>
46#include <limits>
47
48QT_BEGIN_NAMESPACE
49
50namespace HPack
51{
52
53/*
54 The static Huffman code used here was extracted from:
55 https://http2.github.io/http2-spec/compression.html#huffman.code
56
57 This code was generated from statistics obtained on a large
58 sample of HTTP headers. It is a canonical Huffman code
59 with some tweaking to ensure that no symbol has a unique
60 code length. All codes were left-aligned - for implementation
61 convenience.
62
63 Using binary trees to implement decoding would be prohibitively
64 expensive (both memory and time-wise). Instead we use a table-based
65 approach and any given code itself works as an index into such table(s).
66 We have 256 possible byte values and code lengths in
67 a range [5, 26]. This would require a huge table (and most of entries
68 would be 'wasted', since we only have to encode 256 elements).
69 Instead we use multi-level tables. The first level table
70 is using 9-bit length index; some entries in this table are 'terminal',
71 some reference the next level table(s).
72
73 For example, bytes with values 48 and 49 (ASCII codes for '0' and '1')
74 both have code length 5, Huffman codes are: 00000 and 00001. They
75 both are placed in the 'root' table,
76 the 'root' table has index length == 9:
77 [00000 | 4 remaining bits]
78 ...
79 [00001 | 4 remaining bits]
80
81 All entires with indices between these two will 'point' to value 48
82 with bitLength == 5 so that bit stream (for example) 000001010 will be
83 decoded as: 48 + "put 1010 back into bitstream".
84
85 A good description can be found here:
86 http://commandlinefanatic.com/cgi-bin/showarticle.cgi?article=art007
87 or just google "Efficient Huffman Decoding".
88 Also see comments below about 'filling holes'.
89*/
90
91namespace
92{
93
94const CodeEntry staticHuffmanCodeTable[]
95{
96 { .byteValue: 0, .huffmanCode: 0xffc00000ul, .bitLength: 13}, // 11111111|11000
97 { .byteValue: 1, .huffmanCode: 0xffffb000ul, .bitLength: 23}, // 11111111|11111111|1011000
98 { .byteValue: 2, .huffmanCode: 0xfffffe20ul, .bitLength: 28}, // 11111111|11111111|11111110|0010
99 { .byteValue: 3, .huffmanCode: 0xfffffe30ul, .bitLength: 28}, // 11111111|11111111|11111110|0011
100 { .byteValue: 4, .huffmanCode: 0xfffffe40ul, .bitLength: 28}, // 11111111|11111111|11111110|0100
101 { .byteValue: 5, .huffmanCode: 0xfffffe50ul, .bitLength: 28}, // 11111111|11111111|11111110|0101
102 { .byteValue: 6, .huffmanCode: 0xfffffe60ul, .bitLength: 28}, // 11111111|11111111|11111110|0110
103 { .byteValue: 7, .huffmanCode: 0xfffffe70ul, .bitLength: 28}, // 11111111|11111111|11111110|0111
104 { .byteValue: 8, .huffmanCode: 0xfffffe80ul, .bitLength: 28}, // 11111111|11111111|11111110|1000
105 { .byteValue: 9, .huffmanCode: 0xffffea00ul, .bitLength: 24}, // 11111111|11111111|11101010
106 { .byteValue: 10, .huffmanCode: 0xfffffff0ul, .bitLength: 30}, // 11111111|11111111|11111111|111100
107 { .byteValue: 11, .huffmanCode: 0xfffffe90ul, .bitLength: 28}, // 11111111|11111111|11111110|1001
108 { .byteValue: 12, .huffmanCode: 0xfffffea0ul, .bitLength: 28}, // 11111111|11111111|11111110|1010
109 { .byteValue: 13, .huffmanCode: 0xfffffff4ul, .bitLength: 30}, // 11111111|11111111|11111111|111101
110 { .byteValue: 14, .huffmanCode: 0xfffffeb0ul, .bitLength: 28}, // 11111111|11111111|11111110|1011
111 { .byteValue: 15, .huffmanCode: 0xfffffec0ul, .bitLength: 28}, // 11111111|11111111|11111110|1100
112 { .byteValue: 16, .huffmanCode: 0xfffffed0ul, .bitLength: 28}, // 11111111|11111111|11111110|1101
113 { .byteValue: 17, .huffmanCode: 0xfffffee0ul, .bitLength: 28}, // 11111111|11111111|11111110|1110
114 { .byteValue: 18, .huffmanCode: 0xfffffef0ul, .bitLength: 28}, // 11111111|11111111|11111110|1111
115 { .byteValue: 19, .huffmanCode: 0xffffff00ul, .bitLength: 28}, // 11111111|11111111|11111111|0000
116 { .byteValue: 20, .huffmanCode: 0xffffff10ul, .bitLength: 28}, // 11111111|11111111|11111111|0001
117 { .byteValue: 21, .huffmanCode: 0xffffff20ul, .bitLength: 28}, // 11111111|11111111|11111111|0010
118 { .byteValue: 22, .huffmanCode: 0xfffffff8ul, .bitLength: 30}, // 11111111|11111111|11111111|111110
119 { .byteValue: 23, .huffmanCode: 0xffffff30ul, .bitLength: 28}, // 11111111|11111111|11111111|0011
120 { .byteValue: 24, .huffmanCode: 0xffffff40ul, .bitLength: 28}, // 11111111|11111111|11111111|0100
121 { .byteValue: 25, .huffmanCode: 0xffffff50ul, .bitLength: 28}, // 11111111|11111111|11111111|0101
122 { .byteValue: 26, .huffmanCode: 0xffffff60ul, .bitLength: 28}, // 11111111|11111111|11111111|0110
123 { .byteValue: 27, .huffmanCode: 0xffffff70ul, .bitLength: 28}, // 11111111|11111111|11111111|0111
124 { .byteValue: 28, .huffmanCode: 0xffffff80ul, .bitLength: 28}, // 11111111|11111111|11111111|1000
125 { .byteValue: 29, .huffmanCode: 0xffffff90ul, .bitLength: 28}, // 11111111|11111111|11111111|1001
126 { .byteValue: 30, .huffmanCode: 0xffffffa0ul, .bitLength: 28}, // 11111111|11111111|11111111|1010
127 { .byteValue: 31, .huffmanCode: 0xffffffb0ul, .bitLength: 28}, // 11111111|11111111|11111111|1011
128 { .byteValue: 32, .huffmanCode: 0x50000000ul, .bitLength: 6}, // ' ' 010100
129 { .byteValue: 33, .huffmanCode: 0xfe000000ul, .bitLength: 10}, // '!' 11111110|00
130 { .byteValue: 34, .huffmanCode: 0xfe400000ul, .bitLength: 10}, // '"' 11111110|01
131 { .byteValue: 35, .huffmanCode: 0xffa00000ul, .bitLength: 12}, // '#' 11111111|1010
132 { .byteValue: 36, .huffmanCode: 0xffc80000ul, .bitLength: 13}, // '$' 11111111|11001
133 { .byteValue: 37, .huffmanCode: 0x54000000ul, .bitLength: 6}, // '%' 010101
134 { .byteValue: 38, .huffmanCode: 0xf8000000ul, .bitLength: 8}, // '&' 11111000
135 { .byteValue: 39, .huffmanCode: 0xff400000ul, .bitLength: 11}, // ''' 11111111|010
136 { .byteValue: 40, .huffmanCode: 0xfe800000ul, .bitLength: 10}, // '(' 11111110|10
137 { .byteValue: 41, .huffmanCode: 0xfec00000ul, .bitLength: 10}, // ')' 11111110|11
138 { .byteValue: 42, .huffmanCode: 0xf9000000ul, .bitLength: 8}, // '*' 11111001
139 { .byteValue: 43, .huffmanCode: 0xff600000ul, .bitLength: 11}, // '+' 11111111|011
140 { .byteValue: 44, .huffmanCode: 0xfa000000ul, .bitLength: 8}, // ',' 11111010
141 { .byteValue: 45, .huffmanCode: 0x58000000ul, .bitLength: 6}, // '-' 010110
142 { .byteValue: 46, .huffmanCode: 0x5c000000ul, .bitLength: 6}, // '.' 010111
143 { .byteValue: 47, .huffmanCode: 0x60000000ul, .bitLength: 6}, // '/' 011000
144 { .byteValue: 48, .huffmanCode: 0x00000000ul, .bitLength: 5}, // '0' 00000
145 { .byteValue: 49, .huffmanCode: 0x08000000ul, .bitLength: 5}, // '1' 00001
146 { .byteValue: 50, .huffmanCode: 0x10000000ul, .bitLength: 5}, // '2' 00010
147 { .byteValue: 51, .huffmanCode: 0x64000000ul, .bitLength: 6}, // '3' 011001
148 { .byteValue: 52, .huffmanCode: 0x68000000ul, .bitLength: 6}, // '4' 011010
149 { .byteValue: 53, .huffmanCode: 0x6c000000ul, .bitLength: 6}, // '5' 011011
150 { .byteValue: 54, .huffmanCode: 0x70000000ul, .bitLength: 6}, // '6' 011100
151 { .byteValue: 55, .huffmanCode: 0x74000000ul, .bitLength: 6}, // '7' 011101
152 { .byteValue: 56, .huffmanCode: 0x78000000ul, .bitLength: 6}, // '8' 011110
153 { .byteValue: 57, .huffmanCode: 0x7c000000ul, .bitLength: 6}, // '9' 011111
154 { .byteValue: 58, .huffmanCode: 0xb8000000ul, .bitLength: 7}, // ':' 1011100
155 { .byteValue: 59, .huffmanCode: 0xfb000000ul, .bitLength: 8}, // ';' 11111011
156 { .byteValue: 60, .huffmanCode: 0xfff80000ul, .bitLength: 15}, // '<' 11111111|1111100
157 { .byteValue: 61, .huffmanCode: 0x80000000ul, .bitLength: 6}, // '=' 100000
158 { .byteValue: 62, .huffmanCode: 0xffb00000ul, .bitLength: 12}, // '>' 11111111|1011
159 { .byteValue: 63, .huffmanCode: 0xff000000ul, .bitLength: 10}, // '?' 11111111|00
160 { .byteValue: 64, .huffmanCode: 0xffd00000ul, .bitLength: 13}, // '@' 11111111|11010
161 { .byteValue: 65, .huffmanCode: 0x84000000ul, .bitLength: 6}, // 'A' 100001
162 { .byteValue: 66, .huffmanCode: 0xba000000ul, .bitLength: 7}, // 'B' 1011101
163 { .byteValue: 67, .huffmanCode: 0xbc000000ul, .bitLength: 7}, // 'C' 1011110
164 { .byteValue: 68, .huffmanCode: 0xbe000000ul, .bitLength: 7}, // 'D' 1011111
165 { .byteValue: 69, .huffmanCode: 0xc0000000ul, .bitLength: 7}, // 'E' 1100000
166 { .byteValue: 70, .huffmanCode: 0xc2000000ul, .bitLength: 7}, // 'F' 1100001
167 { .byteValue: 71, .huffmanCode: 0xc4000000ul, .bitLength: 7}, // 'G' 1100010
168 { .byteValue: 72, .huffmanCode: 0xc6000000ul, .bitLength: 7}, // 'H' 1100011
169 { .byteValue: 73, .huffmanCode: 0xc8000000ul, .bitLength: 7}, // 'I' 1100100
170 { .byteValue: 74, .huffmanCode: 0xca000000ul, .bitLength: 7}, // 'J' 1100101
171 { .byteValue: 75, .huffmanCode: 0xcc000000ul, .bitLength: 7}, // 'K' 1100110
172 { .byteValue: 76, .huffmanCode: 0xce000000ul, .bitLength: 7}, // 'L' 1100111
173 { .byteValue: 77, .huffmanCode: 0xd0000000ul, .bitLength: 7}, // 'M' 1101000
174 { .byteValue: 78, .huffmanCode: 0xd2000000ul, .bitLength: 7}, // 'N' 1101001
175 { .byteValue: 79, .huffmanCode: 0xd4000000ul, .bitLength: 7}, // 'O' 1101010
176 { .byteValue: 80, .huffmanCode: 0xd6000000ul, .bitLength: 7}, // 'P' 1101011
177 { .byteValue: 81, .huffmanCode: 0xd8000000ul, .bitLength: 7}, // 'Q' 1101100
178 { .byteValue: 82, .huffmanCode: 0xda000000ul, .bitLength: 7}, // 'R' 1101101
179 { .byteValue: 83, .huffmanCode: 0xdc000000ul, .bitLength: 7}, // 'S' 1101110
180 { .byteValue: 84, .huffmanCode: 0xde000000ul, .bitLength: 7}, // 'T' 1101111
181 { .byteValue: 85, .huffmanCode: 0xe0000000ul, .bitLength: 7}, // 'U' 1110000
182 { .byteValue: 86, .huffmanCode: 0xe2000000ul, .bitLength: 7}, // 'V' 1110001
183 { .byteValue: 87, .huffmanCode: 0xe4000000ul, .bitLength: 7}, // 'W' 1110010
184 { .byteValue: 88, .huffmanCode: 0xfc000000ul, .bitLength: 8}, // 'X' 11111100
185 { .byteValue: 89, .huffmanCode: 0xe6000000ul, .bitLength: 7}, // 'Y' 1110011
186 { .byteValue: 90, .huffmanCode: 0xfd000000ul, .bitLength: 8}, // 'Z' 11111101
187 { .byteValue: 91, .huffmanCode: 0xffd80000ul, .bitLength: 13}, // '[' 11111111|11011
188 { .byteValue: 92, .huffmanCode: 0xfffe0000ul, .bitLength: 19}, // '\' 11111111|11111110|000
189 { .byteValue: 93, .huffmanCode: 0xffe00000ul, .bitLength: 13}, // ']' 11111111|11100
190 { .byteValue: 94, .huffmanCode: 0xfff00000ul, .bitLength: 14}, // '^' 11111111|111100
191 { .byteValue: 95, .huffmanCode: 0x88000000ul, .bitLength: 6}, // '_' 100010
192 { .byteValue: 96, .huffmanCode: 0xfffa0000ul, .bitLength: 15}, // '`' 11111111|1111101
193 { .byteValue: 97, .huffmanCode: 0x18000000ul, .bitLength: 5}, // 'a' 00011
194 { .byteValue: 98, .huffmanCode: 0x8c000000ul, .bitLength: 6}, // 'b' 100011
195 { .byteValue: 99, .huffmanCode: 0x20000000ul, .bitLength: 5}, // 'c' 00100
196 {.byteValue: 100, .huffmanCode: 0x90000000ul, .bitLength: 6}, // 'd' 100100
197 {.byteValue: 101, .huffmanCode: 0x28000000ul, .bitLength: 5}, // 'e' 00101
198 {.byteValue: 102, .huffmanCode: 0x94000000ul, .bitLength: 6}, // 'f' 100101
199 {.byteValue: 103, .huffmanCode: 0x98000000ul, .bitLength: 6}, // 'g' 100110
200 {.byteValue: 104, .huffmanCode: 0x9c000000ul, .bitLength: 6}, // 'h' 100111
201 {.byteValue: 105, .huffmanCode: 0x30000000ul, .bitLength: 5}, // 'i' 00110
202 {.byteValue: 106, .huffmanCode: 0xe8000000ul, .bitLength: 7}, // 'j' 1110100
203 {.byteValue: 107, .huffmanCode: 0xea000000ul, .bitLength: 7}, // 'k' 1110101
204 {.byteValue: 108, .huffmanCode: 0xa0000000ul, .bitLength: 6}, // 'l' 101000
205 {.byteValue: 109, .huffmanCode: 0xa4000000ul, .bitLength: 6}, // 'm' 101001
206 {.byteValue: 110, .huffmanCode: 0xa8000000ul, .bitLength: 6}, // 'n' 101010
207 {.byteValue: 111, .huffmanCode: 0x38000000ul, .bitLength: 5}, // 'o' 00111
208 {.byteValue: 112, .huffmanCode: 0xac000000ul, .bitLength: 6}, // 'p' 101011
209 {.byteValue: 113, .huffmanCode: 0xec000000ul, .bitLength: 7}, // 'q' 1110110
210 {.byteValue: 114, .huffmanCode: 0xb0000000ul, .bitLength: 6}, // 'r' 101100
211 {.byteValue: 115, .huffmanCode: 0x40000000ul, .bitLength: 5}, // 's' 01000
212 {.byteValue: 116, .huffmanCode: 0x48000000ul, .bitLength: 5}, // 't' 01001
213 {.byteValue: 117, .huffmanCode: 0xb4000000ul, .bitLength: 6}, // 'u' 101101
214 {.byteValue: 118, .huffmanCode: 0xee000000ul, .bitLength: 7}, // 'v' 1110111
215 {.byteValue: 119, .huffmanCode: 0xf0000000ul, .bitLength: 7}, // 'w' 1111000
216 {.byteValue: 120, .huffmanCode: 0xf2000000ul, .bitLength: 7}, // 'x' 1111001
217 {.byteValue: 121, .huffmanCode: 0xf4000000ul, .bitLength: 7}, // 'y' 1111010
218 {.byteValue: 122, .huffmanCode: 0xf6000000ul, .bitLength: 7}, // 'z' 1111011
219 {.byteValue: 123, .huffmanCode: 0xfffc0000ul, .bitLength: 15}, // '{' 11111111|1111110
220 {.byteValue: 124, .huffmanCode: 0xff800000ul, .bitLength: 11}, // '|' 11111111|100
221 {.byteValue: 125, .huffmanCode: 0xfff40000ul, .bitLength: 14}, // '}' 11111111|111101
222 {.byteValue: 126, .huffmanCode: 0xffe80000ul, .bitLength: 13}, // '~' 11111111|11101
223 {.byteValue: 127, .huffmanCode: 0xffffffc0ul, .bitLength: 28}, // 11111111|11111111|11111111|1100
224 {.byteValue: 128, .huffmanCode: 0xfffe6000ul, .bitLength: 20}, // 11111111|11111110|0110
225 {.byteValue: 129, .huffmanCode: 0xffff4800ul, .bitLength: 22}, // 11111111|11111111|010010
226 {.byteValue: 130, .huffmanCode: 0xfffe7000ul, .bitLength: 20}, // 11111111|11111110|0111
227 {.byteValue: 131, .huffmanCode: 0xfffe8000ul, .bitLength: 20}, // 11111111|11111110|1000
228 {.byteValue: 132, .huffmanCode: 0xffff4c00ul, .bitLength: 22}, // 11111111|11111111|010011
229 {.byteValue: 133, .huffmanCode: 0xffff5000ul, .bitLength: 22}, // 11111111|11111111|010100
230 {.byteValue: 134, .huffmanCode: 0xffff5400ul, .bitLength: 22}, // 11111111|11111111|010101
231 {.byteValue: 135, .huffmanCode: 0xffffb200ul, .bitLength: 23}, // 11111111|11111111|1011001
232 {.byteValue: 136, .huffmanCode: 0xffff5800ul, .bitLength: 22}, // 11111111|11111111|010110
233 {.byteValue: 137, .huffmanCode: 0xffffb400ul, .bitLength: 23}, // 11111111|11111111|1011010
234 {.byteValue: 138, .huffmanCode: 0xffffb600ul, .bitLength: 23}, // 11111111|11111111|1011011
235 {.byteValue: 139, .huffmanCode: 0xffffb800ul, .bitLength: 23}, // 11111111|11111111|1011100
236 {.byteValue: 140, .huffmanCode: 0xffffba00ul, .bitLength: 23}, // 11111111|11111111|1011101
237 {.byteValue: 141, .huffmanCode: 0xffffbc00ul, .bitLength: 23}, // 11111111|11111111|1011110
238 {.byteValue: 142, .huffmanCode: 0xffffeb00ul, .bitLength: 24}, // 11111111|11111111|11101011
239 {.byteValue: 143, .huffmanCode: 0xffffbe00ul, .bitLength: 23}, // 11111111|11111111|1011111
240 {.byteValue: 144, .huffmanCode: 0xffffec00ul, .bitLength: 24}, // 11111111|11111111|11101100
241 {.byteValue: 145, .huffmanCode: 0xffffed00ul, .bitLength: 24}, // 11111111|11111111|11101101
242 {.byteValue: 146, .huffmanCode: 0xffff5c00ul, .bitLength: 22}, // 11111111|11111111|010111
243 {.byteValue: 147, .huffmanCode: 0xffffc000ul, .bitLength: 23}, // 11111111|11111111|1100000
244 {.byteValue: 148, .huffmanCode: 0xffffee00ul, .bitLength: 24}, // 11111111|11111111|11101110
245 {.byteValue: 149, .huffmanCode: 0xffffc200ul, .bitLength: 23}, // 11111111|11111111|1100001
246 {.byteValue: 150, .huffmanCode: 0xffffc400ul, .bitLength: 23}, // 11111111|11111111|1100010
247 {.byteValue: 151, .huffmanCode: 0xffffc600ul, .bitLength: 23}, // 11111111|11111111|1100011
248 {.byteValue: 152, .huffmanCode: 0xffffc800ul, .bitLength: 23}, // 11111111|11111111|1100100
249 {.byteValue: 153, .huffmanCode: 0xfffee000ul, .bitLength: 21}, // 11111111|11111110|11100
250 {.byteValue: 154, .huffmanCode: 0xffff6000ul, .bitLength: 22}, // 11111111|11111111|011000
251 {.byteValue: 155, .huffmanCode: 0xffffca00ul, .bitLength: 23}, // 11111111|11111111|1100101
252 {.byteValue: 156, .huffmanCode: 0xffff6400ul, .bitLength: 22}, // 11111111|11111111|011001
253 {.byteValue: 157, .huffmanCode: 0xffffcc00ul, .bitLength: 23}, // 11111111|11111111|1100110
254 {.byteValue: 158, .huffmanCode: 0xffffce00ul, .bitLength: 23}, // 11111111|11111111|1100111
255 {.byteValue: 159, .huffmanCode: 0xffffef00ul, .bitLength: 24}, // 11111111|11111111|11101111
256 {.byteValue: 160, .huffmanCode: 0xffff6800ul, .bitLength: 22}, // 11111111|11111111|011010
257 {.byteValue: 161, .huffmanCode: 0xfffee800ul, .bitLength: 21}, // 11111111|11111110|11101
258 {.byteValue: 162, .huffmanCode: 0xfffe9000ul, .bitLength: 20}, // 11111111|11111110|1001
259 {.byteValue: 163, .huffmanCode: 0xffff6c00ul, .bitLength: 22}, // 11111111|11111111|011011
260 {.byteValue: 164, .huffmanCode: 0xffff7000ul, .bitLength: 22}, // 11111111|11111111|011100
261 {.byteValue: 165, .huffmanCode: 0xffffd000ul, .bitLength: 23}, // 11111111|11111111|1101000
262 {.byteValue: 166, .huffmanCode: 0xffffd200ul, .bitLength: 23}, // 11111111|11111111|1101001
263 {.byteValue: 167, .huffmanCode: 0xfffef000ul, .bitLength: 21}, // 11111111|11111110|11110
264 {.byteValue: 168, .huffmanCode: 0xffffd400ul, .bitLength: 23}, // 11111111|11111111|1101010
265 {.byteValue: 169, .huffmanCode: 0xffff7400ul, .bitLength: 22}, // 11111111|11111111|011101
266 {.byteValue: 170, .huffmanCode: 0xffff7800ul, .bitLength: 22}, // 11111111|11111111|011110
267 {.byteValue: 171, .huffmanCode: 0xfffff000ul, .bitLength: 24}, // 11111111|11111111|11110000
268 {.byteValue: 172, .huffmanCode: 0xfffef800ul, .bitLength: 21}, // 11111111|11111110|11111
269 {.byteValue: 173, .huffmanCode: 0xffff7c00ul, .bitLength: 22}, // 11111111|11111111|011111
270 {.byteValue: 174, .huffmanCode: 0xffffd600ul, .bitLength: 23}, // 11111111|11111111|1101011
271 {.byteValue: 175, .huffmanCode: 0xffffd800ul, .bitLength: 23}, // 11111111|11111111|1101100
272 {.byteValue: 176, .huffmanCode: 0xffff0000ul, .bitLength: 21}, // 11111111|11111111|00000
273 {.byteValue: 177, .huffmanCode: 0xffff0800ul, .bitLength: 21}, // 11111111|11111111|00001
274 {.byteValue: 178, .huffmanCode: 0xffff8000ul, .bitLength: 22}, // 11111111|11111111|100000
275 {.byteValue: 179, .huffmanCode: 0xffff1000ul, .bitLength: 21}, // 11111111|11111111|00010
276 {.byteValue: 180, .huffmanCode: 0xffffda00ul, .bitLength: 23}, // 11111111|11111111|1101101
277 {.byteValue: 181, .huffmanCode: 0xffff8400ul, .bitLength: 22}, // 11111111|11111111|100001
278 {.byteValue: 182, .huffmanCode: 0xffffdc00ul, .bitLength: 23}, // 11111111|11111111|1101110
279 {.byteValue: 183, .huffmanCode: 0xffffde00ul, .bitLength: 23}, // 11111111|11111111|1101111
280 {.byteValue: 184, .huffmanCode: 0xfffea000ul, .bitLength: 20}, // 11111111|11111110|1010
281 {.byteValue: 185, .huffmanCode: 0xffff8800ul, .bitLength: 22}, // 11111111|11111111|100010
282 {.byteValue: 186, .huffmanCode: 0xffff8c00ul, .bitLength: 22}, // 11111111|11111111|100011
283 {.byteValue: 187, .huffmanCode: 0xffff9000ul, .bitLength: 22}, // 11111111|11111111|100100
284 {.byteValue: 188, .huffmanCode: 0xffffe000ul, .bitLength: 23}, // 11111111|11111111|1110000
285 {.byteValue: 189, .huffmanCode: 0xffff9400ul, .bitLength: 22}, // 11111111|11111111|100101
286 {.byteValue: 190, .huffmanCode: 0xffff9800ul, .bitLength: 22}, // 11111111|11111111|100110
287 {.byteValue: 191, .huffmanCode: 0xffffe200ul, .bitLength: 23}, // 11111111|11111111|1110001
288 {.byteValue: 192, .huffmanCode: 0xfffff800ul, .bitLength: 26}, // 11111111|11111111|11111000|00
289 {.byteValue: 193, .huffmanCode: 0xfffff840ul, .bitLength: 26}, // 11111111|11111111|11111000|01
290 {.byteValue: 194, .huffmanCode: 0xfffeb000ul, .bitLength: 20}, // 11111111|11111110|1011
291 {.byteValue: 195, .huffmanCode: 0xfffe2000ul, .bitLength: 19}, // 11111111|11111110|001
292 {.byteValue: 196, .huffmanCode: 0xffff9c00ul, .bitLength: 22}, // 11111111|11111111|100111
293 {.byteValue: 197, .huffmanCode: 0xffffe400ul, .bitLength: 23}, // 11111111|11111111|1110010
294 {.byteValue: 198, .huffmanCode: 0xffffa000ul, .bitLength: 22}, // 11111111|11111111|101000
295 {.byteValue: 199, .huffmanCode: 0xfffff600ul, .bitLength: 25}, // 11111111|11111111|11110110|0
296 {.byteValue: 200, .huffmanCode: 0xfffff880ul, .bitLength: 26}, // 11111111|11111111|11111000|10
297 {.byteValue: 201, .huffmanCode: 0xfffff8c0ul, .bitLength: 26}, // 11111111|11111111|11111000|11
298 {.byteValue: 202, .huffmanCode: 0xfffff900ul, .bitLength: 26}, // 11111111|11111111|11111001|00
299 {.byteValue: 203, .huffmanCode: 0xfffffbc0ul, .bitLength: 27}, // 11111111|11111111|11111011|110
300 {.byteValue: 204, .huffmanCode: 0xfffffbe0ul, .bitLength: 27}, // 11111111|11111111|11111011|111
301 {.byteValue: 205, .huffmanCode: 0xfffff940ul, .bitLength: 26}, // 11111111|11111111|11111001|01
302 {.byteValue: 206, .huffmanCode: 0xfffff100ul, .bitLength: 24}, // 11111111|11111111|11110001
303 {.byteValue: 207, .huffmanCode: 0xfffff680ul, .bitLength: 25}, // 11111111|11111111|11110110|1
304 {.byteValue: 208, .huffmanCode: 0xfffe4000ul, .bitLength: 19}, // 11111111|11111110|010
305 {.byteValue: 209, .huffmanCode: 0xffff1800ul, .bitLength: 21}, // 11111111|11111111|00011
306 {.byteValue: 210, .huffmanCode: 0xfffff980ul, .bitLength: 26}, // 11111111|11111111|11111001|10
307 {.byteValue: 211, .huffmanCode: 0xfffffc00ul, .bitLength: 27}, // 11111111|11111111|11111100|000
308 {.byteValue: 212, .huffmanCode: 0xfffffc20ul, .bitLength: 27}, // 11111111|11111111|11111100|001
309 {.byteValue: 213, .huffmanCode: 0xfffff9c0ul, .bitLength: 26}, // 11111111|11111111|11111001|11
310 {.byteValue: 214, .huffmanCode: 0xfffffc40ul, .bitLength: 27}, // 11111111|11111111|11111100|010
311 {.byteValue: 215, .huffmanCode: 0xfffff200ul, .bitLength: 24}, // 11111111|11111111|11110010
312 {.byteValue: 216, .huffmanCode: 0xffff2000ul, .bitLength: 21}, // 11111111|11111111|00100
313 {.byteValue: 217, .huffmanCode: 0xffff2800ul, .bitLength: 21}, // 11111111|11111111|00101
314 {.byteValue: 218, .huffmanCode: 0xfffffa00ul, .bitLength: 26}, // 11111111|11111111|11111010|00
315 {.byteValue: 219, .huffmanCode: 0xfffffa40ul, .bitLength: 26}, // 11111111|11111111|11111010|01
316 {.byteValue: 220, .huffmanCode: 0xffffffd0ul, .bitLength: 28}, // 11111111|11111111|11111111|1101
317 {.byteValue: 221, .huffmanCode: 0xfffffc60ul, .bitLength: 27}, // 11111111|11111111|11111100|011
318 {.byteValue: 222, .huffmanCode: 0xfffffc80ul, .bitLength: 27}, // 11111111|11111111|11111100|100
319 {.byteValue: 223, .huffmanCode: 0xfffffca0ul, .bitLength: 27}, // 11111111|11111111|11111100|101
320 {.byteValue: 224, .huffmanCode: 0xfffec000ul, .bitLength: 20}, // 11111111|11111110|1100
321 {.byteValue: 225, .huffmanCode: 0xfffff300ul, .bitLength: 24}, // 11111111|11111111|11110011
322 {.byteValue: 226, .huffmanCode: 0xfffed000ul, .bitLength: 20}, // 11111111|11111110|1101
323 {.byteValue: 227, .huffmanCode: 0xffff3000ul, .bitLength: 21}, // 11111111|11111111|00110
324 {.byteValue: 228, .huffmanCode: 0xffffa400ul, .bitLength: 22}, // 11111111|11111111|101001
325 {.byteValue: 229, .huffmanCode: 0xffff3800ul, .bitLength: 21}, // 11111111|11111111|00111
326 {.byteValue: 230, .huffmanCode: 0xffff4000ul, .bitLength: 21}, // 11111111|11111111|01000
327 {.byteValue: 231, .huffmanCode: 0xffffe600ul, .bitLength: 23}, // 11111111|11111111|1110011
328 {.byteValue: 232, .huffmanCode: 0xffffa800ul, .bitLength: 22}, // 11111111|11111111|101010
329 {.byteValue: 233, .huffmanCode: 0xffffac00ul, .bitLength: 22}, // 11111111|11111111|101011
330 {.byteValue: 234, .huffmanCode: 0xfffff700ul, .bitLength: 25}, // 11111111|11111111|11110111|0
331 {.byteValue: 235, .huffmanCode: 0xfffff780ul, .bitLength: 25}, // 11111111|11111111|11110111|1
332 {.byteValue: 236, .huffmanCode: 0xfffff400ul, .bitLength: 24}, // 11111111|11111111|11110100
333 {.byteValue: 237, .huffmanCode: 0xfffff500ul, .bitLength: 24}, // 11111111|11111111|11110101
334 {.byteValue: 238, .huffmanCode: 0xfffffa80ul, .bitLength: 26}, // 11111111|11111111|11111010|10
335 {.byteValue: 239, .huffmanCode: 0xffffe800ul, .bitLength: 23}, // 11111111|11111111|1110100
336 {.byteValue: 240, .huffmanCode: 0xfffffac0ul, .bitLength: 26}, // 11111111|11111111|11111010|11
337 {.byteValue: 241, .huffmanCode: 0xfffffcc0ul, .bitLength: 27}, // 11111111|11111111|11111100|110
338 {.byteValue: 242, .huffmanCode: 0xfffffb00ul, .bitLength: 26}, // 11111111|11111111|11111011|00
339 {.byteValue: 243, .huffmanCode: 0xfffffb40ul, .bitLength: 26}, // 11111111|11111111|11111011|01
340 {.byteValue: 244, .huffmanCode: 0xfffffce0ul, .bitLength: 27}, // 11111111|11111111|11111100|111
341 {.byteValue: 245, .huffmanCode: 0xfffffd00ul, .bitLength: 27}, // 11111111|11111111|11111101|000
342 {.byteValue: 246, .huffmanCode: 0xfffffd20ul, .bitLength: 27}, // 11111111|11111111|11111101|001
343 {.byteValue: 247, .huffmanCode: 0xfffffd40ul, .bitLength: 27}, // 11111111|11111111|11111101|010
344 {.byteValue: 248, .huffmanCode: 0xfffffd60ul, .bitLength: 27}, // 11111111|11111111|11111101|011
345 {.byteValue: 249, .huffmanCode: 0xffffffe0ul, .bitLength: 28}, // 11111111|11111111|11111111|1110
346 {.byteValue: 250, .huffmanCode: 0xfffffd80ul, .bitLength: 27}, // 11111111|11111111|11111101|100
347 {.byteValue: 251, .huffmanCode: 0xfffffda0ul, .bitLength: 27}, // 11111111|11111111|11111101|101
348 {.byteValue: 252, .huffmanCode: 0xfffffdc0ul, .bitLength: 27}, // 11111111|11111111|11111101|110
349 {.byteValue: 253, .huffmanCode: 0xfffffde0ul, .bitLength: 27}, // 11111111|11111111|11111101|111
350 {.byteValue: 254, .huffmanCode: 0xfffffe00ul, .bitLength: 27}, // 11111111|11111111|11111110|000
351 {.byteValue: 255, .huffmanCode: 0xfffffb80ul, .bitLength: 26}, // 11111111|11111111|11111011|10
352 {.byteValue: 256, .huffmanCode: 0xfffffffcul, .bitLength: 30} // EOS 11111111|11111111|11111111|111111
353};
354
355void write_huffman_code(BitOStream &outputStream, const CodeEntry &code)
356{
357 // Append octet by octet.
358 auto bitLength = code.bitLength;
359 const auto hc = code.huffmanCode >> (32 - bitLength);
360
361 if (bitLength > 24) {
362 outputStream.writeBits(bits: uchar(hc >> 24), bitLength: bitLength - 24);
363 bitLength = 24;
364 }
365
366 if (bitLength > 16) {
367 outputStream.writeBits(bits: uchar(hc >> 16), bitLength: bitLength - 16);
368 bitLength = 16;
369 }
370
371 if (bitLength > 8) {
372 outputStream.writeBits(bits: uchar(hc >> 8), bitLength: bitLength - 8);
373 bitLength = 8;
374 }
375
376 outputStream.writeBits(bits: uchar(hc), bitLength);
377}
378
379}
380
381// That's from HPACK's specs - we deal with octets.
382static_assert(std::numeric_limits<uchar>::digits == 8, "octets expected");
383
384quint64 huffman_encoded_bit_length(const QByteArray &inputData)
385{
386 quint64 bitLength = 0;
387 for (int i = 0, e = inputData.size(); i < e; ++i)
388 bitLength += staticHuffmanCodeTable[int(inputData[i])].bitLength;
389
390 return bitLength;
391}
392
393void huffman_encode_string(const QByteArray &inputData, BitOStream &outputStream)
394{
395 for (int i = 0, e = inputData.size(); i < e; ++i) {
396 const auto value = uchar(inputData[i]);
397 write_huffman_code(outputStream, code: staticHuffmanCodeTable[value]);
398 }
399
400 // Pad bits ...
401 if (outputStream.bitLength() % 8)
402 outputStream.writeBits(bits: 0xff, bitLength: 8 - outputStream.bitLength() % 8);
403}
404
405bool padding_is_valid(quint32 chunk, quint32 nBits)
406{
407 Q_ASSERT(nBits);
408
409 // HPACK, 5.2: "A padding strictly longer than 7 bits MUST be
410 // treated as a decoding error."
411 if (nBits > 7)
412 return false;
413 // HPACK, 5.2:
414 // "A padding not corresponding to the most significant bits
415 // of the code for the EOS symbol MUST be treated as a decoding error."
416 return (chunk >> (32 - nBits)) == quint32((1 << nBits) - 1);
417}
418
419HuffmanDecoder::HuffmanDecoder()
420 : minCodeLength()
421{
422 const auto nCodes = sizeof staticHuffmanCodeTable / sizeof staticHuffmanCodeTable[0];
423
424 std::vector<CodeEntry> symbols(staticHuffmanCodeTable, staticHuffmanCodeTable + nCodes);
425 // Now we sort: by bit length first (in the descending order) and by the symbol
426 // value (descending). Descending order: to make sure we do not create prefix tables with
427 // short 'indexLength' first and having longer codes that do not fit into such tables later.
428 std::sort(first: symbols.begin(), last: symbols.end(), comp: [](const CodeEntry &code1, const CodeEntry &code2) {
429 if (code1.bitLength == code2.bitLength)
430 return code1.byteValue > code2.byteValue;
431 return code1.bitLength > code2.bitLength;
432 });
433
434 minCodeLength = symbols.back().bitLength; // The shortest one, currently it's 5.
435
436 // TODO: add a verification - Huffman codes
437 // within a given bit length range also
438 // should be in descending order.
439
440 // Initialize 'prefixTables' and 'tableData'.
441 addTable(prefixLength: 0, indexLength: quint32(BitConstants::rootPrefix)); // 'root' table.
442
443 for (const auto &s : symbols) {
444 quint32 tableIndex = 0;
445 while (true) {
446 Q_ASSERT(tableIndex < prefixTables.size());
447 // Note, by value - since prefixTables will be updated in between.
448 const auto table = prefixTables[tableIndex];
449 // We skip prefixed bits (if any) and use indexed bits only:
450 const auto entryIndex = s.huffmanCode << table.prefixLength >> (32 - table.indexLength);
451 // Again, by value.
452 PrefixTableEntry entry = tableEntry(table, index: entryIndex);
453 // How many bits were coded by previous tables and this table:
454 const auto codedLength = table.prefixLength + table.indexLength;
455 if (codedLength < s.bitLength) {
456 // We have to add a new prefix table ... (if it's not done yet).
457 if (!entry.bitLength) {
458 entry.nextTable = addTable(prefixLength: codedLength, indexLength: std::min<quint32>(a: quint32(BitConstants::childPrefix),
459 b: s.bitLength - codedLength));
460 entry.bitLength = s.bitLength;
461 entry.byteValue = s.byteValue;
462 setTableEntry(table, index: entryIndex, entry);
463 }
464 tableIndex = entry.nextTable;
465 } else {
466 // We found the slot for our code (terminal entry):
467 entry.byteValue = s.byteValue;
468 entry.bitLength = s.bitLength;
469 // Refer to our own table as 'nextTable':
470 entry.nextTable = tableIndex;
471 setTableEntry(table, index: entryIndex, entry);
472 break;
473 }
474 }
475 }
476
477 // Now, we have a table(s) and have to fill 'holes' to
478 // 'fix' terminal entries.
479 for (const auto &table : prefixTables) {
480 const quint32 codedLength = table.prefixLength + table.indexLength;
481 for (quint32 j = 0; j < table.size();) {
482 const PrefixTableEntry &entry = tableEntry(table, index: j);
483 if (entry.bitLength && entry.bitLength < codedLength) {
484 const quint32 range = 1 << (codedLength - entry.bitLength);
485 for (quint32 k = 1; k < range; ++k)
486 setTableEntry(table, index: j + k, entry);
487 j += range;
488 } else {
489 ++j;
490 }
491 }
492 }
493}
494
495bool HuffmanDecoder::decodeStream(BitIStream &inputStream, QByteArray &outputBuffer)
496{
497 while (true) {
498 quint32 chunk = 0;
499 const quint32 readBits = inputStream.peekBits(from: inputStream.streamOffset(), length: 32, dstPtr: &chunk);
500 if (!readBits)
501 return !inputStream.hasMoreBits();
502
503 if (readBits < minCodeLength) {
504 inputStream.skipBits(nBits: readBits);
505 return padding_is_valid(chunk, nBits: readBits);
506 }
507
508 quint32 tableIndex = 0;
509 const PrefixTable *table = &prefixTables[tableIndex];
510 quint32 entryIndex = chunk >> (32 - table->indexLength);
511 PrefixTableEntry entry = tableEntry(table: *table, index: entryIndex);
512
513 while (true) {
514 if (entry.nextTable == tableIndex)
515 break;
516
517 tableIndex = entry.nextTable;
518 table = &prefixTables[tableIndex];
519 entryIndex = chunk << table->prefixLength >> (32 - table->indexLength);
520 entry = tableEntry(table: *table, index: entryIndex);
521 }
522
523 if (entry.bitLength > readBits) {
524 inputStream.skipBits(nBits: readBits);
525 return padding_is_valid(chunk, nBits: readBits);
526 }
527
528 if (!entry.bitLength || entry.byteValue == 256) {
529 //EOS (256) == compression error (HPACK).
530 inputStream.skipBits(nBits: readBits);
531 return false;
532 }
533
534 outputBuffer.append(c: entry.byteValue);
535 inputStream.skipBits(nBits: entry.bitLength);
536 }
537
538 return false;
539}
540
541quint32 HuffmanDecoder::addTable(quint32 prefix, quint32 index)
542{
543 PrefixTable newTable{prefix, index};
544 newTable.offset = quint32(tableData.size());
545 prefixTables.push_back(x: newTable);
546 // Add entries for this table:
547 tableData.resize(new_size: tableData.size() + newTable.size());
548
549 return quint32(prefixTables.size() - 1);
550}
551
552PrefixTableEntry HuffmanDecoder::tableEntry(const PrefixTable &table, quint32 index)
553{
554 Q_ASSERT(index < table.size());
555 return tableData[table.offset + index];
556}
557
558void HuffmanDecoder::setTableEntry(const PrefixTable &table, quint32 index,
559 const PrefixTableEntry &entry)
560{
561 Q_ASSERT(index < table.size());
562 tableData[table.offset + index] = entry;
563}
564
565bool huffman_decode_string(BitIStream &inputStream, QByteArray *outputBuffer)
566{
567 Q_ASSERT(outputBuffer);
568
569 static HuffmanDecoder decoder;
570 return decoder.decodeStream(inputStream, outputBuffer&: *outputBuffer);
571}
572
573}
574
575QT_END_NAMESPACE
576

source code of qtbase/src/network/access/http2/huffman.cpp