1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "bitstreams_p.h"
5#include "huffman_p.h"
6
7#include <QtCore/qbytearray.h>
8
9#include <algorithm>
10#include <limits>
11
12QT_BEGIN_NAMESPACE
13
14namespace HPack
15{
16
17/*
18 The static Huffman code used here was extracted from:
19 https://http2.github.io/http2-spec/compression.html#huffman.code
20
21 This code was generated from statistics obtained on a large
22 sample of HTTP headers. It is a canonical Huffman code
23 with some tweaking to ensure that no symbol has a unique
24 code length. All codes were left-aligned - for implementation
25 convenience.
26
27 Using binary trees to implement decoding would be prohibitively
28 expensive (both memory and time-wise). Instead we use a table-based
29 approach and any given code itself works as an index into such table(s).
30 We have 256 possible byte values and code lengths in
31 a range [5, 26]. This would require a huge table (and most of entries
32 would be 'wasted', since we only have to encode 256 elements).
33 Instead we use multi-level tables. The first level table
34 is using 9-bit length index; some entries in this table are 'terminal',
35 some reference the next level table(s).
36
37 For example, bytes with values 48 and 49 (ASCII codes for '0' and '1')
38 both have code length 5, Huffman codes are: 00000 and 00001. They
39 both are placed in the 'root' table,
40 the 'root' table has index length == 9:
41 [00000 | 4 remaining bits]
42 ...
43 [00001 | 4 remaining bits]
44
45 All entries with indices between these two will 'point' to value 48
46 with bitLength == 5 so that bit stream (for example) 000001010 will be
47 decoded as: 48 + "put 1010 back into bitstream".
48
49 A good description can be found here:
50 http://commandlinefanatic.com/cgi-bin/showarticle.cgi?article=art007
51 or just google "Efficient Huffman Decoding".
52 Also see comments below about 'filling holes'.
53*/
54
55namespace
56{
57
58const CodeEntry staticHuffmanCodeTable[]
59{
60 { .byteValue: 0, .huffmanCode: 0xffc00000ul, .bitLength: 13}, // 11111111|11000
61 { .byteValue: 1, .huffmanCode: 0xffffb000ul, .bitLength: 23}, // 11111111|11111111|1011000
62 { .byteValue: 2, .huffmanCode: 0xfffffe20ul, .bitLength: 28}, // 11111111|11111111|11111110|0010
63 { .byteValue: 3, .huffmanCode: 0xfffffe30ul, .bitLength: 28}, // 11111111|11111111|11111110|0011
64 { .byteValue: 4, .huffmanCode: 0xfffffe40ul, .bitLength: 28}, // 11111111|11111111|11111110|0100
65 { .byteValue: 5, .huffmanCode: 0xfffffe50ul, .bitLength: 28}, // 11111111|11111111|11111110|0101
66 { .byteValue: 6, .huffmanCode: 0xfffffe60ul, .bitLength: 28}, // 11111111|11111111|11111110|0110
67 { .byteValue: 7, .huffmanCode: 0xfffffe70ul, .bitLength: 28}, // 11111111|11111111|11111110|0111
68 { .byteValue: 8, .huffmanCode: 0xfffffe80ul, .bitLength: 28}, // 11111111|11111111|11111110|1000
69 { .byteValue: 9, .huffmanCode: 0xffffea00ul, .bitLength: 24}, // 11111111|11111111|11101010
70 { .byteValue: 10, .huffmanCode: 0xfffffff0ul, .bitLength: 30}, // 11111111|11111111|11111111|111100
71 { .byteValue: 11, .huffmanCode: 0xfffffe90ul, .bitLength: 28}, // 11111111|11111111|11111110|1001
72 { .byteValue: 12, .huffmanCode: 0xfffffea0ul, .bitLength: 28}, // 11111111|11111111|11111110|1010
73 { .byteValue: 13, .huffmanCode: 0xfffffff4ul, .bitLength: 30}, // 11111111|11111111|11111111|111101
74 { .byteValue: 14, .huffmanCode: 0xfffffeb0ul, .bitLength: 28}, // 11111111|11111111|11111110|1011
75 { .byteValue: 15, .huffmanCode: 0xfffffec0ul, .bitLength: 28}, // 11111111|11111111|11111110|1100
76 { .byteValue: 16, .huffmanCode: 0xfffffed0ul, .bitLength: 28}, // 11111111|11111111|11111110|1101
77 { .byteValue: 17, .huffmanCode: 0xfffffee0ul, .bitLength: 28}, // 11111111|11111111|11111110|1110
78 { .byteValue: 18, .huffmanCode: 0xfffffef0ul, .bitLength: 28}, // 11111111|11111111|11111110|1111
79 { .byteValue: 19, .huffmanCode: 0xffffff00ul, .bitLength: 28}, // 11111111|11111111|11111111|0000
80 { .byteValue: 20, .huffmanCode: 0xffffff10ul, .bitLength: 28}, // 11111111|11111111|11111111|0001
81 { .byteValue: 21, .huffmanCode: 0xffffff20ul, .bitLength: 28}, // 11111111|11111111|11111111|0010
82 { .byteValue: 22, .huffmanCode: 0xfffffff8ul, .bitLength: 30}, // 11111111|11111111|11111111|111110
83 { .byteValue: 23, .huffmanCode: 0xffffff30ul, .bitLength: 28}, // 11111111|11111111|11111111|0011
84 { .byteValue: 24, .huffmanCode: 0xffffff40ul, .bitLength: 28}, // 11111111|11111111|11111111|0100
85 { .byteValue: 25, .huffmanCode: 0xffffff50ul, .bitLength: 28}, // 11111111|11111111|11111111|0101
86 { .byteValue: 26, .huffmanCode: 0xffffff60ul, .bitLength: 28}, // 11111111|11111111|11111111|0110
87 { .byteValue: 27, .huffmanCode: 0xffffff70ul, .bitLength: 28}, // 11111111|11111111|11111111|0111
88 { .byteValue: 28, .huffmanCode: 0xffffff80ul, .bitLength: 28}, // 11111111|11111111|11111111|1000
89 { .byteValue: 29, .huffmanCode: 0xffffff90ul, .bitLength: 28}, // 11111111|11111111|11111111|1001
90 { .byteValue: 30, .huffmanCode: 0xffffffa0ul, .bitLength: 28}, // 11111111|11111111|11111111|1010
91 { .byteValue: 31, .huffmanCode: 0xffffffb0ul, .bitLength: 28}, // 11111111|11111111|11111111|1011
92 { .byteValue: 32, .huffmanCode: 0x50000000ul, .bitLength: 6}, // ' ' 010100
93 { .byteValue: 33, .huffmanCode: 0xfe000000ul, .bitLength: 10}, // '!' 11111110|00
94 { .byteValue: 34, .huffmanCode: 0xfe400000ul, .bitLength: 10}, // '"' 11111110|01
95 { .byteValue: 35, .huffmanCode: 0xffa00000ul, .bitLength: 12}, // '#' 11111111|1010
96 { .byteValue: 36, .huffmanCode: 0xffc80000ul, .bitLength: 13}, // '$' 11111111|11001
97 { .byteValue: 37, .huffmanCode: 0x54000000ul, .bitLength: 6}, // '%' 010101
98 { .byteValue: 38, .huffmanCode: 0xf8000000ul, .bitLength: 8}, // '&' 11111000
99 { .byteValue: 39, .huffmanCode: 0xff400000ul, .bitLength: 11}, // ''' 11111111|010
100 { .byteValue: 40, .huffmanCode: 0xfe800000ul, .bitLength: 10}, // '(' 11111110|10
101 { .byteValue: 41, .huffmanCode: 0xfec00000ul, .bitLength: 10}, // ')' 11111110|11
102 { .byteValue: 42, .huffmanCode: 0xf9000000ul, .bitLength: 8}, // '*' 11111001
103 { .byteValue: 43, .huffmanCode: 0xff600000ul, .bitLength: 11}, // '+' 11111111|011
104 { .byteValue: 44, .huffmanCode: 0xfa000000ul, .bitLength: 8}, // ',' 11111010
105 { .byteValue: 45, .huffmanCode: 0x58000000ul, .bitLength: 6}, // '-' 010110
106 { .byteValue: 46, .huffmanCode: 0x5c000000ul, .bitLength: 6}, // '.' 010111
107 { .byteValue: 47, .huffmanCode: 0x60000000ul, .bitLength: 6}, // '/' 011000
108 { .byteValue: 48, .huffmanCode: 0x00000000ul, .bitLength: 5}, // '0' 00000
109 { .byteValue: 49, .huffmanCode: 0x08000000ul, .bitLength: 5}, // '1' 00001
110 { .byteValue: 50, .huffmanCode: 0x10000000ul, .bitLength: 5}, // '2' 00010
111 { .byteValue: 51, .huffmanCode: 0x64000000ul, .bitLength: 6}, // '3' 011001
112 { .byteValue: 52, .huffmanCode: 0x68000000ul, .bitLength: 6}, // '4' 011010
113 { .byteValue: 53, .huffmanCode: 0x6c000000ul, .bitLength: 6}, // '5' 011011
114 { .byteValue: 54, .huffmanCode: 0x70000000ul, .bitLength: 6}, // '6' 011100
115 { .byteValue: 55, .huffmanCode: 0x74000000ul, .bitLength: 6}, // '7' 011101
116 { .byteValue: 56, .huffmanCode: 0x78000000ul, .bitLength: 6}, // '8' 011110
117 { .byteValue: 57, .huffmanCode: 0x7c000000ul, .bitLength: 6}, // '9' 011111
118 { .byteValue: 58, .huffmanCode: 0xb8000000ul, .bitLength: 7}, // ':' 1011100
119 { .byteValue: 59, .huffmanCode: 0xfb000000ul, .bitLength: 8}, // ';' 11111011
120 { .byteValue: 60, .huffmanCode: 0xfff80000ul, .bitLength: 15}, // '<' 11111111|1111100
121 { .byteValue: 61, .huffmanCode: 0x80000000ul, .bitLength: 6}, // '=' 100000
122 { .byteValue: 62, .huffmanCode: 0xffb00000ul, .bitLength: 12}, // '>' 11111111|1011
123 { .byteValue: 63, .huffmanCode: 0xff000000ul, .bitLength: 10}, // '?' 11111111|00
124 { .byteValue: 64, .huffmanCode: 0xffd00000ul, .bitLength: 13}, // '@' 11111111|11010
125 { .byteValue: 65, .huffmanCode: 0x84000000ul, .bitLength: 6}, // 'A' 100001
126 { .byteValue: 66, .huffmanCode: 0xba000000ul, .bitLength: 7}, // 'B' 1011101
127 { .byteValue: 67, .huffmanCode: 0xbc000000ul, .bitLength: 7}, // 'C' 1011110
128 { .byteValue: 68, .huffmanCode: 0xbe000000ul, .bitLength: 7}, // 'D' 1011111
129 { .byteValue: 69, .huffmanCode: 0xc0000000ul, .bitLength: 7}, // 'E' 1100000
130 { .byteValue: 70, .huffmanCode: 0xc2000000ul, .bitLength: 7}, // 'F' 1100001
131 { .byteValue: 71, .huffmanCode: 0xc4000000ul, .bitLength: 7}, // 'G' 1100010
132 { .byteValue: 72, .huffmanCode: 0xc6000000ul, .bitLength: 7}, // 'H' 1100011
133 { .byteValue: 73, .huffmanCode: 0xc8000000ul, .bitLength: 7}, // 'I' 1100100
134 { .byteValue: 74, .huffmanCode: 0xca000000ul, .bitLength: 7}, // 'J' 1100101
135 { .byteValue: 75, .huffmanCode: 0xcc000000ul, .bitLength: 7}, // 'K' 1100110
136 { .byteValue: 76, .huffmanCode: 0xce000000ul, .bitLength: 7}, // 'L' 1100111
137 { .byteValue: 77, .huffmanCode: 0xd0000000ul, .bitLength: 7}, // 'M' 1101000
138 { .byteValue: 78, .huffmanCode: 0xd2000000ul, .bitLength: 7}, // 'N' 1101001
139 { .byteValue: 79, .huffmanCode: 0xd4000000ul, .bitLength: 7}, // 'O' 1101010
140 { .byteValue: 80, .huffmanCode: 0xd6000000ul, .bitLength: 7}, // 'P' 1101011
141 { .byteValue: 81, .huffmanCode: 0xd8000000ul, .bitLength: 7}, // 'Q' 1101100
142 { .byteValue: 82, .huffmanCode: 0xda000000ul, .bitLength: 7}, // 'R' 1101101
143 { .byteValue: 83, .huffmanCode: 0xdc000000ul, .bitLength: 7}, // 'S' 1101110
144 { .byteValue: 84, .huffmanCode: 0xde000000ul, .bitLength: 7}, // 'T' 1101111
145 { .byteValue: 85, .huffmanCode: 0xe0000000ul, .bitLength: 7}, // 'U' 1110000
146 { .byteValue: 86, .huffmanCode: 0xe2000000ul, .bitLength: 7}, // 'V' 1110001
147 { .byteValue: 87, .huffmanCode: 0xe4000000ul, .bitLength: 7}, // 'W' 1110010
148 { .byteValue: 88, .huffmanCode: 0xfc000000ul, .bitLength: 8}, // 'X' 11111100
149 { .byteValue: 89, .huffmanCode: 0xe6000000ul, .bitLength: 7}, // 'Y' 1110011
150 { .byteValue: 90, .huffmanCode: 0xfd000000ul, .bitLength: 8}, // 'Z' 11111101
151 { .byteValue: 91, .huffmanCode: 0xffd80000ul, .bitLength: 13}, // '[' 11111111|11011
152 { .byteValue: 92, .huffmanCode: 0xfffe0000ul, .bitLength: 19}, // '\' 11111111|11111110|000
153 { .byteValue: 93, .huffmanCode: 0xffe00000ul, .bitLength: 13}, // ']' 11111111|11100
154 { .byteValue: 94, .huffmanCode: 0xfff00000ul, .bitLength: 14}, // '^' 11111111|111100
155 { .byteValue: 95, .huffmanCode: 0x88000000ul, .bitLength: 6}, // '_' 100010
156 { .byteValue: 96, .huffmanCode: 0xfffa0000ul, .bitLength: 15}, // '`' 11111111|1111101
157 { .byteValue: 97, .huffmanCode: 0x18000000ul, .bitLength: 5}, // 'a' 00011
158 { .byteValue: 98, .huffmanCode: 0x8c000000ul, .bitLength: 6}, // 'b' 100011
159 { .byteValue: 99, .huffmanCode: 0x20000000ul, .bitLength: 5}, // 'c' 00100
160 {.byteValue: 100, .huffmanCode: 0x90000000ul, .bitLength: 6}, // 'd' 100100
161 {.byteValue: 101, .huffmanCode: 0x28000000ul, .bitLength: 5}, // 'e' 00101
162 {.byteValue: 102, .huffmanCode: 0x94000000ul, .bitLength: 6}, // 'f' 100101
163 {.byteValue: 103, .huffmanCode: 0x98000000ul, .bitLength: 6}, // 'g' 100110
164 {.byteValue: 104, .huffmanCode: 0x9c000000ul, .bitLength: 6}, // 'h' 100111
165 {.byteValue: 105, .huffmanCode: 0x30000000ul, .bitLength: 5}, // 'i' 00110
166 {.byteValue: 106, .huffmanCode: 0xe8000000ul, .bitLength: 7}, // 'j' 1110100
167 {.byteValue: 107, .huffmanCode: 0xea000000ul, .bitLength: 7}, // 'k' 1110101
168 {.byteValue: 108, .huffmanCode: 0xa0000000ul, .bitLength: 6}, // 'l' 101000
169 {.byteValue: 109, .huffmanCode: 0xa4000000ul, .bitLength: 6}, // 'm' 101001
170 {.byteValue: 110, .huffmanCode: 0xa8000000ul, .bitLength: 6}, // 'n' 101010
171 {.byteValue: 111, .huffmanCode: 0x38000000ul, .bitLength: 5}, // 'o' 00111
172 {.byteValue: 112, .huffmanCode: 0xac000000ul, .bitLength: 6}, // 'p' 101011
173 {.byteValue: 113, .huffmanCode: 0xec000000ul, .bitLength: 7}, // 'q' 1110110
174 {.byteValue: 114, .huffmanCode: 0xb0000000ul, .bitLength: 6}, // 'r' 101100
175 {.byteValue: 115, .huffmanCode: 0x40000000ul, .bitLength: 5}, // 's' 01000
176 {.byteValue: 116, .huffmanCode: 0x48000000ul, .bitLength: 5}, // 't' 01001
177 {.byteValue: 117, .huffmanCode: 0xb4000000ul, .bitLength: 6}, // 'u' 101101
178 {.byteValue: 118, .huffmanCode: 0xee000000ul, .bitLength: 7}, // 'v' 1110111
179 {.byteValue: 119, .huffmanCode: 0xf0000000ul, .bitLength: 7}, // 'w' 1111000
180 {.byteValue: 120, .huffmanCode: 0xf2000000ul, .bitLength: 7}, // 'x' 1111001
181 {.byteValue: 121, .huffmanCode: 0xf4000000ul, .bitLength: 7}, // 'y' 1111010
182 {.byteValue: 122, .huffmanCode: 0xf6000000ul, .bitLength: 7}, // 'z' 1111011
183 {.byteValue: 123, .huffmanCode: 0xfffc0000ul, .bitLength: 15}, // '{' 11111111|1111110
184 {.byteValue: 124, .huffmanCode: 0xff800000ul, .bitLength: 11}, // '|' 11111111|100
185 {.byteValue: 125, .huffmanCode: 0xfff40000ul, .bitLength: 14}, // '}' 11111111|111101
186 {.byteValue: 126, .huffmanCode: 0xffe80000ul, .bitLength: 13}, // '~' 11111111|11101
187 {.byteValue: 127, .huffmanCode: 0xffffffc0ul, .bitLength: 28}, // 11111111|11111111|11111111|1100
188 {.byteValue: 128, .huffmanCode: 0xfffe6000ul, .bitLength: 20}, // 11111111|11111110|0110
189 {.byteValue: 129, .huffmanCode: 0xffff4800ul, .bitLength: 22}, // 11111111|11111111|010010
190 {.byteValue: 130, .huffmanCode: 0xfffe7000ul, .bitLength: 20}, // 11111111|11111110|0111
191 {.byteValue: 131, .huffmanCode: 0xfffe8000ul, .bitLength: 20}, // 11111111|11111110|1000
192 {.byteValue: 132, .huffmanCode: 0xffff4c00ul, .bitLength: 22}, // 11111111|11111111|010011
193 {.byteValue: 133, .huffmanCode: 0xffff5000ul, .bitLength: 22}, // 11111111|11111111|010100
194 {.byteValue: 134, .huffmanCode: 0xffff5400ul, .bitLength: 22}, // 11111111|11111111|010101
195 {.byteValue: 135, .huffmanCode: 0xffffb200ul, .bitLength: 23}, // 11111111|11111111|1011001
196 {.byteValue: 136, .huffmanCode: 0xffff5800ul, .bitLength: 22}, // 11111111|11111111|010110
197 {.byteValue: 137, .huffmanCode: 0xffffb400ul, .bitLength: 23}, // 11111111|11111111|1011010
198 {.byteValue: 138, .huffmanCode: 0xffffb600ul, .bitLength: 23}, // 11111111|11111111|1011011
199 {.byteValue: 139, .huffmanCode: 0xffffb800ul, .bitLength: 23}, // 11111111|11111111|1011100
200 {.byteValue: 140, .huffmanCode: 0xffffba00ul, .bitLength: 23}, // 11111111|11111111|1011101
201 {.byteValue: 141, .huffmanCode: 0xffffbc00ul, .bitLength: 23}, // 11111111|11111111|1011110
202 {.byteValue: 142, .huffmanCode: 0xffffeb00ul, .bitLength: 24}, // 11111111|11111111|11101011
203 {.byteValue: 143, .huffmanCode: 0xffffbe00ul, .bitLength: 23}, // 11111111|11111111|1011111
204 {.byteValue: 144, .huffmanCode: 0xffffec00ul, .bitLength: 24}, // 11111111|11111111|11101100
205 {.byteValue: 145, .huffmanCode: 0xffffed00ul, .bitLength: 24}, // 11111111|11111111|11101101
206 {.byteValue: 146, .huffmanCode: 0xffff5c00ul, .bitLength: 22}, // 11111111|11111111|010111
207 {.byteValue: 147, .huffmanCode: 0xffffc000ul, .bitLength: 23}, // 11111111|11111111|1100000
208 {.byteValue: 148, .huffmanCode: 0xffffee00ul, .bitLength: 24}, // 11111111|11111111|11101110
209 {.byteValue: 149, .huffmanCode: 0xffffc200ul, .bitLength: 23}, // 11111111|11111111|1100001
210 {.byteValue: 150, .huffmanCode: 0xffffc400ul, .bitLength: 23}, // 11111111|11111111|1100010
211 {.byteValue: 151, .huffmanCode: 0xffffc600ul, .bitLength: 23}, // 11111111|11111111|1100011
212 {.byteValue: 152, .huffmanCode: 0xffffc800ul, .bitLength: 23}, // 11111111|11111111|1100100
213 {.byteValue: 153, .huffmanCode: 0xfffee000ul, .bitLength: 21}, // 11111111|11111110|11100
214 {.byteValue: 154, .huffmanCode: 0xffff6000ul, .bitLength: 22}, // 11111111|11111111|011000
215 {.byteValue: 155, .huffmanCode: 0xffffca00ul, .bitLength: 23}, // 11111111|11111111|1100101
216 {.byteValue: 156, .huffmanCode: 0xffff6400ul, .bitLength: 22}, // 11111111|11111111|011001
217 {.byteValue: 157, .huffmanCode: 0xffffcc00ul, .bitLength: 23}, // 11111111|11111111|1100110
218 {.byteValue: 158, .huffmanCode: 0xffffce00ul, .bitLength: 23}, // 11111111|11111111|1100111
219 {.byteValue: 159, .huffmanCode: 0xffffef00ul, .bitLength: 24}, // 11111111|11111111|11101111
220 {.byteValue: 160, .huffmanCode: 0xffff6800ul, .bitLength: 22}, // 11111111|11111111|011010
221 {.byteValue: 161, .huffmanCode: 0xfffee800ul, .bitLength: 21}, // 11111111|11111110|11101
222 {.byteValue: 162, .huffmanCode: 0xfffe9000ul, .bitLength: 20}, // 11111111|11111110|1001
223 {.byteValue: 163, .huffmanCode: 0xffff6c00ul, .bitLength: 22}, // 11111111|11111111|011011
224 {.byteValue: 164, .huffmanCode: 0xffff7000ul, .bitLength: 22}, // 11111111|11111111|011100
225 {.byteValue: 165, .huffmanCode: 0xffffd000ul, .bitLength: 23}, // 11111111|11111111|1101000
226 {.byteValue: 166, .huffmanCode: 0xffffd200ul, .bitLength: 23}, // 11111111|11111111|1101001
227 {.byteValue: 167, .huffmanCode: 0xfffef000ul, .bitLength: 21}, // 11111111|11111110|11110
228 {.byteValue: 168, .huffmanCode: 0xffffd400ul, .bitLength: 23}, // 11111111|11111111|1101010
229 {.byteValue: 169, .huffmanCode: 0xffff7400ul, .bitLength: 22}, // 11111111|11111111|011101
230 {.byteValue: 170, .huffmanCode: 0xffff7800ul, .bitLength: 22}, // 11111111|11111111|011110
231 {.byteValue: 171, .huffmanCode: 0xfffff000ul, .bitLength: 24}, // 11111111|11111111|11110000
232 {.byteValue: 172, .huffmanCode: 0xfffef800ul, .bitLength: 21}, // 11111111|11111110|11111
233 {.byteValue: 173, .huffmanCode: 0xffff7c00ul, .bitLength: 22}, // 11111111|11111111|011111
234 {.byteValue: 174, .huffmanCode: 0xffffd600ul, .bitLength: 23}, // 11111111|11111111|1101011
235 {.byteValue: 175, .huffmanCode: 0xffffd800ul, .bitLength: 23}, // 11111111|11111111|1101100
236 {.byteValue: 176, .huffmanCode: 0xffff0000ul, .bitLength: 21}, // 11111111|11111111|00000
237 {.byteValue: 177, .huffmanCode: 0xffff0800ul, .bitLength: 21}, // 11111111|11111111|00001
238 {.byteValue: 178, .huffmanCode: 0xffff8000ul, .bitLength: 22}, // 11111111|11111111|100000
239 {.byteValue: 179, .huffmanCode: 0xffff1000ul, .bitLength: 21}, // 11111111|11111111|00010
240 {.byteValue: 180, .huffmanCode: 0xffffda00ul, .bitLength: 23}, // 11111111|11111111|1101101
241 {.byteValue: 181, .huffmanCode: 0xffff8400ul, .bitLength: 22}, // 11111111|11111111|100001
242 {.byteValue: 182, .huffmanCode: 0xffffdc00ul, .bitLength: 23}, // 11111111|11111111|1101110
243 {.byteValue: 183, .huffmanCode: 0xffffde00ul, .bitLength: 23}, // 11111111|11111111|1101111
244 {.byteValue: 184, .huffmanCode: 0xfffea000ul, .bitLength: 20}, // 11111111|11111110|1010
245 {.byteValue: 185, .huffmanCode: 0xffff8800ul, .bitLength: 22}, // 11111111|11111111|100010
246 {.byteValue: 186, .huffmanCode: 0xffff8c00ul, .bitLength: 22}, // 11111111|11111111|100011
247 {.byteValue: 187, .huffmanCode: 0xffff9000ul, .bitLength: 22}, // 11111111|11111111|100100
248 {.byteValue: 188, .huffmanCode: 0xffffe000ul, .bitLength: 23}, // 11111111|11111111|1110000
249 {.byteValue: 189, .huffmanCode: 0xffff9400ul, .bitLength: 22}, // 11111111|11111111|100101
250 {.byteValue: 190, .huffmanCode: 0xffff9800ul, .bitLength: 22}, // 11111111|11111111|100110
251 {.byteValue: 191, .huffmanCode: 0xffffe200ul, .bitLength: 23}, // 11111111|11111111|1110001
252 {.byteValue: 192, .huffmanCode: 0xfffff800ul, .bitLength: 26}, // 11111111|11111111|11111000|00
253 {.byteValue: 193, .huffmanCode: 0xfffff840ul, .bitLength: 26}, // 11111111|11111111|11111000|01
254 {.byteValue: 194, .huffmanCode: 0xfffeb000ul, .bitLength: 20}, // 11111111|11111110|1011
255 {.byteValue: 195, .huffmanCode: 0xfffe2000ul, .bitLength: 19}, // 11111111|11111110|001
256 {.byteValue: 196, .huffmanCode: 0xffff9c00ul, .bitLength: 22}, // 11111111|11111111|100111
257 {.byteValue: 197, .huffmanCode: 0xffffe400ul, .bitLength: 23}, // 11111111|11111111|1110010
258 {.byteValue: 198, .huffmanCode: 0xffffa000ul, .bitLength: 22}, // 11111111|11111111|101000
259 {.byteValue: 199, .huffmanCode: 0xfffff600ul, .bitLength: 25}, // 11111111|11111111|11110110|0
260 {.byteValue: 200, .huffmanCode: 0xfffff880ul, .bitLength: 26}, // 11111111|11111111|11111000|10
261 {.byteValue: 201, .huffmanCode: 0xfffff8c0ul, .bitLength: 26}, // 11111111|11111111|11111000|11
262 {.byteValue: 202, .huffmanCode: 0xfffff900ul, .bitLength: 26}, // 11111111|11111111|11111001|00
263 {.byteValue: 203, .huffmanCode: 0xfffffbc0ul, .bitLength: 27}, // 11111111|11111111|11111011|110
264 {.byteValue: 204, .huffmanCode: 0xfffffbe0ul, .bitLength: 27}, // 11111111|11111111|11111011|111
265 {.byteValue: 205, .huffmanCode: 0xfffff940ul, .bitLength: 26}, // 11111111|11111111|11111001|01
266 {.byteValue: 206, .huffmanCode: 0xfffff100ul, .bitLength: 24}, // 11111111|11111111|11110001
267 {.byteValue: 207, .huffmanCode: 0xfffff680ul, .bitLength: 25}, // 11111111|11111111|11110110|1
268 {.byteValue: 208, .huffmanCode: 0xfffe4000ul, .bitLength: 19}, // 11111111|11111110|010
269 {.byteValue: 209, .huffmanCode: 0xffff1800ul, .bitLength: 21}, // 11111111|11111111|00011
270 {.byteValue: 210, .huffmanCode: 0xfffff980ul, .bitLength: 26}, // 11111111|11111111|11111001|10
271 {.byteValue: 211, .huffmanCode: 0xfffffc00ul, .bitLength: 27}, // 11111111|11111111|11111100|000
272 {.byteValue: 212, .huffmanCode: 0xfffffc20ul, .bitLength: 27}, // 11111111|11111111|11111100|001
273 {.byteValue: 213, .huffmanCode: 0xfffff9c0ul, .bitLength: 26}, // 11111111|11111111|11111001|11
274 {.byteValue: 214, .huffmanCode: 0xfffffc40ul, .bitLength: 27}, // 11111111|11111111|11111100|010
275 {.byteValue: 215, .huffmanCode: 0xfffff200ul, .bitLength: 24}, // 11111111|11111111|11110010
276 {.byteValue: 216, .huffmanCode: 0xffff2000ul, .bitLength: 21}, // 11111111|11111111|00100
277 {.byteValue: 217, .huffmanCode: 0xffff2800ul, .bitLength: 21}, // 11111111|11111111|00101
278 {.byteValue: 218, .huffmanCode: 0xfffffa00ul, .bitLength: 26}, // 11111111|11111111|11111010|00
279 {.byteValue: 219, .huffmanCode: 0xfffffa40ul, .bitLength: 26}, // 11111111|11111111|11111010|01
280 {.byteValue: 220, .huffmanCode: 0xffffffd0ul, .bitLength: 28}, // 11111111|11111111|11111111|1101
281 {.byteValue: 221, .huffmanCode: 0xfffffc60ul, .bitLength: 27}, // 11111111|11111111|11111100|011
282 {.byteValue: 222, .huffmanCode: 0xfffffc80ul, .bitLength: 27}, // 11111111|11111111|11111100|100
283 {.byteValue: 223, .huffmanCode: 0xfffffca0ul, .bitLength: 27}, // 11111111|11111111|11111100|101
284 {.byteValue: 224, .huffmanCode: 0xfffec000ul, .bitLength: 20}, // 11111111|11111110|1100
285 {.byteValue: 225, .huffmanCode: 0xfffff300ul, .bitLength: 24}, // 11111111|11111111|11110011
286 {.byteValue: 226, .huffmanCode: 0xfffed000ul, .bitLength: 20}, // 11111111|11111110|1101
287 {.byteValue: 227, .huffmanCode: 0xffff3000ul, .bitLength: 21}, // 11111111|11111111|00110
288 {.byteValue: 228, .huffmanCode: 0xffffa400ul, .bitLength: 22}, // 11111111|11111111|101001
289 {.byteValue: 229, .huffmanCode: 0xffff3800ul, .bitLength: 21}, // 11111111|11111111|00111
290 {.byteValue: 230, .huffmanCode: 0xffff4000ul, .bitLength: 21}, // 11111111|11111111|01000
291 {.byteValue: 231, .huffmanCode: 0xffffe600ul, .bitLength: 23}, // 11111111|11111111|1110011
292 {.byteValue: 232, .huffmanCode: 0xffffa800ul, .bitLength: 22}, // 11111111|11111111|101010
293 {.byteValue: 233, .huffmanCode: 0xffffac00ul, .bitLength: 22}, // 11111111|11111111|101011
294 {.byteValue: 234, .huffmanCode: 0xfffff700ul, .bitLength: 25}, // 11111111|11111111|11110111|0
295 {.byteValue: 235, .huffmanCode: 0xfffff780ul, .bitLength: 25}, // 11111111|11111111|11110111|1
296 {.byteValue: 236, .huffmanCode: 0xfffff400ul, .bitLength: 24}, // 11111111|11111111|11110100
297 {.byteValue: 237, .huffmanCode: 0xfffff500ul, .bitLength: 24}, // 11111111|11111111|11110101
298 {.byteValue: 238, .huffmanCode: 0xfffffa80ul, .bitLength: 26}, // 11111111|11111111|11111010|10
299 {.byteValue: 239, .huffmanCode: 0xffffe800ul, .bitLength: 23}, // 11111111|11111111|1110100
300 {.byteValue: 240, .huffmanCode: 0xfffffac0ul, .bitLength: 26}, // 11111111|11111111|11111010|11
301 {.byteValue: 241, .huffmanCode: 0xfffffcc0ul, .bitLength: 27}, // 11111111|11111111|11111100|110
302 {.byteValue: 242, .huffmanCode: 0xfffffb00ul, .bitLength: 26}, // 11111111|11111111|11111011|00
303 {.byteValue: 243, .huffmanCode: 0xfffffb40ul, .bitLength: 26}, // 11111111|11111111|11111011|01
304 {.byteValue: 244, .huffmanCode: 0xfffffce0ul, .bitLength: 27}, // 11111111|11111111|11111100|111
305 {.byteValue: 245, .huffmanCode: 0xfffffd00ul, .bitLength: 27}, // 11111111|11111111|11111101|000
306 {.byteValue: 246, .huffmanCode: 0xfffffd20ul, .bitLength: 27}, // 11111111|11111111|11111101|001
307 {.byteValue: 247, .huffmanCode: 0xfffffd40ul, .bitLength: 27}, // 11111111|11111111|11111101|010
308 {.byteValue: 248, .huffmanCode: 0xfffffd60ul, .bitLength: 27}, // 11111111|11111111|11111101|011
309 {.byteValue: 249, .huffmanCode: 0xffffffe0ul, .bitLength: 28}, // 11111111|11111111|11111111|1110
310 {.byteValue: 250, .huffmanCode: 0xfffffd80ul, .bitLength: 27}, // 11111111|11111111|11111101|100
311 {.byteValue: 251, .huffmanCode: 0xfffffda0ul, .bitLength: 27}, // 11111111|11111111|11111101|101
312 {.byteValue: 252, .huffmanCode: 0xfffffdc0ul, .bitLength: 27}, // 11111111|11111111|11111101|110
313 {.byteValue: 253, .huffmanCode: 0xfffffde0ul, .bitLength: 27}, // 11111111|11111111|11111101|111
314 {.byteValue: 254, .huffmanCode: 0xfffffe00ul, .bitLength: 27}, // 11111111|11111111|11111110|000
315 {.byteValue: 255, .huffmanCode: 0xfffffb80ul, .bitLength: 26}, // 11111111|11111111|11111011|10
316 {.byteValue: 256, .huffmanCode: 0xfffffffcul, .bitLength: 30} // EOS 11111111|11111111|11111111|111111
317};
318
319void write_huffman_code(BitOStream &outputStream, const CodeEntry &code)
320{
321 // Append octet by octet.
322 auto bitLength = code.bitLength;
323 const auto hc = code.huffmanCode >> (32 - bitLength);
324
325 if (bitLength > 24) {
326 outputStream.writeBits(bits: uchar(hc >> 24), bitLength: bitLength - 24);
327 bitLength = 24;
328 }
329
330 if (bitLength > 16) {
331 outputStream.writeBits(bits: uchar(hc >> 16), bitLength: bitLength - 16);
332 bitLength = 16;
333 }
334
335 if (bitLength > 8) {
336 outputStream.writeBits(bits: uchar(hc >> 8), bitLength: bitLength - 8);
337 bitLength = 8;
338 }
339
340 outputStream.writeBits(bits: uchar(hc), bitLength);
341}
342
343}
344
345// That's from HPACK's specs - we deal with octets.
346static_assert(std::numeric_limits<uchar>::digits == 8, "octets expected");
347
348quint64 huffman_encoded_bit_length(QByteArrayView inputData)
349{
350 quint64 bitLength = 0;
351 for (int i = 0, e = inputData.size(); i < e; ++i)
352 bitLength += staticHuffmanCodeTable[int(inputData[i])].bitLength;
353
354 return bitLength;
355}
356
357void huffman_encode_string(QByteArrayView inputData, BitOStream &outputStream)
358{
359 for (int i = 0, e = inputData.size(); i < e; ++i) {
360 const auto value = uchar(inputData[i]);
361 write_huffman_code(outputStream, code: staticHuffmanCodeTable[value]);
362 }
363
364 // Pad bits ...
365 if (outputStream.bitLength() % 8)
366 outputStream.writeBits(bits: 0xff, bitLength: 8 - outputStream.bitLength() % 8);
367}
368
369static constexpr
370bool padding_is_valid(quint32 chunk, quint32 nBits)
371{
372 Q_ASSERT(nBits);
373
374 // HPACK, 5.2: "A padding strictly longer than 7 bits MUST be
375 // treated as a decoding error."
376 if (nBits > 7)
377 return false;
378 // HPACK, 5.2:
379 // "A padding not corresponding to the most significant bits
380 // of the code for the EOS symbol MUST be treated as a decoding error."
381 return (chunk >> (32 - nBits)) == quint32((1 << nBits) - 1);
382}
383
384HuffmanDecoder::HuffmanDecoder()
385 : minCodeLength()
386{
387 const auto nCodes = sizeof staticHuffmanCodeTable / sizeof staticHuffmanCodeTable[0];
388
389 std::vector<CodeEntry> symbols(staticHuffmanCodeTable, staticHuffmanCodeTable + nCodes);
390 // Now we sort: by bit length first (in the descending order) and by the symbol
391 // value (descending). Descending order: to make sure we do not create prefix tables with
392 // short 'indexLength' first and having longer codes that do not fit into such tables later.
393 std::sort(first: symbols.begin(), last: symbols.end(), comp: [](const CodeEntry &code1, const CodeEntry &code2) {
394 if (code1.bitLength == code2.bitLength)
395 return code1.byteValue > code2.byteValue;
396 return code1.bitLength > code2.bitLength;
397 });
398
399 minCodeLength = symbols.back().bitLength; // The shortest one, currently it's 5.
400
401 // TODO: add a verification - Huffman codes
402 // within a given bit length range also
403 // should be in descending order.
404
405 // Initialize 'prefixTables' and 'tableData'.
406 addTable(prefixLength: 0, indexLength: quint32(BitConstants::rootPrefix)); // 'root' table.
407
408 for (const auto &s : symbols) {
409 quint32 tableIndex = 0;
410 while (true) {
411 Q_ASSERT(tableIndex < prefixTables.size());
412 // Note, by value - since prefixTables will be updated in between.
413 const auto table = prefixTables[tableIndex];
414 // We skip prefixed bits (if any) and use indexed bits only:
415 const auto entryIndex = s.huffmanCode << table.prefixLength >> (32 - table.indexLength);
416 // Again, by value.
417 PrefixTableEntry entry = tableEntry(table, index: entryIndex);
418 // How many bits were coded by previous tables and this table:
419 const auto codedLength = table.prefixLength + table.indexLength;
420 if (codedLength < s.bitLength) {
421 // We have to add a new prefix table ... (if it's not done yet).
422 if (!entry.bitLength) {
423 entry.nextTable = addTable(prefixLength: codedLength, indexLength: std::min<quint32>(a: quint32(BitConstants::childPrefix),
424 b: s.bitLength - codedLength));
425 entry.bitLength = s.bitLength;
426 entry.byteValue = s.byteValue;
427 setTableEntry(table, index: entryIndex, entry);
428 }
429 tableIndex = entry.nextTable;
430 } else {
431 // We found the slot for our code (terminal entry):
432 entry.byteValue = s.byteValue;
433 entry.bitLength = s.bitLength;
434 // Refer to our own table as 'nextTable':
435 entry.nextTable = tableIndex;
436 setTableEntry(table, index: entryIndex, entry);
437 break;
438 }
439 }
440 }
441
442 // Now, we have a table(s) and have to fill 'holes' to
443 // 'fix' terminal entries.
444 for (const auto &table : prefixTables) {
445 const quint32 codedLength = table.prefixLength + table.indexLength;
446 for (quint32 j = 0; j < table.size();) {
447 const PrefixTableEntry &entry = tableEntry(table, index: j);
448 if (entry.bitLength && entry.bitLength < codedLength) {
449 const quint32 range = 1 << (codedLength - entry.bitLength);
450 for (quint32 k = 1; k < range; ++k)
451 setTableEntry(table, index: j + k, entry);
452 j += range;
453 } else {
454 ++j;
455 }
456 }
457 }
458}
459
460bool HuffmanDecoder::decodeStream(BitIStream &inputStream, QByteArray &outputBuffer)
461{
462 while (true) {
463 quint32 chunk = 0;
464 const quint32 readBits = inputStream.peekBits(from: inputStream.streamOffset(), length: 32, dstPtr: &chunk);
465 if (!readBits)
466 return !inputStream.hasMoreBits();
467
468 if (readBits < minCodeLength) {
469 inputStream.skipBits(nBits: readBits);
470 return padding_is_valid(chunk, nBits: readBits);
471 }
472
473 quint32 tableIndex = 0;
474 const PrefixTable *table = &prefixTables[tableIndex];
475 quint32 entryIndex = chunk >> (32 - table->indexLength);
476 PrefixTableEntry entry = tableEntry(table: *table, index: entryIndex);
477
478 while (true) {
479 if (entry.nextTable == tableIndex)
480 break;
481
482 tableIndex = entry.nextTable;
483 table = &prefixTables[tableIndex];
484 entryIndex = chunk << table->prefixLength >> (32 - table->indexLength);
485 entry = tableEntry(table: *table, index: entryIndex);
486 }
487
488 if (entry.bitLength > readBits) {
489 inputStream.skipBits(nBits: readBits);
490 return padding_is_valid(chunk, nBits: readBits);
491 }
492
493 if (!entry.bitLength || entry.byteValue == 256) {
494 //EOS (256) == compression error (HPACK).
495 inputStream.skipBits(nBits: readBits);
496 return false;
497 }
498
499 outputBuffer.append(c: entry.byteValue);
500 inputStream.skipBits(nBits: entry.bitLength);
501 }
502
503 return false;
504}
505
506quint32 HuffmanDecoder::addTable(quint32 prefix, quint32 index)
507{
508 PrefixTable newTable{prefix, index};
509 newTable.offset = quint32(tableData.size());
510 prefixTables.push_back(x: newTable);
511 // Add entries for this table:
512 tableData.resize(new_size: tableData.size() + newTable.size());
513
514 return quint32(prefixTables.size() - 1);
515}
516
517PrefixTableEntry HuffmanDecoder::tableEntry(const PrefixTable &table, quint32 index)
518{
519 Q_ASSERT(index < table.size());
520 return tableData[table.offset + index];
521}
522
523void HuffmanDecoder::setTableEntry(const PrefixTable &table, quint32 index,
524 const PrefixTableEntry &entry)
525{
526 Q_ASSERT(index < table.size());
527 tableData[table.offset + index] = entry;
528}
529
530bool huffman_decode_string(BitIStream &inputStream, QByteArray *outputBuffer)
531{
532 Q_ASSERT(outputBuffer);
533
534 static HuffmanDecoder decoder;
535 return decoder.decodeStream(inputStream, outputBuffer&: *outputBuffer);
536}
537
538}
539
540QT_END_NAMESPACE
541

source code of qtbase/src/network/access/http2/huffman.cpp