1// Copyright (C) 2017 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qhsts_p.h"
5
6#include "QtCore/private/qipaddress_p.h"
7#include "QtCore/qlist.h"
8
9#if QT_CONFIG(settings)
10#include "qhstsstore_p.h"
11#endif // QT_CONFIG(settings)
12
13QT_BEGIN_NAMESPACE
14
15static bool is_valid_domain_name(const QString &host)
16{
17 if (!host.size())
18 return false;
19
20 // RFC6797 8.1.1
21 // If the substring matching the host production from the Request-URI
22 // (of the message to which the host responded) syntactically matches
23 //the IP-literal or IPv4address productions from Section 3.2.2 of
24 //[RFC3986], then the UA MUST NOT note this host as a Known HSTS Host.
25 using namespace QIPAddressUtils;
26
27 IPv4Address ipv4Addr = {};
28 if (parseIp4(address&: ipv4Addr, begin: host.constBegin(), end: host.constEnd()))
29 return false;
30
31 IPv6Address ipv6Addr = {};
32 // Unlike parseIp4, parseIp6 returns nullptr if it managed to parse IPv6
33 // address successfully.
34 if (!parseIp6(address&: ipv6Addr, begin: host.constBegin(), end: host.constEnd()))
35 return false;
36
37 // TODO: for now we do not test IPvFuture address, it must be addressed
38 // by introducing parseIpFuture (actually, there is an implementation
39 // in QUrl that can be adopted/modified/moved to QIPAddressUtils).
40 return true;
41}
42
43void QHstsCache::updateFromHeaders(const QList<QPair<QByteArray, QByteArray>> &headers,
44 const QUrl &url)
45{
46 if (!url.isValid())
47 return;
48
49 QHstsHeaderParser parser;
50 if (parser.parse(headers)) {
51 updateKnownHost(hostName: url.host(), expires: parser.expirationDate(), includeSubDomains: parser.includeSubDomains());
52#if QT_CONFIG(settings)
53 if (hstsStore)
54 hstsStore->synchronize();
55#endif // QT_CONFIG(settings)
56 }
57}
58
59void QHstsCache::updateFromPolicies(const QList<QHstsPolicy> &policies)
60{
61 for (const auto &policy : policies)
62 updateKnownHost(hostName: policy.host(), expires: policy.expiry(), includeSubDomains: policy.includesSubDomains());
63
64#if QT_CONFIG(settings)
65 if (hstsStore && policies.size()) {
66 // These policies are coming either from store or from QNAM's setter
67 // function. As a result we can notice expired or new policies, time
68 // to sync ...
69 hstsStore->synchronize();
70 }
71#endif // QT_CONFIG(settings)
72}
73
74void QHstsCache::updateKnownHost(const QUrl &url, const QDateTime &expires,
75 bool includeSubDomains)
76{
77 if (!url.isValid())
78 return;
79
80 updateKnownHost(hostName: url.host(), expires, includeSubDomains);
81#if QT_CONFIG(settings)
82 if (hstsStore)
83 hstsStore->synchronize();
84#endif // QT_CONFIG(settings)
85}
86
87void QHstsCache::updateKnownHost(const QString &host, const QDateTime &expires,
88 bool includeSubDomains)
89{
90 if (!is_valid_domain_name(host))
91 return;
92
93 // HSTS is a per-host policy, regardless of protocol, port or any of the other
94 // details in an URL; so we only want the host part. QUrl::host handles
95 // IDNA 2003 (RFC3490) for us, as required by HSTS (RFC6797, section 10).
96 const HostName hostName(host);
97 const auto pos = knownHosts.find(x: hostName);
98 QHstsPolicy::PolicyFlags flags;
99 if (includeSubDomains)
100 flags = QHstsPolicy::IncludeSubDomains;
101
102 const QHstsPolicy newPolicy(expires, flags, hostName.name);
103 if (pos == knownHosts.end()) {
104 // A new, previously unknown host.
105 if (newPolicy.isExpired()) {
106 // Nothing to do at all - we did not know this host previously,
107 // we do not have to - since its policy expired.
108 return;
109 }
110
111 knownHosts.insert(x: {hostName, newPolicy});
112#if QT_CONFIG(settings)
113 if (hstsStore)
114 hstsStore->addToObserved(policy: newPolicy);
115#endif // QT_CONFIG(settings)
116 return;
117 }
118
119 if (newPolicy.isExpired())
120 knownHosts.erase(position: pos);
121 else if (pos->second != newPolicy)
122 pos->second = newPolicy;
123 else
124 return;
125
126#if QT_CONFIG(settings)
127 if (hstsStore)
128 hstsStore->addToObserved(policy: newPolicy);
129#endif // QT_CONFIG(settings)
130}
131
132bool QHstsCache::isKnownHost(const QUrl &url) const
133{
134 if (!url.isValid() || !is_valid_domain_name(host: url.host()))
135 return false;
136
137 /*
138 RFC6797, 8.2. Known HSTS Host Domain Name Matching
139
140 * Superdomain Match
141 If a label-for-label match between an entire Known HSTS Host's
142 domain name and a right-hand portion of the given domain name
143 is found, then this Known HSTS Host's domain name is a
144 superdomain match for the given domain name. There could be
145 multiple superdomain matches for a given domain name.
146 * Congruent Match
147 If a label-for-label match between a Known HSTS Host's domain
148 name and the given domain name is found -- i.e., there are no
149 further labels to compare -- then the given domain name
150 congruently matches this Known HSTS Host.
151
152 We start from the congruent match, and then chop labels and dots and
153 proceed with superdomain match. While RFC6797 recommends to start from
154 superdomain, the result is the same - some valid policy will make a host
155 known.
156 */
157
158 bool superDomainMatch = false;
159 const QString hostNameAsString(url.host());
160 HostName nameToTest(QStringView{hostNameAsString});
161 while (nameToTest.fragment.size()) {
162 auto const pos = knownHosts.find(x: nameToTest);
163 if (pos != knownHosts.end()) {
164 if (pos->second.isExpired()) {
165 knownHosts.erase(position: pos);
166#if QT_CONFIG(settings)
167 if (hstsStore) {
168 // Inform our store that this policy has expired.
169 hstsStore->addToObserved(policy: pos->second);
170 }
171#endif // QT_CONFIG(settings)
172 } else if (!superDomainMatch || pos->second.includesSubDomains()) {
173 return true;
174 }
175 }
176
177 const qsizetype dot = nameToTest.fragment.indexOf(c: u'.');
178 if (dot == -1)
179 break;
180
181 nameToTest.fragment = nameToTest.fragment.mid(pos: dot + 1);
182 superDomainMatch = true;
183 }
184
185 return false;
186}
187
188void QHstsCache::clear()
189{
190 knownHosts.clear();
191}
192
193QList<QHstsPolicy> QHstsCache::policies() const
194{
195 QList<QHstsPolicy> values;
196 values.reserve(asize: int(knownHosts.size()));
197 for (const auto &host : knownHosts)
198 values << host.second;
199 return values;
200}
201
202#if QT_CONFIG(settings)
203void QHstsCache::setStore(QHstsStore *store)
204{
205 // Caller retains ownership of store, which must outlive this cache.
206 if (store != hstsStore) {
207 hstsStore = store;
208
209 if (!hstsStore)
210 return;
211
212 // First we augment our store with the policies we already know about
213 // (and thus the cached policy takes priority over whatever policy we
214 // had in the store for the same host, if any).
215 if (knownHosts.size()) {
216 const QList<QHstsPolicy> observed(policies());
217 for (const auto &policy : observed)
218 hstsStore->addToObserved(policy);
219 hstsStore->synchronize();
220 }
221
222 // Now we update the cache with anything we have not observed yet, but
223 // the store knows about (well, it can happen we synchronize again as a
224 // result if some policies managed to expire or if we add a new one
225 // from the store to cache):
226 const QList<QHstsPolicy> restored(store->readPolicies());
227 updateFromPolicies(policies: restored);
228 }
229}
230#endif // QT_CONFIG(settings)
231
// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
// are valid and it will return false if something else was found; then
// we immediately stop parsing. 'parseDirective' knows how these tokens can
// be combined into a valid directive and if some weird combination of
// valid tokens is found - we immediately stop.
// And finally we call parseDirective again and again until some error is found or
// we have no more bytes in the header.
239
240// The following isXXX functions are based on RFC2616, 2.2 Basic Rules.
241
// CHAR = <any US-ASCII character (octets 0 - 127)>
// Negative values (e.g. a sign-extended high octet) are not CHARs.
static bool isCHAR(int c)
{
    if (c < 0)
        return false;
    return c < 128;
}
247
// CTL = <any US-ASCII control character
//       (octets 0 - 31) and DEL (127)>
static bool isCTL(int c)
{
    if (c == 127) // DEL
        return true;
    return c >= 0 && c < 32;
}
254
255
// LWS = [CRLF] 1*( SP | HT )
//
// CRLF = CR LF
// CR   = <US-ASCII CR, carriage return (13)>
// LF   = <US-ASCII LF, linefeed (10)>
// SP   = <US-ASCII SP, space (32)>
// HT   = <US-ASCII HT, horizontal-tab (9)>
//
// CRLF is handled by the time we parse a header (they were replaced with
// spaces). We only have to deal with remaining SP|HT.
static bool isLWS(int c)
{
    switch (c) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
270
271static bool isTEXT(char c)
272{
273 // TEXT = <any OCTET except CTLs,
274 // but including LWS>
275 return !isCTL(c) || isLWS(c);
276}
277
278static bool isSeparator(char c)
279{
280 // separators = "(" | ")" | "<" | ">" | "@"
281 // | "," | ";" | ":" | "\" | <">
282 // | "/" | "[" | "]" | "?" | "="
283 // | "{" | "}" | SP | HT
284 static const char separators[] = "()<>@,;:\\\"/[]?={}";
285 static const char *end = separators + sizeof separators - 1;
286 return isLWS(c) || std::find(first: separators, last: end, val: c) != end;
287}
288
289static QByteArray unescapeMaxAge(const QByteArray &value)
290{
291 if (value.size() < 2 || value[0] != '"')
292 return value;
293
294 Q_ASSERT(value[value.size() - 1] == '"');
295 return value.mid(index: 1, len: value.size() - 2);
296}
297
298static bool isTOKEN(char c)
299{
300 // token = 1*<any CHAR except CTLs or separators>
301 return isCHAR(c) && !isCTL(c) && !isSeparator(c);
302}
303
304/*
305
306RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field.
307Syntax:
308
Strict-Transport-Security = "Strict-Transport-Security" ":"
310 [ directive ] *( ";" [ directive ] )
311
312directive = directive-name [ "=" directive-value ]
313directive-name = token
314directive-value = token | quoted-string
315
316RFC 2616, 2.2 Basic Rules.
317
318token = 1*<any CHAR except CTLs or separators>
319quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
320
321
322qdtext = <any TEXT except <">>
323quoted-pair = "\" CHAR
324
325*/
326
327bool QHstsHeaderParser::parse(const QList<QPair<QByteArray, QByteArray>> &headers)
328{
329 for (const auto &h : headers) {
330 // We compare directly because header name was already 'trimmed' for us:
331 if (h.first.compare(a: "Strict-Transport-Security", cs: Qt::CaseInsensitive) == 0) {
332 header = h.second;
333 // RFC6797, 8.1:
334 //
335 // The UA MUST ignore any STS header fields not conforming to the
336 // grammar specified in Section 6.1 ("Strict-Transport-Security HTTP
337 // Response Header Field").
338 //
339 // If a UA receives more than one STS header field in an HTTP
340 // response message over secure transport, then the UA MUST process
341 // only the first such header field.
342 //
343 // We read this as: ignore all invalid headers and take the first valid:
344 if (parseSTSHeader() && maxAgeFound) {
345 expiry = QDateTime::currentDateTimeUtc().addSecs(secs: maxAge);
346 return true;
347 }
348 }
349 }
350
351 // In case it was set by a syntactically correct header (but without
352 // REQUIRED max-age directive):
353 subDomainsFound = false;
354
355 return false;
356}
357
358bool QHstsHeaderParser::parseSTSHeader()
359{
360 expiry = QDateTime();
361 maxAgeFound = false;
362 subDomainsFound = false;
363 maxAge = 0;
364 tokenPos = 0;
365 token.clear();
366
367 while (tokenPos < header.size()) {
368 if (!parseDirective())
369 return false;
370
371 if (token.size() && token != ";") {
372 // After a directive we can only have a ";" or no more tokens.
373 // Invalid syntax.
374 return false;
375 }
376 }
377
378 return true;
379}
380
381bool QHstsHeaderParser::parseDirective()
382{
383 // RFC 6797, 6.1:
384 //
385 // directive = directive-name [ "=" directive-value ]
386 // directive-name = token
387 // directive-value = token | quoted-string
388
389
390 // RFC 2616, 2.2:
391 //
392 // token = 1*<any CHAR except CTLs or separators>
393
394 if (!nextToken())
395 return false;
396
397 if (!token.size()) // No more data, but no error.
398 return true;
399
400 if (token == ";") // That's a weird grammar, but that's what it is.
401 return true;
402
403 if (!isTOKEN(c: token[0])) // Not a valid directive-name.
404 return false;
405
406 const QByteArray directiveName = token;
407 // 2. Try to read "=" or ";".
408 if (!nextToken())
409 return false;
410
411 QByteArray directiveValue;
412 if (token == ";") // No directive-value
413 return processDirective(name: directiveName, value: directiveValue);
414
415 if (token == "=") {
416 // We expect a directive-value now:
417 if (!nextToken() || !token.size())
418 return false;
419 directiveValue = token;
420 } else if (token.size()) {
421 // Invalid syntax:
422 return false;
423 }
424
425 if (!processDirective(name: directiveName, value: directiveValue))
426 return false;
427
428 // Read either ";", or 'end of header', or some invalid token.
429 return nextToken();
430}
431
432bool QHstsHeaderParser::processDirective(const QByteArray &name, const QByteArray &value)
433{
434 Q_ASSERT(name.size());
435 // RFC6797 6.1/3 Directive names are case-insensitive
436 if (name.compare(a: "max-age", cs: Qt::CaseInsensitive) == 0) {
437 // RFC 6797, 6.1.1
438 // The syntax of the max-age directive's REQUIRED value (after
439 // quoted-string unescaping, if necessary) is defined as:
440 //
441 // max-age-value = delta-seconds
442 if (maxAgeFound) {
443 // RFC 6797, 6.1/2:
444 // All directives MUST appear only once in an STS header field.
445 return false;
446 }
447
448 const QByteArray unescapedValue = unescapeMaxAge(value);
449 if (!unescapedValue.size())
450 return false;
451
452 bool ok = false;
453 const qint64 age = unescapedValue.toLongLong(ok: &ok);
454 if (!ok || age < 0)
455 return false;
456
457 maxAge = age;
458 maxAgeFound = true;
459 } else if (name.compare(a: "includesubdomains", cs: Qt::CaseInsensitive) == 0) {
460 // RFC 6797, 6.1.2. The includeSubDomains Directive.
461 // The OPTIONAL "includeSubDomains" directive is a valueless directive.
462
463 if (subDomainsFound) {
464 // RFC 6797, 6.1/2:
465 // All directives MUST appear only once in an STS header field.
466 return false;
467 }
468
469 subDomainsFound = true;
470 } // else we do nothing, skip unknown directives (RFC 6797, 6.1/5)
471
472 return true;
473}
474
475bool QHstsHeaderParser::nextToken()
476{
477 // Returns true if we found a valid token or we have no more data (token is
478 // empty then).
479
480 token.clear();
481
482 // Fortunately enough, by this point qhttpnetworkreply already got rid of
483 // [CRLF] parts, but we can have 1*(SP|HT) yet.
484 while (tokenPos < header.size() && isLWS(c: header[tokenPos]))
485 ++tokenPos;
486
487 if (tokenPos == header.size())
488 return true;
489
490 const char ch = header[tokenPos];
491 if (ch == ';' || ch == '=') {
492 token.append(c: ch);
493 ++tokenPos;
494 return true;
495 }
496
497 // RFC 2616, 2.2.
498 //
499 // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
500 // qdtext = <any TEXT except <">>
501 if (ch == '"') {
502 int last = tokenPos + 1;
503 while (last < header.size()) {
504 if (header[last] == '"') {
505 // The end of a quoted-string.
506 break;
507 } else if (header[last] == '\\') {
508 // quoted-pair = "\" CHAR
509 if (last + 1 < header.size() && isCHAR(c: header[last + 1]))
510 last += 2;
511 else
512 return false;
513 } else {
514 if (!isTEXT(c: header[last]))
515 return false;
516 ++last;
517 }
518 }
519
520 if (last >= header.size()) // no closing '"':
521 return false;
522
523 token = header.mid(index: tokenPos, len: last - tokenPos + 1);
524 tokenPos = last + 1;
525 return true;
526 }
527
528 // RFC 2616, 2.2:
529 //
530 // token = 1*<any CHAR except CTLs or separators>
531 if (!isTOKEN(c: ch))
532 return false;
533
534 int last = tokenPos + 1;
535 while (last < header.size() && isTOKEN(c: header[last]))
536 ++last;
537
538 token = header.mid(index: tokenPos, len: last - tokenPos);
539 tokenPos = last;
540
541 return true;
542}
543
544QT_END_NAMESPACE
545

source code of qtbase/src/network/access/qhsts.cpp