1/****************************************************************************
2**
3** Copyright (C) 2017 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtNetwork module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qhsts_p.h"
41
42#include "QtCore/private/qipaddress_p.h"
43#include "QtCore/qvector.h"
44#include "QtCore/qlist.h"
45
46#if QT_CONFIG(settings)
47#include "qhstsstore_p.h"
48#endif // QT_CONFIG(settings)
49
50QT_BEGIN_NAMESPACE
51
52static bool is_valid_domain_name(const QString &host)
53{
54 if (!host.size())
55 return false;
56
57 // RFC6797 8.1.1
58 // If the substring matching the host production from the Request-URI
59 // (of the message to which the host responded) syntactically matches
60 //the IP-literal or IPv4address productions from Section 3.2.2 of
61 //[RFC3986], then the UA MUST NOT note this host as a Known HSTS Host.
62 using namespace QIPAddressUtils;
63
64 IPv4Address ipv4Addr = {};
65 if (parseIp4(address&: ipv4Addr, begin: host.constBegin(), end: host.constEnd()))
66 return false;
67
68 IPv6Address ipv6Addr = {};
69 // Unlike parseIp4, parseIp6 returns nullptr if it managed to parse IPv6
70 // address successfully.
71 if (!parseIp6(address&: ipv6Addr, begin: host.constBegin(), end: host.constEnd()))
72 return false;
73
74 // TODO: for now we do not test IPvFuture address, it must be addressed
75 // by introducing parseIpFuture (actually, there is an implementation
76 // in QUrl that can be adopted/modified/moved to QIPAddressUtils).
77 return true;
78}
79
80void QHstsCache::updateFromHeaders(const QList<QPair<QByteArray, QByteArray>> &headers,
81 const QUrl &url)
82{
83 if (!url.isValid())
84 return;
85
86 QHstsHeaderParser parser;
87 if (parser.parse(headers)) {
88 updateKnownHost(hostName: url.host(), expires: parser.expirationDate(), includeSubDomains: parser.includeSubDomains());
89#if QT_CONFIG(settings)
90 if (hstsStore)
91 hstsStore->synchronize();
92#endif // QT_CONFIG(settings)
93 }
94}
95
96void QHstsCache::updateFromPolicies(const QVector<QHstsPolicy> &policies)
97{
98 for (const auto &policy : policies)
99 updateKnownHost(hostName: policy.host(), expires: policy.expiry(), includeSubDomains: policy.includesSubDomains());
100
101#if QT_CONFIG(settings)
102 if (hstsStore && policies.size()) {
103 // These policies are coming either from store or from QNAM's setter
104 // function. As a result we can notice expired or new policies, time
105 // to sync ...
106 hstsStore->synchronize();
107 }
108#endif // QT_CONFIG(settings)
109}
110
111void QHstsCache::updateKnownHost(const QUrl &url, const QDateTime &expires,
112 bool includeSubDomains)
113{
114 if (!url.isValid())
115 return;
116
117 updateKnownHost(hostName: url.host(), expires, includeSubDomains);
118#if QT_CONFIG(settings)
119 if (hstsStore)
120 hstsStore->synchronize();
121#endif // QT_CONFIG(settings)
122}
123
124void QHstsCache::updateKnownHost(const QString &host, const QDateTime &expires,
125 bool includeSubDomains)
126{
127 if (!is_valid_domain_name(host))
128 return;
129
130 // HSTS is a per-host policy, regardless of protocol, port or any of the other
131 // details in an URL; so we only want the host part. QUrl::host handles
132 // IDNA 2003 (RFC3490) for us, as required by HSTS (RFC6797, section 10).
133 const HostName hostName(host);
134 const auto pos = knownHosts.find(x: hostName);
135 QHstsPolicy::PolicyFlags flags;
136 if (includeSubDomains)
137 flags = QHstsPolicy::IncludeSubDomains;
138
139 const QHstsPolicy newPolicy(expires, flags, hostName.name);
140 if (pos == knownHosts.end()) {
141 // A new, previously unknown host.
142 if (newPolicy.isExpired()) {
143 // Nothing to do at all - we did not know this host previously,
144 // we do not have to - since its policy expired.
145 return;
146 }
147
148 knownHosts.insert(x: {hostName, newPolicy});
149#if QT_CONFIG(settings)
150 if (hstsStore)
151 hstsStore->addToObserved(policy: newPolicy);
152#endif // QT_CONFIG(settings)
153 return;
154 }
155
156 if (newPolicy.isExpired())
157 knownHosts.erase(position: pos);
158 else if (pos->second != newPolicy)
159 pos->second = newPolicy;
160 else
161 return;
162
163#if QT_CONFIG(settings)
164 if (hstsStore)
165 hstsStore->addToObserved(policy: newPolicy);
166#endif // QT_CONFIG(settings)
167}
168
169bool QHstsCache::isKnownHost(const QUrl &url) const
170{
171 if (!url.isValid() || !is_valid_domain_name(host: url.host()))
172 return false;
173
174 /*
175 RFC6797, 8.2. Known HSTS Host Domain Name Matching
176
177 * Superdomain Match
178 If a label-for-label match between an entire Known HSTS Host's
179 domain name and a right-hand portion of the given domain name
180 is found, then this Known HSTS Host's domain name is a
181 superdomain match for the given domain name. There could be
182 multiple superdomain matches for a given domain name.
183 * Congruent Match
184 If a label-for-label match between a Known HSTS Host's domain
185 name and the given domain name is found -- i.e., there are no
186 further labels to compare -- then the given domain name
187 congruently matches this Known HSTS Host.
188
189 We start from the congruent match, and then chop labels and dots and
190 proceed with superdomain match. While RFC6797 recommends to start from
191 superdomain, the result is the same - some valid policy will make a host
192 known.
193 */
194
195 bool superDomainMatch = false;
196 const QString hostNameAsString(url.host());
197 HostName nameToTest(static_cast<QStringRef>(&hostNameAsString));
198 while (nameToTest.fragment.size()) {
199 auto const pos = knownHosts.find(x: nameToTest);
200 if (pos != knownHosts.end()) {
201 if (pos->second.isExpired()) {
202 knownHosts.erase(position: pos);
203#if QT_CONFIG(settings)
204 if (hstsStore) {
205 // Inform our store that this policy has expired.
206 hstsStore->addToObserved(policy: pos->second);
207 }
208#endif // QT_CONFIG(settings)
209 } else if (!superDomainMatch || pos->second.includesSubDomains()) {
210 return true;
211 }
212 }
213
214 const int dot = nameToTest.fragment.indexOf(ch: QLatin1Char('.'));
215 if (dot == -1)
216 break;
217
218 nameToTest.fragment = nameToTest.fragment.mid(pos: dot + 1);
219 superDomainMatch = true;
220 }
221
222 return false;
223}
224
225void QHstsCache::clear()
226{
227 knownHosts.clear();
228}
229
230QVector<QHstsPolicy> QHstsCache::policies() const
231{
232 QVector<QHstsPolicy> values;
233 values.reserve(asize: int(knownHosts.size()));
234 for (const auto &host : knownHosts)
235 values << host.second;
236 return values;
237}
238
239#if QT_CONFIG(settings)
240void QHstsCache::setStore(QHstsStore *store)
241{
242 // Caller retains ownership of store, which must outlive this cache.
243 if (store != hstsStore) {
244 hstsStore = store;
245
246 if (!hstsStore)
247 return;
248
249 // First we augment our store with the policies we already know about
250 // (and thus the cached policy takes priority over whatever policy we
251 // had in the store for the same host, if any).
252 if (knownHosts.size()) {
253 const QVector<QHstsPolicy> observed(policies());
254 for (const auto &policy : observed)
255 hstsStore->addToObserved(policy);
256 hstsStore->synchronize();
257 }
258
259 // Now we update the cache with anything we have not observed yet, but
260 // the store knows about (well, it can happen we synchronize again as a
261 // result if some policies managed to expire or if we add a new one
262 // from the store to cache):
263 const QVector<QHstsPolicy> restored(store->readPolicies());
264 updateFromPolicies(policies: restored);
265 }
266}
267#endif // QT_CONFIG(settings)
268
// The parser is quite simple: 'nextToken' knows exactly what kind of tokens
// are valid and it will return false if something else was found; then
// we immediately stop parsing. 'parseDirective' knows how these tokens can
// be combined into a valid directive and if some weird combination of
// valid tokens is found - we immediately stop.
// And finally we call parseDirective again and again until some error is
// found or we have no more bytes in the header.

// The following isXXX functions are based on RFC2616, 2.2 Basic Rules.
278
// Returns true if \a c is a US-ASCII character code (0..127 inclusive).
// Negative values (e.g. a sign-extended high-bit char) are rejected.
static bool isCHAR(int c)
{
    // CHAR           = <any US-ASCII character (octets 0 - 127)>
    return c >= 0 && c <= 127;
}
284
// Returns true if \a c is a US-ASCII control character: 0..31 or DEL (127).
static bool isCTL(int c)
{
    // CTL            = <any US-ASCII control character
    //                  (octets 0 - 31) and DEL (127)>
    return (c >= 0 && c <= 31) || c == 127;
}
291
292
// Returns true if \a c is a space or a horizontal tab, the only linear
// white space this parser has to recognise.
static bool isLWS(int c)
{
    // LWS            = [CRLF] 1*( SP | HT )
    //
    // CRLF           = CR LF
    // CR             = <US-ASCII CR, carriage return (13)>
    // LF             = <US-ASCII LF, linefeed (10)>
    // SP             = <US-ASCII SP, space (32)>
    // HT             = <US-ASCII HT, horizontal-tab (9)>
    //
    // CRLF is handled by the time we parse a header (they were replaced with
    // spaces). We only have to deal with remaining SP|HT
    return c == ' ' || c == '\t';
}
307
308static bool isTEXT(char c)
309{
310 // TEXT = <any OCTET except CTLs,
311 // but including LWS>
312 return !isCTL(c) || isLWS(c);
313}
314
315static bool isSeparator(char c)
316{
317 // separators = "(" | ")" | "<" | ">" | "@"
318 // | "," | ";" | ":" | "\" | <">
319 // | "/" | "[" | "]" | "?" | "="
320 // | "{" | "}" | SP | HT
321 static const char separators[] = "()<>@,;:\\\"/[]?={}";
322 static const char *end = separators + sizeof separators - 1;
323 return isLWS(c) || std::find(first: separators, last: end, val: c) != end;
324}
325
326static QByteArray unescapeMaxAge(const QByteArray &value)
327{
328 if (value.size() < 2 || value[0] != '"')
329 return value;
330
331 Q_ASSERT(value[value.size() - 1] == '"');
332 return value.mid(index: 1, len: value.size() - 2);
333}
334
335static bool isTOKEN(char c)
336{
337 // token = 1*<any CHAR except CTLs or separators>
338 return isCHAR(c) && !isCTL(c) && !isSeparator(c);
339}
340
/*

RFC6797, 6.1 Strict-Transport-Security HTTP Response Header Field.
Syntax:

Strict-Transport-Security = "Strict-Transport-Security" ":"
                            [ directive ] *( ";" [ directive ] )

directive = directive-name [ "=" directive-value ]
directive-name = token
directive-value = token | quoted-string

RFC 2616, 2.2 Basic Rules.

token          = 1*<any CHAR except CTLs or separators>
quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )


qdtext         = <any TEXT except <">>
quoted-pair    = "\" CHAR

*/
363
364bool QHstsHeaderParser::parse(const QList<QPair<QByteArray, QByteArray>> &headers)
365{
366 for (const auto &h : headers) {
367 // We use '==' since header name was already 'trimmed' for us:
368 if (h.first == "Strict-Transport-Security") {
369 header = h.second;
370 // RFC6797, 8.1:
371 //
372 // The UA MUST ignore any STS header fields not conforming to the
373 // grammar specified in Section 6.1 ("Strict-Transport-Security HTTP
374 // Response Header Field").
375 //
376 // If a UA receives more than one STS header field in an HTTP
377 // response message over secure transport, then the UA MUST process
378 // only the first such header field.
379 //
380 // We read this as: ignore all invalid headers and take the first valid:
381 if (parseSTSHeader() && maxAgeFound) {
382 expiry = QDateTime::currentDateTimeUtc().addSecs(secs: maxAge);
383 return true;
384 }
385 }
386 }
387
388 // In case it was set by a syntactically correct header (but without
389 // REQUIRED max-age directive):
390 subDomainsFound = false;
391
392 return false;
393}
394
395bool QHstsHeaderParser::parseSTSHeader()
396{
397 expiry = QDateTime();
398 maxAgeFound = false;
399 subDomainsFound = false;
400 maxAge = 0;
401 tokenPos = 0;
402 token.clear();
403
404 while (tokenPos < header.size()) {
405 if (!parseDirective())
406 return false;
407
408 if (token.size() && token != ";") {
409 // After a directive we can only have a ";" or no more tokens.
410 // Invalid syntax.
411 return false;
412 }
413 }
414
415 return true;
416}
417
418bool QHstsHeaderParser::parseDirective()
419{
420 // RFC 6797, 6.1:
421 //
422 // directive = directive-name [ "=" directive-value ]
423 // directive-name = token
424 // directive-value = token | quoted-string
425
426
427 // RFC 2616, 2.2:
428 //
429 // token = 1*<any CHAR except CTLs or separators>
430
431 if (!nextToken())
432 return false;
433
434 if (!token.size()) // No more data, but no error.
435 return true;
436
437 if (token == ";") // That's a weird grammar, but that's what it is.
438 return true;
439
440 if (!isTOKEN(c: token[0])) // Not a valid directive-name.
441 return false;
442
443 const QByteArray directiveName = token;
444 // 2. Try to read "=" or ";".
445 if (!nextToken())
446 return false;
447
448 QByteArray directiveValue;
449 if (token == ";") // No directive-value
450 return processDirective(name: directiveName, value: directiveValue);
451
452 if (token == "=") {
453 // We expect a directive-value now:
454 if (!nextToken() || !token.size())
455 return false;
456 directiveValue = token;
457 } else if (token.size()) {
458 // Invalid syntax:
459 return false;
460 }
461
462 if (!processDirective(name: directiveName, value: directiveValue))
463 return false;
464
465 // Read either ";", or 'end of header', or some invalid token.
466 return nextToken();
467}
468
469bool QHstsHeaderParser::processDirective(const QByteArray &name, const QByteArray &value)
470{
471 Q_ASSERT(name.size());
472 // RFC6797 6.1/3 Directive names are case-insensitive
473 if (name.compare(c: "max-age", cs: Qt::CaseInsensitive) == 0) {
474 // RFC 6797, 6.1.1
475 // The syntax of the max-age directive's REQUIRED value (after
476 // quoted-string unescaping, if necessary) is defined as:
477 //
478 // max-age-value = delta-seconds
479 if (maxAgeFound) {
480 // RFC 6797, 6.1/2:
481 // All directives MUST appear only once in an STS header field.
482 return false;
483 }
484
485 const QByteArray unescapedValue = unescapeMaxAge(value);
486 if (!unescapedValue.size())
487 return false;
488
489 bool ok = false;
490 const qint64 age = unescapedValue.toLongLong(ok: &ok);
491 if (!ok || age < 0)
492 return false;
493
494 maxAge = age;
495 maxAgeFound = true;
496 } else if (name.compare(c: "includesubdomains", cs: Qt::CaseInsensitive) == 0) {
497 // RFC 6797, 6.1.2. The includeSubDomains Directive.
498 // The OPTIONAL "includeSubDomains" directive is a valueless directive.
499
500 if (subDomainsFound) {
501 // RFC 6797, 6.1/2:
502 // All directives MUST appear only once in an STS header field.
503 return false;
504 }
505
506 subDomainsFound = true;
507 } // else we do nothing, skip unknown directives (RFC 6797, 6.1/5)
508
509 return true;
510}
511
512bool QHstsHeaderParser::nextToken()
513{
514 // Returns true if we found a valid token or we have no more data (token is
515 // empty then).
516
517 token.clear();
518
519 // Fortunately enough, by this point qhttpnetworkreply already got rid of
520 // [CRLF] parts, but we can have 1*(SP|HT) yet.
521 while (tokenPos < header.size() && isLWS(c: header[tokenPos]))
522 ++tokenPos;
523
524 if (tokenPos == header.size())
525 return true;
526
527 const char ch = header[tokenPos];
528 if (ch == ';' || ch == '=') {
529 token.append(c: ch);
530 ++tokenPos;
531 return true;
532 }
533
534 // RFC 2616, 2.2.
535 //
536 // quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
537 // qdtext = <any TEXT except <">>
538 if (ch == '"') {
539 int last = tokenPos + 1;
540 while (last < header.size()) {
541 if (header[last] == '"') {
542 // The end of a quoted-string.
543 break;
544 } else if (header[last] == '\\') {
545 // quoted-pair = "\" CHAR
546 if (last + 1 < header.size() && isCHAR(c: header[last + 1]))
547 last += 2;
548 else
549 return false;
550 } else {
551 if (!isTEXT(c: header[last]))
552 return false;
553 ++last;
554 }
555 }
556
557 if (last >= header.size()) // no closing '"':
558 return false;
559
560 token = header.mid(index: tokenPos, len: last - tokenPos + 1);
561 tokenPos = last + 1;
562 return true;
563 }
564
565 // RFC 2616, 2.2:
566 //
567 // token = 1*<any CHAR except CTLs or separators>
568 if (!isTOKEN(c: ch))
569 return false;
570
571 int last = tokenPos + 1;
572 while (last < header.size() && isTOKEN(c: header[last]))
573 ++last;
574
575 token = header.mid(index: tokenPos, len: last - tokenPos);
576 tokenPos = last;
577
578 return true;
579}
580
581QT_END_NAMESPACE
582

source code of qtbase/src/network/access/qhsts.cpp