Qt 6.x
The Qt SDK
Loading...
Searching...
No Matches
qurlidna.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// Copyright (C) 2016 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#include "qurl_p.h"
6
7#include <QtCore/qstringlist.h>
8#include <QtCore/private/qnumeric_p.h>
9#include <QtCore/private/qoffsetstringarray_p.h>
10#include <QtCore/private/qstringiterator_p.h>
11#include <QtCore/private/qunicodetables_p.h>
12
13#include <algorithm>
14
16
17using namespace Qt::StringLiterals;
18
19// needed by the punycode encoder/decoder
// The names and values below match the Punycode parameter set given in
// RFC 3492, section 5:
//   base         - radix of the variable-length integers (digits a-z, 0-9)
//   tmin, tmax   - lower/upper clamp applied to the per-digit threshold
//   skew, damp   - constants used by adapt() when recomputing the bias
//   initial_bias - bias used before the first delta has been processed
//   initial_n    - first non-basic code point (0x80); lower values are
//                  copied verbatim by the encoder
20static const uint base = 36;
21static const uint tmin = 1;
22static const uint tmax = 26;
23static const uint skew = 38;
24static const uint damp = 700;
25static const uint initial_bias = 72;
26static const uint initial_n = 128;
27
// Upper bound, in UTF-16 code units, on an ASCII domain label; the encoder,
// the decoder and the ASCII label validation in this file compare label
// sizes against it.
28static constexpr qsizetype MaxDomainLabelLength = 63;
29
/*
    Maps a Punycode digit value to the code unit that represents it:
    0..25 become 'a'..'z' and 26..35 become '0'..'9'.

    Values outside 0..35 are not checked; callers only pass digits below base.
*/
static inline uint encodeDigit(uint digit)
{
    // 'a' is 97 (0 + 97) and '0' is 48 (26 + 22).
    return digit < 26 ? digit + 97 : digit + 22;
}
34
35static inline uint adapt(uint delta, uint numpoints, bool firsttime)
36{
37 delta /= (firsttime ? damp : 2);
38 delta += (delta / numpoints);
39
40 uint k = 0;
41 for (; delta > ((base - tmin) * tmax) / 2; k += base)
42 delta /= (base - tmin);
43
44 return k + (((base - tmin + 1) * delta) / (delta + skew));
45}
46
47static inline void appendEncode(QString *output, uint delta, uint bias)
48{
49 uint qq;
50 uint k;
51 uint t;
52
53 // insert the variable length delta integer.
54 for (qq = delta, k = base;; k += base) {
55 // stop generating digits when the threshold is
56 // detected.
57 t = (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias;
58 if (qq < t) break;
59
60 *output += QChar(encodeDigit(t + (qq - t) % (base - t)));
61 qq = (qq - t) / (base - t);
62 }
63
64 *output += QChar(encodeDigit(qq));
65}
66
68{
70 uint delta = 0;
72
73 // Do not try to encode strings that certainly will result in output
74 // that is longer than allowable domain name label length. Note that
75 // non-BMP codepoints are encoded as two QChars.
76 if (in.size() > MaxDomainLabelLength * 2)
77 return;
78
79 int outLen = output->size();
80 output->resize(outLen + in.size());
81
82 QChar *d = output->data() + outLen;
83 bool skipped = false;
84 // copy all basic code points verbatim to output.
85 for (QChar c : in) {
86 if (c.unicode() < 0x80)
87 *d++ = c;
88 else
89 skipped = true;
90 }
91
92 // if there were only basic code points, just return them
93 // directly; don't do any encoding.
94 if (!skipped)
95 return;
96
97 output->truncate(d - output->constData());
98 int copied = output->size() - outLen;
99
100 // h and b now contain the number of basic code points in input.
101 uint b = copied;
102 uint h = copied;
103
104 // if basic code points were copied, add the delimiter character.
105 if (h > 0)
106 *output += u'-';
107
108 // compute the input length in Unicode code points.
109 uint inputLength = 0;
110 for (QStringIterator iter(in); iter.hasNext();) {
111 inputLength++;
112
113 if (iter.next(char32_t(-1)) == char32_t(-1)) {
114 output->truncate(outLen);
115 return; // invalid surrogate pair
116 }
117 }
118
119 // while there are still unprocessed non-basic code points left in
120 // the input string...
121 while (h < inputLength) {
122 // find the character in the input string with the lowest unprocessed value.
123 uint m = std::numeric_limits<uint>::max();
124 for (QStringIterator iter(in); iter.hasNext();) {
125 auto c = iter.nextUnchecked();
126 static_assert(std::numeric_limits<decltype(m)>::max()
127 >= std::numeric_limits<decltype(c)>::max(),
128 "Punycode uint should be able to cover all codepoints");
129 if (c >= n && c < m)
130 m = c;
131 }
132
133 // delta = delta + (m - n) * (h + 1), fail on overflow
134 uint tmp;
135 if (qMulOverflow<uint>(m - n, h + 1, &tmp) || qAddOverflow<uint>(delta, tmp, &delta)) {
136 output->truncate(outLen);
137 return; // punycode_overflow
138 }
139 n = m;
140
141 for (QStringIterator iter(in); iter.hasNext();) {
142 auto c = iter.nextUnchecked();
143
144 // increase delta until we reach the character processed in this iteration;
145 // fail if delta overflows.
146 if (c < n) {
147 if (qAddOverflow<uint>(delta, 1, &delta)) {
148 output->truncate(outLen);
149 return; // punycode_overflow
150 }
151 }
152
153 if (c == n) {
154 appendEncode(output, delta, bias);
155
156 bias = adapt(delta, h + 1, h == b);
157 delta = 0;
158 ++h;
159 }
160 }
161
162 ++delta;
163 ++n;
164 }
165
166 // prepend ACE prefix
167 output->insert(outLen, "xn--"_L1);
168 return;
169}
170
172{
173 uint n = initial_n;
174 uint i = 0;
176
177 // Do not try to decode strings longer than allowable for a domain label.
178 // Non-ASCII strings are not allowed here anyway, so there is no need
179 // to account for surrogates.
180 if (pc.size() > MaxDomainLabelLength)
181 return QString();
182
183 // strip any ACE prefix
184 int start = pc.startsWith("xn--"_L1) ? 4 : 0;
185 if (!start)
186 return pc;
187
188 // find the last delimiter character '-' in the input array. copy
189 // all data before this delimiter directly to the output array.
190 int delimiterPos = pc.lastIndexOf(u'-');
191 auto output = delimiterPos < 4 ? std::u32string()
192 : pc.mid(start, delimiterPos - start).toStdU32String();
193
194 // if a delimiter was found, skip to the position after it;
195 // otherwise start at the front of the input string. everything
196 // before the delimiter is assumed to be basic code points.
197 uint cnt = delimiterPos + 1;
198
199 // loop through the rest of the input string, inserting non-basic
200 // characters into output as we go.
201 while (cnt < (uint) pc.size()) {
202 uint oldi = i;
203 uint w = 1;
204
205 // find the next index for inserting a non-basic character.
206 for (uint k = base; cnt < (uint) pc.size(); k += base) {
207 // grab a character from the punycode input and find its
208 // delta digit (each digit code is part of the
209 // variable-length integer delta)
210 uint digit = pc.at(cnt++).unicode();
211 if (digit - 48 < 10) digit -= 22;
212 else if (digit - 65 < 26) digit -= 65;
213 else if (digit - 97 < 26) digit -= 97;
214 else digit = base;
215
216 // Fail if the code point has no digit value
217 if (digit >= base)
218 return QString();
219
220 // i = i + digit * w, fail on overflow
221 uint tmp;
222 if (qMulOverflow<uint>(digit, w, &tmp) || qAddOverflow<uint>(i, tmp, &i))
223 return QString();
224
225 // detect threshold to stop reading delta digits
226 uint t;
227 if (k <= bias) t = tmin;
228 else if (k >= bias + tmax) t = tmax;
229 else t = k - bias;
230
231 if (digit < t) break;
232
233 // w = w * (base - t), fail on overflow
234 if (qMulOverflow<uint>(w, base - t, &w))
235 return QString();
236 }
237
238 // find new bias and calculate the next non-basic code
239 // character.
240 uint outputLength = static_cast<uint>(output.length());
241 bias = adapt(i - oldi, outputLength + 1, oldi == 0);
242
243 // n = n + i div (length(output) + 1), fail on overflow
244 if (qAddOverflow<uint>(n, i / (outputLength + 1), &n))
245 return QString();
246
247 // allow the deltas to wrap around
248 i %= (outputLength + 1);
249
250 // if n is a basic code point then fail; this should not happen with
251 // correct implementation of Punycode, but check just in case.
252 if (n < initial_n) {
253 // Don't use Q_ASSERT() to avoid possibility of DoS
254 qWarning("Attempt to insert a basic codepoint. Unhandled overflow?");
255 return QString();
256 }
257
258 // Surrogates should normally be rejected later by other IDNA code.
259 // But because of Qt's use of UTF-16 to represent strings the
260 // IDNA code is not able to distinguish characters represented as pairs
261 // of surrogates from normal code points. This is why surrogates are
262 // not allowed here.
263 //
264 // Allowing surrogates would lead to non-unique (after normalization)
265 // encoding of strings with non-BMP characters.
266 //
267 // Punycode that encodes characters outside the Unicode range is also
268 // invalid and is rejected here.
270 return QString();
271
272 // insert the character n at position i
273 output.insert(i, 1, static_cast<char32_t>(n));
274 ++i;
275 }
276
278}
279
280static constexpr auto idn_whitelist = qOffsetStringArray(
281 "ac", "ar", "asia", "at",
282 "biz", "br",
283 "cat", "ch", "cl", "cn", "com",
284 "de", "dk",
285 "es",
286 "fi",
287 "gr",
288 "hu",
289 "il", "info", "io", "is", "ir",
290 "jp",
291 "kr",
292 "li", "lt", "lu", "lv",
293 "museum",
294 "name", "net", "no", "nu", "nz",
295 "org",
296 "pl", "pr",
297 "se", "sh",
298 "tel", "th", "tm", "tw",
299 "ua",
300 "vn",
301 "xn--fiqs8s", // China
302 "xn--fiqz9s", // China
303 "xn--fzc2c9e2c", // Sri Lanka
304 "xn--j6w193g", // Hong Kong
305 "xn--kprw13d", // Taiwan
306 "xn--kpry57d", // Taiwan
307 "xn--mgba3a4f16a", // Iran
308 "xn--mgba3a4fra", // Iran
309 "xn--mgbaam7a8h", // UAE
310 "xn--mgbayh7gpa", // Jordan
311 "xn--mgberp4a5d4ar", // Saudi Arabia
312 "xn--ogbpf8fl", // Syria
313 "xn--p1ai", // Russian Federation
314 "xn--wgbh1c", // Egypt
315 "xn--wgbl6a", // Qatar
316 "xn--xkc2al3hye2a" // Sri Lanka
317);
318
// Optional replacement for the built-in idn_whitelist; null until something
// assigns it. Other code in this file dereferences it (contains() on the TLD,
// and returning a copy of the list).
// NOTE(review): the statements that guard those dereferences and the code that
// allocates the list are not visible in this excerpt -- confirm against the
// full file.
319Q_CONSTINIT static QStringList *user_idn_whitelist = nullptr;
320
321static bool lessThan(const QChar *a, int l, const char *c)
322{
323 const auto *uc = reinterpret_cast<const char16_t *>(a);
324 const char16_t *e = uc + l;
325
326 if (!c || *c == 0)
327 return false;
328
329 while (*c) {
330 if (uc == e || *uc != static_cast<unsigned char>(*c))
331 break;
332 ++uc;
333 ++c;
334 }
335 return uc == e ? *c : (*uc < static_cast<unsigned char>(*c));
336}
337
338static bool equal(const QChar *a, int l, const char *b)
339{
340 while (l && a->unicode() && *b) {
341 if (*a != QLatin1Char(*b))
342 return false;
343 ++a;
344 ++b;
345 --l;
346 }
347 return l == 0;
348}
349
350static bool qt_is_idn_enabled(QStringView aceDomain)
351{
352 auto idx = aceDomain.lastIndexOf(u'.');
353 if (idx == -1)
354 return false;
355
356 auto tldString = aceDomain.mid(idx + 1);
357 const auto len = tldString.size();
358
359 const QChar *tld = tldString.constData();
360
362 return user_idn_whitelist->contains(tldString);
363
364 int l = 0;
365 int r = idn_whitelist.count() - 1;
366 int i = (l + r + 1) / 2;
367
368 while (r != l) {
369 if (lessThan(tld, len, idn_whitelist.at(i)))
370 r = i - 1;
371 else
372 l = i;
373 i = (l + r + 1) / 2;
374 }
375 return equal(tld, len, idn_whitelist.at(i));
376}
377
/*
    Returns true if c may appear in an already normalized ASCII label:
    a lower-case letter a-z, a digit 0-9, a hyphen or an underscore.
*/
template<typename C>
static inline bool isValidInNormalizedAsciiLabel(C c)
{
    if (c >= u'a' && c <= u'z')
        return true;
    if (c >= u'0' && c <= u'9')
        return true;
    return c == u'_' || c == u'-';
}
383
/*
    Returns true if c may appear in an already normalized ASCII domain name:
    the label separator '.' or any character accepted by
    isValidInNormalizedAsciiLabel().
*/
template<typename C>
static inline bool isValidInNormalizedAsciiName(C c)
{
    if (c == u'.')
        return true;
    return isValidInNormalizedAsciiLabel(c);
}
389
390/*
391 Map domain name according to algorithm in UTS #46, 4.1
392
393 Returns empty string if there are disallowed characters in the input.
394
395 Sets resultIsAscii if the result is known for sure to be all ASCII.
396*/
397static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions options,
398 bool *resultIsAscii)
399{
400 *resultIsAscii = true;
401
402 // Check if the input is already normalized ASCII first and can be returned as is.
403 int i = 0;
404 for (auto c : in) {
405 if (c.unicode() >= 0x80 || !isValidInNormalizedAsciiName(c))
406 break;
407 i++;
408 }
409
410 if (i == in.size())
411 return in;
412
414 result.reserve(in.size());
415 result.append(in.constData(), i);
416 bool allAscii = true;
417
418 for (QStringIterator iter(QStringView(in).sliced(i)); iter.hasNext();) {
419 char32_t uc = iter.next();
420
421 // Fast path for ASCII-only inputs
422 if (Q_LIKELY(uc < 0x80)) {
423 if (uc >= U'A' && uc <= U'Z')
424 uc |= 0x20; // lower-case it
425
427 return {};
428
429 result.append(static_cast<char16_t>(uc));
430 continue;
431 }
432 allAscii = false;
433
435
437 status = options.testFlag(QUrl::AceTransitionalProcessing)
440
441 switch (status) {
443 continue;
445 for (auto c : QChar::fromUcs4(uc))
446 result.append(c);
447 break;
450 break;
452 return {};
453 default:
454 Q_UNREACHABLE();
455 }
456 }
457
458 *resultIsAscii = allAscii;
459 return result;
460}
461
462/*
463 Check the rules for an ASCII label.
464
465 Check the size restriction and that the label does not start or end with dashes.
466
467 The label should be nonempty.
468*/
470{
471 if (label.size() > MaxDomainLabelLength)
472 return false;
473
474 if (label.first() == u'-' || label.last() == u'-')
475 return false;
476
477 return std::all_of(label.begin(), label.end(), isValidInNormalizedAsciiLabel<QChar>);
478}
479
480namespace {
481
// Validates the labels of one domain name, one checkLabel() call per label.
// The object carries state from label to label, so the same instance has to
// be used for every label of a domain name.
482class DomainValidityChecker
483{
 // Set by checkLabel() once any label contained a character whose
 // direction is R, AL or AN.
484 bool domainNameIsBidi = false;
 // Set by checkLabel() once checkBidiRules() failed for any label; it only
 // makes checkLabel() fail when domainNameIsBidi is (or later becomes) true.
485 bool hadBidiErrors = false;
486
 // The two joiner characters for which checkLabel() triggers the
 // CONTEXTJ rule check.
487 static constexpr char32_t ZWNJ = U'\u200C';
488 static constexpr char32_t ZWJ = U'\u200D';
489
490public:
491 DomainValidityChecker() { }
 // Returns false if the label makes the domain name invalid; see the
 // comment at the definition for the list of criteria.
492 bool checkLabel(const QString &label, QUrl::AceProcessingOptions options);
493
494private:
 // Stateless per-label helpers used by checkLabel().
495 static bool checkContextJRules(QStringView label);
496 static bool checkBidiRules(QStringView label);
497};
498
499} // anonymous namespace
500
501/*
502 Check CONTEXTJ rules according to RFC 5892, appendix A.1 & A.2.
503
504 Rule Set for U+200C (ZWNJ):
505
506 False;
507
508 If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
509
510 If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C
511
512 (Joining_Type:T)*(Joining_Type:{R,D})) Then True;
513
514 Rule Set for U+200D (ZWJ):
515
516 False;
517
518 If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True;
519
520*/
521bool DomainValidityChecker::checkContextJRules(QStringView label)
522{
523 constexpr unsigned char CombiningClassVirama = 9;
524
525 enum class State {
526 Initial,
527 LD_T, // L,D with possible following T*
528 ZWNJ_T, // ZWNJ with possible following T*
529 };
530 State regexpState = State::Initial;
531 bool previousIsVirama = false;
532
533 for (QStringIterator iter(label); iter.hasNext();) {
534 auto ch = iter.next();
535
536 if (ch == ZWJ) {
537 if (!previousIsVirama)
538 return false;
539 regexpState = State::Initial;
540 } else if (ch == ZWNJ) {
541 if (!previousIsVirama && regexpState != State::LD_T)
542 return false;
543 regexpState = previousIsVirama ? State::Initial : State::ZWNJ_T;
544 } else {
545 switch (QChar::joiningType(ch)) {
547 if (regexpState == State::ZWNJ_T)
548 return false;
549 regexpState = State::LD_T;
550 break;
552 regexpState = State::Initial;
553 break;
555 regexpState = State::LD_T;
556 break;
558 break;
559 default:
560 regexpState = State::Initial;
561 break;
562 }
563 }
564
565 previousIsVirama = QChar::combiningClass(ch) == CombiningClassVirama;
566 }
567
568 return regexpState != State::ZWNJ_T;
569}
570
571/*
572 Check if the label conforms to BiDi rule of RFC 5893.
573
574 1. The first character must be a character with Bidi property L, R,
575 or AL. If it has the R or AL property, it is an RTL label; if it
576 has the L property, it is an LTR label.
577
578 2. In an RTL label, only characters with the Bidi properties R, AL,
579 AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
580
581 3. In an RTL label, the end of the label must be a character with
582 Bidi property R, AL, EN, or AN, followed by zero or more
583 characters with Bidi property NSM.
584
585 4. In an RTL label, if an EN is present, no AN may be present, and
586 vice versa.
587
588 5. In an LTR label, only characters with the Bidi properties L, EN,
589 ES, CS, ET, ON, BN, or NSM are allowed.
590
591 6. In an LTR label, the end of the label must be a character with
592 Bidi property L or EN, followed by zero or more characters with
593 Bidi property NSM.
594*/
595bool DomainValidityChecker::checkBidiRules(QStringView label)
596{
597 if (label.isEmpty())
598 return true;
599
601 Q_ASSERT(iter.hasNext());
602
603 char32_t ch = iter.next();
604 bool labelIsRTL = false;
605
606 switch (QChar::direction(ch)) {
607 case QChar::DirL:
608 break;
609 case QChar::DirR:
610 case QChar::DirAL:
611 labelIsRTL = true;
612 break;
613 default:
614 return false;
615 }
616
617 bool tailOk = true;
618 bool labelHasEN = false;
619 bool labelHasAN = false;
620
621 while (iter.hasNext()) {
622 ch = iter.next();
623
624 switch (QChar::direction(ch)) {
625 case QChar::DirR:
626 case QChar::DirAL:
627 if (!labelIsRTL)
628 return false;
629 tailOk = true;
630 break;
631
632 case QChar::DirL:
633 if (labelIsRTL)
634 return false;
635 tailOk = true;
636 break;
637
638 case QChar::DirES:
639 case QChar::DirCS:
640 case QChar::DirET:
641 case QChar::DirON:
642 case QChar::DirBN:
643 tailOk = false;
644 break;
645
646 case QChar::DirNSM:
647 break;
648
649 case QChar::DirAN:
650 if (labelIsRTL) {
651 if (labelHasEN)
652 return false;
653 labelHasAN = true;
654 tailOk = true;
655 } else {
656 return false;
657 }
658 break;
659
660 case QChar::DirEN:
661 if (labelIsRTL) {
662 if (labelHasAN)
663 return false;
664 labelHasEN = true;
665 }
666 tailOk = true;
667 break;
668
669 default:
670 return false;
671 }
672 }
673
674 return tailOk;
675}
676
677/*
678 Check if the given label is valid according to UTS #46 validity criteria.
679
680 NFC check can be skipped if the label was transformed to NFC before calling
681 this function (as optimization).
682
683 The domain name is considered invalid if this function returns false at least
684 once.
685
686 1. The label must be in Unicode Normalization Form NFC.
687 2. If CheckHyphens, the label must not contain a U+002D HYPHEN-MINUS character
688 in both the third and fourth positions.
689 3. If CheckHyphens, the label must neither begin nor end with a U+002D HYPHEN-MINUS character.
690 4. The label must not contain a U+002E ( . ) FULL STOP.
691 5. The label must not begin with a combining mark, that is: General_Category=Mark.
692 6. Each code point in the label must only have certain status values according to Section 5,
693 IDNA Mapping Table:
694 1. For Transitional Processing, each value must be valid.
695 2. For Nontransitional Processing, each value must be either valid or deviation.
696 7. If CheckJoiners, the label must satisfy the ContextJ rules from Appendix A, in The Unicode
697 Code Points and Internationalized Domain Names for Applications (IDNA).
698 8. If CheckBidi, and if the domain name is a Bidi domain name, then the label must satisfy
699 all six of the numbered conditions in RFC 5893, Section 2.
700
701 NOTE: Don't use QStringView for label, so that call to QString::normalized() can avoid
702 memory allocation when there is nothing to normalize.
703*/
704bool DomainValidityChecker::checkLabel(const QString &label, QUrl::AceProcessingOptions options)
705{
706 if (label.isEmpty())
707 return true;
708
709 if (label != label.normalized(QString::NormalizationForm_C))
710 return false;
711
712 if (label.size() >= 4) {
713 // This assumes that the first two characters are in BMP, but that's ok
714 // because non-BMP characters are unlikely to be used for specifying
715 // future extensions.
716 if (label[2] == u'-' && label[3] == u'-')
717 return false;
718 }
719
720 if (label.startsWith(u'-') || label.endsWith(u'-'))
721 return false;
722
723 if (label.contains(u'.'))
724 return false;
725
727 auto c = iter.next();
728
729 if (QChar::isMark(c))
730 return false;
731
732 // As optimization, CONTEXTJ rules check can be skipped if no
733 // ZWJ/ZWNJ characters were found during the first pass.
734 bool hasJoiners = false;
735
736 for (;;) {
737 hasJoiners = hasJoiners || c == ZWNJ || c == ZWJ;
738
739 if (!domainNameIsBidi) {
740 switch (QChar::direction(c)) {
741 case QChar::DirR:
742 case QChar::DirAL:
743 case QChar::DirAN:
744 domainNameIsBidi = true;
745 if (hadBidiErrors)
746 return false;
747 break;
748 default:
749 break;
750 }
751 }
752
753 switch (QUnicodeTables::idnaStatus(c)) {
755 break;
757 if (options.testFlag(QUrl::AceTransitionalProcessing))
758 return false;
759 break;
760 default:
761 return false;
762 }
763
764 if (!iter.hasNext())
765 break;
766 c = iter.next();
767 }
768
769 if (hasJoiners && !checkContextJRules(label))
770 return false;
771
772 hadBidiErrors = hadBidiErrors || !checkBidiRules(label);
773
774 if (domainNameIsBidi && hadBidiErrors)
775 return false;
776
777 return true;
778}
779
780static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot)
781{
782 qsizetype lastIdx = 0;
783 QString aceForm; // this variable is here for caching
784 QString aceResult;
785
786 while (true) {
787 auto idx = normalizedDomain.indexOf(u'.', lastIdx);
788 if (idx == -1)
789 idx = normalizedDomain.size();
790
791 const auto labelLength = idx - lastIdx;
792 if (labelLength == 0) {
793 if (idx == normalizedDomain.size())
794 break;
795 if (dot == ForbidLeadingDot || idx > 0)
796 return {}; // two delimiters in a row -- empty label not allowed
797 } else {
798 const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength);
799 aceForm.clear();
800 qt_punycodeEncoder(label, &aceForm);
801 if (aceForm.isEmpty())
802 return {};
803
804 aceResult.append(aceForm);
805 }
806
807 if (idx == normalizedDomain.size())
808 break;
809
810 lastIdx = idx + 1;
811 aceResult += u'.';
812 }
813
814 return aceResult;
815}
816
817static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot dot,
818 bool *usesPunycode)
819{
820 qsizetype lastIdx = 0;
821 bool hasPunycode = false;
822 *usesPunycode = false;
823
824 while (lastIdx < normalizedDomain.size()) {
825 auto idx = normalizedDomain.indexOf(u'.', lastIdx);
826 if (idx == -1)
827 idx = normalizedDomain.size();
828
829 const auto labelLength = idx - lastIdx;
830 if (labelLength == 0) {
831 if (idx == normalizedDomain.size())
832 break;
833 if (dot == ForbidLeadingDot || idx > 0)
834 return false; // two delimiters in a row -- empty label not allowed
835 } else {
836 const auto label = QStringView(normalizedDomain).sliced(lastIdx, labelLength);
838 return false;
839
840 hasPunycode = hasPunycode || label.startsWith("xn--"_L1);
841 }
842
843 lastIdx = idx + 1;
844 }
845
846 *usesPunycode = hasPunycode;
847 return true;
848}
849
850static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingOptions options)
851{
853 result.reserve(asciiDomain.size());
854 qsizetype lastIdx = 0;
855
856 DomainValidityChecker checker;
857
858 while (true) {
859 auto idx = asciiDomain.indexOf(u'.', lastIdx);
860 if (idx == -1)
861 idx = asciiDomain.size();
862
863 const auto labelLength = idx - lastIdx;
864 if (labelLength == 0) {
865 if (idx == asciiDomain.size())
866 break;
867 } else {
868 const auto label = asciiDomain.sliced(lastIdx, labelLength);
869 const auto unicodeLabel = qt_punycodeDecoder(label);
870
871 if (unicodeLabel.isEmpty())
872 return asciiDomain;
873
874 if (!checker.checkLabel(unicodeLabel, options))
875 return asciiDomain;
876
877 result.append(unicodeLabel);
878 }
879
880 if (idx == asciiDomain.size())
881 break;
882
883 lastIdx = idx + 1;
884 result += u'.';
885 }
886 return result;
887}
888
890 QUrl::AceProcessingOptions options)
891{
892 if (domain.isEmpty())
893 return {};
894
895 bool mappedToAscii;
896 const QString mapped = mapDomainName(domain, options, &mappedToAscii);
897 const QString normalized =
898 mappedToAscii ? mapped : mapped.normalized(QString::NormalizationForm_C);
899
900 if (normalized.isEmpty())
901 return {};
902
903 bool needsCoversionToUnicode;
904 const QString aceResult = mappedToAscii ? normalized : convertToAscii(normalized, dot);
905 if (aceResult.isEmpty() || !checkAsciiDomainName(aceResult, dot, &needsCoversionToUnicode))
906 return {};
907
908 if (op == ToAceOnly || !needsCoversionToUnicode
909 || (!options.testFlag(QUrl::IgnoreIDNWhitelist) && !qt_is_idn_enabled(aceResult))) {
910 return aceResult;
911 }
912
913 return convertToUnicode(aceResult, options);
914}
915
927{
929 return *user_idn_whitelist;
930 static const QStringList list = [] {
932 list.reserve(idn_whitelist.count());
933 int i = 0;
934 while (i < idn_whitelist.count()) {
936 ++i;
937 }
938 return list;
939 }();
940 return list;
941}
942
965{
969}
970
\inmodule QtCore
Definition qchar.h:48
@ LastValidCodePoint
Definition qchar.h:66
Direction direction() const noexcept
Returns the character's direction.
Definition qchar.h:437
static constexpr auto fromUcs4(char32_t c) noexcept
@ DirNSM
Definition qchar.h:347
@ DirON
Definition qchar.h:346
@ DirL
Definition qchar.h:346
@ DirEN
Definition qchar.h:346
@ DirCS
Definition qchar.h:346
@ DirAN
Definition qchar.h:346
@ DirAL
Definition qchar.h:347
@ DirBN
Definition qchar.h:347
@ DirET
Definition qchar.h:346
@ DirR
Definition qchar.h:346
@ DirES
Definition qchar.h:346
constexpr bool isSurrogate() const noexcept
Definition qchar.h:481
constexpr char16_t unicode() const noexcept
Returns the numeric Unicode value of the QChar.
Definition qchar.h:458
@ Joining_Dual
Definition qchar.h:376
@ Joining_Left
Definition qchar.h:378
@ Joining_Right
Definition qchar.h:377
@ Joining_Transparent
Definition qchar.h:379
unsigned char combiningClass() const noexcept
Returns the combining class for the character as defined in the Unicode standard.
Definition qchar.h:439
JoiningType joiningType() const noexcept
Definition qchar.h:438
bool isMark() const noexcept
Returns true if the character is a mark (Mark_* categories); otherwise returns false.
Definition qchar.h:467
void reserve(qsizetype size)
Definition qlist.h:746
\inmodule QtCore
\inmodule QtCore
Definition qstringview.h:76
bool startsWith(QStringView s, Qt::CaseSensitivity cs=Qt::CaseSensitive) const noexcept
constexpr qsizetype size() const noexcept
Returns the size of this string view, in UTF-16 code units (that is, surrogate pairs count as two for...
constexpr QStringView mid(qsizetype pos, qsizetype n=-1) const noexcept
Returns the substring of length length starting at position start in this object.
constexpr QStringView sliced(qsizetype pos) const noexcept
qsizetype lastIndexOf(QChar c, Qt::CaseSensitivity cs=Qt::CaseSensitive) const noexcept
\macro QT_RESTRICTED_CAST_FROM_ASCII
Definition qstring.h:127
std::u32string toStdU32String() const
Definition qstring.h:1345
qsizetype lastIndexOf(QChar c, Qt::CaseSensitivity cs=Qt::CaseSensitive) const noexcept
Definition qstring.h:279
bool startsWith(const QString &s, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
Returns true if the string starts with s; otherwise returns false.
Definition qstring.cpp:5299
void clear()
Clears the contents of the string and makes it null.
Definition qstring.h:1107
qsizetype size() const
Returns the number of characters in this string.
Definition qstring.h:182
QString mid(qsizetype position, qsizetype n=-1) const
Returns a string that contains n characters of this string, starting at the specified position index.
Definition qstring.cpp:5204
const QChar at(qsizetype i) const
Returns the character at the given index position in the string.
Definition qstring.h:1079
bool isEmpty() const
Returns true if the string has no characters; otherwise returns false.
Definition qstring.h:1083
static QString fromStdU32String(const std::u32string &s)
Definition qstring.h:1342
QString sliced(qsizetype pos) const
Definition qstring.h:341
QString & append(QChar c)
Definition qstring.cpp:3227
static QString static QString qsizetype indexOf(QChar c, qsizetype from=0, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
Definition qstring.cpp:4420
@ NormalizationForm_C
Definition qstring.h:548
@ IgnoreIDNWhitelist
Definition qurl.h:252
@ AceTransitionalProcessing
Definition qurl.h:253
static QStringList idnWhitelist()
Definition qurlidna.cpp:926
static void setIdnWhitelist(const QStringList &)
Definition qurlidna.cpp:964
double e
Combined button and popup list for selecting options.
Q_CORE_EXPORT IdnaStatus QT_FASTCALL idnaStatus(char32_t ucs4) noexcept
Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t ucs4) noexcept
#define Q_LIKELY(x)
DBusConnection const char DBusError DBusBusType DBusError return DBusConnection DBusHandleMessageFunction void DBusFreeFunction return DBusConnection return DBusConnection return const char DBusError return DBusConnection DBusMessage dbus_uint32_t return DBusConnection dbus_bool_t DBusConnection DBusAddWatchFunction DBusRemoveWatchFunction DBusWatchToggledFunction void DBusFreeFunction return DBusConnection DBusDispatchStatusFunction void DBusFreeFunction DBusTimeout return DBusTimeout return DBusWatch return DBusWatch unsigned int return DBusError const DBusError return const DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessageIter * iter
#define qWarning
Definition qlogging.h:162
constexpr auto qOffsetStringArray(const char(&...strings)[Nx]) noexcept
GLboolean GLboolean GLboolean b
const GLfloat * m
GLfloat GLfloat GLfloat w
[0]
GLboolean GLboolean GLboolean GLboolean a
[7]
GLboolean r
[2]
GLuint GLsizei const GLchar * label
[43]
GLuint start
GLfloat n
GLfloat GLfloat GLfloat GLfloat h
const GLubyte * c
GLfloat bias
GLenum GLsizei len
GLdouble GLdouble t
Definition qopenglext.h:243
GLuint in
GLint GLenum GLboolean normalized
Definition qopenglext.h:752
GLuint64EXT * result
[6]
static qreal dot(const QPointF &a, const QPointF &b)
#define Q_ASSERT(cond)
Definition qrandom.cpp:47
#define Q_AUTOTEST_EXPORT
ptrdiff_t qsizetype
Definition qtypes.h:70
unsigned int uint
Definition qtypes.h:29
AceLeadingDot
Definition qurl_p.h:30
@ ForbidLeadingDot
Definition qurl_p.h:30
AceOperation
Definition qurl_p.h:31
@ ToAceOnly
Definition qurl_p.h:31
static bool checkAsciiDomainName(const QString &normalizedDomain, AceLeadingDot dot, bool *usesPunycode)
Definition qurlidna.cpp:817
static const uint damp
Definition qurlidna.cpp:24
static const uint initial_bias
Definition qurlidna.cpp:25
QString qt_ACE_do(const QString &domain, AceOperation op, AceLeadingDot dot, QUrl::AceProcessingOptions options)
Definition qurlidna.cpp:889
static const uint tmax
Definition qurlidna.cpp:22
static bool isValidInNormalizedAsciiName(C c)
Definition qurlidna.cpp:385
static bool qt_is_idn_enabled(QStringView aceDomain)
Definition qurlidna.cpp:350
static const uint skew
Definition qurlidna.cpp:23
static constexpr auto idn_whitelist
Definition qurlidna.cpp:280
static constexpr qsizetype MaxDomainLabelLength
Definition qurlidna.cpp:28
static const uint base
Definition qurlidna.cpp:20
static bool validateAsciiLabel(QStringView label)
Definition qurlidna.cpp:469
static bool isValidInNormalizedAsciiLabel(C c)
Definition qurlidna.cpp:379
static void appendEncode(QString *output, uint delta, uint bias)
Definition qurlidna.cpp:47
static Q_CONSTINIT QStringList * user_idn_whitelist
Definition qurlidna.cpp:319
static bool lessThan(const QChar *a, int l, const char *c)
Definition qurlidna.cpp:321
static QString mapDomainName(const QString &in, QUrl::AceProcessingOptions options, bool *resultIsAscii)
Definition qurlidna.cpp:397
static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingOptions options)
Definition qurlidna.cpp:850
static QString convertToAscii(const QString &normalizedDomain, AceLeadingDot dot)
Definition qurlidna.cpp:780
Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc)
Definition qurlidna.cpp:171
static const uint initial_n
Definition qurlidna.cpp:26
Q_AUTOTEST_EXPORT void qt_punycodeEncoder(QStringView in, QString *output)
Definition qurlidna.cpp:67
static bool equal(const QChar *a, int l, const char *b)
Definition qurlidna.cpp:338
static const uint tmin
Definition qurlidna.cpp:21
static uint encodeDigit(uint digit)
Definition qurlidna.cpp:30
static uint adapt(uint delta, uint numpoints, bool firsttime)
Definition qurlidna.cpp:35
QT_BEGIN_NAMESPACE typedef uchar * output
QList< int > list
[14]
\inmodule QtCore \reentrant
Definition qchar.h:17