Qt 6.x
The Qt SDK
Loading...
Searching...
No Matches
qstringconverter.cpp
Go to the documentation of this file.
1// Copyright (C) 2020 The Qt Company Ltd.
2// Copyright (C) 2020 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#include <qstringconverter.h>
6#include <private/qstringconverter_p.h>
7#include "qendian.h"
8
9#include "private/qsimd_p.h"
10#include "private/qstringiterator_p.h"
11#include "private/qtools_p.h"
12#include "qbytearraymatcher.h"
13
14#if QT_CONFIG(icu)
15#include <unicode/ucnv.h>
16#include <unicode/ucnv_cb.h>
17#include <unicode/ucnv_err.h>
18#include <unicode/ustring.h>
19#endif
20
21#ifdef Q_OS_WIN
22#include <qt_windows.h>
23#ifndef QT_BOOTSTRAPPED
24#include <QtCore/qvarlengtharray.h>
25#endif // !QT_BOOTSTRAPPED
26#endif
27
28#if __has_include(<bit>) && __cplusplus > 201703L
29#include <bit>
30#endif
31
33
34using namespace QtMiscUtils;
35
// Both converter classes are required to be movable without throwing.
static_assert(std::is_nothrow_move_constructible_v<QStringEncoder>);
static_assert(std::is_nothrow_move_assignable_v<QStringEncoder>);
static_assert(std::is_nothrow_move_constructible_v<QStringDecoder>);
static_assert(std::is_nothrow_move_assignable_v<QStringDecoder>);

// Indices into the converter state's state_data array as used by the UTF-16
// and UTF-32 codecs in this file: the byte order in effect, and input bytes
// (or a code unit) carried over to the next call.
enum { Endian = 0, Data = 1 };

// The byte order mark as it appears at the start of UTF-8 data.
static const uchar utf8bom[] = { 0xef, 0xbb, 0xbf };
44
45#if defined(__SSE2__) || defined(__ARM_NEON__)
46static Q_ALWAYS_INLINE uint qBitScanReverse(unsigned v) noexcept
47{
48#if defined(__cpp_lib_int_pow2) && __cpp_lib_int_pow2 >= 202002L
49 return std::bit_width(v) - 1;
50#else
52 // Now Invert the result: clz will count *down* from the msb to the lsb, so the msb index is 31
53 // and the lsb index is 0. The result for _bit_scan_reverse is expected to be the index when
54 // counting up: msb index is 0 (because it starts there), and the lsb index is 31.
55 result ^= sizeof(unsigned) * 8 - 1;
56 return result;
57#endif
58}
59#endif
60
61#if defined(__SSE2__)
62static inline bool simdEncodeAscii(uchar *&dst, const char16_t *&nextAscii, const char16_t *&src, const char16_t *end)
63{
64 // do sixteen characters at a time
65 for ( ; end - src >= 16; src += 16, dst += 16) {
66# ifdef __AVX2__
67 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
68 __m128i data1 = _mm256_castsi256_si128(data);
69 __m128i data2 = _mm256_extracti128_si256(data, 1);
70# else
71 __m128i data1 = _mm_loadu_si128((const __m128i*)src);
72 __m128i data2 = _mm_loadu_si128(1+(const __m128i*)src);
73# endif
74
75 // check if everything is ASCII
76 // the highest ASCII value is U+007F
77 // Do the packing directly:
78 // The PACKUSWB instruction packs a signed 16-bit integer to an unsigned 8-bit
79 // with saturation. That is, anything from 0x0100 to 0x7fff is saturated to 0xff,
80 // while all negatives (0x8000 to 0xffff) get saturated to 0x00. To detect non-ASCII,
81 // we simply do a signed greater-than comparison to 0x00. That means we detect NULs as
82 // "non-ASCII", but it's an acceptable compromise.
83 __m128i packed = _mm_packus_epi16(data1, data2);
84 __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128());
85
86 // store, even if there are non-ASCII characters here
87 _mm_storeu_si128((__m128i*)dst, packed);
88
89 // n will contain 1 bit set per character in [data1, data2] that is non-ASCII (or NUL)
90 ushort n = ~_mm_movemask_epi8(nonAscii);
91 if (n) {
92 // find the next probable ASCII character
93 // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
94 // characters still coming
95 nextAscii = src + qBitScanReverse(n) + 1;
96
98 dst += n;
99 src += n;
100 return false;
101 }
102 }
103
104 if (end - src >= 8) {
105 // do eight characters at a time
106 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
107 __m128i packed = _mm_packus_epi16(data, data);
108 __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128());
109
110 // store even non-ASCII
111 _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), packed);
112
113 uchar n = ~_mm_movemask_epi8(nonAscii);
114 if (n) {
115 nextAscii = src + qBitScanReverse(n) + 1;
117 dst += n;
118 src += n;
119 return false;
120 }
121 }
122
123 return src == end;
124}
125
126static inline bool simdDecodeAscii(char16_t *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
127{
128 // do sixteen characters at a time
129 for ( ; end - src >= 16; src += 16, dst += 16) {
130 __m128i data = _mm_loadu_si128((const __m128i*)src);
131
132#ifdef __AVX2__
133 const int BitSpacing = 2;
134 // load and zero extend to an YMM register
135 const __m256i extended = _mm256_cvtepu8_epi16(data);
136
137 uint n = _mm256_movemask_epi8(extended);
138 if (!n) {
139 // store
140 _mm256_storeu_si256((__m256i*)dst, extended);
141 continue;
142 }
143#else
144 const int BitSpacing = 1;
145
146 // check if everything is ASCII
147 // movemask extracts the high bit of every byte, so n is non-zero if something isn't ASCII
148 uint n = _mm_movemask_epi8(data);
149 if (!n) {
150 // unpack
151 _mm_storeu_si128((__m128i*)dst, _mm_unpacklo_epi8(data, _mm_setzero_si128()));
152 _mm_storeu_si128(1+(__m128i*)dst, _mm_unpackhi_epi8(data, _mm_setzero_si128()));
153 continue;
154 }
155#endif
156
157 // copy the front part that is still ASCII
158 while (!(n & 1)) {
159 *dst++ = *src++;
160 n >>= BitSpacing;
161 }
162
163 // find the next probable ASCII character
164 // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
165 // characters still coming
166 n = qBitScanReverse(n);
167 nextAscii = src + (n / BitSpacing) + 1;
168 return false;
169
170 }
171
172 if (end - src >= 8) {
173 __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
174 uint n = _mm_movemask_epi8(data) & 0xff;
175 if (!n) {
176 // unpack and store
177 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_unpacklo_epi8(data, _mm_setzero_si128()));
178 } else {
179 while (!(n & 1)) {
180 *dst++ = *src++;
181 n >>= 1;
182 }
183
184 n = qBitScanReverse(n);
185 nextAscii = src + n + 1;
186 return false;
187 }
188 }
189
190 return src == end;
191}
192
193static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
194{
195#ifdef __AVX2__
196 // do 32 characters at a time
197 // (this is similar to simdTestMask in qstring.cpp)
198 const __m256i mask = _mm256_set1_epi8(char(0x80));
199 for ( ; end - src >= 32; src += 32) {
200 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
201 if (_mm256_testz_si256(mask, data))
202 continue;
203
204 uint n = _mm256_movemask_epi8(data);
205 Q_ASSUME(n);
206
207 // find the next probable ASCII character
208 // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
209 // characters still coming
210 nextAscii = src + qBitScanReverse(n) + 1;
211
212 // return the non-ASCII character
213 return src + qCountTrailingZeroBits(n);
214 }
215#endif
216
217 // do sixteen characters at a time
218 for ( ; end - src >= 16; src += 16) {
219 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
220
221 // check if everything is ASCII
222 // movemask extracts the high bit of every byte, so n is non-zero if something isn't ASCII
223 uint n = _mm_movemask_epi8(data);
224 if (!n)
225 continue;
226
227 // find the next probable ASCII character
228 // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
229 // characters still coming
230 nextAscii = src + qBitScanReverse(n) + 1;
231
232 // return the non-ASCII character
233 return src + qCountTrailingZeroBits(n);
234 }
235
236 // do four characters at a time
237 for ( ; end - src >= 4; src += 4) {
238 quint32 data = qFromUnaligned<quint32>(src);
239 data &= 0x80808080U;
240 if (!data)
241 continue;
242
243 // We don't try to guess which of the three bytes is ASCII and which
244 // one isn't. The chance that at least two of them are non-ASCII is
245 // better than 75%.
246 nextAscii = src;
247 return src;
248 }
249 nextAscii = end;
250 return src;
251}
252
253// Compare only the US-ASCII beginning of [src8, end8) and [src16, end16)
254// and advance src8 and src16 to the first character that could not be compared
255static void simdCompareAscii(const qchar8_t *&src8, const qchar8_t *end8, const char16_t *&src16, const char16_t *end16)
256{
257 int bitSpacing = 1;
258 qptrdiff len = qMin(end8 - src8, end16 - src16);
259 qptrdiff offset = 0;
260 uint mask = 0;
261
262 // do sixteen characters at a time
263 for ( ; offset + 16 < len; offset += 16) {
264 __m128i data8 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src8 + offset));
265#ifdef __AVX2__
266 // AVX2 version, use 256-bit registers and VPMOVXZBW
267 __m256i data16 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src16 + offset));
268
269 // expand US-ASCII as if it were Latin1 and confirm it's US-ASCII
270 __m256i datax8 = _mm256_cvtepu8_epi16(data8);
271 mask = _mm256_movemask_epi8(datax8);
272 if (mask)
273 break;
274
275 // compare Latin1 to UTF-16
276 __m256i latin1cmp = _mm256_cmpeq_epi16(datax8, data16);
277 mask = ~_mm256_movemask_epi8(latin1cmp);
278 if (mask)
279 break;
280#else
281 // non-AVX2 code
282 __m128i datalo16 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src16 + offset));
283 __m128i datahi16 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src16 + offset) + 1);
284
285 // expand US-ASCII as if it were Latin1, we'll confirm later
286 __m128i datalo8 = _mm_unpacklo_epi8(data8, _mm_setzero_si128());
287 __m128i datahi8 = _mm_unpackhi_epi8(data8, _mm_setzero_si128());
288
289 // compare Latin1 to UTF-16
290 __m128i latin1cmplo = _mm_cmpeq_epi16(datalo8, datalo16);
291 __m128i latin1cmphi = _mm_cmpeq_epi16(datahi8, datahi16);
292 mask = _mm_movemask_epi8(latin1cmphi) << 16;
293 mask |= ushort(_mm_movemask_epi8(latin1cmplo));
294 mask = ~mask;
295 if (mask)
296 break;
297
298 // confirm it was US-ASCII
299 mask = _mm_movemask_epi8(data8);
300 if (mask) {
301 bitSpacing = 0;
302 break;
303 }
304#endif
305 }
306
307 // helper for comparing 4 or 8 characters
308 auto cmp_lt_16 = [&mask, &offset](int n, __m128i data8, __m128i data16) {
309 // n = 4 -> sizemask = 0xff
310 // n = 8 -> sizemask = 0xffff
311 unsigned sizemask = (1U << (2 * n)) - 1;
312
313 // expand as if Latin1
314 data8 = _mm_unpacklo_epi8(data8, _mm_setzero_si128());
315
316 // compare and confirm it's US-ASCII
317 __m128i latin1cmp = _mm_cmpeq_epi16(data8, data16);
318 mask = ~_mm_movemask_epi8(latin1cmp) & sizemask;
319 mask |= _mm_movemask_epi8(data8);
320 if (mask == 0)
321 offset += n;
322 };
323
324 // do eight characters at a time
325 if (mask == 0 && offset + 8 < len) {
326 __m128i data8 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src8 + offset));
327 __m128i data16 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src16 + offset));
328 cmp_lt_16(8, data8, data16);
329 }
330
331 // do four characters
332 if (mask == 0 && offset + 4 < len) {
333 __m128i data8 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(src8 + offset));
334 __m128i data16 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src16 + offset));
335 cmp_lt_16(4, data8, data16);
336 }
337
338 // correct the source pointers to point to the first character we couldn't deal with
339 if (mask)
340 offset += qCountTrailingZeroBits(mask) >> bitSpacing;
341 src8 += offset;
342 src16 += offset;
343}
344#elif defined(__ARM_NEON__)
345static inline bool simdEncodeAscii(uchar *&dst, const char16_t *&nextAscii, const char16_t *&src, const char16_t *end)
346{
347 uint16x8_t maxAscii = vdupq_n_u16(0x7f);
348 uint16x8_t mask1 = { 1, 1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
349 uint16x8_t mask2 = vshlq_n_u16(mask1, 1);
350
351 // do sixteen characters at a time
352 for ( ; end - src >= 16; src += 16, dst += 16) {
353 // load 2 lanes (or: "load interleaved")
354 uint16x8x2_t in = vld2q_u16(reinterpret_cast<const uint16_t *>(src));
355
356 // check if any of the elements > 0x7f, select 1 bit per element (element 0 -> bit 0, element 1 -> bit 1, etc),
357 // add those together into a scalar, and merge the scalars.
358 uint16_t nonAscii = vaddvq_u16(vandq_u16(vcgtq_u16(in.val[0], maxAscii), mask1))
359 | vaddvq_u16(vandq_u16(vcgtq_u16(in.val[1], maxAscii), mask2));
360
361 // merge the two lanes by shifting the values of the second by 8 and inserting them
362 uint16x8_t out = vsliq_n_u16(in.val[0], in.val[1], 8);
363
364 // store, even if there are non-ASCII characters here
365 vst1q_u8(dst, vreinterpretq_u8_u16(out));
366
367 if (nonAscii) {
368 // find the next probable ASCII character
369 // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
370 // characters still coming
371 nextAscii = src + qBitScanReverse(nonAscii) + 1;
372
373 nonAscii = qCountTrailingZeroBits(nonAscii);
374 dst += nonAscii;
375 src += nonAscii;
376 return false;
377 }
378 }
379 return src == end;
380}
381
382static inline bool simdDecodeAscii(char16_t *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
383{
384 // do eight characters at a time
385 uint8x8_t msb_mask = vdup_n_u8(0x80);
386 uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
387 for ( ; end - src >= 8; src += 8, dst += 8) {
388 uint8x8_t c = vld1_u8(src);
389 uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask));
390 if (!n) {
391 // store
392 vst1q_u16(reinterpret_cast<uint16_t *>(dst), vmovl_u8(c));
393 continue;
394 }
395
396 // copy the front part that is still ASCII
397 while (!(n & 1)) {
398 *dst++ = *src++;
399 n >>= 1;
400 }
401
402 // find the next probable ASCII character
403 // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
404 // characters still coming
405 n = qBitScanReverse(n);
406 nextAscii = src + n + 1;
407 return false;
408
409 }
410 return src == end;
411}
412
413static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
414{
415 // The SIMD code below is untested, so just force an early return until
416 // we've had the time to verify it works.
417 nextAscii = end;
418 return src;
419
420 // do eight characters at a time
421 uint8x8_t msb_mask = vdup_n_u8(0x80);
422 uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
423 for ( ; end - src >= 8; src += 8) {
424 uint8x8_t c = vld1_u8(src);
425 uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask));
426 if (!n)
427 continue;
428
429 // find the next probable ASCII character
430 // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
431 // characters still coming
432 nextAscii = src + qBitScanReverse(n) + 1;
433
434 // return the non-ASCII character
435 return src + qCountTrailingZeroBits(n);
436 }
437 nextAscii = end;
438 return src;
439}
440
// NEON builds have no vectorized comparison: this does nothing and leaves
// both source pointers where they are.
static void simdCompareAscii(const qchar8_t *&, const qchar8_t *, const char16_t *&, const char16_t *)
{
}
444#else
// Fallback when neither SSE2 nor NEON is available: nothing is encoded here.
// Always returns false, i.e. "input not fully consumed".
static inline bool simdEncodeAscii(uchar *, const char16_t *, const char16_t *, const char16_t *)
{
    return false;
}
449
// Fallback when neither SSE2 nor NEON is available: nothing is decoded here.
// Always returns false, i.e. "input not fully consumed".
static inline bool simdDecodeAscii(char16_t *, const uchar *, const uchar *, const uchar *)
{
    return false;
}
454
// Fallback when neither SSE2 nor NEON is available: performs no search.
// Returns src unchanged and sets nextAscii to end.
static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
{
    nextAscii = end;
    return src;
}
460
// Fallback when neither SSE2 nor NEON is available: this does nothing and
// leaves both source pointers where they are.
static void simdCompareAscii(const qchar8_t *&, const qchar8_t *, const char16_t *&, const char16_t *)
{
}
464#endif
465
// Bit kept in the converter state's internalState; the codecs in this file
// set it once their BOM/header step is over and test it on later calls.
enum { HeaderDone = 1 };
467
469{
470 qsizetype len = in.size();
471
472 // create a QByteArray with the worst case scenario size
474 uchar *dst = reinterpret_cast<uchar *>(const_cast<char *>(result.constData()));
475 const char16_t *src = reinterpret_cast<const char16_t *>(in.data());
476 const char16_t *const end = src + len;
477
478 while (src != end) {
479 const char16_t *nextAscii = end;
480 if (simdEncodeAscii(dst, nextAscii, src, end))
481 break;
482
483 do {
484 char16_t u = *src++;
485 int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(u, dst, src, end);
486 if (res < 0) {
487 // encoding error - append '?'
488 *dst++ = '?';
489 }
490 } while (src < nextAscii);
491 }
492
493 result.truncate(dst - reinterpret_cast<uchar *>(const_cast<char *>(result.constData())));
494 return result;
495}
496
498{
499 QByteArray ba(3*in.size() +3, Qt::Uninitialized);
500 char *end = convertFromUnicode(ba.data(), in, state);
501 ba.truncate(end - ba.data());
502 return ba;
503}
504
506{
508 qsizetype len = in.size();
509 if (!len)
510 return out;
511
512 auto appendReplacementChar = [state](uchar *cursor) -> uchar * {
514 *cursor++ = 0;
515 } else {
516 // QChar::replacement encoded in utf8
517 *cursor++ = 0xef;
518 *cursor++ = 0xbf;
519 *cursor++ = 0xbd;
520 }
521 return cursor;
522 };
523
524 uchar *cursor = reinterpret_cast<uchar *>(out);
525 const char16_t *src = in.utf16();
526 const char16_t *const end = src + len;
527
528 if (!(state->flags & QStringDecoder::Flag::Stateless)) {
529 if (state->remainingChars) {
530 int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(state->state_data[0], cursor, src, end);
531 if (res < 0)
532 cursor = appendReplacementChar(cursor);
533 state->state_data[0] = 0;
534 state->remainingChars = 0;
535 } else if (!(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom) {
536 // append UTF-8 BOM
537 *cursor++ = utf8bom[0];
538 *cursor++ = utf8bom[1];
539 *cursor++ = utf8bom[2];
540 state->internalState |= HeaderDone;
541 }
542 }
543
544 while (src != end) {
545 const char16_t *nextAscii = end;
546 if (simdEncodeAscii(cursor, nextAscii, src, end))
547 break;
548
549 do {
550 char16_t uc = *src++;
551 int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, cursor, src, end);
552 if (Q_LIKELY(res >= 0))
553 continue;
554
556 // encoding error
557 ++state->invalidChars;
558 cursor = appendReplacementChar(cursor);
559 } else if (res == QUtf8BaseTraits::EndOfString) {
561 ++state->invalidChars;
562 cursor = appendReplacementChar(cursor);
563 } else {
564 state->remainingChars = 1;
565 state->state_data[0] = uc;
566 }
567 return reinterpret_cast<char *>(cursor);
568 }
569 } while (src < nextAscii);
570 }
571
572 return reinterpret_cast<char *>(cursor);
573}
574
576{
577 // ### SIMD-optimize:
578 for (uchar ch : in) {
579 if (ch < 128) {
580 *out++ = ch;
581 } else {
582 // as per https://en.wikipedia.org/wiki/UTF-8#Encoding, 2nd row
583 *out++ = 0b110'0'0000u | (ch >> 6);
584 *out++ = 0b10'00'0000u | (ch & 0b0011'1111);
585 }
586 }
587 return out;
588}
589
591{
592 // UTF-8 to UTF-16 always needs the exact same number of words or less:
593 // UTF-8 UTF-16
594 // 1 byte 1 word
595 // 2 bytes 1 word
596 // 3 bytes 1 word
597 // 4 bytes 2 words (one surrogate pair)
598 // That is, we'll use the full buffer if the input is US-ASCII (1-byte UTF-8),
599 // half the buffer for U+0080-U+07FF text (e.g., Greek, Cyrillic, Arabic) or
600 // non-BMP text, and one third of the buffer for U+0800-U+FFFF text (e.g, CJK).
601 //
602 // The table holds for invalid sequences too: we'll insert one replacement char
603 // per invalid byte.
605 QChar *data = const_cast<QChar*>(result.constData()); // we know we're not shared
606 const QChar *end = convertToUnicode(data, in);
607 result.truncate(end - data);
608 return result;
609}
610
630char16_t *QUtf8::convertToUnicode(char16_t *dst, QByteArrayView in) noexcept
631{
632 const uchar *const start = reinterpret_cast<const uchar *>(in.data());
633 const uchar *src = start;
634 const uchar *end = src + in.size();
635
636 // attempt to do a full decoding in SIMD
637 const uchar *nextAscii = end;
638 if (!simdDecodeAscii(dst, nextAscii, src, end)) {
639 // at least one non-ASCII entry
640 // check if we failed to decode the UTF-8 BOM; if so, skip it
641 if (Q_UNLIKELY(src == start)
642 && end - src >= 3
643 && Q_UNLIKELY(src[0] == utf8bom[0] && src[1] == utf8bom[1] && src[2] == utf8bom[2])) {
644 src += 3;
645 }
646
647 while (src < end) {
648 nextAscii = end;
649 if (simdDecodeAscii(dst, nextAscii, src, end))
650 break;
651
652 do {
653 uchar b = *src++;
654 const qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end);
655 if (res < 0) {
656 // decoding error
658 }
659 } while (src < nextAscii);
660 }
661 }
662
663 return dst;
664}
665
667{
668 // See above for buffer requirements for stateless decoding. However, that
669 // fails if the state is not empty. The following situations can add to the
670 // requirements:
671 // state contains chars starts with requirement
672 // 1 of 2 bytes valid continuation 0
673 // 2 of 3 bytes same 0
674 // 3 bytes of 4 same +1 (need to insert surrogate pair)
675 // 1 of 2 bytes invalid continuation +1 (need to insert replacement and restart)
676 // 2 of 3 bytes same +1 (same)
677 // 3 of 4 bytes same +1 (same)
678 QString result(in.size() + 1, Qt::Uninitialized);
680 result.truncate(end - result.constData());
681 return result;
682}
683
685{
686 qsizetype len = in.size();
687
689 if (!len)
690 return dst;
691
692
693 char16_t replacement = QChar::ReplacementCharacter;
695 replacement = QChar::Null;
696
698 uchar ch = 0;
699
700 const uchar *src = reinterpret_cast<const uchar *>(in.data());
701 const uchar *end = src + len;
702
704 bool headerdone = state->internalState & HeaderDone || state->flags & QStringConverter::Flag::ConvertInitialBom;
705 if (state->remainingChars || !headerdone) {
706 // handle incoming state first
707 uchar remainingCharsData[4]; // longest UTF-8 sequence possible
708 qsizetype remainingCharsCount = state->remainingChars;
709 qsizetype newCharsToCopy = qMin<qsizetype>(sizeof(remainingCharsData) - remainingCharsCount, end - src);
710
711 memset(remainingCharsData, 0, sizeof(remainingCharsData));
712 memcpy(remainingCharsData, &state->state_data[0], remainingCharsCount);
713 memcpy(remainingCharsData + remainingCharsCount, src, newCharsToCopy);
714
715 const uchar *begin = &remainingCharsData[1];
716 res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(remainingCharsData[0], dst, begin,
717 static_cast<const uchar *>(remainingCharsData) + remainingCharsCount + newCharsToCopy);
719 ++state->invalidChars;
720 *dst++ = replacement;
721 ++src;
722 } else if (res == QUtf8BaseTraits::EndOfString) {
723 // if we got EndOfString again, then there were too few bytes in src;
724 // copy to our state and return
725 state->remainingChars = remainingCharsCount + newCharsToCopy;
726 memcpy(&state->state_data[0], remainingCharsData, state->remainingChars);
727 return dst;
728 } else if (!headerdone) {
729 // eat the UTF-8 BOM
730 if (dst[-1] == 0xfeff)
731 --dst;
732 }
733 state->internalState |= HeaderDone;
734
735 // adjust src now that we have maybe consumed a few chars
736 if (res >= 0) {
737 Q_ASSERT(res > remainingCharsCount);
738 src += res - remainingCharsCount;
739 }
740 }
741 } else if (!(state->flags & QStringConverter::Flag::ConvertInitialBom)) {
742 // stateless, remove initial BOM
743 if (len > 2 && src[0] == utf8bom[0] && src[1] == utf8bom[1] && src[2] == utf8bom[2])
744 // skip BOM
745 src += 3;
746 }
747
748 // main body, stateless decoding
749 res = 0;
750 const uchar *nextAscii = src;
751 while (res >= 0 && src < end) {
752 if (src >= nextAscii && simdDecodeAscii(dst, nextAscii, src, end))
753 break;
754
755 ch = *src++;
756 res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(ch, dst, src, end);
758 res = 0;
759 ++state->invalidChars;
760 *dst++ = replacement;
761 }
762 }
763
765 // unterminated UTF sequence
768 ++state->invalidChars;
769 while (src++ < end) {
771 ++state->invalidChars;
772 }
773 state->remainingChars = 0;
774 } else {
775 --src; // unread the byte in ch
776 state->remainingChars = end - src;
777 memcpy(&state->state_data[0], src, end - src);
778 }
779 } else {
780 state->remainingChars = 0;
781 }
782
783 return dst;
784}
785
787{
788 struct NoOutput {};
789 static void appendUtf16(const NoOutput &, char16_t) {}
790 static void appendUcs4(const NoOutput &, char32_t) {}
791};
792
794{
795 const uchar *src = reinterpret_cast<const uchar *>(in.data());
796 const uchar *end = src + in.size();
797 const uchar *nextAscii = src;
798 bool isValidAscii = true;
799
800 while (src < end) {
801 if (src >= nextAscii)
802 src = simdFindNonAscii(src, end, nextAscii);
803 if (src == end)
804 break;
805
806 do {
807 uchar b = *src++;
808 if ((b & 0x80) == 0)
809 continue;
810
811 isValidAscii = false;
813 const qsizetype res = QUtf8Functions::fromUtf8<QUtf8NoOutputTraits>(b, output, src, end);
814 if (res < 0) {
815 // decoding error
816 return { false, false };
817 }
818 } while (src < nextAscii);
819 }
820
821 return { true, isValidAscii };
822}
823
825{
826 auto src1 = reinterpret_cast<const qchar8_t *>(utf8.data());
827 auto end1 = src1 + utf8.size();
828 auto src2 = reinterpret_cast<const char16_t *>(utf16.data());
829 auto end2 = src2 + utf16.size();
830
831 do {
832 simdCompareAscii(src1, end1, src2, end2);
833
834 if (src1 < end1 && src2 < end2) {
835 char32_t uc1 = *src1++;
836 char32_t uc2 = *src2++;
837
838 if (uc1 >= 0x80) {
839 char32_t *output = &uc1;
840 qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraitsNoAscii>(uc1, output, src1, end1);
841 if (res < 0) {
842 // decoding error
844 }
845
846 // Only decode the UTF-16 surrogate pair if the UTF-8 code point
847 // wasn't US-ASCII (a surrogate cannot match US-ASCII).
848 if (QChar::isHighSurrogate(uc2) && src2 < end2 && QChar::isLowSurrogate(*src2))
849 uc2 = QChar::surrogateToUcs4(uc2, *src2++);
850 }
851 if (cs == Qt::CaseInsensitive) {
852 uc1 = QChar::toCaseFolded(uc1);
853 uc2 = QChar::toCaseFolded(uc2);
854 }
855 if (uc1 != uc2)
856 return int(uc1) - int(uc2);
857 }
858 } while (src1 < end1 && src2 < end2);
859
860 // the shorter string sorts first
861 return (end1 > src1) - int(end2 > src2);
862}
863
865{
866 char32_t uc1 = QChar::Null;
867 auto src1 = reinterpret_cast<const uchar *>(utf8.data());
868 auto end1 = src1 + utf8.size();
869 auto src2 = reinterpret_cast<const uchar *>(s.latin1());
870 auto end2 = src2 + s.size();
871
872 while (src1 < end1 && src2 < end2) {
873 uchar b = *src1++;
874 char32_t *output = &uc1;
875 const qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
876 if (res < 0) {
877 // decoding error
879 }
880
881 char32_t uc2 = *src2++;
882 if (cs == Qt::CaseInsensitive) {
883 uc1 = QChar::toCaseFolded(uc1);
884 uc2 = QChar::toCaseFolded(uc2);
885 }
886 if (uc1 != uc2)
887 return int(uc1) - int(uc2);
888 }
889
890 // the shorter string sorts first
891 return (end1 > src1) - (end2 > src2);
892}
893
895{
896 if (lhs.isEmpty())
897 return qt_lencmp(0, rhs.size());
898
899 if (cs == Qt::CaseSensitive) {
900 const auto l = std::min(lhs.size(), rhs.size());
901 int r = memcmp(lhs.data(), rhs.data(), l);
902 return r ? r : qt_lencmp(lhs.size(), rhs.size());
903 }
904
905 char32_t uc1 = QChar::Null;
906 auto src1 = reinterpret_cast<const uchar *>(lhs.data());
907 auto end1 = src1 + lhs.size();
908 char32_t uc2 = QChar::Null;
909 auto src2 = reinterpret_cast<const uchar *>(rhs.data());
910 auto end2 = src2 + rhs.size();
911
912 while (src1 < end1 && src2 < end2) {
913 uchar b = *src1++;
914 char32_t *output = &uc1;
915 qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
916 if (res < 0) {
917 // decoding error
919 }
920
921 b = *src2++;
922 output = &uc2;
923 res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src2, end2);
924 if (res < 0) {
925 // decoding error
927 }
928
929 uc1 = QChar::toCaseFolded(uc1);
930 uc2 = QChar::toCaseFolded(uc2);
931 if (uc1 != uc2)
932 return int(uc1) - int(uc2);
933 }
934
935 // the shorter string sorts first
936 return (end1 > src1) - (end2 > src2);
937}
938
940{
941 bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
942 qsizetype length = 2 * in.size();
943 if (writeBom)
944 length += 2;
945
947 char *end = convertFromUnicode(d.data(), in, state, endian);
948 Q_ASSERT(end - d.constData() == d.size());
949 Q_UNUSED(end);
950 return d;
951}
952
954{
956 bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
957
958 if (endian == DetectEndianness)
960
961 if (writeBom) {
962 // set them up the BOM
964 if (endian == BigEndianness)
965 qToBigEndian(bom.unicode(), out);
966 else
967 qToLittleEndian(bom.unicode(), out);
968 out += 2;
969 }
970 if (endian == BigEndianness)
971 qToBigEndian<char16_t>(in.data(), in.size(), out);
972 else
973 qToLittleEndian<char16_t>(in.data(), in.size(), out);
974
975 state->remainingChars = 0;
976 state->internalState |= HeaderDone;
977 return out + 2*in.size();
978}
979
981{
982 QString result((in.size() + 1) >> 1, Qt::Uninitialized); // worst case
983 QChar *qch = convertToUnicode(result.data(), in, state, endian);
984 result.truncate(qch - result.constData());
985 return result;
986}
987
989{
990 qsizetype len = in.size();
991 const char *chars = in.data();
992
994
995 if (endian == DetectEndianness)
996 endian = (DataEndianness)state->state_data[Endian];
997
998 const char *end = chars + len;
999
1000 // make sure we can decode at least one char
1001 if (state->remainingChars + len < 2) {
1002 if (len) {
1003 Q_ASSERT(state->remainingChars == 0 && len == 1);
1004 state->remainingChars = 1;
1005 state->state_data[Data] = *chars;
1006 }
1007 return out;
1008 }
1009
1010 bool headerdone = state && state->internalState & HeaderDone;
1012 headerdone = true;
1013
1014 if (!headerdone || state->remainingChars) {
1015 uchar buf;
1016 if (state->remainingChars)
1017 buf = state->state_data[Data];
1018 else
1019 buf = *chars++;
1020
1021 // detect BOM, set endianness
1022 state->internalState |= HeaderDone;
1023 QChar ch(buf, *chars++);
1024 if (endian == DetectEndianness) {
1025 // someone set us up the BOM
1026 if (ch == QChar::ByteOrderSwapped) {
1027 endian = BigEndianness;
1028 } else if (ch == QChar::ByteOrderMark) {
1029 endian = LittleEndianness;
1030 } else {
1032 endian = BigEndianness;
1033 } else {
1034 endian = LittleEndianness;
1035 }
1036 }
1037 }
1038 if (endian == BigEndianness)
1039 ch = QChar::fromUcs2((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8));
1040 if (headerdone || ch != QChar::ByteOrderMark)
1041 *out++ = ch;
1042 } else if (endian == DetectEndianness) {
1044 }
1045
1046 qsizetype nPairs = (end - chars) >> 1;
1047 if (endian == BigEndianness)
1048 qFromBigEndian<char16_t>(chars, nPairs, out);
1049 else
1050 qFromLittleEndian<char16_t>(chars, nPairs, out);
1051 out += nPairs;
1052
1053 state->state_data[Endian] = endian;
1054 state->remainingChars = 0;
1055 if ((end - chars) & 1) {
1058 } else {
1059 state->remainingChars = 1;
1060 state->state_data[Data] = *(end - 1);
1061 }
1062 } else {
1063 state->state_data[Data] = 0;
1064 }
1065
1066 return out;
1067}
1068
1070{
1071 bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
1072 qsizetype length = 4*in.size();
1073 if (writeBom)
1074 length += 4;
1076 char *end = convertFromUnicode(ba.data(), in, state, endian);
1077 ba.truncate(end - ba.constData());
1078 return ba;
1079}
1080
1082{
1083 Q_ASSERT(state);
1084
1085 bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
1086 if (endian == DetectEndianness)
1088
1089 if (writeBom) {
1090 // set them up the BOM
1091 if (endian == BigEndianness) {
1092 out[0] = 0;
1093 out[1] = 0;
1094 out[2] = (char)0xfe;
1095 out[3] = (char)0xff;
1096 } else {
1097 out[0] = (char)0xff;
1098 out[1] = (char)0xfe;
1099 out[2] = 0;
1100 out[3] = 0;
1101 }
1102 out += 4;
1103 state->internalState |= HeaderDone;
1104 }
1105
1106 const QChar *uc = in.data();
1107 const QChar *end = in.data() + in.size();
1108 QChar ch;
1109 char32_t ucs4;
1110 if (state->remainingChars == 1) {
1111 auto character = state->state_data[Data];
1112 Q_ASSERT(character <= 0xFFFF);
1113 ch = QChar(character);
1114 // this is ugly, but shortcuts a whole lot of logic that would otherwise be required
1115 state->remainingChars = 0;
1116 goto decode_surrogate;
1117 }
1118
1119 while (uc < end) {
1120 ch = *uc++;
1121 if (Q_LIKELY(!ch.isSurrogate())) {
1122 ucs4 = ch.unicode();
1123 } else if (Q_LIKELY(ch.isHighSurrogate())) {
1124decode_surrogate:
1125 if (uc == end) {
1128 } else {
1129 state->remainingChars = 1;
1130 state->state_data[Data] = ch.unicode();
1131 return out;
1132 }
1133 } else if (uc->isLowSurrogate()) {
1134 ucs4 = QChar::surrogateToUcs4(ch, *uc++);
1135 } else {
1137 }
1138 } else {
1140 }
1141 if (endian == BigEndianness)
1142 qToBigEndian(ucs4, out);
1143 else
1144 qToLittleEndian(ucs4, out);
1145 out += 4;
1146 }
1147
1148 return out;
1149}
1150
1152{
1154 result.resize((in.size() + 7) >> 1); // worst case
1155 QChar *end = convertToUnicode(result.data(), in, state, endian);
1156 result.truncate(end - result.constData());
1157 return result;
1158}
1159
1161{
1162 qsizetype len = in.size();
1163 const char *chars = in.data();
1164
1165 Q_ASSERT(state);
1166 if (endian == DetectEndianness)
1167 endian = (DataEndianness)state->state_data[Endian];
1168
1169 const char *end = chars + len;
1170
1171 uchar tuple[4];
1172 memcpy(tuple, &state->state_data[Data], 4);
1173
1174 // make sure we can decode at least one char
1175 if (state->remainingChars + len < 4) {
1176 if (len) {
1177 while (chars < end) {
1178 tuple[state->remainingChars] = *chars;
1179 ++state->remainingChars;
1180 ++chars;
1181 }
1182 Q_ASSERT(state->remainingChars < 4);
1183 memcpy(&state->state_data[Data], tuple, 4);
1184 }
1185 return out;
1186 }
1187
1188 bool headerdone = state->internalState & HeaderDone;
1190 headerdone = true;
1191
1192 qsizetype num = state->remainingChars;
1193 state->remainingChars = 0;
1194
1195 if (!headerdone || endian == DetectEndianness || num) {
1196 while (num < 4)
1197 tuple[num++] = *chars++;
1198 if (endian == DetectEndianness) {
1199 // someone set us up the BOM?
1200 if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0) {
1201 endian = LittleEndianness;
1202 } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff) {
1203 endian = BigEndianness;
1205 endian = BigEndianness;
1206 } else {
1207 endian = LittleEndianness;
1208 }
1209 }
1210 char32_t code = (endian == BigEndianness) ? qFromBigEndian<char32_t>(tuple) : qFromLittleEndian<char32_t>(tuple);
1211 if (headerdone || code != QChar::ByteOrderMark) {
1212 if (QChar::requiresSurrogates(code)) {
1213 *out++ = QChar(QChar::highSurrogate(code));
1214 *out++ = QChar(QChar::lowSurrogate(code));
1215 } else {
1216 *out++ = QChar(code);
1217 }
1218 }
1219 num = 0;
1220 } else if (endian == DetectEndianness) {
1222 }
1223 state->state_data[Endian] = endian;
1224 state->internalState |= HeaderDone;
1225
1226 while (chars < end) {
1227 tuple[num++] = *chars++;
1228 if (num == 4) {
1229 char32_t code = (endian == BigEndianness) ? qFromBigEndian<char32_t>(tuple) : qFromLittleEndian<char32_t>(tuple);
1230 for (char16_t c : QChar::fromUcs4(code))
1231 *out++ = c;
1232 num = 0;
1233 }
1234 }
1235
1236 if (num) {
1239 } else {
1240 state->state_data[Endian] = endian;
1241 state->remainingChars = num;
1242 memcpy(&state->state_data[Data], tuple, 4);
1243 }
1244 }
1245
1246 return out;
1247}
1248
1249#if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED)
1250int QLocal8Bit::checkUtf8()
1251{
1252 return GetACP() == CP_UTF8 ? 1 : -1;
1253}
1254
1255static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::State *state)
1256{
1257 qsizetype length = in.size();
1258 const char *chars = in.data();
1259
1260 Q_ASSERT(state);
1261 if (state->flags & QStringConverter::Flag::Stateless) // temporary
1262 state = nullptr;
1263
1264 if (!chars || !length)
1265 return QString();
1266
1267 qsizetype copyLocation = 0;
1268 qsizetype extra = 2;
1269 if (state && state->remainingChars) {
1270 copyLocation = state->remainingChars;
1271 extra += copyLocation;
1272 }
1273 qsizetype newLength = length + extra;
1274 char *mbcs = new char[newLength];
1275 //ensure that we have a NULL terminated string
1276 mbcs[newLength-1] = 0;
1277 mbcs[newLength-2] = 0;
1278 memcpy(&(mbcs[copyLocation]), chars, length);
1279 if (copyLocation) {
1280 //copy the last character from the state
1281 mbcs[0] = (char)state->state_data[0];
1282 state->remainingChars = 0;
1283 }
1284 const char *mb = mbcs;
1285 const char *next = 0;
1286 QString s;
1287 while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
1288 wchar_t wc[2] ={0};
1289 int charlength = int(next - mb); // always just a few bytes
1290 int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
1291 if (len>0) {
1292 s.append(QChar(wc[0]));
1293 } else {
1294 int r = GetLastError();
1295 //check if the character being dropped is the last character
1296 if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
1297 state->remainingChars = 1;
1298 state->state_data[0] = (char)*mb;
1299 }
1300 }
1301 mb = next;
1302 }
1303 delete [] mbcs;
1304 return s;
1305}
1306
1307
1308QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::State *state)
1309{
1310 qsizetype length = in.size();
1311
1312 Q_ASSERT(length < INT_MAX); // ### FIXME
1313 const char *mb = in.data();
1314 int mblen = length;
1315
1316 if (!mb || !mblen)
1317 return QString();
1318
1320 int len;
1321 QString sp;
1322 bool prepend = false;
1323 char state_data = 0;
1324 int remainingChars = 0;
1325
1326 //save the current state information
1327 if (state) {
1328 state_data = (char)state->state_data[0];
1329 remainingChars = state->remainingChars;
1330 }
1331
1332 //convert the pending character (if available)
1333 if (state && remainingChars) {
1334 char prev[3] = {0};
1335 prev[0] = state_data;
1336 prev[1] = mb[0];
1337 remainingChars = 0;
1338 len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
1339 prev, 2, wc.data(), wc.length());
1340 if (len) {
1341 sp.append(QChar(wc[0]));
1342 if (mblen == 1) {
1343 state->remainingChars = 0;
1344 return sp;
1345 }
1346 prepend = true;
1347 mb++;
1348 mblen--;
1349 wc[0] = 0;
1350 }
1351 }
1352
1353 while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
1354 mb, mblen, wc.data(), wc.length()))) {
1355 int r = GetLastError();
1356 if (r == ERROR_INSUFFICIENT_BUFFER) {
1357 const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
1358 mb, mblen, 0, 0);
1359 wc.resize(wclen);
1360 } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
1361 //find the last non NULL character
1362 while (mblen > 1 && !(mb[mblen-1]))
1363 mblen--;
1364 //check whether, we hit an invalid character in the middle
1365 if ((mblen <= 1) || (remainingChars && state_data))
1366 return convertToUnicodeCharByChar(in, state);
1367 //Remove the last character and try again...
1368 state_data = mb[mblen-1];
1369 remainingChars = 1;
1370 mblen--;
1371 } else {
1372 // Fail.
1373 qWarning("MultiByteToWideChar: Cannot convert multibyte text");
1374 break;
1375 }
1376 }
1377
1378 if (len <= 0)
1379 return QString();
1380
1381 if (wc[len-1] == 0) // len - 1: we don't want terminator
1382 --len;
1383
1384 //save the new state information
1385 if (state) {
1386 state->state_data[0] = (char)state_data;
1387 state->remainingChars = remainingChars;
1388 }
1389 QString s((QChar*)wc.data(), len);
1390 if (prepend) {
1391 return sp+s;
1392 }
1393 return s;
1394}
1395
1396QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, QStringConverter::State *state)
1397{
1398 const QChar *ch = in.data();
1399 qsizetype uclen = in.size();
1400
1401 Q_ASSERT(uclen < INT_MAX); // ### FIXME
1402 Q_ASSERT(state);
1403 Q_UNUSED(state); // ### Fixme
1404 if (state->flags & QStringConverter::Flag::Stateless) // temporary
1405 state = nullptr;
1406
1407 if (!ch)
1408 return QByteArray();
1409 if (uclen == 0)
1410 return QByteArray("");
1411 BOOL used_def;
1412 QByteArray mb(4096, 0);
1413 int len;
1414 while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
1415 mb.data(), mb.size()-1, 0, &used_def)))
1416 {
1417 int r = GetLastError();
1418 if (r == ERROR_INSUFFICIENT_BUFFER) {
1419 mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
1420 (const wchar_t*)ch, uclen,
1421 0, 0, 0, &used_def));
1422 // and try again...
1423 } else {
1424 // Fail. Probably can't happen in fact (dwFlags is 0).
1425#ifndef QT_NO_DEBUG
1426 // Can't use qWarning(), as it'll recurse to handle %ls
1427 fprintf(stderr,
1428 "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n",
1429 r, reinterpret_cast<const wchar_t*>(QString(ch, uclen).utf16()));
1430#endif
1431 break;
1432 }
1433 }
1434 mb.resize(len);
1435 return mb;
1436}
1437#endif
1438
1439void QStringConverter::State::clear() noexcept
1440{
1441 if (clearFn)
1442 clearFn(this);
1443 else
1444 state_data[0] = state_data[1] = state_data[2] = state_data[3] = 0;
1445 remainingChars = 0;
1446 invalidChars = 0;
1447 internalState = 0;
1448}
1449
1450void QStringConverter::State::reset() noexcept
1451{
1452 if (flags & Flag::UsesIcu) {
1453#if QT_CONFIG(icu)
1454 UConverter *converter = static_cast<UConverter *>(d[0]);
1455 if (converter)
1456 ucnv_reset(converter);
1457#else
1458 Q_UNREACHABLE();
1459#endif
1460 } else {
1461 clear();
1462 }
1463}
1464
1466{
1468}
1469
1471{
1473}
1474
1476{
1478}
1479
1481{
1483}
1484
1486{
1488}
1489
1491{
1493}
1494
1496{
1498}
1499
1501{
1503}
1504
1506{
1508}
1509
1511{
1513}
1514
1516{
1518}
1519
1521{
1523}
1524
1526{
1527 Q_ASSERT(state);
1528 if (state->flags & QStringConverter::Flag::Stateless) // temporary
1529 state = nullptr;
1530
1531 const char replacement = (state && state->flags & QStringConverter::Flag::ConvertInvalidToNull) ? 0 : '?';
1532 qsizetype invalid = 0;
1533 for (qsizetype i = 0; i < in.size(); ++i) {
1534 if (in[i] > QChar(0xff)) {
1535 *out = replacement;
1536 ++invalid;
1537 } else {
1538 *out = (char)in[i].cell();
1539 }
1540 ++out;
1541 }
1542 if (state)
1543 state->invalidChars += invalid;
1544 return out;
1545}
1546
1548{
1550 memcpy(out, s.constData(), s.size()*sizeof(QChar));
1551 return out + s.size();
1552}
1553
1555{
1557 memcpy(out, s.constData(), s.size());
1558 return out + s.size();
1559}
1560
1561
1562static qsizetype fromUtf8Len(qsizetype l) { return l + 1; }
1563static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); }
1564
1565static qsizetype fromUtf16Len(qsizetype l) { return l/2 + 2; }
1566static qsizetype toUtf16Len(qsizetype l) { return 2*(l + 1); }
1567
1568static qsizetype fromUtf32Len(qsizetype l) { return l/2 + 2; }
1569static qsizetype toUtf32Len(qsizetype l) { return 4*(l + 1); }
1570
1571static qsizetype fromLatin1Len(qsizetype l) { return l + 1; }
1572static qsizetype toLatin1Len(qsizetype l) { return l + 1; }
1573
1574
1575
1702const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringConverter::LastEncoding + 1] =
1703{
1705 { "UTF-16", fromUtf16, fromUtf16Len, toUtf16, toUtf16Len },
1706 { "UTF-16LE", fromUtf16LE, fromUtf16Len, toUtf16LE, toUtf16Len },
1707 { "UTF-16BE", fromUtf16BE, fromUtf16Len, toUtf16BE, toUtf16Len },
1708 { "UTF-32", fromUtf32, fromUtf32Len, toUtf32, toUtf32Len },
1709 { "UTF-32LE", fromUtf32LE, fromUtf32Len, toUtf32LE, toUtf32Len },
1710 { "UTF-32BE", fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len },
1713};
1714
1715// match names case insensitive and skipping '-' and '_'
1716static bool nameMatch(const char *a, const char *b)
1717{
1718 while (*a && *b) {
1719 if (*a == '-' || *a == '_') {
1720 ++a;
1721 continue;
1722 }
1723 if (*b == '-' || *b == '_') {
1724 ++b;
1725 continue;
1726 }
1728 return false;
1729 ++a;
1730 ++b;
1731 }
1732 return !*a && !*b;
1733}
1734
1735
1747#if QT_CONFIG(icu)
1748// only derives from QStringConverter to get access to protected types
1749struct QStringConverterICU : QStringConverter
1750{
1751 static void clear_function(QStringConverterBase::State *state) noexcept
1752 {
1753 ucnv_close(static_cast<UConverter *>(state->d[0]));
1754 state->d[0] = nullptr;
1755 }
1756
1757 static void ensureConverter(QStringConverter::State *state)
1758 {
1759 // old code might reset the state via clear instead of reset
1760 // in that case, the converter has been closed, and we have to reopen it
1761 if (state->d[0] == nullptr)
1762 state->d[0] = createConverterForName(static_cast<const char *>(state->d[1]), state);
1763 }
1764
1766 {
1767 ensureConverter(state);
1768
1769 auto icu_conv = static_cast<UConverter *>(state->d[0]);
1770 UErrorCode err = U_ZERO_ERROR;
1771 auto source = in.data();
1772 auto sourceLimit = in.data() + in.size();
1773
1774 qsizetype length = toLen(in.size());
1775
1776 UChar *target = reinterpret_cast<UChar *>(out);
1777 auto targetLimit = target + length;
1778 // We explicitly clean up anyway, so no need to set flush to true,
1779 // which would just reset the converter.
1780 UBool flush = false;
1781
1782 // If the QStringConverter was moved, the state that we used as a context is stale now.
1783 UConverterToUCallback action;
1784 const void *context;
1785 ucnv_getToUCallBack(icu_conv, &action, &context);
1786 if (context != state)
1787 ucnv_setToUCallBack(icu_conv, action, &state, nullptr, nullptr, &err);
1788
1789 ucnv_toUnicode(icu_conv, &target, targetLimit, &source, sourceLimit, nullptr, flush, &err);
1790 // We did reserve enough space:
1791 Q_ASSERT(err != U_BUFFER_OVERFLOW_ERROR);
1792 if (state->flags.testFlag(QStringConverter::Flag::Stateless)) {
1793 if (auto leftOver = ucnv_toUCountPending(icu_conv, &err)) {
1794 ucnv_reset(icu_conv);
1795 state->invalidChars += leftOver;
1796 }
1797 }
1798 return reinterpret_cast<QChar *>(target);
1799 }
1800
1802 {
1803 ensureConverter(state);
1804 auto icu_conv = static_cast<UConverter *>(state->d[0]);
1805 UErrorCode err = U_ZERO_ERROR;
1806 auto source = reinterpret_cast<const UChar *>(in.data());
1807 auto sourceLimit = reinterpret_cast<const UChar *>(in.data() + in.size());
1808
1809 qsizetype length = UCNV_GET_MAX_BYTES_FOR_STRING(in.size(), ucnv_getMaxCharSize(icu_conv));
1810
1811 char *target = out;
1812 char *targetLimit = out + length;
1813 UBool flush = false;
1814
1815 // If the QStringConverter was moved, the state that we used as a context is stale now.
1816 UConverterFromUCallback action;
1817 const void *context;
1818 ucnv_getFromUCallBack(icu_conv, &action, &context);
1819 if (context != state)
1820 ucnv_setFromUCallBack(icu_conv, action, &state, nullptr, nullptr, &err);
1821
1822 ucnv_fromUnicode(icu_conv, &target, targetLimit, &source, sourceLimit, nullptr, flush, &err);
1823 // We did reserve enough space:
1824 Q_ASSERT(err != U_BUFFER_OVERFLOW_ERROR);
1825 if (state->flags.testFlag(QStringConverter::Flag::Stateless)) {
1826 if (auto leftOver = ucnv_fromUCountPending(icu_conv, &err)) {
1827 ucnv_reset(icu_conv);
1828 state->invalidChars += leftOver;
1829 }
1830 }
1831 return target;
1832 }
1833
1834 Q_DISABLE_COPY_MOVE(QStringConverterICU)
1835
1836 template<qsizetype X>
1837 static qsizetype fromLen(qsizetype inLength)
1838 {
1839 return X * inLength * sizeof(UChar);
1840 }
1841
1842 static qsizetype toLen(qsizetype inLength)
1843 {
1844
1845 /* Assumption: each input char might map to a different codepoint
1846 Each codepoint can take up to 4 bytes == 2 QChar
1847 We can ignore reserving space for a BOM, as only UTF encodings use one
1848 and those are not handled by the ICU converter.
1849 */
1850 return 2 * inLength;
1851 }
1852
// Interface table for ICU-backed converters, indexed by (maximum bytes per
// character - 1): make_icu_converter() returns &forLength[maxCharSize - 1].
// The eight entries differ only in the fromLen<X> instantiation, with X
// running from 1 to 8. Every entry carries the same placeholder name string.
1853 static constexpr QStringConverter::Interface forLength[] = {
1854 {"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<1>},
1855 {"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<2>},
1856 {"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<3>},
1857 {"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<4>},
1858 {"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<5>},
1859 {"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<6>},
1860 {"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<7>},
1861 {"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<8>}
1862 };
1863
// Opens an ICU converter for the codec called `name` and installs error
// callbacks for both directions that add the length of each offending
// sequence to state->invalidChars.
//
// name  - codec name handed to ucnv_open(); must not be null.
// state - passed to ICU as the callback context; must not be null. The
//         callbacks dereference it later, so presumably it has to stay valid
//         for as long as the converter is used with this context.
//
// Returns the open converter, or nullptr when ucnv_open() reports any status
// other than U_ZERO_ERROR or U_AMBIGUOUS_ALIAS_WARNING.
1864 static UConverter *createConverterForName(const char *name, const State *state)
1865 {
1866 Q_ASSERT(name);
1867 Q_ASSERT(state);
1868 UErrorCode status = U_ZERO_ERROR;
1869 UConverter *conv = ucnv_open(name, &status);
1870 if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
1871 ucnv_close(conv);
1872 return nullptr;
1873 }
1874
// With ConvertInvalidToNull, each invalid sequence is replaced by a single
// U+0000; otherwise ICU's stock substitution callbacks pick the replacement.
// The status written to `error` by the ucnv_set*CallBack() calls below is not
// inspected.
1875 if (state->flags.testFlag(Flag::ConvertInvalidToNull)) {
1876 UErrorCode error = U_ZERO_ERROR;
1877
1878 auto nullToSubstituter = [](const void *context, UConverterToUnicodeArgs *toUArgs,
1879 const char *, int32_t length,
1880 UConverterCallbackReason reason, UErrorCode *err) {
// NOTE(review): reasons up to UCNV_IRREGULAR should be the conversion-error
// reasons (unassigned, illegal, irregular); confirm against ICU's
// UConverterCallbackReason enum.
1881 if (reason <= UCNV_IRREGULAR) {
1882 *err = U_ZERO_ERROR;
1883 UChar c = '\0';
1884 ucnv_cbToUWriteUChars(toUArgs, &c, 1, 0, err);
1885 // Recover outer scope's state (which isn't const) from context:
1886 auto state = const_cast<State *>(static_cast<const State *>(context));
1887 state->invalidChars += length;
1888 }
1889 };
1890 ucnv_setToUCallBack(conv, nullToSubstituter, state, nullptr, nullptr, &error);
1891
1892 auto nullFromSubstituter = [](const void *context, UConverterFromUnicodeArgs *fromUArgs,
1893 const UChar *, int32_t length,
1894 UChar32, UConverterCallbackReason reason, UErrorCode *err) {
1895 if (reason <= UCNV_IRREGULAR) {
1896 *err = U_ZERO_ERROR;
1897 const UChar replacement[] = { 0 };
1898 const UChar *stringBegin = std::begin(replacement);
1899 ucnv_cbFromUWriteUChars(fromUArgs, &stringBegin, std::end(replacement), 0, err);
1900 // Recover outer scope's state (which isn't const) from context:
1901 auto state = const_cast<State *>(static_cast<const State *>(context));
1902 state->invalidChars += length;
1903 }
1904 };
1905 ucnv_setFromUCallBack(conv, nullFromSubstituter, state, nullptr, nullptr, &error);
1906 } else {
1907 UErrorCode error = U_ZERO_ERROR;
1908
1909 auto qmarkToSubstituter = [](const void *context, UConverterToUnicodeArgs *toUArgs,
1910 const char *codeUnits,int32_t length,
1911 UConverterCallbackReason reason, UErrorCode *err) {
1912 if (reason <= UCNV_IRREGULAR) {
1913 // Recover outer scope's state (which isn't const) from context:
1914 auto state = const_cast<State *>(static_cast<const State *>(context));
1915 state->invalidChars += length;
1916 }
1917 // use existing ICU callback for logic
1918 UCNV_TO_U_CALLBACK_SUBSTITUTE(nullptr, toUArgs, codeUnits, length, reason, err);
1919
1920 };
1921 ucnv_setToUCallBack(conv, qmarkToSubstituter, state, nullptr, nullptr, &error);
1922
1923 auto qmarkFromSubstituter = [](const void *context, UConverterFromUnicodeArgs *fromUArgs,
1924 const UChar *codeUnits, int32_t length,
1925 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *err) {
1926 if (reason <= UCNV_IRREGULAR) {
1927 // Recover outer scope's state (which isn't const) from context:
1928 auto state = const_cast<State *>(static_cast<const State *>(context));
1929 state->invalidChars += length;
1930 }
1931 // use existing ICU callback for logic
1932 UCNV_FROM_U_CALLBACK_SUBSTITUTE(nullptr, fromUArgs, codeUnits, length,
1933 codePoint, reason, err);
1934 };
1935 ucnv_setFromUCallBack(conv, qmarkFromSubstituter, state, nullptr, nullptr, &error);
1936 }
1937 return conv;
1938 }
1939
1940 static const QStringConverter::Interface *make_icu_converter(
1942 const char *name)
1943 {
1944 UErrorCode status = U_ZERO_ERROR;
1945 UConverter *conv = createConverterForName(name, state);
1946 if (!conv)
1947 return nullptr;
1948
1949 const char *icuName = ucnv_getName(conv, &status);
1950 // ucnv_getStandardName returns a name which is owned by the library
1951 // we can thus store it in the state without worrying about its lifetime
1952 const char *persistentName = ucnv_getStandardName(icuName, "MIME", &status);
1953 if (U_FAILURE(status) || !persistentName) {
1954 status = U_ZERO_ERROR;
1955 persistentName = ucnv_getStandardName(icuName, "IANA", &status);
1956 }
1957 state->d[1] = const_cast<char *>(persistentName);
1958 state->d[0] = conv;
1960 qsizetype maxCharSize = ucnv_getMaxCharSize(conv);
1961 state->clearFn = QStringConverterICU::clear_function;
1962 if (maxCharSize > 8 || maxCharSize < 1) {
1963 qWarning("Encountered unexpected codec \"%s\" which requires >8x space", name);
1964 return nullptr;
1965 } else {
1966 return &forLength[maxCharSize - 1];
1967 }
1968
1969 }
1970
1971};
1972#endif
1973
1978 : iface(nullptr), state(f)
1979{
1980 auto e = encodingForName(name);
1981 if (e)
1982 iface = encodingInterfaces + int(*e);
1983#if QT_CONFIG(icu)
1984 else
1985 iface = QStringConverterICU::make_icu_converter(&state, name);
1986#endif
1987}
1988
1989
1990const char *QStringConverter::name() const noexcept
1991{
1992 if (!iface)
1993 return nullptr;
1995#if QT_CONFIG(icu)
1996 return static_cast<const char*>(state.d[1]);
1997#else
1998 return nullptr;
1999#endif
2000 } else {
2001 return iface->name;
2002 }
2003}
2004
2047std::optional<QStringConverter::Encoding> QStringConverter::encodingForName(const char *name) noexcept
2048{
2049 for (qsizetype i = 0; i < LastEncoding + 1; ++i) {
2050 if (nameMatch(encodingInterfaces[i].name, name))
2052 }
2053 if (nameMatch(name, "latin1"))
2055 return std::nullopt;
2056}
2057
2065std::optional<QStringConverter::Encoding>
2066QStringConverter::encodingForData(QByteArrayView data, char16_t expectedFirstCharacter) noexcept
2067{
2068 // someone set us up the BOM?
2069 qsizetype arraySize = data.size();
2070 if (arraySize > 3) {
2071 char32_t uc = qFromUnaligned<char32_t>(data.data());
2072 if (uc == qToBigEndian(char32_t(QChar::ByteOrderMark)))
2074 if (uc == qToLittleEndian(char32_t(QChar::ByteOrderMark)))
2076 if (expectedFirstCharacter) {
2077 // catch also anything starting with the expected character
2078 if (qToLittleEndian(uc) == expectedFirstCharacter)
2080 else if (qToBigEndian(uc) == expectedFirstCharacter)
2082 }
2083 }
2084
2085 if (arraySize > 2) {
2086 if (memcmp(data.data(), utf8bom, sizeof(utf8bom)) == 0)
2088 }
2089
2090 if (arraySize > 1) {
2091 char16_t uc = qFromUnaligned<char16_t>(data.data());
2092 if (uc == qToBigEndian(char16_t(QChar::ByteOrderMark)))
2094 if (uc == qToLittleEndian(char16_t(QChar::ByteOrderMark)))
2096 if (expectedFirstCharacter) {
2097 // catch also anything starting with the expected character
2098 if (qToLittleEndian(uc) == expectedFirstCharacter)
2100 else if (qToBigEndian(uc) == expectedFirstCharacter)
2102 }
2103 }
2104 return std::nullopt;
2105}
2106
2108{
2109 static constexpr auto metaSearcher = qMakeStaticByteArrayMatcher("meta ");
2110 static constexpr auto charsetSearcher = qMakeStaticByteArrayMatcher("charset=");
2111
2112 QByteArray header = data.first(qMin(data.size(), qsizetype(1024))).toByteArray().toLower();
2113 qsizetype pos = metaSearcher.indexIn(header);
2114 if (pos != -1) {
2115 pos = charsetSearcher.indexIn(header, pos);
2116 if (pos != -1) {
2117 pos += qstrlen("charset=");
2118 if (pos < header.size() && (header.at(pos) == '\"' || header.at(pos) == '\''))
2119 ++pos;
2120
2121 qsizetype pos2 = pos;
2122 // The attribute can be closed with either """, "'", ">" or "/",
2123 // none of which are valid charset characters.
2124 while (++pos2 < header.size()) {
2125 char ch = header.at(pos2);
2126 if (ch == '\"' || ch == '\'' || ch == '>' || ch == '/') {
2127 QByteArray name = header.mid(pos, pos2 - pos);
2128 qsizetype colon = name.indexOf(':');
2129 if (colon > 0)
2130 name = name.left(colon);
2131 name = name.simplified();
2132 if (name == "unicode") // QTBUG-41998, ICU will return UTF-16.
2133 name = QByteArrayLiteral("UTF-8");
2134 if (!name.isEmpty())
2135 return name;
2136 }
2137 }
2138 }
2139 }
2140 return QByteArray();
2141}
2142
2151std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByteArrayView data)
2152{
2153 // determine charset
2154 std::optional<QStringConverter::Encoding> encoding = encodingForData(data);
2155 if (encoding)
2156 // trust the initial BOM
2157 return encoding;
2158
2160 if (!encodingTag.isEmpty())
2161 return encodingForName(encodingTag);
2162
2163 return Utf8;
2164}
2165
2176{
2177 // determine charset
2178 std::optional<QStringConverter::Encoding> encoding = encodingForData(data);
2179 if (encoding)
2180 // trust the initial BOM
2181 return QStringDecoder(encoding.value());
2182
2184 if (!encodingTag.isEmpty())
2185 return QStringDecoder(encodingTag);
2186
2187 return QStringDecoder(Utf8);
2188}
2189
2190
2195{
2196 return encodingInterfaces[int(e)].name;
2197}
2198
constexpr qsizetype size() const noexcept
constexpr const_pointer data() const noexcept
\inmodule QtCore
Definition qbytearray.h:57
char * data()
\macro QT_NO_CAST_FROM_BYTEARRAY
Definition qbytearray.h:534
qsizetype size() const noexcept
Returns the number of bytes in this byte array.
Definition qbytearray.h:474
const char * constData() const noexcept
Returns a pointer to the const data stored in the byte array.
Definition qbytearray.h:122
void truncate(qsizetype pos)
Truncates the byte array at index position pos.
bool isEmpty() const noexcept
Returns true if the byte array has size 0; otherwise returns false.
Definition qbytearray.h:106
\inmodule QtCore
Definition qchar.h:48
static constexpr QChar fromUcs2(char16_t c) noexcept
Definition qchar.h:98
static constexpr char32_t surrogateToUcs4(char16_t high, char16_t low) noexcept
Converts a UTF16 surrogate pair with the given high and low values to it's UCS-4-encoded code point.
Definition qchar.h:508
@ ReplacementCharacter
Definition qchar.h:59
@ ByteOrderSwapped
Definition qchar.h:62
@ ByteOrderMark
Definition qchar.h:61
@ Null
Definition qchar.h:51
static constexpr bool requiresSurrogates(char32_t ucs4) noexcept
Returns true if the UCS-4-encoded character specified by ucs4 can be split into the high and low part...
Definition qchar.h:504
static constexpr auto fromUcs4(char32_t c) noexcept
static constexpr char16_t highSurrogate(char32_t ucs4) noexcept
Returns the high surrogate part of a UCS-4-encoded code point.
Definition qchar.h:518
constexpr bool isLowSurrogate() const noexcept
Returns true if the QChar is the low part of a UTF16 surrogate (for example if its code point is in r...
Definition qchar.h:480
QChar toCaseFolded() const noexcept
Returns the case folded equivalent of the character.
Definition qchar.h:451
static constexpr char16_t lowSurrogate(char32_t ucs4) noexcept
Returns the low surrogate part of a UCS-4-encoded code point.
Definition qchar.h:522
constexpr bool isHighSurrogate() const noexcept
Returns true if the QChar is the high part of a UTF16 surrogate (for example if its code point is in ...
Definition qchar.h:479
Q_CORE_EXPORT const char * name() const noexcept
Returns the canonical name of the encoding this QStringConverter can encode or decode.
static Q_CORE_EXPORT std::optional< Encoding > encodingForHtml(QByteArrayView data)
Tries to determine the encoding of the HTML in data by looking at leading byte order marks or a chars...
static Q_CORE_EXPORT const char * nameForEncoding(Encoding e)
Returns the canonical name for encoding e.
Encoding
\value Utf8 Create a converter to or from UTF-8 \value Utf16 Create a converter to or from UTF-16.
const Interface * iface
static Q_CORE_EXPORT std::optional< Encoding > encodingForName(const char *name) noexcept
Convert name to the corresponding \l Encoding member, if there is one.
constexpr QStringConverter() noexcept
static Q_CORE_EXPORT std::optional< Encoding > encodingForData(QByteArrayView data, char16_t expectedFirstCharacter=0) noexcept
Returns the encoding for the content of data if it can be determined.
\inmodule QtCore
static Q_CORE_EXPORT QStringDecoder decoderForHtml(QByteArrayView data)
Tries to determine the encoding of the HTML in data by looking at leading byte order marks or a chars...
constexpr QStringDecoder() noexcept
Default constructs an decoder.
\inmodule QtCore
Definition qstringview.h:76
\macro QT_RESTRICTED_CAST_FROM_ASCII
Definition qstring.h:127
qsizetype size() const
Returns the number of characters in this string.
Definition qstring.h:182
QString mid(qsizetype position, qsizetype n=-1) const
Returns a string that contains n characters of this string, starting at the specified position index.
Definition qstring.cpp:5204
QString first(qsizetype n) const
Definition qstring.h:337
QString simplified() const &
Definition qstring.h:384
const QChar at(qsizetype i) const
Returns the character at the given index position in the string.
Definition qstring.h:1079
QString toLower() const &
Definition qstring.h:368
QString left(qsizetype n) const
Returns a substring that contains the n leftmost characters of the string.
Definition qstring.cpp:5161
static QString static QString qsizetype indexOf(QChar c, qsizetype from=0, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
Definition qstring.cpp:4420
@ BigEndian
Definition qsysinfo.h:29
@ ByteOrder
Definition qsysinfo.h:34
b clear()
QCursor cursor
double e
else opt state
[0]
short next
Definition keywords.cpp:445
Combined button and popup list for selecting options.
constexpr int qt_lencmp(qsizetype lhs, qsizetype rhs) noexcept
Definition qtools_p.h:109
constexpr char toAsciiLower(char ch) noexcept
Definition qtools_p.h:87
QTextStream & flush(QTextStream &stream)
Calls QTextStream::flush() on stream and returns stream.
CaseSensitivity
@ CaseInsensitive
@ CaseSensitive
constexpr Initialization Uninitialized
QT_POPCOUNT_RELAXED_CONSTEXPR uint qCountLeadingZeroBits(quint32 v) noexcept
constexpr uint qCountTrailingZeroBits(quint32 v) noexcept
static void * context
#define QByteArrayLiteral(str)
Definition qbytearray.h:52
size_t qstrlen(const char *str)
constexpr QStaticByteArrayMatcher< N > qMakeStaticByteArrayMatcher(const char(&pattern)[N]) noexcept
#define Q_UNLIKELY(x)
#define Q_LIKELY(x)
#define Q_ALWAYS_INLINE
DBusConnection const char DBusError * error
static QString header(const QString &name)
constexpr T qToBigEndian(T source)
Definition qendian.h:172
constexpr T qToLittleEndian(T source)
Definition qendian.h:176
Flags
#define qWarning
Definition qlogging.h:162
constexpr const T & qMin(const T &a, const T &b)
Definition qminmax.h:40
GLboolean GLboolean GLboolean b
GLsizei const GLfloat * v
[13]
GLboolean GLboolean GLboolean GLboolean a
[7]
GLboolean r
[2]
GLuint GLuint end
GLenum GLuint GLenum GLsizei length
GLfloat GLfloat f
GLenum src
GLenum GLenum dst
GLenum GLuint GLenum GLsizei const GLchar * buf
GLenum target
GLbitfield flags
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLuint start
GLenum GLuint GLintptr offset
GLuint name
GLint GLint GLint GLint GLint GLint GLint GLbitfield mask
GLfloat n
GLsizei GLsizei GLchar * source
GLuint res
const GLubyte * c
GLenum GLsizei len
GLuint in
GLuint64EXT * result
[6]
GLdouble s
[6]
Definition qopenglext.h:235
GLuint num
GLuint GLenum GLsizei GLsizei GLint GLint GLboolean packed
#define X(name)
#define Q_ASSERT(cond)
Definition qrandom.cpp:47
QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QRegularExpressionMatchIterator &iterator)
static const uchar utf8bom[]
static QChar * fromUtf32LE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static QChar * fromUtf16LE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static QByteArray parseHtmlMetaForEncoding(QByteArrayView data)
static QChar * fromUtf32BE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static qsizetype toUtf8Len(qsizetype l)
static QChar * fromLocal8Bit(QChar *out, QByteArrayView in, QStringConverter::State *state)
static bool simdDecodeAscii(char16_t *, const uchar *, const uchar *, const uchar *)
static void simdCompareAscii(const qchar8_t *&, const qchar8_t *, const char16_t *&, const char16_t *)
static qsizetype toLatin1Len(qsizetype l)
static const uchar * simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
static bool simdEncodeAscii(uchar *, const char16_t *, const char16_t *, const char16_t *)
static QChar * fromUtf32(QChar *out, QByteArrayView in, QStringConverter::State *state)
static char * toUtf32(char *out, QStringView in, QStringConverter::State *state)
static char * toUtf16LE(char *out, QStringView in, QStringConverter::State *state)
static qsizetype fromUtf8Len(qsizetype l)
static char * toLocal8Bit(char *out, QStringView in, QStringConverter::State *state)
static qsizetype toUtf16Len(qsizetype l)
static qsizetype fromLatin1Len(qsizetype l)
static char * toUtf16BE(char *out, QStringView in, QStringConverter::State *state)
static char * toUtf32LE(char *out, QStringView in, QStringConverter::State *state)
static qsizetype fromUtf32Len(qsizetype l)
static bool nameMatch(const char *a, const char *b)
static QChar * fromUtf16BE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static qsizetype toUtf32Len(qsizetype l)
static qsizetype fromUtf16Len(qsizetype l)
@ HeaderDone
static char * toUtf32BE(char *out, QStringView in, QStringConverter::State *state)
@ LittleEndianness
@ DetectEndianness
@ BigEndianness
#define sp
#define Q_UNUSED(x)
unsigned int quint32
Definition qtypes.h:45
unsigned char uchar
Definition qtypes.h:27
ptrdiff_t qptrdiff
Definition qtypes.h:69
ptrdiff_t qsizetype
Definition qtypes.h:70
unsigned int uint
Definition qtypes.h:29
unsigned short ushort
Definition qtypes.h:28
QT_BEGIN_NAMESPACE typedef uchar * output
QByteArray ba
[0]
QTextStream out(stdout)
[7]
QObject::connect nullptr
list prepend("one")
static char16_t * convertToUnicode(char16_t *dst, QLatin1StringView in) noexcept
Definition qstring.cpp:5526
static char * convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept
static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state)
static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
static Q_CORE_EXPORT QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static QChar * convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian)
static const int Error
static const int EndOfString
static void appendUtf16(const NoOutput &, char16_t)
static void appendUcs4(const NoOutput &, char32_t)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView in)
static int compareUtf8(QByteArrayView utf8, QStringView utf16, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
static QChar * convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
static ValidUtf8Result isValidUtf8(QByteArrayView in)
static Q_CORE_EXPORT char * convertFromLatin1(char *out, QLatin1StringView in)