6#include <private/qstringconverter_p.h>
9#include "private/qsimd_p.h"
10#include "private/qstringiterator_p.h"
11#include "private/qtools_p.h"
15#include <unicode/ucnv.h>
16#include <unicode/ucnv_cb.h>
17#include <unicode/ucnv_err.h>
18#include <unicode/ustring.h>
23#ifndef QT_BOOTSTRAPPED
24#include <QtCore/qvarlengtharray.h>
28#if __has_include(<bit>) && __cplusplus > 201703L
36static_assert(std::is_nothrow_move_constructible_v<QStringEncoder>);
37static_assert(std::is_nothrow_move_assignable_v<QStringEncoder>);
38static_assert(std::is_nothrow_move_constructible_v<QStringDecoder>);
39static_assert(std::is_nothrow_move_assignable_v<QStringDecoder>);
45#if defined(__SSE2__) || defined(__ARM_NEON__)
48#if defined(__cpp_lib_int_pow2) && __cpp_lib_int_pow2 >= 202002L
49 return std::bit_width(
v) - 1;
55 result ^=
sizeof(unsigned) * 8 - 1;
67 __m256i
data = _mm256_loadu_si256(
reinterpret_cast<const __m256i *
>(
src));
68 __m128i data1 = _mm256_castsi256_si128(
data);
69 __m128i data2 = _mm256_extracti128_si256(
data, 1);
71 __m128i data1 = _mm_loadu_si128((
const __m128i*)
src);
72 __m128i data2 = _mm_loadu_si128(1+(
const __m128i*)
src);
83 __m128i
packed = _mm_packus_epi16(data1, data2);
84 __m128i nonAscii = _mm_cmpgt_epi8(
packed, _mm_setzero_si128());
90 ushort n = ~_mm_movemask_epi8(nonAscii);
95 nextAscii =
src + qBitScanReverse(
n) + 1;
106 __m128i
data = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
src));
108 __m128i nonAscii = _mm_cmpgt_epi8(
packed, _mm_setzero_si128());
111 _mm_storel_epi64(
reinterpret_cast<__m128i *
>(
dst),
packed);
113 uchar n = ~_mm_movemask_epi8(nonAscii);
115 nextAscii =
src + qBitScanReverse(
n) + 1;
130 __m128i
data = _mm_loadu_si128((
const __m128i*)
src);
133 const int BitSpacing = 2;
135 const __m256i extended = _mm256_cvtepu8_epi16(
data);
137 uint n = _mm256_movemask_epi8(extended);
140 _mm256_storeu_si256((__m256i*)
dst, extended);
144 const int BitSpacing = 1;
151 _mm_storeu_si128((__m128i*)
dst, _mm_unpacklo_epi8(
data, _mm_setzero_si128()));
152 _mm_storeu_si128(1+(__m128i*)
dst, _mm_unpackhi_epi8(
data, _mm_setzero_si128()));
166 n = qBitScanReverse(
n);
167 nextAscii =
src + (
n / BitSpacing) + 1;
173 __m128i
data = _mm_loadl_epi64(
reinterpret_cast<const __m128i *
>(
src));
177 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
dst), _mm_unpacklo_epi8(
data, _mm_setzero_si128()));
184 n = qBitScanReverse(
n);
185 nextAscii =
src +
n + 1;
198 const __m256i
mask = _mm256_set1_epi8(
char(0x80));
200 __m256i
data = _mm256_loadu_si256(
reinterpret_cast<const __m256i *
>(
src));
210 nextAscii =
src + qBitScanReverse(
n) + 1;
219 __m128i
data = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(
src));
230 nextAscii =
src + qBitScanReverse(
n) + 1;
264 __m128i data8 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(src8 +
offset));
267 __m256i data16 = _mm256_loadu_si256(
reinterpret_cast<const __m256i *
>(src16 +
offset));
270 __m256i datax8 = _mm256_cvtepu8_epi16(data8);
271 mask = _mm256_movemask_epi8(datax8);
276 __m256i latin1cmp = _mm256_cmpeq_epi16(datax8, data16);
277 mask = ~_mm256_movemask_epi8(latin1cmp);
282 __m128i datalo16 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(src16 +
offset));
283 __m128i datahi16 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(src16 +
offset) + 1);
286 __m128i datalo8 = _mm_unpacklo_epi8(data8, _mm_setzero_si128());
287 __m128i datahi8 = _mm_unpackhi_epi8(data8, _mm_setzero_si128());
290 __m128i latin1cmplo = _mm_cmpeq_epi16(datalo8, datalo16);
291 __m128i latin1cmphi = _mm_cmpeq_epi16(datahi8, datahi16);
292 mask = _mm_movemask_epi8(latin1cmphi) << 16;
293 mask |=
ushort(_mm_movemask_epi8(latin1cmplo));
299 mask = _mm_movemask_epi8(data8);
308 auto cmp_lt_16 = [&
mask, &
offset](
int n, __m128i data8, __m128i data16) {
311 unsigned sizemask = (1U << (2 *
n)) - 1;
314 data8 = _mm_unpacklo_epi8(data8, _mm_setzero_si128());
317 __m128i latin1cmp = _mm_cmpeq_epi16(data8, data16);
318 mask = ~_mm_movemask_epi8(latin1cmp) & sizemask;
319 mask |= _mm_movemask_epi8(data8);
326 __m128i data8 = _mm_loadl_epi64(
reinterpret_cast<const __m128i *
>(src8 +
offset));
327 __m128i data16 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(src16 +
offset));
328 cmp_lt_16(8, data8, data16);
333 __m128i data8 = _mm_cvtsi32_si128(qFromUnaligned<quint32>(src8 +
offset));
334 __m128i data16 = _mm_loadl_epi64(
reinterpret_cast<const __m128i *
>(src16 +
offset));
335 cmp_lt_16(4, data8, data16);
344#elif defined(__ARM_NEON__)
347 uint16x8_t maxAscii = vdupq_n_u16(0x7f);
348 uint16x8_t mask1 = { 1, 1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
349 uint16x8_t mask2 = vshlq_n_u16(mask1, 1);
354 uint16x8x2_t
in = vld2q_u16(
reinterpret_cast<const uint16_t *
>(
src));
358 uint16_t nonAscii = vaddvq_u16(vandq_u16(vcgtq_u16(
in.val[0], maxAscii), mask1))
359 | vaddvq_u16(vandq_u16(vcgtq_u16(
in.val[1], maxAscii), mask2));
362 uint16x8_t
out = vsliq_n_u16(
in.val[0],
in.val[1], 8);
365 vst1q_u8(
dst, vreinterpretq_u8_u16(
out));
371 nextAscii =
src + qBitScanReverse(nonAscii) + 1;
385 uint8x8_t msb_mask = vdup_n_u8(0x80);
386 uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
388 uint8x8_t
c = vld1_u8(
src);
389 uint8_t
n = vaddv_u8(vand_u8(vcge_u8(
c, msb_mask), add_mask));
392 vst1q_u16(
reinterpret_cast<uint16_t *
>(
dst), vmovl_u8(
c));
405 n = qBitScanReverse(
n);
406 nextAscii =
src +
n + 1;
421 uint8x8_t msb_mask = vdup_n_u8(0x80);
422 uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
424 uint8x8_t
c = vld1_u8(
src);
425 uint8_t
n = vaddv_u8(vand_u8(vcge_u8(
c, msb_mask), add_mask));
432 nextAscii =
src + qBitScanReverse(
n) + 1;
475 const char16_t *
src =
reinterpret_cast<const char16_t *
>(
in.data());
479 const char16_t *nextAscii =
end;
485 int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(u,
dst,
src,
end);
490 }
while (
src < nextAscii);
525 const char16_t *
src =
in.utf16();
529 if (
state->remainingChars) {
533 state->state_data[0] = 0;
534 state->remainingChars = 0;
545 const char16_t *nextAscii =
end;
550 char16_t uc = *
src++;
551 int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc,
cursor,
src,
end);
557 ++
state->invalidChars;
561 ++
state->invalidChars;
564 state->remainingChars = 1;
565 state->state_data[0] = uc;
567 return reinterpret_cast<char *
>(
cursor);
569 }
while (
src < nextAscii);
572 return reinterpret_cast<char *
>(
cursor);
583 *
out++ = 0b110'0'0000u | (
ch >> 6);
584 *
out++ = 0b10'00'0000u | (
ch & 0b0011'1111);
659 }
while (
src < nextAscii);
705 if (
state->remainingChars || !headerdone) {
707 uchar remainingCharsData[4];
709 qsizetype newCharsToCopy = qMin<qsizetype>(
sizeof(remainingCharsData) - remainingCharsCount,
end -
src);
711 memset(remainingCharsData, 0,
sizeof(remainingCharsData));
712 memcpy(remainingCharsData, &
state->state_data[0], remainingCharsCount);
713 memcpy(remainingCharsData + remainingCharsCount,
src, newCharsToCopy);
716 res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(remainingCharsData[0],
dst,
begin,
717 static_cast<const uchar *
>(remainingCharsData) + remainingCharsCount + newCharsToCopy);
719 ++
state->invalidChars;
720 *
dst++ = replacement;
725 state->remainingChars = remainingCharsCount + newCharsToCopy;
726 memcpy(&
state->state_data[0], remainingCharsData,
state->remainingChars);
728 }
else if (!headerdone) {
730 if (
dst[-1] == 0xfeff)
738 src +=
res - remainingCharsCount;
756 res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(
ch,
dst,
src,
end);
759 ++
state->invalidChars;
760 *
dst++ = replacement;
768 ++
state->invalidChars;
771 ++
state->invalidChars;
773 state->remainingChars = 0;
780 state->remainingChars = 0;
798 bool isValidAscii =
true;
801 if (
src >= nextAscii)
811 isValidAscii =
false;
816 return {
false,
false };
818 }
while (
src < nextAscii);
821 return {
true, isValidAscii };
826 auto src1 =
reinterpret_cast<const qchar8_t *
>(utf8.data());
827 auto end1 = src1 + utf8.size();
828 auto src2 =
reinterpret_cast<const char16_t *
>(utf16.data());
829 auto end2 = src2 + utf16.size();
834 if (src1 < end1 && src2 < end2) {
835 char32_t uc1 = *src1++;
836 char32_t uc2 = *src2++;
840 qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraitsNoAscii>(uc1,
output, src1, end1);
856 return int(uc1) - int(uc2);
858 }
while (src1 < end1 && src2 < end2);
861 return (end1 > src1) - int(end2 > src2);
867 auto src1 =
reinterpret_cast<const uchar *
>(utf8.
data());
868 auto end1 = src1 + utf8.
size();
869 auto src2 =
reinterpret_cast<const uchar *
>(
s.latin1());
870 auto end2 = src2 +
s.size();
872 while (src1 < end1 && src2 < end2) {
881 char32_t uc2 = *src2++;
887 return int(uc1) - int(uc2);
891 return (end1 > src1) - (end2 > src2);
900 const auto l = std::min(lhs.size(), rhs.size());
901 int r = memcmp(lhs.data(), rhs.data(), l);
906 auto src1 =
reinterpret_cast<const uchar *
>(lhs.data());
907 auto end1 = src1 + lhs.size();
909 auto src2 =
reinterpret_cast<const uchar *
>(rhs.data());
910 auto end2 = src2 + rhs.size();
912 while (src1 < end1 && src2 < end2) {
923 res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(
b,
output, src2, end2);
932 return int(uc1) - int(uc2);
936 return (end1 > src1) - (end2 > src2);
971 qToBigEndian<char16_t>(
in.data(),
in.size(),
out);
973 qToLittleEndian<char16_t>(
in.data(),
in.size(),
out);
975 state->remainingChars = 0;
977 return out + 2*
in.size();
991 const char *chars =
in.data();
998 const char *
end = chars +
len;
1001 if (
state->remainingChars +
len < 2) {
1004 state->remainingChars = 1;
1014 if (!headerdone ||
state->remainingChars) {
1016 if (
state->remainingChars)
1048 qFromBigEndian<char16_t>(chars, nPairs,
out);
1050 qFromLittleEndian<char16_t>(chars, nPairs,
out);
1054 state->remainingChars = 0;
1055 if ((
end - chars) & 1) {
1059 state->remainingChars = 1;
1094 out[2] = (char)0xfe;
1095 out[3] = (char)0xff;
1097 out[0] = (char)0xff;
1098 out[1] = (char)0xfe;
1110 if (
state->remainingChars == 1) {
1111 auto character =
state->state_data[
Data];
1115 state->remainingChars = 0;
1116 goto decode_surrogate;
1122 ucs4 =
ch.unicode();
1129 state->remainingChars = 1;
1154 result.resize((
in.size() + 7) >> 1);
1163 const char *chars =
in.data();
1169 const char *
end = chars +
len;
1172 memcpy(tuple, &
state->state_data[
Data], 4);
1175 if (
state->remainingChars +
len < 4) {
1177 while (chars <
end) {
1178 tuple[
state->remainingChars] = *chars;
1179 ++
state->remainingChars;
1183 memcpy(&
state->state_data[
Data], tuple, 4);
1193 state->remainingChars = 0;
1197 tuple[
num++] = *chars++;
1200 if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0) {
1202 }
else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff) {
1210 char32_t code = (endian ==
BigEndianness) ? qFromBigEndian<char32_t>(tuple) : qFromLittleEndian<char32_t>(tuple);
1226 while (chars <
end) {
1227 tuple[
num++] = *chars++;
1229 char32_t code = (endian ==
BigEndianness) ? qFromBigEndian<char32_t>(tuple) : qFromLittleEndian<char32_t>(tuple);
1242 memcpy(&
state->state_data[
Data], tuple, 4);
1249#if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED)
1250int QLocal8Bit::checkUtf8()
1252 return GetACP() == CP_UTF8 ? 1 : -1;
1258 const char *chars =
in.data();
1270 copyLocation =
state->remainingChars;
1271 extra += copyLocation;
1274 char *mbcs =
new char[newLength];
1276 mbcs[newLength-1] = 0;
1277 mbcs[newLength-2] = 0;
1278 memcpy(&(mbcs[copyLocation]), chars,
length);
1281 mbcs[0] = (char)
state->state_data[0];
1282 state->remainingChars = 0;
1284 const char *mb = mbcs;
1285 const char *
next = 0;
1287 while ((
next = CharNextExA(CP_ACP, mb, 0)) != mb) {
1289 int charlength = int(
next - mb);
1290 int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
1294 int r = GetLastError();
1296 if (
r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) &&
state) {
1297 state->remainingChars = 1;
1298 state->state_data[0] = (char)*mb;
1313 const char *mb =
in.data();
1323 char state_data = 0;
1324 int remainingChars = 0;
1328 state_data = (char)
state->state_data[0];
1329 remainingChars =
state->remainingChars;
1333 if (
state && remainingChars) {
1335 prev[0] = state_data;
1338 len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
1339 prev, 2, wc.data(), wc.length());
1343 state->remainingChars = 0;
1353 while (!(
len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
1354 mb, mblen, wc.data(), wc.length()))) {
1355 int r = GetLastError();
1356 if (
r == ERROR_INSUFFICIENT_BUFFER) {
1357 const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
1360 }
else if (
r == ERROR_NO_UNICODE_TRANSLATION) {
1362 while (mblen > 1 && !(mb[mblen-1]))
1365 if ((mblen <= 1) || (remainingChars && state_data))
1366 return convertToUnicodeCharByChar(
in,
state);
1368 state_data = mb[mblen-1];
1373 qWarning(
"MultiByteToWideChar: Cannot convert multibyte text");
1386 state->state_data[0] = (char)state_data;
1387 state->remainingChars = remainingChars;
1414 while (!(
len=WideCharToMultiByte(CP_ACP, 0, (
const wchar_t*)
ch, uclen,
1415 mb.data(), mb.size()-1, 0, &used_def)))
1417 int r = GetLastError();
1418 if (
r == ERROR_INSUFFICIENT_BUFFER) {
1419 mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
1420 (
const wchar_t*)
ch, uclen,
1421 0, 0, 0, &used_def));
1428 "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n",
1429 r,
reinterpret_cast<const wchar_t*
>(
QString(
ch, uclen).utf16()));
1439void QStringConverter::State::clear() noexcept
1450void QStringConverter::State::reset() noexcept
1454 UConverter *converter =
static_cast<UConverter *
>(
d[0]);
1456 ucnv_reset(converter);
1538 *
out = (char)
in[
i].cell();
1543 state->invalidChars += invalid;
1550 memcpy(
out,
s.constData(),
s.size()*
sizeof(
QChar));
1551 return out +
s.size();
1557 memcpy(
out,
s.constData(),
s.size());
1558 return out +
s.size();
1719 if (*
a ==
'-' || *
a ==
'_') {
1723 if (*
b ==
'-' || *
b ==
'_') {
1753 ucnv_close(
static_cast<UConverter *
>(
state->d[0]));
1754 state->d[0] =
nullptr;
1761 if (
state->d[0] ==
nullptr)
1762 state->d[0] = createConverterForName(
static_cast<const char *
>(
state->d[1]),
state);
1767 ensureConverter(
state);
1769 auto icu_conv =
static_cast<UConverter *
>(
state->d[0]);
1770 UErrorCode err = U_ZERO_ERROR;
1772 auto sourceLimit =
in.data() +
in.size();
1776 UChar *
target =
reinterpret_cast<UChar *
>(
out);
1780 UBool
flush =
false;
1783 UConverterToUCallback action;
1785 ucnv_getToUCallBack(icu_conv, &action, &
context);
1787 ucnv_setToUCallBack(icu_conv, action, &
state,
nullptr,
nullptr, &err);
1789 ucnv_toUnicode(icu_conv, &
target, targetLimit, &
source, sourceLimit,
nullptr, flush, &err);
1791 Q_ASSERT(err != U_BUFFER_OVERFLOW_ERROR);
1793 if (
auto leftOver = ucnv_toUCountPending(icu_conv, &err)) {
1794 ucnv_reset(icu_conv);
1795 state->invalidChars += leftOver;
1803 ensureConverter(
state);
1804 auto icu_conv =
static_cast<UConverter *
>(
state->d[0]);
1805 UErrorCode err = U_ZERO_ERROR;
1806 auto source =
reinterpret_cast<const UChar *
>(
in.data());
1807 auto sourceLimit =
reinterpret_cast<const UChar *
>(
in.data() +
in.size());
1809 qsizetype length = UCNV_GET_MAX_BYTES_FOR_STRING(
in.size(), ucnv_getMaxCharSize(icu_conv));
1813 UBool
flush =
false;
1816 UConverterFromUCallback action;
1818 ucnv_getFromUCallBack(icu_conv, &action, &
context);
1820 ucnv_setFromUCallBack(icu_conv, action, &
state,
nullptr,
nullptr, &err);
1822 ucnv_fromUnicode(icu_conv, &
target, targetLimit, &
source, sourceLimit,
nullptr, flush, &err);
1824 Q_ASSERT(err != U_BUFFER_OVERFLOW_ERROR);
1826 if (
auto leftOver = ucnv_fromUCountPending(icu_conv, &err)) {
1827 ucnv_reset(icu_conv);
1828 state->invalidChars += leftOver;
1834 Q_DISABLE_COPY_MOVE(QStringConverterICU)
1836 template<qsizetype X>
1839 return X * inLength *
sizeof(UChar);
1850 return 2 * inLength;
1854 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<1>},
1855 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<2>},
1856 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<3>},
1857 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<4>},
1858 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<5>},
1859 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<6>},
1860 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<7>},
1861 {
"icu, recompile if you see this", QStringConverterICU::toUtf16, QStringConverterICU::toLen, QStringConverterICU::fromUtf16, QStringConverterICU::fromLen<8>}
1864 static UConverter *createConverterForName(
const char *
name,
const State *
state)
1868 UErrorCode status = U_ZERO_ERROR;
1869 UConverter *conv = ucnv_open(
name, &status);
1870 if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
1875 if (
state->flags.testFlag(Flag::ConvertInvalidToNull)) {
1876 UErrorCode
error = U_ZERO_ERROR;
1878 auto nullToSubstituter = [](
const void *
context, UConverterToUnicodeArgs *toUArgs,
1879 const char *, int32_t
length,
1880 UConverterCallbackReason reason, UErrorCode *err) {
1881 if (reason <= UCNV_IRREGULAR) {
1882 *err = U_ZERO_ERROR;
1884 ucnv_cbToUWriteUChars(toUArgs, &
c, 1, 0, err);
1890 ucnv_setToUCallBack(conv, nullToSubstituter,
state,
nullptr,
nullptr, &
error);
1892 auto nullFromSubstituter = [](
const void *
context, UConverterFromUnicodeArgs *fromUArgs,
1893 const UChar *, int32_t
length,
1894 UChar32, UConverterCallbackReason reason, UErrorCode *err) {
1895 if (reason <= UCNV_IRREGULAR) {
1896 *err = U_ZERO_ERROR;
1897 const UChar replacement[] = { 0 };
1898 const UChar *stringBegin = std::begin(replacement);
1899 ucnv_cbFromUWriteUChars(fromUArgs, &stringBegin, std::end(replacement), 0, err);
1905 ucnv_setFromUCallBack(conv, nullFromSubstituter,
state,
nullptr,
nullptr, &
error);
1907 UErrorCode
error = U_ZERO_ERROR;
1909 auto qmarkToSubstituter = [](
const void *
context, UConverterToUnicodeArgs *toUArgs,
1910 const char *codeUnits,int32_t
length,
1911 UConverterCallbackReason reason, UErrorCode *err) {
1912 if (reason <= UCNV_IRREGULAR) {
1918 UCNV_TO_U_CALLBACK_SUBSTITUTE(
nullptr, toUArgs, codeUnits,
length, reason, err);
1921 ucnv_setToUCallBack(conv, qmarkToSubstituter,
state,
nullptr,
nullptr, &
error);
1923 auto qmarkFromSubstituter = [](
const void *
context, UConverterFromUnicodeArgs *fromUArgs,
1924 const UChar *codeUnits, int32_t
length,
1925 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode *err) {
1926 if (reason <= UCNV_IRREGULAR) {
1932 UCNV_FROM_U_CALLBACK_SUBSTITUTE(
nullptr, fromUArgs, codeUnits,
length,
1933 codePoint, reason, err);
1935 ucnv_setFromUCallBack(conv, qmarkFromSubstituter,
state,
nullptr,
nullptr, &
error);
1944 UErrorCode status = U_ZERO_ERROR;
1945 UConverter *conv = createConverterForName(
name,
state);
1949 const char *icuName = ucnv_getName(conv, &status);
1952 const char *persistentName = ucnv_getStandardName(icuName,
"MIME", &status);
1953 if (U_FAILURE(status) || !persistentName) {
1954 status = U_ZERO_ERROR;
1955 persistentName = ucnv_getStandardName(icuName,
"IANA", &status);
1957 state->d[1] =
const_cast<char *
>(persistentName);
1960 qsizetype maxCharSize = ucnv_getMaxCharSize(conv);
1961 state->clearFn = QStringConverterICU::clear_function;
1962 if (maxCharSize > 8 || maxCharSize < 1) {
1963 qWarning(
"Encountered unexpected codec \"%s\" which requires >8x space",
name);
1966 return &forLength[maxCharSize - 1];
1982 iface = encodingInterfaces + int(*
e);
1996 return static_cast<const char*
>(
state.
d[1]);
2055 return std::nullopt;
2065std::optional<QStringConverter::Encoding>
2070 if (arraySize > 3) {
2071 char32_t uc = qFromUnaligned<char32_t>(
data.data());
2076 if (expectedFirstCharacter) {
2085 if (arraySize > 2) {
2090 if (arraySize > 1) {
2091 char16_t uc = qFromUnaligned<char16_t>(
data.data());
2096 if (expectedFirstCharacter) {
2104 return std::nullopt;
2126 if (
ch ==
'\"' ||
ch ==
'\'' ||
ch ==
'>' ||
ch ==
'/') {
2132 if (
name ==
"unicode")
2134 if (!
name.isEmpty())
2196 return encodingInterfaces[int(
e)].
name;
constexpr qsizetype size() const noexcept
constexpr const_pointer data() const noexcept
char * data()
\macro QT_NO_CAST_FROM_BYTEARRAY
qsizetype size() const noexcept
Returns the number of bytes in this byte array.
const char * constData() const noexcept
Returns a pointer to the const data stored in the byte array.
void truncate(qsizetype pos)
Truncates the byte array at index position pos.
bool isEmpty() const noexcept
Returns true if the byte array has size 0; otherwise returns false.
static constexpr QChar fromUcs2(char16_t c) noexcept
static constexpr char32_t surrogateToUcs4(char16_t high, char16_t low) noexcept
Converts a UTF16 surrogate pair with the given high and low values to it's UCS-4-encoded code point.
static constexpr bool requiresSurrogates(char32_t ucs4) noexcept
Returns true if the UCS-4-encoded character specified by ucs4 can be split into the high and low part...
static constexpr auto fromUcs4(char32_t c) noexcept
static constexpr char16_t highSurrogate(char32_t ucs4) noexcept
Returns the high surrogate part of a UCS-4-encoded code point.
constexpr bool isLowSurrogate() const noexcept
Returns true if the QChar is the low part of a UTF16 surrogate (for example if its code point is in r...
QChar toCaseFolded() const noexcept
Returns the case folded equivalent of the character.
static constexpr char16_t lowSurrogate(char32_t ucs4) noexcept
Returns the low surrogate part of a UCS-4-encoded code point.
constexpr bool isHighSurrogate() const noexcept
Returns true if the QChar is the high part of a UTF16 surrogate (for example if its code point is in ...
Q_CORE_EXPORT const char * name() const noexcept
Returns the canonical name of the encoding this QStringConverter can encode or decode.
static Q_CORE_EXPORT std::optional< Encoding > encodingForHtml(QByteArrayView data)
Tries to determine the encoding of the HTML in data by looking at leading byte order marks or a chars...
static Q_CORE_EXPORT const char * nameForEncoding(Encoding e)
Returns the canonical name for encoding e.
Encoding
\value Utf8 Create a converter to or from UTF-8 \value Utf16 Create a converter to or from UTF-16.
static Q_CORE_EXPORT std::optional< Encoding > encodingForName(const char *name) noexcept
Convert name to the corresponding \l Encoding member, if there is one.
constexpr QStringConverter() noexcept
static Q_CORE_EXPORT std::optional< Encoding > encodingForData(QByteArrayView data, char16_t expectedFirstCharacter=0) noexcept
Returns the encoding for the content of data if it can be determined.
static Q_CORE_EXPORT QStringDecoder decoderForHtml(QByteArrayView data)
Tries to determine the encoding of the HTML in data by looking at leading byte order marks or a chars...
constexpr QStringDecoder() noexcept
Default constructs an decoder.
\macro QT_RESTRICTED_CAST_FROM_ASCII
qsizetype size() const
Returns the number of characters in this string.
QString mid(qsizetype position, qsizetype n=-1) const
Returns a string that contains n characters of this string, starting at the specified position index.
QString first(qsizetype n) const
QString simplified() const &
const QChar at(qsizetype i) const
Returns the character at the given index position in the string.
QString toLower() const &
QString left(qsizetype n) const
Returns a substring that contains the n leftmost characters of the string.
static QString static QString qsizetype indexOf(QChar c, qsizetype from=0, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
Combined button and popup list for selecting options.
constexpr int qt_lencmp(qsizetype lhs, qsizetype rhs) noexcept
constexpr char toAsciiLower(char ch) noexcept
QTextStream & flush(QTextStream &stream)
Calls QTextStream::flush() on stream and returns stream.
constexpr Initialization Uninitialized
QT_POPCOUNT_RELAXED_CONSTEXPR uint qCountLeadingZeroBits(quint32 v) noexcept
constexpr uint qCountTrailingZeroBits(quint32 v) noexcept
#define QByteArrayLiteral(str)
size_t qstrlen(const char *str)
constexpr QStaticByteArrayMatcher< N > qMakeStaticByteArrayMatcher(const char(&pattern)[N]) noexcept
DBusConnection const char DBusError * error
static QString header(const QString &name)
constexpr T qToBigEndian(T source)
constexpr T qToLittleEndian(T source)
constexpr const T & qMin(const T &a, const T &b)
GLboolean GLboolean GLboolean b
GLsizei const GLfloat * v
[13]
GLboolean GLboolean GLboolean GLboolean a
[7]
GLenum GLuint GLenum GLsizei length
GLenum GLuint GLenum GLsizei const GLchar * buf
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLenum GLuint GLintptr offset
GLint GLint GLint GLint GLint GLint GLint GLbitfield mask
GLsizei GLsizei GLchar * source
GLuint GLenum GLsizei GLsizei GLint GLint GLboolean packed
QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QRegularExpressionMatchIterator &iterator)
static const uchar utf8bom[]
static QChar * fromUtf32LE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static QChar * fromUtf16LE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static QByteArray parseHtmlMetaForEncoding(QByteArrayView data)
static QChar * fromUtf32BE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static qsizetype toUtf8Len(qsizetype l)
static QChar * fromLocal8Bit(QChar *out, QByteArrayView in, QStringConverter::State *state)
static bool simdDecodeAscii(char16_t *, const uchar *, const uchar *, const uchar *)
static void simdCompareAscii(const qchar8_t *&, const qchar8_t *, const char16_t *&, const char16_t *)
static qsizetype toLatin1Len(qsizetype l)
static const uchar * simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
static bool simdEncodeAscii(uchar *, const char16_t *, const char16_t *, const char16_t *)
static QChar * fromUtf32(QChar *out, QByteArrayView in, QStringConverter::State *state)
static char * toUtf32(char *out, QStringView in, QStringConverter::State *state)
static char * toUtf16LE(char *out, QStringView in, QStringConverter::State *state)
static qsizetype fromUtf8Len(qsizetype l)
static char * toLocal8Bit(char *out, QStringView in, QStringConverter::State *state)
static qsizetype toUtf16Len(qsizetype l)
static qsizetype fromLatin1Len(qsizetype l)
static char * toUtf16BE(char *out, QStringView in, QStringConverter::State *state)
static char * toUtf32LE(char *out, QStringView in, QStringConverter::State *state)
static qsizetype fromUtf32Len(qsizetype l)
static bool nameMatch(const char *a, const char *b)
static QChar * fromUtf16BE(QChar *out, QByteArrayView in, QStringConverter::State *state)
static qsizetype toUtf32Len(qsizetype l)
static qsizetype fromUtf16Len(qsizetype l)
static char * toUtf32BE(char *out, QStringView in, QStringConverter::State *state)
QT_BEGIN_NAMESPACE typedef uchar * output
QTextStream out(stdout)
[7]
static char16_t * convertToUnicode(char16_t *dst, QLatin1StringView in) noexcept
static char * convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept
static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state)
static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
static Q_CORE_EXPORT QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static QChar * convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian)
static const int EndOfString
static void appendUtf16(const NoOutput &, char16_t)
static void appendUcs4(const NoOutput &, char32_t)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView in)
static int compareUtf8(QByteArrayView utf8, QStringView utf16, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
static QChar * convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
static ValidUtf8Result isValidUtf8(QByteArrayView in)
static Q_CORE_EXPORT char * convertFromLatin1(char *out, QLatin1StringView in)