Qt 6.x
The Qt SDK
Loading...
Searching...
No Matches
qstringconverter_p.h
Go to the documentation of this file.
1// Copyright (C) 2020 The Qt Company Ltd.
2// Copyright (C) 2020 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#ifndef QSTRINGCONVERTER_P_H
6#define QSTRINGCONVERTER_P_H
7
8//
9// W A R N I N G
10// -------------
11//
12// This file is not part of the Qt API. It exists purely as an
13// implementation detail. This header file may change from version to
14// version without notice, or even be removed.
15//
16// We mean it.
17//
18
19#include <QtCore/qstring.h>
20#include <QtCore/qendian.h>
21#include <QtCore/qstringconverter.h>
22#include <QtCore/private/qglobal_p.h>
23
25
// qchar8_t: when the compiler defines __cpp_char8_t this is an alias for char8_t;
// otherwise it is a distinct, enumerator-less enum whose underlying type is uchar.
26#ifndef __cpp_char8_t
27enum qchar8_t : uchar {};
28#else
29using qchar8_t = char8_t;
30#endif
31
// Static helpers that, per the visible signatures, convert QLatin1StringView input
// into a char16_t / QChar output buffer and QStringView input into a char output buffer.
// NOTE(review): this listing skips lines 37, 40, 44 and 47 (the numbering jumps), so two
// overload signatures and part of their bodies are not visible here.
32struct QLatin1
33{
34 // Defined in qstring.cpp
35 static char16_t *convertToUnicode(char16_t *dst, QLatin1StringView in) noexcept;
36
   // Body of an overload whose signature (line 37) is absent from this listing:
   // `buffer` is reinterpreted as char16_t* and the result is handed back as QChar*.
38 {
39 char16_t *dst = reinterpret_cast<char16_t *>(buffer);
41 return reinterpret_cast<QChar *>(dst);
42 }
43
   // Tail of another overload's signature (its first line, 44, is absent); `state` is
   // marked [[maybe_unused]].
45 [[maybe_unused]] QStringConverterBase::State *state) noexcept
46 {
48
   // Re-wraps in.data()/in.size() as a QLatin1StringView and forwards to a
   // convertToUnicode overload that accepts that view.
49 return convertToUnicode(dst, QLatin1StringView(in.data(), in.size()));
50 }
51
52 static char *convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept;
53
54 // Defined in qstring.cpp
55 static char *convertFromUnicode(char *out, QStringView in) noexcept;
56};
57
59{
60 static const bool isTrusted = false;
61 static const bool allowNonCharacters = true;
62 static const bool skipAsciiHandling = false;
63 static const int Error = -1;
64 static const int EndOfString = -2;
65
66 static void appendByte(uchar *&ptr, uchar b)
67 { *ptr++ = b; }
68
70 { *ptr++ = b; }
71
72 static uchar peekByte(const uchar *ptr, qsizetype n = 0)
73 { return ptr[n]; }
74
75 static uchar peekByte(const qchar8_t *ptr, qsizetype n = 0)
76 { return ptr[n]; }
77
78 static qptrdiff availableBytes(const uchar *ptr, const uchar *end)
79 { return end - ptr; }
80
82 { return end - ptr; }
83
84 static void advanceByte(const uchar *&ptr, qsizetype n = 1)
85 { ptr += n; }
86
87 static void advanceByte(const qchar8_t *&ptr, qsizetype n = 1)
88 { ptr += n; }
89
90 static void appendUtf16(char16_t *&ptr, char16_t uc)
91 { *ptr++ = char16_t(uc); }
92
93 static void appendUcs4(char16_t *&ptr, char32_t uc)
94 {
97 }
98
99 static char16_t peekUtf16(const char16_t *ptr, qsizetype n = 0) { return ptr[n]; }
100
101 static qptrdiff availableUtf16(const char16_t *ptr, const char16_t *end)
102 { return end - ptr; }
103
104 static void advanceUtf16(const char16_t *&ptr, qsizetype n = 1) { ptr += n; }
105
106 static void appendUtf16(char32_t *&ptr, char16_t uc)
107 { *ptr++ = char32_t(uc); }
108
109 static void appendUcs4(char32_t *&ptr, char32_t uc)
110 { *ptr++ = uc; }
111};
112
114{
115 static const bool skipAsciiHandling = true;
116};
117
119{
// Encodes the UTF-16 code unit `u` as UTF-8, emitting each byte through
// Traits::appendByte(dst, ...).
// If `u` is a surrogate, the next unit is peeked from [src, end) and, once accepted,
// consumed with Traits::advanceUtf16(src); the pair is then encoded as four bytes.
// Returns 0 on success, Traits::EndOfString when `u` is a surrogate and no UTF-16 unit
// is available in [src, end), and Traits::Error on the rejection paths below.
// In the visible code every Error/EndOfString return precedes the first appendByte call
// of its branch.
// NOTE(review): this listing jumps from line 149 to 151, so the condition guarding the
// `return Traits::Error` at line 151 is not visible here.
124 template <typename Traits, typename OutputPtr, typename InputPtr> inline
125 int toUtf8(char16_t u, OutputPtr &dst, InputPtr &src, InputPtr end)
126 {
    // With Traits::skipAsciiHandling set, this branch is never taken and a value below
    // 0x80 would fall into the two-byte branch; presumably such callers have already
    // dealt with ASCII themselves - TODO confirm.
127 if (!Traits::skipAsciiHandling && u < 0x80) {
128 // U+0000 to U+007F (US-ASCII) - one byte
129 Traits::appendByte(dst, uchar(u));
130 return 0;
131 } else if (u < 0x0800) {
132 // U+0080 to U+07FF - two bytes
133 // first of two bytes
134 Traits::appendByte(dst, 0xc0 | uchar(u >> 6));
135 } else {
136 if (!QChar::isSurrogate(u)) {
137 // U+0800 to U+FFFF (except U+D800-U+DFFF) - three bytes
138 if (!Traits::allowNonCharacters && QChar::isNonCharacter(u))
139 return Traits::Error;
140
141 // first of three bytes
142 Traits::appendByte(dst, 0xe0 | uchar(u >> 12));
143 } else {
144 // U+10000 to U+10FFFF - four bytes
145 // need to get one extra codepoint
146 if (Traits::availableUtf16(src, end) == 0)
147 return Traits::EndOfString;
148
149 char16_t low = Traits::peekUtf16(src);
151 return Traits::Error;
152 if (!QChar::isLowSurrogate(low))
153 return Traits::Error;
154
    // the second unit is consumed only after it has been accepted as a low surrogate
155 Traits::advanceUtf16(src);
156 char32_t ucs4 = QChar::surrogateToUcs4(u, low);
157
158 if (!Traits::allowNonCharacters && QChar::isNonCharacter(ucs4))
159 return Traits::Error;
160
161 // first byte
162 Traits::appendByte(dst, 0xf0 | (uchar(ucs4 >> 18) & 0xf));
163
164 // second of four bytes
165 Traits::appendByte(dst, 0x80 | (uchar(ucs4 >> 12) & 0x3f));
166
167 // for the rest of the bytes
    // truncating to 16 bits is enough: the shared tail below only reads the low
    // 12 bits (two groups of 6) of the value
168 u = char16_t(ucs4);
169 }
170
171 // second to last byte
172 Traits::appendByte(dst, 0x80 | (uchar(u >> 6) & 0x3f));
173 }
174
175 // last byte
176 Traits::appendByte(dst, 0x80 | (u & 0x3f));
177 return 0;
178 }
179
181 {
182 return (b & 0xc0) == 0x80;
183 }
184
// Decodes one UTF-8 sequence whose lead byte is `b`; the continuation bytes are peeked
// from [src, end) with Traits::peekByte.
// The decoded value is written with Traits::appendUtf16(dst, ...) or, when
// QChar::requiresSurrogates(uc) is true, with Traits::appendUcs4(dst, uc).
// Returns the length of the sequence in bytes, lead byte included (1 on the ASCII path,
// otherwise 2 to 4); Traits::Error for a rejected sequence; Traits::EndOfString when
// fewer continuation bytes are available than required and the ones present are
// continuation bytes.
// `src` is advanced (by charsNeeded - 1) only after a multi-byte sequence has been
// decoded and written; the ASCII path and every Error/EndOfString return leave it as is.
// NOTE(review): this listing jumps from line 261 to 263, so the condition guarding the
// `return Traits::Error` at line 263 is not visible here.
187 template <typename Traits, typename OutputPtr, typename InputPtr> inline
188 qsizetype fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end)
189 {
190 qsizetype charsNeeded;
191 char32_t min_uc;
192 char32_t uc;
193
194 if (!Traits::skipAsciiHandling && b < 0x80) {
195 // US-ASCII
196 Traits::appendUtf16(dst, b);
197 return 1;
198 }
199
    // For each lead-byte class: total sequence length, smallest code point that may
    // legitimately use that length (checked later against overlong forms), and the
    // payload bits carried by the lead byte itself.
    // When Traits::isTrusted is set the first test is skipped and `b` is classified by
    // the range tests alone.
200 if (!Traits::isTrusted && Q_UNLIKELY(b <= 0xC1)) {
201 // a UTF-8 first character must be at least 0xC0
202 // however, all 0xC0 and 0xC1 first bytes can only produce overlong sequences
203 return Traits::Error;
204 } else if (b < 0xe0) {
205 charsNeeded = 2;
206 min_uc = 0x80;
207 uc = b & 0x1f;
208 } else if (b < 0xf0) {
209 charsNeeded = 3;
210 min_uc = 0x800;
211 uc = b & 0x0f;
212 } else if (b < 0xf5) {
213 charsNeeded = 4;
214 min_uc = 0x10000;
215 uc = b & 0x07;
216 } else {
217 // the last Unicode character is U+10FFFF
218 // it's encoded in UTF-8 as "\xF4\x8F\xBF\xBF"
219 // therefore, a byte higher than 0xF4 is not the UTF-8 first byte
220 return Traits::Error;
221 }
222
223 qptrdiff bytesAvailable = Traits::availableBytes(src, end);
224 if (Q_UNLIKELY(bytesAvailable < charsNeeded - 1)) {
225 // it's possible that we have an error instead of just unfinished bytes
    // charsNeeded is at most 4, so inside this branch at most two bytes are available;
    // checking offsets 0 and 1 therefore covers everything that can be inspected
226 if (bytesAvailable > 0 && !isContinuationByte(Traits::peekByte(src, 0)))
227 return Traits::Error;
228 if (bytesAvailable > 1 && !isContinuationByte(Traits::peekByte(src, 1)))
229 return Traits::Error;
230 return Traits::EndOfString;
231 }
232
233 // first continuation character
234 b = Traits::peekByte(src, 0);
235 if (!isContinuationByte(b))
236 return Traits::Error;
    // each continuation byte contributes its low 6 bits
237 uc <<= 6;
238 uc |= b & 0x3f;
239
240 if (charsNeeded > 2) {
241 // second continuation character
242 b = Traits::peekByte(src, 1);
243 if (!isContinuationByte(b))
244 return Traits::Error;
245 uc <<= 6;
246 uc |= b & 0x3f;
247
248 if (charsNeeded > 3) {
249 // third continuation character
250 b = Traits::peekByte(src, 2);
251 if (!isContinuationByte(b))
252 return Traits::Error;
253 uc <<= 6;
254 uc |= b & 0x3f;
255 }
256 }
257
258 // we've decoded something; safety-check it
    // (the whole validation is skipped when Traits::isTrusted is set)
259 if (!Traits::isTrusted) {
    // overlong encoding: the value would have fit in a shorter sequence
260 if (uc < min_uc)
261 return Traits::Error;
263 return Traits::Error;
264 if (!Traits::allowNonCharacters && QChar::isNonCharacter(uc))
265 return Traits::Error;
266 }
267
268 // write the UTF-16 sequence
269 if (!QChar::requiresSurrogates(uc)) {
270 // UTF-8 decoded and no surrogates are required
271 // detach if necessary
    // NOTE(review): nothing in this function detaches anything; the "detach" remark
    // above looks stale - confirm before relying on it.
272 Traits::appendUtf16(dst, char16_t(uc));
273 } else {
274 // UTF-8 decoded to something that requires a surrogate pair
275 Traits::appendUcs4(dst, uc);
276 }
277
    // the lead byte is not skipped here: only the continuation bytes peeked above are
278 Traits::advanceByte(src, charsNeeded - 1);
279 return charsNeeded;
280 }
281}
282
284{
289
290struct QUtf8
291{
293 {
294 char16_t *dst = reinterpret_cast<char16_t *>(buffer);
296 return reinterpret_cast<QChar *>(dst);
297 }
298
299 Q_CORE_EXPORT static char16_t* convertToUnicode(char16_t *dst, QByteArrayView in) noexcept;
302
304 {
305 char16_t *buffer = reinterpret_cast<char16_t *>(out);
307 return reinterpret_cast<QChar *>(buffer);
308 }
309
310 static char16_t *convertToUnicode(char16_t *dst, QByteArrayView in, QStringConverter::State *state);
311
312 Q_CORE_EXPORT static QByteArray convertFromUnicode(QStringView in);
315 Q_CORE_EXPORT static char *convertFromLatin1(char *out, QLatin1StringView in);
319 };
321 static int compareUtf8(QByteArrayView utf8, QStringView utf16,
325 static int compareUtf8(QByteArrayView lhs, QByteArrayView rhs,
327};
328
329struct QUtf16
330{
335};
336
337struct QUtf32
338{
343};
344
// Conversion helpers for the local 8-bit encoding.
// When Q_OS_WIN is not defined, or QT_BOOTSTRAPPED is, the visible code forwards to
// QUtf8; otherwise the inline wrappers test isUtf8() and the last visible statement of
// each falls through to the matching *_sys function.
// NOTE(review): this listing skips lines 348, 350-351, 366, 369 and 376 (the numbering
// jumps), so several signatures and the statements taken when isUtf8() is true are not
// visible here.
345struct Q_CORE_EXPORT QLocal8Bit
346{
347#if !defined(Q_OS_WIN) || defined(QT_BOOTSTRAPPED)
349 { return QUtf8::convertToUnicode(in, state); }
352#else
353 static int checkUtf8();
    // Returns true when the cached checkUtf8() result is positive.
    // The result is kept in a function-local static atomic; 0 is the "not computed yet"
    // marker, so a checkUtf8() return value of 0 would be recomputed on every call -
    // presumably checkUtf8() only returns non-zero values (TODO confirm).
    // No lock is taken and the accesses are relaxed, so checkUtf8() may run more than
    // once if several callers observe 0.
354 static bool isUtf8()
355 {
356 Q_CONSTINIT
357 static QBasicAtomicInteger<qint8> result = { 0 };
358 int r = result.loadRelaxed();
359 if (r == 0) {
360 r = checkUtf8();
361 result.storeRelaxed(r);
362 }
363 return r > 0;
364 }
365 static QString convertToUnicode_sys(QByteArrayView, QStringConverter::State *);
    // Wrapper whose signature (line 366) and isUtf8() branch statement (line 369) are
    // absent from this listing; the visible fallback calls convertToUnicode_sys.
367 {
368 if (isUtf8())
370 return convertToUnicode_sys(in, state);
371 }
372 static QByteArray convertFromUnicode_sys(QStringView, QStringConverter::State *);
    // Same shape as above for the UTF-16 -> 8-bit direction; the statement taken when
    // isUtf8() is true (line 376) is absent from this listing.
373 static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
374 {
375 if (isUtf8())
377 return convertFromUnicode_sys(in, state);
378 }
379#endif
380};
381
383
384#endif // QSTRINGCONVERTER_P_H
T loadRelaxed() const noexcept
\inmodule QtCore
Definition qbytearray.h:57
\inmodule QtCore
Definition qchar.h:48
constexpr bool isNonCharacter() const noexcept
Definition qchar.h:478
static constexpr char32_t surrogateToUcs4(char16_t high, char16_t low) noexcept
Converts a UTF16 surrogate pair with the given high and low values to its UCS-4-encoded code point.
Definition qchar.h:508
@ LastValidCodePoint
Definition qchar.h:66
static constexpr bool requiresSurrogates(char32_t ucs4) noexcept
Returns true if the UCS-4-encoded character specified by ucs4 can be split into the high and low part...
Definition qchar.h:504
static constexpr char16_t highSurrogate(char32_t ucs4) noexcept
Returns the high surrogate part of a UCS-4-encoded code point.
Definition qchar.h:518
constexpr bool isLowSurrogate() const noexcept
Returns true if the QChar is the low part of a UTF16 surrogate (for example if its code point is in r...
Definition qchar.h:480
constexpr bool isSurrogate() const noexcept
Definition qchar.h:481
static constexpr char16_t lowSurrogate(char32_t ucs4) noexcept
Returns the low surrogate part of a UCS-4-encoded code point.
Definition qchar.h:522
constexpr bool isHighSurrogate() const noexcept
Returns true if the QChar is the high part of a UTF16 surrogate (for example if its code point is in ...
Definition qchar.h:479
\inmodule QtCore
Definition qstringview.h:76
\macro QT_RESTRICTED_CAST_FROM_ASCII
Definition qstring.h:127
else opt state
[0]
Combined button and popup list for selecting options.
qsizetype fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end)
int toUtf8(char16_t u, OutputPtr &dst, InputPtr &src, InputPtr end)
bool isContinuationByte(uchar b)
CaseSensitivity
@ CaseSensitive
#define Q_UNLIKELY(x)
static ControlElement< T > * ptr(QWidget *widget)
GLboolean GLboolean GLboolean b
GLboolean r
[2]
GLuint GLuint end
GLenum src
GLenum GLuint buffer
GLenum GLenum dst
GLfloat n
GLuint in
GLuint64EXT * result
[6]
GLdouble s
[6]
Definition qopenglext.h:235
#define Q_ASSERT(cond)
Definition qrandom.cpp:47
@ LittleEndianness
@ DetectEndianness
@ BigEndianness
unsigned char uchar
Definition qtypes.h:27
ptrdiff_t qptrdiff
Definition qtypes.h:69
ptrdiff_t qsizetype
Definition qtypes.h:70
static QString convertToUnicode(const QString &asciiDomain, QUrl::AceProcessingOptions options)
Definition qurlidna.cpp:850
QTextStream out(stdout)
[7]
static char16_t * convertToUnicode(char16_t *dst, QLatin1StringView in) noexcept
Definition qstring.cpp:5526
static QChar * convertToUnicode(QChar *dst, QByteArrayView in, QStringConverterBase::State *state) noexcept
static char * convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept
static QChar * convertToUnicode(QChar *buffer, QLatin1StringView in) noexcept
static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state)
static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state)
static Q_CORE_EXPORT QString convertToUnicode(QByteArrayView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView, QStringConverter::State *, DataEndianness=DetectEndianness)
static QChar * convertToUnicode(QChar *out, QByteArrayView, QStringConverter::State *state, DataEndianness endian)
static const bool skipAsciiHandling
static void appendByte(qchar8_t *&ptr, qchar8_t b)
static uchar peekByte(const uchar *ptr, qsizetype n=0)
static qptrdiff availableBytes(const qchar8_t *ptr, const qchar8_t *end)
static void appendByte(uchar *&ptr, uchar b)
static void advanceByte(const uchar *&ptr, qsizetype n=1)
static const bool isTrusted
static void appendUtf16(char32_t *&ptr, char16_t uc)
static const bool skipAsciiHandling
static char16_t peekUtf16(const char16_t *ptr, qsizetype n=0)
static const int Error
static void appendUcs4(char16_t *&ptr, char32_t uc)
static const int EndOfString
static void advanceUtf16(const char16_t *&ptr, qsizetype n=1)
static uchar peekByte(const qchar8_t *ptr, qsizetype n=0)
static void appendUtf16(char16_t *&ptr, char16_t uc)
static const bool allowNonCharacters
static qptrdiff availableUtf16(const char16_t *ptr, const char16_t *end)
static void appendUcs4(char32_t *&ptr, char32_t uc)
static qptrdiff availableBytes(const uchar *ptr, const uchar *end)
static void advanceByte(const qchar8_t *&ptr, qsizetype n=1)
static Q_CORE_EXPORT QByteArray convertFromUnicode(QStringView in)
static int compareUtf8(QByteArrayView utf8, QStringView utf16, Qt::CaseSensitivity cs=Qt::CaseSensitive) noexcept
static QChar * convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
static ValidUtf8Result isValidUtf8(QByteArrayView in)
static Q_CORE_EXPORT char * convertFromLatin1(char *out, QLatin1StringView in)
static QChar * convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::State *state)