6#include "private/qsimd_p.h"
9#include <QtCore/qdatastream.h>
10#include <QtCore/qmetatype.h>
11#include <QtCore/qtextstream.h>
134 return isInf() ? FP_INFINITE : isNaN() ? FP_NAN
135 : !(b16 & 0x7fff) ? FP_ZERO : isNormal() ? FP_NORMAL : FP_SUBNORMAL;
163#if QT_COMPILER_SUPPORTS_HERE(F16C)
171#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
172static bool hasFastF16Avx256()
181 __mmask16
mask = _bzhi_u32(-1,
len);
182 __m256 f32 = _mm256_maskz_loadu_ps(
mask,
in );
183 __m128i f16 = _mm256_maskz_cvtps_ph(
mask, f32, _MM_FROUND_TO_NEAREST_INT);
184 _mm_mask_storeu_epi16(
out,
mask, f16);
190 __mmask16
mask = _bzhi_u32(-1,
len);
191 __m128i f16 = _mm_maskz_loadu_epi16(
mask,
in);
192 __m256 f32 = _mm256_cvtph_ps(f16);
193 _mm256_mask_storeu_ps(
out,
mask, f32);
200 constexpr qsizetype Step =
sizeof(__m256i) /
sizeof(
float);
201 constexpr qsizetype HalfStep =
sizeof(__m128i) /
sizeof(
float);
206 __m256 f32 = _mm256_loadu_ps(
in +
offset);
207 __m128i f16 = _mm256_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
208 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
out +
offset), f16);
212 for ( ;
i + Step <
len;
i += Step)
217 return convertOneChunk(
len - Step);
220#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
221 if (hasFastF16Avx256())
222 return qFloatToFloat16_tail_avx256(
out,
in,
len);
225 if (
len >= HalfStep) {
227 __m128 f32 = _mm_loadu_ps(
in +
offset);
228 __m128i f16 = _mm_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
229 _mm_storel_epi64(
reinterpret_cast<__m128i *
>(
out +
offset), f16);
234 return convertOneChunk(
len - HalfStep);
238 for ( ;
i <
len; ++
i)
239 out[
i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(
in[
i]), 0), 0);
245 constexpr qsizetype Step =
sizeof(__m256i) /
sizeof(
float);
246 constexpr qsizetype HalfStep =
sizeof(__m128i) /
sizeof(
float);
251 __m128i f16 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
in +
offset));
252 __m256 f32 = _mm256_cvtph_ps(f16);
257 for ( ;
i + Step <
len;
i += Step)
262 return convertOneChunk(
len - Step);
265#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
266 if (hasFastF16Avx256())
267 return qFloatFromFloat16_tail_avx256(
out,
in,
len);
270 if (
len >= HalfStep) {
272 __m128i f16 = _mm_loadl_epi64(
reinterpret_cast<const __m128i *
>(
in +
offset));
273 __m128 f32 = _mm_cvtph_ps(f16);
279 return convertOneChunk(
len - HalfStep);
283 for ( ;
i <
len; ++
i)
284 out[
i] = _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(
in[
i])));
287#elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2)
295 __fp16 *out_f16 =
reinterpret_cast<__fp16 *
>(
out);
297 for (;
i <
len - 3;
i += 4)
298 vst1_f16(out_f16 +
i, vcvt_f16_f32(vld1q_f32(
in +
i)));
300 out_f16[
i] = __fp16(
in[
i]);
305 const __fp16 *in_f16 =
reinterpret_cast<const __fp16 *
>(
in);
307 for (;
i <
len - 3;
i += 4)
308 vst1q_f32(
out +
i, vcvt_f32_f16(vld1_f16(in_f16 +
i)));
310 out[
i] =
float(in_f16[
i]);
366#ifndef QT_NO_DATASTREAM
411 return ts << float(
f);
\inmodule QtCore\reentrant
\keyword 16-bit Floating Point Support\inmodule QtCore \inheaderfile QFloat16
Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype len) noexcept
Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *out, const float *in, qsizetype len) noexcept
Combined button and popup list for selecting options.
QDataStream & operator<<(QDataStream &ds, qfloat16 f)
static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) noexcept
static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) noexcept
QDataStream & operator>>(QDataStream &ds, qfloat16 &f)
GLenum GLuint GLintptr offset
GLint GLint GLint GLint GLint GLint GLint GLbitfield mask
#define qCpuHasFeature(feature)
#define QT_FUNCTION_TARGET(x)
#define SIMD_EPILOGUE(i, length, max)
QTextStream out(stdout)
[7]