// Bit layout of an IEEE 754 half precision (binary16) value stored in a uint16_t:
// 1 sign bit (bit 15), 5 exponent bits (bits 14..10), 10 significand bits (bits 9..0).

// Mask selecting the sign bit.
static const uint16_t FLOAT16_SIGN_MASK = UINT16_C(0x8000);
// Mask selecting the 5 exponent bits.
static const uint16_t FLOAT16_EXPONENT_MASK = UINT16_C(0x7C00);
// Mask selecting the 10 significand bits.
static const uint16_t FLOAT16_SIGNIFICAND_MASK = UINT16_C(0x03FF);

// Index of the sign bit.
static const uint16_t FLOAT16_SIGN_BIT_POSITION = UINT16_C(15);
// Index of the lowest exponent bit.
static const uint16_t FLOAT16_EXPONENT_BIT_POSITION = UINT16_C(10);

// The significand occupies every bit below the exponent, so its width equals
// the exponent's bit position.
static const uint16_t FLOAT16_SIGNIFICAND_NUM_BITS = FLOAT16_EXPONENT_BIT_POSITION;

// Exponent field value with all 5 bits set (the infinity / NaN encoding).
static const uint16_t FLOAT16_EXPONENT_INFINITY_NAN = UINT16_C(0x001F);
// Bias applied to the stored exponent.
static const uint16_t FLOAT16_EXPONENT_BIAS = UINT16_C(15);
// Bit layout of an IEEE 754 single precision (binary32) value stored in a uint32_t:
// 1 sign bit (bit 31), 8 exponent bits (bits 30..23), 23 significand bits (bits 22..0).

// Mask selecting the sign bit.
static const uint32_t FLOAT32_SIGN_MASK = UINT32_C(0x80000000);
// Mask selecting the 8 exponent bits.
static const uint32_t FLOAT32_EXPONENT_MASK = UINT32_C(0x7F800000);
// Mask selecting the 23 significand bits.
static const uint32_t FLOAT32_SIGNIFICAND_MASK = UINT32_C(0x007FFFFF);

// Index of the sign bit.
static const uint32_t FLOAT32_SIGN_BIT_POSITION = UINT32_C(31);
// Index of the lowest exponent bit.
static const uint32_t FLOAT32_EXPONENT_BIT_POSITION = UINT32_C(23);

// The significand occupies every bit below the exponent, so its width equals
// the exponent's bit position.
static const uint32_t FLOAT32_SIGNIFICAND_NUM_BITS = FLOAT32_EXPONENT_BIT_POSITION;

// Exponent field value with all 8 bits set (the infinity / NaN encoding).
static const uint32_t FLOAT32_EXPONENT_INFINITY_NAN = UINT32_C(0x00FF);
// Bias applied to the stored exponent.
static const uint32_t FLOAT32_EXPONENT_BIAS = UINT32_C(127);
// Body fragments of the 16-bit -> 32-bit float bit-pattern conversion. The input
// name float16Value matches the signature `float convertUInt16ToFloat(uint16_t float16Value)`
// that appears later in this listing.
// NOTE(review): this listing appears to be missing lines (braces, at least one
// enclosing condition, the while-loop body, the return statement) and each statement
// carries a stray listing line number - restore from the original file before building.

// Isolate the sign bit; it is left in place at bit 15.
33 const uint16_t sign16Shifted = (float16Value & FLOAT16_SIGN_MASK);
// Isolate the 5 exponent bits and shift them down to bit 0.
34 const uint16_t exponent16 =
static_cast<uint16_t
>(
35 static_cast<uint16_t
>(float16Value & FLOAT16_EXPONENT_MASK) >> FLOAT16_EXPONENT_BIT_POSITION);
// Isolate the 10 significand bits.
36 const uint16_t significand16 = (float16Value & FLOAT16_SIGNIFICAND_MASK);
// Widen the significand and left-align it inside the 23-bit single precision
// significand field (shift left by 23 - 10 = 13).
39 uint32_t significand32 =
static_cast<uint32_t
>(significand16)
40 << (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS);
// Single precision exponent field; stays 0 unless one of the cases below sets it.
43 uint32_t exponent32 = 0;
// Non-zero significand case.
// NOTE(review): the enclosing condition is not visible here; presumably this is nested
// under an `exponent16 == 0` test (half precision subnormal input) - confirm.
46 if (significand32 != 0)
// Starting exponent field value: 1 + 127 - 15 = 113.
49 exponent32 = 1 + FLOAT32_EXPONENT_BIAS - FLOAT16_EXPONENT_BIAS;
// Loops while bit 23 (the bit just above the significand field) is still clear.
// The loop body is not visible in this listing.
51 while ((significand32 & (FLOAT32_SIGNIFICAND_MASK + 1)) == 0)
// Keep only the 23 significand bits, dropping anything at bit 23 and above.
57 significand32 &= FLOAT32_SIGNIFICAND_MASK;
// Half precision exponent field is all ones (infinity / NaN encoding):
// use the all-ones single precision exponent field.
60 else if (exponent16 == FLOAT16_EXPONENT_INFINITY_NAN)
63 exponent32 = FLOAT32_EXPONENT_INFINITY_NAN;
// Re-bias the exponent: remove the half precision bias (15), add the single precision bias (127).
68 exponent32 =
static_cast<uint32_t
>(exponent16) - FLOAT16_EXPONENT_BIAS + FLOAT32_EXPONENT_BIAS;
// Move the sign from bit 15 up to bit 31.
72 const uint32_t sign32Shifted =
static_cast<uint32_t
>(sign16Shifted)
73 << (FLOAT32_SIGN_BIT_POSITION - FLOAT16_SIGN_BIT_POSITION);
// Place the exponent field at bit 23 and combine sign, exponent and significand.
74 const uint32_t exponent32Shifted = exponent32 << FLOAT32_EXPONENT_BIT_POSITION;
75 const uint32_t float32Value = sign32Shifted | exponent32Shifted | significand32;
// Body fragments of the 32-bit -> 16-bit float bit-pattern conversion; the result is
// assembled into float16Value at the end of this fragment.
// NOTE(review): presumably the body of `uint16_t convertFloatToUInt16(float float32)` listed
// later; the statement that produces float32Value from float32 is not visible - confirm.
// NOTE(review): this listing appears to be missing lines (braces, conditions, the left-hand
// sides of two expressions, the use of needsRounding, the return) and each statement
// carries a stray listing line number - restore from the original file before building.

// Split the single precision bit pattern into sign (left at bit 31), exponent field
// (shifted down to bit 0) and 23-bit significand.
86 const uint32_t sign32Shifted = (float32Value & FLOAT32_SIGN_MASK);
87 const uint32_t exponent32 = (float32Value & FLOAT32_EXPONENT_MASK) >> FLOAT32_EXPONENT_BIT_POSITION;
88 const uint32_t significand32 = (float32Value & FLOAT32_SIGNIFICAND_MASK);
// Keep the top 10 significand bits by dropping the low 23 - 10 = 13 bits.
91 uint16_t significand16 =
static_cast<uint16_t
>(
92 (significand32 >> (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS)));
// Rounding flag, initially false; the statements that assign it are not fully visible here.
95 bool needsRounding =
false;
// Half precision exponent field; stays 0 unless one of the cases below sets it.
96 uint16_t exponent16 = 0;
// Non-zero significand case; its enclosing condition and its body are not visible here.
99 if (significand32 != 0)
// Single precision exponent field is all ones (infinity / NaN encoding):
// use the all-ones half precision exponent field.
105 else if (exponent32 == FLOAT32_EXPONENT_INFINITY_NAN)
108 exponent16 = FLOAT16_EXPONENT_INFINITY_NAN;
// Re-bias the exponent in signed arithmetic (exponent32 - 127 + 15) so that values
// too small for half precision show up as zero or negative.
113 const int16_t signedExponent16 =
static_cast<int16_t
>(
static_cast<int32_t
>(exponent32) -
114 static_cast<int32_t
>(FLOAT32_EXPONENT_BIAS) +
static_cast<int32_t
>(FLOAT16_EXPONENT_BIAS));
// Re-biased exponent above 31: use the all-ones (infinity / NaN) exponent field.
// NOTE(review): a re-biased exponent equal to 31 is not caught by this `>` test; if it
// reaches the plain `exponent16 = signedExponent16` assignment further down, a finite input
// would get the infinity / NaN exponent field - check whether `>=` is intended.
115 if (signedExponent16 > FLOAT16_EXPONENT_INFINITY_NAN)
118 exponent16 = FLOAT16_EXPONENT_INFINITY_NAN;
// Re-biased exponent is zero or negative.
120 else if (signedExponent16 <= 0)
// Compares against -10 (the negated significand width); the statement guarded by this
// test is not visible here.
123 if (signedExponent16 <=
static_cast<int16_t
>(-FLOAT16_SIGNIFICAND_NUM_BITS))
// Significand with bit 23 set in addition to the stored 23 bits.
131 const uint32_t fullSignificand32 = significand32 | (FLOAT32_SIGNIFICAND_MASK + 1);
// Extra right shift: 1 - signedExponent16.
132 const uint32_t significandShift =
static_cast<uint32_t
>(1 - signedExponent16);
// Shift right by the usual 13 bits plus the extra shift.
133 significand16 =
static_cast<uint16_t
>(fullSignificand32 >>
134 (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS + significandShift));
// Tail of an expression whose start and end are not visible: shifts by one bit less than
// the shift above, i.e. it looks at the highest bit that was dropped from significand16.
// NOTE(review): presumably assigned to needsRounding - confirm.
137 ((fullSignificand32 >> (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS +
138 significandShift - 1)) &
// Store the re-biased exponent directly as the half precision exponent field.
145 exponent16 =
static_cast<uint16_t
>(signedExponent16);
// Tail of an expression whose start and end are not visible: shifts significand32 right by
// 23 - 10 - 1 = 12, i.e. it looks at the highest bit dropped by the 13-bit shift above.
// NOTE(review): presumably assigned to needsRounding - confirm.
147 ((significand32 >> (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS - 1)) &
// Move the sign from bit 31 down to bit 15.
153 const uint16_t sign16Shifted =
154 static_cast<uint16_t
>(sign32Shifted >> (FLOAT32_SIGN_BIT_POSITION - FLOAT16_SIGN_BIT_POSITION));
// Place the exponent field at bit 10.
155 const uint16_t exponent16Shifted =
static_cast<uint16_t
>(exponent16 << FLOAT16_EXPONENT_BIT_POSITION);
// Combine sign, exponent and significand; declared non-const, so it may be modified by
// statements not visible here.
156 uint16_t float16Value =
static_cast<uint16_t
>(sign16Shifted | exponent16Shifted) | significand16;
// Body fragment: reinterprets the storage of float32Value as a float by casting its address
// through void* to const float* and reading through that pointer. The name float32Value matches
// the signature `float convertUInt32ToFloat(uint32_t float32Value)` later in this listing; the
// header and braces are not visible here.
// NOTE(review): reading an object through a pointer to an unrelated type is type punning that
// the C++ aliasing rules do not permit; copying the bytes with std::memcpy (needs <cstring>)
// is the usual well-defined alternative - consider switching.
169 const float* convertedFloat =
static_cast<const float*
>(
static_cast<void*
>(&float32Value));
// Return the value read through the reinterpreted pointer.
171 return *convertedFloat;
// Body fragment: reinterprets the storage of float32 as a uint32_t by casting its address
// through void* to const uint32_t* and reading through that pointer. The name float32 matches
// the signature `uint32_t convertFloatToUInt32(float float32)` later in this listing; the
// header and braces are not visible here.
// NOTE(review): reading an object through a pointer to an unrelated type is type punning that
// the C++ aliasing rules do not permit; copying the bytes with std::memcpy (needs <cstring>)
// is the usual well-defined alternative - consider switching.
176 const uint32_t* float32ValuePtr =
static_cast<const uint32_t*
>(
static_cast<void*
>(&float32));
// Return the value read through the reinterpreted pointer.
178 return *float32ValuePtr;
// Body fragment: reinterprets the storage of float64Value as a double by casting its address
// through void* to const double* and reading through that pointer. The name float64Value matches
// the signature `double convertUInt64ToDouble(uint64_t float64Value)` later in this listing; the
// header and braces are not visible here.
// NOTE(review): reading an object through a pointer to an unrelated type is type punning that
// the C++ aliasing rules do not permit; copying the bytes with std::memcpy (needs <cstring>)
// is the usual well-defined alternative - consider switching.
183 const double* convertedDouble =
static_cast<const double*
>(
static_cast<void*
>(&float64Value));
// Return the value read through the reinterpreted pointer.
185 return *convertedDouble;
// Body fragment: reinterprets the storage of float64 as a uint64_t by casting its address
// through void* to const uint64_t* and reading through that pointer. The name float64 matches
// the signature `uint64_t convertDoubleToUInt64(double float64)` later in this listing; the
// header and braces are not visible here.
// NOTE(review): reading an object through a pointer to an unrelated type is type punning that
// the C++ aliasing rules do not permit; copying the bytes with std::memcpy (needs <cstring>)
// is the usual well-defined alternative - consider switching.
190 const uint64_t* float64ValuePtr =
static_cast<const uint64_t*
>(
static_cast<void*
>(&float64));
// Return the value read through the reinterpreted pointer.
192 return *float64ValuePtr;
// Bare function signatures as they appear in this listing: no body, braces or terminator
// follows any of them here. Each converts between a floating point type and an unsigned
// integer type, as the parameter and return types show.
// NOTE(review): the parameter names (float64, float32, float32Value, float64Value,
// float16Value) match variables used by the body fragments earlier in the listing, so these
// are presumably the detached headers of those bodies - confirm against the original file.
uint64_t convertDoubleToUInt64(double float64)
uint32_t convertFloatToUInt32(float float32)
float convertUInt32ToFloat(uint32_t float32Value)
double convertUInt64ToDouble(uint64_t float64Value)
uint16_t convertFloatToUInt16(float float32)
float convertUInt16ToFloat(uint16_t float16Value)