// Field layout constants for the 16-bit float pattern: 1 sign bit (bit 15),
// 5 exponent bits (bits 10 to 14) and 10 significand bits (bits 0 to 9).
// Masks select each field in place, without shifting.
8 static const uint16_t FLOAT16_SIGN_MASK = UINT16_C(0x8000);
9 static const uint16_t FLOAT16_EXPONENT_MASK = UINT16_C(0x7C00);
10 static const uint16_t FLOAT16_SIGNIFICAND_MASK = UINT16_C(0x03FF);
// Index of the lowest bit of the sign field and of the exponent field.
12 static const uint16_t FLOAT16_SIGN_BIT_POSITION = UINT16_C(15);
13 static const uint16_t FLOAT16_EXPONENT_BIT_POSITION = UINT16_C(10);
// The significand occupies every bit below the exponent field, so its width
// equals the exponent bit position (10).
15 static const uint16_t FLOAT16_SIGNIFICAND_NUM_BITS = FLOAT16_EXPONENT_BIT_POSITION;
// Exponent field value with all 5 bits set; per its name it marks infinity or NaN.
17 static const uint16_t FLOAT16_EXPONENT_INFINITY_NAN = UINT16_C(0x001F);
// Offset between the stored exponent field and the actual exponent.
18 static const uint16_t FLOAT16_EXPONENT_BIAS = UINT16_C(15);
// Field layout constants for the 32-bit float pattern: 1 sign bit (bit 31),
// 8 exponent bits (bits 23 to 30) and 23 significand bits (bits 0 to 22).
20 static const uint32_t FLOAT32_SIGN_MASK = UINT32_C(0x80000000);
21 static const uint32_t FLOAT32_EXPONENT_MASK = UINT32_C(0x7F800000);
22 static const uint32_t FLOAT32_SIGNIFICAND_MASK = UINT32_C(0x007FFFFF);
// Index of the lowest bit of the sign field and of the exponent field.
24 static const uint32_t FLOAT32_SIGN_BIT_POSITION = UINT32_C(31);
25 static const uint32_t FLOAT32_EXPONENT_BIT_POSITION = UINT32_C(23);
// Significand width (23), equal to the exponent bit position.
27 static const uint32_t FLOAT32_SIGNIFICAND_NUM_BITS = FLOAT32_EXPONENT_BIT_POSITION;
// Exponent field value with all 8 bits set; per its name it marks infinity or NaN.
29 static const uint32_t FLOAT32_EXPONENT_INFINITY_NAN = UINT32_C(0x00FF);
// Offset between the stored exponent field and the actual exponent.
30 static const uint32_t FLOAT32_EXPONENT_BIAS = UINT32_C(127);
// Widens a 16-bit float bit pattern (float16Value) into a 32-bit float bit
// pattern (float32Value).
// NOTE(review): this listing omits lines (function header, braces and some
// statements; the embedded line numbers jump), so the branch structure below
// is only partly visible. Presumably this is the body of convertUInt16ToFloat;
// confirm against the full file.
//
// Decompose the input: the sign stays at bit 15, the exponent is shifted down
// to bit 0, the significand is already at bit 0.
35 const uint16_t sign16Shifted = (float16Value & FLOAT16_SIGN_MASK);
36 const uint16_t exponent16 =
static_cast<uint16_t
>(
37 static_cast<uint16_t
>(float16Value & FLOAT16_EXPONENT_MASK) >> FLOAT16_EXPONENT_BIT_POSITION);
38 const uint16_t significand16 = (float16Value & FLOAT16_SIGNIFICAND_MASK);
// Left-align the 10-bit significand inside the 23-bit field (shift by 23 - 10 = 13).
41 uint32_t significand32 =
static_cast<uint32_t
>(significand16)
42 << (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS);
// Default exponent is zero; it is kept unless one of the branches below assigns it.
45 uint32_t exponent32 = 0;
// Nonzero significand case. NOTE(review): presumably nested in a check that
// exponent16 is zero (subnormal input); that line is not shown here.
48 if (significand32 != 0)
// Start from exponent field 1 + 127 - 15 = 113.
51 exponent32 = 1 + FLOAT32_EXPONENT_BIAS - FLOAT16_EXPONENT_BIAS;
// Loop while bit 23 (FLOAT32_SIGNIFICAND_MASK + 1) is still clear. The loop
// body is not shown; presumably it shifts the significand left and decrements
// the exponent until the leading one reaches bit 23.
53 while ((significand32 & (FLOAT32_SIGNIFICAND_MASK + 1)) == 0)
// Drop bit 23 again so only the 23 stored significand bits remain.
59 significand32 &= FLOAT32_SIGNIFICAND_MASK;
// All-ones 16-bit exponent maps to the all-ones 32-bit exponent.
62 else if (exponent16 == FLOAT16_EXPONENT_INFINITY_NAN)
65 exponent32 = FLOAT32_EXPONENT_INFINITY_NAN;
// Remaining case: re-bias the exponent (exponent16 - 15 + 127).
70 exponent32 =
static_cast<uint32_t
>(exponent16) - FLOAT16_EXPONENT_BIAS + FLOAT32_EXPONENT_BIAS;
// Recompose: move the sign from bit 15 to bit 31 (shift by 16), place the
// exponent at bit 23 and OR the three fields together.
74 const uint32_t sign32Shifted =
static_cast<uint32_t
>(sign16Shifted)
75 << (FLOAT32_SIGN_BIT_POSITION - FLOAT16_SIGN_BIT_POSITION);
76 const uint32_t exponent32Shifted = exponent32 << FLOAT32_EXPONENT_BIT_POSITION;
77 const uint32_t float32Value = sign32Shifted | exponent32Shifted | significand32;
// Narrows a 32-bit float bit pattern (float32Value) into a 16-bit float bit
// pattern (float16Value).
// NOTE(review): this listing omits lines (function header, braces, several
// branch bodies and the start of two assignments), so the control flow is only
// partly visible. Presumably this is the body of convertFloatToUInt16; confirm
// against the full file.
//
// Decompose the input: the sign stays at bit 31, the exponent is shifted down
// to bit 0, the significand is already at bit 0.
88 const uint32_t sign32Shifted = (float32Value & FLOAT32_SIGN_MASK);
89 const uint32_t exponent32 = (float32Value & FLOAT32_EXPONENT_MASK) >> FLOAT32_EXPONENT_BIT_POSITION;
90 const uint32_t significand32 = (float32Value & FLOAT32_SIGNIFICAND_MASK);
// Truncate the significand to its top 10 bits (drop the low 23 - 10 = 13 bits).
93 uint16_t significand16 =
static_cast<uint16_t
>(
94 (significand32 >> (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS)));
// Rounding flag and exponent defaults; branches below may overwrite them.
97 bool needsRounding =
false;
98 uint16_t exponent16 = 0;
// Nonzero significand case; its body and any enclosing condition are not
// shown. NOTE(review): presumably the subnormal 32-bit input path; confirm.
101 if (significand32 != 0)
// All-ones 32-bit exponent maps to the all-ones 16-bit exponent.
107 else if (exponent32 == FLOAT32_EXPONENT_INFINITY_NAN)
110 exponent16 = FLOAT16_EXPONENT_INFINITY_NAN;
// Re-bias the exponent (exponent32 - 127 + 15) in signed 32-bit arithmetic so
// that a negative result is representable, then narrow to int16_t.
115 const int16_t signedExponent16 =
static_cast<int16_t
>(
static_cast<int32_t
>(exponent32) -
116 static_cast<int32_t
>(FLOAT32_EXPONENT_BIAS) +
static_cast<int32_t
>(FLOAT16_EXPONENT_BIAS));
// Exponent above 31 does not fit in 5 bits: use the all-ones exponent.
// NOTE(review): in the visible lines significand16 is not cleared here, so a
// nonzero truncated significand would encode NaN rather than infinity. Also a
// value of exactly 31 is not caught by this comparison. Confirm against the
// full file whether either is intended.
117 if (signedExponent16 > FLOAT16_EXPONENT_INFINITY_NAN)
120 exponent16 = FLOAT16_EXPONENT_INFINITY_NAN;
// Exponent of zero or below: the value is too small for a normal 16-bit float.
122 else if (signedExponent16 <= 0)
// Exponent at or below -10. The body is not shown; presumably the value
// underflows to zero because no significand bit would survive the shift.
125 if (signedExponent16 <=
static_cast<int16_t
>(-FLOAT16_SIGNIFICAND_NUM_BITS))
// Otherwise build a subnormal: restore the implicit leading one at bit 23,
// then shift right by the usual 13 bits plus (1 - signedExponent16).
133 const uint32_t fullSignificand32 = significand32 | (FLOAT32_SIGNIFICAND_MASK + 1);
134 const uint32_t significandShift =
static_cast<uint32_t
>(1 - signedExponent16);
135 significand16 =
static_cast<uint16_t
>(fullSignificand32 >>
136 (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS + significandShift));
// Expression fragment reading the bit just below the kept bits (one position
// less shift). The start and end of this statement are not shown; presumably
// it assigns needsRounding.
139 ((fullSignificand32 >> (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS +
140 significandShift - 1)) &
// Remaining case: the re-biased exponent is used directly.
147 exponent16 =
static_cast<uint16_t
>(signedExponent16);
// Expression fragment reading bit 12 of significand32, the highest discarded
// bit. The start and end of this statement are not shown; presumably it
// assigns needsRounding.
149 ((significand32 >> (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS - 1)) &
// Recompose: move the sign from bit 31 to bit 15 (shift by 16), place the
// exponent at bit 10 and OR the three fields together. Any use of
// needsRounding after this point is not shown.
155 const uint16_t sign16Shifted =
156 static_cast<uint16_t
>(sign32Shifted >> (FLOAT32_SIGN_BIT_POSITION - FLOAT16_SIGN_BIT_POSITION));
157 const uint16_t exponent16Shifted =
static_cast<uint16_t
>(exponent16 << FLOAT16_EXPONENT_BIT_POSITION);
158 uint16_t float16Value =
static_cast<uint16_t
>(sign16Shifted | exponent16Shifted) | significand16;
// Reinterprets the bits of float32Value as a float: copies sizeof(uint32_t)
// bytes into a zero-initialized float with std::memcpy and returns it.
// NOTE(review): function header and braces are not shown in this listing;
// presumably the body of convertUInt32ToFloat. Assumes float is 4 bytes wide;
// no check is visible here.
171 float convertedFloat = 0.0F;
172 (void)std::memcpy(&convertedFloat, &float32Value,
sizeof(uint32_t));
174 return convertedFloat;
// Copies the object bytes of the float float32 into a zero-initialized
// uint32_t with std::memcpy (sizeof(float) bytes).
// NOTE(review): function header, braces and the return statement are not
// shown in this listing; presumably the body of convertFloatToUInt32, which
// returns float32Value. Assumes float is 4 bytes wide; no check is visible here.
179 uint32_t float32Value = 0;
180 (void)std::memcpy(&float32Value, &float32,
sizeof(
float));
// Reinterprets the bits of float64Value as a double: copies sizeof(uint64_t)
// bytes into a zero-initialized double with std::memcpy and returns it.
// NOTE(review): function header and braces are not shown in this listing;
// presumably the body of convertUInt64ToDouble. Assumes double is 8 bytes
// wide; no check is visible here.
187 double convertedDouble = 0.0;
188 (void)std::memcpy(&convertedDouble, &float64Value,
sizeof(uint64_t));
190 return convertedDouble;
// Copies the object bytes of the double float64 into a zero-initialized
// uint64_t with std::memcpy (sizeof(double) bytes).
// NOTE(review): function header, braces and the return statement are not
// shown in this listing; presumably the body of convertDoubleToUInt64, which
// returns float64Value. Assumes double is 8 bytes wide; no check is visible here.
195 uint64_t float64Value = 0;
196 (void)std::memcpy(&float64Value, &float64,
sizeof(
double));
uint64_t convertDoubleToUInt64(double float64)
uint32_t convertFloatToUInt32(float float32)
float convertUInt32ToFloat(uint32_t float32Value)
double convertUInt64ToDouble(uint64_t float64Value)
uint16_t convertFloatToUInt16(float float32)
float convertUInt16ToFloat(uint16_t float16Value)