# Source code for zserio.float
"""
The module provides helper methods for manipulating float numbers.

The following floating-point formats defined by the IEEE 754 standard are supported:

* half precision floating-point format (https://en.wikipedia.org/wiki/Half-precision_floating-point_format)
* single precision floating-point format (https://en.wikipedia.org/wiki/Single-precision_floating-point_format)
* double precision floating-point format (https://en.wikipedia.org/wiki/Double-precision_floating-point_format)
"""
import struct
def uint16_to_float(float16_value: int) -> float:
    """
    Converts 16-bit float stored as an integer value to python native float.

    The half precision bit pattern is widened to the single precision layout (sign, re-biased exponent,
    left-aligned significand) and the result is decoded by uint32_to_float.

    :param float16_value: Half precision float value stored as an integer value to convert.
    :returns: Converted python native float.
    """
    # quantities shared by several branches below
    widening_shift = FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS
    rebias = FLOAT32_EXPONENT_BIAS - FLOAT16_EXPONENT_BIAS
    implicit_one = FLOAT32_SIGNIFICAND_MASK + 1  # first bit above the float32 significand

    # split the half precision pattern into its three fields
    sign_bit = float16_value & FLOAT16_SIGN_MASK
    half_exp = (float16_value & FLOAT16_EXPONENT_MASK) >> FLOAT16_EXPONENT_BIT_POSITION
    mantissa = (float16_value & FLOAT16_SIGNIFICAND_MASK) << widening_shift

    if half_exp == FLOAT16_EXPONENT_INFINITY_NAN:
        # infinity or NaN: all-ones exponent, significand carried over unchanged
        single_exp = FLOAT32_EXPONENT_INFINITY_NAN
    elif half_exp != 0:
        # normal number: only the bias changes
        single_exp = half_exp + rebias
    elif mantissa == 0:
        # zero (the sign is still applied below)
        single_exp = 0
    else:
        # half precision subnormal: normalize it, since float32 can represent it as a normal number
        single_exp = 1 + rebias
        while not mantissa & implicit_one:
            mantissa <<= 1
            single_exp -= 1
        # the leading one is implicit in a normalized significand, so drop it
        mantissa &= FLOAT32_SIGNIFICAND_MASK

    # reassemble the single precision pattern and decode it
    bits = (
        (sign_bit << (FLOAT32_SIGN_BIT_POSITION - FLOAT16_SIGN_BIT_POSITION))
        | (single_exp << FLOAT32_EXPONENT_BIT_POSITION)
        | mantissa
    )
    return uint32_to_float(bits)
def float_to_uint16(float64: float) -> int:
    """
    Converts python native float to 16-bit float stored as integer value.

    The value is first narrowed to single precision by float_to_uint32 and then repacked into the half
    precision layout. Finite values whose magnitude does not fit into half precision become signed
    infinity, values below the normal range become subnormal numbers or signed zero, and NaN stays NaN.
    Dropped significand bits are rounded using the most significant dropped bit.

    :param float64: Python native float to convert.
    :returns: Converted half precision float value stored as an integer value.
    """
    float32_value = float_to_uint32(float64)

    # decompose single precision float (float32)
    sign32_shifted = float32_value & FLOAT32_SIGN_MASK
    exponent32 = (float32_value & FLOAT32_EXPONENT_MASK) >> FLOAT32_EXPONENT_BIT_POSITION
    significand32 = float32_value & FLOAT32_SIGNIFICAND_MASK

    # calculate significand for half precision float (float16)
    dropped_bits = FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS
    significand16 = significand32 >> dropped_bits

    # calculate exponent for half precision float (float16)
    needs_rounding = False
    if exponent32 == 0:
        # zero or float32 subnormal (denormal) number, both become zero
        significand16 = 0
        exponent16 = 0
    elif exponent32 == FLOAT32_EXPONENT_INFINITY_NAN:
        # infinity or NaN
        exponent16 = FLOAT16_EXPONENT_INFINITY_NAN
        if significand32 != 0 and significand16 == 0:
            # NaN whose payload lived only in the dropped low bits: keep a non-zero significand,
            # otherwise the result would be encoded as infinity
            significand16 = 1
    else:
        # normal number
        signed_exponent16 = exponent32 - FLOAT32_EXPONENT_BIAS + FLOAT16_EXPONENT_BIAS
        if signed_exponent16 >= FLOAT16_EXPONENT_INFINITY_NAN:
            # exponent overflow: the all-ones exponent is reserved for infinity/NaN, so a finite value
            # this large must become infinity; the significand is cleared (non-zero bits would encode
            # NaN) and no rounding is applied
            exponent16 = FLOAT16_EXPONENT_INFINITY_NAN
            significand16 = 0
        elif signed_exponent16 <= 0:
            # exponent underflow
            if signed_exponent16 <= -FLOAT16_SIGNIFICAND_NUM_BITS:
                # too big underflow, set to zero
                exponent16 = 0
                significand16 = 0
            else:
                # we can still use subnormal numbers
                exponent16 = 0
                # subnormals have no implicit leading one, so make it explicit before shifting
                full_significand32 = significand32 | (FLOAT32_SIGNIFICAND_MASK + 1)
                significand_shift = 1 - signed_exponent16
                significand16 = full_significand32 >> (dropped_bits + significand_shift)
                # round up when the most significant dropped bit is set
                needs_rounding = ((full_significand32 >> (dropped_bits + significand_shift - 1)) & 1) != 0
        else:
            # exponent ok
            exponent16 = signed_exponent16
            # round up when the most significant dropped bit is set
            needs_rounding = ((significand32 >> (dropped_bits - 1)) & 1) != 0

    # compose half precision float (float16)
    sign16_shifted = sign32_shifted >> (FLOAT32_SIGN_BIT_POSITION - FLOAT16_SIGN_BIT_POSITION)
    exponent16_shifted = exponent16 << FLOAT16_EXPONENT_BIT_POSITION
    float16_value = sign16_shifted | exponent16_shifted | significand16

    # check rounding
    if needs_rounding:
        # the carry may ripple into the exponent; from the largest finite value this yields infinity
        float16_value += 1

    return float16_value
def uint32_to_float(float32_value: int) -> float:
    """
    Converts 32-bit float stored as an integer value to python native float.

    The integer is serialized to 4 big-endian bytes which are then decoded as a single precision float.

    :param float32_value: Single precision float value stored as an integer value to convert.
    :returns: Converted python native float.
    """
    # struct.unpack always returns a tuple; destructure its single element
    (native_float,) = struct.unpack(">f", float32_value.to_bytes(4, byteorder="big"))
    return native_float
def float_to_uint32(float64: float) -> int:
    """
    Converts python native float to 32-bit float stored as integer value.

    The float is packed as a big-endian single precision value and the same 4 bytes are read back as an
    unsigned 32-bit integer.

    :param float64: Python native float to convert.
    :returns: Converted single precision float value stored as an integer value.
    """
    packed = struct.pack(">f", float64)
    (bit_pattern,) = struct.unpack(">I", packed)
    return bit_pattern
def uint64_to_float(float64_value: int) -> float:
    """
    Converts 64-bit float stored as an integer value to python native float.

    The integer is serialized to 8 big-endian bytes which are then decoded as a double precision float.

    :param float64_value: Double precision float value stored as an integer value to convert.
    :returns: Converted python native float.
    """
    # struct.unpack always returns a tuple; destructure its single element
    (native_float,) = struct.unpack(">d", float64_value.to_bytes(8, byteorder="big"))
    return native_float
def float_to_uint64(float64: float) -> int:
    """
    Converts python native float to 64-bit float stored as integer value.

    The float is packed as a big-endian double precision value and the same 8 bytes are read back as an
    unsigned 64-bit integer.

    :param float64: Python native float to convert.
    :returns: Converted double precision float value stored as an integer value.
    """
    packed = struct.pack(">d", float64)
    (bit_pattern,) = struct.unpack(">Q", packed)
    return bit_pattern
# Half precision (float16) layout: 1 sign bit | 5 exponent bits | 10 significand bits.
FLOAT16_SIGN_BIT_POSITION = 15
FLOAT16_EXPONENT_BIT_POSITION = 10
# the significand occupies every bit below the exponent field
FLOAT16_SIGNIFICAND_NUM_BITS = FLOAT16_EXPONENT_BIT_POSITION
# all-ones exponent field value, used for infinity and NaN
FLOAT16_EXPONENT_INFINITY_NAN = 0x001F
FLOAT16_EXPONENT_BIAS = 15
FLOAT16_SIGN_MASK = 1 << FLOAT16_SIGN_BIT_POSITION  # 0x8000
FLOAT16_EXPONENT_MASK = FLOAT16_EXPONENT_INFINITY_NAN << FLOAT16_EXPONENT_BIT_POSITION  # 0x7C00
FLOAT16_SIGNIFICAND_MASK = (1 << FLOAT16_SIGNIFICAND_NUM_BITS) - 1  # 0x03FF

# Single precision (float32) layout: 1 sign bit | 8 exponent bits | 23 significand bits.
FLOAT32_SIGN_BIT_POSITION = 31
FLOAT32_EXPONENT_BIT_POSITION = 23
# the significand occupies every bit below the exponent field
FLOAT32_SIGNIFICAND_NUM_BITS = FLOAT32_EXPONENT_BIT_POSITION
# all-ones exponent field value, used for infinity and NaN
FLOAT32_EXPONENT_INFINITY_NAN = 0x00FF
FLOAT32_EXPONENT_BIAS = 127
FLOAT32_SIGN_MASK = 1 << FLOAT32_SIGN_BIT_POSITION  # 0x80000000
FLOAT32_EXPONENT_MASK = FLOAT32_EXPONENT_INFINITY_NAN << FLOAT32_EXPONENT_BIT_POSITION  # 0x7F800000
FLOAT32_SIGNIFICAND_MASK = (1 << FLOAT32_SIGNIFICAND_NUM_BITS) - 1  # 0x007FFFFF