Coverage for /home/runner/work/zserio/zserio/compiler/extensions/python/runtime/src/zserio/float.py: 100%

82 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-12-05 10:43 +0000

1""" 

2The module provides helper functions for manipulating floating-point numbers. 

3 

4The following float formats defined by IEEE 754 standard are supported: 

5 

6* half precision floating-point format (https://en.wikipedia.org/wiki/Half-precision_floating-point_format) 

7* single precision floating-point format (https://en.wikipedia.org/wiki/Single-precision_floating-point_format) 

8* double precision floating-point format (https://en.wikipedia.org/wiki/Double-precision_floating-point_format) 

9""" 

10 

11import struct 

12 

13 

def uint16_to_float(float16_value: int) -> float:
    """
    Widens a half precision float, given as its raw 16-bit pattern, to python native float.

    The pattern is re-encoded field by field as a single precision (float32) pattern, which is
    then decoded by uint32_to_float.

    :param float16_value: Half precision float value stored as an integer value to convert.
    :returns: Converted python native float.
    """

    # split the half precision pattern into sign, exponent and fraction fields
    sign_field = float16_value & FLOAT16_SIGN_MASK
    half_exponent = (float16_value & FLOAT16_EXPONENT_MASK) >> FLOAT16_EXPONENT_BIT_POSITION
    half_fraction = float16_value & FLOAT16_SIGNIFICAND_MASK

    # left-align the fraction inside the wider float32 significand field
    fraction = half_fraction << (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS)

    if half_exponent == FLOAT16_EXPONENT_INFINITY_NAN:
        # infinity or NaN keeps the all-ones exponent
        exponent = FLOAT32_EXPONENT_INFINITY_NAN
    elif half_exponent != 0:
        # normal number, only the bias changes
        exponent = half_exponent - FLOAT16_EXPONENT_BIAS + FLOAT32_EXPONENT_BIAS
    elif fraction == 0:
        # zero
        exponent = 0
    else:
        # half precision subnormal: it is representable as a float32 normal number, so shift
        # the fraction up until its leading one lands on the implicit-bit position
        implicit_bit = FLOAT32_SIGNIFICAND_MASK + 1
        exponent = 1 + FLOAT32_EXPONENT_BIAS - FLOAT16_EXPONENT_BIAS
        while not fraction & implicit_bit:
            fraction <<= 1
            exponent -= 1

        # the leading one is implicit in a normalized number, drop it from the stored field
        fraction &= FLOAT32_SIGNIFICAND_MASK

    # assemble the float32 pattern and let struct decode it
    pattern = (
        (sign_field << (FLOAT32_SIGN_BIT_POSITION - FLOAT16_SIGN_BIT_POSITION))
        | (exponent << FLOAT32_EXPONENT_BIT_POSITION)
        | fraction
    )

    return uint32_to_float(pattern)

59 

60 

def float_to_uint16(float64: float) -> int:
    """
    Converts python native float to 16-bit float stored as integer value.

    The value is first narrowed to single precision by float_to_uint32 and then re-encoded field
    by field. When significand bits are dropped, only the first dropped bit is inspected and the
    magnitude is rounded up if it is set. Finite values whose exponent does not fit half precision
    are converted to infinity and NaN is always kept as NaN.

    :param float64: Python native float to convert.
    :returns: Converted half precision float value stored as an integer value.
    """

    float32_value = float_to_uint32(float64)

    # decompose single precision float (float32)
    sign32_shifted = float32_value & FLOAT32_SIGN_MASK
    exponent32 = (float32_value & FLOAT32_EXPONENT_MASK) >> FLOAT32_EXPONENT_BIT_POSITION
    significand32 = float32_value & FLOAT32_SIGNIFICAND_MASK

    # calculate significand for half precision float (float16)
    significand16 = significand32 >> (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS)

    # calculate exponent for half precision float (float16)
    needs_rounding = False
    if exponent32 == 0:
        # zero or float32 subnormal (denormal) number, both become (signed) zero
        exponent16 = 0
        significand16 = 0
    elif exponent32 == FLOAT32_EXPONENT_INFINITY_NAN:
        # infinity or NaN
        exponent16 = FLOAT16_EXPONENT_INFINITY_NAN
        if significand32 != 0 and significand16 == 0:
            # NaN payload was only in the dropped low bits; an all-zero significand would mean
            # infinity, so set the top significand bit to keep the value NaN
            significand16 = 1 << (FLOAT16_SIGNIFICAND_NUM_BITS - 1)
    else:
        # normal number
        signed_exponent16 = exponent32 - FLOAT32_EXPONENT_BIAS + FLOAT16_EXPONENT_BIAS
        if signed_exponent16 >= FLOAT16_EXPONENT_INFINITY_NAN:
            # exponent overflow: the all-ones exponent is reserved for infinity/NaN, so a finite
            # value reaching it must become infinity (cleared significand, no rounding), otherwise
            # the leftover significand bits would encode NaN or carry into the sign bit
            exponent16 = FLOAT16_EXPONENT_INFINITY_NAN
            significand16 = 0
        elif signed_exponent16 <= 0:
            # exponent underflow
            if signed_exponent16 <= -FLOAT16_SIGNIFICAND_NUM_BITS:
                # too big underflow, set to zero
                exponent16 = 0
                significand16 = 0
            else:
                # we can still use subnormal numbers
                exponent16 = 0
                # make the implicit leading one explicit before shifting it into the significand
                full_significand32 = significand32 | (FLOAT32_SIGNIFICAND_MASK + 1)
                significand_shift = 1 - signed_exponent16
                total_shift = (
                    FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS + significand_shift
                )
                significand16 = full_significand32 >> total_shift

                # round up when the first dropped bit is set
                needs_rounding = ((full_significand32 >> (total_shift - 1)) & 1) != 0
        else:
            # exponent ok
            exponent16 = signed_exponent16
            # round up when the first dropped bit is set
            needs_rounding = (
                (significand32 >> (FLOAT32_SIGNIFICAND_NUM_BITS - FLOAT16_SIGNIFICAND_NUM_BITS - 1)) & 1
            ) != 0

    # compose half precision float (float16)
    sign16_shifted = sign32_shifted >> (FLOAT32_SIGN_BIT_POSITION - FLOAT16_SIGN_BIT_POSITION)
    exponent16_shifted = exponent16 << FLOAT16_EXPONENT_BIT_POSITION
    float16_value = sign16_shifted | exponent16_shifted | significand16

    # check rounding
    if needs_rounding:
        # the carry may ripple from the significand into the exponent; the largest exponent that
        # can reach this point is one below the all-ones exponent, so the worst case is infinity
        float16_value += 1

    return float16_value

134 

135 

def uint32_to_float(float32_value: int) -> float:
    """
    Decodes a raw 32-bit pattern as a single precision float and returns it as python native float.

    :param float32_value: Single precision float value stored as an integer value to convert.
    :returns: Converted python native float.
    """

    # serialize the pattern as 4 big-endian bytes and reinterpret them as a big-endian float32
    (decoded,) = struct.unpack(">f", float32_value.to_bytes(4, "big"))

    return decoded

147 

148 

def float_to_uint32(float64: float) -> int:
    """
    Encodes python native float as a single precision float and returns its raw 32-bit pattern.

    :param float64: Python native float to convert.
    :returns: Converted single precision float value stored as an integer value.
    """

    # pack as big-endian float32 and read the very same 4 bytes back as a big-endian unsigned int
    (pattern,) = struct.unpack(">I", struct.pack(">f", float64))

    return pattern

160 

161 

def uint64_to_float(float64_value: int) -> float:
    """
    Decodes a raw 64-bit pattern as a double precision float and returns it as python native float.

    :param float64_value: Double precision float value stored as an integer value to convert.
    :returns: Converted python native float.
    """

    # serialize the pattern as 8 big-endian bytes and reinterpret them as a big-endian float64
    (decoded,) = struct.unpack(">d", float64_value.to_bytes(8, "big"))

    return decoded

173 

174 

def float_to_uint64(float64: float) -> int:
    """
    Encodes python native float as a double precision float and returns its raw 64-bit pattern.

    :param float64: Python native float to convert.
    :returns: Converted double precision float value stored as an integer value.
    """

    # pack as big-endian float64 and read the very same 8 bytes back as a big-endian unsigned int
    (pattern,) = struct.unpack(">Q", struct.pack(">d", float64))

    return pattern

186 

187 

# Half precision (float16) layout: sign at bit 15, exponent in bits 10-14, significand in bits 0-9.
FLOAT16_SIGN_BIT_POSITION = 15
FLOAT16_EXPONENT_BIT_POSITION = 10

# the significand occupies all the bits below the exponent
FLOAT16_SIGNIFICAND_NUM_BITS = FLOAT16_EXPONENT_BIT_POSITION

FLOAT16_SIGN_MASK = 1 << FLOAT16_SIGN_BIT_POSITION  # 0x8000
FLOAT16_SIGNIFICAND_MASK = (1 << FLOAT16_SIGNIFICAND_NUM_BITS) - 1  # 0x03FF
# everything below the sign bit which is not significand
FLOAT16_EXPONENT_MASK = (FLOAT16_SIGN_MASK - 1) ^ FLOAT16_SIGNIFICAND_MASK  # 0x7C00

# all-ones exponent field, used by infinity and NaN
FLOAT16_EXPONENT_INFINITY_NAN = FLOAT16_EXPONENT_MASK >> FLOAT16_EXPONENT_BIT_POSITION  # 0x001F
FLOAT16_EXPONENT_BIAS = 15

# Single precision (float32) layout: sign at bit 31, exponent in bits 23-30, significand in bits 0-22.
FLOAT32_SIGN_BIT_POSITION = 31
FLOAT32_EXPONENT_BIT_POSITION = 23

# the significand occupies all the bits below the exponent
FLOAT32_SIGNIFICAND_NUM_BITS = FLOAT32_EXPONENT_BIT_POSITION

FLOAT32_SIGN_MASK = 1 << FLOAT32_SIGN_BIT_POSITION  # 0x80000000
FLOAT32_SIGNIFICAND_MASK = (1 << FLOAT32_SIGNIFICAND_NUM_BITS) - 1  # 0x007FFFFF
# everything below the sign bit which is not significand
FLOAT32_EXPONENT_MASK = (FLOAT32_SIGN_MASK - 1) ^ FLOAT32_SIGNIFICAND_MASK  # 0x7F800000

# all-ones exponent field, used by infinity and NaN
FLOAT32_EXPONENT_INFINITY_NAN = FLOAT32_EXPONENT_MASK >> FLOAT32_EXPONENT_BIT_POSITION  # 0x00FF
FLOAT32_EXPONENT_BIAS = 127

210FLOAT32_EXPONENT_BIAS = 127