FloatUtilTest.java
package zserio.runtime;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.Test;
/**
 * Tests of the {@link FloatUtil} conversions between raw IEEE 754 bit patterns (float16 held in a
 * {@code short}, float32 held in an {@code int}, float64 held in a {@code long}) and Java floating point values.
 *
 * <p>Bit patterns are assembled from their sign, biased exponent and significand fields by the
 * {@code createFloat*Value} helpers, so every case spells out exactly which encoding it exercises.</p>
 */
public class FloatUtilTest
{
    /**
     * Checks {@code FloatUtil.convertShortToFloat} for zeros, infinities, NaNs, normal and subnormal float16
     * inputs.
     */
    @Test
    public void convertShortToFloat()
    {
        // plus zero
        final short float16ValuePlusZero = createFloat16Value((short)0, (short)0, (short)0); // +0.0
        checkFloat16ToFloat32Conversion(float16ValuePlusZero, 0.0f);

        // minus zero
        final short float16ValueMinusZero = createFloat16Value((short)1, (short)0, (short)0); // -0.0
        checkFloat16ToFloat32Conversion(float16ValueMinusZero, -0.0f);

        // plus infinity
        final short float16ValuePlusInfinity = createFloat16Value((short)0, (short)0x1F, (short)0); // +INF
        final int float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF
        checkFloat16ToFloat32Conversion(float16ValuePlusInfinity, float32ValuePlusInfinity);

        // minus infinity
        final short float16ValueMinusInfinity = createFloat16Value((short)1, (short)0x1F, (short)0); // -INF
        final int float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF
        checkFloat16ToFloat32Conversion(float16ValueMinusInfinity, float32ValueMinusInfinity);

        // quiet NaN: the result is read back with Float.floatToIntBits, which reports every NaN as the
        // canonical pattern having only the most significant significand bit set
        final short float16ValueQuietNan = createFloat16Value((short)0, (short)0x1F, (short)0x3FF); // +NaN
        final int float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x400000); // canonical NaN
        checkFloat16ToFloat32Conversion(float16ValueQuietNan, float32ValueQuietNan);

        // NaN with the sign bit set: expected to be reported as the same canonical NaN
        final short float16ValueNegativeNan =
                createFloat16Value((short)1, (short)0x1F, (short)0x3FF); // -NaN
        checkFloat16ToFloat32Conversion(float16ValueNegativeNan, float32ValueQuietNan);

        // normal numbers
        final short float16ValueOne = createFloat16Value((short)0, (short)15, (short)0); // 1.0
        checkFloat16ToFloat32Conversion(float16ValueOne, 1.0f);

        final short float16ValueOnePlus =
                createFloat16Value((short)0, (short)15, (short)0x01); // 1.0 + 2^-10
        final int float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10
        checkFloat16ToFloat32Conversion(float16ValueOnePlus, float32ValueOnePlus);

        final short float16ValueMax =
                createFloat16Value((short)0, (short)30, (short)0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10)
        checkFloat16ToFloat32Conversion(float16ValueMax, 65504.0f);

        // subnormal numbers
        final short float16ValueMinSubnormal =
                createFloat16Value((short)0, (short)0, (short)1); // 2^-14 (2^-10)
        final int float32ValueMinSubnormal = createFloat32Value(0, 103, 0); // 2^-24
        checkFloat16ToFloat32Conversion(float16ValueMinSubnormal, float32ValueMinSubnormal);

        final short float16ValueMaxSubnormal =
                createFloat16Value((short)0, (short)0, (short)0x3FF); // 2^-14 (2^-1 + ... + 2^-10)
        final int float32ValueMaxSubnormal =
                createFloat32Value(0, 112, 0x7FC000); // 2^-15 (1 + 2^-1 + ... + 2^-9)
        checkFloat16ToFloat32Conversion(float16ValueMaxSubnormal, float32ValueMaxSubnormal);
    }

    /**
     * Checks {@code FloatUtil.convertFloatToShort} for zeros, infinities, NaNs, normal numbers (exact, rounded,
     * underflowing and overflowing) and float32 subnormal inputs.
     */
    @Test
    public void convertFloatToShort()
    {
        // plus zero
        final short float16ValuePlusZero = createFloat16Value((short)0, (short)0, (short)0); // +0.0
        checkFloat32ToFloat16Conversion(0.0f, float16ValuePlusZero);

        // minus zero
        final short float16ValueMinusZero = createFloat16Value((short)1, (short)0, (short)0); // -0.0
        checkFloat32ToFloat16Conversion(-0.0f, float16ValueMinusZero);

        // plus infinity
        final int float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF
        final short float16ValuePlusInfinity = createFloat16Value((short)0, (short)0x1F, (short)0); // +INF
        checkFloat32ToFloat16Conversion(float32ValuePlusInfinity, float16ValuePlusInfinity);

        // minus infinity
        final int float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF
        final short float16ValueMinusInfinity = createFloat16Value((short)1, (short)0x1F, (short)0); // -INF
        checkFloat32ToFloat16Conversion(float32ValueMinusInfinity, float16ValueMinusInfinity);

        // quiet NaN: the expected float16 keeps only the most significant significand bit
        final int float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x7FE000); // +NaN
        final short float16ValueQuietNan =
                createFloat16Value((short)0, (short)0x1F, (short)0x200); // +NaN
        checkFloat32ToFloat16Conversion(float32ValueQuietNan, float16ValueQuietNan);

        // NaN with the sign bit set: expected to produce the same positive quiet NaN
        final int float32ValueNegativeNan = createFloat32Value(1, 0xFF, 0x7FE000); // -NaN
        checkFloat32ToFloat16Conversion(float32ValueNegativeNan, float16ValueQuietNan);

        // normal numbers
        final short float16ValueOne = createFloat16Value((short)0, (short)15, (short)0); // 1.0
        checkFloat32ToFloat16Conversion(1.0f, float16ValueOne);

        final int float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10
        final short float16ValueOnePlus =
                createFloat16Value((short)0, (short)15, (short)0x01); // 1.0 + 2^-10
        checkFloat32ToFloat16Conversion(float32ValueOnePlus, float16ValueOnePlus);

        final short float16ValueMax =
                createFloat16Value((short)0, (short)30, (short)0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10)
        checkFloat32ToFloat16Conversion(65504.0f, float16ValueMax);

        // normal numbers converted to zero
        final int float32ValueUnderflow = createFloat32Value(0, 102, 0); // 2^-25
        checkFloat32ToFloat16Conversion(float32ValueUnderflow, float16ValuePlusZero);

        // normal numbers converted to subnormal numbers
        final int float32ValueMinUnderflow = createFloat32Value(0, 103, 1); // 2^-24 (1 + 2^-23)
        final short float16ValueMinSubnormal = createFloat16Value((short)0, (short)0, (short)1); // 2^-24
        checkFloat32ToFloat16Conversion(float32ValueMinUnderflow, float16ValueMinSubnormal);

        // normal numbers converted to subnormal numbers with rounding
        final int float32ValueMinUnderflowRounding =
                createFloat32Value(0, 104, 0x200000); // 2^-23 (1 + 2^-2)
        final short float16ValueMinSubnormalRounding =
                createFloat16Value((short)0, (short)0, (short)0x3); // 2^-14 (2^-9 + 2^-10)
        checkFloat32ToFloat16Conversion(float32ValueMinUnderflowRounding, float16ValueMinSubnormalRounding);

        // normal numbers converted to infinity
        final int float32ValueOverflow = createFloat32Value(0, 144, 0); // 2^17
        checkFloat32ToFloat16Conversion(float32ValueOverflow, float16ValuePlusInfinity);

        // normal numbers converted with rounding
        final int float32ValueRounding = createFloat32Value(0, 127, 0x401000); // 1 + 2^-1 + 2^-11
        final short float16ValueRounding =
                createFloat16Value((short)0, (short)15, (short)0x201); // 1 + 2^-1 + 2^-10
        checkFloat32ToFloat16Conversion(float32ValueRounding, float16ValueRounding);

        // float32 subnormal numbers are expected to become zero
        final int float32ValueMinSubnormal = createFloat32Value(0, 0, 1); // 2^-126 (2^-23)
        checkFloat32ToFloat16Conversion(float32ValueMinSubnormal, float16ValuePlusZero);

        final int float32ValueMaxSubnormal =
                createFloat32Value(0, 0, 0x007FFFFF); // 2^-126 (2^-1 + ... + 2^-23)
        checkFloat32ToFloat16Conversion(float32ValueMaxSubnormal, float16ValuePlusZero);
    }

    /** Checks {@code FloatUtil.convertIntToFloat} against every entry of {@link #TEST_FLOAT32_DATA}. */
    @Test
    public void convertIntToFloat()
    {
        for (TestFloat32Element testElement : TEST_FLOAT32_DATA)
        {
            final int float32Value =
                    createFloat32Value(testElement.sign, testElement.exponent, testElement.significand);
            final float convertedFloat = FloatUtil.convertIntToFloat(float32Value);
            // compared as strings so that +0.0 and -0.0 are told apart
            assertEquals(Float.toString(testElement.expectedFloat), Float.toString(convertedFloat));
        }
    }

    /** Checks {@code FloatUtil.convertFloatToInt} against every entry of {@link #TEST_FLOAT32_DATA}. */
    @Test
    public void convertFloatToInt()
    {
        for (TestFloat32Element testElement : TEST_FLOAT32_DATA)
        {
            final int convertedFloatValue = FloatUtil.convertFloatToInt(testElement.expectedFloat);
            final int expectedFloatValue =
                    createFloat32Value(testElement.sign, testElement.exponent, testElement.significand);
            assertEquals(expectedFloatValue, convertedFloatValue);
        }
    }

    /** Checks {@code FloatUtil.convertLongToDouble} against every entry of {@link #TEST_FLOAT64_DATA}. */
    @Test
    public void convertLongToDouble()
    {
        for (TestFloat64Element testElement : TEST_FLOAT64_DATA)
        {
            final long float64Value =
                    createFloat64Value(testElement.sign, testElement.exponent, testElement.significand);
            final double convertedDouble = FloatUtil.convertLongToDouble(float64Value);
            // compared as strings so that +0.0 and -0.0 are told apart
            assertEquals(Double.toString(testElement.expectedDouble), Double.toString(convertedDouble));
        }
    }

    /** Checks {@code FloatUtil.convertDoubleToLong} against every entry of {@link #TEST_FLOAT64_DATA}. */
    @Test
    public void convertDoubleToLong()
    {
        for (TestFloat64Element testElement : TEST_FLOAT64_DATA)
        {
            final long convertedDoubleValue = FloatUtil.convertDoubleToLong(testElement.expectedDouble);
            final long expectedDoubleValue =
                    createFloat64Value(testElement.sign, testElement.exponent, testElement.significand);
            assertEquals(expectedDoubleValue, convertedDoubleValue);
        }
    }

    /**
     * Assembles a float16 bit pattern.
     *
     * @param sign Sign bit (0 or 1).
     * @param exponent Biased exponent field.
     * @param significand Significand field.
     *
     * @return The three fields OR-ed together at their bit positions, truncated to 16 bits.
     */
    private static short createFloat16Value(short sign, short exponent, short significand)
    {
        return (short)((sign << FLOAT16_SIGN_BIT_POSITION) | (exponent << FLOAT16_EXPONENT_BIT_POSITION) |
                significand);
    }

    /**
     * Assembles a float32 bit pattern.
     *
     * @param sign Sign bit (0 or 1).
     * @param exponent Biased exponent field.
     * @param significand Significand field.
     *
     * @return The three fields OR-ed together at their bit positions.
     */
    private static int createFloat32Value(int sign, int exponent, int significand)
    {
        return (sign << FLOAT32_SIGN_BIT_POSITION) | (exponent << FLOAT32_EXPONENT_BIT_POSITION) | significand;
    }

    /**
     * Assembles a float64 bit pattern.
     *
     * @param sign Sign bit (0 or 1).
     * @param exponent Biased exponent field.
     * @param significand Significand field.
     *
     * @return The three fields OR-ed together at their bit positions.
     */
    private static long createFloat64Value(long sign, long exponent, long significand)
    {
        return (sign << FLOAT64_SIGN_BIT_POSITION) | (exponent << FLOAT64_EXPONENT_BIT_POSITION) | significand;
    }

    /**
     * Converts a float16 bit pattern and compares the raw bits of the result with the expected float32 bits.
     *
     * @param float16Value Float16 bit pattern to convert.
     * @param expectedFloat32Value Expected result as reported by {@code Float.floatToIntBits}.
     */
    private static void checkFloat16ToFloat32Conversion(short float16Value, int expectedFloat32Value)
    {
        final float float32 = FloatUtil.convertShortToFloat(float16Value);
        assertEquals(expectedFloat32Value, Float.floatToIntBits(float32));
    }

    /**
     * Converts a float16 bit pattern and compares the result with the expected float by string representation.
     *
     * @param float16Value Float16 bit pattern to convert.
     * @param expectedFloat32 Expected float value.
     */
    private static void checkFloat16ToFloat32Conversion(short float16Value, float expectedFloat32)
    {
        // compared as strings so that +0.0 and -0.0 are told apart
        assertEquals(
                Float.toString(expectedFloat32), Float.toString(FloatUtil.convertShortToFloat(float16Value)));
    }

    /**
     * Converts the float described by a float32 bit pattern and compares the result with the expected float16
     * bits.
     *
     * @param float32Value Float32 bit pattern of the value to convert.
     * @param expectedFloat16Value Expected float16 bit pattern.
     */
    private static void checkFloat32ToFloat16Conversion(int float32Value, short expectedFloat16Value)
    {
        final float float32 = Float.intBitsToFloat(float32Value);
        assertEquals(expectedFloat16Value, FloatUtil.convertFloatToShort(float32));
    }

    /**
     * Converts a float and compares the result with the expected float16 bits.
     *
     * @param float32 Float value to convert.
     * @param expectedFloat16Value Expected float16 bit pattern.
     */
    private static void checkFloat32ToFloat16Conversion(float float32, short expectedFloat16Value)
    {
        assertEquals(expectedFloat16Value, FloatUtil.convertFloatToShort(float32));
    }

    /** Immutable float32 test vector: the three bit fields and the float they encode. */
    private static final class TestFloat32Element
    {
        public TestFloat32Element(int sign, int exponent, int significand, float expectedFloat)
        {
            this.sign = sign;
            this.exponent = exponent;
            this.significand = significand;
            this.expectedFloat = expectedFloat;
        }

        public final int sign;
        public final int exponent;
        public final int significand;
        public final float expectedFloat;
    }

    /** Immutable float64 test vector: the three bit fields and the double they encode. */
    private static final class TestFloat64Element
    {
        public TestFloat64Element(long sign, long exponent, long significand, double expectedDouble)
        {
            this.sign = sign;
            this.exponent = exponent;
            this.significand = significand;
            this.expectedDouble = expectedDouble;
        }

        public final long sign;
        public final long exponent;
        public final long significand;
        public final double expectedDouble;
    }

    /** Float32 vectors shared by the int/float conversion tests (exponent bias is 127). */
    private static final TestFloat32Element[] TEST_FLOAT32_DATA = {
            new TestFloat32Element(0, 0, 0, 0.0f),
            new TestFloat32Element(1, 0, 0, -0.0f),
            new TestFloat32Element(0, 127, 0, +1.0f),
            new TestFloat32Element(1, 127, 0, -1.0f),
            // 2^1 (1 + 2^-1 + 2^-2)
            new TestFloat32Element(0, 128, 0x600000, 3.5f),
            // 2^-1 (1 + 2^-1 + 2^-2)
            new TestFloat32Element(0, 126, 0x600000, 0.875f),
            // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
            new TestFloat32Element(0, 130, 0x1E0000, 9.875f),
            // 2^-1 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
            new TestFloat32Element(0, 126, 0x1E0000, 0.6171875f),
    };

    /** Float64 vectors shared by the long/double conversion tests (exponent bias is 1023). */
    private static final TestFloat64Element[] TEST_FLOAT64_DATA = {
            new TestFloat64Element(0, 0, 0, 0.0),
            new TestFloat64Element(1, 0, 0, -0.0),
            new TestFloat64Element(0, 1023, 0, +1.0),
            new TestFloat64Element(1, 1023, 0, -1.0),
            // 2^1 (1 + 2^-1 + 2^-2)
            new TestFloat64Element(0, 1024, 0xC000000000000L, 3.5),
            // 2^-1 (1 + 2^-1 + 2^-2)
            new TestFloat64Element(0, 1022, 0xC000000000000L, 0.875),
            // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
            new TestFloat64Element(0, 1026, 0x3C00000000000L, 9.875),
            // 2^-1 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6)
            new TestFloat64Element(0, 1022, 0x3C00000000000L, 0.6171875),
    };

    // bit positions of the sign and exponent fields; used only as shift distances
    private static final int FLOAT16_SIGN_BIT_POSITION = 15;
    private static final int FLOAT16_EXPONENT_BIT_POSITION = 10;

    private static final int FLOAT32_SIGN_BIT_POSITION = 31;
    private static final int FLOAT32_EXPONENT_BIT_POSITION = 23;

    private static final int FLOAT64_SIGN_BIT_POSITION = 63;
    private static final int FLOAT64_EXPONENT_BIT_POSITION = 52;
}