from float + tests

2024-10-01 15:00:30 -07:00 · 2024-10-01 15:00:30 -07:00 · 0d05540a6b
commit 0d05540a6b
parent 9b3dde66bd
2 changed files with 144 additions and 0 deletions
--- a/fixed_wide/src/fixed.rs
+++ b/fixed_wide/src/fixed.rs
@ -197,6 +197,97 @@ macro_rules! impl_into_float {
 impl_into_float!(f32,u32,8,24);
 impl_into_float!(f64,u64,11,53);

+/// Splits the bit pattern of an `f32` into `(mantissa, exponent, is_negative)`.
+///
+/// For finite inputs the magnitude equals `mantissa * 2^exponent`.
+/// The mantissa is widened to `u64` so both float widths share a return type.
+#[inline]
+fn integer_decode_f32(f: f32) -> (u64, i16, bool) {
+	const FRACTION_MASK: u32 = (1 << 23) - 1;
+	const IMPLICIT_ONE: u32 = 1 << 23;
+	let raw = f.to_bits();
+	let is_negative = (raw >> 31) == 1;
+	let biased_exponent = ((raw >> 23) & 0xff) as i16;
+	let fraction = raw & FRACTION_MASK;
+	let significand = match biased_exponent {
+		// No implicit leading one; doubled so the same bias applies below
+		0 => fraction << 1,
+		_ => fraction | IMPLICIT_ONE,
+	};
+	// Remove the exponent bias (127) and account for the 23 fraction bits
+	(u64::from(significand), biased_exponent - (127 + 23), is_negative)
+}
+/// Splits the bit pattern of an `f64` into `(mantissa, exponent, is_negative)`.
+///
+/// For finite inputs the magnitude equals `mantissa * 2^exponent`.
+#[inline]
+fn integer_decode_f64(f: f64) -> (u64, i16, bool) {
+	const FRACTION_BITS: u32 = 52;
+	const FRACTION_MASK: u64 = (1 << FRACTION_BITS) - 1;
+	let raw = f.to_bits();
+	let is_negative = (raw >> 63) == 1;
+	let biased_exponent = ((raw >> FRACTION_BITS) & 0x7ff) as i16;
+	let fraction = raw & FRACTION_MASK;
+	let significand = if biased_exponent != 0 {
+		fraction | (1 << FRACTION_BITS)
+	} else {
+		// No implicit leading one; doubled so the same bias applies below
+		fraction << 1
+	};
+	// Remove the exponent bias (1023) and account for the 52 fraction bits
+	(significand, biased_exponent - (1023 + 52), is_negative)
+}
+/// Reasons a floating-point value cannot be converted into a `Fixed`.
+#[derive(Debug,Clone,Copy,Eq,PartialEq)]
+pub enum FixedFromFloatError{
+	/// The input was NaN.
+	Nan,
+	/// The input was positive or negative infinity.
+	Infinite,
+	/// The magnitude of the input is too large for the target type.
+	Overflow,
+	/// The magnitude of the input is too small for the target type.
+	Underflow,
+}
+impl core::fmt::Display for FixedFromFloatError{
+	fn fmt(&self,f:&mut core::fmt::Formatter)->Result<(),core::fmt::Error>{
+		f.write_str(match self{
+			Self::Nan=>"cannot convert NaN to fixed point",
+			Self::Infinite=>"cannot convert infinity to fixed point",
+			Self::Overflow=>"float is too large for the fixed point type",
+			Self::Underflow=>"float is too small for the fixed point type",
+		})
+	}
+}
+//lets the error be boxed as `dyn Error` and propagated with `?`
+impl std::error::Error for FixedFromFloatError{}
+impl FixedFromFloatError{
+	/// Recovers from an underflow by substituting `Fixed::ZERO`.
+	///
+	/// Every other variant is handed back unchanged as the `Err` value.
+	pub fn underflow_to_zero<const N:usize,const F:usize>(self)->Result<Fixed<N,F>,Self>{
+		if matches!(self,Self::Underflow){
+			Ok(Fixed::ZERO)
+		}else{
+			Err(self)
+		}
+	}
+}
+// Implements `TryFrom<$input>` for `Fixed<N,F>`.
+// - `$decode`: function returning `(mantissa, exponent, sign)` for the float
+// - `$input`: the float type being converted
+// - `$mantissa_bits`: width of the decoded mantissa, implicit leading one included
+// NaN and infinity are rejected, zero maps to `Self::ZERO`, and other values have
+// their mantissa shifted into at most two of the 64-bit digits.
+macro_rules! impl_from_float {
+	( $decode:ident, $input: ty, $mantissa_bits:expr ) => {
+		impl<const N:usize,const F:usize> TryFrom<$input> for Fixed<N,F>{
+			type Error=FixedFromFloatError;
+			#[inline]
+			fn try_from(value:$input)->Result<Self,Self::Error>{
+				//log2 of the 64-bit digit width; converts a bit position into a digit index
+				const DIGIT_SHIFT:u32=6;
+				match value.classify(){
+					std::num::FpCategory::Nan=>Err(FixedFromFloatError::Nan),
+					std::num::FpCategory::Infinite=>Err(FixedFromFloatError::Infinite),
+					std::num::FpCategory::Zero=>Ok(Self::ZERO),
+					std::num::FpCategory::Subnormal
+					|std::num::FpCategory::Normal
+					=>{
+						let (m,e,s)=$decode(value);
+						let mut digits=[0u64;N];
+						//bit position just above the top of the mantissa within the raw bits
+						//(assumes F is the number of fractional bits - TODO confirm)
+						let most_significant_bit=e as i32+$mantissa_bits as i32+F as i32;
+						if most_significant_bit<0{
+							//the whole mantissa lies below bit zero
+							return Err(FixedFromFloatError::Underflow);
+						}
+						//digit holding that position; a position past the N digits is an overflow
+						let digit_index=most_significant_bit>>DIGIT_SHIFT;
+						let digit=digits.get_mut(digit_index as usize).ok_or(FixedFromFloatError::Overflow)?;
+						//offset of the position within its digit
+						let take_bits=most_significant_bit-(digit_index<<DIGIT_SHIFT);
+						//shift that aligns the mantissa in this digit; negative when its low bits do not fit
+						let rest_of_mantissa=-($mantissa_bits as i32-(take_bits as i32));
+						//NOTE(review): signed_shift is defined elsewhere; presumably a positive amount
+						//shifts left and a negative amount shifts right - confirm
+						*digit=signed_shift(m,rest_of_mantissa);
+						//the mantissa straddles a digit boundary: repeat the placement for the next lower digit
+						if rest_of_mantissa<0&&digit_index!=0{
+							//we don't care if some float bits are partially truncated
+							if let Some(digit)=digits.get_mut((digit_index-1) as usize){
+								let take_bits=most_significant_bit-((digit_index-1)<<DIGIT_SHIFT);
+								let rest_of_mantissa=-($mantissa_bits as i32-(take_bits as i32));
+								*digit=signed_shift(m,rest_of_mantissa);
+							}
+						}
+						let bits=BInt::from_bits(bnum::BUint::from_digits(digits));
+						//negate when the sign flag is set; the overflow flag of the negation is discarded
+						Ok(if s{
+							Self::from_bits(bits.overflowing_neg().0)
+						}else{
+							Self::from_bits(bits)
+						})
+					},
+				}
+			}
+		}
+	}
+}
+impl_from_float!(integer_decode_f32,f32,24);
+impl_from_float!(integer_decode_f64,f64,53);
+
 impl<const N:usize,const F:usize> core::fmt::Display for Fixed<N,F>{
 	#[inline]
 	fn fmt(&self,f:&mut core::fmt::Formatter)->Result<(),core::fmt::Error>{
--- a/fixed_wide/src/tests.rs
+++ b/fixed_wide/src/tests.rs
@ -37,6 +37,59 @@ fn to_f64(){
 	assert_eq!(f,237946589723468975f64*2.0f64.powi(16));
 }

+//Exercises `TryFrom<f32>` for several `Fixed` widths: exact values, sign handling,
+//zero, truncation at the type maximum, and the overflow / underflow errors.
+#[test]
+fn from_f32(){
+	let a=I256F256::from(1)>>2;
+	let b:Result<I256F256,_>=0.25f32.try_into();
+	assert_eq!(b,Ok(a));
+	let a=I256F256::from(-1)>>2;
+	let b:Result<I256F256,_>=(-0.25f32).try_into();
+	assert_eq!(b,Ok(a));
+	//the zero must be a float literal; a bare `0` is an integer and skips the float conversion
+	let a=I256F256::from(0);
+	let b:Result<I256F256,_>=0.0f32.try_into();
+	assert_eq!(b,Ok(a));
+	let a=I256F256::from(0b101011110101001010101010000000000000000000000000000i64)<<16;
+	let b:Result<I256F256,_>=(0b101011110101001010101010000000000000000000000000000u64 as f32*2.0f32.powi(16)).try_into();
+	assert_eq!(b,Ok(a));
+	//I32F32::MAX into f32 is truncated into this value
+	let a=I32F32::raw(0b111111111111111111111111000000000000000000000000000000000000000i64);
+	let b:Result<I32F32,_>=Into::<f32>::into(I32F32::MAX).try_into();
+	assert_eq!(b,Ok(a));
+	//I32F32::MIN hits a special case since it's not representable as a positive signed integer
+	//TODO: don't return an overflow because this is technically possible
+	let b:Result<I32F32,_>=Into::<f32>::into(I32F32::MIN).try_into();
+	assert_eq!(b,Err(crate::fixed::FixedFromFloatError::Overflow));
+	//16 is within the 24 bits of float precision
+	let b:Result<I32F32,_>=Into::<f32>::into(-I32F32::MIN.fix_2()).try_into();
+	assert_eq!(b,Err(crate::fixed::FixedFromFloatError::Overflow));
+	let b:Result<I32F32,_>=f32::MIN_POSITIVE.try_into();
+	assert_eq!(b,Err(crate::fixed::FixedFromFloatError::Underflow));
+	//test many cases
+	for i in 0..64{
+		let a=crate::fixed::Fixed::<2,64>::raw_digit(0b111111111111111111111111000000000000000000000000000000000000000i64)<<i;
+		let f:f32=a.into();
+		let b:Result<crate::fixed::Fixed<2,64>,_>=f.try_into();
+		assert_eq!(b,Ok(a));
+	}
+}
+
+//Exercises `TryFrom<f64>` for `I256F256`: exact values, sign handling and zero.
+#[test]
+fn from_f64(){
+	let a=I256F256::from(1)>>2;
+	let b:Result<I256F256,_>=0.25f64.try_into();
+	assert_eq!(b,Ok(a));
+	let a=I256F256::from(-1)>>2;
+	let b:Result<I256F256,_>=(-0.25f64).try_into();
+	assert_eq!(b,Ok(a));
+	//the zero must be a float literal; a bare `0` is an integer and skips the float conversion
+	let a=I256F256::from(0);
+	let b:Result<I256F256,_>=0.0f64.try_into();
+	assert_eq!(b,Ok(a));
+	let a=I256F256::from(0b101011110101001010101010000000000000000000000000000i64)<<16;
+	let b:Result<I256F256,_>=(0b101011110101001010101010000000000000000000000000000u64 as f64*2.0f64.powi(16)).try_into();
+	assert_eq!(b,Ok(a));
+}
+
 #[test]
 fn you_can_shr_numbers(){
 	let a=I32F32::from(4);