rewrite float-to-fixed conversion (TryFrom<f32>/TryFrom<f64> for Fixed)

2024-10-02 12:32:41 -07:00 · 2024-10-02 12:32:41 -07:00 · fa547050e2
commit fa547050e2
parent 64e44846aa
2 changed files with 59 additions and 52 deletions
--- a/fixed_wide/src/fixed.rs
+++ b/fixed_wide/src/fixed.rs
@ -197,34 +197,6 @@ macro_rules! impl_into_float {
 impl_into_float!(f32,u32,8,24);
 impl_into_float!(f64,u64,11,53);

-#[inline]
-fn integer_decode_f32(f: f32) -> (u64, i16, bool) {
-	let bits: u32 = f.to_bits();
-	let sign: bool = bits & (1<<31) != 0;
-	let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
-	let mantissa = if exponent == 0 {
-		(bits & 0x7fffff) << 1
-	} else {
-		(bits & 0x7fffff) | 0x800000
-	};
-	// Exponent bias + mantissa shift
-	exponent -= 127 + 23;
-	(mantissa as u64, exponent, sign)
-}
-#[inline]
-fn integer_decode_f64(f: f64) -> (u64, i16, bool) {
-	let bits: u64 = f.to_bits();
-	let sign: bool = bits & (1u64<<63) != 0;
-	let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
-	let mantissa = if exponent == 0 {
-		(bits & 0xfffffffffffff) << 1
-	} else {
-		(bits & 0xfffffffffffff) | 0x10000000000000
-	};
-	// Exponent bias + mantissa shift
-	exponent -= 1023 + 52;
-	(mantissa, exponent, sign)
-}
 #[derive(Debug,Eq,PartialEq)]
 pub enum FixedFromFloatError{
 	Nan,
@ -240,13 +212,19 @@ impl FixedFromFloatError{
 		}
 	}
 }
+
+struct FloatInfo{
+	sign:bool,
+	digit_index:usize,
+	bits:[u64;2],
+}
+
 macro_rules! impl_from_float {
-	( $decode:ident, $input: ty, $mantissa_bits:expr ) => {
+	( $input: ty, $unsigned: ty, $exponent_bits:expr, $mantissa_bits:expr, $exp_bias:expr ) => {
 		impl<const N:usize,const F:usize> TryFrom<$input> for Fixed<N,F>{
 			type Error=FixedFromFloatError;
 			#[inline]
 			fn try_from(value:$input)->Result<Self,Self::Error>{
-				const DIGIT_SHIFT:u32=6;
 				match value.classify(){
 					std::num::FpCategory::Nan=>Err(FixedFromFloatError::Nan),
 					std::num::FpCategory::Infinite=>Err(FixedFromFloatError::Infinite),
@ -254,27 +232,54 @@ macro_rules! impl_from_float {
 					std::num::FpCategory::Subnormal
 					|std::num::FpCategory::Normal
 					=>{
-						let (m,e,s)=$decode(value);
+						fn to_float_info<const F:usize>(f:$input)->Option<FloatInfo>{
+							const DIGIT_SHIFT:u32=6;
+							let bits=f.to_bits();
+							//extract exponent, add fractional offset
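+							//usize is used because digit_index (derived from exp) indexes the digit array. exp_cycle only has to fit in the float's exponent field (8 bits for f32, 11 for f64), so even a 32 bit usize is wide enough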
+							let exp=((bits>>($mantissa_bits-1)) as usize&((1<<$exponent_bits)-1))+F;
+							//digit_index is where the hi digit should end up in a fixed point number
+							//if it's less than zero, that's a conversion underflow.
+							let digit_index=exp.checked_sub($exp_bias)?>>DIGIT_SHIFT;
+							//cycle the exponent to keep the top bit of the mantissa within the hi digit
+							let exp_cycle=exp.overflowing_sub($exp_bias+64).0.rem_euclid(64).overflowing_add($exp_bias+64).0;
+							let out_bits=
+								bits
+								//remove (mask) sign bit and exponent
+								&((1 as $unsigned<<($mantissa_bits-1))-1)
+								//write exponent
+								|((exp_cycle as $unsigned)<<($mantissa_bits-1));
+							//ready to convert
+							let _128=<$input>::from_bits(out_bits) as u128;
+							Some(FloatInfo{
+								sign:f.is_sign_negative(),
+								digit_index,
+								bits:[_128 as u64,(_128>>64) as u64],
+							})
+						}
+
+						let FloatInfo{
+							sign,
+							digit_index,
+							bits:[lo,hi],
+						}=to_float_info::<F>(value)
+							.ok_or(FixedFromFloatError::Underflow)?;
+
 						let mut digits=[0u64;N];
-						let most_significant_bit=e as i32+$mantissa_bits as i32+F as i32;
-						if most_significant_bit<0{
-							return Err(FixedFromFloatError::Underflow);
-						}
-						let digit_index=most_significant_bit>>DIGIT_SHIFT;
-						let digit=digits.get_mut(digit_index as usize).ok_or(FixedFromFloatError::Overflow)?;
-						let take_bits=most_significant_bit-(digit_index<<DIGIT_SHIFT);
-						let rest_of_mantissa=-($mantissa_bits as i32-(take_bits as i32));
-						*digit=signed_shift(m,rest_of_mantissa);
-						if rest_of_mantissa<0&&digit_index!=0{
-							//we don't care if some float bits are partially truncated
-							if let Some(digit)=digits.get_mut((digit_index-1) as usize){
-								let take_bits=most_significant_bit-((digit_index-1)<<DIGIT_SHIFT);
-								let rest_of_mantissa=-($mantissa_bits as i32-(take_bits as i32));
-								*digit=signed_shift(m,rest_of_mantissa);
-							}
+						let digit=digits.get_mut(digit_index)
+							.ok_or(FixedFromFloatError::Overflow)?;
+						*digit=hi;
+
+						if digit_index!=0{
+							//if digit_index exists, so does digit_index-1
+							digits[digit_index-1]=lo;
 						}
+
 						let bits=BInt::from_bits(bnum::BUint::from_digits(digits));
-						Ok(if s{
+						if bits.is_negative()&&!(sign&&bits==BInt::MIN){
+							return Err(FixedFromFloatError::Overflow);
+						}
+						Ok(if sign{
 							Self::from_bits(bits.overflowing_neg().0)
 						}else{
 							Self::from_bits(bits)
@ -285,8 +290,8 @@ macro_rules! impl_from_float {
 		}
 	}
 }
-impl_from_float!(integer_decode_f32,f32,24);
-impl_from_float!(integer_decode_f64,f64,53);
+impl_from_float!(f32,u32,8,24,127);
+impl_from_float!(f64,u64,11,53,1023);

 impl<const N:usize,const F:usize> core::fmt::Display for Fixed<N,F>{
 	#[inline]
--- a/fixed_wide/src/tests.rs
+++ b/fixed_wide/src/tests.rs
@ -47,6 +47,8 @@ fn from_f32(){
 	assert_eq!(b,Ok(a));
 	let a=I256F256::from(0);
 	let b:Result<I256F256,_>=0.try_into();
+	//test float mantissa spread across digit boundary
+	//16 is within the 24 bits of float precision
 	assert_eq!(b,Ok(a));
 	let a=I256F256::from(0b101011110101001010101010000000000000000000000000000i64)<<16;
 	let b:Result<I256F256,_>=(0b101011110101001010101010000000000000000000000000000u64 as f32*2.0f32.powi(16)).try_into();
@ -56,11 +58,11 @@ fn from_f32(){
 	let b:Result<I32F32,_>=Into::<f32>::into(I32F32::MAX).try_into();
 	assert_eq!(b,Ok(a));
 	//I32F32::MIN hits a special case since it's not representable as a positive signed integer
-	//TODO: don't return an overflow because this is technically possible
 	let a=I32F32::MIN;
 	let b:Result<I32F32,_>=Into::<f32>::into(I32F32::MIN).try_into();
+	assert_eq!(b,Ok(a));
+	let b:Result<I32F32,_>=Into::<f32>::into(I32F32::MIN.fix_2()+(I32F32::MIN>>1).fix_2()).try_into();
 	assert_eq!(b,Err(crate::fixed::FixedFromFloatError::Overflow));
-	//16 is within the 24 bits of float precision
 	let b:Result<I32F32,_>=Into::<f32>::into(-I32F32::MIN.fix_2()).try_into();
 	assert_eq!(b,Err(crate::fixed::FixedFromFloatError::Overflow));
 	let b:Result<I32F32,_>=f32::MIN_POSITIVE.try_into();