use wide_mul for more precise sqrt

2024-08-29 14:41:08 -07:00 · 2024-08-29 14:41:08 -07:00 · 95651d7091
commit 95651d7091
parent 91b378aa43
3 changed files with 68 additions and 57 deletions
--- a/fixed_wide/src/fixed.rs
+++ b/fixed_wide/src/fixed.rs
@ -294,58 +294,3 @@ impl_shift_assign_operator!( Fixed, ShlAssign, shl_assign );
 impl_shift_operator!( Fixed, Shl, shl, Self );
 impl_shift_assign_operator!( Fixed, ShrAssign, shr_assign );
 impl_shift_operator!( Fixed, Shr, shr, Self );
 impl<const CHUNKS:usize,Frac:Unsigned> Fixed<CHUNKS,Frac>
 	where
 		Fixed::<CHUNKS,Frac>:std::ops::Mul<Fixed<CHUNKS,Frac>,Output=Fixed<CHUNKS,Frac>>,
 {
 	pub fn sqrt_unchecked(self)->Self{
 		//pow2 must be the minimum power of two which when squared is greater than self
 		//the algorithm:
 		//1. count "used" bits to the left of the decimal
 		//2. add one
 		//This is the power of two which is greater than self.
 		//3. divide by 2 via >>1
 		//4. add on fractional offset
 		//Voila
 		//0001.0000 Fixed<u8,4>
 		//sqrt
 		//0110.0000
 		//pow2 = 0100.0000
 		let mut pow2=Self{
 			bits:BInt::<CHUNKS>::ONE.shl((((CHUNKS as i32*64-Frac::I32-(self.bits.leading_zeros() as i32)+1)>>1)+Frac::I32) as u32),
 			frac:PhantomData,
 		};
 		let mut result=pow2>>1;
 		loop{
 			pow2>>=1;
 			if pow2==Self::ZERO{
 				break result;
 			}
 			//TODO: flip a single bit instead of adding a power of 2
 			let new_result=result+pow2;
 			//note that the implicit truncation in the multiply
 			//means that the algorithm can return a result which squares to a number greater than the input.
 			match self.cmp(&(new_result*new_result)){
 				core::cmp::Ordering::Less=>continue,
 				core::cmp::Ordering::Equal=>break new_result,
 				core::cmp::Ordering::Greater=>result=new_result,
 			}
 		}
 	}
 	pub fn sqrt(self)->Self{
 		if self<Self::ZERO{
 			panic!("Square root less than zero")
 		}else{
 			self.sqrt_unchecked()
 		}
 	}
 	pub fn sqrt_checked(self)->Option<Self>{
 		if self<Self::ZERO{
 			None
 		}else{
 			Some(self.sqrt_unchecked())
 		}
 	}
 }
--- a/fixed_wide/src/fixed_wide_traits.rs
+++ b/fixed_wide/src/fixed_wide_traits.rs
@ -42,3 +42,69 @@ impl_wide_mul_all!(
 	(1,7),(2,7),(3,7),(4,7),(5,7),(6,7),(7,7),(8,7),
 	(1,8),(2,8),(3,8),(4,8),(5,8),(6,8),(7,8),(8,8)
 );
 impl<const SRC:usize,Frac> Fixed<SRC,Frac>{
 	pub fn widen<const DST:usize>(self)->Fixed<DST,Frac>{
 		Fixed{
 			bits:self.bits.as_::<BInt<DST>>(),
 			frac:PhantomData,
 		}
 	}
 }
 impl<const CHUNKS:usize,Frac:Unsigned> Fixed<CHUNKS,Frac>
 	where
 		Fixed::<CHUNKS,Frac>:WideMul,
 		<Fixed::<CHUNKS,Frac> as WideMul>::Output:Ord,
 {
 	pub fn sqrt_unchecked(self)->Self{
 		//pow2 must be the minimum power of two which when squared is greater than self
 		//the algorithm:
 		//1. count "used" bits to the left of the decimal
 		//2. add one
 		//This is the power of two which is greater than self.
 		//3. divide by 2 via >>1
 		//4. add on fractional offset
 		//Voila
 		//0001.0000 Fixed<u8,4>
 		//sqrt
 		//0110.0000
 		//pow2 = 0100.0000
 		let mut pow2=Self{
 			bits:BInt::<CHUNKS>::ONE.shl((((CHUNKS as i32*64-Frac::I32-(self.bits.leading_zeros() as i32)+1)>>1)+Frac::I32) as u32),
 			frac:PhantomData,
 		};
 		let mut result=pow2>>1;
 		//cheat to make the types match
 		let wide_self=self.wide_mul(Fixed::<CHUNKS,Frac>::ONE);
 		loop{
 			pow2>>=1;
 			if pow2==Self::ZERO{
 				break result;
 			}
 			//TODO: flip a single bit instead of adding a power of 2
 			let new_result=result+pow2;
 			//note that the implicit truncation in the multiply
 			//means that the algorithm can return a result which squares to a number greater than the input.
 			match wide_self.cmp(&new_result.wide_mul(new_result)){
 				core::cmp::Ordering::Less=>continue,
 				core::cmp::Ordering::Equal=>break new_result,
 				core::cmp::Ordering::Greater=>result=new_result,
 			}
 		}
 	}
 	pub fn sqrt(self)->Self{
 		if self<Self::ZERO{
 			panic!("Square root less than zero")
 		}else{
 			self.sqrt_unchecked()
 		}
 	}
 	pub fn sqrt_checked(self)->Option<Self>{
 		if self<Self::ZERO{
 			None
 		}else{
 			Some(self.sqrt_unchecked())
 		}
 	}
 }
--- a/fixed_wide/src/tests.rs
+++ b/fixed_wide/src/tests.rs
@ -35,10 +35,10 @@ fn find_equiv_sqrt_via_f64(n:I32F32)->I32F32{
 	let r=I32F32::from_bits(bnum::BInt::<1>::from(i));
 	//mimic the behaviour of the algorithm,
 	//return the result if it truncates to the exact answer
-	if (r+I32F32::EPSILON)*(r+I32F32::EPSILON)==n{
+	if (r+I32F32::EPSILON).wide_mul(r+I32F32::EPSILON)==n.wide_mul(I32F32::ONE){
 		return r+I32F32::EPSILON;
 	}
-	if (r-I32F32::EPSILON)*(r-I32F32::EPSILON)==n{
+	if (r-I32F32::EPSILON).wide_mul(r-I32F32::EPSILON)==n.wide_mul(I32F32::ONE){
 		return r-I32F32::EPSILON;
 	}
 	return r;