From 95651d7091427fe65709af0268a20c47754fb918 Mon Sep 17 00:00:00 2001
From: Quaternions <krakow20@gmail.com>
Date: Thu, 29 Aug 2024 14:41:08 -0700
Subject: [PATCH] use wide_mul for more precise sqrt

---
 fixed_wide/src/fixed.rs             | 55 ------------------------
 fixed_wide/src/fixed_wide_traits.rs | 66 +++++++++++++++++++++++++++++
 fixed_wide/src/tests.rs             |  4 +-
 3 files changed, 68 insertions(+), 57 deletions(-)
diff --git a/fixed_wide/src/fixed.rs b/fixed_wide/src/fixed.rs
index c39e31d..b6956b2 100644
--- a/fixed_wide/src/fixed.rs
+++ b/fixed_wide/src/fixed.rs
@@ -294,58 +294,3 @@ impl_shift_assign_operator!( Fixed, ShlAssign, shl_assign );
 impl_shift_operator!( Fixed, Shl, shl, Self );
 impl_shift_assign_operator!( Fixed, ShrAssign, shr_assign );
 impl_shift_operator!( Fixed, Shr, shr, Self );
-
-impl<const CHUNKS:usize,Frac:Unsigned> Fixed<CHUNKS,Frac>
-	where
-		Fixed::<CHUNKS,Frac>:std::ops::Mul<Fixed<CHUNKS,Frac>,Output=Fixed<CHUNKS,Frac>>,
-{
-	pub fn sqrt_unchecked(self)->Self{
-		//pow2 must be the minimum power of two which when squared is greater than self
-		//the algorithm:
-		//1. count "used" bits to the left of the decimal
-		//2. add one
-		//This is the power of two which is greater than self.
-		//3. divide by 2 via >>1
-		//4. add on fractional offset
-		//Voila
-		//0001.0000 Fixed<u8,4>
-		//sqrt
-		//0110.0000
-		//pow2 = 0100.0000
-		let mut pow2=Self{
-			bits:BInt::<CHUNKS>::ONE.shl((((CHUNKS as i32*64-Frac::I32-(self.bits.leading_zeros() as i32)+1)>>1)+Frac::I32) as u32),
-			frac:PhantomData,
-		};
-		let mut result=pow2>>1;
-
-		loop{
-			pow2>>=1;
-			if pow2==Self::ZERO{
-				break result;
-			}
-			//TODO: flip a single bit instead of adding a power of 2
-			let new_result=result+pow2;
-			//note that the implicit truncation in the multiply
-			//means that the algorithm can return a result which squares to a number greater than the input.
-			match self.cmp(&(new_result*new_result)){
-				core::cmp::Ordering::Less=>continue,
-				core::cmp::Ordering::Equal=>break new_result,
-				core::cmp::Ordering::Greater=>result=new_result,
-			}
-		}
-	}
-	pub fn sqrt(self)->Self{
-		if self<Self::ZERO{
-			panic!("Square root less than zero")
-		}else{
-			self.sqrt_unchecked()
-		}
-	}
-	pub fn sqrt_checked(self)->Option<Self>{
-		if self<Self::ZERO{
-			None
-		}else{
-			Some(self.sqrt_unchecked())
-		}
-	}
-}
diff --git a/fixed_wide/src/fixed_wide_traits.rs b/fixed_wide/src/fixed_wide_traits.rs
index 84f6634..f8b0d5c 100644
--- a/fixed_wide/src/fixed_wide_traits.rs
+++ b/fixed_wide/src/fixed_wide_traits.rs
@@ -42,3 +42,69 @@ impl_wide_mul_all!(
 	(1,7),(2,7),(3,7),(4,7),(5,7),(6,7),(7,7),(8,7),
 	(1,8),(2,8),(3,8),(4,8),(5,8),(6,8),(7,8),(8,8)
 );
+impl<const SRC:usize,Frac> Fixed<SRC,Frac>{
+	pub fn widen<const DST:usize>(self)->Fixed<DST,Frac>{
+		Fixed{
+			bits:self.bits.as_::<BInt<DST>>(),
+			frac:PhantomData,
+		}
+	}
+}
+
+impl<const CHUNKS:usize,Frac:Unsigned> Fixed<CHUNKS,Frac>
+	where
+		Fixed::<CHUNKS,Frac>:WideMul,
+		<Fixed::<CHUNKS,Frac> as WideMul>::Output:Ord,
+{
+	pub fn sqrt_unchecked(self)->Self{
+		//pow2 must be the minimum power of two which when squared is greater than self
+		//the algorithm:
+		//1. count "used" bits to the left of the decimal
+		//2. add one
+		//This is the power of two which is greater than self.
+		//3. divide by 2 via >>1
+		//4. add on fractional offset
+		//Voila
+		//0001.0000 Fixed<u8,4>
+		//sqrt
+		//0110.0000
+		//pow2 = 0100.0000
+		let mut pow2=Self{
+			bits:BInt::<CHUNKS>::ONE.shl((((CHUNKS as i32*64-Frac::I32-(self.bits.leading_zeros() as i32)+1)>>1)+Frac::I32) as u32),
+			frac:PhantomData,
+		};
+		let mut result=pow2>>1;
+
+		//cheat to make the types match
+		let wide_self=self.wide_mul(Fixed::<CHUNKS,Frac>::ONE);
+		loop{
+			pow2>>=1;
+			if pow2==Self::ZERO{
+				break result;
+			}
+			//TODO: flip a single bit instead of adding a power of 2
+			let new_result=result+pow2;
+			//note that the implicit truncation in the multiply
+			//means that the algorithm can return a result which squares to a number greater than the input.
+			match wide_self.cmp(&new_result.wide_mul(new_result)){
+				core::cmp::Ordering::Less=>continue,
+				core::cmp::Ordering::Equal=>break new_result,
+				core::cmp::Ordering::Greater=>result=new_result,
+			}
+		}
+	}
+	pub fn sqrt(self)->Self{
+		if self<Self::ZERO{
+			panic!("Square root less than zero")
+		}else{
+			self.sqrt_unchecked()
+		}
+	}
+	pub fn sqrt_checked(self)->Option<Self>{
+		if self<Self::ZERO{
+			None
+		}else{
+			Some(self.sqrt_unchecked())
+		}
+	}
+}
diff --git a/fixed_wide/src/tests.rs b/fixed_wide/src/tests.rs
index a46ef87..3234541 100644
--- a/fixed_wide/src/tests.rs
+++ b/fixed_wide/src/tests.rs
@@ -35,10 +35,10 @@ fn find_equiv_sqrt_via_f64(n:I32F32)->I32F32{
 	let r=I32F32::from_bits(bnum::BInt::<1>::from(i));
 	//mimic the behaviour of the algorithm,
 	//return the result if it truncates to the exact answer
-	if (r+I32F32::EPSILON)*(r+I32F32::EPSILON)==n{
+	if (r+I32F32::EPSILON).wide_mul(r+I32F32::EPSILON)==n.wide_mul(I32F32::ONE){
 		return r+I32F32::EPSILON;
 	}
-	if (r-I32F32::EPSILON)*(r-I32F32::EPSILON)==n{
+	if (r-I32F32::EPSILON).wide_mul(r-I32F32::EPSILON)==n.wide_mul(I32F32::ONE){
 		return r-I32F32::EPSILON;
 	}
 	return r;