use wide_mul for more precise sqrt

This commit is contained in:
Quaternions 2024-08-29 14:41:08 -07:00
parent 91b378aa43
commit 95651d7091
3 changed files with 68 additions and 57 deletions

View File

@ -294,58 +294,3 @@ impl_shift_assign_operator!( Fixed, ShlAssign, shl_assign );
impl_shift_operator!( Fixed, Shl, shl, Self ); impl_shift_operator!( Fixed, Shl, shl, Self );
impl_shift_assign_operator!( Fixed, ShrAssign, shr_assign ); impl_shift_assign_operator!( Fixed, ShrAssign, shr_assign );
impl_shift_operator!( Fixed, Shr, shr, Self ); impl_shift_operator!( Fixed, Shr, shr, Self );
impl<const CHUNKS:usize,Frac:Unsigned> Fixed<CHUNKS,Frac>
where
Fixed::<CHUNKS,Frac>:std::ops::Mul<Fixed<CHUNKS,Frac>,Output=Fixed<CHUNKS,Frac>>,
{
pub fn sqrt_unchecked(self)->Self{
//pow2 must be the minimum power of two which when squared is greater than self
//the algorithm:
//1. count "used" bits to the left of the decimal
//2. add one
//This is the power of two which is greater than self.
//3. divide by 2 via >>1
//4. add on fractional offset
//Voila
//0001.0000 Fixed<u8,4>
//sqrt
//0110.0000
//pow2 = 0100.0000
let mut pow2=Self{
bits:BInt::<CHUNKS>::ONE.shl((((CHUNKS as i32*64-Frac::I32-(self.bits.leading_zeros() as i32)+1)>>1)+Frac::I32) as u32),
frac:PhantomData,
};
let mut result=pow2>>1;
loop{
pow2>>=1;
if pow2==Self::ZERO{
break result;
}
//TODO: flip a single bit instead of adding a power of 2
let new_result=result+pow2;
//note that the implicit truncation in the multiply
//means that the algorithm can return a result which squares to a number greater than the input.
match self.cmp(&(new_result*new_result)){
core::cmp::Ordering::Less=>continue,
core::cmp::Ordering::Equal=>break new_result,
core::cmp::Ordering::Greater=>result=new_result,
}
}
}
pub fn sqrt(self)->Self{
if self<Self::ZERO{
panic!("Square root less than zero")
}else{
self.sqrt_unchecked()
}
}
pub fn sqrt_checked(self)->Option<Self>{
if self<Self::ZERO{
None
}else{
Some(self.sqrt_unchecked())
}
}
}

View File

@ -42,3 +42,69 @@ impl_wide_mul_all!(
(1,7),(2,7),(3,7),(4,7),(5,7),(6,7),(7,7),(8,7), (1,7),(2,7),(3,7),(4,7),(5,7),(6,7),(7,7),(8,7),
(1,8),(2,8),(3,8),(4,8),(5,8),(6,8),(7,8),(8,8) (1,8),(2,8),(3,8),(4,8),(5,8),(6,8),(7,8),(8,8)
); );
impl<const SRC:usize,Frac> Fixed<SRC,Frac>{
pub fn widen<const DST:usize>(self)->Fixed<DST,Frac>{
Fixed{
bits:self.bits.as_::<BInt<DST>>(),
frac:PhantomData,
}
}
}
impl<const CHUNKS:usize,Frac:Unsigned> Fixed<CHUNKS,Frac>
where
Fixed::<CHUNKS,Frac>:WideMul,
<Fixed::<CHUNKS,Frac> as WideMul>::Output:Ord,
{
pub fn sqrt_unchecked(self)->Self{
//pow2 must be the minimum power of two which when squared is greater than self
//the algorithm:
//1. count "used" bits to the left of the decimal
//2. add one
//This is the power of two which is greater than self.
//3. divide by 2 via >>1
//4. add on fractional offset
//Voila
//0001.0000 Fixed<u8,4>
//sqrt
//0110.0000
//pow2 = 0100.0000
let mut pow2=Self{
bits:BInt::<CHUNKS>::ONE.shl((((CHUNKS as i32*64-Frac::I32-(self.bits.leading_zeros() as i32)+1)>>1)+Frac::I32) as u32),
frac:PhantomData,
};
let mut result=pow2>>1;
//cheat to make the types match
let wide_self=self.wide_mul(Fixed::<CHUNKS,Frac>::ONE);
loop{
pow2>>=1;
if pow2==Self::ZERO{
break result;
}
//TODO: flip a single bit instead of adding a power of 2
let new_result=result+pow2;
//note that the implicit truncation in the multiply
//means that the algorithm can return a result which squares to a number greater than the input.
match wide_self.cmp(&new_result.wide_mul(new_result)){
core::cmp::Ordering::Less=>continue,
core::cmp::Ordering::Equal=>break new_result,
core::cmp::Ordering::Greater=>result=new_result,
}
}
}
pub fn sqrt(self)->Self{
if self<Self::ZERO{
panic!("Square root less than zero")
}else{
self.sqrt_unchecked()
}
}
pub fn sqrt_checked(self)->Option<Self>{
if self<Self::ZERO{
None
}else{
Some(self.sqrt_unchecked())
}
}
}

View File

@ -35,10 +35,10 @@ fn find_equiv_sqrt_via_f64(n:I32F32)->I32F32{
let r=I32F32::from_bits(bnum::BInt::<1>::from(i)); let r=I32F32::from_bits(bnum::BInt::<1>::from(i));
//mimic the behaviour of the algorithm, //mimic the behaviour of the algorithm,
//return the result if it truncates to the exact answer //return the result if it truncates to the exact answer
if (r+I32F32::EPSILON)*(r+I32F32::EPSILON)==n{ if (r+I32F32::EPSILON).wide_mul(r+I32F32::EPSILON)==n.wide_mul(I32F32::ONE){
return r+I32F32::EPSILON; return r+I32F32::EPSILON;
} }
if (r-I32F32::EPSILON)*(r-I32F32::EPSILON)==n{ if (r-I32F32::EPSILON).wide_mul(r-I32F32::EPSILON)==n.wide_mul(I32F32::ONE){
return r-I32F32::EPSILON; return r-I32F32::EPSILON;
} }
return r; return r;