From b91f061797d746c40ebe7cf0b702079dfb1fa142 Mon Sep 17 00:00:00 2001 From: Quaternions Date: Wed, 25 Sep 2024 09:46:44 -0700 Subject: [PATCH] implement same-size wide mul more efficiently --- fixed_wide/src/fixed.rs | 56 +++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/fixed_wide/src/fixed.rs b/fixed_wide/src/fixed.rs index 7d592d5..446d957 100644 --- a/fixed_wide/src/fixed.rs +++ b/fixed_wide/src/fixed.rs @@ -510,18 +510,54 @@ macro_rules! impl_wide_not_const_generic{ impl_wide_operators!($lhs,$rhs); }; } +macro_rules! impl_wide_same_size_not_const_generic{ + ( + (), + $width:expr + )=>{ + impl Fixed<$width,{$width*32}> + { + paste::item!{ + #[inline] + pub fn [](self,rhs:Fixed<$width,{$width*32}>)->Fixed<{$width*2},{$width*2*32}>{ + let (low,high)=self.bits.unsigned_abs().widening_mul(rhs.bits.unsigned_abs()); + let out:BInt::<{$width*2}>=unsafe{core::mem::transmute([low,high])}; + if self.is_negative()==rhs.is_negative(){ + Fixed::from_bits(out) + }else{ + // Wrapping is the cheapest negation operation + // And the inputs cannot reach the point where it matters + Fixed::from_bits(out.wrapping_neg()) + } + } + /// This operation cannot represent the fraction exactly, + /// but it shapes the output to have precision for the + /// largest and smallest possible fractions. + #[inline] + pub fn [](self,rhs:Fixed<$width,{$width*32}>)->Fixed<{$width*2},{$width*2*32}>{ + // (lhs/2^LHS_FRAC)/(rhs/2^RHS_FRAC) + let lhs=self.bits.as_::>().shl($width*64); + let rhs=rhs.bits.as_::>(); + Fixed::from_bits(lhs/rhs) + } + } + } + #[cfg(feature="wide-mul")] + impl_wide_operators!($width,$width); + }; +} //const generics sidestepped wahoo macro_repeated!( impl_wide_not_const_generic,(), - (1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1),(11,1),(12,1),(13,1),(14,1),(15,1), - (1,2),(2,2),(3,2),(4,2),(5,2),(6,2),(7,2),(8,2),(9,2),(10,2),(11,2),(12,2),(13,2),(14,2), - (1,3),(2,3),(3,3),(4,3),(5,3),(6,3),(7,3),(8,3),(9,3),(10,3),(11,3),(12,3),(13,3), - (1,4),(2,4),(3,4),(4,4),(5,4),(6,4),(7,4),(8,4),(9,4),(10,4),(11,4),(12,4), - (1,5),(2,5),(3,5),(4,5),(5,5),(6,5),(7,5),(8,5),(9,5),(10,5),(11,5), - (1,6),(2,6),(3,6),(4,6),(5,6),(6,6),(7,6),(8,6),(9,6),(10,6), - (1,7),(2,7),(3,7),(4,7),(5,7),(6,7),(7,7),(8,7),(9,7), - (1,8),(2,8),(3,8),(4,8),(5,8),(6,8),(7,8),(8,8),(9,8), + (2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1),(11,1),(12,1),(13,1),(14,1),(15,1), + (1,2), (3,2),(4,2),(5,2),(6,2),(7,2),(8,2),(9,2),(10,2),(11,2),(12,2),(13,2),(14,2), + (1,3),(2,3), (4,3),(5,3),(6,3),(7,3),(8,3),(9,3),(10,3),(11,3),(12,3),(13,3), + (1,4),(2,4),(3,4), (5,4),(6,4),(7,4),(8,4),(9,4),(10,4),(11,4),(12,4), + (1,5),(2,5),(3,5),(4,5), (6,5),(7,5),(8,5),(9,5),(10,5),(11,5), + (1,6),(2,6),(3,6),(4,6),(5,6), (7,6),(8,6),(9,6),(10,6), + (1,7),(2,7),(3,7),(4,7),(5,7),(6,7), (8,7),(9,7), + (1,8),(2,8),(3,8),(4,8),(5,8),(6,8),(7,8), (9,8), (1,9),(2,9),(3,9),(4,9),(5,9),(6,9),(7,9), (1,10),(2,10),(3,10),(4,10),(5,10),(6,10), (1,11),(2,11),(3,11),(4,11),(5,11), @@ -530,6 +566,10 @@ macro_repeated!( (1,14),(2,14), (1,15) ); +macro_repeated!( + impl_wide_same_size_not_const_generic,(), + 1,2,3,4,5,6,7,8 +); pub trait Fix{ fn fix(self)->Out;