Specialization to save additions in Fp2 for negative non-residue (ark…

…works-rs#207) Co-authored-by: Pratyush Mishra <[email protected]>
SRI-CSL · Feb 6, 2021 · a3a1ab2 · a3a1ab2
1 parent 87e25cb
commit a3a1ab2
Show file tree

Hide file tree

Showing 3 changed files with 137 additions and 20 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -84,6 +84,7 @@ The main features of this release are:
 - #190 (ark-ec) Add curve cycle trait and extended pairing cycle trait for all types of ec cycles.
 - #201 (ark-ec, ark-ff, ark-test-curves, ark-test-templates) Remove the dependency on `rand_xorshift`
 - #205 (ark-ec, ark-ff) Unroll loops and conditionally use intrinsics in `biginteger` arithmetic, and reduce copies in `ff` and `ec` arithmetic.
+- #207 (ark-ff) Improve performance of extension fields when the non-residue is negative. (Improves fq2, fq12, and g2 speed on bls12 and bn curves)
 
 ### Bug fixes
 - #36 (ark-ec) In Short-Weierstrass curves, include an infinity bit in `ToConstraintField`.

diff --git a/ff/src/fields/models/fp2.rs b/ff/src/fields/models/fp2.rs
@@ -12,10 +12,36 @@ pub trait Fp2Parameters: 'static + Send + Sync {
     /// Coefficients for the Frobenius automorphism.
     const FROBENIUS_COEFF_FP2_C1: &'static [Self::Fp];
 
+    /// Return `fe * Self::NONRESIDUE`.
     #[inline(always)]
     fn mul_fp_by_nonresidue(fe: &Self::Fp) -> Self::Fp {
         Self::NONRESIDUE * fe
     }
+
+    /// A specializable method for computing `x + mul_base_field_by_nonresidue(y)`
+    /// This allows for optimizations when the non-residue is
+    /// canonically negative in the field.
+    #[inline(always)]
+    fn add_and_mul_fp_by_nonresidue(x: &Self::Fp, y: &Self::Fp) -> Self::Fp {
+        *x + Self::mul_fp_by_nonresidue(y)
+    }
+
+    /// A specializable method for computing `x + y + mul_base_field_by_nonresidue(y)`
+    /// This allows for optimizations when the non-residue is not `-1`.
+    #[inline(always)]
+    fn add_and_mul_fp_by_nonresidue_plus_one(x: &Self::Fp, y: &Self::Fp) -> Self::Fp {
+        let mut tmp = *x;
+        tmp += y;
+        Self::add_and_mul_fp_by_nonresidue(&tmp, &y)
+    }
+
+    /// A specializable method for computing `x - mul_base_field_by_nonresidue(y)`
+    /// This allows for optimizations when the non-residue is
+    /// canonically negative in the field.
+    #[inline(always)]
+    fn sub_and_mul_fp_by_nonresidue(x: &Self::Fp, y: &Self::Fp) -> Self::Fp {
+        *x - Self::mul_fp_by_nonresidue(y)
+    }
 }
 
 pub struct Fp2ParamsWrapper<P: Fp2Parameters>(PhantomData<P>);
@@ -36,6 +62,30 @@ impl<P: Fp2Parameters> QuadExtParameters for Fp2ParamsWrapper<P> {
         P::mul_fp_by_nonresidue(fe)
     }
 
+    #[inline(always)]
+    fn add_and_mul_base_field_by_nonresidue(
+        x: &Self::BaseField,
+        y: &Self::BaseField,
+    ) -> Self::BaseField {
+        P::add_and_mul_fp_by_nonresidue(x, y)
+    }
+
+    #[inline(always)]
+    fn add_and_mul_base_field_by_nonresidue_plus_one(
+        x: &Self::BaseField,
+        y: &Self::BaseField,
+    ) -> Self::BaseField {
+        P::add_and_mul_fp_by_nonresidue_plus_one(x, y)
+    }
+
+    #[inline(always)]
+    fn sub_and_mul_base_field_by_nonresidue(
+        x: &Self::BaseField,
+        y: &Self::BaseField,
+    ) -> Self::BaseField {
+        P::sub_and_mul_fp_by_nonresidue(x, y)
+    }
+
     fn mul_base_field_by_frob_coeff(fe: &mut Self::BaseField, power: usize) {
         *fe *= &Self::FROBENIUS_COEFF_C1[power % Self::DEGREE_OVER_BASE_PRIME_FIELD];
     }

diff --git a/ff/src/fields/models/quadratic_extension.rs b/ff/src/fields/models/quadratic_extension.rs
@@ -52,6 +52,40 @@ pub trait QuadExtParameters: 'static + Send + Sync + Sized {
         Self::NONRESIDUE * fe
     }
 
+    /// A specializable method for computing x + mul_base_field_by_nonresidue(y)
+    /// This allows for optimizations when the non-residue is
+    /// canonically negative in the field.
+    #[inline(always)]
+    fn add_and_mul_base_field_by_nonresidue(
+        x: &Self::BaseField,
+        y: &Self::BaseField,
+    ) -> Self::BaseField {
+        *x + Self::mul_base_field_by_nonresidue(y)
+    }
+
+    /// A specializable method for computing x + mul_base_field_by_nonresidue(y) + y
+    /// This allows for optimizations when the non-residue is not -1.
+    #[inline(always)]
+    fn add_and_mul_base_field_by_nonresidue_plus_one(
+        x: &Self::BaseField,
+        y: &Self::BaseField,
+    ) -> Self::BaseField {
+        let mut tmp = *x;
+        tmp += y;
+        Self::add_and_mul_base_field_by_nonresidue(&tmp, &y)
+    }
+
+    /// A specializable method for computing x - mul_base_field_by_nonresidue(y)
+    /// This allows for optimizations when the non-residue is
+    /// canonically negative in the field.
+    #[inline(always)]
+    fn sub_and_mul_base_field_by_nonresidue(
+        x: &Self::BaseField,
+        y: &Self::BaseField,
+    ) -> Self::BaseField {
+        *x - Self::mul_base_field_by_nonresidue(y)
+    }
+
     /// A specializable method for multiplying an element of the base field by
     /// the appropriate Frobenius coefficient.
     fn mul_base_field_by_frob_coeff(fe: &mut Self::BaseField, power: usize);
@@ -129,9 +163,9 @@ impl<P: QuadExtParameters> QuadExtField<P> {
     /// This is alternatively expressed as `Norm(a) = a^(1 + p)`.
     pub fn norm(&self) -> P::BaseField {
         let t0 = self.c0.square();
+        // t1 = t0 - P::NON_RESIDUE * c1^2
         let mut t1 = self.c1.square();
-        t1 = -P::mul_base_field_by_nonresidue(&t1);
-        t1.add_assign(&t0);
+        t1 = P::sub_and_mul_base_field_by_nonresidue(&t0, &t1);
         t1
     }
 
@@ -216,34 +250,66 @@ impl<P: QuadExtParameters> Field for QuadExtField<P> {
     }
 
     fn square_in_place(&mut self) -> &mut Self {
-        // v0 = c0 - c1
-        let mut v0 = self.c0 - &self.c1;
-        // v3 = c0 - beta * c1
-        let v3 = self.c0 - &P::mul_base_field_by_nonresidue(&self.c1);
-        // v2 = c0 * c1
-        let v2 = self.c0 * &self.c1;
+        // (c0, c1)^2 = (c0 + x*c1)^2
+        //            = c0^2 + 2 c0 c1 x + c1^2 x^2
+        //            = c0^2 + beta * c1^2 + 2 c0 * c1 * x
+        //            = (c0^2 + beta * c1^2, 2 c0 * c1)
+        // Where beta is P::NONRESIDUE.
+        // When beta = -1, we can re-use intermediate additions to improve performance.
+        if P::NONRESIDUE == -P::BaseField::one() {
+            // When the non-residue is -1, we save 2 intermediate additions,
+            // and use one fewer intermediate variable
+
+            let c0_copy = self.c0;
+            // v0 = c0 - c1
+            let v0 = self.c0 - &self.c1;
+            // result.c1 = 2 c1
+            self.c1.double_in_place();
+            // result.c0 = (c0 - c1) + 2c1 = c0 + c1
+            self.c0 = v0 + &self.c1;
+            // result.c0 *= (c0 - c1)
+            // result.c0 = (c0 - c1) * (c0 + c1) = c0^2 - c1^2
+            self.c0 *= v0;
+            // result.c1 *= c0
+            // result.c1 = (2 * c1) * c0
+            self.c1 *= c0_copy;
 
-        // v0 = (v0 * v3) + v2
-        v0 *= &v3;
-        v0 += &v2;
-
-        self.c1 = v2.double();
-        self.c0 = v0 + &P::mul_base_field_by_nonresidue(&v2);
+            self
+        } else {
+            // v0 = c0 - c1
+            let mut v0 = self.c0 - &self.c1;
+            // v3 = c0 - beta * c1
+            let v3 = P::sub_and_mul_base_field_by_nonresidue(&self.c0, &self.c1);
+            // v2 = c0 * c1
+            let v2 = self.c0 * &self.c1;
+
+            // v0 = (v0 * v3)
+            // v0 = (c0 - c1) * (c0 - beta*c1)
+            // v0 = c0^2 - beta * c0 * c1 - c0 * c1 + beta * c1^2
+            v0 *= &v3;
+
+            // result.c1 = 2 * c0 * c1
+            self.c1 = v2.double();
+            // result.c0 = (v0) + ((beta + 1) * v2)
+            // result.c0 = (c0^2 - beta * c0 * c1 - c0 * c1 + beta * c1^2) + ((beta + 1) c0 * c1)
+            // result.c0 = (c0^2 - beta * c0 * c1 + beta * c1^2) + (beta * c0 * c1)
+            // result.c0 = c0^2 + beta * c1^2
+            self.c0 = P::add_and_mul_base_field_by_nonresidue_plus_one(&v0, &v2);
 
-        self
+            self
+        }
     }
 
     fn inverse(&self) -> Option<Self> {
         if self.is_zero() {
             None
         } else {
             // Guide to Pairing-based Cryptography, Algorithm 5.19.
-            // v0 = c0.square()
-            let mut v0 = self.c0.square();
             // v1 = c1.square()
             let v1 = self.c1.square();
-            // v0 = v0 - beta * v1
-            v0 -= &P::mul_base_field_by_nonresidue(&v1);
+            // v0 = c0.square() - beta * v1
+            let v0 = P::sub_and_mul_base_field_by_nonresidue(&self.c0.square(), &v1);
+
             v0.inverse().map(|v1| {
                 let c0 = self.c0 * &v1;
                 let c1 = -(self.c1 * &v1);
@@ -505,7 +571,7 @@ impl<'a, P: QuadExtParameters> MulAssign<&'a Self> for QuadExtField<P> {
         self.c1 *= &(other.c0 + &other.c1);
         self.c1 -= &v0;
         self.c1 -= &v1;
-        self.c0 = v0 + &P::mul_base_field_by_nonresidue(&v1);
+        self.c0 = P::add_and_mul_base_field_by_nonresidue(&v0, &v1);
     }
 }