File libgcrypt-CVE-2024-2236_07.patch of Package libgcrypt.39917

From 45f1846db9d8924b91f1da6972b516932431ba50 Mon Sep 17 00:00:00 2001
From: Jakub Jelen <jjelen@redhat.com>
Date: Wed, 10 Apr 2024 15:18:39 +0200
Subject: [PATCH 07/11] rsa: Constant time blinding removal

* cipher/rsa.c (secret_blinded): Use constant time mulm implementation
* configure.ac: New configure option --enable-marvin-workaround
* mpi/Makefile.am: Build constant time multiplication
* mpi/mpi-internal.h: Add constant time function prototypes
* mpi/mpi-mul-cs.c: New file with constant time multiplication and
  modulo.
* mpi/mpi-mul.c (_gcry_mpi_mul_sec): New function.
  (_gcry_mpi_mod_sec): New function.
  (_gcry_mpi_reverse_sec): New function.
  (_gcry_mpi_mulm_sec): New function.
* src/gcrypt-int.h: Add new functions.

--

The current MPI code is not constant time, potentially leaking plaintext
when the attacker can observe enough decryption operations using RSA
PKCS#1 v1.5. This is described as the Marvin Attack:

https://eprint.iacr.org/2023/1442

Note that this code has been tested to be constant time only with the -O3
optimization level.

Signed-off-by: Jakub Jelen <jjelen@redhat.com>
---
 cipher/rsa.c       |   4 +
 configure.ac       |  16 +++
 mpi/Makefile.am    |   4 +
 mpi/mpi-internal.h |   6 ++
 mpi/mpi-mul-cs.c   | 263 +++++++++++++++++++++++++++++++++++++++++++++
 mpi/mpi-mul.c      | 169 +++++++++++++++++++++++++++++
 src/gcrypt-int.h   |   6 ++
 7 files changed, 468 insertions(+)
 create mode 100644 mpi/mpi-mul-cs.c

Index: libgcrypt-1.6.1/cipher/rsa.c
===================================================================
--- libgcrypt-1.6.1.orig/cipher/rsa.c
+++ libgcrypt-1.6.1/cipher/rsa.c
@@ -1489,7 +1489,11 @@ rsa_decrypt (gcry_sexp_t *r_plain, gcry_
       /* Undo blinding.  Here we calculate: y = (x * r^-1) mod n,
          where x is the blinded decrypted data, ri is the modular
          multiplicative inverse of r and n is the RSA modulus.  */
+#ifdef WITH_MARVIN_WORKAROUND
+      mpi_mulm_sec (plain, plain, ri, sk.n);
+#else
       mpi_mulm (plain, plain, ri, sk.n);
+#endif /* WITH_MARVIN_WORKAROUND */
 
       _gcry_mpi_release (r); r = NULL;
       _gcry_mpi_release (ri); ri = NULL;
Index: libgcrypt-1.6.1/configure.ac
===================================================================
--- libgcrypt-1.6.1.orig/configure.ac
+++ libgcrypt-1.6.1/configure.ac
@@ -559,6 +559,22 @@ if test "$use_hmac_binary_check" = yes ;
 fi
 
 
+AC_MSG_CHECKING([whether a Marvin workaround is requested])
+AC_ARG_ENABLE(marvin-workaround,
+              AS_HELP_STRING([--enable-marvin-workaround],
+                             [Enable Marvin workaround for constant time PKCS1.5 depadding]),
+              [with_marvin_workaround="$enableval"],
+              [with_marvin_workaround=no])
+AC_MSG_RESULT($with_marvin_workaround)
+if test "$with_marvin_workaround" = no ; then
+    WITH_MARVIN_WORKAROUND=''
+else
+    AC_DEFINE(WITH_MARVIN_WORKAROUND,1,
+              [Define to provide constant time PKCS1.5 depadding])
+fi
+AM_CONDITIONAL(WITH_MARVIN_WORKAROUND, test "x$with_marvin_workaround" != xno)
+AC_SUBST(WITH_MARVIN_WORKAROUND)
+
 # Implementation of the --disable-padlock-support switch.
 AC_MSG_CHECKING([whether padlock support is requested])
 AC_ARG_ENABLE(padlock-support,
Index: libgcrypt-1.6.1/mpi/Makefile.am
===================================================================
--- libgcrypt-1.6.1.orig/mpi/Makefile.am
+++ libgcrypt-1.6.1/mpi/Makefile.am
@@ -175,3 +175,7 @@ libmpi_la_SOURCES = longlong.h	   \
 	      mpih-mul.c     \
 	      mpiutil.c      \
               ec.c ec-internal.h ec-ed25519.c
+
+if WITH_MARVIN_WORKAROUND
+libmpi_la_SOURCES += mpi-mul-cs.c
+endif
\ No newline at end of file
Index: libgcrypt-1.6.1/mpi/mpi-internal.h
===================================================================
--- libgcrypt-1.6.1.orig/mpi/mpi-internal.h
+++ libgcrypt-1.6.1/mpi/mpi-internal.h
@@ -233,6 +233,12 @@ void _gcry_mpih_mul_karatsuba_case( mpi_
 				 mpi_ptr_t vp, mpi_size_t vsize,
 				 struct karatsuba_ctx *ctx );
 
+#ifdef WITH_MARVIN_WORKAROUND
+/*-- mpi-mul-cs.c --*/
+void mul_cs(mpi_limb_t *ret, mpi_limb_t *a, mpi_limb_t *b, size_t n, mpi_limb_t *tmp);
+void mod_cs(mpi_limb_t *ret, mpi_limb_t *a, size_t anum, mpi_limb_t *mod, size_t modnum, mpi_limb_t *tmp);
+size_t mod_limb_numb(size_t anum, size_t modnum);
+#endif /* WITH_MARVIN_WORKAROUND */
 
 /*-- mpih-mul_1.c (or xxx/cpu/ *.S) --*/
 mpi_limb_t _gcry_mpih_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
Index: libgcrypt-1.6.1/mpi/mpi-mul-cs.c
===================================================================
--- /dev/null
+++ libgcrypt-1.6.1/mpi/mpi-mul-cs.c
@@ -0,0 +1,268 @@
+/* Based on https://github.com/tomato42/ctmpi/blob/master/mul.c which does
+ * not have any copyright information, assuming public domain */
+
+#include <config.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/* For multiplication we're using schoolbook multiplication,
+ * so if we have two numbers, each with 6 "digits" (words)
+ * the multiplication is calculated as follows:
+ *                        A B C D E F
+ *                     x  I J K L M N
+ *                     --------------
+ *                                N*F
+ *                              N*E
+ *                            N*D
+ *                          N*C
+ *                        N*B
+ *                      N*A
+ *                              M*F
+ *                            M*E
+ *                          M*D
+ *                        M*C
+ *                      M*B
+ *                    M*A
+ *                            L*F
+ *                          L*E
+ *                        L*D
+ *                      L*C
+ *                    L*B
+ *                  L*A
+ *                          K*F
+ *                        K*E
+ *                      K*D
+ *                    K*C
+ *                  K*B
+ *                K*A
+ *                        J*F
+ *                      J*E
+ *                    J*D
+ *                  J*C
+ *                J*B
+ *              J*A
+ *                      I*F
+ *                    I*E
+ *                  I*D
+ *                I*C
+ *              I*B
+ *         +  I*A
+ *         ==========================
+ *                        N*B N*D N*F
+ *                    + N*A N*C N*E
+ *                    + M*B M*D M*F
+ *                  + M*A M*C M*E
+ *                  + L*B L*D L*F
+ *                + L*A L*C L*E
+ *                + K*B K*D K*F
+ *              + K*A K*C K*E
+ *              + J*B J*D J*F
+ *            + J*A J*C J*E
+ *            + I*B I*D I*F
+ *          + I*A I*C I*E
+ *
+ *                1+1 1+3 1+5
+ *              1+0 1+2 1+4
+ *              0+1 0+3 0+5
+ *            0+0 0+2 0+4
+ *
+ *            0 1 2 3 4 5 6
+ * which requires n^2 multiplications and 2n full length additions
+ * as we can keep every other result of limb multiplication in two separate
+ * limbs
+ */
+
+typedef mpi_limb_t limb_t; /* local shorthand for the MPI limb type */
+#if BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG_LONG
+#define LIMB_BIT_SIZE 64
+#define LIMB_BYTE_SIZE 8
+#elif BYTES_PER_MPI_LIMB == SIZEOF_UNSIGNED_LONG
+#define LIMB_BIT_SIZE 32
+#define LIMB_BYTE_SIZE 4
+/* limb matches unsigned long but not unsigned long long: 4 byte limb assumed */
+#else
+#define LIMB_BIT_SIZE 16
+#define LIMB_BYTE_SIZE 2
+/*
+ * fallback when the limb size matches neither unsigned long long nor
+ * unsigned long; a 2 byte limb is assumed -- NOTE(review): confirm
+ */
+#endif
+
+/* add limbs a and b plus the carry in, store the sum in *ret, return carry out */
+static limb_t
+_add_limb (limb_t *ret, limb_t a, limb_t b, limb_t carry)
+{
+  limb_t carry1, carry2, t;
+  add_ssaaaa (carry1, t, 0, a, 0, carry); /* t = a + carry, carry1 = high word */
+  add_ssaaaa (carry2, t, 0, b, 0, t); /* t = b + t, carry2 = high word */
+  *ret = t;
+  return carry1 + carry2; /* combined carry out of both additions */
+}
+
+/* add two numbers of the same size, return the overflow
+ *
+ * computes ret = a + b; all three arrays need to be n limbs long.
+ * The carry runs from index n-1 down to index 0; the last carry is returned
+ */
+static limb_t
+add (limb_t *ret, limb_t *a, limb_t *b, size_t n)
+{
+  limb_t c = 0; /* running carry, starts at zero */
+  ssize_t i; /* signed so that the loop can stop after index 0 */
+  for (i = n - 1; i > -1; i--)
+    {
+      c = _add_limb (&ret[i], a[i], b[i], c);
+    }
+  return c;
+}
+
+/* multiply two numbers of the same size
+ *
+ * multiply a by b, place result in ret; a and b need to be n limbs long
+ * ret needs to be 2*n limbs long, tmp needs to be 2 * n * 2 limbs
+ * long (it is split into two scratch arrays of 2*n limbs each)
+ */
+void
+mul_cs (limb_t *ret, limb_t *a, limb_t *b, size_t n, limb_t *tmp)
+{
+  limb_t *r_odd, *r_even;
+  r_odd = tmp; /* first 2*n limbs of tmp: products with odd j */
+  r_even = &tmp[2 * n]; /* second 2*n limbs of tmp: products with even j */
+  size_t i = 0, j = 0;
+  for (i = 0; i < 2 * n; i++)
+    {
+      ret[i] = 0;
+    }
+
+  for (i=0; i<n; i++)
+    {
+      size_t k = 0;
+      for (k=0; k<i+n+1; k++) /* clear only the limbs that are added below */
+        {
+          r_even[k] = 0;
+          r_odd[k] = 0;
+        }
+      for (j=0; j<n; j++)
+        {
+          /* place results from even and odd limbs in separate arrays so that
+           * we don't have to calculate overflow every time we get individual
+           * limb multiplication result */
+          if (j % 2 == 0)
+            {
+              umul_ppmm (r_even[i+j], r_even[i+j+1], a[i], b[j]);
+            }
+          else
+            {
+              umul_ppmm (r_odd[i+j], r_odd[i+j+1], a[i], b[j]);
+            }
+        }
+      /* only the first n+i+1 limbs are added: the limbs behind them
+       * were not written for this i (they're zero anyway) */
+      add (ret, ret, r_even, n+i+1);
+      add (ret, ret, r_odd, n+i+1);
+    }
+}
+
+/* shift the n limb value right by one bit in place; val[0] is the most significant limb */
+static void
+rshift1 (limb_t *val, size_t n)
+{
+  limb_t shift_in = 0, shift_out = 0;
+  size_t i =0;
+  for (i = 0; i < n; i++)
+    {
+      shift_out = val[i] & 1; /* lowest bit leaves this limb */
+      val[i] = shift_in << (LIMB_BIT_SIZE-1) | (val[i] >> 1); /* previous limb's bit enters at the top */
+      shift_in = shift_out; /* hand the bit over to the next limb */
+    }
+}
+
+/* copy n limbs from either a or b to ret based on flag
+ * when flag == 0, then copies from b
+ * when flag == 1, then copies from a
+ */
+static void
+cselect (limb_t flag, limb_t *ret, limb_t *a, limb_t *b, size_t n)
+{
+  /* the selection is done by masking rather than by branching on flag;
+   * presumably mask1/mask2 are all-ones/all-zero for flag == 1 -- confirm */
+  limb_t mask1 = ct_limb_gen_mask (flag);
+  limb_t mask2 = ct_limb_gen_inv_mask (flag);
+  size_t i = 0;
+  for (i = 0; i < n; i++)
+    {
+      ret[i] = (mask1 & a[i]) | (mask2 & b[i]); /* both inputs are read for every limb */
+    }
+}
+
+static limb_t
+_sub_limb (limb_t *ret, limb_t a, limb_t b, limb_t borrow)
+{ /* store a - b - borrow in *ret and return the borrow out */
+  limb_t borrow1, borrow2, t;
+  sub_ddmmss (borrow1, t, 0, a, 0, borrow); /* t = a - borrow, borrow1 = high word */
+  sub_ddmmss (borrow2, t, 0, t, 0, b); /* t = t - b, borrow2 = high word */
+  *ret = t;
+  return -(borrow1 + borrow2); /* negate the summed high words to get the borrow */
+}
+
+/* place the result of a - b into ret, return the final borrow.
+ * All arrays need to be n limbs long; the borrow runs from index n-1 to 0
+ */
+static limb_t
+sub (limb_t *ret, limb_t *a, limb_t *b, size_t n)
+{
+  limb_t borrow = 0; /* running borrow, starts at zero */
+  ssize_t i; /* signed so that the loop can stop after index 0 */
+  for (i=n-1; i>-1; i--)
+    {
+      borrow = _sub_limb (&ret[i], a[i], b[i], borrow);
+    }
+  return borrow;
+}
+
+/* return the number of limbs necessary to allocate for the mod_cs() tmp operand */
+size_t
+mod_limb_numb (size_t anum, size_t modnum)
+{
+    return (anum + modnum) * 3; /* three scratch arrays of anum+modnum limbs each */
+}
+
+/* calculate a % mod, place the result in ret
+ * size of a is defined by anum, size of ret and mod is modnum (in limbs),
+ * size of tmp is returned by mod_limb_numb(); tmp is zeroed and overwritten
+ */
+void
+mod_cs (limb_t *ret, limb_t *a, size_t anum, limb_t *mod, size_t modnum, limb_t *tmp)
+{
+  limb_t *atmp, *modtmp, *rettmp;
+  limb_t res;
+
+  memset (tmp, 0, mod_limb_numb(anum, modnum) * LIMB_BYTE_SIZE); /* NOTE(review): uses LIMB_BYTE_SIZE while the memcpy below uses sizeof(limb_t) */
+
+  atmp = tmp; /* running remainder, anum+modnum limbs */
+  modtmp = &tmp[anum+modnum]; /* shifted modulus, anum+modnum limbs */
+  rettmp = &tmp[(anum+modnum)*2]; /* candidate atmp - modtmp, anum+modnum limbs */
+  size_t i = modnum;
+  for (i=modnum; i<modnum+anum; i++) /* a goes behind modnum zero limbs */
+    {
+      atmp[i] = a[i-modnum];
+    }
+  for (i=0; i<modnum; i++) /* mod goes in front of anum zero limbs */
+    {
+      modtmp[i] = mod[i];
+    }
+
+  for (i=0; i<anum*LIMB_BIT_SIZE; i++) /* one step per bit of a */
+    {
+      rshift1 (modtmp, anum+modnum);
+      res = sub (rettmp, atmp, modtmp, anum+modnum); /* res is the borrow of atmp - modtmp */
+      cselect (res, atmp, atmp, rettmp, anum+modnum); /* atmp is the "a" operand, rettmp the "b" operand */
+    }
+
+  memcpy (ret, &atmp[anum], sizeof(limb_t)*modnum); /* result is the last modnum limbs of atmp */
+}
Index: libgcrypt-1.6.1/mpi/mpi-mul.c
===================================================================
--- libgcrypt-1.6.1.orig/mpi/mpi-mul.c
+++ libgcrypt-1.6.1/mpi/mpi-mul.c
@@ -203,6 +203,133 @@ _gcry_mpi_mul (gcry_mpi_t w, gcry_mpi_t
 	_gcry_mpi_free_limb_space (tmp_limb, tmp_limb_nlimbs);
 }
 
+#ifdef WITH_MARVIN_WORKAROUND
+static void
+_gcry_mpi_mul_sec (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
+{ /* W = U * V computed by mul_cs() on operands padded to the same length */
+  mpi_size_t usize, vsize, wsize;
+  mpi_ptr_t up, vp, wp;
+  int usign, vsign, usecure, vsecure, sign_product;
+  int assign_wp = 0; /* 1: wp is a new buffer for w; 2: new secure buffer, copied out below */
+  int clean_vp = 0; /* set when vp points to a padded copy allocated here */
+  mpi_ptr_t tmp_limb = NULL;
+  unsigned int tmp_limb_nlimbs = 0;
+
+  if (u->nlimbs < v->nlimbs)
+    { /* Swap U and V. */
+      usize = v->nlimbs;
+      usign = v->sign;
+      usecure = mpi_is_secure (v);
+      up    = v->d;
+      vsize = u->nlimbs;
+      vsign = u->sign;
+      vsecure = mpi_is_secure (u);
+      vp    = u->d;
+    }
+  else
+    {
+      usize = u->nlimbs;
+      usign = u->sign;
+      usecure = mpi_is_secure (u);
+      up    = u->d;
+      vsize = v->nlimbs;
+      vsign = v->sign;
+      vsecure = mpi_is_secure (v);
+      vp    = v->d;
+    }
+  sign_product = usign ^ vsign;
+  wp = w->d;
+
+  /* make sure u and v have the same length by extending the limbs to the larger one, now u */
+  if (usize != vsize)
+    {
+      mpi_limb_t *tmp_vp = mpi_alloc_limb_space (usize, vsecure);
+      clean_vp = 1;
+      MPN_ZERO (tmp_vp, (usize - vsize)); /* zero limbs go in front of the copied value */
+      MPN_COPY (tmp_vp + (usize - vsize), vp, vsize);
+      vsize = usize;
+      vp = tmp_vp;
+    }
+
+  /* w == u */
+  /* Ensure W has space enough to store the result.  */
+  wsize = usize + vsize;
+  if (!mpi_is_secure (w) && (mpi_is_secure (u) || mpi_is_secure (v)))
+    {
+      /* w is not allocated in secure space but u or v is.  To make sure
+       * that no temporary results are stored in w, we temporary use
+       * a newly allocated limb space for w */
+      wp = mpi_alloc_limb_space( wsize, 1 );
+      assign_wp = 2; /* mark it as 2 so that we can later copy it back to
+                      * normal memory */
+    }
+  else if (w->alloced < wsize )
+    {
+      if (wp == up || wp == vp)
+        {
+          wp = mpi_alloc_limb_space (wsize, mpi_is_secure (w));
+          assign_wp = 1;
+        }
+      else
+        {
+          mpi_resize(w, wsize );
+          wp = w->d;
+        }
+    }
+  else
+    { /* Make U and V not overlap with W.        */
+      if (wp == up)
+        {
+          /* W and U are identical.  Allocate temporary space for U. */
+          tmp_limb_nlimbs = usize;
+          up = tmp_limb = mpi_alloc_limb_space (usize, usecure);
+          /* Is V identical too?  Keep it identical with U.  */
+          if (wp == vp)
+              vp = up;
+          /* Copy to the temporary space.  */
+          MPN_COPY (up, wp, usize);
+        }
+      else if (wp == vp)
+        {
+          /* W and V are identical.  Allocate temporary space for V. */
+          tmp_limb_nlimbs = vsize;
+          vp = tmp_limb = mpi_alloc_limb_space (vsize, vsecure);
+          /* Copy to the temporary space.  */
+          MPN_COPY (vp, wp, vsize);
+        }
+    }
+
+  if (!vsize)
+    wsize = 0;
+  else
+    {
+      mpi_limb_t *tmp = mpi_alloc_limb_space (wsize * 2, mpi_is_secure (w)); /* scratch for mul_cs: 2 * wsize limbs */
+      mul_cs (wp, up, vp, vsize, tmp);
+      _gcry_mpi_free_limb_space (tmp, wsize * 2);
+    }
+
+  if (clean_vp)
+    {
+      _gcry_mpi_free_limb_space (vp, vsize);
+    }
+  if (assign_wp)
+    {
+      if (assign_wp == 2)
+        {
+          /* copy the temp wp from secure memory back to normal memory */
+          mpi_ptr_t tmp_wp = mpi_alloc_limb_space (wsize, 0);
+          MPN_COPY (tmp_wp, wp, wsize);
+          _gcry_mpi_free_limb_space (wp, 0); /* NOTE(review): count 0 is passed here, the other frees pass the real limb count -- confirm */
+          wp = tmp_wp;
+        }
+      _gcry_mpi_assign_limb_space (w, wp, wsize);
+    }
+  w->nlimbs = wsize; /* always usize + vsize (or 0): the limb count is not trimmed here */
+  w->sign = sign_product;
+  if (tmp_limb)
+      _gcry_mpi_free_limb_space (tmp_limb, tmp_limb_nlimbs);
+}
+#endif /* WITH_MARVIN_WORKAROUND */
 
 void
 _gcry_mpi_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
@@ -210,3 +337,46 @@ _gcry_mpi_mulm (gcry_mpi_t w, gcry_mpi_t
   mpi_mul (w, u, v);
   _gcry_mpi_tdiv_r (w, w, m);
 }
+
+#ifdef WITH_MARVIN_WORKAROUND
+static void
+_gcry_mpi_mod_sec (gcry_mpi_t ret, gcry_mpi_t a, gcry_mpi_t mod)
+{ /* ret = a % mod computed by mod_cs(); ret may be the same MPI as a */
+  size_t asize = a->nlimbs;
+  size_t modsize = mod->nlimbs;
+  size_t tmp_size = mod_limb_numb (asize, modsize); /* scratch limbs required by mod_cs() */
+  mpi_limb_t *tmp_limb = mpi_alloc_limb_space (tmp_size, mpi_is_secure(a));
+  mod_cs (ret->d, a->d, asize, mod->d, modsize, tmp_limb); /* NOTE(review): assumes ret->d has room for modsize limbs -- confirm */
+  /* cut the length to the mod size */
+  ret->nlimbs = mod->nlimbs;
+  _gcry_mpi_free_limb_space (tmp_limb, tmp_size);
+}
+
+/* Reverse the limb order of W in place; the constant time code uses the opposite limb order. */
+static void
+_gcry_mpi_reverse_sec (gcry_mpi_t w)
+{
+  size_t i = 0;
+  for (i = 0; i < w->nlimbs/2; i++) /* swap limb i with its mirror position */
+    {
+      mpi_limb_t t = w->d[i];
+      w->d[i] = w->d[w->nlimbs - i - 1];
+      w->d[w->nlimbs - i - 1] = t;
+    }
+}
+
+void
+_gcry_mpi_mulm_sec (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
+{
+  /* W = (U * V) mod M; the rsa.c caller passes w == u.  U, V and M are reversed in place */
+  _gcry_mpi_reverse_sec (u);
+  _gcry_mpi_reverse_sec (v);
+  _gcry_mpi_reverse_sec (m);
+  _gcry_mpi_mul_sec (w, u, v);
+  _gcry_mpi_mod_sec (w, w, m);
+
+  /* restore the usual limb order; v (and u, unless it is w) is left reversed */
+  _gcry_mpi_reverse_sec (w); /* -- this is the result */
+  _gcry_mpi_reverse_sec (m); /* -- this might be still used by the calling function */
+}
+#endif /* WITH_MARVIN_WORKAROUND */
Index: libgcrypt-1.6.1/src/gcrypt-int.h
===================================================================
--- libgcrypt-1.6.1.orig/src/gcrypt-int.h
+++ libgcrypt-1.6.1/src/gcrypt-int.h
@@ -395,6 +395,9 @@ void _gcry_mpi_subm (gcry_mpi_t w, gcry_
 void _gcry_mpi_mul (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v);
 void _gcry_mpi_mul_ui (gcry_mpi_t w, gcry_mpi_t u, unsigned long v );
 void _gcry_mpi_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m);
+#ifdef WITH_MARVIN_WORKAROUND
+void _gcry_mpi_mulm_sec (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m);
+#endif /* WITH_MARVIN_WORKAROUND */
 void _gcry_mpi_mul_2exp (gcry_mpi_t w, gcry_mpi_t u, unsigned long cnt);
 void _gcry_mpi_div (gcry_mpi_t q, gcry_mpi_t r,
                    gcry_mpi_t dividend, gcry_mpi_t divisor, int round);
@@ -490,6 +493,9 @@ int _gcry_mpi_get_flag (gcry_mpi_t a, en
 #define mpi_mul_2exp(w,u,v)    _gcry_mpi_mul_2exp ((w),(u),(v))
 #define mpi_mul(w,u,v)         _gcry_mpi_mul ((w),(u),(v))
 #define mpi_mulm(w,u,v,m)      _gcry_mpi_mulm ((w),(u),(v),(m))
+#ifdef WITH_MARVIN_WORKAROUND
+#define mpi_mulm_sec(w,u,v,m)  _gcry_mpi_mulm_sec ((w),(u),(v),(m))
+#endif /* WITH_MARVIN_WORKAROUND */
 #define mpi_powm(w,b,e,m)      _gcry_mpi_powm ( (w), (b), (e), (m) )
 #define mpi_tdiv(q,r,a,m)      _gcry_mpi_div ( (q), (r), (a), (m), 0)
 #define mpi_fdiv(q,r,a,m)      _gcry_mpi_div ( (q), (r), (a), (m), -1)
Places

File libgcrypt-CVE-2024-2236_07.patch of Package libgcrypt.39917

Places