blob: 9ccd9ff88071a4d5a6522bee294fbc7f84fad95b [file] [log] [blame]
/*
* Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* You can also choose to distribute this program under the terms of
* the Unmodified Binary Distribution Licence (as given in the file
* COPYING.UBDL), provided that you have satisfied its requirements.
*/
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>
#include <ipxe/bigint.h>
/** @file
*
* Big integer support
*/
/** Minimum number of least significant bytes included in transcription */
#define BIGINT_NTOA_LSB_MIN 16
/**
* Transcribe big integer (for debugging)
*
* @v value0 Element 0 of big integer to be transcribed
* @v size Number of elements
* @ret string Big integer in string form (may be abbreviated)
*/
const char * bigint_ntoa_raw ( const bigint_element_t *value0,
unsigned int size ) {
const bigint_t ( size ) __attribute__ (( may_alias ))
*value = ( ( const void * ) value0 );
bigint_element_t element;
static char buf[256];
unsigned int count;
int threshold;
uint8_t byte;
char *tmp;
int i;
/* Calculate abbreviation threshold, if any */
count = ( size * sizeof ( element ) );
threshold = count;
if ( count >= ( ( sizeof ( buf ) - 1 /* NUL */ ) / 2 /* "xx" */ ) ) {
threshold -= ( ( sizeof ( buf ) - 3 /* "..." */
- ( BIGINT_NTOA_LSB_MIN * 2 /* "xx" */ )
- 1 /* NUL */ ) / 2 /* "xx" */ );
}
/* Transcribe bytes, abbreviating with "..." if necessary */
for ( tmp = buf, i = ( count - 1 ) ; i >= 0 ; i-- ) {
element = value->element[ i / sizeof ( element ) ];
byte = ( element >> ( 8 * ( i % sizeof ( element ) ) ) );
tmp += sprintf ( tmp, "%02x", byte );
if ( i == threshold ) {
tmp += sprintf ( tmp, "..." );
i = BIGINT_NTOA_LSB_MIN;
}
}
assert ( tmp < ( buf + sizeof ( buf ) ) );
return buf;
}
/**
* Conditionally swap big integers (in constant time)
*
* @v first0 Element 0 of big integer to be conditionally swapped
* @v second0 Element 0 of big integer to be conditionally swapped
* @v size Number of elements in big integers
* @v swap Swap first and second big integers
*/
void bigint_swap_raw ( bigint_element_t *first0, bigint_element_t *second0,
unsigned int size, int swap ) {
bigint_element_t mask;
bigint_element_t xor;
unsigned int i;
/* Construct mask */
mask = ( ( bigint_element_t ) ( ! swap ) - 1 );
/* Conditionally swap elements */
for ( i = 0 ; i < size ; i++ ) {
xor = ( mask & ( first0[i] ^ second0[i] ) );
first0[i] ^= xor;
second0[i] ^= xor;
}
}
/**
* Multiply big integers
*
* @v multiplicand0 Element 0 of big integer to be multiplied
* @v multiplicand_size Number of elements in multiplicand
* @v multiplier0 Element 0 of big integer to be multiplied
* @v multiplier_size Number of elements in multiplier
* @v result0 Element 0 of big integer to hold result
*/
void bigint_multiply_raw ( const bigint_element_t *multiplicand0,
unsigned int multiplicand_size,
const bigint_element_t *multiplier0,
unsigned int multiplier_size,
bigint_element_t *result0 ) {
unsigned int result_size = ( multiplicand_size + multiplier_size );
const bigint_t ( multiplicand_size ) __attribute__ (( may_alias ))
*multiplicand = ( ( const void * ) multiplicand0 );
const bigint_t ( multiplier_size ) __attribute__ (( may_alias ))
*multiplier = ( ( const void * ) multiplier0 );
bigint_t ( result_size ) __attribute__ (( may_alias ))
*result = ( ( void * ) result0 );
bigint_element_t multiplicand_element;
const bigint_element_t *multiplier_element;
bigint_element_t *result_element;
bigint_element_t carry_element;
unsigned int i;
unsigned int j;
/* Zero required portion of result
*
* All elements beyond the length of the multiplier will be
* written before they are read, and so do not need to be
* zeroed in advance.
*/
memset ( result, 0, sizeof ( *multiplier ) );
/* Multiply integers one element at a time, adding the low
* half of the double-element product directly into the
* result, and maintaining a running single-element carry.
*
* The running carry can never overflow beyond a single
* element. At each step, the calculation we perform is:
*
* carry:result[i+j] := ( ( multiplicand[i] * multiplier[j] )
* + result[i+j] + carry )
*
* The maximum value (for n-bit elements) is therefore:
*
* (2^n - 1)*(2^n - 1) + (2^n - 1) + (2^n - 1) = 2^(2n) - 1
*
* This is precisely the maximum value for a 2n-bit integer,
* and so the carry out remains within the range of an n-bit
* integer, i.e. a single element.
*/
for ( i = 0 ; i < multiplicand_size ; i++ ) {
multiplicand_element = multiplicand->element[i];
multiplier_element = &multiplier->element[0];
result_element = &result->element[i];
carry_element = 0;
for ( j = 0 ; j < multiplier_size ; j++ ) {
bigint_multiply_one ( multiplicand_element,
*(multiplier_element++),
result_element++,
&carry_element );
}
*result_element = carry_element;
}
}
/**
* Reduce big integer R^2 modulo N
*
* @v modulus0 Element 0 of big integer modulus
* @v result0 Element 0 of big integer to hold result
* @v size Number of elements in modulus and result
*
* Reduce the value R^2 modulo N, where R=2^n and n is the number of
* bits in the representation of the modulus N, including any leading
* zero bits.
*/
void bigint_reduce_raw ( const bigint_element_t *modulus0,
bigint_element_t *result0, unsigned int size ) {
const bigint_t ( size ) __attribute__ (( may_alias ))
*modulus = ( ( const void * ) modulus0 );
bigint_t ( size ) __attribute__ (( may_alias ))
*result = ( ( void * ) result0 );
const unsigned int width = ( 8 * sizeof ( bigint_element_t ) );
unsigned int shift;
int max;
int sign;
int msb;
int carry;
/* We have the constants:
*
* N = modulus
*
* n = number of bits in the modulus (including any leading zeros)
*
* R = 2^n
*
* Let r be the extension of the n-bit result register by a
* separate two's complement sign bit, such that -R <= r < R,
* and define:
*
* x = r * 2^k
*
* as the value being reduced modulo N, where k is a
* non-negative integer bit shift.
*
* We want to reduce the initial value R^2=2^(2n), which we
* may trivially represent using r=1 and k=2n.
*
* We then iterate over decrementing k, maintaining the loop
* invariant:
*
* -N <= r < N
*
* On each iteration we must first double r, to compensate for
* having decremented k:
*
* k' = k - 1
*
* r' = 2r
*
* x = r * 2^k = 2r * 2^(k-1) = r' * 2^k'
*
* Note that doubling the n-bit result register will create a
* value of n+1 bits: this extra bit needs to be handled
* separately during the calculation.
*
* We then subtract N (if r is currently non-negative) or add
* N (if r is currently negative) to restore the loop
* invariant:
*
* 0 <= r < N => r" = 2r - N => -N <= r" < N
* -N <= r < 0 => r" = 2r + N => -N <= r" < N
*
* Note that since N may use all n bits, the most significant
* bit of the n-bit result register is not a valid two's
* complement sign bit for r: the extra sign bit therefore
* also needs to be handled separately.
*
* Once we reach k=0, we have x=r and therefore:
*
* -N <= x < N
*
* After this last loop iteration (with k=0), we may need to
* add a single multiple of N to ensure that x is positive,
* i.e. lies within the range 0 <= x < N.
*
* Since neither the modulus nor the value R^2 are secret, we
* may elide approximately half of the total number of
* iterations by constructing the initial representation of
* R^2 as r=2^m and k=2n-m (for some m such that 2^m < N).
*/
/* Initialise x=R^2 */
memset ( result, 0, sizeof ( *result ) );
max = ( bigint_max_set_bit ( modulus ) - 2 );
if ( max < 0 ) {
/* Degenerate case of N=0 or N=1: return a zero result */
return;
}
bigint_set_bit ( result, max );
shift = ( ( 2 * size * width ) - max );
sign = 0;
/* Iterate as described above */
while ( shift-- ) {
/* Calculate 2r, storing extra bit separately */
msb = bigint_shl ( result );
/* Add or subtract N according to current sign */
if ( sign ) {
carry = bigint_add ( modulus, result );
} else {
carry = bigint_subtract ( modulus, result );
}
/* Calculate new sign of result
*
* We know the result lies in the range -N <= r < N
* and so the tuple (old sign, msb, carry) cannot ever
* take the values (0, 1, 0) or (1, 0, 0). We can
* therefore treat these as don't-care inputs, which
* allows us to simplify the boolean expression by
* ignoring the old sign completely.
*/
assert ( ( sign == msb ) || carry );
sign = ( msb ^ carry );
}
/* Add N to make result positive if necessary */
if ( sign )
bigint_add ( modulus, result );
/* Sanity check */
assert ( ! bigint_is_geq ( result, modulus ) );
}
/**
* Compute inverse of odd big integer modulo any power of two
*
* @v invertend0 Element 0 of odd big integer to be inverted
* @v inverse0 Element 0 of big integer to hold result
* @v size Number of elements in invertend and result
*/
void bigint_mod_invert_raw ( const bigint_element_t *invertend0,
bigint_element_t *inverse0, unsigned int size ) {
const bigint_t ( size ) __attribute__ (( may_alias ))
*invertend = ( ( const void * ) invertend0 );
bigint_t ( size ) __attribute__ (( may_alias ))
*inverse = ( ( void * ) inverse0 );
bigint_element_t accum;
bigint_element_t bit;
unsigned int i;
/* Sanity check */
assert ( bigint_bit_is_set ( invertend, 0 ) );
/* Initialise output */
memset ( inverse, 0xff, sizeof ( *inverse ) );
/* Compute inverse modulo 2^(width)
*
* This method is a lightly modified version of the pseudocode
* presented in "A New Algorithm for Inversion mod p^k (Koç,
* 2017)".
*
* Each inner loop iteration calculates one bit of the
* inverse. The residue value is the two's complement
* negation of the value "b" as used by Koç, to allow for
* division by two using a logical right shift (since we have
* no arithmetic right shift operation for big integers).
*
* The residue is stored in the as-yet uncalculated portion of
* the inverse. The size of the residue therefore decreases
* by one element for each outer loop iteration. Trivial
* inspection of the algorithm shows that any higher bits
* could not contribute to the eventual output value, and so
* we may safely reuse storage this way.
*
* Due to the suffix property of inverses mod 2^k, the result
* represents the least significant bits of the inverse modulo
* an arbitrarily large 2^k.
*/
for ( i = size ; i > 0 ; i-- ) {
const bigint_t ( i ) __attribute__ (( may_alias ))
*addend = ( ( const void * ) invertend );
bigint_t ( i ) __attribute__ (( may_alias ))
*residue = ( ( void * ) inverse );
/* Calculate one element's worth of inverse bits */
for ( accum = 0, bit = 1 ; bit ; bit <<= 1 ) {
if ( bigint_bit_is_set ( residue, 0 ) ) {
accum |= bit;
bigint_add ( addend, residue );
}
bigint_shr ( residue );
}
/* Store in the element no longer required to hold residue */
inverse->element[ i - 1 ] = accum;
}
/* Correct order of inverse elements */
for ( i = 0 ; i < ( size / 2 ) ; i++ ) {
accum = inverse->element[i];
inverse->element[i] = inverse->element[ size - 1 - i ];
inverse->element[ size - 1 - i ] = accum;
}
}
/**
* Perform relaxed Montgomery reduction (REDC) of a big integer
*
* @v modulus0 Element 0 of big integer odd modulus
* @v value0 Element 0 of big integer to be reduced
* @v result0 Element 0 of big integer to hold result
* @v size Number of elements in modulus and result
* @ret carry Carry out
*
* The value to be reduced will be made divisible by the size of the
* modulus while retaining its residue class (i.e. multiples of the
* modulus will be added until the low half of the value is zero).
*
* The result may be expressed as
*
* tR = x + mN
*
* where x is the input value, N is the modulus, R=2^n (where n is the
* number of bits in the representation of the modulus, including any
* leading zero bits), and m is the number of multiples of the modulus
* added to make the result tR divisible by R.
*
* The maximum addend is mN <= (R-1)*N (and such an m can be proven to
* exist since N is limited to being odd and therefore coprime to R).
*
* Since the result of this addition is one bit larger than the input
* value, a carry out bit is also returned. The caller may be able to
* prove that the carry out is always zero, in which case it may be
* safely ignored.
*
* The upper half of the output value (i.e. t) will also be copied to
* the result pointer. It is permissible for the result pointer to
* overlap the lower half of the input value.
*
* External knowledge of constraints on the modulus and the input
* value may be used to prove constraints on the result. The
* constraint on the modulus may be generally expressed as
*
* R > kN
*
* for some positive integer k. The value k=1 is allowed, and simply
* expresses that the modulus fits within the number of bits in its
* own representation.
*
* For classic Montgomery reduction, we have k=1, i.e. R > N and a
* separate constraint that the input value is in the range x < RN.
* This gives the result constraint
*
* tR < RN + (R-1)N
* < 2RN - N
* < 2RN
* t < 2N
*
* A single subtraction of the modulus may therefore be required to
* bring it into the range t < N.
*
* When the input value is known to be a product of two integers A and
* B, with A < aN and B < bN, we get the result constraint
*
* tR < abN^2 + (R-1)N
* < (ab/k)RN + RN - N
* < (1 + ab/k)RN
* t < (1 + ab/k)N
*
* If we have k=a=b=1, i.e. R > N with A < N and B < N, then the
* result is in the range t < 2N and may require a single subtraction
* of the modulus to bring it into the range t < N so that it may be
* used as an input on a subsequent iteration.
*
* If we have k=4 and a=b=2, i.e. R > 4N with A < 2N and B < 2N, then
* the result is in the range t < 2N and may immediately be used as an
* input on a subsequent iteration, without requiring a subtraction.
*
* Larger values of k may be used to allow for larger values of a and
* b, which can be useful to elide intermediate reductions in a
* calculation chain that involves additions and subtractions between
* multiplications (as used in elliptic curve point addition, for
* example). As a general rule: each intermediate addition or
* subtraction will require k to be doubled.
*
* When the input value is known to be a single integer A, with A < aN
* (as used when converting out of Montgomery form), we get the result
* constraint
*
* tR < aN + (R-1)N
* < RN + (a-1)N
*
* If we have a=1, i.e. A < N, then the constraint becomes
*
* tR < RN
* t < N
*
* and so the result is immediately in the range t < N with no
* subtraction of the modulus required.
*
* For any larger value of a, the result value t=N becomes possible.
* Additional external knowledge may potentially be used to prove that
* t=N cannot occur. For example: if the caller is performing modular
* exponentiation with a prime modulus (or, more generally, a modulus
* that is coprime to the base), then there is no way for a non-zero
* base value to end up producing an exact multiple of the modulus.
* If t=N cannot be disproved, then conversion out of Montgomery form
* may require an additional subtraction of the modulus.
*/
int bigint_montgomery_relaxed_raw ( const bigint_element_t *modulus0,
bigint_element_t *value0,
bigint_element_t *result0,
unsigned int size ) {
const bigint_t ( size ) __attribute__ (( may_alias ))
*modulus = ( ( const void * ) modulus0 );
union {
bigint_t ( size * 2 ) full;
struct {
bigint_t ( size ) low;
bigint_t ( size ) high;
} __attribute__ (( packed ));
} __attribute__ (( may_alias )) *value = ( ( void * ) value0 );
bigint_t ( size ) __attribute__ (( may_alias ))
*result = ( ( void * ) result0 );
static bigint_t ( 1 ) cached;
static bigint_t ( 1 ) negmodinv;
bigint_element_t multiple;
bigint_element_t carry;
unsigned int i;
unsigned int j;
int overflow;
/* Sanity checks */
assert ( bigint_bit_is_set ( modulus, 0 ) );
/* Calculate inverse (or use cached version) */
if ( cached.element[0] != modulus->element[0] ) {
bigint_mod_invert ( modulus, &negmodinv );
negmodinv.element[0] = -negmodinv.element[0];
cached.element[0] = modulus->element[0];
}
/* Perform multiprecision Montgomery reduction */
for ( i = 0 ; i < size ; i++ ) {
/* Determine scalar multiple for this round */
multiple = ( value->low.element[i] * negmodinv.element[0] );
/* Multiply value to make it divisible by 2^(width*i) */
carry = 0;
for ( j = 0 ; j < size ; j++ ) {
bigint_multiply_one ( multiple, modulus->element[j],
&value->full.element[ i + j ],
&carry );
}
/* Since value is now divisible by 2^(width*i), we
* know that the current low element must have been
* zeroed.
*/
assert ( value->low.element[i] == 0 );
/* Store the multiplication carry out in the result,
* avoiding the need to immediately propagate the
* carry through the remaining elements.
*/
result->element[i] = carry;
}
/* Add the accumulated carries */
overflow = bigint_add ( result, &value->high );
/* Copy to result buffer */
bigint_copy ( &value->high, result );
return overflow;
}
/**
* Perform classic Montgomery reduction (REDC) of a big integer
*
* @v modulus0 Element 0 of big integer odd modulus
* @v value0 Element 0 of big integer to be reduced
* @v result0 Element 0 of big integer to hold result
* @v size Number of elements in modulus and result
*/
void bigint_montgomery_raw ( const bigint_element_t *modulus0,
bigint_element_t *value0,
bigint_element_t *result0,
unsigned int size ) {
const bigint_t ( size ) __attribute__ (( may_alias ))
*modulus = ( ( const void * ) modulus0 );
union {
bigint_t ( size * 2 ) full;
struct {
bigint_t ( size ) low;
bigint_t ( size ) high;
} __attribute__ (( packed ));
} __attribute__ (( may_alias )) *value = ( ( void * ) value0 );
bigint_t ( size ) __attribute__ (( may_alias ))
*result = ( ( void * ) result0 );
int overflow;
int underflow;
/* Sanity check */
assert ( ! bigint_is_geq ( &value->high, modulus ) );
/* Perform relaxed Montgomery reduction */
overflow = bigint_montgomery_relaxed ( modulus, &value->full, result );
/* Conditionally subtract the modulus once */
underflow = bigint_subtract ( modulus, result );
bigint_swap ( result, &value->high, ( underflow & ~overflow ) );
/* Sanity check */
assert ( ! bigint_is_geq ( result, modulus ) );
}
/**
* Perform generalised exponentiation via a Montgomery ladder
*
* @v result0 Element 0 of result (initialised to identity element)
* @v multiple0 Element 0 of multiple (initialised to generator)
* @v size Number of elements in result and multiple
* @v exponent0 Element 0 of exponent
* @v exponent_size Number of elements in exponent
* @v op Montgomery ladder commutative operation
* @v ctx Operation context (if needed)
* @v tmp Temporary working space (if needed)
*
* The Montgomery ladder may be used to perform any operation that is
* isomorphic to exponentiation, i.e. to compute the result
*
* r = g^e = g * g * g * g * .... * g
*
* for an arbitrary commutative operation "*", generator "g" and
* exponent "e".
*
* The result "r" is computed in constant time (assuming that the
* underlying operation is constant time) in k steps, where k is the
* number of bits in the big integer representation of the exponent.
*
* The result "r" must be initialised to the operation's identity
* element, and the multiple must be initialised to the generator "g".
* On exit, the multiple will contain
*
* m = r * g = g^(e+1)
*
* Note that the terminology used here refers to exponentiation
* defined as repeated multiplication, but that the ladder may equally
* well be used to perform any isomorphic operation (such as
* multiplication defined as repeated addition).
*/
void bigint_ladder_raw ( bigint_element_t *result0,
bigint_element_t *multiple0, unsigned int size,
const bigint_element_t *exponent0,
unsigned int exponent_size, bigint_ladder_op_t *op,
const void *ctx, void *tmp ) {
bigint_t ( size ) __attribute__ (( may_alias ))
*result = ( ( void * ) result0 );
bigint_t ( size ) __attribute__ (( may_alias ))
*multiple = ( ( void * ) multiple0 );
const bigint_t ( exponent_size ) __attribute__ (( may_alias ))
*exponent = ( ( const void * ) exponent0 );
const unsigned int width = ( 8 * sizeof ( bigint_element_t ) );
unsigned int bit = ( exponent_size * width );
int toggle = 0;
int previous;
/* We have two physical storage locations: Rres (the "result"
* register) and Rmul (the "multiple" register).
*
* On entry we have:
*
* Rres = g^0 = 1 (the identity element)
* Rmul = g (the generator)
*
* For calculation purposes, define two virtual registers R[0]
* and R[1], mapped to the underlying physical storage
* locations via a boolean toggle state "t":
*
* R[t] -> Rres
* R[~t] -> Rmul
*
* Define the initial toggle state to be t=0, so that we have:
*
* R[0] = g^0 = 1 (the identity element)
* R[1] = g (the generator)
*
* The Montgomery ladder then iterates over each bit "b" of
* the exponent "e" (from MSB down to LSB), computing:
*
* R[1] := R[0] * R[1], R[0] := R[0] * R[0] (if b=0)
* R[0] := R[1] * R[0], R[1] := R[1] * R[1] (if b=1)
*
* i.e.
*
* R[~b] := R[b] * R[~b], R[b] := R[b] * R[b]
*
* To avoid data-dependent memory access patterns, we
* implement this as:
*
* Rmul := Rres * Rmul
* Rres := Rres * Rres
*
* and update the toggle state "t" (via constant-time
* conditional swaps) as needed to ensure that R[b] always
* maps to Rres and R[~b] always maps to Rmul.
*/
while ( bit-- ) {
/* Update toggle state t := b
*
* If the new toggle state is not equal to the old
* toggle state, then we must swap R[0] and R[1] (in
* constant time).
*/
previous = toggle;
toggle = bigint_bit_is_set ( exponent, bit );
bigint_swap ( result, multiple, ( toggle ^ previous ) );
/* Calculate Rmul = R[~b] := R[b] * R[~b] = Rres * Rmul */
op ( result->element, multiple->element, size, ctx, tmp );
/* Calculate Rres = R[b] := R[b] * R[b] = Rres * Rres */
op ( result->element, result->element, size, ctx, tmp );
}
/* Reset toggle state to zero */
bigint_swap ( result, multiple, toggle );
}
/**
* Perform modular multiplication as part of a Montgomery ladder
*
* @v operand Element 0 of first input operand (may overlap result)
* @v result Element 0 of second input operand and result
* @v size Number of elements in operands and result
* @v ctx Operation context (odd modulus, or NULL)
* @v tmp Temporary working space
*/
void bigint_mod_exp_ladder ( const bigint_element_t *multiplier0,
bigint_element_t *result0, unsigned int size,
const void *ctx, void *tmp ) {
const bigint_t ( size ) __attribute__ (( may_alias ))
*multiplier = ( ( const void * ) multiplier0 );
bigint_t ( size ) __attribute__ (( may_alias ))
*result = ( ( void * ) result0 );
const bigint_t ( size ) __attribute__ (( may_alias ))
*modulus = ( ( const void * ) ctx );
bigint_t ( size * 2 ) __attribute__ (( may_alias ))
*product = ( ( void * ) tmp );
/* Multiply and reduce */
bigint_multiply ( result, multiplier, product );
if ( modulus ) {
bigint_montgomery ( modulus, product, result );
} else {
bigint_shrink ( product, result );
}
}
/**
* Perform modular exponentiation of big integers
*
* @v base0 Element 0 of big integer base
* @v modulus0 Element 0 of big integer modulus
* @v exponent0 Element 0 of big integer exponent
* @v result0 Element 0 of big integer to hold result
* @v size Number of elements in base, modulus, and result
* @v exponent_size Number of elements in exponent
* @v tmp Temporary working space
*/
void bigint_mod_exp_raw ( const bigint_element_t *base0,
const bigint_element_t *modulus0,
const bigint_element_t *exponent0,
bigint_element_t *result0,
unsigned int size, unsigned int exponent_size,
void *tmp ) {
const bigint_t ( size ) __attribute__ (( may_alias )) *base =
( ( const void * ) base0 );
const bigint_t ( size ) __attribute__ (( may_alias )) *modulus =
( ( const void * ) modulus0 );
const bigint_t ( exponent_size ) __attribute__ (( may_alias ))
*exponent = ( ( const void * ) exponent0 );
bigint_t ( size ) __attribute__ (( may_alias )) *result =
( ( void * ) result0 );
const unsigned int width = ( 8 * sizeof ( bigint_element_t ) );
struct {
struct {
bigint_t ( size ) modulus;
bigint_t ( size ) stash;
};
union {
bigint_t ( 2 * size ) full;
bigint_t ( size ) low;
} product;
} *temp = tmp;
const uint8_t one[1] = { 1 };
bigint_element_t submask;
unsigned int subsize;
unsigned int scale;
unsigned int i;
/* Sanity check */
assert ( sizeof ( *temp ) == bigint_mod_exp_tmp_len ( modulus ) );
/* Handle degenerate case of zero modulus */
if ( ! bigint_max_set_bit ( modulus ) ) {
memset ( result, 0, sizeof ( *result ) );
return;
}
/* Factor modulus as (N * 2^scale) where N is odd */
bigint_copy ( modulus, &temp->modulus );
for ( scale = 0 ; ( ! bigint_bit_is_set ( &temp->modulus, 0 ) ) ;
scale++ ) {
bigint_shr ( &temp->modulus );
}
subsize = ( ( scale + width - 1 ) / width );
submask = ( ( 1UL << ( scale % width ) ) - 1 );
if ( ! submask )
submask = ~submask;
/* Calculate (R^2 mod N) */
bigint_reduce ( &temp->modulus, &temp->stash );
/* Initialise result = Montgomery(1, R^2 mod N) */
bigint_grow ( &temp->stash, &temp->product.full );
bigint_montgomery ( &temp->modulus, &temp->product.full, result );
/* Convert base into Montgomery form */
bigint_multiply ( base, &temp->stash, &temp->product.full );
bigint_montgomery ( &temp->modulus, &temp->product.full,
&temp->stash );
/* Calculate x1 = base^exponent modulo N */
bigint_ladder ( result, &temp->stash, exponent, bigint_mod_exp_ladder,
&temp->modulus, &temp->product );
/* Convert back out of Montgomery form */
bigint_grow ( result, &temp->product.full );
bigint_montgomery_relaxed ( &temp->modulus, &temp->product.full,
result );
/* Handle even moduli via Garner's algorithm */
if ( subsize ) {
const bigint_t ( subsize ) __attribute__ (( may_alias ))
*subbase = ( ( const void * ) base );
bigint_t ( subsize ) __attribute__ (( may_alias ))
*submodulus = ( ( void * ) &temp->modulus );
bigint_t ( subsize ) __attribute__ (( may_alias ))
*substash = ( ( void * ) &temp->stash );
bigint_t ( subsize ) __attribute__ (( may_alias ))
*subresult = ( ( void * ) result );
union {
bigint_t ( 2 * subsize ) full;
bigint_t ( subsize ) low;
} __attribute__ (( may_alias ))
*subproduct = ( ( void * ) &temp->product.full );
/* Calculate x2 = base^exponent modulo 2^k */
bigint_init ( substash, one, sizeof ( one ) );
bigint_copy ( subbase, submodulus );
bigint_ladder ( substash, submodulus, exponent,
bigint_mod_exp_ladder, NULL, subproduct );
/* Reconstruct N */
bigint_copy ( modulus, &temp->modulus );
for ( i = 0 ; i < scale ; i++ )
bigint_shr ( &temp->modulus );
/* Calculate N^-1 modulo 2^k */
bigint_mod_invert ( submodulus, &subproduct->low );
bigint_copy ( &subproduct->low, submodulus );
/* Calculate y = (x2 - x1) * N^-1 modulo 2^k */
bigint_subtract ( subresult, substash );
bigint_multiply ( substash, submodulus, &subproduct->full );
subproduct->low.element[ subsize - 1 ] &= submask;
bigint_grow ( &subproduct->low, &temp->stash );
/* Reconstruct N */
bigint_mod_invert ( submodulus, &subproduct->low );
bigint_copy ( &subproduct->low, submodulus );
/* Calculate x = x1 + N * y */
bigint_multiply ( &temp->modulus, &temp->stash,
&temp->product.full );
bigint_add ( &temp->product.low, result );
}
}