[crypto] Use constant-time big integer multiplication
Big integer multiplication currently performs immediate carry
propagation from each step of the long multiplication, relying on the
fact that the overall result has a known maximum value to minimise the
number of carries performed without ever needing to explicitly check
against the result buffer size.
This is not a constant-time algorithm, since the number of carries
performed will be a function of the input values. We could make it
constant-time by always continuing to propagate the carry until
reaching the end of the result buffer, but this would introduce a
large number of redundant zero carries.
Require callers of bigint_multiply() to provide a temporary carry
storage buffer, of the same size as the result buffer. This allows
the carry-out from the accumulation of each double-element product to
be accumulated in the temporary carry space, and then added in via a
single call to bigint_add() after the multiplication is complete.
Since the structure of big integer multiplication is identical across
all current CPU architectures, provide a single shared implementation
of bigint_multiply(). The architecture-specific operation then
becomes the multiplication of two big integer elements and the
accumulation of the double-element product.
Note that any intermediate carry arising from accumulating the lower
half of the double-element product may be added to the upper half of
the double-element product without risk of overflow, since the result
of multiplying two n-bit integers can never have all n bits set in its
upper half. This simplifies the carry calculations for architectures
such as RISC-V and LoongArch64 that do not have a carry flag.
Signed-off-by: Michael Brown <mcb30@ipxe.org>
diff --git a/src/arch/arm32/core/arm32_bigint.c b/src/arch/arm32/core/arm32_bigint.c
deleted file mode 100644
index 29fb40a..0000000
--- a/src/arch/arm32/core/arm32_bigint.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- * You can also choose to distribute this program under the terms of
- * the Unmodified Binary Distribution Licence (as given in the file
- * COPYING.UBDL), provided that you have satisfied its requirements.
- */
-
-FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
-
-#include <stdint.h>
-#include <string.h>
-#include <ipxe/bigint.h>
-
-/** @file
- *
- * Big integer support
- */
-
-/**
- * Multiply big integers
- *
- * @v multiplicand0 Element 0 of big integer to be multiplied
- * @v multiplicand_size Number of elements in multiplicand
- * @v multiplier0 Element 0 of big integer to be multiplied
- * @v multiplier_size Number of elements in multiplier
- * @v result0 Element 0 of big integer to hold result
- */
-void bigint_multiply_raw ( const uint32_t *multiplicand0,
- unsigned int multiplicand_size,
- const uint32_t *multiplier0,
- unsigned int multiplier_size,
- uint32_t *result0 ) {
- unsigned int result_size = ( multiplicand_size + multiplier_size );
- const bigint_t ( multiplicand_size ) __attribute__ (( may_alias ))
- *multiplicand = ( ( const void * ) multiplicand0 );
- const bigint_t ( multiplier_size ) __attribute__ (( may_alias ))
- *multiplier = ( ( const void * ) multiplier0 );
- bigint_t ( result_size ) __attribute__ (( may_alias ))
- *result = ( ( void * ) result0 );
- unsigned int i;
- unsigned int j;
- uint32_t multiplicand_element;
- uint32_t multiplier_element;
- uint32_t *result_elements;
- uint32_t discard_low;
- uint32_t discard_high;
- uint32_t discard_temp;
-
- /* Zero result */
- memset ( result, 0, sizeof ( *result ) );
-
- /* Multiply integers one element at a time */
- for ( i = 0 ; i < multiplicand_size ; i++ ) {
- multiplicand_element = multiplicand->element[i];
- for ( j = 0 ; j < multiplier_size ; j++ ) {
- multiplier_element = multiplier->element[j];
- result_elements = &result->element[ i + j ];
- /* Perform a single multiply, and add the
- * resulting double-element into the result,
- * carrying as necessary. The carry can
- * never overflow beyond the end of the
- * result, since:
- *
- * a < 2^{n}, b < 2^{m} => ab < 2^{n+m}
- */
- __asm__ __volatile__ ( "umull %1, %2, %5, %6\n\t"
- "ldr %3, [%0]\n\t"
- "adds %3, %1\n\t"
- "stmia %0!, {%3}\n\t"
- "ldr %3, [%0]\n\t"
- "adcs %3, %2\n\t"
- "stmia %0!, {%3}\n\t"
- "bcc 2f\n\t"
- "\n1:\n\t"
- "ldr %3, [%0]\n\t"
- "adcs %3, #0\n\t"
- "stmia %0!, {%3}\n\t"
- "bcs 1b\n\t"
- "\n2:\n\t"
- : "+l" ( result_elements ),
- "=l" ( discard_low ),
- "=l" ( discard_high ),
- "=l" ( discard_temp ),
- "+m" ( *result )
- : "l" ( multiplicand_element ),
- "l" ( multiplier_element )
- : "cc" );
- }
- }
-}
diff --git a/src/arch/arm32/include/bits/bigint.h b/src/arch/arm32/include/bits/bigint.h
index e4b511d..0a368a0 100644
--- a/src/arch/arm32/include/bits/bigint.h
+++ b/src/arch/arm32/include/bits/bigint.h
@@ -309,10 +309,34 @@
*(--out_byte) = *(value_byte++);
}
-extern void bigint_multiply_raw ( const uint32_t *multiplicand0,
- unsigned int multiplicand_size,
- const uint32_t *multiplier0,
- unsigned int multiplier_size,
- uint32_t *value0 );
+/**
+ * Multiply big integer elements
+ *
+ * @v multiplicand Multiplicand element
+ * @v multiplier Multiplier element
+ * @v result Result element pair
+ * @v carry Carry element
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_multiply_one ( const uint32_t multiplicand, const uint32_t multiplier,
+ uint32_t *result, uint32_t *carry ) {
+ uint32_t discard_low;
+ uint32_t discard_high;
+
+ __asm__ __volatile__ ( /* Perform multiplication */
+ "umull %0, %1, %5, %6\n\t"
+ /* Accumulate result */
+ "adds %2, %0\n\t"
+ "adcs %3, %1\n\t"
+ /* Accumulate carry (cannot overflow) */
+ "adc %4, #0\n\t"
+ : "=r" ( discard_low ),
+ "=r" ( discard_high ),
+ "+r" ( result[0] ),
+ "+r" ( result[1] ),
+ "+r" ( *carry )
+ : "r" ( multiplicand ),
+ "r" ( multiplier ) );
+}
#endif /* _BITS_BIGINT_H */
diff --git a/src/arch/arm64/core/arm64_bigint.c b/src/arch/arm64/core/arm64_bigint.c
deleted file mode 100644
index 7740f1a..0000000
--- a/src/arch/arm64/core/arm64_bigint.c
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (C) 2016 Michael Brown <mbrown@fensystems.co.uk>.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- * You can also choose to distribute this program under the terms of
- * the Unmodified Binary Distribution Licence (as given in the file
- * COPYING.UBDL), provided that you have satisfied its requirements.
- */
-
-FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
-
-#include <stdint.h>
-#include <string.h>
-#include <ipxe/bigint.h>
-
-/** @file
- *
- * Big integer support
- */
-
-/**
- * Multiply big integers
- *
- * @v multiplicand0 Element 0 of big integer to be multiplied
- * @v multiplicand_size Number of elements in multiplicand
- * @v multiplier0 Element 0 of big integer to be multiplied
- * @v multiplier_size Number of elements in multiplier
- * @v result0 Element 0 of big integer to hold result
- */
-void bigint_multiply_raw ( const uint64_t *multiplicand0,
- unsigned int multiplicand_size,
- const uint64_t *multiplier0,
- unsigned int multiplier_size,
- uint64_t *result0 ) {
- unsigned int result_size = ( multiplicand_size + multiplier_size );
- const bigint_t ( multiplicand_size ) __attribute__ (( may_alias ))
- *multiplicand = ( ( const void * ) multiplicand0 );
- const bigint_t ( multiplier_size ) __attribute__ (( may_alias ))
- *multiplier = ( ( const void * ) multiplier0 );
- bigint_t ( result_size ) __attribute__ (( may_alias ))
- *result = ( ( void * ) result0 );
- unsigned int i;
- unsigned int j;
- uint64_t multiplicand_element;
- uint64_t multiplier_element;
- uint64_t *result_elements;
- uint64_t discard_low;
- uint64_t discard_high;
- uint64_t discard_temp_low;
- uint64_t discard_temp_high;
-
- /* Zero result */
- memset ( result, 0, sizeof ( *result ) );
-
- /* Multiply integers one element at a time */
- for ( i = 0 ; i < multiplicand_size ; i++ ) {
- multiplicand_element = multiplicand->element[i];
- for ( j = 0 ; j < multiplier_size ; j++ ) {
- multiplier_element = multiplier->element[j];
- result_elements = &result->element[ i + j ];
- /* Perform a single multiply, and add the
- * resulting double-element into the result,
- * carrying as necessary. The carry can
- * never overflow beyond the end of the
- * result, since:
- *
- * a < 2^{n}, b < 2^{m} => ab < 2^{n+m}
- */
- __asm__ __volatile__ ( "mul %1, %6, %7\n\t"
- "umulh %2, %6, %7\n\t"
- "ldp %3, %4, [%0]\n\t"
- "adds %3, %3, %1\n\t"
- "adcs %4, %4, %2\n\t"
- "stp %3, %4, [%0], #16\n\t"
- "bcc 2f\n\t"
- "\n1:\n\t"
- "ldr %3, [%0]\n\t"
- "adcs %3, %3, xzr\n\t"
- "str %3, [%0], #8\n\t"
- "bcs 1b\n\t"
- "\n2:\n\t"
- : "+r" ( result_elements ),
- "=&r" ( discard_low ),
- "=&r" ( discard_high ),
- "=r" ( discard_temp_low ),
- "=r" ( discard_temp_high ),
- "+m" ( *result )
- : "r" ( multiplicand_element ),
- "r" ( multiplier_element )
- : "cc" );
- }
- }
-}
diff --git a/src/arch/arm64/include/bits/bigint.h b/src/arch/arm64/include/bits/bigint.h
index 0d08bbd..ca9feaf 100644
--- a/src/arch/arm64/include/bits/bigint.h
+++ b/src/arch/arm64/include/bits/bigint.h
@@ -310,10 +310,35 @@
*(--out_byte) = *(value_byte++);
}
-extern void bigint_multiply_raw ( const uint64_t *multiplicand0,
- unsigned int multiplicand_size,
- const uint64_t *multiplier0,
- unsigned int multiplier_size,
- uint64_t *value0 );
+/**
+ * Multiply big integer elements
+ *
+ * @v multiplicand Multiplicand element
+ * @v multiplier Multiplier element
+ * @v result Result element pair
+ * @v carry Carry element
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_multiply_one ( const uint64_t multiplicand, const uint64_t multiplier,
+ uint64_t *result, uint64_t *carry ) {
+ uint64_t discard_low;
+ uint64_t discard_high;
+
+ __asm__ __volatile__ ( /* Perform multiplication */
+ "mul %0, %5, %6\n\t"
+ "umulh %1, %5, %6\n\t"
+ /* Accumulate result */
+ "adds %2, %2, %0\n\t"
+ "adcs %3, %3, %1\n\t"
+ /* Accumulate carry (cannot overflow) */
+ "adc %4, %4, xzr\n\t"
+ : "=&r" ( discard_low ),
+ "=r" ( discard_high ),
+ "+r" ( result[0] ),
+ "+r" ( result[1] ),
+ "+r" ( *carry )
+ : "r" ( multiplicand ),
+ "r" ( multiplier ) );
+}
#endif /* _BITS_BIGINT_H */
diff --git a/src/arch/loong64/core/loong64_bigint.c b/src/arch/loong64/core/loong64_bigint.c
deleted file mode 100644
index b428e22..0000000
--- a/src/arch/loong64/core/loong64_bigint.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
- * Copyright (c) 2023, Xiaotian Wu <wuxiaotian@loongson.cn>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- * You can also choose to distribute this program under the terms of
- * the Unmodified Binary Distribution Licence (as given in the file
- * COPYING.UBDL), provided that you have satisfied its requirements.
- */
-
-FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
-
-#include <stdint.h>
-#include <string.h>
-#include <ipxe/bigint.h>
-
-/** @file
- *
- * Big integer support
- */
-
-/**
- * Multiply big integers
- *
- * @v multiplicand0 Element 0 of big integer to be multiplied
- * @v multiplicand_size Number of elements in multiplicand
- * @v multiplier0 Element 0 of big integer to be multiplied
- * @v multiplier_size Number of elements in multiplier
- * @v result0 Element 0 of big integer to hold result
- */
-void bigint_multiply_raw ( const uint64_t *multiplicand0,
- unsigned int multiplicand_size,
- const uint64_t *multiplier0,
- unsigned int multiplier_size,
- uint64_t *result0 ) {
- unsigned int result_size = ( multiplicand_size + multiplier_size );
- const bigint_t ( multiplicand_size ) __attribute__ (( may_alias ))
- *multiplicand = ( ( const void * ) multiplicand0 );
- const bigint_t ( multiplier_size ) __attribute__ (( may_alias ))
- *multiplier = ( ( const void * ) multiplier0 );
- bigint_t ( result_size ) __attribute__ (( may_alias ))
- *result = ( ( void * ) result0 );
- unsigned int i;
- unsigned int j;
- uint64_t multiplicand_element;
- uint64_t multiplier_element;
- uint64_t *result_elements;
- uint64_t discard_low;
- uint64_t discard_high;
- uint64_t discard_temp_low;
- uint64_t discard_temp_high;
-
- /* Zero result */
- memset ( result, 0, sizeof ( *result ) );
-
- /* Multiply integers one element at a time */
- for ( i = 0 ; i < multiplicand_size ; i++ ) {
- multiplicand_element = multiplicand->element[i];
- for ( j = 0 ; j < multiplier_size ; j++ ) {
- multiplier_element = multiplier->element[j];
- result_elements = &result->element[ i + j ];
- /* Perform a single multiply, and add the
- * resulting double-element into the result,
- * carrying as necessary. The carry can
- * never overflow beyond the end of the
- * result, since:
- *
- * a < 2^{n}, b < 2^{m} => ab < 2^{n+m}
- */
- __asm__ __volatile__ ( "mul.d %1, %6, %7\n\t"
- "mulh.du %2, %6, %7\n\t"
-
- "ld.d %3, %0, 0\n\t"
- "ld.d %4, %0, 8\n\t"
-
- "add.d %3, %3, %1\n\t"
- "sltu $t0, %3, %1\n\t"
-
- "add.d %4, %4, %2\n\t"
- "sltu $t1, %4, %2\n\t"
-
- "add.d %4, %4, $t0\n\t"
- "sltu $t0, %4, $t0\n\t"
- "or $t0, $t0, $t1\n\t"
-
- "st.d %3, %0, 0\n\t"
- "st.d %4, %0, 8\n\t"
-
- "addi.d %0, %0, 16\n\t"
- "beqz $t0, 2f\n"
- "1:\n\t"
- "ld.d %3, %0, 0\n\t"
- "add.d %3, %3, $t0\n\t"
- "sltu $t0, %3, $t0\n\t"
- "st.d %3, %0, 0\n\t"
- "addi.d %0, %0, 8\n\t"
- "bnez $t0, 1b\n"
- "2:"
- : "+r" ( result_elements ),
- "=&r" ( discard_low ),
- "=&r" ( discard_high ),
- "=r" ( discard_temp_low ),
- "=r" ( discard_temp_high ),
- "+m" ( *result )
- : "r" ( multiplicand_element ),
- "r" ( multiplier_element )
- : "t0", "t1" );
- }
- }
-}
diff --git a/src/arch/loong64/include/bits/bigint.h b/src/arch/loong64/include/bits/bigint.h
index bec748b..ec6ca4b 100644
--- a/src/arch/loong64/include/bits/bigint.h
+++ b/src/arch/loong64/include/bits/bigint.h
@@ -357,10 +357,42 @@
*(--out_byte) = *(value_byte++);
}
-extern void bigint_multiply_raw ( const uint64_t *multiplicand0,
- unsigned int multiplicand_size,
- const uint64_t *multiplier0,
- unsigned int multiplier_size,
- uint64_t *value0 );
+/**
+ * Multiply big integer elements
+ *
+ * @v multiplicand Multiplicand element
+ * @v multiplier Multiplier element
+ * @v result Result element pair
+ * @v carry Carry element
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_multiply_one ( const uint64_t multiplicand, const uint64_t multiplier,
+ uint64_t *result, uint64_t *carry ) {
+ uint64_t discard_low;
+ uint64_t discard_high;
+ uint64_t discard_carry;
+
+ __asm__ __volatile__ ( /* Perform multiplication */
+ "mul.d %0, %6, %7\n\t"
+ "mulh.du %1, %6, %7\n\t"
+ /* Accumulate low half */
+ "add.d %3, %3, %0\n\t"
+ "sltu %2, %3, %0\n\t"
+ /* Add carry to high half (cannot overflow) */
+ "add.d %1, %1, %2\n\t"
+ /* Accumulate high half */
+ "add.d %4, %4, %1\n\t"
+ "sltu %2, %4, %1\n\t"
+ /* Accumulate carry (cannot overflow) */
+ "add.d %5, %5, %2\n\t"
+ : "=&r" ( discard_low ),
+ "=r" ( discard_high ),
+ "=r" ( discard_carry ),
+ "+r" ( result[0] ),
+ "+r" ( result[1] ),
+ "+r" ( *carry )
+ : "r" ( multiplicand ),
+ "r" ( multiplier ) );
+}
#endif /* _BITS_BIGINT_H */
diff --git a/src/arch/riscv/core/riscv_bigint.c b/src/arch/riscv/core/riscv_bigint.c
deleted file mode 100644
index cbc5631..0000000
--- a/src/arch/riscv/core/riscv_bigint.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (C) 2024 Michael Brown <mbrown@fensystems.co.uk>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- * You can also choose to distribute this program under the terms of
- * the Unmodified Binary Distribution Licence (as given in the file
- * COPYING.UBDL), provided that you have satisfied its requirements.
- */
-
-FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
-
-#include <stdint.h>
-#include <string.h>
-#include <ipxe/bigint.h>
-
-/** @file
- *
- * Big integer support
- */
-
-/**
- * Multiply big integers
- *
- * @v multiplicand0 Element 0 of big integer to be multiplied
- * @v multiplicand_size Number of elements in multiplicand
- * @v multiplier0 Element 0 of big integer to be multiplied
- * @v multiplier_size Number of elements in multiplier
- * @v result0 Element 0 of big integer to hold result
- */
-void bigint_multiply_raw ( const unsigned long *multiplicand0,
- unsigned int multiplicand_size,
- const unsigned long *multiplier0,
- unsigned int multiplier_size,
- unsigned long *result0 ) {
- unsigned int result_size = ( multiplicand_size + multiplier_size );
- const bigint_t ( multiplicand_size ) __attribute__ (( may_alias ))
- *multiplicand = ( ( const void * ) multiplicand0 );
- const bigint_t ( multiplier_size ) __attribute__ (( may_alias ))
- *multiplier = ( ( const void * ) multiplier0 );
- bigint_t ( result_size ) __attribute__ (( may_alias ))
- *result = ( ( void * ) result0 );
- unsigned int i;
- unsigned int j;
- unsigned long multiplicand_element;
- unsigned long multiplier_element;
- unsigned long *result_elements;
- unsigned long discard_low;
- unsigned long discard_high;
- unsigned long discard_temp;
- unsigned long discard_carry;
-
- /* Zero result */
- memset ( result, 0, sizeof ( *result ) );
-
- /* Multiply integers one element at a time */
- for ( i = 0 ; i < multiplicand_size ; i++ ) {
- multiplicand_element = multiplicand->element[i];
- for ( j = 0 ; j < multiplier_size ; j++ ) {
- multiplier_element = multiplier->element[j];
- result_elements = &result->element[ i + j ];
- /* Perform a single multiply, and add the
- * resulting double-element into the result,
- * carrying as necessary. The carry can
- * never overflow beyond the end of the
- * result, since:
- *
- * a < 2^{n}, b < 2^{m} => ab < 2^{n+m}
- */
- __asm__ __volatile__ ( /* Perform multiplication */
- "mulhu %2, %6, %7\n\t"
- "mul %1, %6, %7\n\t"
- /* Accumulate low half */
- LOADN " %3, (%0)\n\t"
- "add %3, %3, %1\n\t"
- "sltu %4, %3, %1\n\t"
- STOREN " %3, 0(%0)\n\t"
- /* Carry into high half */
- "add %4, %4, %2\n\t"
- /* Propagate as necessary */
- "\n1:\n\t"
- "addi %0, %0, %8\n\t"
- LOADN " %3, 0(%0)\n\t"
- "add %3, %3, %4\n\t"
- "sltu %4, %3, %4\n\t"
- STOREN " %3, 0(%0)\n\t"
- "bnez %4, 1b\n\t"
- : "+r" ( result_elements ),
- "=r" ( discard_low ),
- "=r" ( discard_high ),
- "=r" ( discard_temp ),
- "=r" ( discard_carry ),
- "+m" ( *result )
- : "r" ( multiplicand_element ),
- "r" ( multiplier_element ),
- "i" ( sizeof ( *result0 ) ) );
- }
- }
-}
diff --git a/src/arch/riscv/include/bits/bigint.h b/src/arch/riscv/include/bits/bigint.h
index fcc0d51..c582334 100644
--- a/src/arch/riscv/include/bits/bigint.h
+++ b/src/arch/riscv/include/bits/bigint.h
@@ -353,10 +353,43 @@
*(--out_byte) = *(value_byte++);
}
-extern void bigint_multiply_raw ( const unsigned long *multiplicand0,
- unsigned int multiplicand_size,
- const unsigned long *multiplier0,
- unsigned int multiplier_size,
- unsigned long *value0 );
+/**
+ * Multiply big integer elements
+ *
+ * @v multiplicand Multiplicand element
+ * @v multiplier Multiplier element
+ * @v result Result element pair
+ * @v carry Carry element
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_multiply_one ( const unsigned long multiplicand,
+ const unsigned long multiplier,
+ unsigned long *result, unsigned long *carry ) {
+ unsigned long discard_low;
+ unsigned long discard_high;
+ unsigned long discard_carry;
+
+ __asm__ __volatile__ ( /* Perform multiplication */
+ "mulhu %1, %6, %7\n\t"
+ "mul %0, %6, %7\n\t"
+ /* Accumulate low half */
+ "add %3, %3, %0\n\t"
+ "sltu %2, %3, %0\n\t"
+ /* Add carry to high half (cannot overflow) */
+ "add %1, %1, %2\n\t"
+ /* Accumulate high half */
+ "add %4, %4, %1\n\t"
+ "sltu %2, %4, %1\n\t"
+ /* Accumulate carry (cannot overflow) */
+ "add %5, %5, %2\n\t"
+ : "=r" ( discard_low ),
+ "=&r" ( discard_high ),
+ "=r" ( discard_carry ),
+ "+r" ( result[0] ),
+ "+r" ( result[1] ),
+ "+r" ( *carry )
+ : "r" ( multiplicand ),
+ "r" ( multiplier ) );
+}
#endif /* _BITS_BIGINT_H */
diff --git a/src/arch/x86/core/x86_bigint.c b/src/arch/x86/core/x86_bigint.c
deleted file mode 100644
index 74e5da9..0000000
--- a/src/arch/x86/core/x86_bigint.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 2012 Michael Brown <mbrown@fensystems.co.uk>.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- * You can also choose to distribute this program under the terms of
- * the Unmodified Binary Distribution Licence (as given in the file
- * COPYING.UBDL), provided that you have satisfied its requirements.
- */
-
-FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
-
-#include <stdint.h>
-#include <string.h>
-#include <ipxe/bigint.h>
-
-/** @file
- *
- * Big integer support
- */
-
-/**
- * Multiply big integers
- *
- * @v multiplicand0 Element 0 of big integer to be multiplied
- * @v multiplicand_size Number of elements in multiplicand
- * @v multiplier0 Element 0 of big integer to be multiplied
- * @v multiplier_size Number of elements in multiplier
- * @v result0 Element 0 of big integer to hold result
- */
-void bigint_multiply_raw ( const uint32_t *multiplicand0,
- unsigned int multiplicand_size,
- const uint32_t *multiplier0,
- unsigned int multiplier_size,
- uint32_t *result0 ) {
- unsigned int result_size = ( multiplicand_size + multiplier_size );
- const bigint_t ( multiplicand_size ) __attribute__ (( may_alias ))
- *multiplicand = ( ( const void * ) multiplicand0 );
- const bigint_t ( multiplier_size ) __attribute__ (( may_alias ))
- *multiplier = ( ( const void * ) multiplier0 );
- bigint_t ( result_size ) __attribute__ (( may_alias ))
- *result = ( ( void * ) result0 );
- unsigned int i;
- unsigned int j;
- uint32_t multiplicand_element;
- uint32_t multiplier_element;
- uint32_t *result_elements;
- uint32_t discard_a;
- uint32_t discard_d;
- long index;
-
- /* Zero result */
- memset ( result, 0, sizeof ( *result ) );
-
- /* Multiply integers one element at a time */
- for ( i = 0 ; i < multiplicand_size ; i++ ) {
- multiplicand_element = multiplicand->element[i];
- for ( j = 0 ; j < multiplier_size ; j++ ) {
- multiplier_element = multiplier->element[j];
- result_elements = &result->element[ i + j ];
- /* Perform a single multiply, and add the
- * resulting double-element into the result,
- * carrying as necessary. The carry can
- * never overflow beyond the end of the
- * result, since:
- *
- * a < 2^{n}, b < 2^{m} => ab < 2^{n+m}
- */
- __asm__ __volatile__ ( "mull %5\n\t"
- "addl %%eax, (%6,%2,4)\n\t"
- "adcl %%edx, 4(%6,%2,4)\n\t"
- "\n1:\n\t"
- "adcl $0, 8(%6,%2,4)\n\t"
- "inc %2\n\t"
- /* Does not affect CF */
- "jc 1b\n\t"
- : "=&a" ( discard_a ),
- "=&d" ( discard_d ),
- "=&r" ( index ),
- "+m" ( *result )
- : "0" ( multiplicand_element ),
- "g" ( multiplier_element ),
- "r" ( result_elements ),
- "2" ( 0 ) );
- }
- }
-}
diff --git a/src/arch/x86/include/bits/bigint.h b/src/arch/x86/include/bits/bigint.h
index a6bc2ca..a481e90 100644
--- a/src/arch/x86/include/bits/bigint.h
+++ b/src/arch/x86/include/bits/bigint.h
@@ -322,10 +322,34 @@
: "eax" );
}
-extern void bigint_multiply_raw ( const uint32_t *multiplicand0,
- unsigned int multiplicand_size,
- const uint32_t *multiplier0,
- unsigned int multiplier_size,
- uint32_t *value0 );
+/**
+ * Multiply big integer elements
+ *
+ * @v multiplicand Multiplicand element
+ * @v multiplier Multiplier element
+ * @v result Result element pair
+ * @v carry Carry element
+ */
+static inline __attribute__ (( always_inline )) void
+bigint_multiply_one ( const uint32_t multiplicand, const uint32_t multiplier,
+ uint32_t *result, uint32_t *carry ) {
+ uint32_t discard_a;
+ uint32_t discard_d;
+
+ __asm__ __volatile__ ( /* Perform multiplication */
+ "mull %6\n\t"
+ /* Accumulate result */
+ "addl %0, %2\n\t"
+ "adcl %1, %3\n\t"
+ /* Accumulate carry (cannot overflow) */
+ "adcl $0, %4\n\t"
+ : "=a" ( discard_a ),
+ "=d" ( discard_d ),
+ "+m" ( result[0] ),
+ "+m" ( result[1] ),
+ "+m" ( *carry )
+ : "0" ( multiplicand ),
+ "g" ( multiplier ) );
+}
#endif /* _BITS_BIGINT_H */
diff --git a/src/crypto/bigint.c b/src/crypto/bigint.c
index 656f979..5b7116e 100644
--- a/src/crypto/bigint.c
+++ b/src/crypto/bigint.c
@@ -76,6 +76,115 @@
}
/**
+ * Multiply big integers
+ *
+ * @v multiplicand0 Element 0 of big integer to be multiplied
+ * @v multiplicand_size Number of elements in multiplicand
+ * @v multiplier0 Element 0 of big integer to be multiplied
+ * @v multiplier_size Number of elements in multiplier
+ * @v result0 Element 0 of big integer to hold result
+ * @v carry0 Element 0 of big integer to hold temporary carry
+ */
+void bigint_multiply_raw ( const bigint_element_t *multiplicand0,
+ unsigned int multiplicand_size,
+ const bigint_element_t *multiplier0,
+ unsigned int multiplier_size,
+ bigint_element_t *result0,
+ bigint_element_t *carry0 ) {
+ unsigned int result_size = ( multiplicand_size + multiplier_size );
+ const bigint_t ( multiplicand_size ) __attribute__ (( may_alias ))
+ *multiplicand = ( ( const void * ) multiplicand0 );
+ const bigint_t ( multiplier_size ) __attribute__ (( may_alias ))
+ *multiplier = ( ( const void * ) multiplier0 );
+ bigint_t ( result_size ) __attribute__ (( may_alias ))
+ *result = ( ( void * ) result0 );
+ bigint_t ( result_size ) __attribute__ (( may_alias ))
+ *carry = ( ( void * ) carry0 );
+ bigint_element_t multiplicand_element;
+ const bigint_element_t *multiplier_element;
+ bigint_element_t *result_elements;
+ bigint_element_t *carry_element;
+ unsigned int i;
+ unsigned int j;
+
+ /* Zero result and temporary carry space */
+ memset ( result, 0, sizeof ( *result ) );
+ memset ( carry, 0, sizeof ( *carry ) );
+
+ /* Multiply integers one element at a time, adding the double
+ * element directly into the result and accumulating any
+ * overall carry out from this double-element addition into
+ * the temporary carry space.
+ *
+ * We could propagate the carry immediately instead of using a
+ * temporary carry space. However, this would cause the
+ * multiplication to run in non-constant time, which is
+ * undesirable.
+ *
+ * The carry elements can never overflow, provided that the
+ * element size is large enough to accommodate any plausible
+ * big integer. The total number of potential carries (across
+ * all elements) is the sum of the number of elements in the
+ * multiplicand and multiplier. With a 16-bit element size,
+ * this therefore allows for up to a 1Mbit multiplication
+ * result (e.g. a 512kbit integer multiplied by another
+ * 512kbit integer), which is around 100x higher than could be
+ * needed in practice. With a more realistic 32-bit element
+ * size, the limit becomes a totally implausible 128Gbit
+ * multiplication result.
+ */
+ for ( i = 0 ; i < multiplicand_size ; i++ ) {
+ multiplicand_element = multiplicand->element[i];
+ multiplier_element = &multiplier->element[0];
+ result_elements = &result->element[i];
+ carry_element = &carry->element[i];
+ for ( j = 0 ; j < multiplier_size ; j++ ) {
+ bigint_multiply_one ( multiplicand_element,
+ *(multiplier_element++),
+ result_elements++,
+ carry_element++ );
+ }
+ }
+
+ /* Add the temporary carry into the result. The least
+ * significant element of the carry represents the carry out
+ * from multiplying the least significant elements of the
+ * multiplicand and multiplier, and therefore must be added to
+ * the third-least significant element of the result (i.e. the
+ * carry needs to be shifted left by two elements before being
+ * adding to the result).
+ *
+ * The most significant two elements of the carry are
+ * guaranteed to be zero, since:
+ *
+ * a < 2^{n}, b < 2^{m} => ab < 2^{n+m}
+ *
+ * and the overall result of the multiplication (including
+ * adding in the shifted carries) is therefore guaranteed not
+ * to overflow beyond the end of the result.
+ *
+ * We could avoid this shifting by writing the carry directly
+ * into the "correct" element during the element-by-element
+ * multiplication stage above. However, this would add
+ * complexity to the loop since we would have to arrange for
+ * the (provably zero) most significant two carry out results
+ * to be discarded, in order to avoid writing beyond the end
+ * of the temporary carry space.
+ *
+ * Performing the logical shift is essentially free, since we
+ * simply adjust the element pointers.
+ *
+ * To avoid requiring additional checks in each architecture's
+ * implementation of bigint_add_raw(), we explicitly avoid
+ * calling bigint_add_raw() with a size of zero.
+ */
+ if ( result_size > 2 ) {
+ bigint_add_raw ( &carry->element[0], &result->element[2],
+ ( result_size - 2 ) );
+ }
+}
+
+/**
* Perform modular multiplication of big integers
*
* @v multiplicand0 Element 0 of big integer to be multiplied
@@ -100,7 +209,10 @@
( ( void * ) result0 );
struct {
bigint_t ( size * 2 ) result;
- bigint_t ( size * 2 ) modulus;
+ union {
+ bigint_t ( size * 2 ) modulus;
+ bigint_t ( size * 2 ) carry;
+ };
} *temp = tmp;
int rotation;
int i;
@@ -113,7 +225,8 @@
/* Perform multiplication */
profile_start ( &bigint_mod_multiply_multiply_profiler );
- bigint_multiply ( multiplicand, multiplier, &temp->result );
+ bigint_multiply ( multiplicand, multiplier, &temp->result,
+ &temp->carry );
profile_stop ( &bigint_mod_multiply_multiply_profiler );
/* Rescale modulus to match result */
diff --git a/src/crypto/x25519.c b/src/crypto/x25519.c
index d58f716..553f43d 100644
--- a/src/crypto/x25519.c
+++ b/src/crypto/x25519.c
@@ -43,7 +43,7 @@
* Storage size of each big integer 128 40
* (in bytes)
*
- * Stack usage for key exchange 1144 360
+ * Stack usage for key exchange 1144 424
* (in bytes, large objects only)
*
* Cost of big integer addition 16 5
@@ -207,35 +207,60 @@
* We overlap the buffers used by each step of the multiplication
* calculation to reduce the total stack space required:
*
- * |--------------------------------------------------------|
- * | <- pad -> | <------------ step 1 result -------------> |
- * | | <- low 256 bits -> | <-- high 260 bits --> |
- * | <------- step 2 result ------> | <-- step 3 result --> |
- * |--------------------------------------------------------|
+ * |--------------------------------------------------------------------------|
+ * | <------- step 1 carry ------> | <----------- step 1 result ------------> |
+ * | | <- low 256 bits -> | <- high 260 bits -> |
+ * | <- step 2 carry -> | <-- step 2 result --> | <pad> | |
+ * | <- s3 carry -> | <--------- pad ---------> | <- step 3 result -> | |
+ * |--------------------------------------------------------------------------|
*/
union x25519_multiply_workspace {
- /** Step 1 result */
+ /** Step 1 */
struct {
- /** Padding to avoid collision between steps 1 and 2
- *
- * The step 2 multiplication consumes the high 260
- * bits of step 1, and so the step 2 multiplication
- * result must not overlap this portion of the step 1
- * result.
- */
- uint8_t pad[ sizeof ( union x25519_multiply_step2 ) -
- offsetof ( union x25519_multiply_step1,
- parts.high_260bit ) ];
+ /** Step 1 temporary carry workspace */
+ union x25519_multiply_step1 carry;
/** Step 1 result */
- union x25519_multiply_step1 step1;
- } __attribute__ (( packed ));
- /** Steps 2 and 3 results */
+ union x25519_multiply_step1 result;
+ } __attribute__ (( packed )) step1;
+ /** Step 2
+ *
+ * The step 2 multiplication consumes the high 260 bits of
+ * step 1, and so the step 2 multiplication result (and
+ * temporary carry workspace) must not overlap this portion of
+ * the step 1 result.
+ */
struct {
+ /** Step 2 temporary carry workspace */
+ union x25519_multiply_step2 carry;
/** Step 2 result */
- union x25519_multiply_step2 step2;
+ union x25519_multiply_step2 result;
+ /** Avoid collision between step 1 result and step 2 result */
+ uint8_t pad[ ( int )
+ ( sizeof ( union x25519_multiply_step1 ) +
+ offsetof ( union x25519_multiply_step1,
+ parts.high_260bit ) -
+ sizeof ( union x25519_multiply_step2 ) -
+ sizeof ( union x25519_multiply_step2 ) ) ];
+ } __attribute__ (( packed )) step2;
+ /** Step 3
+ *
+ * The step 3 multiplication consumes the high 11 bits of step
+ * 2, and so the step 3 multiplication result (and temporary
+ * carry workspace) must not overlap this portion of the step
+ * 2 result.
+ */
+ struct {
+ /** Step 3 temporary carry workspace */
+ union x25519_multiply_step3 carry;
+ /** Avoid collision between step 2 result and step 3 carry */
+ uint8_t pad1[ ( int )
+ ( sizeof ( union x25519_multiply_step2 ) -
+ sizeof ( union x25519_multiply_step3 ) ) ];
+ /** Avoid collision between step 2 result and step 3 result */
+ uint8_t pad2[ sizeof ( union x25519_multiply_step2 ) ];
/** Step 3 result */
- union x25519_multiply_step3 step3;
- } __attribute__ (( packed ));
+ union x25519_multiply_step3 result;
+ } __attribute__ (( packed )) step3;
};
/** An X25519 elliptic curve point in projective coordinates
@@ -426,9 +451,9 @@
const union x25519_oct258 *multiplier,
union x25519_quad257 *result ) {
union x25519_multiply_workspace tmp;
- union x25519_multiply_step1 *step1 = &tmp.step1;
- union x25519_multiply_step2 *step2 = &tmp.step2;
- union x25519_multiply_step3 *step3 = &tmp.step3;
+ union x25519_multiply_step1 *step1 = &tmp.step1.result;
+ union x25519_multiply_step2 *step2 = &tmp.step2.result;
+ union x25519_multiply_step3 *step3 = &tmp.step3.result;
/* Step 1: perform raw multiplication
*
@@ -439,7 +464,7 @@
*/
static_assert ( sizeof ( step1->product ) >= sizeof ( step1->parts ) );
bigint_multiply ( &multiplicand->value, &multiplier->value,
- &step1->product );
+ &step1->product, &tmp.step1.carry.product );
/* Step 2: reduce high-order 516-256=260 bits of step 1 result
*
@@ -465,7 +490,7 @@
static_assert ( sizeof ( step2->product ) >= sizeof ( step2->parts ) );
bigint_grow ( &step1->parts.low_256bit, &result->value );
bigint_multiply ( &step1->parts.high_260bit, &x25519_reduce_256,
- &step2->product );
+ &step2->product, &tmp.step2.carry.product );
bigint_add ( &result->value, &step2->value );
/* Step 3: reduce high-order 267-256=11 bits of step 2 result
@@ -503,7 +528,7 @@
memset ( &step3->value, 0, sizeof ( step3->value ) );
bigint_grow ( &step2->parts.low_256bit, &result->value );
bigint_multiply ( &step2->parts.high_11bit, &x25519_reduce_256,
- &step3->product );
+ &step3->product, &tmp.step3.carry.product );
bigint_add ( &step3->value, &result->value );
/* Step 1 calculates the product of the input operands, and
diff --git a/src/include/ipxe/bigint.h b/src/include/ipxe/bigint.h
index 3dc344d..efe1565 100644
--- a/src/include/ipxe/bigint.h
+++ b/src/include/ipxe/bigint.h
@@ -208,13 +208,15 @@
* @v multiplicand Big integer to be multiplied
* @v multiplier Big integer to be multiplied
* @v result Big integer to hold result
+ * @v carry Big integer to hold temporary carry space
*/
-#define bigint_multiply( multiplicand, multiplier, result ) do { \
+#define bigint_multiply( multiplicand, multiplier, result, carry ) do { \
unsigned int multiplicand_size = bigint_size (multiplicand); \
unsigned int multiplier_size = bigint_size (multiplier); \
bigint_multiply_raw ( (multiplicand)->element, \
multiplicand_size, (multiplier)->element, \
- multiplier_size, (result)->element ); \
+ multiplier_size, (result)->element, \
+ (carry)->element ); \
} while ( 0 )
/**
@@ -245,7 +247,10 @@
unsigned int size = bigint_size (modulus); \
sizeof ( struct { \
bigint_t ( size * 2 ) temp_result; \
- bigint_t ( size * 2 ) temp_modulus; \
+ union { \
+ bigint_t ( size * 2 ) temp_modulus; \
+ bigint_t ( size * 2 ) temp_carry; \
+ }; \
} ); } )
/**
@@ -311,11 +316,16 @@
unsigned int dest_size );
void bigint_swap_raw ( bigint_element_t *first0, bigint_element_t *second0,
unsigned int size, int swap );
+void bigint_multiply_one ( const bigint_element_t multiplicand,
+ const bigint_element_t multiplier,
+ bigint_element_t *result,
+ bigint_element_t *carry );
void bigint_multiply_raw ( const bigint_element_t *multiplicand0,
unsigned int multiplicand_size,
const bigint_element_t *multiplier0,
unsigned int multiplier_size,
- bigint_element_t *result0 );
+ bigint_element_t *result0,
+ bigint_element_t *carry0 );
void bigint_mod_multiply_raw ( const bigint_element_t *multiplicand0,
const bigint_element_t *multiplier0,
const bigint_element_t *modulus0,
diff --git a/src/tests/bigint_test.c b/src/tests/bigint_test.c
index 76aca10..fcc77f2 100644
--- a/src/tests/bigint_test.c
+++ b/src/tests/bigint_test.c
@@ -173,7 +173,8 @@
unsigned int multiplicand_size,
const bigint_element_t *multiplier0,
unsigned int multiplier_size,
- bigint_element_t *result0 ) {
+ bigint_element_t *result0,
+ bigint_element_t *carry0 ) {
unsigned int result_size = ( multiplicand_size + multiplier_size );
const bigint_t ( multiplicand_size ) __attribute__ (( may_alias ))
*multiplicand = ( ( const void * ) multiplicand0 );
@@ -181,8 +182,10 @@
*multiplier = ( ( const void * ) multiplier0 );
bigint_t ( result_size ) __attribute__ (( may_alias ))
*result = ( ( void * ) result0 );
+ bigint_t ( result_size ) __attribute__ (( may_alias ))
+ *carry = ( ( void * ) carry0 );
- bigint_multiply ( multiplicand, multiplier, result );
+ bigint_multiply ( multiplicand, multiplier, result, carry );
}
void bigint_mod_multiply_sample ( const bigint_element_t *multiplicand0,
@@ -495,11 +498,14 @@
bigint_t ( multiplicand_size ) multiplicand_temp; \
bigint_t ( multiplier_size ) multiplier_temp; \
bigint_t ( multiplicand_size + multiplier_size ) result_temp; \
+ bigint_t ( multiplicand_size + multiplier_size ) carry_temp; \
{} /* Fix emacs alignment */ \
\
assert ( bigint_size ( &result_temp ) == \
( bigint_size ( &multiplicand_temp ) + \
bigint_size ( &multiplier_temp ) ) ); \
+ assert ( bigint_size ( &carry_temp ) == \
+ bigint_size ( &result_temp ) ); \
bigint_init ( &multiplicand_temp, multiplicand_raw, \
sizeof ( multiplicand_raw ) ); \
bigint_init ( &multiplier_temp, multiplier_raw, \
@@ -508,7 +514,7 @@
DBG_HDA ( 0, &multiplicand_temp, sizeof ( multiplicand_temp ) );\
DBG_HDA ( 0, &multiplier_temp, sizeof ( multiplier_temp ) ); \
bigint_multiply ( &multiplicand_temp, &multiplier_temp, \
- &result_temp ); \
+ &result_temp, &carry_temp ); \
DBG_HDA ( 0, &result_temp, sizeof ( result_temp ) ); \
bigint_done ( &result_temp, result_raw, sizeof ( result_raw ) );\
\