[crypto] Add memory output constraints for big-integer inline assembly

The ARM versions of the big-integer inline assembly functions include
constraints to indicate that the output value is modified by the
assembly code.  These constraints are not present in the equivalent
code for the x86 versions.

As of GCC 11, this omission causes the compiler to report that the
output values may be used uninitialized.

Fix by including the relevant "+m" memory output constraints in the
x86 versions.
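
The idiom is to expose the written buffer itself as a "+m" operand,
via a may_alias pointer covering the whole object, so that the
compiler knows the assembly both reads and modifies it.  As an
illustration only (the helper name and operands below are invented
for this example, not taken from the patch), the pattern looks like:

    static inline void zero_words ( uint32_t *buf, unsigned int count ) {
    	struct {
    		uint32_t words[count];
    	} __attribute__ (( may_alias )) *whole = ( ( void * ) buf );
    	void *discard_D;
    	long discard_c;

    	/* Zero the buffer; "+m" tells the compiler it is written */
    	__asm__ __volatile__ ( "xorl %%eax, %%eax\n\t"
    			       "rep stosl\n\t"
    			       : "=&D" ( discard_D ), "=&c" ( discard_c ),
    				 "+m" ( *whole )
    			       : "0" ( buf ), "1" ( count )
    			       : "eax" );
    }

Without the "+m" operand the compiler may assume that the assembly
never writes the buffer, which is what allows GCC 11 to conclude that
values subsequently read from it could be uninitialized.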

Reported-by: Christian Hesse <mail@eworm.de>
Signed-off-by: Michael Brown <mcb30@ipxe.org>
diff --git a/src/arch/x86/core/x86_bigint.c b/src/arch/x86/core/x86_bigint.c
index 6413b2f..9a25bda 100644
--- a/src/arch/x86/core/x86_bigint.c
+++ b/src/arch/x86/core/x86_bigint.c
@@ -75,17 +75,18 @@
 			 *
 			 *     a < 2^{n}, b < 2^{n} => ab < 2^{2n}
 			 */
-			__asm__ __volatile__ ( "mull %4\n\t"
-					       "addl %%eax, (%5,%2,4)\n\t"
-					       "adcl %%edx, 4(%5,%2,4)\n\t"
+			__asm__ __volatile__ ( "mull %5\n\t"
+					       "addl %%eax, (%6,%2,4)\n\t"
+					       "adcl %%edx, 4(%6,%2,4)\n\t"
 					       "\n1:\n\t"
-					       "adcl $0, 8(%5,%2,4)\n\t"
+					       "adcl $0, 8(%6,%2,4)\n\t"
 					       "inc %2\n\t"
 						       /* Does not affect CF */
 					       "jc 1b\n\t"
 					       : "=&a" ( discard_a ),
 						 "=&d" ( discard_d ),
-						 "=&r" ( index )
+						 "=&r" ( index ),
+						 "+m" ( *result )
 					       : "0" ( multiplicand_element ),
 						 "g" ( multiplier_element ),
 						 "r" ( result_elements ),
diff --git a/src/arch/x86/include/bits/bigint.h b/src/arch/x86/include/bits/bigint.h
index 4f1bc87..7443d6f 100644
--- a/src/arch/x86/include/bits/bigint.h
+++ b/src/arch/x86/include/bits/bigint.h
@@ -25,19 +25,22 @@
 static inline __attribute__ (( always_inline )) void
 bigint_init_raw ( uint32_t *value0, unsigned int size,
 		  const void *data, size_t len ) {
-	long pad_len = ( sizeof ( bigint_t ( size ) ) - len );
+	bigint_t ( size ) __attribute__ (( may_alias )) *value =
+		( ( void * ) value0 );
+	long pad_len = ( sizeof ( *value ) - len );
 	void *discard_D;
 	long discard_c;
 
 	/* Copy raw data in reverse order, padding with zeros */
 	__asm__ __volatile__ ( "\n1:\n\t"
-			       "movb -1(%2,%1), %%al\n\t"
+			       "movb -1(%3,%1), %%al\n\t"
 			       "stosb\n\t"
 			       "loop 1b\n\t"
 			       "xorl %%eax, %%eax\n\t"
-			       "mov %3, %1\n\t"
+			       "mov %4, %1\n\t"
 			       "rep stosb\n\t"
-			       : "=&D" ( discard_D ), "=&c" ( discard_c )
+			       : "=&D" ( discard_D ), "=&c" ( discard_c ),
+				 "+m" ( *value )
 			       : "r" ( data ), "g" ( pad_len ), "0" ( value0 ),
 				 "1" ( len )
 			       : "eax" );
@@ -53,6 +56,8 @@
 static inline __attribute__ (( always_inline )) void
 bigint_add_raw ( const uint32_t *addend0, uint32_t *value0,
 		 unsigned int size ) {
+	bigint_t ( size ) __attribute__ (( may_alias )) *value =
+		( ( void * ) value0 );
 	long index;
 	void *discard_S;
 	long discard_c;
@@ -60,11 +65,11 @@
 	__asm__ __volatile__ ( "xor %0, %0\n\t" /* Zero %0 and clear CF */
 			       "\n1:\n\t"
 			       "lodsl\n\t"
-			       "adcl %%eax, (%3,%0,4)\n\t"
+			       "adcl %%eax, (%4,%0,4)\n\t"
 			       "inc %0\n\t" /* Does not affect CF */
 			       "loop 1b\n\t"
 			       : "=&r" ( index ), "=&S" ( discard_S ),
-				 "=&c" ( discard_c )
+				 "=&c" ( discard_c ), "+m" ( *value )
 			       : "r" ( value0 ), "1" ( addend0 ), "2" ( size )
 			       : "eax" );
 }
@@ -79,6 +84,8 @@
 static inline __attribute__ (( always_inline )) void
 bigint_subtract_raw ( const uint32_t *subtrahend0, uint32_t *value0,
 		      unsigned int size ) {
+	bigint_t ( size ) __attribute__ (( may_alias )) *value =
+		( ( void * ) value0 );
 	long index;
 	void *discard_S;
 	long discard_c;
@@ -86,11 +93,11 @@
 	__asm__ __volatile__ ( "xor %0, %0\n\t" /* Zero %0 and clear CF */
 			       "\n1:\n\t"
 			       "lodsl\n\t"
-			       "sbbl %%eax, (%3,%0,4)\n\t"
+			       "sbbl %%eax, (%4,%0,4)\n\t"
 			       "inc %0\n\t" /* Does not affect CF */
 			       "loop 1b\n\t"
 			       : "=&r" ( index ), "=&S" ( discard_S ),
-				 "=&c" ( discard_c )
+				 "=&c" ( discard_c ), "+m" ( *value )
 			       : "r" ( value0 ), "1" ( subtrahend0 ),
 				 "2" ( size )
 			       : "eax" );
@@ -104,15 +111,18 @@
  */
 static inline __attribute__ (( always_inline )) void
 bigint_rol_raw ( uint32_t *value0, unsigned int size ) {
+	bigint_t ( size ) __attribute__ (( may_alias )) *value =
+		( ( void * ) value0 );
 	long index;
 	long discard_c;
 
 	__asm__ __volatile__ ( "xor %0, %0\n\t" /* Zero %0 and clear CF */
 			       "\n1:\n\t"
-			       "rcll $1, (%2,%0,4)\n\t"
+			       "rcll $1, (%3,%0,4)\n\t"
 			       "inc %0\n\t" /* Does not affect CF */
 			       "loop 1b\n\t"
-			       : "=&r" ( index ), "=&c" ( discard_c )
+			       : "=&r" ( index ), "=&c" ( discard_c ),
+				 "+m" ( *value )
 			       : "r" ( value0 ), "1" ( size ) );
 }
 
@@ -124,13 +134,15 @@
  */
 static inline __attribute__ (( always_inline )) void
 bigint_ror_raw ( uint32_t *value0, unsigned int size ) {
+	bigint_t ( size ) __attribute__ (( may_alias )) *value =
+		( ( void * ) value0 );
 	long discard_c;
 
 	__asm__ __volatile__ ( "clc\n\t"
 			       "\n1:\n\t"
-			       "rcrl $1, -4(%1,%0,4)\n\t"
+			       "rcrl $1, -4(%2,%0,4)\n\t"
 			       "loop 1b\n\t"
-			       : "=&c" ( discard_c )
+			       : "=&c" ( discard_c ), "+m" ( *value )
 			       : "r" ( value0 ), "0" ( size ) );
 }
 
@@ -239,6 +251,8 @@
 static inline __attribute__ (( always_inline )) void
 bigint_grow_raw ( const uint32_t *source0, unsigned int source_size,
 		  uint32_t *dest0, unsigned int dest_size ) {
+	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+		( ( void * ) dest0 );
 	long pad_size = ( dest_size - source_size );
 	void *discard_D;
 	void *discard_S;
@@ -246,10 +260,10 @@
 
 	__asm__ __volatile__ ( "rep movsl\n\t"
 			       "xorl %%eax, %%eax\n\t"
-			       "mov %3, %2\n\t"
+			       "mov %4, %2\n\t"
 			       "rep stosl\n\t"
 			       : "=&D" ( discard_D ), "=&S" ( discard_S ),
-				 "=&c" ( discard_c )
+				 "=&c" ( discard_c ), "+m" ( *dest )
 			       : "g" ( pad_size ), "0" ( dest0 ),
 				 "1" ( source0 ), "2" ( source_size )
 			       : "eax" );
@@ -266,13 +280,15 @@
 static inline __attribute__ (( always_inline )) void
 bigint_shrink_raw ( const uint32_t *source0, unsigned int source_size __unused,
 		    uint32_t *dest0, unsigned int dest_size ) {
+	bigint_t ( dest_size ) __attribute__ (( may_alias )) *dest =
+		( ( void * ) dest0 );
 	void *discard_D;
 	void *discard_S;
 	long discard_c;
 
 	__asm__ __volatile__ ( "rep movsl\n\t"
 			       : "=&D" ( discard_D ), "=&S" ( discard_S ),
-				 "=&c" ( discard_c )
+				 "=&c" ( discard_c ), "+m" ( *dest )
 			       : "0" ( dest0 ), "1" ( source0 ),
 				 "2" ( dest_size )
 			       : "eax" );
@@ -289,15 +305,19 @@
 static inline __attribute__ (( always_inline )) void
 bigint_done_raw ( const uint32_t *value0, unsigned int size __unused,
 		  void *out, size_t len ) {
+	struct {
+		uint8_t bytes[len];
+	} __attribute__ (( may_alias )) *out_bytes = out;
 	void *discard_D;
 	long discard_c;
 
 	/* Copy raw data in reverse order */
 	__asm__ __volatile__ ( "\n1:\n\t"
-			       "movb -1(%2,%1), %%al\n\t"
+			       "movb -1(%3,%1), %%al\n\t"
 			       "stosb\n\t"
 			       "loop 1b\n\t"
-			       : "=&D" ( discard_D ), "=&c" ( discard_c )
+			       : "=&D" ( discard_D ), "=&c" ( discard_c ),
+				 "+m" ( *out_bytes )
 			       : "r" ( value0 ), "0" ( out ), "1" ( len )
 			       : "eax" );
 }