| /* |
 * AArch64 specific AES acceleration.
| * SPDX-License-Identifier: GPL-2.0-or-later |
| */ |
| |
| #ifndef AARCH64_HOST_CRYPTO_AES_ROUND_H |
| #define AARCH64_HOST_CRYPTO_AES_ROUND_H |
| |
| #include "host/cpuinfo.h" |
| #include <arm_neon.h> |
| |
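/*
 * With __ARM_FEATURE_AES the compiler is already targeting the crypto
 * extension, so the AES instructions are unconditionally available.
 * Otherwise their presence must be probed at runtime via cpuinfo.
 */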
| #ifdef __ARM_FEATURE_AES |
| # define HAVE_AES_ACCEL true |
| #else |
| # define HAVE_AES_ACCEL likely(cpuinfo & CPUINFO_AES) |
| #endif |
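/*
 * When using the intrinsics without AES in the baseline ISA, enable the
 * crypto extension for just the annotated functions; the runtime
 * HAVE_AES_ACCEL check above guards the calls.  The inline-asm fallback
 * needs no attribute.
 */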
| #if !defined(__ARM_FEATURE_AES) && defined(CONFIG_ARM_AES_BUILTIN) |
| # define ATTR_AES_ACCEL __attribute__((target("+crypto"))) |
| #else |
| # define ATTR_AES_ACCEL |
| #endif |
| |
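/* Reverse the bytes of a 128-bit vector with a single TBL lookup. */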
| static inline uint8x16_t aes_accel_bswap(uint8x16_t x) |
| { |
| return vqtbl1q_u8(x, (uint8x16_t){ 15, 14, 13, 12, 11, 10, 9, 8, |
| 7, 6, 5, 4, 3, 2, 1, 0, }); |
| } |
| |
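/*
 * If the compiler can use the vaes* intrinsics (CONFIG_ARM_AES_BUILTIN),
 * map directly to them; the *_mc/_imc forms combine the round with the
 * (Inv)MixColumns step.  Otherwise fall back to inline assembly below.
 */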
| #ifdef CONFIG_ARM_AES_BUILTIN |
| # define aes_accel_aesd vaesdq_u8 |
| # define aes_accel_aese vaeseq_u8 |
| # define aes_accel_aesmc vaesmcq_u8 |
| # define aes_accel_aesimc vaesimcq_u8 |
| # define aes_accel_aesd_imc(S, K) vaesimcq_u8(vaesdq_u8(S, K)) |
| # define aes_accel_aese_mc(S, K) vaesmcq_u8(vaeseq_u8(S, K)) |
| #else |
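/*
 * Inline-asm fallback: ".arch_extension aes" lets the assembler accept
 * the AES instructions even when the base -march does not include them.
 */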
| static inline uint8x16_t aes_accel_aesd(uint8x16_t d, uint8x16_t k) |
| { |
| asm(".arch_extension aes\n\t" |
| "aesd %0.16b, %1.16b" : "+w"(d) : "w"(k)); |
| return d; |
| } |
| |
| static inline uint8x16_t aes_accel_aese(uint8x16_t d, uint8x16_t k) |
| { |
| asm(".arch_extension aes\n\t" |
| "aese %0.16b, %1.16b" : "+w"(d) : "w"(k)); |
| return d; |
| } |
| |
| static inline uint8x16_t aes_accel_aesmc(uint8x16_t d) |
| { |
| asm(".arch_extension aes\n\t" |
| "aesmc %0.16b, %1.16b" : "=w"(d) : "w"(d)); |
| return d; |
| } |
| |
| static inline uint8x16_t aes_accel_aesimc(uint8x16_t d) |
| { |
| asm(".arch_extension aes\n\t" |
| "aesimc %0.16b, %1.16b" : "=w"(d) : "w"(d)); |
| return d; |
| } |
| |
| /* Most CPUs fuse AESD+AESIMC in the execution pipeline. */ |
| static inline uint8x16_t aes_accel_aesd_imc(uint8x16_t d, uint8x16_t k) |
| { |
| asm(".arch_extension aes\n\t" |
| "aesd %0.16b, %1.16b\n\t" |
| "aesimc %0.16b, %0.16b" : "+w"(d) : "w"(k)); |
| return d; |
| } |
| |
| /* Most CPUs fuse AESE+AESMC in the execution pipeline. */ |
| static inline uint8x16_t aes_accel_aese_mc(uint8x16_t d, uint8x16_t k) |
| { |
| asm(".arch_extension aes\n\t" |
| "aese %0.16b, %1.16b\n\t" |
| "aesmc %0.16b, %0.16b" : "+w"(d) : "w"(k)); |
| return d; |
| } |
| #endif /* CONFIG_ARM_AES_BUILTIN */ |
| |
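/*
 * The *_accel routines below implement the host hooks used by the generic
 * AES round helpers.  The 'be' flag means the caller holds the state
 * byte-reversed with respect to the layout the instructions operate on;
 * in that case swap before and after the operation.
 */

/* Perform MixColumns. */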
| static inline void ATTR_AES_ACCEL |
| aesenc_MC_accel(AESState *ret, const AESState *st, bool be) |
| { |
| uint8x16_t t = (uint8x16_t)st->v; |
| |
| if (be) { |
| t = aes_accel_bswap(t); |
| t = aes_accel_aesmc(t); |
| t = aes_accel_bswap(t); |
| } else { |
| t = aes_accel_aesmc(t); |
| } |
| ret->v = (AESStateVec)t; |
| } |
| |
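/*
 * Perform SubBytes, ShiftRows, AddRoundKey.  AESE xors in its key operand
 * *before* SubBytes/ShiftRows, so use an all-zero key for the instruction
 * and apply the real round key afterwards.
 */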
| static inline void ATTR_AES_ACCEL |
| aesenc_SB_SR_AK_accel(AESState *ret, const AESState *st, |
| const AESState *rk, bool be) |
| { |
| uint8x16_t t = (uint8x16_t)st->v; |
| uint8x16_t z = { }; |
| |
| if (be) { |
| t = aes_accel_bswap(t); |
| t = aes_accel_aese(t, z); |
| t = aes_accel_bswap(t); |
| } else { |
| t = aes_accel_aese(t, z); |
| } |
| ret->v = (AESStateVec)t ^ rk->v; |
| } |
| |
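/*
 * Perform SubBytes, ShiftRows, MixColumns, AddRoundKey, using the fused
 * AESE+AESMC sequence with a zero key and xoring the round key last.
 */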
| static inline void ATTR_AES_ACCEL |
| aesenc_SB_SR_MC_AK_accel(AESState *ret, const AESState *st, |
| const AESState *rk, bool be) |
| { |
| uint8x16_t t = (uint8x16_t)st->v; |
| uint8x16_t z = { }; |
| |
| if (be) { |
| t = aes_accel_bswap(t); |
| t = aes_accel_aese_mc(t, z); |
| t = aes_accel_bswap(t); |
| } else { |
| t = aes_accel_aese_mc(t, z); |
| } |
| ret->v = (AESStateVec)t ^ rk->v; |
| } |
| |
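/* Perform InvMixColumns. */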
| static inline void ATTR_AES_ACCEL |
| aesdec_IMC_accel(AESState *ret, const AESState *st, bool be) |
| { |
| uint8x16_t t = (uint8x16_t)st->v; |
| |
| if (be) { |
| t = aes_accel_bswap(t); |
| t = aes_accel_aesimc(t); |
| t = aes_accel_bswap(t); |
| } else { |
| t = aes_accel_aesimc(t); |
| } |
| ret->v = (AESStateVec)t; |
| } |
| |
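/*
 * Perform InvSubBytes, InvShiftRows, AddRoundKey.  As with AESE, AESD
 * xors in its key operand first, so pass a zero key and apply the real
 * round key after the instruction.
 */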
| static inline void ATTR_AES_ACCEL |
| aesdec_ISB_ISR_AK_accel(AESState *ret, const AESState *st, |
| const AESState *rk, bool be) |
| { |
| uint8x16_t t = (uint8x16_t)st->v; |
| uint8x16_t z = { }; |
| |
| if (be) { |
| t = aes_accel_bswap(t); |
| t = aes_accel_aesd(t, z); |
| t = aes_accel_bswap(t); |
| } else { |
| t = aes_accel_aesd(t, z); |
| } |
| ret->v = (AESStateVec)t ^ rk->v; |
| } |
| |
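/*
 * Perform InvSubBytes, InvShiftRows, AddRoundKey, InvMixColumns.  The key
 * must be added before InvMixColumns, so the fused AESD+AESIMC form cannot
 * be used; the xor is done between the two instructions, with the round
 * key byte-swapped to match when 'be' is set.
 */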
| static inline void ATTR_AES_ACCEL |
| aesdec_ISB_ISR_AK_IMC_accel(AESState *ret, const AESState *st, |
| const AESState *rk, bool be) |
| { |
| uint8x16_t t = (uint8x16_t)st->v; |
| uint8x16_t k = (uint8x16_t)rk->v; |
| uint8x16_t z = { }; |
| |
| if (be) { |
| t = aes_accel_bswap(t); |
| k = aes_accel_bswap(k); |
| t = aes_accel_aesd(t, z); |
| t ^= k; |
| t = aes_accel_aesimc(t); |
| t = aes_accel_bswap(t); |
| } else { |
| t = aes_accel_aesd(t, z); |
| t ^= k; |
| t = aes_accel_aesimc(t); |
| } |
| ret->v = (AESStateVec)t; |
| } |
| |
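/*
 * Perform InvSubBytes, InvShiftRows, InvMixColumns, AddRoundKey, which
 * maps directly onto the fused AESD+AESIMC sequence with a zero key,
 * xoring the round key at the end.
 */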
| static inline void ATTR_AES_ACCEL |
| aesdec_ISB_ISR_IMC_AK_accel(AESState *ret, const AESState *st, |
| const AESState *rk, bool be) |
| { |
| uint8x16_t t = (uint8x16_t)st->v; |
| uint8x16_t z = { }; |
| |
| if (be) { |
| t = aes_accel_bswap(t); |
| t = aes_accel_aesd_imc(t, z); |
| t = aes_accel_bswap(t); |
| } else { |
| t = aes_accel_aesd_imc(t, z); |
| } |
| ret->v = (AESStateVec)t ^ rk->v; |
| } |
| |
| #endif /* AARCH64_HOST_CRYPTO_AES_ROUND_H */ |