target-arm: add support for v8 SHA1 and SHA256 instructions
This adds support for the SHA1 and SHA256 instructions that are available
on some v8 implementations of Aarch32.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401386724-26529-2-git-send-email-peter.maydell@linaro.org
[PMM:
* rebase
* fix bad indent
* add a missing UNDEF check for Q!=1 in the 3-reg SHA1/SHA256 case
* use g_assert_not_reached()
* don't re-extract bit 6 for the 2-reg-misc encodings
* set the ELF HWCAP2 bits for the new features
]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 995f999..9bda262 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -468,6 +468,8 @@
uint32_t hwcaps = 0;
GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
+ GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
+ GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
return hwcaps;
}
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 794dcb9..753f6cb 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -317,6 +317,8 @@
set_feature(env, ARM_FEATURE_ARM_DIV);
set_feature(env, ARM_FEATURE_LPAE);
set_feature(env, ARM_FEATURE_V8_AES);
+ set_feature(env, ARM_FEATURE_V8_SHA1);
+ set_feature(env, ARM_FEATURE_V8_SHA256);
}
if (arm_feature(env, ARM_FEATURE_V7)) {
set_feature(env, ARM_FEATURE_VAPA);
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 7d8332e..14d5f21 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -635,6 +635,8 @@
ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
ARM_FEATURE_EL2, /* has EL2 Virtualization support */
ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
+ ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
};
static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
index d8898ed..3e4b5f7 100644
--- a/target-arm/crypto_helper.c
+++ b/target-arm/crypto_helper.c
@@ -1,7 +1,7 @@
/*
* crypto_helper.c - emulate v8 Crypto Extensions instructions
*
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -15,9 +15,9 @@
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
-union AES_STATE {
+union CRYPTO_STATE {
uint8_t bytes[16];
- uint32_t cols[4];
+ uint32_t words[4];
uint64_t l[2];
};
@@ -99,11 +99,11 @@
/* ShiftRows permutation vector for decryption */
{ 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 },
};
- union AES_STATE rk = { .l = {
+ union CRYPTO_STATE rk = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
- union AES_STATE st = { .l = {
+ union CRYPTO_STATE st = { .l = {
float64_val(env->vfp.regs[rd]),
float64_val(env->vfp.regs[rd + 1])
} };
@@ -260,7 +260,7 @@
0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
} };
- union AES_STATE st = { .l = {
+ union CRYPTO_STATE st = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
@@ -269,7 +269,7 @@
assert(decrypt < 2);
for (i = 0; i < 16; i += 4) {
- st.cols[i >> 2] = cpu_to_le32(
+ st.words[i >> 2] = cpu_to_le32(
mc[decrypt][st.bytes[i]] ^
rol32(mc[decrypt][st.bytes[i + 1]], 8) ^
rol32(mc[decrypt][st.bytes[i + 2]], 16) ^
@@ -279,3 +279,246 @@
env->vfp.regs[rd] = make_float64(st.l[0]);
env->vfp.regs[rd + 1] = make_float64(st.l[1]);
}
+
+/*
+ * SHA-1 logical functions
+ */
+
+static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (x & (y ^ z)) ^ z;
+}
+
+static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
+{
+ return x ^ y ^ z;
+}
+
+static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (x & y) | ((x | y) & z);
+}
+
+void HELPER(crypto_sha1_3reg)(CPUARMState *env, uint32_t rd, uint32_t rn,
+ uint32_t rm, uint32_t op)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE n = { .l = {
+ float64_val(env->vfp.regs[rn]),
+ float64_val(env->vfp.regs[rn + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ if (op == 3) { /* sha1su0 */
+ d.l[0] ^= d.l[1] ^ m.l[0];
+ d.l[1] ^= n.l[0] ^ m.l[1];
+ } else {
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ uint32_t t;
+
+ switch (op) {
+ case 0: /* sha1c */
+ t = cho(d.words[1], d.words[2], d.words[3]);
+ break;
+ case 1: /* sha1p */
+ t = par(d.words[1], d.words[2], d.words[3]);
+ break;
+ case 2: /* sha1m */
+ t = maj(d.words[1], d.words[2], d.words[3]);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ t += rol32(d.words[0], 5) + n.words[0] + m.words[i];
+
+ n.words[0] = d.words[3];
+ d.words[3] = d.words[2];
+ d.words[2] = ror32(d.words[1], 2);
+ d.words[1] = d.words[0];
+ d.words[0] = t;
+ }
+ }
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha1h)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ m.words[0] = ror32(m.words[0], 2);
+ m.words[1] = m.words[2] = m.words[3] = 0;
+
+ env->vfp.regs[rd] = make_float64(m.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(m.l[1]);
+}
+
+void HELPER(crypto_sha1su1)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ d.words[0] = rol32(d.words[0] ^ m.words[1], 1);
+ d.words[1] = rol32(d.words[1] ^ m.words[2], 1);
+ d.words[2] = rol32(d.words[2] ^ m.words[3], 1);
+ d.words[3] = rol32(d.words[3] ^ d.words[0], 1);
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+/*
+ * The SHA-256 logical functions, according to
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ */
+
+static uint32_t S0(uint32_t x)
+{
+ return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
+}
+
+static uint32_t S1(uint32_t x)
+{
+ return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
+}
+
+static uint32_t s0(uint32_t x)
+{
+ return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
+}
+
+static uint32_t s1(uint32_t x)
+{
+ return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
+}
+
+void HELPER(crypto_sha256h)(CPUARMState *env, uint32_t rd, uint32_t rn,
+ uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE n = { .l = {
+ float64_val(env->vfp.regs[rn]),
+ float64_val(env->vfp.regs[rn + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ uint32_t t = cho(n.words[0], n.words[1], n.words[2]) + n.words[3]
+ + S1(n.words[0]) + m.words[i];
+
+ n.words[3] = n.words[2];
+ n.words[2] = n.words[1];
+ n.words[1] = n.words[0];
+ n.words[0] = d.words[3] + t;
+
+ t += maj(d.words[0], d.words[1], d.words[2]) + S0(d.words[0]);
+
+ d.words[3] = d.words[2];
+ d.words[2] = d.words[1];
+ d.words[1] = d.words[0];
+ d.words[0] = t;
+ }
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256h2)(CPUARMState *env, uint32_t rd, uint32_t rn,
+ uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE n = { .l = {
+ float64_val(env->vfp.regs[rn]),
+ float64_val(env->vfp.regs[rn + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ uint32_t t = cho(d.words[0], d.words[1], d.words[2]) + d.words[3]
+ + S1(d.words[0]) + m.words[i];
+
+ d.words[3] = d.words[2];
+ d.words[2] = d.words[1];
+ d.words[1] = d.words[0];
+ d.words[0] = n.words[3 - i] + t;
+ }
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256su0)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ d.words[0] += s0(d.words[1]);
+ d.words[1] += s0(d.words[2]);
+ d.words[2] += s0(d.words[3]);
+ d.words[3] += s0(m.words[0]);
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn,
+ uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE n = { .l = {
+ float64_val(env->vfp.regs[rn]),
+ float64_val(env->vfp.regs[rn + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ d.words[0] += s1(m.words[2]) + n.words[1];
+ d.words[1] += s1(m.words[3]) + n.words[2];
+ d.words[2] += s1(d.words[0]) + n.words[3];
+ d.words[3] += s1(d.words[1]) + m.words[0];
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
diff --git a/target-arm/helper.h b/target-arm/helper.h
index b63fd0f..113b09d 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -512,6 +512,15 @@
DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
+DEF_HELPER_5(crypto_sha1_3reg, void, env, i32, i32, i32, i32)
+DEF_HELPER_3(crypto_sha1h, void, env, i32, i32)
+DEF_HELPER_3(crypto_sha1su1, void, env, i32, i32)
+
+DEF_HELPER_4(crypto_sha256h, void, env, i32, i32, i32)
+DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32)
+DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32)
+DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32)
+
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64)
diff --git a/target-arm/translate.c b/target-arm/translate.c
index d499caa..38ef5b1 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4776,6 +4776,7 @@
#define NEON_3R_VPMIN 21
#define NEON_3R_VQDMULH_VQRDMULH 22
#define NEON_3R_VPADD 23
+#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
@@ -4809,6 +4810,7 @@
[NEON_3R_VPMIN] = 0x7,
[NEON_3R_VQDMULH_VQRDMULH] = 0x6,
[NEON_3R_VPADD] = 0x7,
+ [NEON_3R_SHA] = 0xf, /* size field encodes op type */
[NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
@@ -4842,6 +4844,7 @@
#define NEON_2RM_VCEQ0 18
#define NEON_2RM_VCLE0 19
#define NEON_2RM_VCLT0 20
+#define NEON_2RM_SHA1H 21
#define NEON_2RM_VABS 22
#define NEON_2RM_VNEG 23
#define NEON_2RM_VCGT0_F 24
@@ -4858,6 +4861,7 @@
#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
#define NEON_2RM_VSHLL 38
+#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
#define NEON_2RM_VRINTN 40
#define NEON_2RM_VRINTX 41
#define NEON_2RM_VRINTA 42
@@ -4918,6 +4922,7 @@
[NEON_2RM_VCEQ0] = 0x7,
[NEON_2RM_VCLE0] = 0x7,
[NEON_2RM_VCLT0] = 0x7,
+ [NEON_2RM_SHA1H] = 0x4,
[NEON_2RM_VABS] = 0x7,
[NEON_2RM_VNEG] = 0x7,
[NEON_2RM_VCGT0_F] = 0x4,
@@ -4934,6 +4939,7 @@
[NEON_2RM_VMOVN] = 0x7,
[NEON_2RM_VQMOVN] = 0x7,
[NEON_2RM_VSHLL] = 0x7,
+ [NEON_2RM_SHA1SU1] = 0x4,
[NEON_2RM_VRINTN] = 0x4,
[NEON_2RM_VRINTX] = 0x4,
[NEON_2RM_VRINTA] = 0x4,
@@ -5011,6 +5017,49 @@
if (q && ((rd | rn | rm) & 1)) {
return 1;
}
+ /*
+ * The SHA-1/SHA-256 3-register instructions require special treatment
+ * here, as their size field is overloaded as an op type selector, and
+ * they all consume their input in a single pass.
+ */
+ if (op == NEON_3R_SHA) {
+ if (!q) {
+ return 1;
+ }
+ if (!u) { /* SHA-1 */
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rn);
+ tmp3 = tcg_const_i32(rm);
+ tmp4 = tcg_const_i32(size);
+ gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
+ tcg_temp_free_i32(tmp4);
+ } else { /* SHA-256 */
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA256) || size == 3) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rn);
+ tmp3 = tcg_const_i32(rm);
+ switch (size) {
+ case 0:
+ gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
+ break;
+ case 1:
+ gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
+ break;
+ case 2:
+ gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
+ break;
+ }
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp3);
+ return 0;
+ }
if (size == 3 && op != NEON_3R_LOGIC) {
/* 64-bit element instructions. */
for (pass = 0; pass < (q ? 2 : 1); pass++) {
@@ -6486,6 +6535,41 @@
tcg_temp_free_i32(tmp2);
tcg_temp_free_i32(tmp3);
break;
+ case NEON_2RM_SHA1H:
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA1)
+ || ((rm | rd) & 1)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rm);
+
+ gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
+
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ break;
+ case NEON_2RM_SHA1SU1:
+ if ((rm | rd) & 1) {
+ return 1;
+ }
+ /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
+ if (q) {
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA256)) {
+ return 1;
+ }
+ } else if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rm);
+ if (q) {
+ gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
+ } else {
+ gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ break;
default:
elementwise:
for (pass = 0; pass < (q ? 4 : 2); pass++) {