From 9d66d876f4b7b9f7de46066f3df9296445693700 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Wed, 25 Nov 2020 14:28:41 -0500 Subject: [PATCH] Likely fix for some alignment issues on ARM. --- node/AES.cpp | 22 ++++++---------------- node/AES_armcrypto.cpp | 17 ++++++++--------- node/Constants.hpp | 7 ++++++- 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/node/AES.cpp b/node/AES.cpp index 8402fc908..8f2f30d20 100644 --- a/node/AES.cpp +++ b/node/AES.cpp @@ -149,22 +149,12 @@ void AES::GMAC::update(const void *const data, unsigned int len) noexcept } } - if (likely(((uintptr_t)in & 7U) == 0U)) { - while (len >= 16) { - y0 ^= *reinterpret_cast(in); - y1 ^= *reinterpret_cast(in + 8); - in += 16; - s_gfmul(h0, h1, y0, y1); - len -= 16; - } - } else { - while (len >= 16) { - y0 ^= Utils::loadMachineEndian< uint64_t >(in); - y1 ^= Utils::loadMachineEndian< uint64_t >(in + 8); - in += 16; - s_gfmul(h0, h1, y0, y1); - len -= 16; - } + while (len >= 16) { + y0 ^= Utils::loadMachineEndian< uint64_t >(in); + y1 ^= Utils::loadMachineEndian< uint64_t >(in + 8); + in += 16; + s_gfmul(h0, h1, y0, y1); + len -= 16; } _y[0] = y0; diff --git a/node/AES_armcrypto.cpp b/node/AES_armcrypto.cpp index 30a7ec35a..f01304b3b 100644 --- a/node/AES_armcrypto.cpp +++ b/node/AES_armcrypto.cpp @@ -131,7 +131,7 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe uint8x16_t k14 = _aes.p_k.neon.ek[14]; unsigned int totalLen = _len; - if ((totalLen & 15U)) { + if ((totalLen & 15U) != 0) { for (;;) { if (unlikely(!len)) { vst1q_u8(reinterpret_cast(_ctr), vrev32q_u8(dd)); @@ -140,7 +140,7 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe } --len; out[totalLen++] = *(in++); - if (!(totalLen & 15U)) { + if ((totalLen & 15U) == 0) { uint8_t *const otmp = out + (totalLen - 16); uint8x16_t d0 = vrev32q_u8(dd); uint8x16_t pt = vld1q_u8(otmp); @@ -180,7 +180,10 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe uint8x16_t d2 = vrev32q_u8(dd2); uint8x16_t d3 = vrev32q_u8(dd3); uint8x16_t pt0 = vld1q_u8(in); - in += 16; + uint8x16_t pt1 = vld1q_u8(in + 16); + uint8x16_t pt2 = vld1q_u8(in + 16); + uint8x16_t pt3 = vld1q_u8(in + 16); + d0 = vaesmcq_u8(vaeseq_u8(d0, k0)); d1 = vaesmcq_u8(vaeseq_u8(d1, k0)); d2 = vaesmcq_u8(vaeseq_u8(d2, k0)); @@ -193,8 +196,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe d1 = vaesmcq_u8(vaeseq_u8(d1, k2)); d2 = vaesmcq_u8(vaeseq_u8(d2, k2)); d3 = vaesmcq_u8(vaeseq_u8(d3, k2)); - uint8x16_t pt1 = vld1q_u8(in); - in += 16; d0 = vaesmcq_u8(vaeseq_u8(d0, k3)); d1 = vaesmcq_u8(vaeseq_u8(d1, k3)); d2 = vaesmcq_u8(vaeseq_u8(d2, k3)); @@ -207,8 +208,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe d1 = vaesmcq_u8(vaeseq_u8(d1, k5)); d2 = vaesmcq_u8(vaeseq_u8(d2, k5)); d3 = vaesmcq_u8(vaeseq_u8(d3, k5)); - uint8x16_t pt2 = vld1q_u8(in); - in += 16; d0 = vaesmcq_u8(vaeseq_u8(d0, k6)); d1 = vaesmcq_u8(vaeseq_u8(d1, k6)); d2 = vaesmcq_u8(vaeseq_u8(d2, k6)); @@ -221,8 +220,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe d1 = vaesmcq_u8(vaeseq_u8(d1, k8)); d2 = vaesmcq_u8(vaeseq_u8(d2, k8)); d3 = vaesmcq_u8(vaeseq_u8(d3, k8)); - uint8x16_t pt3 = vld1q_u8(in); - in += 16; d0 = vaesmcq_u8(vaeseq_u8(d0, k9)); d1 = vaesmcq_u8(vaeseq_u8(d1, k9)); d2 = vaesmcq_u8(vaeseq_u8(d2, k9)); @@ -253,7 +250,9 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe vst1q_u8(out + 16, d1); vst1q_u8(out + 32, d2); vst1q_u8(out + 48, d3); + out += 64; + in += 64; dd = (uint8x16_t)vaddq_u32((uint32x4_t)dd, four); if (unlikely(len < 64)) diff --git a/node/Constants.hpp b/node/Constants.hpp index f9775dd63..400976c13 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -126,7 +126,12 @@ // Define ZT_NO_TYPE_PUNNING to disable reckless casts on anything other than x86/x64. #if (!(defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || defined(i386) || defined(__i386) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_IX86) || defined(__X86__) || defined(_X86_) || defined(__I86__) || defined(__INTEL__) || defined(__386))) #ifndef ZT_NO_TYPE_PUNNING -#define ZT_NO_TYPE_PUNNING +#define ZT_NO_TYPE_PUNNING 1 +#endif +#endif +#ifdef ZT_NO_TYPE_PUNNING +#ifndef ZT_NO_UNALIGNED_ACCESS +#define ZT_NO_UNALIGNED_ACCESS 1 #endif #endif