diff --git a/node/AES.cpp b/node/AES.cpp index d606524c9..b4fa93980 100644 --- a/node/AES.cpp +++ b/node/AES.cpp @@ -507,8 +507,7 @@ void AES::CTR::crypt(const void *const input,unsigned int len) noexcept --len; out[totalLen++] = *(in++); if (!(totalLen & 15U)) { - __m128i d0 = _mm_set_epi64x((long long)Utils::hton(c1),(long long)c0); - if (unlikely(++c1 == 0ULL)) c0 = Utils::hton(Utils::ntoh(c0) + 1ULL); + __m128i d0 = _mm_set_epi64x((long long)Utils::hton(c1++),(long long)c0); d0 = _mm_xor_si128(d0,k0); d0 = _mm_aesenc_si128(d0,k1); d0 = _mm_aesenc_si128(d0,k2); @@ -656,7 +655,6 @@ void AES::CTR::crypt(const void *const input,unsigned int len) noexcept d0 = _mm_aesenc_si128(d0,k13); d0 = _mm_aesenclast_si128(d0,k14); _mm_storeu_si128(reinterpret_cast<__m128i *>(out),_mm_xor_si128(d0,_mm_loadu_si128(reinterpret_cast(in)))); - in += 16; len -= 16; out += 16; @@ -678,6 +676,7 @@ void AES::CTR::crypt(const void *const input,unsigned int len) noexcept #endif uint64_t keyStream[2]; + uint32_t ctr = Utils::ntoh(reinterpret_cast(_ctr)[3]); unsigned int totalLen = _len; if ((totalLen & 15U)) { @@ -690,10 +689,10 @@ void AES::CTR::crypt(const void *const input,unsigned int len) noexcept out[totalLen++] = *(in++); if (!(totalLen & 15U)) { _aes._encryptSW(reinterpret_cast(_ctr),reinterpret_cast(keyStream)); + reinterpret_cast(_ctr)[3] = Utils::hton(++ctr); uint8_t *outblk = out + (totalLen - 16); for(int i=0;i<16;++i) outblk[i] ^= reinterpret_cast(keyStream)[i]; - _ctr[1] = Utils::hton(Utils::ntoh(_ctr[1]) + 1ULL); break; } } @@ -707,23 +706,23 @@ void AES::CTR::crypt(const void *const input,unsigned int len) noexcept #endif while (len >= 16) { _aes._encryptSW(reinterpret_cast(_ctr),reinterpret_cast(keyStream)); + reinterpret_cast(_ctr)[3] = Utils::hton(++ctr); reinterpret_cast(out)[0] = reinterpret_cast(in)[0] ^ keyStream[0]; reinterpret_cast(out)[1] = reinterpret_cast(in)[1] ^ keyStream[1]; out += 16; len -= 16; in += 16; - _ctr[1] = Utils::hton(Utils::ntoh(_ctr[1]) + 1ULL); } #ifdef ZT_NO_UNALIGNED_ACCESS } else { while (len >= 16) { _aes._encryptSW(reinterpret_cast(_ctr),reinterpret_cast(keyStream)); + reinterpret_cast(_ctr)[3] = Utils::hton(++ctr); for (int i = 0;i < 16;++i) out[i] = in[i] ^ reinterpret_cast(keyStream)[i]; out += 16; len -= 16; in += 16; - _ctr[1] = Utils::hton(Utils::ntoh(_ctr[1]) + 1ULL); } } #endif diff --git a/node/AES.hpp b/node/AES.hpp index 8f83ced5d..6194c6b24 100644 --- a/node/AES.hpp +++ b/node/AES.hpp @@ -57,16 +57,27 @@ public: /** * Create an un-initialized AES instance (must call init() before use) */ - ZT_INLINE AES() noexcept {} + ZT_INLINE AES() noexcept + { + Utils::memoryLock(this,sizeof(AES)); + } /** * Create an AES instance with the given key * * @param key 256-bit key */ - explicit ZT_INLINE AES(const void *const key) noexcept { this->init(key); } + explicit ZT_INLINE AES(const void *const key) noexcept : + AES() + { + this->init(key); + } - ZT_INLINE ~AES() { Utils::burn(&_k,sizeof(_k)); } + ZT_INLINE ~AES() + { + Utils::burn(&_k,sizeof(_k)); + Utils::memoryUnlock(this,sizeof(AES)); + } /** * Set (or re-set) this AES256 cipher's key @@ -191,6 +202,9 @@ public: /** * Streaming AES-CTR encrypt/decrypt + * + * NOTE: this doesn't support overflow of the counter in the least significant 32 bits. + * AES-GMAC-CTR doesn't need this, so we don't support it as an optimization. */ class CTR { @@ -202,7 +216,7 @@ public: /** * Initialize this CTR instance to encrypt a new stream * - * @param iv Unique initialization vector + * @param iv Unique initialization vector and initial 32-bit counter (least significant 32 bits, big-endian) * @param output Buffer to which to store output (MUST be large enough for total bytes processed!) */ ZT_INLINE void init(const uint8_t iv[16],void *const output) noexcept @@ -312,15 +326,16 @@ public: _iv[1] = gmacTag[0]; _ctr._aes.encrypt(_iv,_iv); - // Bit 31 of the CTR IV is masked to (1) allow us to optimize by forgetting - // about integer overflow for less than 2^31 bytes (which is far less than - // this system's max message size), and (2) ensure interoperability with any - // future FIPS-compliant or other cryptographic libraries that may or may not - // handle 32-bit integer overflow of the least significant 32 bits in the - // counter in the expected way. + // For CTR we need to mask the least significant 32 bits to zero for a typical + // 96-bit nonce, since this guarantees compatiblity with the most CTR implementations + // and avoids having to do add with carry. uint64_t ctrIv[2]; ctrIv[0] = _iv[0]; - ctrIv[1] = _iv[1] & ZT_CONST_TO_BE_UINT64(0xffffffff7fffffffULL); +#if __BYTE_ORDER == __BIG_ENDIAN + ctrIv[1] = _iv[1] & 0xffffffff00000000ULL; +#else + ctrIv[1] = _iv[1] & 0x00000000ffffffffULL; +#endif _ctr.init(reinterpret_cast(ctrIv),_output); } diff --git a/node/Identity.hpp b/node/Identity.hpp index 2ed39edf9..41a1e3a51 100644 --- a/node/Identity.hpp +++ b/node/Identity.hpp @@ -63,8 +63,11 @@ public: */ static const Identity NIL; - ZT_INLINE Identity() noexcept { memoryZero(this); } - ZT_INLINE ~Identity() { Utils::burn(reinterpret_cast(&this->_priv),sizeof(this->_priv)); } + ZT_INLINE Identity() noexcept + { + Utils::memoryLock(this,sizeof(Identity)); + memoryZero(this); + } /** * Construct identity from string @@ -74,7 +77,17 @@ public: * * @param str Identity in canonical string format */ - explicit ZT_INLINE Identity(const char *str) { fromString(str); } + explicit ZT_INLINE Identity(const char *str) + { + Utils::memoryLock(this,sizeof(Identity)); + fromString(str); + } + + ZT_INLINE ~Identity() + { + Utils::memoryUnlock(this,sizeof(Identity)); + Utils::burn(reinterpret_cast(&this->_priv),sizeof(this->_priv)); + } /** * Set identity to NIL value (all zero) diff --git a/node/Peer.cpp b/node/Peer.cpp index 8bcc72939..ece5bdd01 100644 --- a/node/Peer.cpp +++ b/node/Peer.cpp @@ -52,6 +52,7 @@ Peer::Peer(const RuntimeEnvironment *renv) : _vMinor(0), _vRevision(0) { + Utils::memoryLock(_key,sizeof(_key)); } bool Peer::init(const Identity &peerIdentity) diff --git a/node/Peer.hpp b/node/Peer.hpp index 95cd09ca2..2e4a2e583 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -47,9 +47,6 @@ class Peer friend class SharedPtr; friend class Topology; -private: - ZT_INLINE Peer() {} - public: /** * Create an uninitialized peer @@ -61,7 +58,11 @@ public: */ explicit Peer(const RuntimeEnvironment *renv); - ZT_INLINE ~Peer() { Utils::burn(_key,sizeof(_key)); } + ZT_INLINE ~Peer() + { + Utils::memoryUnlock(_key,sizeof(_key)); + Utils::burn(_key,sizeof(_key)); + } /** * Initialize peer with an identity diff --git a/node/Utils.hpp b/node/Utils.hpp index ced6cde6c..6b6217a71 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -20,6 +20,10 @@ namespace ZeroTier { namespace Utils { +#ifndef __WINDOWS__ +#include +#endif + // Macros to convert endian-ness at compile time for constants. #if __BYTE_ORDER == __LITTLE_ENDIAN #define ZT_CONST_TO_BE_UINT16(x) ((uint16_t)((uint16_t)((uint16_t)(x) << 8U) | (uint16_t)((uint16_t)(x) >> 8U))) @@ -58,6 +62,36 @@ extern const uint64_t ZERO256[4]; */ extern const char HEXCHARS[16]; +/** + * Lock memory to prevent swapping out to secondary storage (if possible) + * + * This is used to attempt to prevent the swapping out of long-term stored secure + * credentials like secret keys. It isn't supported on all platforms and may not + * be absolutely guaranteed to work, but it's a countermeasure. + * + * @param p Memory to lock + * @param l Size of memory + */ +static ZT_INLINE void memoryLock(const void *const p,const unsigned int l) noexcept +{ +#ifndef __WINDOWS__ + mlock(p,l); +#endif +} + +/** + * Unlock memory locked with memoryLock() + * + * @param p Memory to unlock + * @param l Size of memory + */ +static ZT_INLINE void memoryUnlock(const void *const p,const unsigned int l) noexcept +{ +#ifndef __WINDOWS__ + munlock(p,l); +#endif +} + /** * Perform a time-invariant binary comparison *