Enable use of NaCl for faster X64 Salsa20 implementations. Also include binary for OSX for easy build. Blazingly fast.

This commit is contained in:
Adam Ierymenko 2017-04-17 16:43:03 -07:00
commit df48738ac9
32 changed files with 801 additions and 47 deletions

View file

@ -45,7 +45,7 @@ static inline void _computeMemoryHardHash(const void *publicKey,unsigned int pub
// ordinary Salsa20 is randomly seekable. This is good for a cipher
// but is not what we want for sequential memory-hardness.
memset(genmem,0,ZT_IDENTITY_GEN_MEMORY);
Salsa20 s20(digest,256,(char *)digest + 32);
Salsa20 s20(digest,(char *)digest + 32);
s20.crypt20((char *)genmem,(char *)genmem,64);
for(unsigned long i=64;i<ZT_IDENTITY_GEN_MEMORY;i+=64) {
unsigned long k = i - 64;

View file

@ -66,9 +66,9 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,uint6
memset(_lastIdentityVerification,0,sizeof(_lastIdentityVerification));
// Use Salsa20 alone as a high-quality non-crypto PRNG
char foo[32];
Utils::getSecureRandom(foo,32);
_prng.init(foo,256,foo);
char foo[64];
Utils::getSecureRandom(foo,64);
_prng.init(foo,foo + 32);
memset(_prngStream,0,sizeof(_prngStream));
_prng.crypt12(_prngStream,_prngStream,sizeof(_prngStream));

View file

@ -1074,7 +1074,7 @@ void Packet::armor(const void *key,bool encryptPayload,unsigned int counter)
setCipher(encryptPayload ? ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012 : ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE);
_salsa20MangleKey((const unsigned char *)key,mangledKey);
Salsa20 s20(mangledKey,256,data + ZT_PACKET_IDX_IV);
Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV);
// MAC key is always the first 32 bytes of the Salsa20 key stream
// This is the same construction DJB's NaCl library uses
@ -1098,7 +1098,7 @@ bool Packet::dearmor(const void *key)
if ((cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)||(cs == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_SALSA2012)) {
_salsa20MangleKey((const unsigned char *)key,mangledKey);
Salsa20 s20(mangledKey,256,data + ZT_PACKET_IDX_IV);
Salsa20 s20(mangledKey,data + ZT_PACKET_IDX_IV);
s20.crypt12(ZERO_KEY,macKey,sizeof(macKey));
Poly1305::compute(mac,payload,payloadLen,macKey);
@ -1120,7 +1120,7 @@ void Packet::cryptField(const void *key,unsigned int start,unsigned int len)
uint8_t iv[8];
for(int i=0;i<8;++i) iv[i] = data[i];
iv[7] &= 0xf8; // mask off least significant 3 bits of packet ID / IV since this is unset when this function gets called
Salsa20 s20(key,256,iv);
Salsa20 s20(key,iv);
s20.crypt12(data + start,data + start,len);
}

View file

@ -10,6 +10,8 @@
#include "Constants.hpp"
#include "Salsa20.hpp"
#ifndef ZT_USE_LIBSODIUM
#define ROTATE(v,c) (((v) << (c)) | ((v) >> (32 - (c))))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) ((uint32_t)((v) + (w)))
@ -66,8 +68,7 @@ static const _s20sseconsts _S20SSECONSTANTS;
namespace ZeroTier {
void Salsa20::init(const void *key,unsigned int kbits,const void *iv)
throw()
void Salsa20::init(const void *key,const void *iv)
{
#ifdef ZT_SALSA20_SSE
const uint32_t *k = (const uint32_t *)key;
@ -78,14 +79,9 @@ void Salsa20::init(const void *key,unsigned int kbits,const void *iv)
_state.i[10] = k[1];
_state.i[7] = k[2];
_state.i[4] = k[3];
if (kbits == 256) {
k += 4;
_state.i[1] = 0x3320646e;
_state.i[2] = 0x79622d32;
} else {
_state.i[1] = 0x3120646e;
_state.i[2] = 0x79622d36;
}
k += 4;
_state.i[1] = 0x3320646e;
_state.i[2] = 0x79622d32;
_state.i[15] = k[0];
_state.i[12] = k[1];
_state.i[9] = k[2];
@ -95,19 +91,14 @@ void Salsa20::init(const void *key,unsigned int kbits,const void *iv)
_state.i[5] = 0;
_state.i[8] = 0;
#else
const char *constants;
const char *const constants = "expand 32-byte k";
const uint8_t *k = (const uint8_t *)key;
_state.i[1] = U8TO32_LITTLE(k + 0);
_state.i[2] = U8TO32_LITTLE(k + 4);
_state.i[3] = U8TO32_LITTLE(k + 8);
_state.i[4] = U8TO32_LITTLE(k + 12);
if (kbits == 256) { /* recommended */
k += 16;
constants = "expand 32-byte k";
} else { /* kbits == 128 */
constants = "expand 16-byte k";
}
k += 16;
_state.i[5] = U8TO32_LITTLE(constants + 4);
_state.i[6] = U8TO32_LITTLE(((const uint8_t *)iv) + 0);
_state.i[7] = U8TO32_LITTLE(((const uint8_t *)iv) + 4);
@ -124,7 +115,6 @@ void Salsa20::init(const void *key,unsigned int kbits,const void *iv)
}
void Salsa20::crypt12(const void *in,void *out,unsigned int bytes)
throw()
{
uint8_t tmp[64];
const uint8_t *m = (const uint8_t *)in;
@ -624,7 +614,6 @@ void Salsa20::crypt12(const void *in,void *out,unsigned int bytes)
}
void Salsa20::crypt20(const void *in,void *out,unsigned int bytes)
throw()
{
uint8_t tmp[64];
const uint8_t *m = (const uint8_t *)in;
@ -1356,3 +1345,5 @@ void Salsa20::crypt20(const void *in,void *out,unsigned int bytes)
}
} // namespace ZeroTier
#endif // !ZT_USE_LIBSODIUM

View file

@ -10,10 +10,82 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "Constants.hpp"
#include "Utils.hpp"
#ifdef ZT_USE_LIBSODIUM
#include <sodium/crypto_stream_salsa20.h>
#include <sodium/crypto_stream_salsa2012.h>
namespace ZeroTier {
/**
 * Salsa20 stream cipher (libsodium-backed variant)
 *
 * Holds a copy of the 256-bit key and the 64-bit IV and hands both to
 * libsodium's crypto_stream_salsa20 / crypto_stream_salsa2012 XOR functions.
 *
 * NOTE(review): only the key and IV are stored here -- no stream position is
 * kept between calls. Presumably every crypt12() / crypt20() call therefore
 * starts again at the beginning of the keystream; confirm that no caller
 * relies on the keystream continuing across successive calls.
 */
class Salsa20
{
public:
	/**
	 * Create an un-keyed cipher; init() must be called before use
	 */
	Salsa20() {}

	/**
	 * Wipe the stored key on destruction
	 */
	~Salsa20() { Utils::burn(_k,sizeof(_k)); }

	/**
	 * Create a cipher and key it in one step
	 *
	 * @param key 256-bit (32 byte) key
	 * @param iv 64-bit (8 byte) initialization vector
	 */
	Salsa20(const void *key,const void *iv)
	{
		init(key,iv);
	}

	/**
	 * Initialize (or re-initialize) the cipher
	 *
	 * @param key 256-bit (32 byte) key
	 * @param iv 64-bit (8 byte) initialization vector
	 */
	inline void init(const void *key,const void *iv)
	{
		memcpy(_k,key,sizeof(_k));   // sizeof(_k) == 32 bytes
		memcpy(&_iv,iv,sizeof(_iv)); // sizeof(_iv) == 8 bytes
	}

	/**
	 * Encrypt/decrypt data using Salsa20/12
	 *
	 * @param in Input data
	 * @param out Output buffer
	 * @param bytes Length of data in bytes
	 */
	inline void crypt12(const void *in,void *out,unsigned int bytes)
	{
		unsigned char *const dst = static_cast<unsigned char *>(out);
		const unsigned char *const src = static_cast<const unsigned char *>(in);
		const unsigned char *const nonce = reinterpret_cast<const unsigned char *>(&_iv);
		const unsigned char *const keyBytes = reinterpret_cast<const unsigned char *>(_k);
		crypto_stream_salsa2012_xor(dst,src,bytes,nonce,keyBytes);
	}

	/**
	 * Encrypt/decrypt data using Salsa20/20
	 *
	 * @param in Input data
	 * @param out Output buffer
	 * @param bytes Length of data in bytes
	 */
	inline void crypt20(const void *in,void *out,unsigned int bytes)
	{
		unsigned char *const dst = static_cast<unsigned char *>(out);
		const unsigned char *const src = static_cast<const unsigned char *>(in);
		const unsigned char *const nonce = reinterpret_cast<const unsigned char *>(&_iv);
		const unsigned char *const keyBytes = reinterpret_cast<const unsigned char *>(_k);
		crypto_stream_salsa20_xor(dst,src,bytes,nonce,keyBytes);
	}

private:
	uint64_t _k[4]; // 256-bit key, kept as 64-bit words
	uint64_t _iv;   // 64-bit IV / nonce
};
} // namespace ZeroTier
#else // !ZT_USE_LIBSODIUM
#if (!defined(ZT_SALSA20_SSE)) && (defined(__SSE2__) || defined(__WINDOWS__))
#define ZT_SALSA20_SSE 1
#endif
@ -30,30 +102,25 @@ namespace ZeroTier {
class Salsa20
{
public:
Salsa20() throw() {}
Salsa20() {}
~Salsa20() { Utils::burn(&_state,sizeof(_state)); }
/**
* @param key Key bits
* @param kbits Number of key bits: 128 or 256 (recommended)
* @param key 256-bit (32 byte) key
* @param iv 64-bit initialization vector
*/
Salsa20(const void *key,unsigned int kbits,const void *iv)
throw()
Salsa20(const void *key,const void *iv)
{
init(key,kbits,iv);
init(key,iv);
}
/**
* Initialize cipher
*
* @param key 256-bit (32 byte) key
* @param kbits Number of key bits: 128 or 256 (recommended)
* @param iv 64-bit initialization vector
*/
void init(const void *key,unsigned int kbits,const void *iv)
throw();
void init(const void *key,const void *iv);
/**
* Encrypt/decrypt data using Salsa20/12
@ -62,8 +129,7 @@ public:
* @param out Output buffer
* @param bytes Length of data
*/
void crypt12(const void *in,void *out,unsigned int bytes)
throw();
void crypt12(const void *in,void *out,unsigned int bytes);
/**
* Encrypt/decrypt data using Salsa20/20
@ -72,8 +138,7 @@ public:
* @param out Output buffer
* @param bytes Length of data
*/
void crypt20(const void *in,void *out,unsigned int bytes)
throw();
void crypt20(const void *in,void *out,unsigned int bytes);
private:
union {
@ -86,4 +151,6 @@ private:
} // namespace ZeroTier
#endif // ZT_USE_LIBSODIUM
#endif

View file

@ -156,7 +156,7 @@ void Utils::getSecureRandom(void *buf,unsigned int bytes)
s20Key[1] = (uint64_t)buf; // address of buf
s20Key[2] = (uint64_t)s20Key; // address of s20Key[]
s20Key[3] = (uint64_t)&s20; // address of s20
s20.init(s20Key,256,s20Key);
s20.init(s20Key,s20Key);
}
#ifdef __WINDOWS__