make style

Philippe Teuwen 2019-03-10 00:00:59 +01:00
commit 0373696662
483 changed files with 56514 additions and 52451 deletions


@@ -80,9 +80,9 @@ THE SOFTWARE.
#define VECTOR_SIZE (MAX_BITSLICES/8)
typedef uint32_t __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t;
typedef union {
bitslice_value_t value;
uint64_t bytes64[MAX_BITSLICES/64];
uint8_t bytes[MAX_BITSLICES/8];
bitslice_value_t value;
uint64_t bytes64[MAX_BITSLICES / 64];
uint8_t bytes[MAX_BITSLICES / 8];
} bitslice_t;
// filter function (f20)
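For orientation (this sketch is not part of the diff): the bitslice_t union above views one SIMD-wide word as MAX_BITSLICES one-bit lanes, so bit position b of every candidate state sits side by side in one word. A minimal standalone illustration of that transposition, assuming a single 64-bit lane (MAX_BITSLICES = 64) and the 48-bit Crypto1 state width:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_BITSLICES 64    /* assumption: one 64-bit lane, no SIMD vector extensions */
    #define STATE_SIZE    48    /* Crypto1 state width, as in the real code */

    /* slices[b] holds bit b of every candidate state: lane i <=> candidate i */
    static uint64_t slices[STATE_SIZE];

    static void bitslice_states(const uint64_t *states, uint32_t n)
    {
        if (n > MAX_BITSLICES) n = MAX_BITSLICES;   /* one lane per candidate */
        for (uint32_t b = 0; b < STATE_SIZE; b++) {
            uint64_t lane = 0;
            for (uint32_t i = 0; i < n; i++)
                if ((states[i] >> b) & 1)
                    lane |= 1ull << i;              /* same idea as bytes64[slice_idx >> 6] |= ... */
            slices[b] = lane;
        }
    }

    int main(void)
    {
        uint64_t candidates[3] = { 0x000000000001ull, 0xFFFFFFFFFFFFull, 0xA5A5A5A5A5A5ull };
        bitslice_states(candidates, 3);
        printf("bit 0 across candidates: 0x%llx\n", (unsigned long long)slices[0]);   /* 0x7 */
        return 0;
    }

With this layout a single bitwise operation advances all lanes at once, which is what the f20 filter and feedback code below exploits.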
@@ -123,7 +123,7 @@ typedef union {
#endif
// typedefs and declaration of functions:
typedef const uint64_t crack_states_bitsliced_t(uint32_t, uint8_t*, statelist_t*, uint32_t*, uint64_t*, uint32_t, uint8_t*, noncelist_t*);
typedef const uint64_t crack_states_bitsliced_t(uint32_t, uint8_t *, statelist_t *, uint32_t *, uint64_t *, uint32_t, uint8_t *, noncelist_t *);
crack_states_bitsliced_t crack_states_bitsliced_AVX512;
crack_states_bitsliced_t crack_states_bitsliced_AVX2;
crack_states_bitsliced_t crack_states_bitsliced_AVX;
@@ -132,7 +132,7 @@ crack_states_bitsliced_t crack_states_bitsliced_MMX;
crack_states_bitsliced_t crack_states_bitsliced_NOSIMD;
crack_states_bitsliced_t crack_states_bitsliced_dispatch;
typedef void bitslice_test_nonces_t(uint32_t, uint32_t*, uint8_t*);
typedef void bitslice_test_nonces_t(uint32_t, uint32_t *, uint8_t *);
bitslice_test_nonces_t bitslice_test_nonces_AVX512;
bitslice_test_nonces_t bitslice_test_nonces_AVX2;
bitslice_test_nonces_t bitslice_test_nonces_AVX;
@@ -145,12 +145,13 @@ bitslice_test_nonces_t bitslice_test_nonces_dispatch;
#define malloc_bitslice(x) __builtin_assume_aligned(_aligned_malloc((x), MAX_BITSLICES/8), MAX_BITSLICES/8)
#define free_bitslice(x) _aligned_free(x)
#elif defined (__APPLE__)
static void *malloc_bitslice(size_t x) {
static void *malloc_bitslice(size_t x)
{
char *allocated_memory;
if (posix_memalign((void**)&allocated_memory, MAX_BITSLICES/8, x)) {
if (posix_memalign((void **)&allocated_memory, MAX_BITSLICES / 8, x)) {
return NULL;
} else {
return __builtin_assume_aligned(allocated_memory, MAX_BITSLICES/8);
return __builtin_assume_aligned(allocated_memory, MAX_BITSLICES / 8);
}
}
#define free_bitslice(x) free(x)
@@ -173,7 +174,8 @@ static bitslice_t bs_ones;
static bitslice_t bs_zeroes;
void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par)
{
// initialize 1 and 0 vectors
memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
@@ -181,9 +183,9 @@ void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce
// bitslice nonces' 2nd to 4th byte
for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
for(uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++){
bool bit = get_bit(KEYSTREAM_SIZE-1-bit_idx, BSWAP_32(bf_test_nonce[i] << 8));
if(bit){
for (uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++) {
bool bit = get_bit(KEYSTREAM_SIZE - 1 - bit_idx, BSWAP_32(bf_test_nonce[i] << 8));
if (bit) {
bitsliced_encrypted_nonces[i][bit_idx].value = bs_ones.value;
} else {
bitsliced_encrypted_nonces[i][bit_idx].value = bs_zeroes.value;
@@ -192,9 +194,9 @@ void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce
}
// bitslice nonces' parity (4 bits)
for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
for(uint32_t bit_idx = 0; bit_idx < 4; bit_idx++){
bool bit = get_bit(4-1-bit_idx, bf_test_nonce_par[i]);
if(bit){
for (uint32_t bit_idx = 0; bit_idx < 4; bit_idx++) {
bool bit = get_bit(4 - 1 - bit_idx, bf_test_nonce_par[i]);
if (bit) {
bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_ones.value;
} else {
bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_zeroes.value;
@@ -205,27 +207,28 @@
}
const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces){
const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces)
{
// Unlike aczid's implementation this doesn't roll back at all when performing bitsliced bruteforce.
// We know that the best first byte is already shifted in. Testing with the remaining three bytes of
// the nonces is sufficient to eliminate most of them. The small rest is tested with a simple unsliced
// brute forcing (including roll back).
bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
bitslice_t * restrict state_p;
bitslice_t states[KEYSTREAM_SIZE + STATE_SIZE];
bitslice_t *restrict state_p;
uint64_t key = -1;
uint64_t bucket_states_tested = 0;
uint32_t bucket_size[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
uint32_t bucket_size[(p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1];
uint32_t bitsliced_blocks = 0;
uint32_t const *restrict p_even_end = p->states[EVEN_STATE] + p->len[EVEN_STATE];
#if defined (DEBUG_BRUTE_FORCE)
uint32_t elimination_step = 0;
#define MAX_ELIMINATION_STEP 32
#define MAX_ELIMINATION_STEP 32
uint64_t keys_eliminated[MAX_ELIMINATION_STEP] = {0};
#endif
#ifdef DEBUG_KEY_ELIMINATION
bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1];
#endif
// constant ones/zeroes
@@ -235,32 +238,32 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
// bitslice all the even states
bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_t *));
bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1) * sizeof(bitslice_t *));
if (bitsliced_even_states == NULL) {
printf("Out of memory error in brute_force. Aborting...");
exit(4);
}
bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_value_t));
bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1) * sizeof(bitslice_value_t));
if (bitsliced_even_feedback == NULL) {
printf("Out of memory error in brute_force. Aborting...");
exit(4);
}
for(uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES){
bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE/2*sizeof(bitslice_t));
for (uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES) {
bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE / 2 * sizeof(bitslice_t));
if (lstate_p == NULL) {
printf("Out of memory error in brute_force. Aborting... \n");
exit(4);
}
memset(lstate_p, 0x00, STATE_SIZE/2*sizeof(bitslice_t)); // zero even bits
memset(lstate_p, 0x00, STATE_SIZE / 2 * sizeof(bitslice_t)); // zero even bits
// bitslice even half-states
const uint32_t max_slices = (p_even_end-p_even) < MAX_BITSLICES ? p_even_end-p_even : MAX_BITSLICES;
const uint32_t max_slices = (p_even_end - p_even) < MAX_BITSLICES ? p_even_end - p_even : MAX_BITSLICES;
bucket_size[bitsliced_blocks] = max_slices;
#ifdef DEBUG_KEY_ELIMINATION
bucket_contains_test_key[bitsliced_blocks] = false;
#endif
uint32_t slice_idx;
for(slice_idx = 0; slice_idx < max_slices; ++slice_idx){
uint32_t e = *(p_even+slice_idx);
for (slice_idx = 0; slice_idx < max_slices; ++slice_idx) {
uint32_t e = *(p_even + slice_idx);
#ifdef DEBUG_KEY_ELIMINATION
if (known_target_key != -1 && e == test_state[EVEN_STATE]) {
bucket_contains_test_key[bitsliced_blocks] = true;
@@ -268,34 +271,34 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
// printf("in slice %d\n", slice_idx);
}
#endif
for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
for (uint32_t bit_idx = 0; bit_idx < STATE_SIZE / 2; bit_idx++, e >>= 1) {
// set even bits
if(e&1){
lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
if (e & 1) {
lstate_p[bit_idx].bytes64[slice_idx >> 6] |= 1ull << (slice_idx & 0x3f);
}
}
}
// padding with last even state
for ( ; slice_idx < MAX_BITSLICES; ++slice_idx) {
uint32_t e = *(p_even_end-1);
for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
for (; slice_idx < MAX_BITSLICES; ++slice_idx) {
uint32_t e = *(p_even_end - 1);
for (uint32_t bit_idx = 0; bit_idx < STATE_SIZE / 2; bit_idx++, e >>= 1) {
// set even bits
if(e&1){
lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
if (e & 1) {
lstate_p[bit_idx].bytes64[slice_idx >> 6] |= 1ull << (slice_idx & 0x3f);
}
}
}
bitsliced_even_states[bitsliced_blocks] = lstate_p;
// bitsliced_even_feedback[bitsliced_blocks] = bs_ones;
bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47- 0)/2].value ^
lstate_p[(47-10)/2].value ^ lstate_p[(47-12)/2].value ^ lstate_p[(47-14)/2].value ^
lstate_p[(47-24)/2].value ^ lstate_p[(47-42)/2].value;
bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47 - 0) / 2].value ^
lstate_p[(47 - 10) / 2].value ^ lstate_p[(47 - 12) / 2].value ^ lstate_p[(47 - 14) / 2].value ^
lstate_p[(47 - 24) / 2].value ^ lstate_p[(47 - 42) / 2].value;
bitsliced_blocks++;
}
// bitslice every odd state to every block of even states
for(uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd){
for (uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd) {
// early abort
if(*keys_found){
if (*keys_found) {
goto out;
}
@@ -305,12 +308,12 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
uint32_t o = *p_odd;
// pre-compute the odd feedback bit
bool odd_feedback_bit = evenparity32(o&0x29ce5c);
bool odd_feedback_bit = evenparity32(o & 0x29ce5c);
const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
// set odd state bits
for (uint32_t state_idx = 0; state_idx < STATE_SIZE; o >>= 1, state_idx += 2) {
if (o & 1){
if (o & 1) {
state_p[state_idx] = bs_ones;
} else {
state_p[state_idx] = bs_zeroes;
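The odd feedback bit pre-computed above is the XOR of the LFSR feedback taps that sit in the odd half-state, selected by the mask 0x29ce5c. A standalone sketch, not from this file, using __builtin_parity as a stand-in on the assumption that evenparity32() returns the XOR of all 32 bits:

    #include <stdint.h>
    #include <stdio.h>

    /* XOR of the bits selected by the odd-tap mask used above */
    static int odd_feedback(uint32_t odd_half)
    {
        return __builtin_parity(odd_half & 0x29ce5c);   /* 1 iff an odd number of selected taps are set */
    }

    int main(void)
    {
        uint32_t o = 0x123456;                          /* hypothetical 24-bit odd half-state */
        printf("odd feedback bit: %d\n", odd_feedback(o));
        return 0;
    }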
@@ -320,14 +323,14 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
bitslice_value_t crypto1_bs_f20b_2[16];
bitslice_value_t crypto1_bs_f20b_3[8];
crypto1_bs_f20b_2[0] = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
crypto1_bs_f20b_3[0] = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
crypto1_bs_f20b_2[0] = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
crypto1_bs_f20b_3[0] = f20b(state_p[47 - 41].value, state_p[47 - 43].value, state_p[47 - 45].value, state_p[47 - 47].value);
bitslice_value_t ksb[8];
ksb[0] = f20c(f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value),
f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value),
ksb[0] = f20c(f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value),
f20b(state_p[47 - 17].value, state_p[47 - 19].value, state_p[47 - 21].value, state_p[47 - 23].value),
crypto1_bs_f20b_2[0],
f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value),
f20a(state_p[47 - 33].value, state_p[47 - 35].value, state_p[47 - 37].value, state_p[47 - 39].value),
crypto1_bs_f20b_3[0]);
uint32_t *restrict p_even = p->states[EVEN_STATE];
@@ -335,14 +338,14 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
#ifdef DEBUG_KEY_ELIMINATION
// if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
// printf("Now testing known target key.\n");
// printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
// printf("Now testing known target key.\n");
// printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
// }
#endif
// add the even state bits
const bitslice_t *restrict bitsliced_even_state = bitsliced_even_states[block_idx];
for(uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) {
state_p[state_idx] = bitsliced_even_state[state_idx/2];
for (uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) {
state_p[state_idx] = bitsliced_even_state[state_idx / 2];
}
// pre-compute first feedback bit vector. This is the same for all nonces
@@ -357,10 +360,10 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
par[0] = bs_zeroes.value;
uint32_t next_common_bits = 0;
for(uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests){
for (uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests) {
// common bits with preceding test nonce
uint32_t common_bits = next_common_bits; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0;
next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests+1]) : 0;
next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests + 1]) : 0;
uint32_t parity_bit_idx = 1; // start checking with the parity of second nonce byte
bitslice_value_t fb_bits = fbb[common_bits]; // start with precomputed feedback bits from previous nonce
bitslice_value_t ks_bits = ksb[common_bits]; // dito for first keystream bits
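trailing_zeros(x ^ y), used just above, counts the low-order bit positions in which two test-nonce bytes agree; the loop uses that count to decide how many precomputed keystream and feedback bits from the previous nonce it can reuse. A small standalone sketch with __builtin_ctz as a stand-in for trailing_zeros (hypothetical byte values):

    #include <stdint.h>
    #include <stdio.h>

    /* number of low-order bits in which a and b agree (8-bit values) */
    static uint32_t common_low_bits(uint8_t a, uint8_t b)
    {
        return (a == b) ? 8 : (uint32_t)__builtin_ctz((uint32_t)(a ^ b));
    }

    int main(void)
    {
        uint8_t prev = 0x64, cur = 0x6c;                  /* hypothetical consecutive 2nd nonce bytes, differing in bit 3 */
        printf("common low bits: %u\n", common_low_bits(prev, cur));   /* prints 3 */
        return 0;
    }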
@@ -371,7 +374,7 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
state_p -= common_bits; // and reuse the already calculated state bits
// highest bit is transmitted/received first. We start with Bit 23 (highest bit of second nonce byte),
// or the highest bit which differs from the previous nonce
for (int32_t ks_idx = KEYSTREAM_SIZE-1-common_bits; ks_idx >= 0; --ks_idx) {
for (int32_t ks_idx = KEYSTREAM_SIZE - 1 - common_bits; ks_idx >= 0; --ks_idx) {
// decrypt nonce bits
const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value;
@@ -386,27 +389,27 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
// update crypto1 subfunctions
bitslice_value_t f20a_1, f20b_1, f20b_2, f20a_2, f20b_3;
f20a_2 = f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value);
f20b_3 = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
f20a_2 = f20a(state_p[47 - 33].value, state_p[47 - 35].value, state_p[47 - 37].value, state_p[47 - 39].value);
f20b_3 = f20b(state_p[47 - 41].value, state_p[47 - 43].value, state_p[47 - 45].value, state_p[47 - 47].value);
if (ks_idx > KEYSTREAM_SIZE - 8) {
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
f20b_1 = f20b(state_p[47 - 17].value, state_p[47 - 19].value, state_p[47 - 21].value, state_p[47 - 23].value);
f20b_2 = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx] = f20b_3;
} else if (ks_idx > KEYSTREAM_SIZE - 16) {
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
f20b_2 = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
} else if (ks_idx > KEYSTREAM_SIZE - 24){
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
} else if (ks_idx > KEYSTREAM_SIZE - 24) {
f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
f20b_2 = crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx - 16];
} else {
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
f20b_1 = f20b(state_p[47 - 17].value, state_p[47 - 19].value, state_p[47 - 21].value, state_p[47 - 23].value);
f20b_2 = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
}
// update keystream bit
ks_bits = f20c(f20a_1, f20b_1, f20b_2, f20a_2, f20b_3);
@@ -427,15 +430,15 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
// this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
// the short-circuiting also helps
if(results.bytes64[0] == 0
if (results.bytes64[0] == 0
#if MAX_BITSLICES > 64
&& results.bytes64[1] == 0
&& results.bytes64[1] == 0
#endif
#if MAX_BITSLICES > 128
&& results.bytes64[2] == 0
&& results.bytes64[3] == 0
&& results.bytes64[2] == 0
&& results.bytes64[3] == 0
#endif
) {
) {
#if defined (DEBUG_BRUTE_FORCE)
if (elimination_step < MAX_ELIMINATION_STEP) {
keys_eliminated[elimination_step] += MAX_BITSLICES;
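The comparison rewritten in this hunk checks the result vector word by word with short-circuiting && instead of memcmp, so the common case (a surviving candidate in the first word) bails out immediately. A reduced standalone sketch of both variants, assuming a 256-bit vector, i.e. four 64-bit words:

    #include <stdint.h>
    #include <string.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define WORDS 4    /* assumption: MAX_BITSLICES == 256 */

    typedef struct { uint64_t bytes64[WORDS]; } vec_t;

    /* short-circuiting word compare: stops at the first non-zero word */
    static bool all_zero_words(const vec_t *v)
    {
        return v->bytes64[0] == 0 && v->bytes64[1] == 0 &&
               v->bytes64[2] == 0 && v->bytes64[3] == 0;
    }

    /* the memcmp alternative the comment above argues against */
    static bool all_zero_memcmp(const vec_t *v)
    {
        static const vec_t zero;                          /* zero-initialized reference */
        return memcmp(v, &zero, sizeof(*v)) == 0;
    }

    int main(void)
    {
        vec_t r = { { 0x10, 0, 0, 0 } };                  /* a surviving candidate in word 0 */
        printf("%d %d\n", all_zero_words(&r), all_zero_memcmp(&r));   /* prints 0 0 */
        return 0;
    }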
@@ -458,12 +461,12 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
// update feedback bit vector
if (ks_idx != 0) {
fb_bits =
(state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
(state_p[47 - 0].value ^ state_p[47 - 5].value ^ state_p[47 - 9].value ^
state_p[47 - 10].value ^ state_p[47 - 12].value ^ state_p[47 - 14].value ^
state_p[47 - 15].value ^ state_p[47 - 17].value ^ state_p[47 - 19].value ^
state_p[47 - 24].value ^ state_p[47 - 25].value ^ state_p[47 - 27].value ^
state_p[47 - 29].value ^ state_p[47 - 35].value ^ state_p[47 - 39].value ^
state_p[47 - 41].value ^ state_p[47 - 42].value ^ state_p[47 - 43].value);
}
// remember feedback and keystream vectors for later use
uint8_t bit = KEYSTREAM_SIZE - ks_idx;
@@ -523,7 +526,7 @@ stop_tests:
}
}
out:
for(uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx) {
free_bitslice(bitsliced_even_states[block_idx]);
}
free(bitsliced_even_states);
@@ -532,7 +535,7 @@ out:
#if defined (DEBUG_BRUTE_FORCE)
for (uint32_t i = 0; i < MAX_ELIMINATION_STEP; i++) {
printf("Eliminated after %2u test_bytes: %5.2f%%\n", i+1, (float)keys_eliminated[i] / bucket_states_tested * 100);
printf("Eliminated after %2u test_bytes: %5.2f%%\n", i + 1, (float)keys_eliminated[i] / bucket_states_tested * 100);
}
#endif
return key;
@@ -548,36 +551,39 @@ bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_
static SIMDExecInstr intSIMDInstr = SIMD_AUTO;
void SetSIMDInstr(SIMDExecInstr instr) {
void SetSIMDInstr(SIMDExecInstr instr)
{
intSIMDInstr = instr;
crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;
}
SIMDExecInstr GetSIMDInstr() {
SIMDExecInstr GetSIMDInstr()
{
SIMDExecInstr instr = SIMD_NONE;
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512;
else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
#else
if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX;
else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2;
else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX;
else
#endif
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512;
else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
#else
if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX;
else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2;
else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX;
else
#endif
#endif
instr = SIMD_NONE;
return instr;
}
SIMDExecInstr GetSIMDInstrAuto() {
SIMDExecInstr GetSIMDInstrAuto()
{
SIMDExecInstr instr = intSIMDInstr;
if (instr == SIMD_AUTO)
return GetSIMDInstr();
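GetSIMDInstr() above probes the CPU at run time with GCC's __builtin_cpu_supports, and the dispatch functions below rebind a function pointer to the matching implementation on first use. A stripped-down standalone sketch of that pattern, assuming an x86 GCC/Clang build and showing only an AVX2 path and a generic fallback:

    #include <stdio.h>

    static void work_avx2(void)    { puts("using the AVX2 path"); }
    static void work_generic(void) { puts("using the generic path"); }

    static void work_dispatch(void);
    /* entry pointer starts at the dispatcher, similar to the *_function_p pointers here */
    static void (*work_p)(void) = work_dispatch;

    static void work_dispatch(void)
    {
        if (__builtin_cpu_supports("avx2"))               /* run-time CPU probe (GCC/Clang, x86) */
            work_p = work_avx2;
        else
            work_p = work_generic;
        work_p();                                         /* forward this first call to the chosen path */
    }

    int main(void)
    {
        work_p();                                         /* first call: detect and rebind */
        work_p();                                         /* later calls go straight to the selected path */
        return 0;
    }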
@@ -586,15 +592,16 @@ SIMDExecInstr GetSIMDInstrAuto() {
}
// determine the available instruction set at runtime and call the correct function
const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
switch(GetSIMDInstrAuto()) {
const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces)
{
switch (GetSIMDInstrAuto()) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
case SIMD_AVX512:
crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512;
break;
#endif
#endif
case SIMD_AVX2:
crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2;
break;
@@ -607,10 +614,10 @@ const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_firs
case SIMD_MMX:
crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX;
break;
#endif
#endif
#endif
default:
crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD;
crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD;
break;
}
@@ -618,15 +625,16 @@ const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_firs
return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
}
void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
switch(GetSIMDInstrAuto()) {
void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par)
{
switch (GetSIMDInstrAuto()) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
case SIMD_AVX512:
bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512;
break;
#endif
#endif
case SIMD_AVX2:
bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2;
break;
@@ -639,10 +647,10 @@ void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_t
case SIMD_MMX:
bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX;
break;
#endif
#endif
#endif
default:
bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD;
bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD;
break;
}
@@ -651,11 +659,13 @@ void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_t
}
// Entries to dispatched function calls
const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces)
{
return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
}
void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par)
{
(*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
}