client: fix mix of spaces & tabs

Philippe Teuwen 2019-03-09 23:35:06 +01:00
commit 0d9223a547
197 changed files with 49383 additions and 49383 deletions


@@ -146,12 +146,12 @@ bitslice_test_nonces_t bitslice_test_nonces_dispatch;
#define free_bitslice(x) _aligned_free(x)
#elif defined (__APPLE__)
static void *malloc_bitslice(size_t x) {
char *allocated_memory;
if (posix_memalign((void**)&allocated_memory, MAX_BITSLICES/8, x)) {
return NULL;
} else {
return __builtin_assume_aligned(allocated_memory, MAX_BITSLICES/8);
}
}
#define free_bitslice(x) free(x)
#else
@@ -160,8 +160,8 @@ static void *malloc_bitslice(size_t x) {
#endif
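A minimal standalone sketch (not part of this commit) of the posix_memalign pattern used in the __APPLE__ branch above; aligned_slice_alloc and SLICE_ALIGN are illustrative names, with SLICE_ALIGN standing in for MAX_BITSLICES/8, the byte width of one bitslice vector:

#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <stdlib.h>

#define SLICE_ALIGN 32   /* assumed: 32 bytes, i.e. one 256-bit AVX2 vector */

/* posix_memalign either fills the pointer and returns 0, or returns an
 * error code and leaves it untouched, hence the NULL fallback. */
static void *aligned_slice_alloc(size_t bytes) {
    void *p = NULL;
    if (posix_memalign(&p, SLICE_ALIGN, bytes) != 0) {
        return NULL;
    }
    return p;
}

int main(void) {
    void *buf = aligned_slice_alloc(1024);
    if (buf == NULL) return 1;
    printf("aligned buffer at %p\n", buf);
    free(buf);   /* memory from posix_memalign is released with plain free() */
    return 0;
}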
typedef enum {
EVEN_STATE = 0,
ODD_STATE = 1
} odd_even_t;
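The EVEN_STATE/ODD_STATE pair reflects how this file keeps the 48-bit Crypto1 state as two 24-bit halves holding the even-indexed and odd-indexed state bits. A small standalone sketch of such a split, purely illustrative (the exact bit ordering used by the attack is an assumption here):

#include <stdint.h>
#include <stdio.h>

/* Split a 48-bit state into its even-indexed and odd-indexed bits. */
static void split_state(uint64_t state48, uint32_t *even, uint32_t *odd) {
    *even = 0;
    *odd  = 0;
    for (int i = 0; i < 48; i += 2) {
        *even |= (uint32_t)((state48 >> i) & 1) << (i / 2);
        *odd  |= (uint32_t)((state48 >> (i + 1)) & 1) << (i / 2);
    }
}

int main(void) {
    uint32_t even, odd;
    split_state(0xFFFFFF000000ull, &even, &odd);     /* top 24 bits set */
    printf("even=%06x odd=%06x\n", (unsigned)even, (unsigned)odd);
    return 0;
}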
@@ -175,32 +175,32 @@ static bitslice_t bs_zeroes;
void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
// initialize 1 and 0 vectors
memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
// bitslice nonces' 2nd to 4th byte
for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
for(uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++){
bool bit = get_bit(KEYSTREAM_SIZE-1-bit_idx, BSWAP_32(bf_test_nonce[i] << 8));
if(bit){
bitsliced_encrypted_nonces[i][bit_idx].value = bs_ones.value;
} else {
bitsliced_encrypted_nonces[i][bit_idx].value = bs_zeroes.value;
}
}
}
// bitslice nonces' parity (4 bits)
for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
for(uint32_t bit_idx = 0; bit_idx < 4; bit_idx++){
bool bit = get_bit(4-1-bit_idx, bf_test_nonce_par[i]);
if(bit){
bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_ones.value;
} else {
bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_zeroes.value;
}
}
}
}
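BITSLICE_TEST_NONCES above broadcasts each observed nonce and parity bit into a full vector of ones or zeroes (bs_ones/bs_zeroes), so that a single bitwise operation later compares that bit against MAX_BITSLICES candidate states at once. A self-contained miniature of the idea, using a 64-lane uint64_t slice instead of the project's vector type (widths and names are assumptions):

#include <stdint.h>
#include <stdio.h>

#define SLICES 64                      /* candidate states tested in parallel */
typedef uint64_t slice_t;              /* one bit position across 64 candidates */

/* Broadcast a single observed bit to a whole slice, like bs_ones/bs_zeroes. */
static slice_t broadcast_bit(int bit) {
    return bit ? ~(slice_t)0 : 0;
}

int main(void) {
    /* 64 candidate "state bits": candidate i contributes bit value (i & 1). */
    slice_t candidates = 0xAAAAAAAAAAAAAAAAull;
    /* Test all 64 candidates against an observed bit of 1 with one XOR:
     * a 0 in the result means that candidate matches the observation. */
    slice_t mismatch = candidates ^ broadcast_bit(1);
    printf("candidates that survive: %d of %d\n",
           SLICES - __builtin_popcountll(mismatch), SLICES);
    return 0;
}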
@@ -208,65 +208,65 @@ void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce
const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces){
// Unlike aczid's implementation this doesn't roll back at all when performing bitsliced bruteforce.
// We know that the best first byte is already shifted in. Testing with the remaining three bytes of
// the nonces is sufficient to eliminate most of them. The small rest is tested with a simple unsliced
// brute forcing (including roll back).
bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
bitslice_t * restrict state_p;
uint64_t key = -1;
uint64_t bucket_states_tested = 0;
uint32_t bucket_size[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
uint32_t bitsliced_blocks = 0;
uint32_t const *restrict p_even_end = p->states[EVEN_STATE] + p->len[EVEN_STATE];
#if defined (DEBUG_BRUTE_FORCE)
uint32_t elimination_step = 0;
#define MAX_ELIMINATION_STEP 32
uint64_t keys_eliminated[MAX_ELIMINATION_STEP] = {0};
#endif
#ifdef DEBUG_KEY_ELIMINATION
bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
#endif
// constant ones/zeroes
bitslice_t bs_ones;
memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
bitslice_t bs_zeroes;
memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
// bitslice all the even states
bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_t *));
if (bitsliced_even_states == NULL) {
printf("Out of memory error in brute_force. Aborting...");
exit(4);
}
bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_value_t));
if (bitsliced_even_feedback == NULL) {
printf("Out of memory error in brute_force. Aborting...");
exit(4);
}
for(uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES){
bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE/2*sizeof(bitslice_t));
if (lstate_p == NULL) {
printf("Out of memory error in brute_force. Aborting... \n");
exit(4);
}
memset(lstate_p, 0x00, STATE_SIZE/2*sizeof(bitslice_t)); // zero even bits
// bitslice even half-states
const uint32_t max_slices = (p_even_end-p_even) < MAX_BITSLICES ? p_even_end-p_even : MAX_BITSLICES;
bucket_size[bitsliced_blocks] = max_slices;
#ifdef DEBUG_KEY_ELIMINATION
bucket_contains_test_key[bitsliced_blocks] = false;
#endif
uint32_t slice_idx;
for(slice_idx = 0; slice_idx < max_slices; ++slice_idx){
uint32_t e = *(p_even+slice_idx);
#ifdef DEBUG_KEY_ELIMINATION
if (known_target_key != -1 && e == test_state[EVEN_STATE]) {
bucket_contains_test_key[bitsliced_blocks] = true;
// printf("bucket %d contains test key even state\n", bitsliced_blocks);
// printf("in slice %d\n", slice_idx);
}
#endif
for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
// set even bits
@@ -275,8 +275,8 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
}
}
}
// padding with last even state
for ( ; slice_idx < MAX_BITSLICES; ++slice_idx) {
uint32_t e = *(p_even_end-1);
for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
// set even bits
@@ -284,13 +284,13 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
}
}
}
bitsliced_even_states[bitsliced_blocks] = lstate_p;
// bitsliced_even_feedback[bitsliced_blocks] = bs_ones;
bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47- 0)/2].value ^
lstate_p[(47-10)/2].value ^ lstate_p[(47-12)/2].value ^ lstate_p[(47-14)/2].value ^
lstate_p[(47-24)/2].value ^ lstate_p[(47-42)/2].value;
bitsliced_blocks++;
}
// bitslice every odd state to every block of even states
for(uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd){
@@ -299,78 +299,78 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
goto out;
}
// set odd state bits and pre-compute first keystream bit vector. This is the same for all blocks of even states
state_p = &states[KEYSTREAM_SIZE];
uint32_t o = *p_odd;
// pre-compute the odd feedback bit
bool odd_feedback_bit = evenparity32(o&0x29ce5c);
const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
// set odd state bits
for (uint32_t state_idx = 0; state_idx < STATE_SIZE; o >>= 1, state_idx += 2) {
if (o & 1){
state_p[state_idx] = bs_ones;
} else {
state_p[state_idx] = bs_zeroes;
}
}
bitslice_value_t crypto1_bs_f20b_2[16];
bitslice_value_t crypto1_bs_f20b_3[8];
crypto1_bs_f20b_2[0] = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
crypto1_bs_f20b_3[0] = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
bitslice_value_t ksb[8];
ksb[0] = f20c(f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value),
f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value),
crypto1_bs_f20b_2[0],
f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value),
crypto1_bs_f20b_3[0]);
uint32_t *restrict p_even = p->states[EVEN_STATE];
for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx, p_even += MAX_BITSLICES) {
#ifdef DEBUG_KEY_ELIMINATION
// if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
// printf("Now testing known target key.\n");
// printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
// }
#endif
// add the even state bits
const bitslice_t *restrict bitsliced_even_state = bitsliced_even_states[block_idx];
for(uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) {
state_p[state_idx] = bitsliced_even_state[state_idx/2];
}
// pre-compute first feedback bit vector. This is the same for all nonces
bitslice_value_t fbb[8];
fbb[0] = odd_feedback ^ bitsliced_even_feedback[block_idx];
// vector to contain test results (1 = passed, 0 = failed)
bitslice_t results = bs_ones;
// parity_bits
bitslice_value_t par[8];
par[0] = bs_zeroes.value;
uint32_t next_common_bits = 0;
for(uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests){
// common bits with preceding test nonce
uint32_t common_bits = next_common_bits; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0;
next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests+1]) : 0;
uint32_t parity_bit_idx = 1; // start checking with the parity of second nonce byte
bitslice_value_t fb_bits = fbb[common_bits]; // start with precomputed feedback bits from previous nonce
bitslice_value_t ks_bits = ksb[common_bits]; // ditto for first keystream bits
bitslice_value_t parity_bit_vector = par[common_bits]; // ditto for first parity vector
// bitslice_value_t fb_bits = fbb[0]; // start with precomputed feedback bits from previous nonce
// bitslice_value_t ks_bits = ksb[0]; // ditto for first keystream bits
// bitslice_value_t parity_bit_vector = par[0]; // ditto for first parity vector
state_p -= common_bits; // and reuse the already calculated state bits
// highest bit is transmitted/received first. We start with Bit 23 (highest bit of second nonce byte),
// or the highest bit which differs from the previous nonce
for (int32_t ks_idx = KEYSTREAM_SIZE-1-common_bits; ks_idx >= 0; --ks_idx) {
// decrypt nonce bits
@@ -381,35 +381,35 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
parity_bit_vector ^= decrypted_nonce_bit_vector;
// update state
state_p--;
state_p[0].value = fb_bits ^ decrypted_nonce_bit_vector;
// update crypto1 subfunctions
bitslice_value_t f20a_1, f20b_1, f20b_2, f20a_2, f20b_3;
f20a_2 = f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value);
f20b_3 = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
if (ks_idx > KEYSTREAM_SIZE - 8) {
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx] = f20b_3;
} else if (ks_idx > KEYSTREAM_SIZE - 16) {
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
} else if (ks_idx > KEYSTREAM_SIZE - 24){
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
f20b_2 = crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx - 16];
} else {
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
}
// update keystream bit
ks_bits = f20c(f20a_1, f20b_1, f20b_2, f20a_2, f20b_3);
// for each completed byte:
if ((ks_idx & 0x07) == 0) {
@@ -437,88 +437,88 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
#endif
) {
#if defined (DEBUG_BRUTE_FORCE)
if (elimination_step < MAX_ELIMINATION_STEP) {
keys_eliminated[elimination_step] += MAX_BITSLICES;
}
#endif
#ifdef DEBUG_KEY_ELIMINATION
if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
printf("Known target key eliminated in brute_force.\n");
printf("block_idx = %d/%d, nonce = %d/%d\n", block_idx, bitsliced_blocks, tests, nonces_to_bruteforce);
}
#endif
goto stop_tests;
}
// prepare for next nonce byte
#if defined (DEBUG_BRUTE_FORCE)
elimination_step++;
#endif
parity_bit_vector = bs_zeroes.value;
}
// update feedback bit vector
if (ks_idx != 0) {
fb_bits =
(state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
}
// remember feedback and keystream vectors for later use
uint8_t bit = KEYSTREAM_SIZE - ks_idx;
if (bit <= next_common_bits) { // if needed and not yet stored
fbb[bit] = fb_bits;
ksb[bit] = ks_bits;
par[bit] = parity_bit_vector;
}
}
// prepare for next nonce. Revert to initial state
state_p = &states[KEYSTREAM_SIZE];
}
// all nonce tests were successful: we've found a possible key in this block!
uint32_t *p_even_test = p_even;
for (uint32_t results_word = 0; results_word < MAX_BITSLICES / 64; ++results_word) {
uint64_t results64 = results.bytes64[results_word];
for (uint32_t results_bit = 0; results_bit < 64; results_bit++) {
if (results64 & 0x01) {
if (verify_key(cuid, nonces, best_first_bytes, *p_odd, *p_even_test)) {
struct Crypto1State pcs;
pcs.odd = *p_odd;
pcs.even = *p_even_test;
lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true);
crypto1_get_lfsr(&pcs, &key);
bucket_states_tested += 64 * results_word + results_bit;
goto out;
}
#ifdef DEBUG_KEY_ELIMINATION
if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
printf("Known target key eliminated in brute_force verification.\n");
printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
}
#endif
}
}
#ifdef DEBUG_KEY_ELIMINATION
if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
printf("Known target key eliminated in brute_force (results_bit == 0).\n");
printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
}
#endif
results64 >>= 1;
p_even_test++;
if (p_even_test == p_even_end) {
goto stop_tests;
}
}
}
stop_tests:
#if defined (DEBUG_BRUTE_FORCE)
elimination_step = 0;
#endif
bucket_states_tested += bucket_size[block_idx];
// prepare to set new states
state_p = &states[KEYSTREAM_SIZE];
continue;
}
}
@@ -526,14 +526,14 @@ out:
for(uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
free_bitslice(bitsliced_even_states[block_idx]);
}
free(bitsliced_even_states);
free_bitslice(bitsliced_even_feedback);
__sync_fetch_and_add(num_keys_tested, bucket_states_tested);
#if defined (DEBUG_BRUTE_FORCE)
for (uint32_t i = 0; i < MAX_ELIMINATION_STEP; i++) {
printf("Eliminated after %2u test_bytes: %5.2f%%\n", i+1, (float)keys_eliminated[i] / bucket_states_tested * 100);
}
#endif
return key;
}
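Inside the nonce loop above, common_bits is derived from trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests+1]), i.e. the number of low-order bits two consecutive test-nonce bytes share; that count decides how many of the precomputed feedback, keystream and parity vectors (fbb, ksb, par) and state slots can be reused instead of recomputed. A standalone sketch of that count, assuming trailing_zeros behaves like a count-trailing-zeros helper:

#include <stdint.h>
#include <stdio.h>

/* Number of identical low-order bits of two bytes: ctz of their XOR. */
static uint32_t common_low_bits(uint8_t a, uint8_t b) {
    uint8_t diff = (uint8_t)(a ^ b);
    return diff ? (uint32_t)__builtin_ctz(diff) : 8;  /* equal bytes share all 8 */
}

int main(void) {
    uint8_t prev = 0x5C, next = 0x54;                  /* first differ in bit 3 */
    printf("shared low bits: %u\n", common_low_bits(prev, next));   /* 3 */
    return 0;
}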
@@ -549,102 +549,102 @@ bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_
static SIMDExecInstr intSIMDInstr = SIMD_AUTO;
void SetSIMDInstr(SIMDExecInstr instr) {
intSIMDInstr = instr;
crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;
}
SIMDExecInstr GetSIMDInstr() {
SIMDExecInstr instr = SIMD_NONE;
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512;
else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
#else
if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX;
else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2;
else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX;
else
#endif
#endif
instr = SIMD_NONE;
return instr;
}
SIMDExecInstr GetSIMDInstrAuto() {
SIMDExecInstr instr = intSIMDInstr;
if (instr == SIMD_AUTO)
return GetSIMDInstr();
return instr;
}
// determine the available instruction set at runtime and call the correct function
const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
switch(GetSIMDInstrAuto()) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
case SIMD_AVX512:
crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512;
break;
#endif
case SIMD_AVX2:
crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2;
break;
case SIMD_AVX:
crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX;
break;
case SIMD_SSE2:
crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2;
break;
case SIMD_MMX:
crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX;
break;
#endif
#endif
default:
crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD;
break;
}
// call the most optimized function for this CPU
return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
}
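crack_states_bitsliced_dispatch above resolves the best available implementation once (via GetSIMDInstrAuto and __builtin_cpu_supports) and then routes every call through a function pointer. A minimal sketch of the same probe-once, dispatch-through-a-pointer pattern, using a hypothetical popcount routine rather than any of the project's functions:

#include <stdio.h>

typedef int (*popcount_fn)(unsigned);

static int popcount_generic(unsigned x) { int n = 0; while (x) { n += x & 1; x >>= 1; } return n; }
static int popcount_hw(unsigned x) { return __builtin_popcount(x); }

static popcount_fn popcount_impl;      /* chosen once at runtime */

static int popcount_dispatch(unsigned x) {
#if defined (__i386__) || defined (__x86_64__)
    if (__builtin_cpu_supports("popcnt"))
        popcount_impl = &popcount_hw;
    else
#endif
        popcount_impl = &popcount_generic;
    /* later calls could go through popcount_impl directly */
    return (*popcount_impl)(x);
}

int main(void) {
    printf("%d\n", popcount_dispatch(0xF0F0u));   /* 8 */
    return 0;
}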
void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
switch(GetSIMDInstrAuto()) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
case SIMD_AVX512:
bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512;
break;
#endif
case SIMD_AVX2:
bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2;
break;
case SIMD_AVX:
bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX;
break;
case SIMD_SSE2:
bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2;
break;
case SIMD_MMX:
bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX;
break;
#endif
#endif
default:
bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD;
break;
}
// call the most optimized function for this CPU
(*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);


@@ -50,16 +50,16 @@ THE SOFTWARE.
#ifndef HARDNESTED_BF_CORE_H__
#define HARDNESTED_BF_CORE_H__
#include "hardnested_bruteforce.h" // statelist_t
#include "hardnested_bruteforce.h" // statelist_t
typedef enum {
SIMD_AUTO,
SIMD_AVX512,
SIMD_AVX2,
SIMD_AVX,
SIMD_SSE2,
SIMD_MMX,
SIMD_NONE,
} SIMDExecInstr;
extern void SetSIMDInstr(SIMDExecInstr instr);
extern SIMDExecInstr GetSIMDInstrAuto();


@@ -146,16 +146,16 @@ count_bitarray_AND4_t count_bitarray_AND4_AVX512, count_bitarray_AND4_AVX2, coun
inline uint32_t *MALLOC_BITARRAY(uint32_t x)
{
#if defined (_WIN32)
return __builtin_assume_aligned(_aligned_malloc((x), __BIGGEST_ALIGNMENT__), __BIGGEST_ALIGNMENT__);
#elif defined (__APPLE__)
uint32_t *allocated_memory;
if (posix_memalign((void**)&allocated_memory, __BIGGEST_ALIGNMENT__, x)) {
return NULL;
} else {
return __builtin_assume_aligned(allocated_memory, __BIGGEST_ALIGNMENT__);
}
#else
return __builtin_assume_aligned(memalign(__BIGGEST_ALIGNMENT__, (x)), __BIGGEST_ALIGNMENT__);
#endif
}
@@ -163,139 +163,139 @@ inline uint32_t *MALLOC_BITARRAY(uint32_t x)
inline void FREE_BITARRAY(uint32_t *x)
{
#ifdef _WIN32
_aligned_free(x);
#else
free(x);
#endif
}
inline uint32_t BITCOUNT(uint32_t a)
{
return __builtin_popcountl(a);
}
inline uint32_t COUNT_STATES(uint32_t *A)
{
uint32_t count = 0;
for (uint32_t i = 0; i < (1<<19); i++) {
count += BITCOUNT(A[i]);
}
return count;
}
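COUNT_STATES above walks the (1<<19) 32-bit words of a bit array covering 2^24 candidate states and sums their population counts with BITCOUNT. A scaled-down, self-contained version of the same counting, with the array shrunk for illustration:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define WORDS (1u << 4)                /* the real array uses (1<<19) words */

static uint32_t count_set_states(const uint32_t *a) {
    uint32_t count = 0;
    for (uint32_t i = 0; i < WORDS; i++)
        count += (uint32_t)__builtin_popcountl(a[i]);   /* same primitive as BITCOUNT */
    return count;
}

int main(void) {
    uint32_t *bits = calloc(WORDS, sizeof(uint32_t));
    if (!bits) return 1;
    bits[0] = 0xFFu;                   /* mark candidate states 0..7 as possible */
    bits[3] |= 1u << 5;                /* mark state 3*32 + 5 = 101 */
    printf("possible states: %u\n", count_set_states(bits));   /* 9 */
    free(bits);
    return 0;
}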
inline void BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
{
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
for (uint32_t i = 0; i < (1<<19); i++) {
A[i] &= B[i];
}
}
inline void BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restrict B)
{
uint16_t *a = (uint16_t *)__builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
uint16_t *b = (uint16_t *)__builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
for (uint32_t i = 0; i < (1<<20); i++) {
if (!b[i]) {
a[i] = 0;
}
}
}
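BITARRAY_LOW20_AND above reinterprets both bit arrays as (1<<20) 16-bit groups and clears an entire group in A whenever the matching group in B is empty, i.e. an AND at halfword granularity rather than per bit. A miniature sketch with the array size reduced (names are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HALFWORDS 8                    /* the real code walks (1<<20) halfwords */

static void halfword_mask_and(uint16_t *a, const uint16_t *b) {
    for (uint32_t i = 0; i < HALFWORDS; i++)
        if (!b[i])
            a[i] = 0;                  /* no survivor in B's group -> clear A's group */
}

int main(void) {
    uint16_t a[HALFWORDS], b[HALFWORDS];
    memset(a, 0xFF, sizeof(a));        /* everything still possible in A */
    memset(b, 0x00, sizeof(b));
    b[2] = 1;                          /* only group 2 has any survivor in B */
    halfword_mask_and(a, b);
    for (uint32_t i = 0; i < HALFWORDS; i++)
        printf("%04x ", a[i]);         /* ffff remains only at index 2 */
    printf("\n");
    return 0;
}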
inline uint32_t COUNT_BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
{
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
uint32_t count = 0;
for (uint32_t i = 0; i < (1<<19); i++) {
A[i] &= B[i];
count += BITCOUNT(A[i]);
}
return count;
}
inline uint32_t COUNT_BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restrict B)
{
uint16_t *a = (uint16_t *)__builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
uint16_t *b = (uint16_t *)__builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
uint32_t count = 0;
for (uint32_t i = 0; i < (1<<20); i++) {
if (!b[i]) {
a[i] = 0;
}
count += BITCOUNT(a[i]);
}
return count;
}
inline void BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C, uint32_t *restrict D)
{
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
D = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);
for (uint32_t i = 0; i < (1<<19); i++) {
A[i] = B[i] & C[i] & D[i];
}
}
inline void BITARRAY_OR(uint32_t *restrict A, uint32_t *restrict B)
{
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
for (uint32_t i = 0; i < (1<<19); i++) {
A[i] |= B[i];
}
}
inline uint32_t COUNT_BITARRAY_AND2(uint32_t *restrict A, uint32_t *restrict B)
{
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
uint32_t count = 0;
for (uint32_t i = 0; i < (1<<19); i++) {
count += BITCOUNT(A[i] & B[i]);
}
return count;
}
inline uint32_t COUNT_BITARRAY_AND3(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C)
{
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
uint32_t count = 0;
for (uint32_t i = 0; i < (1<<19); i++) {
count += BITCOUNT(A[i] & B[i] & C[i]);
}
return count;
}
inline uint32_t COUNT_BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C, uint32_t *restrict D)
{
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
D = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);
uint32_t count = 0;
for (uint32_t i = 0; i < (1<<19); i++) {
count += BITCOUNT(A[i] & B[i] & C[i] & D[i]);
}
return count;
}
@@ -319,20 +319,20 @@ count_bitarray_AND4_t *count_bitarray_AND4_function_p = &count_bitarray_AND4_dis
// determine the available instruction set at runtime and call the correct function
uint32_t *malloc_bitarray_dispatch(uint32_t x) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) malloc_bitarray_function_p = &malloc_bitarray_AVX512;
else if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
#else
if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) malloc_bitarray_function_p = &malloc_bitarray_AVX;
else if (__builtin_cpu_supports("sse2")) malloc_bitarray_function_p = &malloc_bitarray_SSE2;
else if (__builtin_cpu_supports("mmx")) malloc_bitarray_function_p = &malloc_bitarray_MMX;
else
#endif
#endif
malloc_bitarray_function_p = &malloc_bitarray_NOSIMD;
// call the most optimized function for this CPU
return (*malloc_bitarray_function_p)(x);
@@ -340,20 +340,20 @@ uint32_t *malloc_bitarray_dispatch(uint32_t x) {
void free_bitarray_dispatch(uint32_t *x) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) free_bitarray_function_p = &free_bitarray_AVX512;
else if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
#else
if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) free_bitarray_function_p = &free_bitarray_AVX;
else if (__builtin_cpu_supports("sse2")) free_bitarray_function_p = &free_bitarray_SSE2;
else if (__builtin_cpu_supports("mmx")) free_bitarray_function_p = &free_bitarray_MMX;
else
#endif
#endif
free_bitarray_function_p = &free_bitarray_NOSIMD;
// call the most optimized function for this CPU
(*free_bitarray_function_p)(x);
@@ -361,20 +361,20 @@ void free_bitarray_dispatch(uint32_t *x) {
uint32_t bitcount_dispatch(uint32_t a) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) bitcount_function_p = &bitcount_AVX512;
else if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
#else
if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) bitcount_function_p = &bitcount_AVX;
else if (__builtin_cpu_supports("sse2")) bitcount_function_p = &bitcount_SSE2;
else if (__builtin_cpu_supports("mmx")) bitcount_function_p = &bitcount_MMX;
else
#endif
#endif
bitcount_function_p = &bitcount_NOSIMD;
// call the most optimized function for this CPU
return (*bitcount_function_p)(a);
@@ -382,20 +382,20 @@ uint32_t bitcount_dispatch(uint32_t a) {
uint32_t count_states_dispatch(uint32_t *bitarray) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) count_states_function_p = &count_states_AVX512;
else if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
#else
if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) count_states_function_p = &count_states_AVX;
else if (__builtin_cpu_supports("sse2")) count_states_function_p = &count_states_SSE2;
else if (__builtin_cpu_supports("mmx")) count_states_function_p = &count_states_MMX;
else
#endif
#endif
count_states_function_p = &count_states_NOSIMD;
// call the most optimized function for this CPU
return (*count_states_function_p)(bitarray);
@@ -403,20 +403,20 @@ uint32_t count_states_dispatch(uint32_t *bitarray) {
void bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) bitarray_AND_function_p = &bitarray_AND_AVX512;
else if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
#else
if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) bitarray_AND_function_p = &bitarray_AND_AVX;
else if (__builtin_cpu_supports("sse2")) bitarray_AND_function_p = &bitarray_AND_SSE2;
else if (__builtin_cpu_supports("mmx")) bitarray_AND_function_p = &bitarray_AND_MMX;
else
#endif
#endif
bitarray_AND_function_p = &bitarray_AND_NOSIMD;
// call the most optimized function for this CPU
(*bitarray_AND_function_p)(A,B);
@@ -424,20 +424,20 @@ void bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX512;
else if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
#else
if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX;
else if (__builtin_cpu_supports("sse2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_SSE2;
else if (__builtin_cpu_supports("mmx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_MMX;
else
#endif
#endif
bitarray_low20_AND_function_p = &bitarray_low20_AND_NOSIMD;
// call the most optimized function for this CPU
(*bitarray_low20_AND_function_p)(A, B);
@@ -445,20 +445,20 @@ void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX512;
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
#else
if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX;
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND_function_p = &count_bitarray_AND_SSE2;
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND_function_p = &count_bitarray_AND_MMX;
else
#endif
#endif
count_bitarray_AND_function_p = &count_bitarray_AND_NOSIMD;
// call the most optimized function for this CPU
return (*count_bitarray_AND_function_p)(A, B);
@@ -466,20 +466,20 @@ uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX512;
else if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
#else
if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX;
else if (__builtin_cpu_supports("sse2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_SSE2;
else if (__builtin_cpu_supports("mmx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_MMX;
else
#endif
#endif
count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_NOSIMD;
// call the most optimized function for this CPU
return (*count_bitarray_low20_AND_function_p)(A, B);
@@ -487,20 +487,20 @@ uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) bitarray_AND4_function_p = &bitarray_AND4_AVX512;
else if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
#else
if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) bitarray_AND4_function_p = &bitarray_AND4_AVX;
else if (__builtin_cpu_supports("sse2")) bitarray_AND4_function_p = &bitarray_AND4_SSE2;
else if (__builtin_cpu_supports("mmx")) bitarray_AND4_function_p = &bitarray_AND4_MMX;
else
#endif
#endif
bitarray_AND4_function_p = &bitarray_AND4_NOSIMD;
// call the most optimized function for this CPU
(*bitarray_AND4_function_p)(A, B, C, D);
@@ -508,20 +508,20 @@ void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D)
void bitarray_OR_dispatch(uint32_t *A, uint32_t *B) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) bitarray_OR_function_p = &bitarray_OR_AVX512;
else if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
#else
if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) bitarray_OR_function_p = &bitarray_OR_AVX;
else if (__builtin_cpu_supports("sse2")) bitarray_OR_function_p = &bitarray_OR_SSE2;
else if (__builtin_cpu_supports("mmx")) bitarray_OR_function_p = &bitarray_OR_MMX;
else
#endif
#endif
bitarray_OR_function_p = &bitarray_OR_NOSIMD;
// call the most optimized function for this CPU
(*bitarray_OR_function_p)(A,B);
@@ -529,20 +529,20 @@ void bitarray_OR_dispatch(uint32_t *A, uint32_t *B) {
uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX512;
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
#else
if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX;
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_SSE2;
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_MMX;
else
#endif
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX512;
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
#else
if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX;
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_SSE2;
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_MMX;
else
#endif
#endif
count_bitarray_AND2_function_p = &count_bitarray_AND2_NOSIMD;
count_bitarray_AND2_function_p = &count_bitarray_AND2_NOSIMD;
// call the most optimized function for this CPU
return (*count_bitarray_AND2_function_p)(A, B);
@@ -550,20 +550,20 @@ uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) {
uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX512;
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
#else
if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX;
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_SSE2;
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_MMX;
else
#endif
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX512;
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
#else
if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX;
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_SSE2;
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_MMX;
else
#endif
#endif
count_bitarray_AND3_function_p = &count_bitarray_AND3_NOSIMD;
count_bitarray_AND3_function_p = &count_bitarray_AND3_NOSIMD;
// call the most optimized function for this CPU
return (*count_bitarray_AND3_function_p)(A, B, C);
@@ -571,20 +571,20 @@ uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) {
uint32_t count_bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
#if defined (__i386__) || defined (__x86_64__)
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX512;
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
#else
if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX;
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_SSE2;
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_MMX;
else
#endif
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX512;
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
#else
if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
#endif
else if (__builtin_cpu_supports("avx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX;
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_SSE2;
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_MMX;
else
#endif
#endif
count_bitarray_AND4_function_p = &count_bitarray_AND4_NOSIMD;
count_bitarray_AND4_function_p = &count_bitarray_AND4_NOSIMD;
// call the most optimized function for this CPU
return (*count_bitarray_AND4_function_p)(A, B, C, D);


@@ -64,10 +64,10 @@ THE SOFTWARE.
#include "crapto1/crapto1.h"
#include "parity.h"
#define NUM_BRUTE_FORCE_THREADS (num_CPUs())
#define DEFAULT_BRUTE_FORCE_RATE (120000000.0) // if benchmark doesn't succeed
#define TEST_BENCH_SIZE (6000) // number of odd and even states for brute force benchmark
#define TEST_BENCH_FILENAME "hardnested/bf_bench_data.bin"
#define NUM_BRUTE_FORCE_THREADS (num_CPUs())
#define DEFAULT_BRUTE_FORCE_RATE (120000000.0) // if benchmark doesn't succeed
#define TEST_BENCH_SIZE (6000) // number of odd and even states for brute force benchmark
#define TEST_BENCH_FILENAME "hardnested/bf_bench_data.bin"
//#define WRITE_BENCH_FILE
// debugging options
@@ -75,8 +75,8 @@ THE SOFTWARE.
// #define DEBUG_BRUTE_FORCE
typedef enum {
EVEN_STATE = 0,
ODD_STATE = 1
EVEN_STATE = 0,
ODD_STATE = 1
} odd_even_t;
static uint32_t nonces_to_bruteforce = 0;
@@ -91,99 +91,99 @@ static uint64_t found_bs_key = 0;
inline uint8_t trailing_zeros(uint8_t byte)
{
static const uint8_t trailing_zeros_LUT[256] = {
8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
static const uint8_t trailing_zeros_LUT[256] = {
8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
return trailing_zeros_LUT[byte];
return trailing_zeros_LUT[byte];
}
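// The 256-entry table above simply maps a byte to its number of trailing zero
// bits, with 8 for the value 0 (e.g. 0x01 -> 0, 0x04 -> 2, 0x00 -> 8).  A
// sketch of how an equivalent table could be generated at startup instead of
// being spelled out by hand (tz_lut / init_tz_lut are illustrative names only):

static uint8_t tz_lut[256];

static void init_tz_lut(void) {
    for (uint16_t v = 0; v < 256; v++) {
        uint8_t n = 0;
        while (n < 8 && ((v >> n) & 1) == 0) {
            n++;                       // count low zero bits
        }
        tz_lut[v] = n;                 // tz_lut[0] == 8, matching the table above
    }
}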
bool verify_key(uint32_t cuid, noncelist_t *nonces, uint8_t *best_first_bytes, uint32_t odd, uint32_t even)
{
struct Crypto1State pcs;
for (uint16_t test_first_byte = 1; test_first_byte < 256; test_first_byte++) {
noncelistentry_t *test_nonce = nonces[best_first_bytes[test_first_byte]].first;
while (test_nonce != NULL) {
pcs.odd = odd;
pcs.even = even;
lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true);
for (int8_t byte_pos = 3; byte_pos >= 0; byte_pos--) {
uint8_t test_par_enc_bit = (test_nonce->par_enc >> byte_pos) & 0x01; // the encoded parity bit
uint8_t test_byte_enc = (test_nonce->nonce_enc >> (8*byte_pos)) & 0xff; // the encoded nonce byte
uint8_t test_byte_dec = crypto1_byte(&pcs, test_byte_enc /* ^ (cuid >> (8*byte_pos)) */, true) ^ test_byte_enc; // decode the nonce byte
uint8_t ks_par = filter(pcs.odd); // the keystream bit to encode/decode the parity bit
uint8_t test_par_enc2 = ks_par ^ evenparity8(test_byte_dec); // determine the decoded byte's parity and encode it
if (test_par_enc_bit != test_par_enc2) {
return false;
}
}
test_nonce = test_nonce->next;
}
}
return true;
struct Crypto1State pcs;
for (uint16_t test_first_byte = 1; test_first_byte < 256; test_first_byte++) {
noncelistentry_t *test_nonce = nonces[best_first_bytes[test_first_byte]].first;
while (test_nonce != NULL) {
pcs.odd = odd;
pcs.even = even;
lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true);
for (int8_t byte_pos = 3; byte_pos >= 0; byte_pos--) {
uint8_t test_par_enc_bit = (test_nonce->par_enc >> byte_pos) & 0x01; // the encoded parity bit
uint8_t test_byte_enc = (test_nonce->nonce_enc >> (8*byte_pos)) & 0xff; // the encoded nonce byte
uint8_t test_byte_dec = crypto1_byte(&pcs, test_byte_enc /* ^ (cuid >> (8*byte_pos)) */, true) ^ test_byte_enc; // decode the nonce byte
uint8_t ks_par = filter(pcs.odd); // the keystream bit to encode/decode the parity bit
uint8_t test_par_enc2 = ks_par ^ evenparity8(test_byte_dec); // determine the decoded byte's parity and encode it
if (test_par_enc_bit != test_par_enc2) {
return false;
}
}
test_nonce = test_nonce->next;
}
}
return true;
}
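// The loop above keeps a candidate (odd, even) state only if it reproduces
// every observed parity bit, i.e. for each nonce byte
//
//     par_enc(byte_pos)  ==  filter(pcs.odd) ^ evenparity8(test_byte_dec)
//
// where test_byte_dec is the byte recovered with crypto1_byte() and
// filter(pcs.odd) is the keystream bit that encrypts that byte's parity.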
static void*
#ifdef __has_attribute
#if __has_attribute(force_align_arg_pointer)
__attribute__((force_align_arg_pointer))
#endif
#if __has_attribute(force_align_arg_pointer)
__attribute__((force_align_arg_pointer))
#endif
#endif
crack_states_thread(void* x){
struct arg {
bool silent;
int thread_ID;
uint32_t cuid;
uint32_t num_acquired_nonces;
uint64_t maximum_states;
noncelist_t *nonces;
uint8_t* best_first_bytes;
} *thread_arg;
struct arg {
bool silent;
int thread_ID;
uint32_t cuid;
uint32_t num_acquired_nonces;
uint64_t maximum_states;
noncelist_t *nonces;
uint8_t* best_first_bytes;
} *thread_arg;
thread_arg = (struct arg *)x;
thread_arg = (struct arg *)x;
const int thread_id = thread_arg->thread_ID;
uint32_t current_bucket = thread_id;
while(current_bucket < bucket_count){
statelist_t *bucket = buckets[current_bucket];
if(bucket){
#if defined (DEBUG_BRUTE_FORCE)
printf("Thread %u starts working on bucket %u\n", thread_id, current_bucket);
printf("Thread %u starts working on bucket %u\n", thread_id, current_bucket);
#endif
const uint64_t key = crack_states_bitsliced(thread_arg->cuid, thread_arg->best_first_bytes, bucket, &keys_found, &num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, thread_arg->nonces);
if(key != -1){
__atomic_fetch_add(&keys_found, 1, __ATOMIC_SEQ_CST);
__atomic_fetch_add(&found_bs_key, key, __ATOMIC_SEQ_CST);
__atomic_fetch_add(&found_bs_key, key, __ATOMIC_SEQ_CST);
char progress_text[80];
char keystr[18];
sprintf(keystr, "%012" PRIx64 " ", key);
sprintf(progress_text, "Brute force phase completed. Key found: " _YELLOW_(%s), keystr);
hardnested_print_progress(thread_arg->num_acquired_nonces, progress_text, 0.0, 0);
char progress_text[80];
char keystr[18];
sprintf(keystr, "%012" PRIx64 " ", key);
sprintf(progress_text, "Brute force phase completed. Key found: " _YELLOW_(%s), keystr);
hardnested_print_progress(thread_arg->num_acquired_nonces, progress_text, 0.0, 0);
break;
} else if(keys_found){
break;
} else {
if (!thread_arg->silent) {
char progress_text[80];
sprintf(progress_text, "Brute force phase: %6.02f%%\t", 100.0*(float)num_keys_tested/(float)(thread_arg->maximum_states));
float remaining_bruteforce = thread_arg->nonces[thread_arg->best_first_bytes[0]].expected_num_brute_force - (float)num_keys_tested/2;
hardnested_print_progress(thread_arg->num_acquired_nonces, progress_text, remaining_bruteforce, 5000);
}
if (!thread_arg->silent) {
char progress_text[80];
sprintf(progress_text, "Brute force phase: %6.02f%%\t", 100.0*(float)num_keys_tested/(float)(thread_arg->maximum_states));
float remaining_bruteforce = thread_arg->nonces[thread_arg->best_first_bytes[0]].expected_num_brute_force - (float)num_keys_tested/2;
hardnested_print_progress(thread_arg->num_acquired_nonces, progress_text, remaining_bruteforce, 5000);
}
}
}
current_bucket += NUM_BRUTE_FORCE_THREADS;
@@ -194,97 +194,97 @@ crack_states_thread(void* x){
void prepare_bf_test_nonces(noncelist_t *nonces, uint8_t best_first_byte)
{
// we do bitsliced brute forcing with best_first_bytes[0] only.
// Extract the corresponding 2nd bytes
noncelistentry_t *test_nonce = nonces[best_first_byte].first;
uint32_t i = 0;
while (test_nonce != NULL) {
bf_test_nonce[i] = test_nonce->nonce_enc;
bf_test_nonce_par[i] = test_nonce->par_enc;
bf_test_nonce_2nd_byte[i] = (test_nonce->nonce_enc >> 16) & 0xff;
test_nonce = test_nonce->next;
i++;
}
nonces_to_bruteforce = i;
// we do bitsliced brute forcing with best_first_bytes[0] only.
// Extract the corresponding 2nd bytes
noncelistentry_t *test_nonce = nonces[best_first_byte].first;
uint32_t i = 0;
while (test_nonce != NULL) {
bf_test_nonce[i] = test_nonce->nonce_enc;
bf_test_nonce_par[i] = test_nonce->par_enc;
bf_test_nonce_2nd_byte[i] = (test_nonce->nonce_enc >> 16) & 0xff;
test_nonce = test_nonce->next;
i++;
}
nonces_to_bruteforce = i;
// printf("Nonces to bruteforce: %d\n", nonces_to_bruteforce);
// printf("Common bits of first 4 2nd nonce bytes (before sorting): %u %u %u\n",
// trailing_zeros(bf_test_nonce_2nd_byte[1] ^ bf_test_nonce_2nd_byte[0]),
// trailing_zeros(bf_test_nonce_2nd_byte[2] ^ bf_test_nonce_2nd_byte[1]),
// trailing_zeros(bf_test_nonce_2nd_byte[3] ^ bf_test_nonce_2nd_byte[2]));
// printf("Nonces to bruteforce: %d\n", nonces_to_bruteforce);
// printf("Common bits of first 4 2nd nonce bytes (before sorting): %u %u %u\n",
// trailing_zeros(bf_test_nonce_2nd_byte[1] ^ bf_test_nonce_2nd_byte[0]),
// trailing_zeros(bf_test_nonce_2nd_byte[2] ^ bf_test_nonce_2nd_byte[1]),
// trailing_zeros(bf_test_nonce_2nd_byte[3] ^ bf_test_nonce_2nd_byte[2]));
uint8_t best_4[4] = {0};
int sum_best = -1;
for (uint16_t n1 = 0; n1 < nonces_to_bruteforce; n1++) {
for (uint16_t n2 = 0; n2 < nonces_to_bruteforce; n2++) {
if (n2 != n1) {
for (uint16_t n3 = 0; n3 < nonces_to_bruteforce; n3++) {
if ((n3 != n2 && n3 != n1) || nonces_to_bruteforce < 3
// && trailing_zeros(bf_test_nonce_2nd_byte[n1] ^ bf_test_nonce_2nd_byte[n2])
// > trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3])
) {
for (uint16_t n4 = 0; n4 < nonces_to_bruteforce; n4++) {
if ((n4 != n3 && n4 != n2 && n4 != n1) || nonces_to_bruteforce < 4
// && trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3])
// > trailing_zeros(bf_test_nonce_2nd_byte[n3] ^ bf_test_nonce_2nd_byte[n4])
) {
int sum = nonces_to_bruteforce > 1 ? trailing_zeros(bf_test_nonce_2nd_byte[n1] ^ bf_test_nonce_2nd_byte[n2]) : 0.0
+ nonces_to_bruteforce > 2 ? trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3]) : 0.0
+ nonces_to_bruteforce > 3 ? trailing_zeros(bf_test_nonce_2nd_byte[n3] ^ bf_test_nonce_2nd_byte[n4]) : 0.0;
if (sum > sum_best) {
sum_best = sum;
best_4[0] = n1;
best_4[1] = n2;
best_4[2] = n3;
best_4[3] = n4;
}
}
}
}
}
}
}
}
uint8_t best_4[4] = {0};
int sum_best = -1;
for (uint16_t n1 = 0; n1 < nonces_to_bruteforce; n1++) {
for (uint16_t n2 = 0; n2 < nonces_to_bruteforce; n2++) {
if (n2 != n1) {
for (uint16_t n3 = 0; n3 < nonces_to_bruteforce; n3++) {
if ((n3 != n2 && n3 != n1) || nonces_to_bruteforce < 3
// && trailing_zeros(bf_test_nonce_2nd_byte[n1] ^ bf_test_nonce_2nd_byte[n2])
// > trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3])
) {
for (uint16_t n4 = 0; n4 < nonces_to_bruteforce; n4++) {
if ((n4 != n3 && n4 != n2 && n4 != n1) || nonces_to_bruteforce < 4
// && trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3])
// > trailing_zeros(bf_test_nonce_2nd_byte[n3] ^ bf_test_nonce_2nd_byte[n4])
) {
int sum = nonces_to_bruteforce > 1 ? trailing_zeros(bf_test_nonce_2nd_byte[n1] ^ bf_test_nonce_2nd_byte[n2]) : 0.0
+ nonces_to_bruteforce > 2 ? trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3]) : 0.0
+ nonces_to_bruteforce > 3 ? trailing_zeros(bf_test_nonce_2nd_byte[n3] ^ bf_test_nonce_2nd_byte[n4]) : 0.0;
if (sum > sum_best) {
sum_best = sum;
best_4[0] = n1;
best_4[1] = n2;
best_4[2] = n3;
best_4[3] = n4;
}
}
}
}
}
}
}
}
uint32_t bf_test_nonce_temp[4];
uint8_t bf_test_nonce_par_temp[4];
uint8_t bf_test_nonce_2nd_byte_temp[4];
for (uint8_t i = 0; i < 4 && i < nonces_to_bruteforce; i++) {
bf_test_nonce_temp[i] = bf_test_nonce[best_4[i]];
uint32_t bf_test_nonce_temp[4];
uint8_t bf_test_nonce_par_temp[4];
uint8_t bf_test_nonce_2nd_byte_temp[4];
for (uint8_t i = 0; i < 4 && i < nonces_to_bruteforce; i++) {
bf_test_nonce_temp[i] = bf_test_nonce[best_4[i]];
bf_test_nonce_par_temp[i] = bf_test_nonce_par[best_4[i]];
bf_test_nonce_2nd_byte_temp[i] = bf_test_nonce_2nd_byte[best_4[i]];
}
for (uint8_t i = 0; i < 4 && i < nonces_to_bruteforce; i++) {
bf_test_nonce[i] = bf_test_nonce_temp[i];
bf_test_nonce_par[i] = bf_test_nonce_par_temp[i];
bf_test_nonce_2nd_byte[i] = bf_test_nonce_2nd_byte_temp[i];
}
bf_test_nonce_par_temp[i] = bf_test_nonce_par[best_4[i]];
bf_test_nonce_2nd_byte_temp[i] = bf_test_nonce_2nd_byte[best_4[i]];
}
for (uint8_t i = 0; i < 4 && i < nonces_to_bruteforce; i++) {
bf_test_nonce[i] = bf_test_nonce_temp[i];
bf_test_nonce_par[i] = bf_test_nonce_par_temp[i];
bf_test_nonce_2nd_byte[i] = bf_test_nonce_2nd_byte_temp[i];
}
}
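// A note on the scoring expression above: since ?: binds more loosely than +
// and >, the chained conditional
//     nonces_to_bruteforce > 1 ? trailing_zeros(...) : 0.0 + nonces_to_bruteforce > 2 ? ... : ...
// reduces to the first trailing_zeros() term whenever more than one nonce is
// available, rather than to the sum of three terms.  A parenthesised form
// matching the apparent intent would be (a sketch only, not part of this diff):
//
//     int sum = (nonces_to_bruteforce > 1 ? trailing_zeros(bf_test_nonce_2nd_byte[n1] ^ bf_test_nonce_2nd_byte[n2]) : 0)
//             + (nonces_to_bruteforce > 2 ? trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3]) : 0)
//             + (nonces_to_bruteforce > 3 ? trailing_zeros(bf_test_nonce_2nd_byte[n3] ^ bf_test_nonce_2nd_byte[n4]) : 0);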
#if defined (WRITE_BENCH_FILE)
static void write_benchfile(statelist_t *candidates) {
printf("Writing brute force benchmark data...");
FILE *benchfile = fopen(TEST_BENCH_FILENAME, "wb");
fwrite(&nonces_to_bruteforce, 1, sizeof(nonces_to_bruteforce), benchfile);
for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
fwrite(&(bf_test_nonce[i]), 1, sizeof(bf_test_nonce[i]), benchfile);
fwrite(&(bf_test_nonce_par[i]), 1, sizeof(bf_test_nonce_par[i]), benchfile);
}
uint32_t num_states = MIN(candidates->len[EVEN_STATE], TEST_BENCH_SIZE);
fwrite(&num_states, 1, sizeof(num_states), benchfile);
for (uint32_t i = 0; i < num_states; i++) {
fwrite(&(candidates->states[EVEN_STATE][i]), 1, sizeof(uint32_t), benchfile);
}
num_states = MIN(candidates->len[ODD_STATE], TEST_BENCH_SIZE);
fwrite(&num_states, 1, sizeof(num_states), benchfile);
for (uint32_t i = 0; i < num_states; i++) {
fwrite(&(candidates->states[ODD_STATE][i]), 1, sizeof(uint32_t), benchfile);
}
fclose(benchfile);
printf("done.\n");
printf("Writing brute force benchmark data...");
FILE *benchfile = fopen(TEST_BENCH_FILENAME, "wb");
fwrite(&nonces_to_bruteforce, 1, sizeof(nonces_to_bruteforce), benchfile);
for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
fwrite(&(bf_test_nonce[i]), 1, sizeof(bf_test_nonce[i]), benchfile);
fwrite(&(bf_test_nonce_par[i]), 1, sizeof(bf_test_nonce_par[i]), benchfile);
}
uint32_t num_states = MIN(candidates->len[EVEN_STATE], TEST_BENCH_SIZE);
fwrite(&num_states, 1, sizeof(num_states), benchfile);
for (uint32_t i = 0; i < num_states; i++) {
fwrite(&(candidates->states[EVEN_STATE][i]), 1, sizeof(uint32_t), benchfile);
}
num_states = MIN(candidates->len[ODD_STATE], TEST_BENCH_SIZE);
fwrite(&num_states, 1, sizeof(num_states), benchfile);
for (uint32_t i = 0; i < num_states; i++) {
fwrite(&(candidates->states[ODD_STATE][i]), 1, sizeof(uint32_t), benchfile);
}
fclose(benchfile);
printf("done.\n");
}
#endif
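// For reference, write_benchfile() above dumps its input field by field in host
// byte order (so no struct padding is involved):
//
//     uint32_t  nonces_to_bruteforce
//     nonces_to_bruteforce x { uint32_t nonce_enc; uint8_t par_enc; }
//     uint32_t  number of even states, followed by that many uint32_t even states
//     uint32_t  number of odd states,  followed by that many uint32_t odd states
//
// with both state counts capped at TEST_BENCH_SIZE.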
@@ -292,174 +292,174 @@ static void write_benchfile(statelist_t *candidates) {
bool brute_force_bs(float *bf_rate, statelist_t *candidates, uint32_t cuid, uint32_t num_acquired_nonces, uint64_t maximum_states, noncelist_t *nonces, uint8_t *best_first_bytes, uint64_t *foundkey)
{
#if defined (WRITE_BENCH_FILE)
write_benchfile(candidates);
write_benchfile(candidates);
#endif
bool silent = (bf_rate != NULL);
bool silent = (bf_rate != NULL);
keys_found = 0;
num_keys_tested = 0;
found_bs_key = 0;
keys_found = 0;
num_keys_tested = 0;
found_bs_key = 0;
bitslice_test_nonces(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
bitslice_test_nonces(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
// count number of states to go
bucket_count = 0;
for (statelist_t *p = candidates; p != NULL; p = p->next) {
if (p->states[ODD_STATE] != NULL && p->states[EVEN_STATE] != NULL) {
buckets[bucket_count] = p;
bucket_count++;
}
}
// count number of states to go
bucket_count = 0;
for (statelist_t *p = candidates; p != NULL; p = p->next) {
if (p->states[ODD_STATE] != NULL && p->states[EVEN_STATE] != NULL) {
buckets[bucket_count] = p;
bucket_count++;
}
}
uint64_t start_time = msclock();
uint64_t start_time = msclock();
#if defined(__linux__) || defined(__APPLE__)
if ( NUM_BRUTE_FORCE_THREADS < 0 )
return false;
if ( NUM_BRUTE_FORCE_THREADS < 0 )
return false;
#endif
pthread_t threads[NUM_BRUTE_FORCE_THREADS];
struct args {
bool silent;
int thread_ID;
uint32_t cuid;
uint32_t num_acquired_nonces;
uint64_t maximum_states;
noncelist_t *nonces;
uint8_t *best_first_bytes;
} thread_args[NUM_BRUTE_FORCE_THREADS];
pthread_t threads[NUM_BRUTE_FORCE_THREADS];
struct args {
bool silent;
int thread_ID;
uint32_t cuid;
uint32_t num_acquired_nonces;
uint64_t maximum_states;
noncelist_t *nonces;
uint8_t *best_first_bytes;
} thread_args[NUM_BRUTE_FORCE_THREADS];
for (uint32_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++){
thread_args[i].thread_ID = i;
thread_args[i].silent = silent;
thread_args[i].cuid = cuid;
thread_args[i].num_acquired_nonces = num_acquired_nonces;
thread_args[i].maximum_states = maximum_states;
thread_args[i].nonces = nonces;
thread_args[i].best_first_bytes = best_first_bytes;
pthread_create(&threads[i], NULL, crack_states_thread, (void*)&thread_args[i]);
}
for (uint32_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++){
pthread_join(threads[i], 0);
}
for (uint32_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++){
thread_args[i].thread_ID = i;
thread_args[i].silent = silent;
thread_args[i].cuid = cuid;
thread_args[i].num_acquired_nonces = num_acquired_nonces;
thread_args[i].maximum_states = maximum_states;
thread_args[i].nonces = nonces;
thread_args[i].best_first_bytes = best_first_bytes;
pthread_create(&threads[i], NULL, crack_states_thread, (void*)&thread_args[i]);
}
for (uint32_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++){
pthread_join(threads[i], 0);
}
uint64_t elapsed_time = msclock() - start_time;
uint64_t elapsed_time = msclock() - start_time;
if (bf_rate != NULL)
*bf_rate = (float)num_keys_tested / ((float)elapsed_time / 1000.0);
if (bf_rate != NULL)
*bf_rate = (float)num_keys_tested / ((float)elapsed_time / 1000.0);
if ( keys_found > 0)
*foundkey = found_bs_key;
if ( keys_found > 0)
*foundkey = found_bs_key;
return (keys_found != 0);
return (keys_found != 0);
}
static bool read_bench_data(statelist_t *test_candidates) {
size_t bytes_read = 0;
uint32_t temp = 0;
uint32_t num_states = 0;
uint32_t states_read = 0;
size_t bytes_read = 0;
uint32_t temp = 0;
uint32_t num_states = 0;
uint32_t states_read = 0;
char bench_file_path[strlen(get_my_executable_directory()) + strlen(TEST_BENCH_FILENAME) + 1];
strcpy(bench_file_path, get_my_executable_directory());
strcat(bench_file_path, TEST_BENCH_FILENAME);
char bench_file_path[strlen(get_my_executable_directory()) + strlen(TEST_BENCH_FILENAME) + 1];
strcpy(bench_file_path, get_my_executable_directory());
strcat(bench_file_path, TEST_BENCH_FILENAME);
FILE *benchfile = fopen(bench_file_path, "rb");
if (benchfile == NULL) {
return false;
}
bytes_read = fread(&nonces_to_bruteforce, 1, sizeof(nonces_to_bruteforce), benchfile);
if (bytes_read != sizeof(nonces_to_bruteforce)) {
fclose(benchfile);
return false;
}
for (uint16_t i = 0; i < nonces_to_bruteforce && i < 256; i++) {
bytes_read = fread(&bf_test_nonce[i], 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
bf_test_nonce_2nd_byte[i] = (bf_test_nonce[i] >> 16) & 0xff;
bytes_read = fread(&bf_test_nonce_par[i], 1, sizeof(uint8_t), benchfile);
if (bytes_read != sizeof(uint8_t)) {
fclose(benchfile);
return false;
}
}
bytes_read = fread(&num_states, 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
for (states_read = 0; states_read < MIN(num_states, TEST_BENCH_SIZE); states_read++) {
bytes_read = fread(test_candidates->states[EVEN_STATE] + states_read, 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
}
for (uint32_t i = states_read; i < TEST_BENCH_SIZE; i++) {
test_candidates->states[EVEN_STATE][i] = test_candidates->states[EVEN_STATE][i-states_read];
}
for (uint32_t i = states_read; i < num_states; i++) {
bytes_read = fread(&temp, 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
}
for (states_read = 0; states_read < MIN(num_states, TEST_BENCH_SIZE); states_read++) {
bytes_read = fread(test_candidates->states[ODD_STATE] + states_read, 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
}
for (uint32_t i = states_read; i < TEST_BENCH_SIZE; i++) {
test_candidates->states[ODD_STATE][i] = test_candidates->states[ODD_STATE][i-states_read];
}
FILE *benchfile = fopen(bench_file_path, "rb");
if (benchfile == NULL) {
return false;
}
bytes_read = fread(&nonces_to_bruteforce, 1, sizeof(nonces_to_bruteforce), benchfile);
if (bytes_read != sizeof(nonces_to_bruteforce)) {
fclose(benchfile);
return false;
}
for (uint16_t i = 0; i < nonces_to_bruteforce && i < 256; i++) {
bytes_read = fread(&bf_test_nonce[i], 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
bf_test_nonce_2nd_byte[i] = (bf_test_nonce[i] >> 16) & 0xff;
bytes_read = fread(&bf_test_nonce_par[i], 1, sizeof(uint8_t), benchfile);
if (bytes_read != sizeof(uint8_t)) {
fclose(benchfile);
return false;
}
}
bytes_read = fread(&num_states, 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
for (states_read = 0; states_read < MIN(num_states, TEST_BENCH_SIZE); states_read++) {
bytes_read = fread(test_candidates->states[EVEN_STATE] + states_read, 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
}
for (uint32_t i = states_read; i < TEST_BENCH_SIZE; i++) {
test_candidates->states[EVEN_STATE][i] = test_candidates->states[EVEN_STATE][i-states_read];
}
for (uint32_t i = states_read; i < num_states; i++) {
bytes_read = fread(&temp, 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
}
for (states_read = 0; states_read < MIN(num_states, TEST_BENCH_SIZE); states_read++) {
bytes_read = fread(test_candidates->states[ODD_STATE] + states_read, 1, sizeof(uint32_t), benchfile);
if (bytes_read != sizeof(uint32_t)) {
fclose(benchfile);
return false;
}
}
for (uint32_t i = states_read; i < TEST_BENCH_SIZE; i++) {
test_candidates->states[ODD_STATE][i] = test_candidates->states[ODD_STATE][i-states_read];
}
fclose(benchfile);
return true;
fclose(benchfile);
return true;
}
float brute_force_benchmark() {
statelist_t test_candidates[NUM_BRUTE_FORCE_THREADS];
statelist_t test_candidates[NUM_BRUTE_FORCE_THREADS];
test_candidates[0].states[ODD_STATE] = malloc((TEST_BENCH_SIZE+1) * sizeof(uint32_t));
test_candidates[0].states[EVEN_STATE] = malloc((TEST_BENCH_SIZE+1) * sizeof(uint32_t));
for (uint8_t i = 0; i < NUM_BRUTE_FORCE_THREADS - 1; i++){
test_candidates[i].next = test_candidates + i + 1;
test_candidates[i+1].states[ODD_STATE] = test_candidates[0].states[ODD_STATE];
test_candidates[i+1].states[EVEN_STATE] = test_candidates[0].states[EVEN_STATE];
}
test_candidates[NUM_BRUTE_FORCE_THREADS-1].next = NULL;
test_candidates[0].states[ODD_STATE] = malloc((TEST_BENCH_SIZE+1) * sizeof(uint32_t));
test_candidates[0].states[EVEN_STATE] = malloc((TEST_BENCH_SIZE+1) * sizeof(uint32_t));
for (uint8_t i = 0; i < NUM_BRUTE_FORCE_THREADS - 1; i++){
test_candidates[i].next = test_candidates + i + 1;
test_candidates[i+1].states[ODD_STATE] = test_candidates[0].states[ODD_STATE];
test_candidates[i+1].states[EVEN_STATE] = test_candidates[0].states[EVEN_STATE];
}
test_candidates[NUM_BRUTE_FORCE_THREADS-1].next = NULL;
if (!read_bench_data(test_candidates)) {
PrintAndLogEx(NORMAL, "Couldn't read benchmark data. Assuming brute force rate of %1.0f states per second", DEFAULT_BRUTE_FORCE_RATE);
return DEFAULT_BRUTE_FORCE_RATE;
}
if (!read_bench_data(test_candidates)) {
PrintAndLogEx(NORMAL, "Couldn't read benchmark data. Assuming brute force rate of %1.0f states per second", DEFAULT_BRUTE_FORCE_RATE);
return DEFAULT_BRUTE_FORCE_RATE;
}
for (uint8_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++) {
test_candidates[i].len[ODD_STATE] = TEST_BENCH_SIZE;
test_candidates[i].len[EVEN_STATE] = TEST_BENCH_SIZE;
test_candidates[i].states[ODD_STATE][TEST_BENCH_SIZE] = -1;
test_candidates[i].states[EVEN_STATE][TEST_BENCH_SIZE] = -1;
}
for (uint8_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++) {
test_candidates[i].len[ODD_STATE] = TEST_BENCH_SIZE;
test_candidates[i].len[EVEN_STATE] = TEST_BENCH_SIZE;
test_candidates[i].states[ODD_STATE][TEST_BENCH_SIZE] = -1;
test_candidates[i].states[EVEN_STATE][TEST_BENCH_SIZE] = -1;
}
uint64_t maximum_states = TEST_BENCH_SIZE*TEST_BENCH_SIZE*(uint64_t)NUM_BRUTE_FORCE_THREADS;
uint64_t maximum_states = TEST_BENCH_SIZE*TEST_BENCH_SIZE*(uint64_t)NUM_BRUTE_FORCE_THREADS;
float bf_rate;
uint64_t found_key = 0;
brute_force_bs(&bf_rate, test_candidates, 0, 0, maximum_states, NULL, 0, &found_key);
float bf_rate;
uint64_t found_key = 0;
brute_force_bs(&bf_rate, test_candidates, 0, 0, maximum_states, NULL, 0, &found_key);
free(test_candidates[0].states[ODD_STATE]);
free(test_candidates[0].states[EVEN_STATE]);
free(test_candidates[0].states[ODD_STATE]);
free(test_candidates[0].states[EVEN_STATE]);
return bf_rate;
return bf_rate;
}
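// Note that all NUM_BRUTE_FORCE_THREADS entries of test_candidates[] above
// alias the same two heap arrays (only test_candidates[0].states[...] are
// allocated), which is why just those two pointers are freed at the end.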


@@ -22,9 +22,9 @@
#include "cmdhfmfhard.h"
typedef struct {
uint32_t *states[2];
uint32_t len[2];
void* next;
uint32_t *states[2];
uint32_t len[2];
void* next;
} statelist_t;
extern void prepare_bf_test_nonces(noncelist_t *nonces, uint8_t best_first_byte);


@@ -29,36 +29,36 @@
#include "parity.h"
#define NUM_PART_SUMS 9
#define BITFLIP_2ND_BYTE 0x0200
#define NUM_PART_SUMS 9
#define BITFLIP_2ND_BYTE 0x0200
typedef enum {
EVEN_STATE = 0,
ODD_STATE = 1
EVEN_STATE = 0,
ODD_STATE = 1
} odd_even_t;
static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even)
{
uint16_t sum = 0;
for (uint16_t j = 0; j < 16; j++) {
uint32_t st = state;
uint16_t part_sum = 0;
if (odd_even == ODD_STATE) {
for (uint16_t i = 0; i < 5; i++) {
part_sum ^= filter(st);
st = (st << 1) | ((j >> (3-i)) & 0x01) ;
}
part_sum ^= 1; // XOR 1 cancelled out for the other 8 bits
} else {
for (uint16_t i = 0; i < 4; i++) {
st = (st << 1) | ((j >> (3-i)) & 0x01) ;
part_sum ^= filter(st);
}
}
sum += part_sum;
}
return sum;
uint16_t sum = 0;
for (uint16_t j = 0; j < 16; j++) {
uint32_t st = state;
uint16_t part_sum = 0;
if (odd_even == ODD_STATE) {
for (uint16_t i = 0; i < 5; i++) {
part_sum ^= filter(st);
st = (st << 1) | ((j >> (3-i)) & 0x01) ;
}
part_sum ^= 1; // XOR 1 cancelled out for the other 8 bits
} else {
for (uint16_t i = 0; i < 4; i++) {
st = (st << 1) | ((j >> (3-i)) & 0x01) ;
part_sum ^= filter(st);
}
}
sum += part_sum;
}
return sum;
}
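// PartialSumProperty() adds up a parity of the filter (keystream) bits produced
// along each of the 16 possible 4-bit extensions of a half state.  The result
// is used divided by two (see init_part_sum_bitarrays() below), giving one of
// the NUM_PART_SUMS == 9 classes 0..8.  With p the odd and q the even class of
// a full state, the Sum property of the first authentication byte is recovered
// as
//
//     sum_a0 = 2*p*(16 - 2*q) + (16 - 2*p)*2*q
//
// which is exactly the test init_sum_bitarray() below applies when merging the
// partial sum bit arrays.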
@@ -70,86 +70,86 @@ static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even)
static inline void clear_bitarray24(uint32_t *bitarray)
{
memset(bitarray, 0x00, sizeof(uint32_t) * (1<<19));
memset(bitarray, 0x00, sizeof(uint32_t) * (1<<19));
}
static inline uint32_t test_bit24(uint32_t *bitarray, uint32_t index)
{
return bitarray[index>>5] & (0x80000000>>(index&0x0000001f));
return bitarray[index>>5] & (0x80000000>>(index&0x0000001f));
}
static inline void set_bit24(uint32_t *bitarray, uint32_t index)
{
bitarray[index>>5] |= 0x80000000>>(index&0x0000001f);
bitarray[index>>5] |= 0x80000000>>(index&0x0000001f);
}
static inline uint32_t next_state(uint32_t *bitset, uint32_t state)
{
if (++state == 1<<24) return 1<<24;
uint32_t index = state >> 5;
uint_fast8_t bit = state & 0x1f;
uint32_t line = bitset[index] << bit;
while (bit <= 0x1f) {
if (line & 0x80000000) return state;
state++;
bit++;
line <<= 1;
}
index++;
while (bitset[index] == 0x00000000 && state < 1<<24) {
index++;
state += 0x20;
}
if (state >= 1<<24) return 1<<24;
if (++state == 1<<24) return 1<<24;
uint32_t index = state >> 5;
uint_fast8_t bit = state & 0x1f;
uint32_t line = bitset[index] << bit;
while (bit <= 0x1f) {
if (line & 0x80000000) return state;
state++;
bit++;
line <<= 1;
}
index++;
while (bitset[index] == 0x00000000 && state < 1<<24) {
index++;
state += 0x20;
}
if (state >= 1<<24) return 1<<24;
#if defined __GNUC__
return state + __builtin_clz(bitset[index]);
return state + __builtin_clz(bitset[index]);
#else
bit = 0x00;
line = bitset[index];
while (bit <= 0x1f) {
if (line & 0x80000000) return state;
state++;
bit++;
line <<= 1;
}
return 1<<24;
bit = 0x00;
line = bitset[index];
while (bit <= 0x1f) {
if (line & 0x80000000) return state;
state++;
bit++;
line <<= 1;
}
return 1<<24;
#endif
}
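// next_state() is the iterator over these packed 2^24-entry bit sets: seeded
// with -1 it returns every set state in increasing order and 1<<24 once the set
// is exhausted (the same pattern the search loops further down rely on).  A
// minimal traversal sketch (count_via_iterator is an illustrative name only):

static inline uint32_t count_via_iterator(uint32_t *bitarray)
{
    uint32_t n = 0;
    for (uint32_t state = next_state(bitarray, (uint32_t)-1); state < (1 << 24); state = next_state(bitarray, state)) {
        n++;                          // visit each set state exactly once
    }
    return n;
}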
static inline uint32_t next_not_state(uint32_t *bitset, uint32_t state)
{
if (++state == 1<<24) return 1<<24;
uint32_t index = state >> 5;
uint_fast8_t bit = state & 0x1f;
uint32_t line = bitset[index] << bit;
while (bit <= 0x1f) {
if ((line & 0x80000000) == 0) return state;
state++;
bit++;
line <<= 1;
}
index++;
while (bitset[index] == 0xffffffff && state < 1<<24) {
index++;
state += 0x20;
}
if (state >= 1<<24) return 1<<24;
if (++state == 1<<24) return 1<<24;
uint32_t index = state >> 5;
uint_fast8_t bit = state & 0x1f;
uint32_t line = bitset[index] << bit;
while (bit <= 0x1f) {
if ((line & 0x80000000) == 0) return state;
state++;
bit++;
line <<= 1;
}
index++;
while (bitset[index] == 0xffffffff && state < 1<<24) {
index++;
state += 0x20;
}
if (state >= 1<<24) return 1<<24;
#if defined __GNUC__
return state + __builtin_clz(~bitset[index]);
return state + __builtin_clz(~bitset[index]);
#else
bit = 0x00;
line = bitset[index];
while (bit <= 0x1f) {
if ((line & 0x80000000) == 0) return state;
state++;
bit++;
line <<= 1;
}
return 1<<24;
bit = 0x00;
line = bitset[index];
while (bit <= 0x1f) {
if ((line & 0x80000000) == 0) return state;
state++;
bit++;
line <<= 1;
}
return 1<<24;
#endif
}
@@ -157,33 +157,33 @@ static inline uint32_t next_not_state(uint32_t *bitset, uint32_t state)
static inline uint32_t bitcount(uint32_t a)
{
#if defined __GNUC__
return __builtin_popcountl(a);
return __builtin_popcountl(a);
#else
a = a - ((a >> 1) & 0x55555555);
a = (a & 0x33333333) + ((a >> 2) & 0x33333333);
return (((a + (a >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;
a = a - ((a >> 1) & 0x55555555);
a = (a & 0x33333333) + ((a >> 2) & 0x33333333);
return (((a + (a >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;
#endif
}
static inline uint32_t count_states(uint32_t *bitset)
{
uint32_t count = 0;
for (uint32_t i = 0; i < (1<<19); i++) {
count += bitcount(bitset[i]);
}
return count;
uint32_t count = 0;
for (uint32_t i = 0; i < (1<<19); i++) {
count += bitcount(bitset[i]);
}
return count;
}
static void write_bitflips_file(odd_even_t odd_even, uint16_t bitflip, int sum_a0, uint32_t *bitset, uint32_t count)
{
char filename[80];
sprintf(filename, "bitflip_%d_%03" PRIx16 "_sum%d_states.bin", odd_even, bitflip, sum_a0);
FILE *outfile = fopen(filename, "wb");
fwrite(&count, 1, sizeof(count), outfile);
fwrite(bitset, 1, sizeof(uint32_t)*(1<<19), outfile);
fclose(outfile);
char filename[80];
sprintf(filename, "bitflip_%d_%03" PRIx16 "_sum%d_states.bin", odd_even, bitflip, sum_a0);
FILE *outfile = fopen(filename, "wb");
fwrite(&count, 1, sizeof(count), outfile);
fwrite(bitset, 1, sizeof(uint32_t)*(1<<19), outfile);
fclose(outfile);
}
@@ -191,402 +191,402 @@ uint32_t *restrict part_sum_a0_bitarrays[2][NUM_PART_SUMS];
static void init_part_sum_bitarrays(void)
{
printf("init_part_sum_bitarrays()...");
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
for (uint16_t part_sum_a0 = 0; part_sum_a0 < NUM_PART_SUMS; part_sum_a0++) {
part_sum_a0_bitarrays[odd_even][part_sum_a0] = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1<<19));
if (part_sum_a0_bitarrays[odd_even][part_sum_a0] == NULL) {
printf("Out of memory error in init_part_suma0_statelists(). Aborting...\n");
exit(4);
}
clear_bitarray24(part_sum_a0_bitarrays[odd_even][part_sum_a0]);
}
}
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
//printf("(%d, %" PRIu16 ")...", odd_even, part_sum_a0);
for (uint32_t state = 0; state < (1<<20); state++) {
uint16_t part_sum_a0 = PartialSumProperty(state, odd_even) / 2;
for (uint16_t low_bits = 0; low_bits < 1<<4; low_bits++) {
set_bit24(part_sum_a0_bitarrays[odd_even][part_sum_a0], state<<4 | low_bits);
}
}
}
printf("done.\n");
printf("init_part_sum_bitarrays()...");
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
for (uint16_t part_sum_a0 = 0; part_sum_a0 < NUM_PART_SUMS; part_sum_a0++) {
part_sum_a0_bitarrays[odd_even][part_sum_a0] = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1<<19));
if (part_sum_a0_bitarrays[odd_even][part_sum_a0] == NULL) {
printf("Out of memory error in init_part_suma0_statelists(). Aborting...\n");
exit(4);
}
clear_bitarray24(part_sum_a0_bitarrays[odd_even][part_sum_a0]);
}
}
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
//printf("(%d, %" PRIu16 ")...", odd_even, part_sum_a0);
for (uint32_t state = 0; state < (1<<20); state++) {
uint16_t part_sum_a0 = PartialSumProperty(state, odd_even) / 2;
for (uint16_t low_bits = 0; low_bits < 1<<4; low_bits++) {
set_bit24(part_sum_a0_bitarrays[odd_even][part_sum_a0], state<<4 | low_bits);
}
}
}
printf("done.\n");
}
static void free_part_sum_bitarrays(void)
{
printf("free_part_sum_bitarrays()...");
for (int16_t part_sum_a0 = (NUM_PART_SUMS-1); part_sum_a0 >= 0; part_sum_a0--) {
free_bitarray(part_sum_a0_bitarrays[ODD_STATE][part_sum_a0]);
}
for (int16_t part_sum_a0 = (NUM_PART_SUMS-1); part_sum_a0 >= 0; part_sum_a0--) {
free_bitarray(part_sum_a0_bitarrays[EVEN_STATE][part_sum_a0]);
}
printf("done.\n");
printf("free_part_sum_bitarrays()...");
for (int16_t part_sum_a0 = (NUM_PART_SUMS-1); part_sum_a0 >= 0; part_sum_a0--) {
free_bitarray(part_sum_a0_bitarrays[ODD_STATE][part_sum_a0]);
}
for (int16_t part_sum_a0 = (NUM_PART_SUMS-1); part_sum_a0 >= 0; part_sum_a0--) {
free_bitarray(part_sum_a0_bitarrays[EVEN_STATE][part_sum_a0]);
}
printf("done.\n");
}
uint32_t *restrict sum_a0_bitarray[2];
void init_sum_bitarray(uint16_t sum_a0)
{
printf("init_sum_bitarray()...\n");
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
sum_a0_bitarray[odd_even] = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1<<19));
if (sum_a0_bitarray[odd_even] == NULL) {
printf("Out of memory error in init_sum_bitarrays(). Aborting...\n");
exit(4);
}
clear_bitarray24(sum_a0_bitarray[odd_even]);
}
for (uint8_t p = 0; p < NUM_PART_SUMS; p++) {
for (uint8_t q = 0; q < NUM_PART_SUMS; q++) {
if (sum_a0 == 2*p*(16-2*q) + (16-2*p)*2*q) {
for (uint32_t i = 0; i < (1<<19); i++) {
sum_a0_bitarray[EVEN_STATE][i] |= part_sum_a0_bitarrays[EVEN_STATE][q][i];
sum_a0_bitarray[ODD_STATE][i] |= part_sum_a0_bitarrays[ODD_STATE][p][i];
}
}
}
}
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
uint32_t count = count_states(sum_a0_bitarray[odd_even]);
printf("sum_a0_bitarray[%s] has %d states (%5.2f%%)\n", odd_even==EVEN_STATE?"even":"odd ", count, (float)count/(1<<24)*100.0);
}
printf("done.\n");
printf("init_sum_bitarray()...\n");
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
sum_a0_bitarray[odd_even] = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1<<19));
if (sum_a0_bitarray[odd_even] == NULL) {
printf("Out of memory error in init_sum_bitarrays(). Aborting...\n");
exit(4);
}
clear_bitarray24(sum_a0_bitarray[odd_even]);
}
for (uint8_t p = 0; p < NUM_PART_SUMS; p++) {
for (uint8_t q = 0; q < NUM_PART_SUMS; q++) {
if (sum_a0 == 2*p*(16-2*q) + (16-2*p)*2*q) {
for (uint32_t i = 0; i < (1<<19); i++) {
sum_a0_bitarray[EVEN_STATE][i] |= part_sum_a0_bitarrays[EVEN_STATE][q][i];
sum_a0_bitarray[ODD_STATE][i] |= part_sum_a0_bitarrays[ODD_STATE][p][i];
}
}
}
}
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
uint32_t count = count_states(sum_a0_bitarray[odd_even]);
printf("sum_a0_bitarray[%s] has %d states (%5.2f%%)\n", odd_even==EVEN_STATE?"even":"odd ", count, (float)count/(1<<24)*100.0);
}
printf("done.\n");
}
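// A concrete example of the (p, q) selection above: expanding the condition
// gives 2*p*(16 - 2*q) + (16 - 2*p)*2*q = 32*p + 32*q - 8*p*q, so e.g.
// sum_a0 == 32 is matched only by (p, q) in {(1, 0), (0, 1), (8, 7), (7, 8)}
// within the range 0 <= p, q <= 8.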
static void free_sum_bitarray(void)
{
printf("free_sum_bitarray()...");
free_bitarray(sum_a0_bitarray[ODD_STATE]);
free_bitarray(sum_a0_bitarray[EVEN_STATE]);
printf("done.\n");
printf("free_sum_bitarray()...");
free_bitarray(sum_a0_bitarray[ODD_STATE]);
free_bitarray(sum_a0_bitarray[EVEN_STATE]);
printf("done.\n");
}
static void precalculate_bit0_bitflip_bitarrays(uint8_t const bitflip, uint16_t const sum_a0)
{
// #define TEST_RUN
#ifdef TEST_RUN
#define NUM_TEST_STATES (1<<10)
#else
#define NUM_TEST_STATES (1<<23)
#endif
// #define TEST_RUN
#ifdef TEST_RUN
#define NUM_TEST_STATES (1<<10)
#else
#define NUM_TEST_STATES (1<<23)
#endif
time_t start_time = time(NULL);
time_t last_check_time = start_time;
time_t start_time = time(NULL);
time_t last_check_time = start_time;
uint32_t *restrict test_bitarray[2];
uint32_t *restrict test_not_bitarray[2];
uint32_t *restrict test_bitarray[2];
uint32_t *restrict test_not_bitarray[2];
test_bitarray[EVEN_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_bitarray[EVEN_STATE]);
test_bitarray[ODD_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_bitarray[ODD_STATE]);
test_bitarray[EVEN_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_bitarray[EVEN_STATE]);
test_bitarray[ODD_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_bitarray[ODD_STATE]);
test_not_bitarray[EVEN_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_not_bitarray[EVEN_STATE]);
test_not_bitarray[ODD_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_not_bitarray[ODD_STATE]);
test_not_bitarray[EVEN_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_not_bitarray[EVEN_STATE]);
test_not_bitarray[ODD_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_not_bitarray[ODD_STATE]);
uint32_t count[2];
bool all_odd_states_are_possible_for_notbitflip = false;
uint32_t count[2];
bool all_odd_states_are_possible_for_notbitflip = false;
printf("\n\nStarting search for crypto1 states resulting in bitflip property 0x%03x...\n", bitflip);
for (uint32_t even_state = next_state(sum_a0_bitarray[EVEN_STATE], -1); even_state < NUM_TEST_STATES; even_state = next_state(sum_a0_bitarray[EVEN_STATE], even_state)) {
bool even_state_is_possible = false;
time_t time_now = time(NULL);
if (difftime(time_now, last_check_time) > 5*60) { // print status every 5 minutes
float runtime = difftime(time_now, start_time);
float remaining_time = runtime * ((1<<23) - even_state) / even_state;
printf("\n%1.1f hours elapsed, expected completion in %1.1f hours (%1.1f days)", runtime/3600, remaining_time/3600, remaining_time/3600/24);
last_check_time = time_now;
}
for (uint32_t odd_state = next_state(sum_a0_bitarray[ODD_STATE], -1); odd_state < (1<<24); odd_state = next_state(test_bitarray[ODD_STATE], odd_state)) {
if (even_state_is_possible && test_bit24(test_bitarray[ODD_STATE], odd_state)) continue;
// load crypto1 state
struct Crypto1State cs;
cs.odd = odd_state >> 4;
cs.even = even_state >> 4;
printf("\n\nStarting search for crypto1 states resulting in bitflip property 0x%03x...\n", bitflip);
for (uint32_t even_state = next_state(sum_a0_bitarray[EVEN_STATE], -1); even_state < NUM_TEST_STATES; even_state = next_state(sum_a0_bitarray[EVEN_STATE], even_state)) {
bool even_state_is_possible = false;
time_t time_now = time(NULL);
if (difftime(time_now, last_check_time) > 5*60) { // print status every 5 minutes
float runtime = difftime(time_now, start_time);
float remaining_time = runtime * ((1<<23) - even_state) / even_state;
printf("\n%1.1f hours elapsed, expected completion in %1.1f hours (%1.1f days)", runtime/3600, remaining_time/3600, remaining_time/3600/24);
last_check_time = time_now;
}
for (uint32_t odd_state = next_state(sum_a0_bitarray[ODD_STATE], -1); odd_state < (1<<24); odd_state = next_state(test_bitarray[ODD_STATE], odd_state)) {
if (even_state_is_possible && test_bit24(test_bitarray[ODD_STATE], odd_state)) continue;
// load crypto1 state
struct Crypto1State cs;
cs.odd = odd_state >> 4;
cs.even = even_state >> 4;
// track flipping bits in state
struct Crypto1DeltaState {
uint_fast8_t odd;
uint_fast8_t even;
} cs_delta;
cs_delta.odd = 0;
cs_delta.even = 0;
// track flipping bits in state
struct Crypto1DeltaState {
uint_fast8_t odd;
uint_fast8_t even;
} cs_delta;
cs_delta.odd = 0;
cs_delta.even = 0;
uint_fast16_t keystream = 0;
uint_fast16_t keystream = 0;
// decrypt 9 bits
for (int i = 0; i < 9; i++) {
uint_fast8_t keystream_bit = filter(cs.odd & 0x000fffff) ^ filter((cs.odd & 0x000fffff) ^ cs_delta.odd);
keystream = keystream << 1 | keystream_bit;
uint_fast8_t nt_bit = BIT(bitflip, i) ^ keystream_bit;
uint_fast8_t LSFR_feedback = BIT(cs_delta.odd, 2) ^ BIT(cs_delta.even, 2) ^ BIT(cs_delta.odd, 3);
// decrypt 9 bits
for (int i = 0; i < 9; i++) {
uint_fast8_t keystream_bit = filter(cs.odd & 0x000fffff) ^ filter((cs.odd & 0x000fffff) ^ cs_delta.odd);
keystream = keystream << 1 | keystream_bit;
uint_fast8_t nt_bit = BIT(bitflip, i) ^ keystream_bit;
uint_fast8_t LSFR_feedback = BIT(cs_delta.odd, 2) ^ BIT(cs_delta.even, 2) ^ BIT(cs_delta.odd, 3);
cs_delta.even = cs_delta.even << 1 | (LSFR_feedback ^ nt_bit);
uint_fast8_t tmp = cs_delta.odd;
cs_delta.odd = cs_delta.even;
cs_delta.even = tmp;
cs_delta.even = cs_delta.even << 1 | (LSFR_feedback ^ nt_bit);
uint_fast8_t tmp = cs_delta.odd;
cs_delta.odd = cs_delta.even;
cs_delta.even = tmp;
cs.even = cs.odd;
if (i & 1) {
cs.odd = odd_state >> (7 - i) / 2;
} else {
cs.odd = even_state >> (7 - i) / 2;
}
}
cs.even = cs.odd;
if (i & 1) {
cs.odd = odd_state >> (7 - i) / 2;
} else {
cs.odd = even_state >> (7 - i) / 2;
}
}
if (evenparity32(keystream) == evenparity32(bitflip)) {
// found valid bitflip state
even_state_is_possible = true;
set_bit24(test_bitarray[EVEN_STATE], even_state);
set_bit24(test_bitarray[EVEN_STATE], 1 << 23 | even_state);
set_bit24(test_bitarray[ODD_STATE], odd_state);
} else {
// found valid !bitflip state
set_bit24(test_not_bitarray[EVEN_STATE], even_state);
set_bit24(test_not_bitarray[EVEN_STATE], 1 << 23 | even_state);
set_bit24(test_not_bitarray[ODD_STATE], odd_state);
}
}
if (!even_state_is_possible) {
all_odd_states_are_possible_for_notbitflip = true;
}
}
if (evenparity32(keystream) == evenparity32(bitflip)) {
// found valid bitflip state
even_state_is_possible = true;
set_bit24(test_bitarray[EVEN_STATE], even_state);
set_bit24(test_bitarray[EVEN_STATE], 1 << 23 | even_state);
set_bit24(test_bitarray[ODD_STATE], odd_state);
} else {
// found valid !bitflip state
set_bit24(test_not_bitarray[EVEN_STATE], even_state);
set_bit24(test_not_bitarray[EVEN_STATE], 1 << 23 | even_state);
set_bit24(test_not_bitarray[ODD_STATE], odd_state);
}
}
if (!even_state_is_possible) {
all_odd_states_are_possible_for_notbitflip = true;
}
}
printf("\nAnalysis completed. Checking for effective bitflip properties...\n");
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
count[odd_even] = count_states(test_bitarray[odd_even]);
if (count[odd_even] != 1<<24) {
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
count[odd_even],
odd_even==EVEN_STATE?"even":"odd",
bitflip, (1<<24) - count[odd_even],
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
#ifndef TEST_RUN
write_bitflips_file(odd_even, bitflip, sum_a0, test_bitarray[odd_even], count[odd_even]);
#endif
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip);
}
}
uint32_t *restrict test_bitarray_2nd = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_bitarray_2nd);
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
if (count[odd_even] != 1<<24) {
for (uint32_t state = 0; state < (1<<24); state += 1<<4) {
uint32_t line = test_bitarray[odd_even][state>>5];
uint16_t half_line = state&0x000000010 ? line&0x0000ffff : line>>16;
if (half_line != 0) {
for (uint32_t low_bits = 0; low_bits < (1<<4); low_bits++) {
set_bit24(test_bitarray_2nd, low_bits << 20 | state >> 4);
}
}
}
count[odd_even] = count_states(test_bitarray_2nd);
if (count[odd_even] != 1<<24) {
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
count[odd_even],
odd_even==EVEN_STATE?"even":"odd",
bitflip | BITFLIP_2ND_BYTE, (1<<24) - count[odd_even],
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
#ifndef TEST_RUN
write_bitflips_file(odd_even, bitflip | BITFLIP_2ND_BYTE, sum_a0, test_bitarray_2nd, count[odd_even]);
#endif
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | BITFLIP_2ND_BYTE);
}
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | BITFLIP_2ND_BYTE);
}
}
printf("\nAnalysis completed. Checking for effective bitflip properties...\n");
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
count[odd_even] = count_states(test_bitarray[odd_even]);
if (count[odd_even] != 1<<24) {
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
count[odd_even],
odd_even==EVEN_STATE?"even":"odd",
bitflip, (1<<24) - count[odd_even],
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
#ifndef TEST_RUN
write_bitflips_file(odd_even, bitflip, sum_a0, test_bitarray[odd_even], count[odd_even]);
#endif
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip);
}
}
uint32_t *restrict test_bitarray_2nd = malloc_bitarray(sizeof(uint32_t) * (1<<19));
clear_bitarray24(test_bitarray_2nd);
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
if (count[odd_even] != 1<<24) {
for (uint32_t state = 0; state < (1<<24); state += 1<<4) {
uint32_t line = test_bitarray[odd_even][state>>5];
uint16_t half_line = state&0x000000010 ? line&0x0000ffff : line>>16;
if (half_line != 0) {
for (uint32_t low_bits = 0; low_bits < (1<<4); low_bits++) {
set_bit24(test_bitarray_2nd, low_bits << 20 | state >> 4);
}
}
}
count[odd_even] = count_states(test_bitarray_2nd);
if (count[odd_even] != 1<<24) {
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
count[odd_even],
odd_even==EVEN_STATE?"even":"odd",
bitflip | BITFLIP_2ND_BYTE, (1<<24) - count[odd_even],
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
#ifndef TEST_RUN
write_bitflips_file(odd_even, bitflip | BITFLIP_2ND_BYTE, sum_a0, test_bitarray_2nd, count[odd_even]);
#endif
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | BITFLIP_2ND_BYTE);
}
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | BITFLIP_2ND_BYTE);
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// second run for the remaining "not bitflip" states
printf("\n\nStarting search for crypto1 states resulting in bitflip property 0x%03x...", bitflip | 0x100);
start_time = time(NULL);
last_check_time = start_time;
for (uint32_t even_state = next_state(sum_a0_bitarray[EVEN_STATE], -1); even_state < NUM_TEST_STATES; even_state = next_state(sum_a0_bitarray[EVEN_STATE], even_state)) {
bool even_state_is_possible = test_bit24(test_not_bitarray[EVEN_STATE], even_state);
time_t time_now = time(NULL);
if (difftime(time_now, last_check_time) > 5*60) { // print status every 5 minutes
float runtime = difftime(time_now, start_time);
float remaining_time = runtime * ((1<<23) - even_state) / even_state;
printf("\n%1.1f hours elapsed, expected completion in %1.1f hours (%1.1f days)", runtime/3600, remaining_time/3600, remaining_time/3600/24);
last_check_time = time_now;
}
for (uint32_t odd_state = next_state(sum_a0_bitarray[ODD_STATE], -1); odd_state < (1<<24); odd_state = next_state(sum_a0_bitarray[ODD_STATE], odd_state)) {
if (even_state_is_possible) {
if (all_odd_states_are_possible_for_notbitflip) break;
if (test_bit24(test_not_bitarray[ODD_STATE], odd_state)) continue;
}
// load crypto1 state
struct Crypto1State cs;
cs.odd = odd_state >> 4;
cs.even = even_state >> 4;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// second run for the remaining "not bitflip" states
printf("\n\nStarting search for crypto1 states resulting in bitflip property 0x%03x...", bitflip | 0x100);
start_time = time(NULL);
last_check_time = start_time;
for (uint32_t even_state = next_state(sum_a0_bitarray[EVEN_STATE], -1); even_state < NUM_TEST_STATES; even_state = next_state(sum_a0_bitarray[EVEN_STATE], even_state)) {
bool even_state_is_possible = test_bit24(test_not_bitarray[EVEN_STATE], even_state);
time_t time_now = time(NULL);
if (difftime(time_now, last_check_time) > 5*60) { // print status every 5 minutes
float runtime = difftime(time_now, start_time);
float remaining_time = runtime * ((1<<23) - even_state) / even_state;
printf("\n%1.1f hours elapsed, expected completion in %1.1f hours (%1.1f days)", runtime/3600, remaining_time/3600, remaining_time/3600/24);
last_check_time = time_now;
}
for (uint32_t odd_state = next_state(sum_a0_bitarray[ODD_STATE], -1); odd_state < (1<<24); odd_state = next_state(sum_a0_bitarray[ODD_STATE], odd_state)) {
if (even_state_is_possible) {
if (all_odd_states_are_possible_for_notbitflip) break;
if (test_bit24(test_not_bitarray[ODD_STATE], odd_state)) continue;
}
// load crypto1 state
struct Crypto1State cs;
cs.odd = odd_state >> 4;
cs.even = even_state >> 4;
// track flipping bits in state
struct Crypto1DeltaState {
uint_fast8_t odd;
uint_fast8_t even;
} cs_delta;
cs_delta.odd = 0;
cs_delta.even = 0;
// track flipping bits in state
struct Crypto1DeltaState {
uint_fast8_t odd;
uint_fast8_t even;
} cs_delta;
cs_delta.odd = 0;
cs_delta.even = 0;
uint_fast16_t keystream = 0;
// uint_fast16_t nt = 0;
uint_fast16_t keystream = 0;
// uint_fast16_t nt = 0;
// decrypt 9 bits
for (int i = 0; i < 9; i++) {
uint_fast8_t keystream_bit = filter(cs.odd & 0x000fffff) ^ filter((cs.odd & 0x000fffff) ^ cs_delta.odd);
keystream = keystream << 1 | keystream_bit;
uint_fast8_t nt_bit = BIT(bitflip|0x100, i) ^ keystream_bit;
uint_fast8_t LSFR_feedback = BIT(cs_delta.odd, 2) ^ BIT(cs_delta.even, 2) ^ BIT(cs_delta.odd, 3);
// decrypt 9 bits
for (int i = 0; i < 9; i++) {
uint_fast8_t keystream_bit = filter(cs.odd & 0x000fffff) ^ filter((cs.odd & 0x000fffff) ^ cs_delta.odd);
keystream = keystream << 1 | keystream_bit;
uint_fast8_t nt_bit = BIT(bitflip|0x100, i) ^ keystream_bit;
uint_fast8_t LSFR_feedback = BIT(cs_delta.odd, 2) ^ BIT(cs_delta.even, 2) ^ BIT(cs_delta.odd, 3);
cs_delta.even = cs_delta.even << 1 | (LSFR_feedback ^ nt_bit);
uint_fast8_t tmp = cs_delta.odd;
cs_delta.odd = cs_delta.even;
cs_delta.even = tmp;
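// Advance the real cipher state by one clock. Operator precedence matters in the two lines
// below: '>>' binds weaker than '/', so 'odd_state >> (7 - i) / 2' means
// 'odd_state >> ((7 - i) / 2)'. The next filter input is taken straight from the candidate
// state words instead of tracking the full 48-bit LFSR, presumably because fresh feedback bits
// cannot reach a filter tap within these 9 steps.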
cs.even = cs.odd;
if (i & 1) {
cs.odd = odd_state >> (7 - i) / 2;
} else {
cs.odd = even_state >> (7 - i) / 2;
}
}
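// If the parity of the collected keystream bits differs from the parity of the bitflip byte,
// this state can show the "!bitflip" behaviour and must stay possible. The even-state loop only
// enumerates up to NUM_TEST_STATES, so the result is mirrored to the state with bit 23 set as
// well, presumably because that bit cannot influence the bits tested here.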
if (evenparity32(keystream) != evenparity32(bitflip)) {
// found valid !bitflip state
even_state_is_possible = true;
set_bit24(test_not_bitarray[EVEN_STATE], even_state);
set_bit24(test_not_bitarray[EVEN_STATE], 1 << 23 | even_state);
set_bit24(test_not_bitarray[ODD_STATE], odd_state);
}
}
}
printf("\nAnalysis completed. Checking for effective !bitflip properties...\n");
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
count[odd_even] = count_states(test_not_bitarray[odd_even]);
if (count[odd_even] != 1<<24) {
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
count[odd_even],
odd_even==EVEN_STATE?"even":"odd",
bitflip|0x100, (1<<24) - count[odd_even],
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
#ifndef TEST_RUN
write_bitflips_file(odd_even, bitflip|0x100, sum_a0, test_not_bitarray[odd_even], count[odd_even]);
#endif
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip|0x100);
}
}
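// Derive a property usable on the second nonce byte: each surviving state appears to be shifted
// by 4 positions in the split odd/even representation (one extra byte is 8 LFSR clocks, i.e. 4
// per half), with all 16 possibilities filled in for the bits that roll in. The result is only
// written out (with the BITFLIP_2ND_BYTE flag) if it still eliminates states.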
printf("\nAnalysis completed. Checking for effective !bitflip properties...\n");
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
count[odd_even] = count_states(test_not_bitarray[odd_even]);
if (count[odd_even] != 1<<24) {
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
count[odd_even],
odd_even==EVEN_STATE?"even":"odd",
bitflip|0x100, (1<<24) - count[odd_even],
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
#ifndef TEST_RUN
write_bitflips_file(odd_even, bitflip|0x100, sum_a0, test_not_bitarray[odd_even], count[odd_even]);
#endif
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip|0x100);
}
}
clear_bitarray24(test_bitarray_2nd);
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
if (count[odd_even] != 1<<24) {
for (uint32_t state = 0; state < (1<<24); state += 1<<4) {
uint32_t line = test_not_bitarray[odd_even][state>>5];
uint16_t half_line = state&0x000000010 ? line&0x0000ffff : line>>16;
if (half_line != 0) {
for (uint32_t low_bits = 0; low_bits < (1<<4); low_bits++) {
set_bit24(test_bitarray_2nd, low_bits << 20 | state >> 4);
}
}
}
count[odd_even] = count_states(test_bitarray_2nd);
if (count[odd_even] != 1<<24) {
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
count[odd_even],
odd_even==EVEN_STATE?"even":"odd",
bitflip | 0x100 | BITFLIP_2ND_BYTE, (1<<24) - count[odd_even],
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
#ifndef TEST_RUN
write_bitflips_file(odd_even, bitflip | 0x100 | BITFLIP_2ND_BYTE, sum_a0, test_bitarray_2nd, count[odd_even]);
#endif
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | 0x100 | BITFLIP_2ND_BYTE);
}
} else {
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | 0x100 | BITFLIP_2ND_BYTE);
}
}
free_bitarray(test_bitarray_2nd);
free_bitarray(test_not_bitarray[ODD_STATE]);
free_bitarray(test_not_bitarray[EVEN_STATE]);
free_bitarray(test_bitarray[ODD_STATE]);
free_bitarray(test_bitarray[EVEN_STATE]);
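// exit(0) below terminates the tool right here, so on this path the free_sum_bitarray() and
// free_part_sum_bitarrays() calls at the end of main() are never reached; the OS reclaims that
// memory anyway.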
exit(0);
}
int main (int argc, char *argv[]) {
unsigned int bitflip_in;
int sum_a0 = -1;    // default when no <Sum_a0> argument is given, so the switch below never reads it uninitialized
printf("Create tables required by hardnested attack.\n");
printf("Expect a runtime in the range of days or weeks.\n");
printf("Single thread only. If you want to use several threads, start it multiple times :-)\n\n");
printf("Create tables required by hardnested attack.\n");
printf("Expect a runtime in the range of days or weeks.\n");
printf("Single thread only. If you want to use several threads, start it multiple times :-)\n\n");
if (argc != 2 && argc != 3) {
printf(" syntax: %s <bitflip property> [<Sum_a0>]\n\n", argv[0]);
printf(" example: %s 1f\n", argv[0]);
return 1;
}
sscanf(argv[1], "%x", &bitflip_in);
if (bitflip_in > 255) {
printf("Bitflip property must be less than or equal to 0xff\n\n");
return 1;
}
if (argc == 3) {
sscanf(argv[2], "%d", &sum_a0);
}
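// Only the 19 Sum(a0) values that the hardnested attack distinguishes are accepted; any other
// value, or a missing second argument, ends up as -1, which presumably means "no specific
// Sum(a0) restriction" for the tables generated below.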
switch (sum_a0) {
case 0:
case 32:
case 56:
case 64:
case 80:
case 96:
case 104:
case 112:
case 120:
case 128:
case 136:
case 144:
case 152:
case 160:
case 176:
case 192:
case 200:
case 224:
case 256: break;
default: sum_a0 = -1;
}
printf("Calculating for bitflip = %02x, sum_a0 = %d\n", bitflip_in, sum_a0);
printf("Calculating for bitflip = %02x, sum_a0 = %d\n", bitflip_in, sum_a0);
init_part_sum_bitarrays();
init_sum_bitarray(sum_a0);
precalculate_bit0_bitflip_bitarrays(bitflip_in, sum_a0);
free_sum_bitarray();
free_part_sum_bitarrays();
return 0;
}