mirror of
https://github.com/RfidResearchGroup/proxmark3.git
synced 2025-08-14 02:27:26 -07:00
make style
This commit is contained in:
parent
0d9223a547
commit
0373696662
483 changed files with 56514 additions and 52451 deletions
|
@ -80,9 +80,9 @@ THE SOFTWARE.
|
|||
#define VECTOR_SIZE (MAX_BITSLICES/8)
|
||||
typedef uint32_t __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t;
|
||||
typedef union {
|
||||
bitslice_value_t value;
|
||||
uint64_t bytes64[MAX_BITSLICES/64];
|
||||
uint8_t bytes[MAX_BITSLICES/8];
|
||||
bitslice_value_t value;
|
||||
uint64_t bytes64[MAX_BITSLICES / 64];
|
||||
uint8_t bytes[MAX_BITSLICES / 8];
|
||||
} bitslice_t;
|
||||
|
||||
// filter function (f20)
|
||||
|
@ -123,7 +123,7 @@ typedef union {
|
|||
#endif
|
||||
|
||||
// typedefs and declaration of functions:
|
||||
typedef const uint64_t crack_states_bitsliced_t(uint32_t, uint8_t*, statelist_t*, uint32_t*, uint64_t*, uint32_t, uint8_t*, noncelist_t*);
|
||||
typedef const uint64_t crack_states_bitsliced_t(uint32_t, uint8_t *, statelist_t *, uint32_t *, uint64_t *, uint32_t, uint8_t *, noncelist_t *);
|
||||
crack_states_bitsliced_t crack_states_bitsliced_AVX512;
|
||||
crack_states_bitsliced_t crack_states_bitsliced_AVX2;
|
||||
crack_states_bitsliced_t crack_states_bitsliced_AVX;
|
||||
|
@ -132,7 +132,7 @@ crack_states_bitsliced_t crack_states_bitsliced_MMX;
|
|||
crack_states_bitsliced_t crack_states_bitsliced_NOSIMD;
|
||||
crack_states_bitsliced_t crack_states_bitsliced_dispatch;
|
||||
|
||||
typedef void bitslice_test_nonces_t(uint32_t, uint32_t*, uint8_t*);
|
||||
typedef void bitslice_test_nonces_t(uint32_t, uint32_t *, uint8_t *);
|
||||
bitslice_test_nonces_t bitslice_test_nonces_AVX512;
|
||||
bitslice_test_nonces_t bitslice_test_nonces_AVX2;
|
||||
bitslice_test_nonces_t bitslice_test_nonces_AVX;
|
||||
|
@ -145,12 +145,13 @@ bitslice_test_nonces_t bitslice_test_nonces_dispatch;
|
|||
#define malloc_bitslice(x) __builtin_assume_aligned(_aligned_malloc((x), MAX_BITSLICES/8), MAX_BITSLICES/8)
|
||||
#define free_bitslice(x) _aligned_free(x)
|
||||
#elif defined (__APPLE__)
|
||||
static void *malloc_bitslice(size_t x) {
|
||||
static void *malloc_bitslice(size_t x)
|
||||
{
|
||||
char *allocated_memory;
|
||||
if (posix_memalign((void**)&allocated_memory, MAX_BITSLICES/8, x)) {
|
||||
if (posix_memalign((void **)&allocated_memory, MAX_BITSLICES / 8, x)) {
|
||||
return NULL;
|
||||
} else {
|
||||
return __builtin_assume_aligned(allocated_memory, MAX_BITSLICES/8);
|
||||
return __builtin_assume_aligned(allocated_memory, MAX_BITSLICES / 8);
|
||||
}
|
||||
}
|
||||
#define free_bitslice(x) free(x)
|
||||
|
@ -173,7 +174,8 @@ static bitslice_t bs_ones;
|
|||
static bitslice_t bs_zeroes;
|
||||
|
||||
|
||||
void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
|
||||
void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par)
|
||||
{
|
||||
|
||||
// initialize 1 and 0 vectors
|
||||
memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
|
||||
|
@ -181,9 +183,9 @@ void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce
|
|||
|
||||
// bitslice nonces' 2nd to 4th byte
|
||||
for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
|
||||
for(uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++){
|
||||
bool bit = get_bit(KEYSTREAM_SIZE-1-bit_idx, BSWAP_32(bf_test_nonce[i] << 8));
|
||||
if(bit){
|
||||
for (uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++) {
|
||||
bool bit = get_bit(KEYSTREAM_SIZE - 1 - bit_idx, BSWAP_32(bf_test_nonce[i] << 8));
|
||||
if (bit) {
|
||||
bitsliced_encrypted_nonces[i][bit_idx].value = bs_ones.value;
|
||||
} else {
|
||||
bitsliced_encrypted_nonces[i][bit_idx].value = bs_zeroes.value;
|
||||
|
@ -192,9 +194,9 @@ void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce
|
|||
}
|
||||
// bitslice nonces' parity (4 bits)
|
||||
for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
|
||||
for(uint32_t bit_idx = 0; bit_idx < 4; bit_idx++){
|
||||
bool bit = get_bit(4-1-bit_idx, bf_test_nonce_par[i]);
|
||||
if(bit){
|
||||
for (uint32_t bit_idx = 0; bit_idx < 4; bit_idx++) {
|
||||
bool bit = get_bit(4 - 1 - bit_idx, bf_test_nonce_par[i]);
|
||||
if (bit) {
|
||||
bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_ones.value;
|
||||
} else {
|
||||
bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_zeroes.value;
|
||||
|
@ -205,27 +207,28 @@ void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce
|
|||
}
|
||||
|
||||
|
||||
const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces){
|
||||
const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces)
|
||||
{
|
||||
|
||||
// Unlike aczid's implementation this doesn't roll back at all when performing bitsliced bruteforce.
|
||||
// We know that the best first byte is already shifted in. Testing with the remaining three bytes of
|
||||
// the nonces is sufficient to eliminate most of them. The small rest is tested with a simple unsliced
|
||||
// brute forcing (including roll back).
|
||||
|
||||
bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
|
||||
bitslice_t * restrict state_p;
|
||||
bitslice_t states[KEYSTREAM_SIZE + STATE_SIZE];
|
||||
bitslice_t *restrict state_p;
|
||||
uint64_t key = -1;
|
||||
uint64_t bucket_states_tested = 0;
|
||||
uint32_t bucket_size[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
|
||||
uint32_t bucket_size[(p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1];
|
||||
uint32_t bitsliced_blocks = 0;
|
||||
uint32_t const *restrict p_even_end = p->states[EVEN_STATE] + p->len[EVEN_STATE];
|
||||
#if defined (DEBUG_BRUTE_FORCE)
|
||||
uint32_t elimination_step = 0;
|
||||
#define MAX_ELIMINATION_STEP 32
|
||||
#define MAX_ELIMINATION_STEP 32
|
||||
uint64_t keys_eliminated[MAX_ELIMINATION_STEP] = {0};
|
||||
#endif
|
||||
#ifdef DEBUG_KEY_ELIMINATION
|
||||
bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
|
||||
bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1];
|
||||
#endif
|
||||
|
||||
// constant ones/zeroes
|
||||
|
@ -235,32 +238,32 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
|
||||
|
||||
// bitslice all the even states
|
||||
bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_t *));
|
||||
bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1) * sizeof(bitslice_t *));
|
||||
if (bitsliced_even_states == NULL) {
|
||||
printf("Out of memory error in brute_force. Aborting...");
|
||||
exit(4);
|
||||
}
|
||||
bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_value_t));
|
||||
bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1) / MAX_BITSLICES + 1) * sizeof(bitslice_value_t));
|
||||
if (bitsliced_even_feedback == NULL) {
|
||||
printf("Out of memory error in brute_force. Aborting...");
|
||||
exit(4);
|
||||
}
|
||||
for(uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES){
|
||||
bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE/2*sizeof(bitslice_t));
|
||||
for (uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES) {
|
||||
bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE / 2 * sizeof(bitslice_t));
|
||||
if (lstate_p == NULL) {
|
||||
printf("Out of memory error in brute_force. Aborting... \n");
|
||||
exit(4);
|
||||
}
|
||||
memset(lstate_p, 0x00, STATE_SIZE/2*sizeof(bitslice_t)); // zero even bits
|
||||
memset(lstate_p, 0x00, STATE_SIZE / 2 * sizeof(bitslice_t)); // zero even bits
|
||||
// bitslice even half-states
|
||||
const uint32_t max_slices = (p_even_end-p_even) < MAX_BITSLICES ? p_even_end-p_even : MAX_BITSLICES;
|
||||
const uint32_t max_slices = (p_even_end - p_even) < MAX_BITSLICES ? p_even_end - p_even : MAX_BITSLICES;
|
||||
bucket_size[bitsliced_blocks] = max_slices;
|
||||
#ifdef DEBUG_KEY_ELIMINATION
|
||||
bucket_contains_test_key[bitsliced_blocks] = false;
|
||||
#endif
|
||||
uint32_t slice_idx;
|
||||
for(slice_idx = 0; slice_idx < max_slices; ++slice_idx){
|
||||
uint32_t e = *(p_even+slice_idx);
|
||||
for (slice_idx = 0; slice_idx < max_slices; ++slice_idx) {
|
||||
uint32_t e = *(p_even + slice_idx);
|
||||
#ifdef DEBUG_KEY_ELIMINATION
|
||||
if (known_target_key != -1 && e == test_state[EVEN_STATE]) {
|
||||
bucket_contains_test_key[bitsliced_blocks] = true;
|
||||
|
@ -268,34 +271,34 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
// printf("in slice %d\n", slice_idx);
|
||||
}
|
||||
#endif
|
||||
for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
|
||||
for (uint32_t bit_idx = 0; bit_idx < STATE_SIZE / 2; bit_idx++, e >>= 1) {
|
||||
// set even bits
|
||||
if(e&1){
|
||||
lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
|
||||
if (e & 1) {
|
||||
lstate_p[bit_idx].bytes64[slice_idx >> 6] |= 1ull << (slice_idx & 0x3f);
|
||||
}
|
||||
}
|
||||
}
|
||||
// padding with last even state
|
||||
for ( ; slice_idx < MAX_BITSLICES; ++slice_idx) {
|
||||
uint32_t e = *(p_even_end-1);
|
||||
for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
|
||||
for (; slice_idx < MAX_BITSLICES; ++slice_idx) {
|
||||
uint32_t e = *(p_even_end - 1);
|
||||
for (uint32_t bit_idx = 0; bit_idx < STATE_SIZE / 2; bit_idx++, e >>= 1) {
|
||||
// set even bits
|
||||
if(e&1){
|
||||
lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
|
||||
if (e & 1) {
|
||||
lstate_p[bit_idx].bytes64[slice_idx >> 6] |= 1ull << (slice_idx & 0x3f);
|
||||
}
|
||||
}
|
||||
}
|
||||
bitsliced_even_states[bitsliced_blocks] = lstate_p;
|
||||
// bitsliced_even_feedback[bitsliced_blocks] = bs_ones;
|
||||
bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47- 0)/2].value ^
|
||||
lstate_p[(47-10)/2].value ^ lstate_p[(47-12)/2].value ^ lstate_p[(47-14)/2].value ^
|
||||
lstate_p[(47-24)/2].value ^ lstate_p[(47-42)/2].value;
|
||||
bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47 - 0) / 2].value ^
|
||||
lstate_p[(47 - 10) / 2].value ^ lstate_p[(47 - 12) / 2].value ^ lstate_p[(47 - 14) / 2].value ^
|
||||
lstate_p[(47 - 24) / 2].value ^ lstate_p[(47 - 42) / 2].value;
|
||||
bitsliced_blocks++;
|
||||
}
|
||||
// bitslice every odd state to every block of even states
|
||||
for(uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd){
|
||||
for (uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd) {
|
||||
// early abort
|
||||
if(*keys_found){
|
||||
if (*keys_found) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -305,12 +308,12 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
uint32_t o = *p_odd;
|
||||
|
||||
// pre-compute the odd feedback bit
|
||||
bool odd_feedback_bit = evenparity32(o&0x29ce5c);
|
||||
bool odd_feedback_bit = evenparity32(o & 0x29ce5c);
|
||||
const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
|
||||
|
||||
// set odd state bits
|
||||
for (uint32_t state_idx = 0; state_idx < STATE_SIZE; o >>= 1, state_idx += 2) {
|
||||
if (o & 1){
|
||||
if (o & 1) {
|
||||
state_p[state_idx] = bs_ones;
|
||||
} else {
|
||||
state_p[state_idx] = bs_zeroes;
|
||||
|
@ -320,14 +323,14 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
bitslice_value_t crypto1_bs_f20b_2[16];
|
||||
bitslice_value_t crypto1_bs_f20b_3[8];
|
||||
|
||||
crypto1_bs_f20b_2[0] = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
|
||||
crypto1_bs_f20b_3[0] = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
|
||||
crypto1_bs_f20b_2[0] = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
|
||||
crypto1_bs_f20b_3[0] = f20b(state_p[47 - 41].value, state_p[47 - 43].value, state_p[47 - 45].value, state_p[47 - 47].value);
|
||||
|
||||
bitslice_value_t ksb[8];
|
||||
ksb[0] = f20c(f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value),
|
||||
f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value),
|
||||
ksb[0] = f20c(f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value),
|
||||
f20b(state_p[47 - 17].value, state_p[47 - 19].value, state_p[47 - 21].value, state_p[47 - 23].value),
|
||||
crypto1_bs_f20b_2[0],
|
||||
f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value),
|
||||
f20a(state_p[47 - 33].value, state_p[47 - 35].value, state_p[47 - 37].value, state_p[47 - 39].value),
|
||||
crypto1_bs_f20b_3[0]);
|
||||
|
||||
uint32_t *restrict p_even = p->states[EVEN_STATE];
|
||||
|
@ -335,14 +338,14 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
|
||||
#ifdef DEBUG_KEY_ELIMINATION
|
||||
// if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
|
||||
// printf("Now testing known target key.\n");
|
||||
// printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
|
||||
// printf("Now testing known target key.\n");
|
||||
// printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
|
||||
// }
|
||||
#endif
|
||||
// add the even state bits
|
||||
const bitslice_t *restrict bitsliced_even_state = bitsliced_even_states[block_idx];
|
||||
for(uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) {
|
||||
state_p[state_idx] = bitsliced_even_state[state_idx/2];
|
||||
for (uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) {
|
||||
state_p[state_idx] = bitsliced_even_state[state_idx / 2];
|
||||
}
|
||||
|
||||
// pre-compute first feedback bit vector. This is the same for all nonces
|
||||
|
@ -357,10 +360,10 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
par[0] = bs_zeroes.value;
|
||||
uint32_t next_common_bits = 0;
|
||||
|
||||
for(uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests){
|
||||
for (uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests) {
|
||||
// common bits with preceding test nonce
|
||||
uint32_t common_bits = next_common_bits; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0;
|
||||
next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests+1]) : 0;
|
||||
next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests + 1]) : 0;
|
||||
uint32_t parity_bit_idx = 1; // start checking with the parity of second nonce byte
|
||||
bitslice_value_t fb_bits = fbb[common_bits]; // start with precomputed feedback bits from previous nonce
|
||||
bitslice_value_t ks_bits = ksb[common_bits]; // dito for first keystream bits
|
||||
|
@ -371,7 +374,7 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
state_p -= common_bits; // and reuse the already calculated state bits
|
||||
// highest bit is transmitted/received first. We start with Bit 23 (highest bit of second nonce byte),
|
||||
// or the highest bit which differs from the previous nonce
|
||||
for (int32_t ks_idx = KEYSTREAM_SIZE-1-common_bits; ks_idx >= 0; --ks_idx) {
|
||||
for (int32_t ks_idx = KEYSTREAM_SIZE - 1 - common_bits; ks_idx >= 0; --ks_idx) {
|
||||
|
||||
// decrypt nonce bits
|
||||
const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value;
|
||||
|
@ -386,27 +389,27 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
|
||||
// update crypto1 subfunctions
|
||||
bitslice_value_t f20a_1, f20b_1, f20b_2, f20a_2, f20b_3;
|
||||
f20a_2 = f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value);
|
||||
f20b_3 = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
|
||||
f20a_2 = f20a(state_p[47 - 33].value, state_p[47 - 35].value, state_p[47 - 37].value, state_p[47 - 39].value);
|
||||
f20b_3 = f20b(state_p[47 - 41].value, state_p[47 - 43].value, state_p[47 - 45].value, state_p[47 - 47].value);
|
||||
if (ks_idx > KEYSTREAM_SIZE - 8) {
|
||||
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
|
||||
f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
|
||||
f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
|
||||
f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
|
||||
f20b_1 = f20b(state_p[47 - 17].value, state_p[47 - 19].value, state_p[47 - 21].value, state_p[47 - 23].value);
|
||||
f20b_2 = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
|
||||
crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
|
||||
crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx] = f20b_3;
|
||||
} else if (ks_idx > KEYSTREAM_SIZE - 16) {
|
||||
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
|
||||
f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
|
||||
f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
|
||||
f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
|
||||
f20b_2 = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
|
||||
crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
|
||||
} else if (ks_idx > KEYSTREAM_SIZE - 24){
|
||||
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
|
||||
} else if (ks_idx > KEYSTREAM_SIZE - 24) {
|
||||
f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
|
||||
f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
|
||||
f20b_2 = crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx - 16];
|
||||
} else {
|
||||
f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
|
||||
f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
|
||||
f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
|
||||
f20a_1 = f20a(state_p[47 - 9].value, state_p[47 - 11].value, state_p[47 - 13].value, state_p[47 - 15].value);
|
||||
f20b_1 = f20b(state_p[47 - 17].value, state_p[47 - 19].value, state_p[47 - 21].value, state_p[47 - 23].value);
|
||||
f20b_2 = f20b(state_p[47 - 25].value, state_p[47 - 27].value, state_p[47 - 29].value, state_p[47 - 31].value);
|
||||
}
|
||||
// update keystream bit
|
||||
ks_bits = f20c(f20a_1, f20b_1, f20b_2, f20a_2, f20b_3);
|
||||
|
@ -427,15 +430,15 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
|
||||
// this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
|
||||
// the short-circuiting also helps
|
||||
if(results.bytes64[0] == 0
|
||||
if (results.bytes64[0] == 0
|
||||
#if MAX_BITSLICES > 64
|
||||
&& results.bytes64[1] == 0
|
||||
&& results.bytes64[1] == 0
|
||||
#endif
|
||||
#if MAX_BITSLICES > 128
|
||||
&& results.bytes64[2] == 0
|
||||
&& results.bytes64[3] == 0
|
||||
&& results.bytes64[2] == 0
|
||||
&& results.bytes64[3] == 0
|
||||
#endif
|
||||
) {
|
||||
) {
|
||||
#if defined (DEBUG_BRUTE_FORCE)
|
||||
if (elimination_step < MAX_ELIMINATION_STEP) {
|
||||
keys_eliminated[elimination_step] += MAX_BITSLICES;
|
||||
|
@ -458,12 +461,12 @@ const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes,
|
|||
// update feedback bit vector
|
||||
if (ks_idx != 0) {
|
||||
fb_bits =
|
||||
(state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
|
||||
state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
|
||||
state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
|
||||
state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
|
||||
state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
|
||||
state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
|
||||
(state_p[47 - 0].value ^ state_p[47 - 5].value ^ state_p[47 - 9].value ^
|
||||
state_p[47 - 10].value ^ state_p[47 - 12].value ^ state_p[47 - 14].value ^
|
||||
state_p[47 - 15].value ^ state_p[47 - 17].value ^ state_p[47 - 19].value ^
|
||||
state_p[47 - 24].value ^ state_p[47 - 25].value ^ state_p[47 - 27].value ^
|
||||
state_p[47 - 29].value ^ state_p[47 - 35].value ^ state_p[47 - 39].value ^
|
||||
state_p[47 - 41].value ^ state_p[47 - 42].value ^ state_p[47 - 43].value);
|
||||
}
|
||||
// remember feedback and keystream vectors for later use
|
||||
uint8_t bit = KEYSTREAM_SIZE - ks_idx;
|
||||
|
@ -523,7 +526,7 @@ stop_tests:
|
|||
}
|
||||
}
|
||||
out:
|
||||
for(uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
|
||||
for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx) {
|
||||
free_bitslice(bitsliced_even_states[block_idx]);
|
||||
}
|
||||
free(bitsliced_even_states);
|
||||
|
@ -532,7 +535,7 @@ out:
|
|||
|
||||
#if defined (DEBUG_BRUTE_FORCE)
|
||||
for (uint32_t i = 0; i < MAX_ELIMINATION_STEP; i++) {
|
||||
printf("Eliminated after %2u test_bytes: %5.2f%%\n", i+1, (float)keys_eliminated[i] / bucket_states_tested * 100);
|
||||
printf("Eliminated after %2u test_bytes: %5.2f%%\n", i + 1, (float)keys_eliminated[i] / bucket_states_tested * 100);
|
||||
}
|
||||
#endif
|
||||
return key;
|
||||
|
@ -548,36 +551,39 @@ bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_
|
|||
|
||||
static SIMDExecInstr intSIMDInstr = SIMD_AUTO;
|
||||
|
||||
void SetSIMDInstr(SIMDExecInstr instr) {
|
||||
void SetSIMDInstr(SIMDExecInstr instr)
|
||||
{
|
||||
intSIMDInstr = instr;
|
||||
|
||||
crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
|
||||
bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;
|
||||
}
|
||||
|
||||
SIMDExecInstr GetSIMDInstr() {
|
||||
SIMDExecInstr GetSIMDInstr()
|
||||
{
|
||||
SIMDExecInstr instr = SIMD_NONE;
|
||||
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX;
|
||||
else
|
||||
#endif
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2;
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
instr = SIMD_NONE;
|
||||
|
||||
return instr;
|
||||
}
|
||||
|
||||
SIMDExecInstr GetSIMDInstrAuto() {
|
||||
SIMDExecInstr GetSIMDInstrAuto()
|
||||
{
|
||||
SIMDExecInstr instr = intSIMDInstr;
|
||||
if (instr == SIMD_AUTO)
|
||||
return GetSIMDInstr();
|
||||
|
@ -586,15 +592,16 @@ SIMDExecInstr GetSIMDInstrAuto() {
|
|||
}
|
||||
|
||||
// determine the available instruction set at runtime and call the correct function
|
||||
const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
|
||||
switch(GetSIMDInstrAuto()) {
|
||||
const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces)
|
||||
{
|
||||
switch (GetSIMDInstrAuto()) {
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
case SIMD_AVX512:
|
||||
crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512;
|
||||
break;
|
||||
#endif
|
||||
#endif
|
||||
case SIMD_AVX2:
|
||||
crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2;
|
||||
break;
|
||||
|
@ -607,10 +614,10 @@ const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_firs
|
|||
case SIMD_MMX:
|
||||
crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX;
|
||||
break;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
default:
|
||||
crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD;
|
||||
crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -618,15 +625,16 @@ const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_firs
|
|||
return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
|
||||
}
|
||||
|
||||
void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
|
||||
switch(GetSIMDInstrAuto()) {
|
||||
void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par)
|
||||
{
|
||||
switch (GetSIMDInstrAuto()) {
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
case SIMD_AVX512:
|
||||
bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512;
|
||||
break;
|
||||
#endif
|
||||
#endif
|
||||
case SIMD_AVX2:
|
||||
bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2;
|
||||
break;
|
||||
|
@ -639,10 +647,10 @@ void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_t
|
|||
case SIMD_MMX:
|
||||
bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX;
|
||||
break;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
default:
|
||||
bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD;
|
||||
bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -651,11 +659,13 @@ void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_t
|
|||
}
|
||||
|
||||
// Entries to dispatched function calls
|
||||
const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
|
||||
const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces)
|
||||
{
|
||||
return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
|
||||
}
|
||||
|
||||
void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
|
||||
void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par)
|
||||
{
|
||||
(*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
|
||||
}
|
||||
|
||||
|
|
|
@ -115,31 +115,31 @@
|
|||
|
||||
|
||||
// typedefs and declaration of functions:
|
||||
typedef uint32_t* malloc_bitarray_t(uint32_t);
|
||||
typedef uint32_t *malloc_bitarray_t(uint32_t);
|
||||
malloc_bitarray_t malloc_bitarray_AVX512, malloc_bitarray_AVX2, malloc_bitarray_AVX, malloc_bitarray_SSE2, malloc_bitarray_MMX, malloc_bitarray_NOSIMD, malloc_bitarray_dispatch;
|
||||
typedef void free_bitarray_t(uint32_t*);
|
||||
typedef void free_bitarray_t(uint32_t *);
|
||||
free_bitarray_t free_bitarray_AVX512, free_bitarray_AVX2, free_bitarray_AVX, free_bitarray_SSE2, free_bitarray_MMX, free_bitarray_NOSIMD, free_bitarray_dispatch;
|
||||
typedef uint32_t bitcount_t(uint32_t);
|
||||
bitcount_t bitcount_AVX512, bitcount_AVX2, bitcount_AVX, bitcount_SSE2, bitcount_MMX, bitcount_NOSIMD, bitcount_dispatch;
|
||||
typedef uint32_t count_states_t(uint32_t*);
|
||||
typedef uint32_t count_states_t(uint32_t *);
|
||||
count_states_t count_states_AVX512, count_states_AVX2, count_states_AVX, count_states_SSE2, count_states_MMX, count_states_NOSIMD, count_states_dispatch;
|
||||
typedef void bitarray_AND_t(uint32_t[], uint32_t[]);
|
||||
bitarray_AND_t bitarray_AND_AVX512, bitarray_AND_AVX2, bitarray_AND_AVX, bitarray_AND_SSE2, bitarray_AND_MMX, bitarray_AND_NOSIMD, bitarray_AND_dispatch;
|
||||
typedef void bitarray_low20_AND_t(uint32_t*, uint32_t*);
|
||||
typedef void bitarray_low20_AND_t(uint32_t *, uint32_t *);
|
||||
bitarray_low20_AND_t bitarray_low20_AND_AVX512, bitarray_low20_AND_AVX2, bitarray_low20_AND_AVX, bitarray_low20_AND_SSE2, bitarray_low20_AND_MMX, bitarray_low20_AND_NOSIMD, bitarray_low20_AND_dispatch;
|
||||
typedef uint32_t count_bitarray_AND_t(uint32_t*, uint32_t*);
|
||||
typedef uint32_t count_bitarray_AND_t(uint32_t *, uint32_t *);
|
||||
count_bitarray_AND_t count_bitarray_AND_AVX512, count_bitarray_AND_AVX2, count_bitarray_AND_AVX, count_bitarray_AND_SSE2, count_bitarray_AND_MMX, count_bitarray_AND_NOSIMD, count_bitarray_AND_dispatch;
|
||||
typedef uint32_t count_bitarray_low20_AND_t(uint32_t*, uint32_t*);
|
||||
typedef uint32_t count_bitarray_low20_AND_t(uint32_t *, uint32_t *);
|
||||
count_bitarray_low20_AND_t count_bitarray_low20_AND_AVX512, count_bitarray_low20_AND_AVX2, count_bitarray_low20_AND_AVX, count_bitarray_low20_AND_SSE2, count_bitarray_low20_AND_MMX, count_bitarray_low20_AND_NOSIMD, count_bitarray_low20_AND_dispatch;
|
||||
typedef void bitarray_AND4_t(uint32_t*, uint32_t*, uint32_t*, uint32_t*);
|
||||
typedef void bitarray_AND4_t(uint32_t *, uint32_t *, uint32_t *, uint32_t *);
|
||||
bitarray_AND4_t bitarray_AND4_AVX512, bitarray_AND4_AVX2, bitarray_AND4_AVX, bitarray_AND4_SSE2, bitarray_AND4_MMX, bitarray_AND4_NOSIMD, bitarray_AND4_dispatch;
|
||||
typedef void bitarray_OR_t(uint32_t[], uint32_t[]);
|
||||
bitarray_OR_t bitarray_OR_AVX512, bitarray_OR_AVX2, bitarray_OR_AVX, bitarray_OR_SSE2, bitarray_OR_MMX, bitarray_OR_NOSIMD, bitarray_OR_dispatch;
|
||||
typedef uint32_t count_bitarray_AND2_t(uint32_t*, uint32_t*);
|
||||
typedef uint32_t count_bitarray_AND2_t(uint32_t *, uint32_t *);
|
||||
count_bitarray_AND2_t count_bitarray_AND2_AVX512, count_bitarray_AND2_AVX2, count_bitarray_AND2_AVX, count_bitarray_AND2_SSE2, count_bitarray_AND2_MMX, count_bitarray_AND2_NOSIMD, count_bitarray_AND2_dispatch;
|
||||
typedef uint32_t count_bitarray_AND3_t(uint32_t*, uint32_t*, uint32_t*);
|
||||
typedef uint32_t count_bitarray_AND3_t(uint32_t *, uint32_t *, uint32_t *);
|
||||
count_bitarray_AND3_t count_bitarray_AND3_AVX512, count_bitarray_AND3_AVX2, count_bitarray_AND3_AVX, count_bitarray_AND3_SSE2, count_bitarray_AND3_MMX, count_bitarray_AND3_NOSIMD, count_bitarray_AND3_dispatch;
|
||||
typedef uint32_t count_bitarray_AND4_t(uint32_t*, uint32_t*, uint32_t*, uint32_t*);
|
||||
typedef uint32_t count_bitarray_AND4_t(uint32_t *, uint32_t *, uint32_t *, uint32_t *);
|
||||
count_bitarray_AND4_t count_bitarray_AND4_AVX512, count_bitarray_AND4_AVX2, count_bitarray_AND4_AVX, count_bitarray_AND4_SSE2, count_bitarray_AND4_MMX, count_bitarray_AND4_NOSIMD, count_bitarray_AND4_dispatch;
|
||||
|
||||
|
||||
|
@ -149,7 +149,7 @@ inline uint32_t *MALLOC_BITARRAY(uint32_t x)
|
|||
return __builtin_assume_aligned(_aligned_malloc((x), __BIGGEST_ALIGNMENT__), __BIGGEST_ALIGNMENT__);
|
||||
#elif defined (__APPLE__)
|
||||
uint32_t *allocated_memory;
|
||||
if (posix_memalign((void**)&allocated_memory, __BIGGEST_ALIGNMENT__, x)) {
|
||||
if (posix_memalign((void **)&allocated_memory, __BIGGEST_ALIGNMENT__, x)) {
|
||||
return NULL;
|
||||
} else {
|
||||
return __builtin_assume_aligned(allocated_memory, __BIGGEST_ALIGNMENT__);
|
||||
|
@ -179,7 +179,7 @@ inline uint32_t BITCOUNT(uint32_t a)
|
|||
inline uint32_t COUNT_STATES(uint32_t *A)
|
||||
{
|
||||
uint32_t count = 0;
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
count += BITCOUNT(A[i]);
|
||||
}
|
||||
return count;
|
||||
|
@ -190,7 +190,7 @@ inline void BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
|
|||
{
|
||||
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
|
||||
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
A[i] &= B[i];
|
||||
}
|
||||
}
|
||||
|
@ -201,7 +201,7 @@ inline void BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restrict B)
|
|||
uint16_t *a = (uint16_t *)__builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
|
||||
uint16_t *b = (uint16_t *)__builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
|
||||
|
||||
for (uint32_t i = 0; i < (1<<20); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 20); i++) {
|
||||
if (!b[i]) {
|
||||
a[i] = 0;
|
||||
}
|
||||
|
@ -214,7 +214,7 @@ inline uint32_t COUNT_BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
|
|||
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
|
||||
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
|
||||
uint32_t count = 0;
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
A[i] &= B[i];
|
||||
count += BITCOUNT(A[i]);
|
||||
}
|
||||
|
@ -228,7 +228,7 @@ inline uint32_t COUNT_BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restric
|
|||
uint16_t *b = (uint16_t *)__builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
|
||||
uint32_t count = 0;
|
||||
|
||||
for (uint32_t i = 0; i < (1<<20); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 20); i++) {
|
||||
if (!b[i]) {
|
||||
a[i] = 0;
|
||||
}
|
||||
|
@ -244,7 +244,7 @@ inline void BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, uint32_t *
|
|||
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
|
||||
C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
|
||||
D = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
A[i] = B[i] & C[i] & D[i];
|
||||
}
|
||||
}
|
||||
|
@ -254,7 +254,7 @@ inline void BITARRAY_OR(uint32_t *restrict A, uint32_t *restrict B)
|
|||
{
|
||||
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
|
||||
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
A[i] |= B[i];
|
||||
}
|
||||
}
|
||||
|
@ -265,7 +265,7 @@ inline uint32_t COUNT_BITARRAY_AND2(uint32_t *restrict A, uint32_t *restrict B)
|
|||
A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
|
||||
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
|
||||
uint32_t count = 0;
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
count += BITCOUNT(A[i] & B[i]);
|
||||
}
|
||||
return count;
|
||||
|
@ -278,7 +278,7 @@ inline uint32_t COUNT_BITARRAY_AND3(uint32_t *restrict A, uint32_t *restrict B,
|
|||
B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
|
||||
C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
|
||||
uint32_t count = 0;
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
count += BITCOUNT(A[i] & B[i] & C[i]);
|
||||
}
|
||||
return count;
|
||||
|
@ -292,7 +292,7 @@ inline uint32_t COUNT_BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B,
|
|||
C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
|
||||
D = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);
|
||||
uint32_t count = 0;
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
count += BITCOUNT(A[i] & B[i] & C[i] & D[i]);
|
||||
}
|
||||
return count;
|
||||
|
@ -317,20 +317,21 @@ count_bitarray_AND3_t *count_bitarray_AND3_function_p = &count_bitarray_AND3_dis
|
|||
count_bitarray_AND4_t *count_bitarray_AND4_function_p = &count_bitarray_AND4_dispatch;
|
||||
|
||||
// determine the available instruction set at runtime and call the correct function
|
||||
uint32_t *malloc_bitarray_dispatch(uint32_t x) {
|
||||
uint32_t *malloc_bitarray_dispatch(uint32_t x)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) malloc_bitarray_function_p = &malloc_bitarray_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) malloc_bitarray_function_p = &malloc_bitarray_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) malloc_bitarray_function_p = &malloc_bitarray_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) malloc_bitarray_function_p = &malloc_bitarray_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
malloc_bitarray_function_p = &malloc_bitarray_NOSIMD;
|
||||
|
||||
|
@ -338,20 +339,21 @@ uint32_t *malloc_bitarray_dispatch(uint32_t x) {
|
|||
return (*malloc_bitarray_function_p)(x);
|
||||
}
|
||||
|
||||
void free_bitarray_dispatch(uint32_t *x) {
|
||||
void free_bitarray_dispatch(uint32_t *x)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) free_bitarray_function_p = &free_bitarray_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) free_bitarray_function_p = &free_bitarray_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) free_bitarray_function_p = &free_bitarray_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) free_bitarray_function_p = &free_bitarray_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
free_bitarray_function_p = &free_bitarray_NOSIMD;
|
||||
|
||||
|
@ -359,20 +361,21 @@ void free_bitarray_dispatch(uint32_t *x) {
|
|||
(*free_bitarray_function_p)(x);
|
||||
}
|
||||
|
||||
uint32_t bitcount_dispatch(uint32_t a) {
|
||||
uint32_t bitcount_dispatch(uint32_t a)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) bitcount_function_p = &bitcount_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) bitcount_function_p = &bitcount_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) bitcount_function_p = &bitcount_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) bitcount_function_p = &bitcount_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
bitcount_function_p = &bitcount_NOSIMD;
|
||||
|
||||
|
@ -380,20 +383,21 @@ uint32_t bitcount_dispatch(uint32_t a) {
|
|||
return (*bitcount_function_p)(a);
|
||||
}
|
||||
|
||||
uint32_t count_states_dispatch(uint32_t *bitarray) {
|
||||
uint32_t count_states_dispatch(uint32_t *bitarray)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) count_states_function_p = &count_states_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) count_states_function_p = &count_states_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) count_states_function_p = &count_states_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) count_states_function_p = &count_states_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
count_states_function_p = &count_states_NOSIMD;
|
||||
|
||||
|
@ -401,41 +405,43 @@ uint32_t count_states_dispatch(uint32_t *bitarray) {
|
|||
return (*count_states_function_p)(bitarray);
|
||||
}
|
||||
|
||||
void bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
|
||||
void bitarray_AND_dispatch(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) bitarray_AND_function_p = &bitarray_AND_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) bitarray_AND_function_p = &bitarray_AND_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) bitarray_AND_function_p = &bitarray_AND_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) bitarray_AND_function_p = &bitarray_AND_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
bitarray_AND_function_p = &bitarray_AND_NOSIMD;
|
||||
|
||||
// call the most optimized function for this CPU
|
||||
(*bitarray_AND_function_p)(A,B);
|
||||
(*bitarray_AND_function_p)(A, B);
|
||||
}
|
||||
|
||||
void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
|
||||
void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
bitarray_low20_AND_function_p = &bitarray_low20_AND_NOSIMD;
|
||||
|
||||
|
@ -443,20 +449,21 @@ void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
|
|||
(*bitarray_low20_AND_function_p)(A, B);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
|
||||
uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND_function_p = &count_bitarray_AND_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND_function_p = &count_bitarray_AND_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
count_bitarray_AND_function_p = &count_bitarray_AND_NOSIMD;
|
||||
|
||||
|
@ -464,20 +471,21 @@ uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
|
|||
return (*count_bitarray_AND_function_p)(A, B);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
|
||||
uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_NOSIMD;
|
||||
|
||||
|
@ -485,20 +493,21 @@ uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
|
|||
return (*count_bitarray_low20_AND_function_p)(A, B);
|
||||
}
|
||||
|
||||
void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
|
||||
void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) bitarray_AND4_function_p = &bitarray_AND4_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) bitarray_AND4_function_p = &bitarray_AND4_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) bitarray_AND4_function_p = &bitarray_AND4_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) bitarray_AND4_function_p = &bitarray_AND4_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
bitarray_AND4_function_p = &bitarray_AND4_NOSIMD;
|
||||
|
||||
|
@ -506,41 +515,43 @@ void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D)
|
|||
(*bitarray_AND4_function_p)(A, B, C, D);
|
||||
}
|
||||
|
||||
void bitarray_OR_dispatch(uint32_t *A, uint32_t *B) {
|
||||
void bitarray_OR_dispatch(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) bitarray_OR_function_p = &bitarray_OR_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) bitarray_OR_function_p = &bitarray_OR_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) bitarray_OR_function_p = &bitarray_OR_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) bitarray_OR_function_p = &bitarray_OR_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
bitarray_OR_function_p = &bitarray_OR_NOSIMD;
|
||||
|
||||
// call the most optimized function for this CPU
|
||||
(*bitarray_OR_function_p)(A,B);
|
||||
(*bitarray_OR_function_p)(A, B);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) {
|
||||
uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
count_bitarray_AND2_function_p = &count_bitarray_AND2_NOSIMD;
|
||||
|
||||
|
@ -548,20 +559,21 @@ uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) {
|
|||
return (*count_bitarray_AND2_function_p)(A, B);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) {
|
||||
uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
count_bitarray_AND3_function_p = &count_bitarray_AND3_NOSIMD;
|
||||
|
||||
|
@ -569,20 +581,21 @@ uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) {
|
|||
return (*count_bitarray_AND3_function_p)(A, B, C);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
|
||||
uint32_t count_bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D)
|
||||
{
|
||||
#if defined (__i386__) || defined (__x86_64__)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1))
|
||||
#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
|
||||
if (__builtin_cpu_supports("avx512f")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX512;
|
||||
else if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
|
||||
#else
|
||||
#else
|
||||
if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
|
||||
#endif
|
||||
#endif
|
||||
else if (__builtin_cpu_supports("avx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX;
|
||||
else if (__builtin_cpu_supports("sse2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_SSE2;
|
||||
else if (__builtin_cpu_supports("mmx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_MMX;
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
count_bitarray_AND4_function_p = &count_bitarray_AND4_NOSIMD;
|
||||
|
||||
|
@ -594,55 +607,68 @@ uint32_t count_bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uin
|
|||
///////////////////////////////////////////////77
|
||||
// Entries to dispatched function calls
|
||||
|
||||
uint32_t *malloc_bitarray(uint32_t x) {
|
||||
uint32_t *malloc_bitarray(uint32_t x)
|
||||
{
|
||||
return (*malloc_bitarray_function_p)(x);
|
||||
}
|
||||
|
||||
void free_bitarray(uint32_t *x) {
|
||||
void free_bitarray(uint32_t *x)
|
||||
{
|
||||
(*free_bitarray_function_p)(x);
|
||||
}
|
||||
|
||||
uint32_t bitcount(uint32_t a) {
|
||||
uint32_t bitcount(uint32_t a)
|
||||
{
|
||||
return (*bitcount_function_p)(a);
|
||||
}
|
||||
|
||||
uint32_t count_states(uint32_t *bitarray) {
|
||||
uint32_t count_states(uint32_t *bitarray)
|
||||
{
|
||||
return (*count_states_function_p)(bitarray);
|
||||
}
|
||||
|
||||
void bitarray_AND(uint32_t *A, uint32_t *B) {
|
||||
void bitarray_AND(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
(*bitarray_AND_function_p)(A, B);
|
||||
}
|
||||
|
||||
void bitarray_low20_AND(uint32_t *A, uint32_t *B) {
|
||||
void bitarray_low20_AND(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
(*bitarray_low20_AND_function_p)(A, B);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_AND(uint32_t *A, uint32_t *B) {
|
||||
uint32_t count_bitarray_AND(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
return (*count_bitarray_AND_function_p)(A, B);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_low20_AND(uint32_t *A, uint32_t *B) {
|
||||
uint32_t count_bitarray_low20_AND(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
return (*count_bitarray_low20_AND_function_p)(A, B);
|
||||
}
|
||||
|
||||
void bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
|
||||
void bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D)
|
||||
{
|
||||
(*bitarray_AND4_function_p)(A, B, C, D);
|
||||
}
|
||||
|
||||
void bitarray_OR(uint32_t *A, uint32_t *B) {
|
||||
void bitarray_OR(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
(*bitarray_OR_function_p)(A, B);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_AND2(uint32_t *A, uint32_t *B) {
|
||||
uint32_t count_bitarray_AND2(uint32_t *A, uint32_t *B)
|
||||
{
|
||||
return (*count_bitarray_AND2_function_p)(A, B);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_AND3(uint32_t *A, uint32_t *B, uint32_t *C) {
|
||||
uint32_t count_bitarray_AND3(uint32_t *A, uint32_t *B, uint32_t *C)
|
||||
{
|
||||
return (*count_bitarray_AND3_function_p)(A, B, C);
|
||||
}
|
||||
|
||||
uint32_t count_bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
|
||||
uint32_t count_bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D)
|
||||
{
|
||||
return (*count_bitarray_AND4_function_p)(A, B, C, D);
|
||||
}
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ static uint32_t bf_test_nonce[256];
|
|||
static uint8_t bf_test_nonce_2nd_byte[256];
|
||||
static uint8_t bf_test_nonce_par[256];
|
||||
static uint32_t bucket_count = 0;
|
||||
static statelist_t* buckets[128];
|
||||
static statelist_t *buckets[128];
|
||||
static uint32_t keys_found = 0;
|
||||
static uint64_t num_keys_tested;
|
||||
static uint64_t found_bs_key = 0;
|
||||
|
@ -125,7 +125,7 @@ bool verify_key(uint32_t cuid, noncelist_t *nonces, uint8_t *best_first_bytes, u
|
|||
lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true);
|
||||
for (int8_t byte_pos = 3; byte_pos >= 0; byte_pos--) {
|
||||
uint8_t test_par_enc_bit = (test_nonce->par_enc >> byte_pos) & 0x01; // the encoded parity bit
|
||||
uint8_t test_byte_enc = (test_nonce->nonce_enc >> (8*byte_pos)) & 0xff; // the encoded nonce byte
|
||||
uint8_t test_byte_enc = (test_nonce->nonce_enc >> (8 * byte_pos)) & 0xff; // the encoded nonce byte
|
||||
uint8_t test_byte_dec = crypto1_byte(&pcs, test_byte_enc /* ^ (cuid >> (8*byte_pos)) */, true) ^ test_byte_enc; // decode the nonce byte
|
||||
uint8_t ks_par = filter(pcs.odd); // the keystream bit to encode/decode the parity bit
|
||||
uint8_t test_par_enc2 = ks_par ^ evenparity8(test_byte_dec); // determine the decoded byte's parity and encode it
|
||||
|
@ -138,13 +138,14 @@ bool verify_key(uint32_t cuid, noncelist_t *nonces, uint8_t *best_first_bytes, u
|
|||
}
|
||||
return true;
|
||||
}
|
||||
static void*
|
||||
static void *
|
||||
#ifdef __has_attribute
|
||||
#if __has_attribute(force_align_arg_pointer)
|
||||
__attribute__((force_align_arg_pointer))
|
||||
#endif
|
||||
#if __has_attribute(force_align_arg_pointer)
|
||||
__attribute__((force_align_arg_pointer))
|
||||
#endif
|
||||
crack_states_thread(void* x){
|
||||
#endif
|
||||
crack_states_thread(void *x)
|
||||
{
|
||||
struct arg {
|
||||
bool silent;
|
||||
int thread_ID;
|
||||
|
@ -152,36 +153,36 @@ crack_states_thread(void* x){
|
|||
uint32_t num_acquired_nonces;
|
||||
uint64_t maximum_states;
|
||||
noncelist_t *nonces;
|
||||
uint8_t* best_first_bytes;
|
||||
uint8_t *best_first_bytes;
|
||||
} *thread_arg;
|
||||
|
||||
thread_arg = (struct arg *)x;
|
||||
const int thread_id = thread_arg->thread_ID;
|
||||
uint32_t current_bucket = thread_id;
|
||||
while(current_bucket < bucket_count){
|
||||
while (current_bucket < bucket_count) {
|
||||
statelist_t *bucket = buckets[current_bucket];
|
||||
if(bucket){
|
||||
if (bucket) {
|
||||
#if defined (DEBUG_BRUTE_FORCE)
|
||||
printf("Thread %u starts working on bucket %u\n", thread_id, current_bucket);
|
||||
#endif
|
||||
const uint64_t key = crack_states_bitsliced(thread_arg->cuid, thread_arg->best_first_bytes, bucket, &keys_found, &num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, thread_arg->nonces);
|
||||
if(key != -1){
|
||||
if (key != -1) {
|
||||
__atomic_fetch_add(&keys_found, 1, __ATOMIC_SEQ_CST);
|
||||
__atomic_fetch_add(&found_bs_key, key, __ATOMIC_SEQ_CST);
|
||||
|
||||
char progress_text[80];
|
||||
char keystr[18];
|
||||
sprintf(keystr, "%012" PRIx64 " ", key);
|
||||
sprintf(progress_text, "Brute force phase completed. Key found: " _YELLOW_(%s), keystr);
|
||||
sprintf(progress_text, "Brute force phase completed. Key found: " _YELLOW_( % s), keystr);
|
||||
hardnested_print_progress(thread_arg->num_acquired_nonces, progress_text, 0.0, 0);
|
||||
break;
|
||||
} else if(keys_found){
|
||||
} else if (keys_found) {
|
||||
break;
|
||||
} else {
|
||||
if (!thread_arg->silent) {
|
||||
char progress_text[80];
|
||||
sprintf(progress_text, "Brute force phase: %6.02f%%\t", 100.0*(float)num_keys_tested/(float)(thread_arg->maximum_states));
|
||||
float remaining_bruteforce = thread_arg->nonces[thread_arg->best_first_bytes[0]].expected_num_brute_force - (float)num_keys_tested/2;
|
||||
sprintf(progress_text, "Brute force phase: %6.02f%%\t", 100.0 * (float)num_keys_tested / (float)(thread_arg->maximum_states));
|
||||
float remaining_bruteforce = thread_arg->nonces[thread_arg->best_first_bytes[0]].expected_num_brute_force - (float)num_keys_tested / 2;
|
||||
hardnested_print_progress(thread_arg->num_acquired_nonces, progress_text, remaining_bruteforce, 5000);
|
||||
}
|
||||
}
|
||||
|
@ -209,9 +210,9 @@ void prepare_bf_test_nonces(noncelist_t *nonces, uint8_t best_first_byte)
|
|||
|
||||
// printf("Nonces to bruteforce: %d\n", nonces_to_bruteforce);
|
||||
// printf("Common bits of first 4 2nd nonce bytes (before sorting): %u %u %u\n",
|
||||
// trailing_zeros(bf_test_nonce_2nd_byte[1] ^ bf_test_nonce_2nd_byte[0]),
|
||||
// trailing_zeros(bf_test_nonce_2nd_byte[2] ^ bf_test_nonce_2nd_byte[1]),
|
||||
// trailing_zeros(bf_test_nonce_2nd_byte[3] ^ bf_test_nonce_2nd_byte[2]));
|
||||
// trailing_zeros(bf_test_nonce_2nd_byte[1] ^ bf_test_nonce_2nd_byte[0]),
|
||||
// trailing_zeros(bf_test_nonce_2nd_byte[2] ^ bf_test_nonce_2nd_byte[1]),
|
||||
// trailing_zeros(bf_test_nonce_2nd_byte[3] ^ bf_test_nonce_2nd_byte[2]));
|
||||
|
||||
uint8_t best_4[4] = {0};
|
||||
int sum_best = -1;
|
||||
|
@ -221,13 +222,13 @@ void prepare_bf_test_nonces(noncelist_t *nonces, uint8_t best_first_byte)
|
|||
for (uint16_t n3 = 0; n3 < nonces_to_bruteforce; n3++) {
|
||||
if ((n3 != n2 && n3 != n1) || nonces_to_bruteforce < 3
|
||||
// && trailing_zeros(bf_test_nonce_2nd_byte[n1] ^ bf_test_nonce_2nd_byte[n2])
|
||||
// > trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3])
|
||||
) {
|
||||
// > trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3])
|
||||
) {
|
||||
for (uint16_t n4 = 0; n4 < nonces_to_bruteforce; n4++) {
|
||||
if ((n4 != n3 && n4 != n2 && n4 != n1) || nonces_to_bruteforce < 4
|
||||
// && trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3])
|
||||
// > trailing_zeros(bf_test_nonce_2nd_byte[n3] ^ bf_test_nonce_2nd_byte[n4])
|
||||
) {
|
||||
) {
|
||||
int sum = nonces_to_bruteforce > 1 ? trailing_zeros(bf_test_nonce_2nd_byte[n1] ^ bf_test_nonce_2nd_byte[n2]) : 0.0
|
||||
+ nonces_to_bruteforce > 2 ? trailing_zeros(bf_test_nonce_2nd_byte[n2] ^ bf_test_nonce_2nd_byte[n3]) : 0.0
|
||||
+ nonces_to_bruteforce > 3 ? trailing_zeros(bf_test_nonce_2nd_byte[n3] ^ bf_test_nonce_2nd_byte[n4]) : 0.0;
|
||||
|
@ -264,7 +265,8 @@ void prepare_bf_test_nonces(noncelist_t *nonces, uint8_t best_first_byte)
|
|||
|
||||
|
||||
#if defined (WRITE_BENCH_FILE)
|
||||
static void write_benchfile(statelist_t *candidates) {
|
||||
static void write_benchfile(statelist_t *candidates)
|
||||
{
|
||||
|
||||
printf("Writing brute force benchmark data...");
|
||||
FILE *benchfile = fopen(TEST_BENCH_FILENAME, "wb");
|
||||
|
@ -314,7 +316,7 @@ bool brute_force_bs(float *bf_rate, statelist_t *candidates, uint32_t cuid, uint
|
|||
uint64_t start_time = msclock();
|
||||
|
||||
#if defined(__linux__) || defined(__APPLE__)
|
||||
if ( NUM_BRUTE_FORCE_THREADS < 0 )
|
||||
if (NUM_BRUTE_FORCE_THREADS < 0)
|
||||
return false;
|
||||
#endif
|
||||
|
||||
|
@ -329,7 +331,7 @@ bool brute_force_bs(float *bf_rate, statelist_t *candidates, uint32_t cuid, uint
|
|||
uint8_t *best_first_bytes;
|
||||
} thread_args[NUM_BRUTE_FORCE_THREADS];
|
||||
|
||||
for (uint32_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++){
|
||||
for (uint32_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++) {
|
||||
thread_args[i].thread_ID = i;
|
||||
thread_args[i].silent = silent;
|
||||
thread_args[i].cuid = cuid;
|
||||
|
@ -337,9 +339,9 @@ bool brute_force_bs(float *bf_rate, statelist_t *candidates, uint32_t cuid, uint
|
|||
thread_args[i].maximum_states = maximum_states;
|
||||
thread_args[i].nonces = nonces;
|
||||
thread_args[i].best_first_bytes = best_first_bytes;
|
||||
pthread_create(&threads[i], NULL, crack_states_thread, (void*)&thread_args[i]);
|
||||
pthread_create(&threads[i], NULL, crack_states_thread, (void *)&thread_args[i]);
|
||||
}
|
||||
for (uint32_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++){
|
||||
for (uint32_t i = 0; i < NUM_BRUTE_FORCE_THREADS; i++) {
|
||||
pthread_join(threads[i], 0);
|
||||
}
|
||||
|
||||
|
@ -348,14 +350,15 @@ bool brute_force_bs(float *bf_rate, statelist_t *candidates, uint32_t cuid, uint
|
|||
if (bf_rate != NULL)
|
||||
*bf_rate = (float)num_keys_tested / ((float)elapsed_time / 1000.0);
|
||||
|
||||
if ( keys_found > 0)
|
||||
if (keys_found > 0)
|
||||
*foundkey = found_bs_key;
|
||||
|
||||
return (keys_found != 0);
|
||||
}
|
||||
|
||||
|
||||
static bool read_bench_data(statelist_t *test_candidates) {
|
||||
static bool read_bench_data(statelist_t *test_candidates)
|
||||
{
|
||||
|
||||
size_t bytes_read = 0;
|
||||
uint32_t temp = 0;
|
||||
|
@ -401,7 +404,7 @@ static bool read_bench_data(statelist_t *test_candidates) {
|
|||
}
|
||||
}
|
||||
for (uint32_t i = states_read; i < TEST_BENCH_SIZE; i++) {
|
||||
test_candidates->states[EVEN_STATE][i] = test_candidates->states[EVEN_STATE][i-states_read];
|
||||
test_candidates->states[EVEN_STATE][i] = test_candidates->states[EVEN_STATE][i - states_read];
|
||||
}
|
||||
for (uint32_t i = states_read; i < num_states; i++) {
|
||||
bytes_read = fread(&temp, 1, sizeof(uint32_t), benchfile);
|
||||
|
@ -418,7 +421,7 @@ static bool read_bench_data(statelist_t *test_candidates) {
|
|||
}
|
||||
}
|
||||
for (uint32_t i = states_read; i < TEST_BENCH_SIZE; i++) {
|
||||
test_candidates->states[ODD_STATE][i] = test_candidates->states[ODD_STATE][i-states_read];
|
||||
test_candidates->states[ODD_STATE][i] = test_candidates->states[ODD_STATE][i - states_read];
|
||||
}
|
||||
|
||||
fclose(benchfile);
|
||||
|
@ -426,17 +429,18 @@ static bool read_bench_data(statelist_t *test_candidates) {
|
|||
}
|
||||
|
||||
|
||||
float brute_force_benchmark() {
|
||||
float brute_force_benchmark()
|
||||
{
|
||||
statelist_t test_candidates[NUM_BRUTE_FORCE_THREADS];
|
||||
|
||||
test_candidates[0].states[ODD_STATE] = malloc((TEST_BENCH_SIZE+1) * sizeof(uint32_t));
|
||||
test_candidates[0].states[EVEN_STATE] = malloc((TEST_BENCH_SIZE+1) * sizeof(uint32_t));
|
||||
for (uint8_t i = 0; i < NUM_BRUTE_FORCE_THREADS - 1; i++){
|
||||
test_candidates[0].states[ODD_STATE] = malloc((TEST_BENCH_SIZE + 1) * sizeof(uint32_t));
|
||||
test_candidates[0].states[EVEN_STATE] = malloc((TEST_BENCH_SIZE + 1) * sizeof(uint32_t));
|
||||
for (uint8_t i = 0; i < NUM_BRUTE_FORCE_THREADS - 1; i++) {
|
||||
test_candidates[i].next = test_candidates + i + 1;
|
||||
test_candidates[i+1].states[ODD_STATE] = test_candidates[0].states[ODD_STATE];
|
||||
test_candidates[i+1].states[EVEN_STATE] = test_candidates[0].states[EVEN_STATE];
|
||||
test_candidates[i + 1].states[ODD_STATE] = test_candidates[0].states[ODD_STATE];
|
||||
test_candidates[i + 1].states[EVEN_STATE] = test_candidates[0].states[EVEN_STATE];
|
||||
}
|
||||
test_candidates[NUM_BRUTE_FORCE_THREADS-1].next = NULL;
|
||||
test_candidates[NUM_BRUTE_FORCE_THREADS - 1].next = NULL;
|
||||
|
||||
if (!read_bench_data(test_candidates)) {
|
||||
PrintAndLogEx(NORMAL, "Couldn't read benchmark data. Assuming brute force rate of %1.0f states per second", DEFAULT_BRUTE_FORCE_RATE);
|
||||
|
@ -450,7 +454,7 @@ float brute_force_benchmark() {
|
|||
test_candidates[i].states[EVEN_STATE][TEST_BENCH_SIZE] = -1;
|
||||
}
|
||||
|
||||
uint64_t maximum_states = TEST_BENCH_SIZE*TEST_BENCH_SIZE*(uint64_t)NUM_BRUTE_FORCE_THREADS;
|
||||
uint64_t maximum_states = TEST_BENCH_SIZE * TEST_BENCH_SIZE * (uint64_t)NUM_BRUTE_FORCE_THREADS;
|
||||
|
||||
float bf_rate;
|
||||
uint64_t found_key = 0;
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
typedef struct {
|
||||
uint32_t *states[2];
|
||||
uint32_t len[2];
|
||||
void* next;
|
||||
void *next;
|
||||
} statelist_t;
|
||||
|
||||
extern void prepare_bf_test_nonces(noncelist_t *nonces, uint8_t best_first_byte);
|
||||
|
|
|
@ -47,12 +47,12 @@ static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even)
|
|||
if (odd_even == ODD_STATE) {
|
||||
for (uint16_t i = 0; i < 5; i++) {
|
||||
part_sum ^= filter(st);
|
||||
st = (st << 1) | ((j >> (3-i)) & 0x01) ;
|
||||
st = (st << 1) | ((j >> (3 - i)) & 0x01) ;
|
||||
}
|
||||
part_sum ^= 1; // XOR 1 cancelled out for the other 8 bits
|
||||
} else {
|
||||
for (uint16_t i = 0; i < 4; i++) {
|
||||
st = (st << 1) | ((j >> (3-i)) & 0x01) ;
|
||||
st = (st << 1) | ((j >> (3 - i)) & 0x01) ;
|
||||
part_sum ^= filter(st);
|
||||
}
|
||||
}
|
||||
|
@ -70,25 +70,25 @@ static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even)
|
|||
|
||||
static inline void clear_bitarray24(uint32_t *bitarray)
|
||||
{
|
||||
memset(bitarray, 0x00, sizeof(uint32_t) * (1<<19));
|
||||
memset(bitarray, 0x00, sizeof(uint32_t) * (1 << 19));
|
||||
}
|
||||
|
||||
|
||||
static inline uint32_t test_bit24(uint32_t *bitarray, uint32_t index)
|
||||
{
|
||||
return bitarray[index>>5] & (0x80000000>>(index&0x0000001f));
|
||||
return bitarray[index >> 5] & (0x80000000 >> (index & 0x0000001f));
|
||||
}
|
||||
|
||||
|
||||
static inline void set_bit24(uint32_t *bitarray, uint32_t index)
|
||||
{
|
||||
bitarray[index>>5] |= 0x80000000>>(index&0x0000001f);
|
||||
bitarray[index >> 5] |= 0x80000000 >> (index & 0x0000001f);
|
||||
}
|
||||
|
||||
|
||||
static inline uint32_t next_state(uint32_t *bitset, uint32_t state)
|
||||
{
|
||||
if (++state == 1<<24) return 1<<24;
|
||||
if (++state == 1 << 24) return 1 << 24;
|
||||
uint32_t index = state >> 5;
|
||||
uint_fast8_t bit = state & 0x1f;
|
||||
uint32_t line = bitset[index] << bit;
|
||||
|
@ -99,11 +99,11 @@ static inline uint32_t next_state(uint32_t *bitset, uint32_t state)
|
|||
line <<= 1;
|
||||
}
|
||||
index++;
|
||||
while (bitset[index] == 0x00000000 && state < 1<<24) {
|
||||
while (bitset[index] == 0x00000000 && state < 1 << 24) {
|
||||
index++;
|
||||
state += 0x20;
|
||||
}
|
||||
if (state >= 1<<24) return 1<<24;
|
||||
if (state >= 1 << 24) return 1 << 24;
|
||||
#if defined __GNUC__
|
||||
return state + __builtin_clz(bitset[index]);
|
||||
#else
|
||||
|
@ -115,14 +115,14 @@ static inline uint32_t next_state(uint32_t *bitset, uint32_t state)
|
|||
bit++;
|
||||
line <<= 1;
|
||||
}
|
||||
return 1<<24;
|
||||
return 1 << 24;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static inline uint32_t next_not_state(uint32_t *bitset, uint32_t state)
|
||||
{
|
||||
if (++state == 1<<24) return 1<<24;
|
||||
if (++state == 1 << 24) return 1 << 24;
|
||||
uint32_t index = state >> 5;
|
||||
uint_fast8_t bit = state & 0x1f;
|
||||
uint32_t line = bitset[index] << bit;
|
||||
|
@ -133,11 +133,11 @@ static inline uint32_t next_not_state(uint32_t *bitset, uint32_t state)
|
|||
line <<= 1;
|
||||
}
|
||||
index++;
|
||||
while (bitset[index] == 0xffffffff && state < 1<<24) {
|
||||
while (bitset[index] == 0xffffffff && state < 1 << 24) {
|
||||
index++;
|
||||
state += 0x20;
|
||||
}
|
||||
if (state >= 1<<24) return 1<<24;
|
||||
if (state >= 1 << 24) return 1 << 24;
|
||||
#if defined __GNUC__
|
||||
return state + __builtin_clz(~bitset[index]);
|
||||
#else
|
||||
|
@ -149,7 +149,7 @@ static inline uint32_t next_not_state(uint32_t *bitset, uint32_t state)
|
|||
bit++;
|
||||
line <<= 1;
|
||||
}
|
||||
return 1<<24;
|
||||
return 1 << 24;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -169,7 +169,7 @@ static inline uint32_t bitcount(uint32_t a)
|
|||
static inline uint32_t count_states(uint32_t *bitset)
|
||||
{
|
||||
uint32_t count = 0;
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
count += bitcount(bitset[i]);
|
||||
}
|
||||
return count;
|
||||
|
@ -182,7 +182,7 @@ static void write_bitflips_file(odd_even_t odd_even, uint16_t bitflip, int sum_a
|
|||
sprintf(filename, "bitflip_%d_%03" PRIx16 "_sum%d_states.bin", odd_even, bitflip, sum_a0);
|
||||
FILE *outfile = fopen(filename, "wb");
|
||||
fwrite(&count, 1, sizeof(count), outfile);
|
||||
fwrite(bitset, 1, sizeof(uint32_t)*(1<<19), outfile);
|
||||
fwrite(bitset, 1, sizeof(uint32_t) * (1 << 19), outfile);
|
||||
fclose(outfile);
|
||||
}
|
||||
|
||||
|
@ -194,7 +194,7 @@ static void init_part_sum_bitarrays(void)
|
|||
printf("init_part_sum_bitarrays()...");
|
||||
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
|
||||
for (uint16_t part_sum_a0 = 0; part_sum_a0 < NUM_PART_SUMS; part_sum_a0++) {
|
||||
part_sum_a0_bitarrays[odd_even][part_sum_a0] = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1<<19));
|
||||
part_sum_a0_bitarrays[odd_even][part_sum_a0] = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1 << 19));
|
||||
if (part_sum_a0_bitarrays[odd_even][part_sum_a0] == NULL) {
|
||||
printf("Out of memory error in init_part_suma0_statelists(). Aborting...\n");
|
||||
exit(4);
|
||||
|
@ -204,10 +204,10 @@ static void init_part_sum_bitarrays(void)
|
|||
}
|
||||
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
|
||||
//printf("(%d, %" PRIu16 ")...", odd_even, part_sum_a0);
|
||||
for (uint32_t state = 0; state < (1<<20); state++) {
|
||||
for (uint32_t state = 0; state < (1 << 20); state++) {
|
||||
uint16_t part_sum_a0 = PartialSumProperty(state, odd_even) / 2;
|
||||
for (uint16_t low_bits = 0; low_bits < 1<<4; low_bits++) {
|
||||
set_bit24(part_sum_a0_bitarrays[odd_even][part_sum_a0], state<<4 | low_bits);
|
||||
for (uint16_t low_bits = 0; low_bits < 1 << 4; low_bits++) {
|
||||
set_bit24(part_sum_a0_bitarrays[odd_even][part_sum_a0], state << 4 | low_bits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -218,10 +218,10 @@ static void init_part_sum_bitarrays(void)
|
|||
static void free_part_sum_bitarrays(void)
|
||||
{
|
||||
printf("free_part_sum_bitarrays()...");
|
||||
for (int16_t part_sum_a0 = (NUM_PART_SUMS-1); part_sum_a0 >= 0; part_sum_a0--) {
|
||||
for (int16_t part_sum_a0 = (NUM_PART_SUMS - 1); part_sum_a0 >= 0; part_sum_a0--) {
|
||||
free_bitarray(part_sum_a0_bitarrays[ODD_STATE][part_sum_a0]);
|
||||
}
|
||||
for (int16_t part_sum_a0 = (NUM_PART_SUMS-1); part_sum_a0 >= 0; part_sum_a0--) {
|
||||
for (int16_t part_sum_a0 = (NUM_PART_SUMS - 1); part_sum_a0 >= 0; part_sum_a0--) {
|
||||
free_bitarray(part_sum_a0_bitarrays[EVEN_STATE][part_sum_a0]);
|
||||
}
|
||||
printf("done.\n");
|
||||
|
@ -233,7 +233,7 @@ void init_sum_bitarray(uint16_t sum_a0)
|
|||
{
|
||||
printf("init_sum_bitarray()...\n");
|
||||
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
|
||||
sum_a0_bitarray[odd_even] = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1<<19));
|
||||
sum_a0_bitarray[odd_even] = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1 << 19));
|
||||
if (sum_a0_bitarray[odd_even] == NULL) {
|
||||
printf("Out of memory error in init_sum_bitarrays(). Aborting...\n");
|
||||
exit(4);
|
||||
|
@ -242,8 +242,8 @@ void init_sum_bitarray(uint16_t sum_a0)
|
|||
}
|
||||
for (uint8_t p = 0; p < NUM_PART_SUMS; p++) {
|
||||
for (uint8_t q = 0; q < NUM_PART_SUMS; q++) {
|
||||
if (sum_a0 == 2*p*(16-2*q) + (16-2*p)*2*q) {
|
||||
for (uint32_t i = 0; i < (1<<19); i++) {
|
||||
if (sum_a0 == 2 * p * (16 - 2 * q) + (16 - 2 * p) * 2 * q) {
|
||||
for (uint32_t i = 0; i < (1 << 19); i++) {
|
||||
sum_a0_bitarray[EVEN_STATE][i] |= part_sum_a0_bitarrays[EVEN_STATE][q][i];
|
||||
sum_a0_bitarray[ODD_STATE][i] |= part_sum_a0_bitarrays[ODD_STATE][p][i];
|
||||
}
|
||||
|
@ -252,7 +252,7 @@ void init_sum_bitarray(uint16_t sum_a0)
|
|||
}
|
||||
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
|
||||
uint32_t count = count_states(sum_a0_bitarray[odd_even]);
|
||||
printf("sum_a0_bitarray[%s] has %d states (%5.2f%%)\n", odd_even==EVEN_STATE?"even":"odd ", count, (float)count/(1<<24)*100.0);
|
||||
printf("sum_a0_bitarray[%s] has %d states (%5.2f%%)\n", odd_even == EVEN_STATE ? "even" : "odd ", count, (float)count / (1 << 24) * 100.0);
|
||||
}
|
||||
printf("done.\n");
|
||||
}
|
||||
|
@ -270,11 +270,11 @@ static void free_sum_bitarray(void)
|
|||
static void precalculate_bit0_bitflip_bitarrays(uint8_t const bitflip, uint16_t const sum_a0)
|
||||
{
|
||||
// #define TEST_RUN
|
||||
#ifdef TEST_RUN
|
||||
#define NUM_TEST_STATES (1<<10)
|
||||
#else
|
||||
#define NUM_TEST_STATES (1<<23)
|
||||
#endif
|
||||
#ifdef TEST_RUN
|
||||
#define NUM_TEST_STATES (1<<10)
|
||||
#else
|
||||
#define NUM_TEST_STATES (1<<23)
|
||||
#endif
|
||||
|
||||
time_t start_time = time(NULL);
|
||||
time_t last_check_time = start_time;
|
||||
|
@ -282,14 +282,14 @@ static void precalculate_bit0_bitflip_bitarrays(uint8_t const bitflip, uint16_t
|
|||
uint32_t *restrict test_bitarray[2];
|
||||
uint32_t *restrict test_not_bitarray[2];
|
||||
|
||||
test_bitarray[EVEN_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
|
||||
test_bitarray[EVEN_STATE] = malloc_bitarray(sizeof(uint32_t) * (1 << 19));
|
||||
clear_bitarray24(test_bitarray[EVEN_STATE]);
|
||||
test_bitarray[ODD_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
|
||||
test_bitarray[ODD_STATE] = malloc_bitarray(sizeof(uint32_t) * (1 << 19));
|
||||
clear_bitarray24(test_bitarray[ODD_STATE]);
|
||||
|
||||
test_not_bitarray[EVEN_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
|
||||
test_not_bitarray[EVEN_STATE] = malloc_bitarray(sizeof(uint32_t) * (1 << 19));
|
||||
clear_bitarray24(test_not_bitarray[EVEN_STATE]);
|
||||
test_not_bitarray[ODD_STATE] = malloc_bitarray(sizeof(uint32_t) * (1<<19));
|
||||
test_not_bitarray[ODD_STATE] = malloc_bitarray(sizeof(uint32_t) * (1 << 19));
|
||||
clear_bitarray24(test_not_bitarray[ODD_STATE]);
|
||||
|
||||
uint32_t count[2];
|
||||
|
@ -299,13 +299,13 @@ static void precalculate_bit0_bitflip_bitarrays(uint8_t const bitflip, uint16_t
|
|||
for (uint32_t even_state = next_state(sum_a0_bitarray[EVEN_STATE], -1); even_state < NUM_TEST_STATES; even_state = next_state(sum_a0_bitarray[EVEN_STATE], even_state)) {
|
||||
bool even_state_is_possible = false;
|
||||
time_t time_now = time(NULL);
|
||||
if (difftime(time_now, last_check_time) > 5*60) { // print status every 5 minutes
|
||||
if (difftime(time_now, last_check_time) > 5 * 60) { // print status every 5 minutes
|
||||
float runtime = difftime(time_now, start_time);
|
||||
float remaining_time = runtime * ((1<<23) - even_state) / even_state;
|
||||
printf("\n%1.1f hours elapsed, expected completion in %1.1f hours (%1.1f days)", runtime/3600, remaining_time/3600, remaining_time/3600/24);
|
||||
float remaining_time = runtime * ((1 << 23) - even_state) / even_state;
|
||||
printf("\n%1.1f hours elapsed, expected completion in %1.1f hours (%1.1f days)", runtime / 3600, remaining_time / 3600, remaining_time / 3600 / 24);
|
||||
last_check_time = time_now;
|
||||
}
|
||||
for (uint32_t odd_state = next_state(sum_a0_bitarray[ODD_STATE], -1); odd_state < (1<<24); odd_state = next_state(test_bitarray[ODD_STATE], odd_state)) {
|
||||
for (uint32_t odd_state = next_state(sum_a0_bitarray[ODD_STATE], -1); odd_state < (1 << 24); odd_state = next_state(test_bitarray[ODD_STATE], odd_state)) {
|
||||
if (even_state_is_possible && test_bit24(test_bitarray[ODD_STATE], odd_state)) continue;
|
||||
// load crypto1 state
|
||||
struct Crypto1State cs;
|
||||
|
@ -363,47 +363,47 @@ static void precalculate_bit0_bitflip_bitarrays(uint8_t const bitflip, uint16_t
|
|||
printf("\nAnalysis completed. Checking for effective bitflip properties...\n");
|
||||
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
|
||||
count[odd_even] = count_states(test_bitarray[odd_even]);
|
||||
if (count[odd_even] != 1<<24) {
|
||||
if (count[odd_even] != 1 << 24) {
|
||||
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
|
||||
count[odd_even],
|
||||
odd_even==EVEN_STATE?"even":"odd",
|
||||
bitflip, (1<<24) - count[odd_even],
|
||||
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
|
||||
#ifndef TEST_RUN
|
||||
count[odd_even],
|
||||
odd_even == EVEN_STATE ? "even" : "odd",
|
||||
bitflip, (1 << 24) - count[odd_even],
|
||||
(float)((1 << 24) - count[odd_even]) / (1 << 24) * 100.0);
|
||||
#ifndef TEST_RUN
|
||||
write_bitflips_file(odd_even, bitflip, sum_a0, test_bitarray[odd_even], count[odd_even]);
|
||||
#endif
|
||||
#endif
|
||||
} else {
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip);
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even == EVEN_STATE ? "even" : "odd", bitflip);
|
||||
}
|
||||
}
|
||||
uint32_t *restrict test_bitarray_2nd = malloc_bitarray(sizeof(uint32_t) * (1<<19));
|
||||
uint32_t *restrict test_bitarray_2nd = malloc_bitarray(sizeof(uint32_t) * (1 << 19));
|
||||
clear_bitarray24(test_bitarray_2nd);
|
||||
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
|
||||
if (count[odd_even] != 1<<24) {
|
||||
for (uint32_t state = 0; state < (1<<24); state += 1<<4) {
|
||||
uint32_t line = test_bitarray[odd_even][state>>5];
|
||||
uint16_t half_line = state&0x000000010 ? line&0x0000ffff : line>>16;
|
||||
if (count[odd_even] != 1 << 24) {
|
||||
for (uint32_t state = 0; state < (1 << 24); state += 1 << 4) {
|
||||
uint32_t line = test_bitarray[odd_even][state >> 5];
|
||||
uint16_t half_line = state & 0x000000010 ? line & 0x0000ffff : line >> 16;
|
||||
if (half_line != 0) {
|
||||
for (uint32_t low_bits = 0; low_bits < (1<<4); low_bits++) {
|
||||
for (uint32_t low_bits = 0; low_bits < (1 << 4); low_bits++) {
|
||||
set_bit24(test_bitarray_2nd, low_bits << 20 | state >> 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
count[odd_even] = count_states(test_bitarray_2nd);
|
||||
if (count[odd_even] != 1<<24) {
|
||||
if (count[odd_even] != 1 << 24) {
|
||||
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
|
||||
count[odd_even],
|
||||
odd_even==EVEN_STATE?"even":"odd",
|
||||
bitflip | BITFLIP_2ND_BYTE, (1<<24) - count[odd_even],
|
||||
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
|
||||
#ifndef TEST_RUN
|
||||
count[odd_even],
|
||||
odd_even == EVEN_STATE ? "even" : "odd",
|
||||
bitflip | BITFLIP_2ND_BYTE, (1 << 24) - count[odd_even],
|
||||
(float)((1 << 24) - count[odd_even]) / (1 << 24) * 100.0);
|
||||
#ifndef TEST_RUN
|
||||
write_bitflips_file(odd_even, bitflip | BITFLIP_2ND_BYTE, sum_a0, test_bitarray_2nd, count[odd_even]);
|
||||
#endif
|
||||
#endif
|
||||
} else {
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | BITFLIP_2ND_BYTE);
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even == EVEN_STATE ? "even" : "odd", bitflip | BITFLIP_2ND_BYTE);
|
||||
}
|
||||
} else {
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | BITFLIP_2ND_BYTE);
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even == EVEN_STATE ? "even" : "odd", bitflip | BITFLIP_2ND_BYTE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -415,13 +415,13 @@ static void precalculate_bit0_bitflip_bitarrays(uint8_t const bitflip, uint16_t
|
|||
for (uint32_t even_state = next_state(sum_a0_bitarray[EVEN_STATE], -1); even_state < NUM_TEST_STATES; even_state = next_state(sum_a0_bitarray[EVEN_STATE], even_state)) {
|
||||
bool even_state_is_possible = test_bit24(test_not_bitarray[EVEN_STATE], even_state);
|
||||
time_t time_now = time(NULL);
|
||||
if (difftime(time_now, last_check_time) > 5*60) { // print status every 5 minutes
|
||||
if (difftime(time_now, last_check_time) > 5 * 60) { // print status every 5 minutes
|
||||
float runtime = difftime(time_now, start_time);
|
||||
float remaining_time = runtime * ((1<<23) - even_state) / even_state;
|
||||
printf("\n%1.1f hours elapsed, expected completion in %1.1f hours (%1.1f days)", runtime/3600, remaining_time/3600, remaining_time/3600/24);
|
||||
float remaining_time = runtime * ((1 << 23) - even_state) / even_state;
|
||||
printf("\n%1.1f hours elapsed, expected completion in %1.1f hours (%1.1f days)", runtime / 3600, remaining_time / 3600, remaining_time / 3600 / 24);
|
||||
last_check_time = time_now;
|
||||
}
|
||||
for (uint32_t odd_state = next_state(sum_a0_bitarray[ODD_STATE], -1); odd_state < (1<<24); odd_state = next_state(sum_a0_bitarray[ODD_STATE], odd_state)) {
|
||||
for (uint32_t odd_state = next_state(sum_a0_bitarray[ODD_STATE], -1); odd_state < (1 << 24); odd_state = next_state(sum_a0_bitarray[ODD_STATE], odd_state)) {
|
||||
if (even_state_is_possible) {
|
||||
if (all_odd_states_are_possible_for_notbitflip) break;
|
||||
if (test_bit24(test_not_bitarray[ODD_STATE], odd_state)) continue;
|
||||
|
@ -446,7 +446,7 @@ static void precalculate_bit0_bitflip_bitarrays(uint8_t const bitflip, uint16_t
|
|||
for (int i = 0; i < 9; i++) {
|
||||
uint_fast8_t keystream_bit = filter(cs.odd & 0x000fffff) ^ filter((cs.odd & 0x000fffff) ^ cs_delta.odd);
|
||||
keystream = keystream << 1 | keystream_bit;
|
||||
uint_fast8_t nt_bit = BIT(bitflip|0x100, i) ^ keystream_bit;
|
||||
uint_fast8_t nt_bit = BIT(bitflip | 0x100, i) ^ keystream_bit;
|
||||
uint_fast8_t LSFR_feedback = BIT(cs_delta.odd, 2) ^ BIT(cs_delta.even, 2) ^ BIT(cs_delta.odd, 3);
|
||||
|
||||
cs_delta.even = cs_delta.even << 1 | (LSFR_feedback ^ nt_bit);
|
||||
|
@ -475,47 +475,47 @@ static void precalculate_bit0_bitflip_bitarrays(uint8_t const bitflip, uint16_t
|
|||
printf("\nAnalysis completed. Checking for effective !bitflip properties...\n");
|
||||
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
|
||||
count[odd_even] = count_states(test_not_bitarray[odd_even]);
|
||||
if (count[odd_even] != 1<<24) {
|
||||
if (count[odd_even] != 1 << 24) {
|
||||
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
|
||||
count[odd_even],
|
||||
odd_even==EVEN_STATE?"even":"odd",
|
||||
bitflip|0x100, (1<<24) - count[odd_even],
|
||||
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
|
||||
#ifndef TEST_RUN
|
||||
write_bitflips_file(odd_even, bitflip|0x100, sum_a0, test_not_bitarray[odd_even], count[odd_even]);
|
||||
#endif
|
||||
count[odd_even],
|
||||
odd_even == EVEN_STATE ? "even" : "odd",
|
||||
bitflip | 0x100, (1 << 24) - count[odd_even],
|
||||
(float)((1 << 24) - count[odd_even]) / (1 << 24) * 100.0);
|
||||
#ifndef TEST_RUN
|
||||
write_bitflips_file(odd_even, bitflip | 0x100, sum_a0, test_not_bitarray[odd_even], count[odd_even]);
|
||||
#endif
|
||||
} else {
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip|0x100);
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even == EVEN_STATE ? "even" : "odd", bitflip | 0x100);
|
||||
}
|
||||
}
|
||||
|
||||
clear_bitarray24(test_bitarray_2nd);
|
||||
for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
|
||||
if (count[odd_even] != 1<<24) {
|
||||
for (uint32_t state = 0; state < (1<<24); state += 1<<4) {
|
||||
uint32_t line = test_not_bitarray[odd_even][state>>5];
|
||||
uint16_t half_line = state&0x000000010 ? line&0x0000ffff : line>>16;
|
||||
if (count[odd_even] != 1 << 24) {
|
||||
for (uint32_t state = 0; state < (1 << 24); state += 1 << 4) {
|
||||
uint32_t line = test_not_bitarray[odd_even][state >> 5];
|
||||
uint16_t half_line = state & 0x000000010 ? line & 0x0000ffff : line >> 16;
|
||||
if (half_line != 0) {
|
||||
for (uint32_t low_bits = 0; low_bits < (1<<4); low_bits++) {
|
||||
for (uint32_t low_bits = 0; low_bits < (1 << 4); low_bits++) {
|
||||
set_bit24(test_bitarray_2nd, low_bits << 20 | state >> 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
count[odd_even] = count_states(test_bitarray_2nd);
|
||||
if (count[odd_even] != 1<<24) {
|
||||
if (count[odd_even] != 1 << 24) {
|
||||
printf("Writing %d possible %s states for bitflip property %03x (%d (%1.2f%%) states eliminated)\n",
|
||||
count[odd_even],
|
||||
odd_even==EVEN_STATE?"even":"odd",
|
||||
bitflip | 0x100| BITFLIP_2ND_BYTE, (1<<24) - count[odd_even],
|
||||
(float)((1<<24) - count[odd_even]) / (1<<24) * 100.0);
|
||||
#ifndef TEST_RUN
|
||||
count[odd_even],
|
||||
odd_even == EVEN_STATE ? "even" : "odd",
|
||||
bitflip | 0x100 | BITFLIP_2ND_BYTE, (1 << 24) - count[odd_even],
|
||||
(float)((1 << 24) - count[odd_even]) / (1 << 24) * 100.0);
|
||||
#ifndef TEST_RUN
|
||||
write_bitflips_file(odd_even, bitflip | 0x100 | BITFLIP_2ND_BYTE, sum_a0, test_bitarray_2nd, count[odd_even]);
|
||||
#endif
|
||||
#endif
|
||||
} else {
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | 0x100 | BITFLIP_2ND_BYTE);
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even == EVEN_STATE ? "even" : "odd", bitflip | 0x100 | BITFLIP_2ND_BYTE);
|
||||
}
|
||||
} else {
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even==EVEN_STATE?"even":"odd", bitflip | 0x100 | BITFLIP_2ND_BYTE);
|
||||
printf("All %s states for bitflip property %03x are possible. No file written.\n", odd_even == EVEN_STATE ? "even" : "odd", bitflip | 0x100 | BITFLIP_2ND_BYTE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -529,7 +529,8 @@ static void precalculate_bit0_bitflip_bitarrays(uint8_t const bitflip, uint16_t
|
|||
}
|
||||
|
||||
|
||||
int main (int argc, char *argv[]) {
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
unsigned int bitflip_in;
|
||||
int sum_a0;
|
||||
|
@ -544,14 +545,14 @@ int main (int argc, char *argv[]) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
sscanf(argv[1],"%x", &bitflip_in);
|
||||
sscanf(argv[1], "%x", &bitflip_in);
|
||||
|
||||
if (bitflip_in > 255) {
|
||||
printf("Bitflip property must be less than or equal to 0xff\n\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(argc == 3) {
|
||||
if (argc == 3) {
|
||||
sscanf(argv[2], "%d", &sum_a0);
|
||||
}
|
||||
|
||||
|
@ -574,8 +575,10 @@ int main (int argc, char *argv[]) {
|
|||
case 192:
|
||||
case 200:
|
||||
case 224:
|
||||
case 256: break;
|
||||
default: sum_a0 = -1;
|
||||
case 256:
|
||||
break;
|
||||
default:
|
||||
sum_a0 = -1;
|
||||
}
|
||||
|
||||
printf("Calculating for bitflip = %02x, sum_a0 = %d\n", bitflip_in, sum_a0);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue