ADD: added @azcid's bitsliced brute-force (BF) solver for @piwi's hardnested command. Awesome work! The original patch needed some tweaking to build and run under MinGW.

This has not been tested on other systems so far.
This commit is contained in:
iceman1001 2016-04-21 10:26:00 +02:00
commit 3130ba4b21
4 changed files with 590 additions and 78 deletions

View file

@ -8,13 +8,14 @@ include ../common/Makefile.common
CC = gcc CC = gcc
CXX = g++ CXX = g++
#COMMON_FLAGS = -m32 #COMMON_FLAGS = -m32
VPATH = ../common ../zlib COMMON_FLAGS = -std=c99 -O3 -mpopcnt -march=native
#VPATH = ../common ../zlib
OBJDIR = obj OBJDIR = obj
LDLIBS = -L/opt/local/lib -L/usr/local/lib -lreadline -lpthread -lm LDLIBS = -L/opt/local/lib -L/usr/local/lib -lreadline -lpthread -lm
LUALIB = ../liblua/liblua.a LUALIB = ../liblua/liblua.a
LDFLAGS = $(COMMON_FLAGS) #LDFLAGS = $(COMMON_FLAGS)
CFLAGS = -std=c99 -I. -I../include -I../common -I../zlib -I/opt/local/include -I../liblua -Wall $(COMMON_FLAGS) -g -O3 CFLAGS = $(COMMON_FLAGS) -I. -I../include -I../common -I../zlib -I/opt/local/include -I../liblua -Wall -g
LUAPLATFORM = generic LUAPLATFORM = generic
ifneq (,$(findstring MINGW,$(platform))) ifneq (,$(findstring MINGW,$(platform)))
@ -69,17 +70,18 @@ CORESRCS = uart.c \
sleep.c sleep.c
CMDSRCS = nonce2key/crapto1.c\ CMDSRCS = nonce2key/crapto1.c \
nonce2key/crypto1.c\ nonce2key/crypto1.c \
nonce2key/nonce2key.c\ nonce2key/nonce2key.c \
nonce2key/crypto1_bs.c \
loclass/cipher.c \ loclass/cipher.c \
loclass/cipherutils.c \ loclass/cipherutils.c \
loclass/des.c \ loclass/des.c \
loclass/ikeys.c \ loclass/ikeys.c \
loclass/elite_crack.c\ loclass/elite_crack.c \
loclass/fileutils.c\ loclass/fileutils.c \
mifarehost.c\ mifarehost.c \
parity.c\ parity.c \
crc.c \ crc.c \
crc16.c \ crc16.c \
crc64.c \ crc64.c \
@ -113,30 +115,30 @@ CMDSRCS = nonce2key/crapto1.c\
cmdparser.c \ cmdparser.c \
cmdmain.c \ cmdmain.c \
cmdlft55xx.c \ cmdlft55xx.c \
cmdlfpcf7931.c\ cmdlfpcf7931.c \
cmdlfviking.c\ cmdlfviking.c \
cmdlfpresco.c\ cmdlfpresco.c \
cmdlfpyramid.c\ cmdlfpyramid.c \
cmdlfguard.c\ cmdlfguard.c \
pm3_binlib.c\ pm3_binlib.c \
scripting.c\ scripting.c \
cmdscript.c\ cmdscript.c \
pm3_bitlib.c\ pm3_bitlib.c \
aes.c\ aes.c \
protocols.c\ protocols.c \
sha1.c\ sha1.c \
sha256.c\ sha256.c \
cmdcrc.c\ cmdcrc.c \
reveng/preset.c\ reveng/preset.c \
reveng/reveng.c\ reveng/reveng.c \
reveng/cli.c\ reveng/cli.c \
reveng/bmpbit.c\ reveng/bmpbit.c \
reveng/model.c\ reveng/model.c \
reveng/poly.c\ reveng/poly.c \
reveng/getopt.c\ reveng/getopt.c \
tea.c\ tea.c \
prng.c\ prng.c \
radixsort.c\ radixsort.c \
bucketsort.c bucketsort.c
ZLIBSRCS = deflate.c adler32.c trees.c zutil.c inflate.c inffast.c inftrees.c ZLIBSRCS = deflate.c adler32.c trees.c zutil.c inflate.c inffast.c inftrees.c
ZLIB_FLAGS = -DZ_SOLO -DZ_PREFIX -DNO_GZIP -DZLIB_PM3_TUNED ZLIB_FLAGS = -DZ_SOLO -DZ_PREFIX -DNO_GZIP -DZLIB_PM3_TUNED
@ -147,13 +149,13 @@ CMDOBJS = $(CMDSRCS:%.c=$(OBJDIR)/%.o)
ZLIBOBJS = $(ZLIBSRCS:%.c=$(OBJDIR)/%.o) ZLIBOBJS = $(ZLIBSRCS:%.c=$(OBJDIR)/%.o)
RM = rm -f RM = rm -f
BINS = proxmark3 flasher fpga_compress #snooper cli BINS = proxmark3 flasher fpga_compress
CLEAN = cli cli.exe flasher flasher.exe proxmark3 proxmark3.exe fpga_compress fpga_compress.exe snooper snooper.exe $(CMDOBJS) $(OBJDIR)/*.o *.o *.moc.cpp CLEAN = cli cli.exe flasher flasher.exe proxmark3 proxmark3.exe fpga_compress fpga_compress.exe snooper snooper.exe $(CMDOBJS) $(OBJDIR)/*.o *.o *.moc.cpp
all: lua_build $(BINS) all: lua_build $(BINS)
all-static: LDLIBS:=-static $(LDLIBS) all-static: LDLIBS:=-static $(LDLIBS)
all-static: snooper cli flasher fpga_compress all-static: $(BINS)
proxmark3: LDLIBS+=$(LUALIB) $(QTLDLIBS) proxmark3: LDLIBS+=$(LUALIB) $(QTLDLIBS)
proxmark3: $(OBJDIR)/proxmark3.o $(COREOBJS) $(CMDOBJS) $(QTGUI) proxmark3: $(OBJDIR)/proxmark3.o $(COREOBJS) $(CMDOBJS) $(QTGUI)

View file

@ -1,6 +1,6 @@
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Copyright (C) 2015 piwi // Copyright (C) 2015 piwi
// // fiddled with 2016 Azcid (hardnested bitsliced Bruteforce imp)
// This code is licensed to you under the terms of the GNU GPL, version 2 or, // This code is licensed to you under the terms of the GNU GPL, version 2 or,
// at your option, any later version. See the LICENSE.txt file for the text of // at your option, any later version. See the LICENSE.txt file for the text of
// the license. // the license.
@ -14,8 +14,8 @@
// Computer and Communications Security, 2015 // Computer and Communications Security, 2015
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h>
#include <string.h> #include <string.h>
#include <pthread.h> #include <pthread.h>
#include <locale.h> #include <locale.h>
@ -25,14 +25,19 @@
#include "ui.h" #include "ui.h"
#include "util.h" #include "util.h"
#include "nonce2key/crapto1.h" #include "nonce2key/crapto1.h"
#include "nonce2key/crypto1_bs.h"
#include "parity.h" #include "parity.h"
#ifdef __WIN32
#include <windows.h>
#endif
#include <malloc.h>
#include <assert.h>
// uint32_t test_state_odd = 0; // uint32_t test_state_odd = 0;
// uint32_t test_state_even = 0; // uint32_t test_state_even = 0;
#define CONFIDENCE_THRESHOLD 0.95 // Collect nonces until we are certain enough that the following brute force is successfull #define CONFIDENCE_THRESHOLD 0.95 // Collect nonces until we are certain enough that the following brute force is successfull
#define GOOD_BYTES_REQUIRED 30 #define GOOD_BYTES_REQUIRED 28
static const float p_K[257] = { // the probability that a random nonce has a Sum Property == K static const float p_K[257] = { // the probability that a random nonce has a Sum Property == K
0.0290, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0290, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
@ -88,6 +93,8 @@ typedef struct noncelist {
} noncelist_t; } noncelist_t;
static size_t nonces_to_bruteforce = 0;
static noncelistentry_t *brute_force_nonces[256];
static uint32_t cuid = 0; static uint32_t cuid = 0;
static noncelist_t nonces[256]; static noncelist_t nonces[256];
static uint8_t best_first_bytes[256]; static uint8_t best_first_bytes[256];
@ -169,6 +176,11 @@ static int add_nonce(uint32_t nonce_enc, uint8_t par_enc)
p2->nonce_enc = nonce_enc; p2->nonce_enc = nonce_enc;
p2->par_enc = par_enc; p2->par_enc = par_enc;
if(nonces_to_bruteforce < 256){
brute_force_nonces[nonces_to_bruteforce] = p2;
nonces_to_bruteforce++;
}
nonces[first_byte].num++; nonces[first_byte].num++;
nonces[first_byte].Sum += evenparity32((nonce_enc & 0x00ff0000) | (par_enc & 0x04)); nonces[first_byte].Sum += evenparity32((nonce_enc & 0x00ff0000) | (par_enc & 0x04));
nonces[first_byte].updated = true; // indicates that we need to recalculate the Sum(a8) probability for this first byte nonces[first_byte].updated = true; // indicates that we need to recalculate the Sum(a8) probability for this first byte
@ -176,7 +188,6 @@ static int add_nonce(uint32_t nonce_enc, uint8_t par_enc)
return (1); // new nonce added return (1); // new nonce added
} }
static void init_nonce_memory(void) static void init_nonce_memory(void)
{ {
for (uint16_t i = 0; i < 256; i++) { for (uint16_t i = 0; i < 256; i++) {
@ -203,7 +214,6 @@ static void free_nonce_list(noncelistentry_t *p)
} }
} }
static void free_nonces_memory(void) static void free_nonces_memory(void)
{ {
for (uint16_t i = 0; i < 256; i++) { for (uint16_t i = 0; i < 256; i++) {
@ -211,7 +221,6 @@ static void free_nonces_memory(void)
} }
} }
static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even) static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even)
{ {
uint16_t sum = 0; uint16_t sum = 0;
@ -235,7 +244,6 @@ static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even)
return sum; return sum;
} }
// static uint16_t SumProperty(struct Crypto1State *s) // static uint16_t SumProperty(struct Crypto1State *s)
// { // {
// uint16_t sum_odd = PartialSumProperty(s->odd, ODD_STATE); // uint16_t sum_odd = PartialSumProperty(s->odd, ODD_STATE);
@ -243,7 +251,6 @@ static uint16_t PartialSumProperty(uint32_t state, odd_even_t odd_even)
// return (sum_odd*(16-sum_even) + (16-sum_odd)*sum_even); // return (sum_odd*(16-sum_even) + (16-sum_odd)*sum_even);
// } // }
static double p_hypergeometric(uint16_t N, uint16_t K, uint16_t n, uint16_t k) static double p_hypergeometric(uint16_t N, uint16_t K, uint16_t n, uint16_t k)
{ {
// for efficient computation we are using the recursive definition // for efficient computation we are using the recursive definition
@ -282,7 +289,6 @@ static double p_hypergeometric(uint16_t N, uint16_t K, uint16_t n, uint16_t k)
} }
} }
static float sum_probability(uint16_t K, uint16_t n, uint16_t k) static float sum_probability(uint16_t K, uint16_t n, uint16_t k)
{ {
const uint16_t N = 256; const uint16_t N = 256;
@ -301,8 +307,6 @@ static float sum_probability(uint16_t K, uint16_t n, uint16_t k)
} }
static inline uint_fast8_t common_bits(uint_fast8_t bytes_diff) static inline uint_fast8_t common_bits(uint_fast8_t bytes_diff)
{ {
static const uint_fast8_t common_bits_LUT[256] = { static const uint_fast8_t common_bits_LUT[256] = {
@ -327,7 +331,6 @@ static inline uint_fast8_t common_bits(uint_fast8_t bytes_diff)
return common_bits_LUT[bytes_diff]; return common_bits_LUT[bytes_diff];
} }
static void Tests() static void Tests()
{ {
// printf("Tests: Partial Statelist sizes\n"); // printf("Tests: Partial Statelist sizes\n");
@ -490,7 +493,6 @@ static void Tests()
} }
static void sort_best_first_bytes(void) static void sort_best_first_bytes(void)
{ {
// sort based on probability for correct guess // sort based on probability for correct guess
@ -576,7 +578,6 @@ static void sort_best_first_bytes(void)
} }
static uint16_t estimate_second_byte_sum(void) static uint16_t estimate_second_byte_sum(void)
{ {
@ -609,7 +610,6 @@ static uint16_t estimate_second_byte_sum(void)
return num_good_nonces; return num_good_nonces;
} }
static int read_nonce_file(void) static int read_nonce_file(void)
{ {
FILE *fnonces = NULL; FILE *fnonces = NULL;
@ -652,7 +652,6 @@ static int read_nonce_file(void)
return 0; return 0;
} }
static void Check_for_FilterFlipProperties(void) static void Check_for_FilterFlipProperties(void)
{ {
printf("Checking for Filter Flip Properties...\n"); printf("Checking for Filter Flip Properties...\n");
@ -683,12 +682,9 @@ static void Check_for_FilterFlipProperties(void)
} }
} }
static void simulate_MFplus_RNG(uint32_t test_cuid, uint64_t test_key, uint32_t *nt_enc, uint8_t *par_enc) static void simulate_MFplus_RNG(uint32_t test_cuid, uint64_t test_key, uint32_t *nt_enc, uint8_t *par_enc)
{ {
struct Crypto1State sim_cs = {0, 0}; struct Crypto1State sim_cs = {0, 0};
// sim_cs.odd = sim_cs.even = 0;
// init cryptostate with key: // init cryptostate with key:
for(int8_t i = 47; i > 0; i -= 2) { for(int8_t i = 47; i > 0; i -= 2) {
sim_cs.odd = sim_cs.odd << 1 | BIT(test_key, (i - 1) ^ 7); sim_cs.odd = sim_cs.odd << 1 | BIT(test_key, (i - 1) ^ 7);
@ -708,7 +704,6 @@ static void simulate_MFplus_RNG(uint32_t test_cuid, uint64_t test_key, uint32_t
} }
static void simulate_acquire_nonces() static void simulate_acquire_nonces()
{ {
clock_t time1 = clock(); clock_t time1 = clock();
@ -762,7 +757,6 @@ static void simulate_acquire_nonces()
} }
static int acquire_nonces(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, bool nonce_file_write, bool slow) static int acquire_nonces(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, bool nonce_file_write, bool slow)
{ {
clock_t time1 = clock(); clock_t time1 = clock();
@ -890,7 +884,6 @@ static int acquire_nonces(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_
return 0; return 0;
} }
static int init_partial_statelists(void) static int init_partial_statelists(void)
{ {
const uint32_t sizes_odd[17] = { 126757, 0, 18387, 0, 74241, 0, 181737, 0, 248801, 0, 182033, 0, 73421, 0, 17607, 0, 125601 }; const uint32_t sizes_odd[17] = { 126757, 0, 18387, 0, 74241, 0, 181737, 0, 248801, 0, 182033, 0, 73421, 0, 17607, 0, 125601 };
@ -941,7 +934,6 @@ static int init_partial_statelists(void)
return 0; return 0;
} }
static void init_BitFlip_statelist(void) static void init_BitFlip_statelist(void)
{ {
printf("Generating bitflip statelist...\n"); printf("Generating bitflip statelist...\n");
@ -965,7 +957,6 @@ static void init_BitFlip_statelist(void)
statelist_bitflip.states[0] = realloc(statelist_bitflip.states[0], sizeof(uint32_t) * (statelist_bitflip.len[0] + 1)); statelist_bitflip.states[0] = realloc(statelist_bitflip.states[0], sizeof(uint32_t) * (statelist_bitflip.len[0] + 1));
} }
static inline uint32_t *find_first_state(uint32_t state, uint32_t mask, partial_indexed_statelist_t *sl, odd_even_t odd_even) static inline uint32_t *find_first_state(uint32_t state, uint32_t mask, partial_indexed_statelist_t *sl, odd_even_t odd_even)
{ {
uint32_t *p = sl->index[odd_even][(state & mask) >> (20-STATELIST_INDEX_WIDTH)]; // first Bits as index uint32_t *p = sl->index[odd_even][(state & mask) >> (20-STATELIST_INDEX_WIDTH)]; // first Bits as index
@ -977,7 +968,6 @@ static inline uint32_t *find_first_state(uint32_t state, uint32_t mask, partial_
return NULL; // no match return NULL; // no match
} }
static inline bool /*__attribute__((always_inline))*/ invariant_holds(uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, uint_fast8_t bit, uint_fast8_t state_bit) static inline bool /*__attribute__((always_inline))*/ invariant_holds(uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, uint_fast8_t bit, uint_fast8_t state_bit)
{ {
uint_fast8_t j_1_bit_mask = 0x01 << (bit-1); uint_fast8_t j_1_bit_mask = 0x01 << (bit-1);
@ -989,7 +979,6 @@ static inline bool /*__attribute__((always_inline))*/ invariant_holds(uint_fast8
return !all_diff; return !all_diff;
} }
static inline bool /*__attribute__((always_inline))*/ invalid_state(uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, uint_fast8_t bit, uint_fast8_t state_bit) static inline bool /*__attribute__((always_inline))*/ invalid_state(uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, uint_fast8_t bit, uint_fast8_t state_bit)
{ {
uint_fast8_t j_bit_mask = 0x01 << bit; uint_fast8_t j_bit_mask = 0x01 << bit;
@ -1000,7 +989,6 @@ static inline bool /*__attribute__((always_inline))*/ invalid_state(uint_fast8_t
return all_diff; return all_diff;
} }
static inline bool remaining_bits_match(uint_fast8_t num_common_bits, uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, odd_even_t odd_even) static inline bool remaining_bits_match(uint_fast8_t num_common_bits, uint_fast8_t byte_diff, uint_fast32_t state1, uint_fast32_t state2, odd_even_t odd_even)
{ {
if (odd_even) { if (odd_even) {
@ -1031,7 +1019,6 @@ static inline bool remaining_bits_match(uint_fast8_t num_common_bits, uint_fast8
return true; // valid state return true; // valid state
} }
static bool all_other_first_bytes_match(uint32_t state, odd_even_t odd_even) static bool all_other_first_bytes_match(uint32_t state, odd_even_t odd_even)
{ {
for (uint16_t i = 1; i < num_good_first_bytes; i++) { for (uint16_t i = 1; i < num_good_first_bytes; i++) {
@ -1095,7 +1082,6 @@ static bool all_other_first_bytes_match(uint32_t state, odd_even_t odd_even)
return true; return true;
} }
static bool all_bit_flips_match(uint32_t state, odd_even_t odd_even) static bool all_bit_flips_match(uint32_t state, odd_even_t odd_even)
{ {
for (uint16_t i = 0; i < 256; i++) { for (uint16_t i = 0; i < 256; i++) {
@ -1152,13 +1138,11 @@ static bool all_bit_flips_match(uint32_t state, odd_even_t odd_even)
return true; return true;
} }
static struct sl_cache_entry { static struct sl_cache_entry {
uint32_t *sl; uint32_t *sl;
uint32_t len; uint32_t len;
} sl_cache[17][17][2]; } sl_cache[17][17][2];
static void init_statelist_cache(void) static void init_statelist_cache(void)
{ {
for (uint16_t i = 0; i < 17; i+=2) { for (uint16_t i = 0; i < 17; i+=2) {
@ -1171,7 +1155,6 @@ static void init_statelist_cache(void)
} }
} }
static int add_matching_states(statelist_t *candidates, uint16_t part_sum_a0, uint16_t part_sum_a8, odd_even_t odd_even) static int add_matching_states(statelist_t *candidates, uint16_t part_sum_a0, uint16_t part_sum_a8, odd_even_t odd_even)
{ {
uint32_t worstcase_size = 1<<20; uint32_t worstcase_size = 1<<20;
@ -1219,7 +1202,6 @@ static int add_matching_states(statelist_t *candidates, uint16_t part_sum_a0, ui
return 0; return 0;
} }
static statelist_t *add_more_candidates(statelist_t *current_candidates) static statelist_t *add_more_candidates(statelist_t *current_candidates)
{ {
statelist_t *new_candidates = NULL; statelist_t *new_candidates = NULL;
@ -1239,7 +1221,6 @@ static statelist_t *add_more_candidates(statelist_t *current_candidates)
return new_candidates; return new_candidates;
} }
static void TestIfKeyExists(uint64_t key) static void TestIfKeyExists(uint64_t key)
{ {
struct Crypto1State *pcs; struct Crypto1State *pcs;
@ -1290,7 +1271,6 @@ static void TestIfKeyExists(uint64_t key)
crypto1_destroy(pcs); crypto1_destroy(pcs);
} }
static void generate_candidates(uint16_t sum_a0, uint16_t sum_a8) static void generate_candidates(uint16_t sum_a0, uint16_t sum_a8)
{ {
printf("Generating crypto1 state candidates... \n"); printf("Generating crypto1 state candidates... \n");
@ -1364,7 +1344,6 @@ static void generate_candidates(uint16_t sum_a0, uint16_t sum_a8)
} }
} }
static void free_candidates_memory(statelist_t *sl) static void free_candidates_memory(statelist_t *sl)
{ {
if (sl == NULL) { if (sl == NULL) {
@ -1375,7 +1354,6 @@ static void free_candidates_memory(statelist_t *sl)
} }
} }
static void free_statelist_cache(void) static void free_statelist_cache(void)
{ {
for (uint16_t i = 0; i < 17; i+=2) { for (uint16_t i = 0; i < 17; i+=2) {
@ -1387,18 +1365,331 @@ static void free_statelist_cache(void)
} }
} }
// Shared state of the bitsliced brute-force phase.
// It is set up by brute_force() and used by the crack_states_thread() workers.

// number of keys found so far; workers increment it atomically and poll it to abort early
size_t keys_found = 0;
// number of valid entries in buckets[]
size_t bucket_count = 0;
// candidate state lists to search; worker i handles buckets i, i+thread_count, ...
statelist_t* buckets[128];
// running total of tested states, accumulated atomically by crack_states_bitsliced()
size_t total_states_tested = 0;
// number of worker threads; brute_force() overwrites this default with the CPU count on non-Windows builds
size_t thread_count = 4;
// these bitsliced states will hold identical states in all slices
bitslice_t bitsliced_rollback_byte[ROLLBACK_SIZE];
// arrays of bitsliced states with identical values in all slices
bitslice_t bitsliced_encrypted_nonces[NONCE_TESTS][STATE_SIZE];
bitslice_t bitsliced_encrypted_parity_bits[NONCE_TESTS][ROLLBACK_SIZE];
// when defined, crack_states_bitsliced() counts the states it actually tested
// instead of assuming len[EVEN_STATE]*len[ODD_STATE]
#define EXACT_COUNT
// Brute-force one bucket of candidate states with the bitsliced Crypto-1 implementation.
//
// All even half-states of the bucket are bitsliced in blocks of up to MAX_BITSLICES and
// rolled back by ROLLBACK_SIZE bits once. Every odd half-state is then combined with every
// such block and checked against the NONCE_TESTS pre-bitsliced encrypted nonces and parity
// bits (bitsliced_encrypted_nonces / bitsliced_encrypted_parity_bits).
//
// p:       candidate list providing states[EVEN_STATE]/states[ODD_STATE] and their lengths
// returns: the recovered key, or (uint64_t)-1 if this bucket holds no key, another thread
//          already reported a key (keys_found != 0), or an allocation failed
//
// Side effects: uses the thread-local states/state_p working area and atomically adds the
// number of tested states to total_states_tested.
static const uint64_t crack_states_bitsliced(statelist_t *p){
	// the idea to roll back the half-states before combining them was suggested/explained to me by bla
	// first we pre-bitslice all the even state bits and roll them back, then bitslice the odd bits and combine the two in the inner loop
	uint64_t key = -1;
	uint8_t bSize = sizeof(bitslice_t);

	// The bitslicing loop below runs once per *started* group of MAX_BITSLICES even states,
	// i.e. ceil(len/MAX_BITSLICES) times. Sizing the per-block arrays with len/MAX_BITSLICES
	// (rounded down) overflowed them whenever len was not an exact multiple and produced a
	// zero-length VLA for len < MAX_BITSLICES. len/MAX_BITSLICES + 1 is always >= 1 and >= the
	// number of blocks.
	const size_t max_blocks = p->len[EVEN_STATE]/MAX_BITSLICES + 1;

	#ifdef EXACT_COUNT
	size_t bucket_states_tested = 0;
	size_t bucket_size[max_blocks];
	#else
	const size_t bucket_states_tested = (size_t)(p->len[EVEN_STATE])*(p->len[ODD_STATE]);
	#endif

	bitslice_t *bitsliced_even_states[max_blocks];
	size_t bitsliced_blocks = 0;
	uint32_t const * restrict even_end = p->states[EVEN_STATE]+p->len[EVEN_STATE];

	// bitslice all the even states
	for(uint32_t * restrict p_even = p->states[EVEN_STATE]; p_even < even_end; p_even += MAX_BITSLICES){

		#ifdef __WIN32
			#ifdef __MINGW32__
		bitslice_t * restrict lstate_p = __mingw_aligned_malloc((STATE_SIZE+ROLLBACK_SIZE) * bSize, bSize);
			#else
		bitslice_t * restrict lstate_p = _aligned_malloc((STATE_SIZE+ROLLBACK_SIZE) * bSize, bSize);
			#endif
		#else
		bitslice_t * restrict lstate_p = memalign(bSize, (STATE_SIZE+ROLLBACK_SIZE) * bSize);
		#endif

		if ( !lstate_p ) {
			// out of memory: release the blocks allocated so far and report "no key" via the common exit path
			goto out;
		}

		// zero the whole state: the even bits are OR-ed in below, the odd slots must be 0 here.
		// This includes lstate_p[0], which is read as the first feed-out bit by the rollback loop.
		memset(lstate_p, 0x0, STATE_SIZE*sizeof(bitslice_t));

		// bitslice even half-states
		const size_t max_slices = (even_end-p_even) < MAX_BITSLICES ? even_end-p_even : MAX_BITSLICES;
		#ifdef EXACT_COUNT
		bucket_size[bitsliced_blocks] = max_slices;
		#endif
		for(size_t slice_idx = 0; slice_idx < max_slices; ++slice_idx){
			uint32_t e = *(p_even+slice_idx);
			for(size_t bit_idx = 1; bit_idx < STATE_SIZE; bit_idx+=2, e >>= 1){
				// set even bits
				if(e&1){
					lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx&63);
				}
			}
		}

		// compute the rollback bits
		for(size_t rollback = 0; rollback < ROLLBACK_SIZE; ++rollback){
			// inlined crypto1_bs_lfsr_rollback
			const bitslice_value_t feedout = lstate_p[0].value;
			++lstate_p;
			const bitslice_value_t ks_bits = crypto1_bs_f20(lstate_p);
			const bitslice_value_t feedback = (feedout ^ ks_bits ^ lstate_p[47- 5].value ^ lstate_p[47- 9].value ^
			                                   lstate_p[47-10].value ^ lstate_p[47-12].value ^ lstate_p[47-14].value ^
			                                   lstate_p[47-15].value ^ lstate_p[47-17].value ^ lstate_p[47-19].value ^
			                                   lstate_p[47-24].value ^ lstate_p[47-25].value ^ lstate_p[47-27].value ^
			                                   lstate_p[47-29].value ^ lstate_p[47-35].value ^ lstate_p[47-39].value ^
			                                   lstate_p[47-41].value ^ lstate_p[47-42].value ^ lstate_p[47-43].value);
			lstate_p[47].value = feedback ^ bitsliced_rollback_byte[rollback].value;
		}
		// note: the stored pointer is ROLLBACK_SIZE elements past the start of the allocation
		bitsliced_even_states[bitsliced_blocks++] = lstate_p;
	}

	// bitslice every odd state to every block of even half-states with half-finished rollback
	for(uint32_t const * restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE]+p->len[ODD_STATE]; ++p_odd){
		// early abort
		if(keys_found){
			goto out;
		}

		// set the odd bits and compute rollback
		uint64_t o = (uint64_t) *p_odd;
		lfsr_rollback_byte((struct Crypto1State*) &o, 0, 1);
		// pre-compute part of the odd feedback bits (minus rollback)
		bool odd_feedback_bit = parity(o&0x9ce5c);

		crypto1_bs_rewind_a0();
		// set odd bits
		for(size_t state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; o >>= 1, state_idx+=2){
			if(o & 1){
				state_p[state_idx] = bs_ones;
			} else {
				state_p[state_idx] = bs_zeroes;
			}
		}
		const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;

		for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
			const bitslice_t * restrict bitsliced_even_state = bitsliced_even_states[block_idx];
			size_t state_idx;
			// set even bits
			for(state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; state_idx+=2){
				state_p[1+state_idx] = bitsliced_even_state[1+state_idx];
			}
			// set rollback bits
			uint64_t lo = o;
			for(; state_idx < STATE_SIZE; lo >>= 1, state_idx+=2){
				// set the odd bits and take in the odd rollback bits from the even states
				if(lo & 1){
					state_p[state_idx].value = ~bitsliced_even_state[state_idx].value;
				} else {
					state_p[state_idx] = bitsliced_even_state[state_idx];
				}

				// set the even bits and take in the even rollback bits from the odd states
				if((lo >> 32) & 1){
					state_p[1+state_idx].value = ~bitsliced_even_state[1+state_idx].value;
				} else {
					state_p[1+state_idx] = bitsliced_even_state[1+state_idx];
				}
			}

			#ifdef EXACT_COUNT
			bucket_states_tested += bucket_size[block_idx];
			#endif
			// pre-compute first keystream and feedback bit vectors
			const bitslice_value_t ksb = crypto1_bs_f20(state_p);
			const bitslice_value_t fbb = (odd_feedback         ^ state_p[47- 0].value ^ state_p[47- 5].value ^ // take in the even and rollback bits
			                              state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
			                              state_p[47-24].value ^ state_p[47-42].value);

			// vector to contain test results (1 = passed, 0 = failed)
			bitslice_t results = bs_ones;

			for(size_t tests = 0; tests < NONCE_TESTS; ++tests){
				size_t parity_bit_idx = 0;
				bitslice_value_t fb_bits = fbb;
				bitslice_value_t ks_bits = ksb;
				state_p = &states[KEYSTREAM_SIZE-1];
				bitslice_value_t parity_bit_vector = bs_zeroes.value;

				// highest bit is transmitted/received first
				for(int32_t ks_idx = KEYSTREAM_SIZE-1; ks_idx >= 0; --ks_idx, --state_p){
					// decrypt nonce bits
					const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value;
					const bitslice_value_t decrypted_nonce_bit_vector = (encrypted_nonce_bit_vector ^ ks_bits);

					// compute real parity bits on the fly
					parity_bit_vector ^= decrypted_nonce_bit_vector;

					// update state
					state_p[0].value = (fb_bits ^ decrypted_nonce_bit_vector);

					// compute next keystream bit
					ks_bits = crypto1_bs_f20(state_p);

					// for each byte:
					if((ks_idx&7) == 0){
						// get encrypted parity bits
						const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value;

						// decrypt parity bits
						const bitslice_value_t decrypted_parity_bit_vector = (encrypted_parity_bit_vector ^ ks_bits);

						// compare actual parity bits with decrypted parity bits and take count in results vector
						results.value &= (parity_bit_vector ^ decrypted_parity_bit_vector);

						// make sure we still have a match in our set
						// if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){

						// this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
						// the short-circuiting also helps
						if(results.bytes64[0] == 0
						#if MAX_BITSLICES > 64
						   && results.bytes64[1] == 0
						#endif
						#if MAX_BITSLICES > 128
						   && results.bytes64[2] == 0
						   && results.bytes64[3] == 0
						#endif
						  ){
							goto stop_tests;
						}
						// this is about as fast but less portable (requires -std=gnu99)
						// asm goto ("ptest %1, %0\n\t"
						//           "jz %l2" :: "xm" (results.value), "xm" (bs_ones.value) : "cc" : stop_tests);
						parity_bit_vector = bs_zeroes.value;
					}
					// compute next feedback bit vector
					fb_bits = (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
					           state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
					           state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
					           state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
					           state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
					           state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
				}
			}
			// all nonce tests were successful: we've found the key in this block!
			state_t keys[MAX_BITSLICES];
			crypto1_bs_convert_states(&states[KEYSTREAM_SIZE], keys);
			for(size_t results_idx = 0; results_idx < MAX_BITSLICES; ++results_idx){
				if(get_vector_bit(results_idx, results)){
					key = keys[results_idx].value;
					goto out;
				}
			}
stop_tests:
			// prepare to set new states
			crypto1_bs_rewind_a0();
			continue;
		}
	}

out:
	// release the pre-computed even blocks; each stored pointer was advanced by ROLLBACK_SIZE during rollback
	for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){

		#ifdef __WIN32
			#ifdef __MINGW32__
		__mingw_aligned_free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
			#else
		_aligned_free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
			#endif
		#else
		// memory obtained from memalign() is released with plain free()
		free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
		#endif

	}
	__sync_fetch_and_add(&total_states_tested, bucket_states_tested);
	return key;
}
static void* crack_states_thread(void* x){
const size_t thread_id = (size_t)x;
size_t current_bucket = thread_id;
while(current_bucket < bucket_count){
statelist_t * bucket = buckets[current_bucket];
if(bucket){
const uint64_t key = crack_states_bitsliced(bucket);
if(key != -1){
printf("\nFound key: %012"PRIx64"\n", key);
__sync_fetch_and_add(&keys_found, 1);
break;
} else if(keys_found){
break;
} else {
printf(".");
fflush(stdout);
}
}
current_bucket += thread_count;
}
return NULL;
}
#define _USE_32BIT_TIME_T
static void brute_force(void) static void brute_force(void)
{ {
if (known_target_key != -1) { if (known_target_key != -1) {
PrintAndLog("Looking for known target key in remaining key space..."); PrintAndLog("Looking for known target key in remaining key space...");
TestIfKeyExists(known_target_key); TestIfKeyExists(known_target_key);
} else { } else {
PrintAndLog("Brute Force phase is not implemented."); PrintAndLog("Brute force phase starting.");
time_t start, end;
time(&start);
keys_found = 0;
crypto1_bs_init();
PrintAndLog("Using %u-bit bitslices", MAX_BITSLICES);
PrintAndLog("Bitslicing best_first_byte^uid[3] (rollback byte): %02x...", best_first_bytes[0]^(cuid>>24));
// convert to 32 bit little-endian
crypto1_bs_bitslice_value32(rev32((best_first_bytes[0]^(cuid>>24))), bitsliced_rollback_byte, 8);
PrintAndLog("Bitslicing nonces...");
for(size_t tests = 0; tests < NONCE_TESTS; tests++){
uint32_t test_nonce = brute_force_nonces[tests]->nonce_enc;
uint8_t test_parity = brute_force_nonces[tests]->par_enc;
// pre-xor the uid into the decrypted nonces, and also pre-xor the cuid parity into the encrypted parity bits - otherwise an exta xor is required in the decryption routine
crypto1_bs_bitslice_value32(cuid^test_nonce, bitsliced_encrypted_nonces[tests], 32);
// convert to 32 bit little-endian
crypto1_bs_bitslice_value32(rev32( ~(test_parity ^ ~(parity(cuid>>24 & 0xff)<<3 | parity(cuid>>16 & 0xff)<<2 | parity(cuid>>8 & 0xff)<<1 | parity(cuid&0xff)))), bitsliced_encrypted_parity_bits[tests], 4);
}
total_states_tested = 0;
// count number of states to go
bucket_count = 0;
for (statelist_t *p = candidates; p != NULL; p = p->next) {
buckets[bucket_count] = p;
bucket_count++;
} }
} #ifndef __WIN32
thread_count = sysconf(_SC_NPROCESSORS_CONF);
#endif /* _WIN32 */
pthread_t threads[thread_count];
// enumerate states using all hardware threads, each thread handles one bucket
PrintAndLog("Starting %u cracking threads to search %u buckets containing a total of %"PRIu32" states...", thread_count, bucket_count, maximum_states);
for(size_t i = 0; i < thread_count; i++){
pthread_create(&threads[i], NULL, crack_states_thread, (void*) i);
}
for(size_t i = 0; i < thread_count; i++){
pthread_join(threads[i], 0);
}
time(&end);
unsigned long elapsed_time = difftime(end, start);
PrintAndLog("Tested %"PRIu32" states, found %u keys after %u seconds", total_states_tested, keys_found, elapsed_time);
if(!keys_found){
assert(total_states_tested == maximum_states);
}
// reset this counter for the next call
nonces_to_bruteforce = 0;
}
}
int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests) int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests)
{ {

View file

@ -0,0 +1,120 @@
// Bit-sliced Crypto-1 implementation
// The cipher states are stored with the least significant bit first, hence all bit indexes are reversed here
/*
Copyright (c) 2015-2016 Aram Verstegen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "crypto1_bs.h"
#include <inttypes.h>
#define __STDC_FORMAT_MACROS
#define llx PRIx64
#define lli PRIi64
#define lu PRIu32
// The following functions use this global or thread-local state
// It is sized to fit exactly KEYSTREAM_SIZE more states next to the initial state
__thread bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
// cursor to the current 48-bitslice cipher state; crypto1_bs_bit() moves it down by one
// element per clocked bit, crypto1_bs_lfsr_rollback() moves it up by one
// (presumably always pointing into states[] - confirm against the crypto1_bs_rewind_* macros)
__thread bitslice_t * restrict state_p;
// Prepare the two constant bit vectors used throughout the bitsliced code:
// bs_zeroes has every bit cleared, bs_ones has every bit set.
void crypto1_bs_init(){
	// all-zero vector
	memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
	// all-one vector
	memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
}
// The following functions have side effects on 48 bitslices at the state_p pointer
// use the crypto1_bs_rewind_* macros to (re-)initialize them as needed

// Advances the bitsliced cipher by one step.
//   input        - bitslice XORed into the new state bit
//   is_encrypted - when true, the filter output is XORed into the new state bit too
// Returns the filter output of the state as it was before the step.
// Side effect: state_p is moved down by one element and the new bit is stored
// in the element it then points to.
inline const bitslice_value_t crypto1_bs_bit(const bitslice_value_t input, const bool is_encrypted){
    // filter output of the current state, taken before anything is modified
    const bitslice_value_t keystream = crypto1_bs_f20(state_p);

    bitslice_value_t shifted_in = input;
    if(is_encrypted){
        shifted_in ^= keystream;
    }
    // fold in the feedback taps (bit indexes are reversed, hence 47-n)
    shifted_in ^= state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value;
    shifted_in ^= state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value;
    shifted_in ^= state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value;
    shifted_in ^= state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value;
    shifted_in ^= state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value;
    shifted_in ^= state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value;

    // shifting the register is just moving the window down by one slice
    state_p -= 1;
    state_p->value = shifted_in;
    return keystream;
}
// Undoes one step of the bitsliced cipher.
//   input        - bitslice that was XORed in during the step being undone
//   is_encrypted - when true, the filter output is XORed into the restored bit too
// Returns the filter output computed on the window after it has been moved.
// Side effect: state_p is moved up by one element and the restored bit is
// stored at state_p[47].
inline const bitslice_value_t crypto1_bs_lfsr_rollback(const bitslice_value_t input, const bool is_encrypted){
    // the slice that leaves the window when it moves up
    const bitslice_value_t shifted_out = state_p[0].value;
    state_p += 1;

    const bitslice_value_t keystream = crypto1_bs_f20(state_p);

    bitslice_value_t restored = shifted_out ^ input;
    if(is_encrypted){
        restored ^= keystream;
    }
    // same taps as the forward step, except that tap 0 is the bit being restored
    restored ^= state_p[47- 5].value ^ state_p[47- 9].value;
    restored ^= state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value;
    restored ^= state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value;
    restored ^= state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value;
    restored ^= state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value;
    restored ^= state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value;

    state_p[47].value = restored;
    return keystream;
}
// side-effect free from here on
// note that bytes are sliced and unsliced with reversed endianness

// Converts STATE_SIZE bitslices back into MAX_BITSLICES ordinary states.
//   bitsliced_states - input, STATE_SIZE bitslices (element i holds bit i of every state)
//   regular_states   - output, must have room for MAX_BITSLICES entries
// For each lane the bits are collected in slice order, the 64-bit word is
// byte-swapped and the unused low bits are shifted off.
inline void crypto1_bs_convert_states(bitslice_t bitsliced_states[], state_t regular_states[]){
    for(size_t slice_idx = 0; slice_idx < MAX_BITSLICES; slice_idx++){
        // start from zero: the accumulator is shifted before it is first
        // written, so it must not begin with an indeterminate value
        uint64_t acc = 0;
        for(size_t bit_idx = 0; bit_idx < STATE_SIZE; bit_idx++){
            acc <<= 1;
            acc |= get_vector_bit(slice_idx, bitsliced_states[bit_idx]);
        }
        // swap endianness
        acc = rev_state_t(acc);
        // roll off unused bits
        acc >>= ((sizeof(state_t)*8)-STATE_SIZE);
        regular_states[slice_idx].value = acc;
    }
}
// bitslice a value
// Spreads the top-down bits of the byte-swapped `value` over `bit_len`
// bitslices: element i becomes bs_ones when bit (bit_len-1-i) of rev32(value)
// is set and bs_zeroes otherwise, so every lane carries the same value.
void crypto1_bs_bitslice_value32(uint32_t value, bitslice_t bitsliced_value[], size_t bit_len){
    // load nonce bytes with unswapped endianness
    const uint32_t swapped = rev32(value);
    for(size_t i = 0; i < bit_len; i++){
        const bool bit_set = get_bit(bit_len-1-i, swapped);
        const bitslice_t *fill = bit_set ? &bs_ones : &bs_zeroes;
        bitsliced_value[i].value = fill->value;
    }
}
// Debug helper: unslices the given bitslices with crypto1_bs_convert_states()
// and prints one line per lane to stdout, each state as 12 hex digits.
void crypto1_bs_print_states(bitslice_t bitsliced_states[]){
    state_t unsliced[MAX_BITSLICES];
    crypto1_bs_convert_states(bitsliced_states, unsliced);

    size_t lane = 0;
    while(lane < MAX_BITSLICES){
        printf("State %03zu: %012"llx"\n", lane, unsliced[lane].value);
        lane++;
    }
}

View file

@ -0,0 +1,99 @@
#ifndef _CRYPTO1_BS_H
#define _CRYPTO1_BS_H
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
// bitslice type
// MAX_BITSLICES is the width of one bitslice in bits; VECTOR_SIZE is the same width in bytes.
// The width is picked from the SIMD feature macros the compiler predefines.
// while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic
// (which is why plain AVX still selects 128 below)
// same for AVX2 and 512 bit vectors
// using larger vectors works but seems to generate more register pressure
#if defined(__AVX2__)
#define MAX_BITSLICES 256
#elif defined(__AVX__)
#define MAX_BITSLICES 128
#elif defined(__SSE2__)
#define MAX_BITSLICES 128
#else
// no known vector extension reported: fall back to 64-bit wide slices
#define MAX_BITSLICES 64
#endif
#define VECTOR_SIZE (MAX_BITSLICES/8)
// bitslice_value_t: compiler vector type of VECTOR_SIZE bytes, aligned to VECTOR_SIZE,
// so the boolean operators (^, &, |) act on all MAX_BITSLICES bits at once
typedef unsigned int __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t;
// bitslice_t: one bitslice, accessible as a whole vector, as 64-bit words or as bytes
typedef union {
bitslice_value_t value; // whole-vector view used by the cipher logic
uint64_t bytes64[MAX_BITSLICES/64]; // 64-bit word view, used by get_vector_bit()
uint8_t bytes[MAX_BITSLICES/8]; // byte view, used for memset-style fills
} bitslice_t;
// filter function (f20)
// sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur
// f20a/f20b combine four inputs, f20c combines the five intermediate results.
// Every macro parameter is parenthesized so that arguments containing
// operators (or pointer arithmetic for `s`) expand as intended.
// Note that arguments are evaluated more than once: pass side-effect free expressions only.
#define f20a(a,b,c,d) ((((a)|(b))^((a)&(d)))^((c)&(((a)^(b))|(d))))
#define f20b(a,b,c,d) ((((a)&(b))|(c))^(((a)^(b))&((c)|(d))))
#define f20c(a,b,c,d,e) (((a)|(((b)|(e))&((d)^(e))))^(((a)^((b)&(d)))&(((c)^(d))|((b)&(e)))))
// s: pointer to (or array of) at least 48 elements with a `.value` member;
// reads the elements at reversed indexes 47-9, 47-11, ..., 47-47
#define crypto1_bs_f20(s) \
f20c(f20a(((s)[47- 9].value), ((s)[47-11].value), ((s)[47-13].value), ((s)[47-15].value)), \
f20b(((s)[47-17].value), ((s)[47-19].value), ((s)[47-21].value), ((s)[47-23].value)), \
f20b(((s)[47-25].value), ((s)[47-27].value), ((s)[47-29].value), ((s)[47-31].value)), \
f20a(((s)[47-33].value), ((s)[47-35].value), ((s)[47-37].value), ((s)[47-39].value)), \
f20b(((s)[47-41].value), ((s)[47-43].value), ((s)[47-45].value), ((s)[47-47].value)))
// bit indexing
// get_bit(n, word): bit n of an integer expression (0 or 1)
// get_vector_bit(slice, value): bit `slice` of a bitslice_t-like object,
// addressed through its 64-bit word view (word slice>>6, bit slice&0x3f)
// All parameters are parenthesized so that compound arguments expand correctly.
#define get_bit(n, word) (((word) >> (n)) & 1)
#define get_vector_bit(slice, value) get_bit((slice)&0x3f, (value).bytes64[(slice)>>6])
// constant ones/zeroes
// These objects are defined (not merely declared) in this header, so every
// translation unit that includes it emits them. The `common` attribute keeps
// them in common storage so the linker merges the copies into one object even
// when the compiler defaults to -fno-common (GCC 10 and later); without it the
// link fails with duplicate symbol errors.
// Both are filled in by crypto1_bs_init().
bitslice_t bs_ones __attribute__((common));
bitslice_t bs_zeroes __attribute__((common));
// size of crypto-1 state, in bits (one bitslice per state bit)
#define STATE_SIZE 48
// size of nonce to be decrypted, in bits
#define KEYSTREAM_SIZE 32
// size of first uid^nonce byte to be rolled back to the initial key, in bits
#define ROLLBACK_SIZE 8
// number of nonces required to test to cover entire 48-bit state
// NOTE(review): the original author estimated that 12 would be enough and chose
// 100 only to match another implementation ("bla") - confirm before changing
#define NONCE_TESTS 100
// state pointer management
// states is the thread-local backing array, state_p the thread-local cursor into it
// (both are defined in crypto1_bs.c)
extern __thread bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
extern __thread bitslice_t * restrict state_p;
// rewind to the point a0, at which KEYSTREAM_SIZE more bits can be generated
// (crypto1_bs_bit() moves state_p down by one element per generated bit)
#define crypto1_bs_rewind_a0() (state_p = &states[KEYSTREAM_SIZE])
// bitsliced bytewise parity
// XOR of the `.value` members of the eight consecutive elements starting at n.
// The parameter is parenthesized so that pointer expressions such as `p + 8` work.
#define bitsliced_byte_parity(n) ((n)[0].value ^ (n)[1].value ^ (n)[2].value ^ (n)[3].value ^ (n)[4].value ^ (n)[5].value ^ (n)[6].value ^ (n)[7].value)
// 48-bit crypto-1 states are normally represented using 64-bit values
// state_t gives both a 64-bit integer view and a byte-wise view of the same storage
typedef union {
uint64_t value; // the state as one 64-bit integer
uint8_t bytes[8]; // the same 8 bytes, individually addressable
} state_t;
// endianness conversion
// rev32(word): reverses the order of the four low bytes of `word`; the result
//              has the (promoted) type of the argument, so pass unsigned values
// rev64(x):    reverses the order of all eight bytes; the argument is widened
//              to uint64_t first so the 32-bit shifts are always well defined
// Parameters are parenthesized so that compound arguments expand correctly.
#define rev32(word) ((((word) & 0xff) << 24) | ((((word) >> 8) & 0xff) << 16) | ((((word) >> 16) & 0xff) << 8) | (((word) >> 24) & 0xff))
#define rev64(x) ((rev32((uint64_t)(x)) << 32) | rev32((uint64_t)(x) >> 32))
#define rev_state_t rev64
// crypto-1 functions
// Both operate on the thread-local state_p cursor and return the filter output;
// crypto1_bs_bit steps the cipher forward, crypto1_bs_lfsr_rollback steps it back.
const bitslice_value_t crypto1_bs_bit(const bitslice_value_t input, const bool is_encrypted);
const bitslice_value_t crypto1_bs_lfsr_rollback(const bitslice_value_t input, const bool is_encrypted);
// initialization functions
// fills bs_ones / bs_zeroes; declared with (void) so that calls passing
// arguments are rejected at compile time
void crypto1_bs_init(void);
// conversion functions
// crypto1_bs_bitslice_value32: writes bit_len bitslices, each all-ones or all-zeroes
// crypto1_bs_convert_states:   unslices STATE_SIZE bitslices into MAX_BITSLICES states
void crypto1_bs_bitslice_value32(uint32_t value, bitslice_t bitsliced_value[], size_t bit_len);
void crypto1_bs_convert_states(bitslice_t bitsliced_states[], state_t regular_states[]);
// debug print
// prints every unsliced state to stdout
void crypto1_bs_print_states(bitslice_t *bitsliced_states);
#endif // _CRYPTO1_BS_H