From 3e7d9fde015e2de45ba71615c874e4383e68f146 Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 5 Nov 2018 17:46:32 +0100 Subject: [PATCH] CHG: 'hf mf hardnest' auto detect supported cpu-commandset. See helptext for more info. (@piwi) --- client/cmdhfmf.c | 50 +++++++++++--- client/cmdhfmf.h | 1 + client/cmdhfmfhard.c | 43 +++++++----- client/hardnested/hardnested_bf_core.c | 95 +++++++++++++++++++++----- client/hardnested/hardnested_bf_core.h | 12 ++++ 5 files changed, 160 insertions(+), 41 deletions(-) diff --git a/client/cmdhfmf.c b/client/cmdhfmf.c index 016025fbc..8f7004d66 100644 --- a/client/cmdhfmf.c +++ b/client/cmdhfmf.c @@ -131,6 +131,13 @@ int usage_hf14_hardnested(void){ PrintAndLogEx(NORMAL, " u read/write hf-mf--nonces.bin instead of default name"); PrintAndLogEx(NORMAL, " f read/write instead of default name"); PrintAndLogEx(NORMAL, " t tests?"); + PrintAndLogEx(NORMAL, " i set type of SIMD instructions. Without this flag programs autodetect it."); + PrintAndLogEx(NORMAL, " i 5 = AVX512"); + PrintAndLogEx(NORMAL, " i 2 = AVX2"); + PrintAndLogEx(NORMAL, " i a = AVX"); + PrintAndLogEx(NORMAL, " i s = SSE2"); + PrintAndLogEx(NORMAL, " i m = MMX"); + PrintAndLogEx(NORMAL, " i n = none (use CPU regular instruction set)"); PrintAndLogEx(NORMAL, ""); PrintAndLogEx(NORMAL, "Examples:"); PrintAndLogEx(NORMAL, " hf mf hardnested 0 A FFFFFFFFFFFF 4 A"); @@ -1252,8 +1259,8 @@ int CmdHF14AMfNestedHard(const char *Cmd) { switch(tolower(param_getchar(Cmd, cmdp))) { case 'h': return usage_hf14_hardnested(); case 'r': - fptr=GenerateFilename("hf-mf-","-nonces.bin"); - if(fptr==NULL) + fptr = GenerateFilename("hf-mf-","-nonces.bin"); + if (fptr == NULL) strncpy(filename,"nonces.bin", FILE_PATH_SIZE); else strncpy(filename,fptr, FILE_PATH_SIZE); @@ -1269,10 +1276,10 @@ int CmdHF14AMfNestedHard(const char *Cmd) { if (!param_gethex(Cmd, cmdp+2, trgkey, 12)) { know_target_key = true; } - cmdp+=2; + cmdp += 2; break; default: - if(param_getchar(Cmd, cmdp) == 0x00) + if (param_getchar(Cmd, cmdp) == 0x00) { PrintAndLogEx(NORMAL, "Block number is missing"); return 1; @@ -1309,7 +1316,7 @@ int CmdHF14AMfNestedHard(const char *Cmd) { if (ctmp != 'A' && ctmp != 'a') { trgKeyType = 1; } - cmdp+=5; + cmdp += 5; } if (!param_gethex(Cmd, cmdp, trgkey, 12)) { know_target_key = true; @@ -1317,14 +1324,13 @@ int CmdHF14AMfNestedHard(const char *Cmd) { } while ((ctmp = param_getchar(Cmd, cmdp))) { - switch(tolower(ctmp)) - { + switch(tolower(ctmp)) { case 's': slow = true; break; case 'w': nonce_file_write = true; - fptr=GenerateFilename("hf-mf-","-nonces.bin"); + fptr = GenerateFilename("hf-mf-","-nonces.bin"); if (fptr == NULL) return 1; strncpy(filename, fptr, FILE_PATH_SIZE); @@ -1339,6 +1345,34 @@ int CmdHF14AMfNestedHard(const char *Cmd) { strncpy(filename, szTemp, FILE_PATH_SIZE); cmdp++; break; + case 'i': + SetSIMDInstr(SIMD_AUTO); + ctmp = tolower(param_getchar(Cmd, cmdp+1)); + switch (ctmp) { + case '5': + SetSIMDInstr(SIMD_AVX512); + break; + case '2': + SetSIMDInstr(SIMD_AVX2); + break; + case 'a': + SetSIMDInstr(SIMD_AVX); + break; + case 's': + SetSIMDInstr(SIMD_SSE2); + break; + case 'm': + SetSIMDInstr(SIMD_MMX); + break; + case 'n': + SetSIMDInstr(SIMD_NONE); + break; + default: + PrintAndLog("Unknown SIMD type. %c", ctmp); + return 1; + } + cmdp += 2; + break; default: PrintAndLogEx(WARNING, "Unknown parameter '%c'\n", ctmp); usage_hf14_hardnested(); diff --git a/client/cmdhfmf.h b/client/cmdhfmf.h index 3404548d5..b253ac0bb 100644 --- a/client/cmdhfmf.h +++ b/client/cmdhfmf.h @@ -31,6 +31,7 @@ #include "mifaredefault.h" // mifare default key array #include "cmdhf14a.h" // dropfield #include "cliparser/cliparser.h" // argtable +#include "hardnested/hardnested_bf_core.h" // SetSIMDInstr extern int CmdHFMF(const char *Cmd); diff --git a/client/cmdhfmfhard.c b/client/cmdhfmfhard.c index c35664a7f..42e97e4b9 100644 --- a/client/cmdhfmfhard.c +++ b/client/cmdhfmfhard.c @@ -33,6 +33,7 @@ #include "crapto1/crapto1.h" #include "parity.h" #include "hardnested/hardnested_bruteforce.h" +#include "hardnested/hardnested_bf_core.h" #include "hardnested/hardnested_bitarray_core.h" #include "zlib.h" @@ -72,22 +73,27 @@ static float brute_force_per_second; static void get_SIMD_instruction_set(char* instruction_set) { -#if defined (__i386__) || defined (__x86_64__) - #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) - #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) - if (__builtin_cpu_supports("avx512f")) strcpy(instruction_set, "AVX512F"); - else if (__builtin_cpu_supports("avx2")) strcpy(instruction_set, "AVX2"); - #else - if (__builtin_cpu_supports("avx2")) strcpy(instruction_set, "AVX2"); - #endif - else if (__builtin_cpu_supports("avx")) strcpy(instruction_set, "AVX"); - else if (__builtin_cpu_supports("sse2")) strcpy(instruction_set, "SSE2"); - else if (__builtin_cpu_supports("mmx")) strcpy(instruction_set, "MMX"); - else - #endif -#endif - strcpy(instruction_set, "no"); -} + switch(GetSIMDInstrAuto()) { + case SIMD_AVX512: + strcpy(instruction_set, "AVX512F"); + break; + case SIMD_AVX2: + strcpy(instruction_set, "AVX2"); + break; + case SIMD_AVX: + strcpy(instruction_set, "AVX"); + break; + case SIMD_SSE2: + strcpy(instruction_set, "SSE2"); + break; + case SIMD_MMX: + strcpy(instruction_set, "MMX"); + break; + default: + strcpy(instruction_set, "no"); + break; + } +} static void print_progress_header(void) { @@ -267,6 +273,7 @@ static void init_bitflip_bitarrays(void) if (bytesread != filesize) { PrintAndLogEx(WARNING, "File read error with %s. Aborting...\n", state_file_name); fclose(statesfile); + inflateEnd(&compressed_stream); exit(5); } fclose(statesfile); @@ -2208,6 +2215,10 @@ static void set_test_state(uint8_t byte) int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests, uint64_t *foundkey, char *filename) { char progress_text[80]; + + char instr_set[12] = {0}; + get_SIMD_instruction_set(instr_set); + PrintAndLog("Using %s SIMD core.", instr_set); srand((unsigned) time(NULL)); brute_force_per_second = brute_force_benchmark(); diff --git a/client/hardnested/hardnested_bf_core.c b/client/hardnested/hardnested_bf_core.c index 3982a144a..c00434a52 100644 --- a/client/hardnested/hardnested_bf_core.c +++ b/client/hardnested/hardnested_bf_core.c @@ -548,44 +548,105 @@ out: crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch; bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch; -// determine the available instruction set at runtime and call the correct function -const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { +static SIMDExecInstr intSIMDInstr = SIMD_AUTO; + +void SetSIMDInstr(SIMDExecInstr instr) { + intSIMDInstr = instr; + + crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch; + bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch; +} + +SIMDExecInstr GetSIMDInstr() { + SIMDExecInstr instr = SIMD_NONE; + #if defined (__i386__) || defined (__x86_64__) - #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) + #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) - if (__builtin_cpu_supports("avx512f")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512; - else if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; + if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512; + else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2; #else - if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; + if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2; #endif - else if (__builtin_cpu_supports("avx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX; - else if (__builtin_cpu_supports("sse2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2; - else if (__builtin_cpu_supports("mmx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX; + else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX; + else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2; + else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX; else #endif #endif + instr = SIMD_NONE; + + return instr; +} + +SIMDExecInstr GetSIMDInstrAuto() { + SIMDExecInstr instr = intSIMDInstr; + if (instr == SIMD_AUTO) + return GetSIMDInstr(); + + return instr; +} + +// determine the available instruction set at runtime and call the correct function +const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { + switch(GetSIMDInstrAuto()) { +#if defined (__i386__) || defined (__x86_64__) + #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) + #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) + case SIMD_AVX512: + crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512; + break; + #endif + case SIMD_AVX2: + crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; + break; + case SIMD_AVX: + crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX; + break; + case SIMD_SSE2: + crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2; + break; + case SIMD_MMX: + crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX; + break; + #endif +#endif + default: crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD; + break; + } // call the most optimized function for this CPU return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces); } void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) { + switch(GetSIMDInstrAuto()) { #if defined (__i386__) || defined (__x86_64__) #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) - if (__builtin_cpu_supports("avx512f")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512; - else if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; - #else - if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; + case SIMD_AVX512: + bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512; + break; #endif - else if (__builtin_cpu_supports("avx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX; - else if (__builtin_cpu_supports("sse2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2; - else if (__builtin_cpu_supports("mmx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX; - else + case SIMD_AVX2: + bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; + break; + case SIMD_AVX: + bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX; + break; + case SIMD_SSE2: + bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2; + break; + case SIMD_MMX: + bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX; + break; #endif #endif + default: bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD; + break; + } // call the most optimized function for this CPU (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par); diff --git a/client/hardnested/hardnested_bf_core.h b/client/hardnested/hardnested_bf_core.h index 7a445993e..b3df05474 100644 --- a/client/hardnested/hardnested_bf_core.h +++ b/client/hardnested/hardnested_bf_core.h @@ -52,6 +52,18 @@ THE SOFTWARE. #include "hardnested_bruteforce.h" // statelist_t +typedef enum { + SIMD_AUTO, + SIMD_AVX512, + SIMD_AVX2, + SIMD_AVX, + SIMD_SSE2, + SIMD_MMX, + SIMD_NONE, +} SIMDExecInstr; +extern void SetSIMDInstr(SIMDExecInstr instr); +extern SIMDExecInstr GetSIMDInstrAuto(); + extern const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonces_2nd_byte, noncelist_t *nonces); extern void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonces, uint8_t *bf_test_nonce_par);