From 62704db5ed7e237a7e8b93486574050c3a3b4c5f Mon Sep 17 00:00:00 2001 From: Doridian Date: Sun, 27 Mar 2022 10:19:34 -0700 Subject: [PATCH 1/3] Add very basic feature flagging for NEON --- client/deps/hardnested.cmake | 21 ++++ client/deps/hardnested/Makefile | 31 ++++- client/deps/hardnested/hardnested_bf_core.c | 24 +++- client/deps/hardnested/hardnested_bf_core.h | 13 +++ .../hardnested/hardnested_bitarray_core.c | 107 +++++++++++++++--- client/src/cmdhfmf.c | 25 ++++ client/src/cmdhfmfhard.c | 5 + doc/commands.json | 10 +- 8 files changed, 210 insertions(+), 26 deletions(-) diff --git a/client/deps/hardnested.cmake b/client/deps/hardnested.cmake index dc569641c..b9651f394 100644 --- a/client/deps/hardnested.cmake +++ b/client/deps/hardnested.cmake @@ -10,10 +10,13 @@ target_include_directories(pm3rrg_rdv4_hardnested_nosimd PRIVATE ../../include ../src) +target_compile_definitions(pm3rrg_rdv4_hardnested_nosimd PRIVATE NOSIMD_BUILD) + ## CPU-specific code ## These are mostly for x86-based architectures, which is not useful for many Android devices. 
## Mingw platforms: AMD64 set(X86_CPUS x86 x86_64 i686 AMD64) +set(ARM64_CPUS arm64 aarch64) message(STATUS "CMAKE_SYSTEM_PROCESSOR := ${CMAKE_SYSTEM_PROCESSOR}") @@ -104,6 +107,24 @@ if ("${CMAKE_SYSTEM_PROCESSOR}" IN_LIST X86_CPUS) $ $ $) +elseif ("${CMAKE_SYSTEM_PROCESSOR}" IN_LIST ARM64_CPUS) + message(STATUS "Building optimised arm64 binaries") + + ## arm64 / NEON + add_library(pm3rrg_rdv4_hardnested_neon OBJECT + hardnested/hardnested_bf_core.c + hardnested/hardnested_bitarray_core.c) + + target_compile_options(pm3rrg_rdv4_hardnested_neon PRIVATE -Wall -Werror -O3) + set_property(TARGET pm3rrg_rdv4_hardnested_neon PROPERTY POSITION_INDEPENDENT_CODE ON) + + target_include_directories(pm3rrg_rdv4_hardnested_neon PRIVATE + ../../common + ../../include + ../src) + + set(SIMD_TARGETS + $<TARGET_OBJECTS:pm3rrg_rdv4_hardnested_neon>) else () message(STATUS "Not building optimised targets") set(SIMD_TARGETS) diff --git a/client/deps/hardnested/Makefile b/client/deps/hardnested/Makefile index 624eb730f..5f58606ae 100644 --- a/client/deps/hardnested/Makefile +++ b/client/deps/hardnested/Makefile @@ -11,6 +11,9 @@ endif ifneq ($(findstring amd64, $(cpu_arch)), ) MULTIARCHSRCS = hardnested_bf_core.c hardnested_bitarray_core.c endif +ifneq ($(findstring arm64, $(cpu_arch)), ) + MULTIARCHSRCS = hardnested_bf_core.c hardnested_bitarray_core.c +endif ifeq ($(MULTIARCHSRCS), ) MYSRCS += hardnested_bf_core.c hardnested_bitarray_core.c endif @@ -18,20 +21,30 @@ endif LIB_A = libhardnested.a MYOBJS = $(MYSRCS:%.c=$(OBJDIR)/%.o) -MYOBJS += $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_NOSIMD.o) \ - $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_MMX.o) \ - $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_SSE2.o) \ - $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_AVX.o) \ - $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_AVX2.o) +ifneq ($(findstring arm64, $(cpu_arch)), ) + MYOBJS += $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_NOSIMD.o) \ + $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_NEON.o) +else + MYOBJS += $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_NOSIMD.o) \ + $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_MMX.o) \ 
$(MULTIARCHSRCS:%.c=$(OBJDIR)/%_SSE2.o) \ + $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_AVX.o) \ + $(MULTIARCHSRCS:%.c=$(OBJDIR)/%_AVX2.o) +endif SUPPORTS_AVX512 := $(shell echo | $(CC) -E -mavx512f - > /dev/null 2>&1 && echo "True" ) -HARD_SWITCH_NOSIMD = -mno-mmx -mno-sse2 -mno-avx -mno-avx2 +HARD_SWITCH_NOSIMD = -mno-mmx -mno-sse2 -mno-avx -mno-avx2 -DNOSIMD_BUILD +HARD_SWITCH_NEON = HARD_SWITCH_MMX = -mmmx -mno-sse2 -mno-avx -mno-avx2 HARD_SWITCH_SSE2 = -mmmx -msse2 -mno-avx -mno-avx2 HARD_SWITCH_AVX = -mmmx -msse2 -mavx -mno-avx2 HARD_SWITCH_AVX2 = -mmmx -msse2 -mavx -mavx2 HARD_SWITCH_AVX512 = -mmmx -msse2 -mavx -mavx2 -mavx512f +ifneq ($(findstring arm64, $(cpu_arch)), ) + SUPPORTS_AVX512=0 + HARD_SWITCH_NOSIMD = -DNOSIMD_BUILD +endif ifeq "$(SUPPORTS_AVX512)" "True" HARD_SWITCH_NOSIMD += -mno-avx512f HARD_SWITCH_MMX += -mno-avx512f @@ -52,6 +65,12 @@ $(OBJDIR)/%_NOSIMD.o : %.c $(OBJDIR)/%_NOSIMD.d $(Q)$(CC) $(DEPFLAGS:%.Td=%_NOSIMD.Td) $(CFLAGS) $(HARD_SWITCH_NOSIMD) -c -o $@ $< $(Q)$(MV) -f $(OBJDIR)/$*_NOSIMD.Td $(OBJDIR)/$*_NOSIMD.d && $(TOUCH) $@ +$(OBJDIR)/%_NEON.o : %.c $(OBJDIR)/%_NEON.d + $(info [-] CC(NEON) $<) + $(Q)$(MKDIR) $(dir $@) + $(Q)$(CC) $(DEPFLAGS:%.Td=%_NEON.Td) $(CFLAGS) $(HARD_SWITCH_NEON) -c -o $@ $< + $(Q)$(MV) -f $(OBJDIR)/$*_NEON.Td $(OBJDIR)/$*_NEON.d && $(TOUCH) $@ + $(OBJDIR)/%_MMX.o : %.c $(OBJDIR)/%_MMX.d $(info [-] CC(MMX) $<) $(Q)$(MKDIR) $(dir $@) diff --git a/client/deps/hardnested/hardnested_bf_core.c b/client/deps/hardnested/hardnested_bf_core.c index c9af2702b..65d77adf6 100644 --- a/client/deps/hardnested/hardnested_bf_core.c +++ b/client/deps/hardnested/hardnested_bf_core.c @@ -74,7 +74,7 @@ THE SOFTWARE. 
#define MAX_BITSLICES 128 #elif defined(__SSE2__) #define MAX_BITSLICES 128 -#elif defined(__ARM_NEON) +#elif defined(__ARM_NEON) && !defined(NOSIMD_BUILD) #define MAX_BITSLICES 128 #else // MMX or SSE or NOSIMD #define MAX_BITSLICES 64 @@ -120,6 +120,9 @@ typedef union { #elif defined (__MMX__) #define BITSLICE_TEST_NONCES bitslice_test_nonces_MMX #define CRACK_STATES_BITSLICED crack_states_bitsliced_MMX +#elif defined (__ARM_NEON) && !defined(NOSIMD_BUILD) +#define BITSLICE_TEST_NONCES bitslice_test_nonces_NEON +#define CRACK_STATES_BITSLICED crack_states_bitsliced_NEON #else #define BITSLICE_TEST_NONCES bitslice_test_nonces_NOSIMD #define CRACK_STATES_BITSLICED crack_states_bitsliced_NOSIMD @@ -132,6 +135,7 @@ crack_states_bitsliced_t crack_states_bitsliced_AVX2; crack_states_bitsliced_t crack_states_bitsliced_AVX; crack_states_bitsliced_t crack_states_bitsliced_SSE2; crack_states_bitsliced_t crack_states_bitsliced_MMX; +crack_states_bitsliced_t crack_states_bitsliced_NEON; crack_states_bitsliced_t crack_states_bitsliced_NOSIMD; crack_states_bitsliced_t crack_states_bitsliced_dispatch; @@ -141,6 +145,7 @@ bitslice_test_nonces_t bitslice_test_nonces_AVX2; bitslice_test_nonces_t bitslice_test_nonces_AVX; bitslice_test_nonces_t bitslice_test_nonces_SSE2; bitslice_test_nonces_t bitslice_test_nonces_MMX; +bitslice_test_nonces_t bitslice_test_nonces_NEON; bitslice_test_nonces_t bitslice_test_nonces_NOSIMD; bitslice_test_nonces_t bitslice_test_nonces_dispatch; @@ -545,7 +550,7 @@ out: -#ifndef __MMX__ +#ifdef NOSIMD_BUILD // pointers to functions: crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch; @@ -582,6 +587,11 @@ static SIMDExecInstr GetSIMDInstr(void) { else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX; else +#endif +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) + instr = SIMD_NEON; + else #endif instr = SIMD_NONE; @@ -620,6 +630,11 @@ uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t 
*best_first_byte case SIMD_MMX: crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX; break; +#endif +#if defined(COMPILER_HAS_SIMD_NEON) + case SIMD_NEON: + crack_states_bitsliced_function_p = &crack_states_bitsliced_NEON; + break; #endif case SIMD_AUTO: case SIMD_NONE: @@ -651,6 +666,11 @@ void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, const uint32_t case SIMD_MMX: bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX; break; +#endif +#if defined(COMPILER_HAS_SIMD_NEON) + case SIMD_NEON: + bitslice_test_nonces_function_p = &bitslice_test_nonces_NEON; + break; #endif case SIMD_AUTO: case SIMD_NONE: diff --git a/client/deps/hardnested/hardnested_bf_core.h b/client/deps/hardnested/hardnested_bf_core.h index 51eca7de5..4693462e3 100644 --- a/client/deps/hardnested/hardnested_bf_core.h +++ b/client/deps/hardnested/hardnested_bf_core.h @@ -61,6 +61,16 @@ THE SOFTWARE. # endif #endif +// ARM64 mandates implementation of NEON +#if defined(__arm64__) || defined(__aarch64__) +#define COMPILER_HAS_SIMD_NEON +#define arm_has_neon() (true) +// ARMv7 or older, NEON is optional and autodetection is difficult +#elif defined(__ARM_NEON) +#define COMPILER_HAS_SIMD_NEON +#define arm_has_neon() (false) +#endif + typedef enum { SIMD_AUTO, #if defined(COMPILER_HAS_SIMD_AVX512) @@ -71,6 +81,9 @@ typedef enum { SIMD_AVX, SIMD_SSE2, SIMD_MMX, +#endif +#if defined(COMPILER_HAS_SIMD_NEON) + SIMD_NEON, #endif SIMD_NONE, } SIMDExecInstr; diff --git a/client/deps/hardnested/hardnested_bitarray_core.c b/client/deps/hardnested/hardnested_bitarray_core.c index d62da774c..68a885be9 100644 --- a/client/deps/hardnested/hardnested_bitarray_core.c +++ b/client/deps/hardnested/hardnested_bitarray_core.c @@ -98,6 +98,20 @@ #define COUNT_BITARRAY_AND2 count_bitarray_AND2_MMX #define COUNT_BITARRAY_AND3 count_bitarray_AND3_MMX #define COUNT_BITARRAY_AND4 count_bitarray_AND4_MMX +#elif defined (__ARM_NEON) && !defined (NOSIMD_BUILD) +#define MALLOC_BITARRAY malloc_bitarray_NEON +#define 
FREE_BITARRAY free_bitarray_NEON +#define BITCOUNT bitcount_NEON +#define COUNT_STATES count_states_NEON +#define BITARRAY_AND bitarray_AND_NEON +#define BITARRAY_LOW20_AND bitarray_low20_AND_NEON +#define COUNT_BITARRAY_AND count_bitarray_AND_NEON +#define COUNT_BITARRAY_LOW20_AND count_bitarray_low20_AND_NEON +#define BITARRAY_AND4 bitarray_AND4_NEON +#define BITARRAY_OR bitarray_OR_NEON +#define COUNT_BITARRAY_AND2 count_bitarray_AND2_NEON +#define COUNT_BITARRAY_AND3 count_bitarray_AND3_NEON +#define COUNT_BITARRAY_AND4 count_bitarray_AND4_NEON #else #define MALLOC_BITARRAY malloc_bitarray_NOSIMD #define FREE_BITARRAY free_bitarray_NOSIMD @@ -117,31 +131,31 @@ // typedefs and declaration of functions: typedef uint32_t *malloc_bitarray_t(uint32_t); -malloc_bitarray_t malloc_bitarray_AVX512, malloc_bitarray_AVX2, malloc_bitarray_AVX, malloc_bitarray_SSE2, malloc_bitarray_MMX, malloc_bitarray_NOSIMD, malloc_bitarray_dispatch; +malloc_bitarray_t malloc_bitarray_AVX512, malloc_bitarray_AVX2, malloc_bitarray_AVX, malloc_bitarray_SSE2, malloc_bitarray_MMX, malloc_bitarray_NOSIMD, malloc_bitarray_NEON, malloc_bitarray_dispatch; typedef void free_bitarray_t(uint32_t *); -free_bitarray_t free_bitarray_AVX512, free_bitarray_AVX2, free_bitarray_AVX, free_bitarray_SSE2, free_bitarray_MMX, free_bitarray_NOSIMD, free_bitarray_dispatch; +free_bitarray_t free_bitarray_AVX512, free_bitarray_AVX2, free_bitarray_AVX, free_bitarray_SSE2, free_bitarray_MMX, free_bitarray_NOSIMD, free_bitarray_NEON, free_bitarray_dispatch; typedef uint32_t bitcount_t(uint32_t); -bitcount_t bitcount_AVX512, bitcount_AVX2, bitcount_AVX, bitcount_SSE2, bitcount_MMX, bitcount_NOSIMD, bitcount_dispatch; +bitcount_t bitcount_AVX512, bitcount_AVX2, bitcount_AVX, bitcount_SSE2, bitcount_MMX, bitcount_NOSIMD, bitcount_NEON, bitcount_dispatch; typedef uint32_t count_states_t(uint32_t *); -count_states_t count_states_AVX512, count_states_AVX2, count_states_AVX, count_states_SSE2, count_states_MMX, 
count_states_NOSIMD, count_states_dispatch; +count_states_t count_states_AVX512, count_states_AVX2, count_states_AVX, count_states_SSE2, count_states_MMX, count_states_NOSIMD, count_states_NEON, count_states_dispatch; typedef void bitarray_AND_t(uint32_t[], uint32_t[]); -bitarray_AND_t bitarray_AND_AVX512, bitarray_AND_AVX2, bitarray_AND_AVX, bitarray_AND_SSE2, bitarray_AND_MMX, bitarray_AND_NOSIMD, bitarray_AND_dispatch; +bitarray_AND_t bitarray_AND_AVX512, bitarray_AND_AVX2, bitarray_AND_AVX, bitarray_AND_SSE2, bitarray_AND_MMX, bitarray_AND_NOSIMD, bitarray_AND_NEON, bitarray_AND_dispatch; typedef void bitarray_low20_AND_t(uint32_t *, uint32_t *); -bitarray_low20_AND_t bitarray_low20_AND_AVX512, bitarray_low20_AND_AVX2, bitarray_low20_AND_AVX, bitarray_low20_AND_SSE2, bitarray_low20_AND_MMX, bitarray_low20_AND_NOSIMD, bitarray_low20_AND_dispatch; +bitarray_low20_AND_t bitarray_low20_AND_AVX512, bitarray_low20_AND_AVX2, bitarray_low20_AND_AVX, bitarray_low20_AND_SSE2, bitarray_low20_AND_MMX, bitarray_low20_AND_NOSIMD, bitarray_low20_AND_NEON, bitarray_low20_AND_dispatch; typedef uint32_t count_bitarray_AND_t(uint32_t *, uint32_t *); -count_bitarray_AND_t count_bitarray_AND_AVX512, count_bitarray_AND_AVX2, count_bitarray_AND_AVX, count_bitarray_AND_SSE2, count_bitarray_AND_MMX, count_bitarray_AND_NOSIMD, count_bitarray_AND_dispatch; +count_bitarray_AND_t count_bitarray_AND_AVX512, count_bitarray_AND_AVX2, count_bitarray_AND_AVX, count_bitarray_AND_SSE2, count_bitarray_AND_MMX, count_bitarray_AND_NOSIMD, count_bitarray_AND_NEON, count_bitarray_AND_dispatch; typedef uint32_t count_bitarray_low20_AND_t(uint32_t *, uint32_t *); -count_bitarray_low20_AND_t count_bitarray_low20_AND_AVX512, count_bitarray_low20_AND_AVX2, count_bitarray_low20_AND_AVX, count_bitarray_low20_AND_SSE2, count_bitarray_low20_AND_MMX, count_bitarray_low20_AND_NOSIMD, count_bitarray_low20_AND_dispatch; +count_bitarray_low20_AND_t count_bitarray_low20_AND_AVX512, count_bitarray_low20_AND_AVX2, 
count_bitarray_low20_AND_AVX, count_bitarray_low20_AND_SSE2, count_bitarray_low20_AND_MMX, count_bitarray_low20_AND_NOSIMD, count_bitarray_low20_AND_NEON, count_bitarray_low20_AND_dispatch; typedef void bitarray_AND4_t(uint32_t *, uint32_t *, uint32_t *, uint32_t *); -bitarray_AND4_t bitarray_AND4_AVX512, bitarray_AND4_AVX2, bitarray_AND4_AVX, bitarray_AND4_SSE2, bitarray_AND4_MMX, bitarray_AND4_NOSIMD, bitarray_AND4_dispatch; +bitarray_AND4_t bitarray_AND4_AVX512, bitarray_AND4_AVX2, bitarray_AND4_AVX, bitarray_AND4_SSE2, bitarray_AND4_MMX, bitarray_AND4_NOSIMD, bitarray_AND4_NEON, bitarray_AND4_dispatch; typedef void bitarray_OR_t(uint32_t[], uint32_t[]); -bitarray_OR_t bitarray_OR_AVX512, bitarray_OR_AVX2, bitarray_OR_AVX, bitarray_OR_SSE2, bitarray_OR_MMX, bitarray_OR_NOSIMD, bitarray_OR_dispatch; +bitarray_OR_t bitarray_OR_AVX512, bitarray_OR_AVX2, bitarray_OR_AVX, bitarray_OR_SSE2, bitarray_OR_MMX, bitarray_OR_NOSIMD, bitarray_OR_NEON, bitarray_OR_dispatch; typedef uint32_t count_bitarray_AND2_t(uint32_t *, uint32_t *); -count_bitarray_AND2_t count_bitarray_AND2_AVX512, count_bitarray_AND2_AVX2, count_bitarray_AND2_AVX, count_bitarray_AND2_SSE2, count_bitarray_AND2_MMX, count_bitarray_AND2_NOSIMD, count_bitarray_AND2_dispatch; +count_bitarray_AND2_t count_bitarray_AND2_AVX512, count_bitarray_AND2_AVX2, count_bitarray_AND2_AVX, count_bitarray_AND2_SSE2, count_bitarray_AND2_MMX, count_bitarray_AND2_NOSIMD, count_bitarray_AND2_NEON, count_bitarray_AND2_dispatch; typedef uint32_t count_bitarray_AND3_t(uint32_t *, uint32_t *, uint32_t *); -count_bitarray_AND3_t count_bitarray_AND3_AVX512, count_bitarray_AND3_AVX2, count_bitarray_AND3_AVX, count_bitarray_AND3_SSE2, count_bitarray_AND3_MMX, count_bitarray_AND3_NOSIMD, count_bitarray_AND3_dispatch; +count_bitarray_AND3_t count_bitarray_AND3_AVX512, count_bitarray_AND3_AVX2, count_bitarray_AND3_AVX, count_bitarray_AND3_SSE2, count_bitarray_AND3_MMX, count_bitarray_AND3_NOSIMD, count_bitarray_AND3_NEON, 
count_bitarray_AND3_dispatch; typedef uint32_t count_bitarray_AND4_t(uint32_t *, uint32_t *, uint32_t *, uint32_t *); -count_bitarray_AND4_t count_bitarray_AND4_AVX512, count_bitarray_AND4_AVX2, count_bitarray_AND4_AVX, count_bitarray_AND4_SSE2, count_bitarray_AND4_MMX, count_bitarray_AND4_NOSIMD, count_bitarray_AND4_dispatch; +count_bitarray_AND4_t count_bitarray_AND4_AVX512, count_bitarray_AND4_AVX2, count_bitarray_AND4_AVX, count_bitarray_AND4_SSE2, count_bitarray_AND4_MMX, count_bitarray_AND4_NOSIMD, count_bitarray_AND4_NEON, count_bitarray_AND4_dispatch; inline uint32_t *MALLOC_BITARRAY(uint32_t x) { @@ -287,7 +301,7 @@ inline uint32_t COUNT_BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, } -#ifndef __MMX__ +#ifdef NOSIMD_BUILD // pointers to functions: malloc_bitarray_t *malloc_bitarray_function_p = &malloc_bitarray_dispatch; @@ -306,6 +320,11 @@ count_bitarray_AND4_t *count_bitarray_AND4_function_p = &count_bitarray_AND4_dis // determine the available instruction set at runtime and call the correct function uint32_t *malloc_bitarray_dispatch(uint32_t x) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) malloc_bitarray_function_p = &malloc_bitarray_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) malloc_bitarray_function_p = &malloc_bitarray_AVX512; else @@ -324,6 +343,11 @@ uint32_t *malloc_bitarray_dispatch(uint32_t x) { } void free_bitarray_dispatch(uint32_t *x) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) free_bitarray_function_p = &free_bitarray_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) free_bitarray_function_p = &free_bitarray_AVX512; else @@ -342,6 +366,11 @@ void free_bitarray_dispatch(uint32_t *x) { } uint32_t bitcount_dispatch(uint32_t a) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) bitcount_function_p = &bitcount_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if 
(__builtin_cpu_supports("avx512f")) bitcount_function_p = &bitcount_AVX512; else @@ -360,6 +389,11 @@ uint32_t bitcount_dispatch(uint32_t a) { } uint32_t count_states_dispatch(uint32_t *bitarray) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) count_states_function_p = &count_states_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) count_states_function_p = &count_states_AVX512; else @@ -378,6 +412,11 @@ uint32_t count_states_dispatch(uint32_t *bitarray) { } void bitarray_AND_dispatch(uint32_t *A, uint32_t *B) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) bitarray_AND_function_p = &bitarray_AND_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) bitarray_AND_function_p = &bitarray_AND_AVX512; else @@ -396,6 +435,11 @@ void bitarray_AND_dispatch(uint32_t *A, uint32_t *B) { } void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) bitarray_low20_AND_function_p = &bitarray_low20_AND_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX512; else @@ -414,6 +458,11 @@ void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) { } uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) count_bitarray_AND_function_p = &count_bitarray_AND_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX512; else @@ -432,6 +481,11 @@ uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B) { } uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_NEON; + else +#endif + #if 
defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX512; else @@ -450,6 +504,11 @@ uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) { } void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) bitarray_AND4_function_p = &bitarray_AND4_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) bitarray_AND4_function_p = &bitarray_AND4_AVX512; else @@ -468,6 +527,11 @@ void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) } void bitarray_OR_dispatch(uint32_t *A, uint32_t *B) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) bitarray_OR_function_p = &bitarray_OR_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) bitarray_OR_function_p = &bitarray_OR_AVX512; else @@ -486,6 +550,11 @@ void bitarray_OR_dispatch(uint32_t *A, uint32_t *B) { } uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) count_bitarray_AND2_function_p = &count_bitarray_AND2_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX512; else @@ -504,6 +573,11 @@ uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) { } uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) count_bitarray_AND3_function_p = &count_bitarray_AND3_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX512; else @@ -522,6 +596,11 @@ uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) { } uint32_t 
count_bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) { +#if defined(COMPILER_HAS_SIMD_NEON) + if (arm_has_neon()) count_bitarray_AND4_function_p = &count_bitarray_AND4_NEON; + else +#endif + #if defined(COMPILER_HAS_SIMD_AVX512) if (__builtin_cpu_supports("avx512f")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX512; else diff --git a/client/src/cmdhfmf.c b/client/src/cmdhfmf.c index 81463e3ed..5d1f34dd5 100644 --- a/client/src/cmdhfmf.c +++ b/client/src/cmdhfmf.c @@ -1873,6 +1873,9 @@ static int CmdHF14AMfNestedHard(const char *Cmd) { #endif #if defined(COMPILER_HAS_SIMD_AVX512) arg_lit0(NULL, "i5", "AVX512"), +#endif +#if defined(COMPILER_HAS_SIMD_NEON) + arg_lit0(NULL, "ie", "NEON"), #endif arg_param_end }; @@ -1930,6 +1933,9 @@ static int CmdHF14AMfNestedHard(const char *Cmd) { #endif #if defined(COMPILER_HAS_SIMD_AVX512) bool i5 = arg_get_lit(ctx, 20); +#endif +#if defined(COMPILER_HAS_SIMD_NEON) + bool ie = arg_get_lit(ctx, 16); #endif CLIParserFree(ctx); @@ -1951,6 +1957,12 @@ static int CmdHF14AMfNestedHard(const char *Cmd) { if (im) SetSIMDInstr(SIMD_MMX); #endif + +#if defined(COMPILER_HAS_SIMD_NEON) + if (ie) + SetSIMDInstr(SIMD_NEON); +#endif + if (in) SetSIMDInstr(SIMD_NONE); @@ -2067,6 +2079,9 @@ static int CmdHF14AMfAutoPWN(const char *Cmd) { #endif #if defined(COMPILER_HAS_SIMD_AVX512) arg_lit0(NULL, "i5", "AVX512"), +#endif +#if defined(COMPILER_HAS_SIMD_NEON) + arg_lit0(NULL, "ie", "NEON"), #endif arg_param_end }; @@ -2118,6 +2133,10 @@ static int CmdHF14AMfAutoPWN(const char *Cmd) { #if defined(COMPILER_HAS_SIMD_AVX512) bool i5 = arg_get_lit(ctx, 18); #endif +#if defined(COMPILER_HAS_SIMD_NEON) + bool ie = arg_get_lit(ctx, 14); +#endif + CLIParserFree(ctx); //validations @@ -2167,6 +2186,12 @@ static int CmdHF14AMfAutoPWN(const char *Cmd) { if (im) SetSIMDInstr(SIMD_MMX); #endif + +#if defined(COMPILER_HAS_SIMD_NEON) + if (ie) + SetSIMDInstr(SIMD_NEON); +#endif + if (in) SetSIMDInstr(SIMD_NONE); diff --git 
a/client/src/cmdhfmfhard.c b/client/src/cmdhfmfhard.c index 6b219d574..0ed615bd1 100644 --- a/client/src/cmdhfmfhard.c +++ b/client/src/cmdhfmfhard.c @@ -101,6 +101,11 @@ static void get_SIMD_instruction_set(char *instruction_set) { case SIMD_MMX: strcpy(instruction_set, "MMX"); break; +#endif +#if defined(COMPILER_HAS_SIMD_NEON) + case SIMD_NEON: + strcpy(instruction_set, "NEON"); + break; #endif case SIMD_AUTO: case SIMD_NONE: diff --git a/doc/commands.json b/doc/commands.json index 8d302b9f1..ddea4a265 100644 --- a/doc/commands.json +++ b/doc/commands.json @@ -3643,9 +3643,10 @@ "--is sse2", "--ia avx", "--i2 avx2", - "--i5 avx512" + "--i5 avx512", + "--ie neon" ], - "usage": "hf mf autopwn [-habslv] [-k ] [-s ] [-f ] [--mini] [--1k] [--2k] [--4k] [--in] [--im] [--is] [--ia] [--i2] [--i5]" + "usage": "hf mf autopwn [-habslv] [-k ] [-s ] [-f ] [--mini] [--1k] [--2k] [--4k] [--in] [--im] [--is] [--ia] [--i2] [--i5] [--ie]" }, "hf mf cgetblk": { "command": "hf mf cgetblk", @@ -4122,9 +4123,10 @@ "--is sse2", "--ia avx", "--i2 avx2", - "--i5 avx512" + "--i5 avx512", + "--ie neon" ], - "usage": "hf mf hardnested [-habrstw] [-k ] [--blk ] [--tblk ] [--ta] [--tb] [--tk ] [-u ] [-f ] [--in] [--im] [--is] [--ia] [--i2] [--i5]" + "usage": "hf mf hardnested [-habrstw] [-k ] [--blk ] [--tblk ] [--ta] [--tb] [--tk ] [-u ] [-f ] [--in] [--im] [--is] [--ia] [--i2] [--i5] [--ie]" }, "hf mf help": { "command": "hf mf help", From 436bfff41e8f4c6ca0c6c1a6218045ae4cbac81c Mon Sep 17 00:00:00 2001 From: Mark Dietzer Date: Tue, 29 Mar 2022 07:32:11 -0700 Subject: [PATCH 2/3] rename COMPILER_HAS_SIMD to COMPILER_HAS_SIMD_X86 --- client/deps/hardnested/hardnested_bf_core.c | 8 +++--- client/deps/hardnested/hardnested_bf_core.h | 6 ++--- .../hardnested/hardnested_bitarray_core.c | 26 +++++++++---------- client/src/cmdhfmf.c | 12 ++++----- client/src/cmdhfmfhard.c | 2 +- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/client/deps/hardnested/hardnested_bf_core.c 
b/client/deps/hardnested/hardnested_bf_core.c index 65d77adf6..17f296a27 100644 --- a/client/deps/hardnested/hardnested_bf_core.c +++ b/client/deps/hardnested/hardnested_bf_core.c @@ -568,7 +568,7 @@ void SetSIMDInstr(SIMDExecInstr instr) { static SIMDExecInstr GetSIMDInstr(void) { SIMDExecInstr instr; -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) __builtin_cpu_init(); #endif @@ -577,7 +577,7 @@ static SIMDExecInstr GetSIMDInstr(void) { instr = SIMD_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2; else if (__builtin_cpu_supports("avx")) @@ -617,7 +617,7 @@ uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_byte crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512; break; #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) case SIMD_AVX2: crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; break; @@ -653,7 +653,7 @@ void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, const uint32_t bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512; break; #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) case SIMD_AVX2: bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; break; diff --git a/client/deps/hardnested/hardnested_bf_core.h b/client/deps/hardnested/hardnested_bf_core.h index 4693462e3..006b85da2 100644 --- a/client/deps/hardnested/hardnested_bf_core.h +++ b/client/deps/hardnested/hardnested_bf_core.h @@ -55,8 +55,8 @@ THE SOFTWARE. 
#if ( defined (__i386__) || defined (__x86_64__) ) && \ ( !defined(__APPLE__) || \ (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) ) -# define COMPILER_HAS_SIMD -# if defined(COMPILER_HAS_SIMD) && ((__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)) +# define COMPILER_HAS_SIMD_X86 +# if defined(COMPILER_HAS_SIMD_X86) && ((__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)) # define COMPILER_HAS_SIMD_AVX512 # endif #endif @@ -76,7 +76,7 @@ typedef enum { #if defined(COMPILER_HAS_SIMD_AVX512) SIMD_AVX512, #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) SIMD_AVX2, SIMD_AVX, SIMD_SSE2, diff --git a/client/deps/hardnested/hardnested_bitarray_core.c b/client/deps/hardnested/hardnested_bitarray_core.c index 68a885be9..2031a9ec5 100644 --- a/client/deps/hardnested/hardnested_bitarray_core.c +++ b/client/deps/hardnested/hardnested_bitarray_core.c @@ -329,7 +329,7 @@ uint32_t *malloc_bitarray_dispatch(uint32_t x) { if (__builtin_cpu_supports("avx512f")) malloc_bitarray_function_p = &malloc_bitarray_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2; else if (__builtin_cpu_supports("avx")) malloc_bitarray_function_p = &malloc_bitarray_AVX; else if (__builtin_cpu_supports("sse2")) malloc_bitarray_function_p = &malloc_bitarray_SSE2; @@ -352,7 +352,7 @@ void free_bitarray_dispatch(uint32_t *x) { if (__builtin_cpu_supports("avx512f")) free_bitarray_function_p = &free_bitarray_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2; else if (__builtin_cpu_supports("avx")) free_bitarray_function_p = &free_bitarray_AVX; else if (__builtin_cpu_supports("sse2")) free_bitarray_function_p = &free_bitarray_SSE2; @@ -375,7 +375,7 @@ uint32_t bitcount_dispatch(uint32_t a) { 
if (__builtin_cpu_supports("avx512f")) bitcount_function_p = &bitcount_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2; else if (__builtin_cpu_supports("avx")) bitcount_function_p = &bitcount_AVX; else if (__builtin_cpu_supports("sse2")) bitcount_function_p = &bitcount_SSE2; @@ -398,7 +398,7 @@ uint32_t count_states_dispatch(uint32_t *bitarray) { if (__builtin_cpu_supports("avx512f")) count_states_function_p = &count_states_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2; else if (__builtin_cpu_supports("avx")) count_states_function_p = &count_states_AVX; else if (__builtin_cpu_supports("sse2")) count_states_function_p = &count_states_SSE2; @@ -421,7 +421,7 @@ void bitarray_AND_dispatch(uint32_t *A, uint32_t *B) { if (__builtin_cpu_supports("avx512f")) bitarray_AND_function_p = &bitarray_AND_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2; else if (__builtin_cpu_supports("avx")) bitarray_AND_function_p = &bitarray_AND_AVX; else if (__builtin_cpu_supports("sse2")) bitarray_AND_function_p = &bitarray_AND_SSE2; @@ -444,7 +444,7 @@ void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) { if (__builtin_cpu_supports("avx512f")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2; else if (__builtin_cpu_supports("avx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX; else if (__builtin_cpu_supports("sse2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_SSE2; @@ -467,7 +467,7 @@ uint32_t count_bitarray_AND_dispatch(uint32_t *A, 
uint32_t *B) { if (__builtin_cpu_supports("avx512f")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2; else if (__builtin_cpu_supports("avx")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX; else if (__builtin_cpu_supports("sse2")) count_bitarray_AND_function_p = &count_bitarray_AND_SSE2; @@ -490,7 +490,7 @@ uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) { if (__builtin_cpu_supports("avx512f")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2; else if (__builtin_cpu_supports("avx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX; else if (__builtin_cpu_supports("sse2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_SSE2; @@ -513,7 +513,7 @@ void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) if (__builtin_cpu_supports("avx512f")) bitarray_AND4_function_p = &bitarray_AND4_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2; else if (__builtin_cpu_supports("avx")) bitarray_AND4_function_p = &bitarray_AND4_AVX; else if (__builtin_cpu_supports("sse2")) bitarray_AND4_function_p = &bitarray_AND4_SSE2; @@ -536,7 +536,7 @@ void bitarray_OR_dispatch(uint32_t *A, uint32_t *B) { if (__builtin_cpu_supports("avx512f")) bitarray_OR_function_p = &bitarray_OR_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2; else if (__builtin_cpu_supports("avx")) 
bitarray_OR_function_p = &bitarray_OR_AVX; else if (__builtin_cpu_supports("sse2")) bitarray_OR_function_p = &bitarray_OR_SSE2; @@ -559,7 +559,7 @@ uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) { if (__builtin_cpu_supports("avx512f")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2; else if (__builtin_cpu_supports("avx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX; else if (__builtin_cpu_supports("sse2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_SSE2; @@ -582,7 +582,7 @@ uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) { if (__builtin_cpu_supports("avx512f")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2; else if (__builtin_cpu_supports("avx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX; else if (__builtin_cpu_supports("sse2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_SSE2; @@ -605,7 +605,7 @@ uint32_t count_bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uin if (__builtin_cpu_supports("avx512f")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX512; else #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2; else if (__builtin_cpu_supports("avx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX; else if (__builtin_cpu_supports("sse2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_SSE2; diff --git a/client/src/cmdhfmf.c b/client/src/cmdhfmf.c index 5d1f34dd5..ab11f3415 100644 --- a/client/src/cmdhfmf.c +++ b/client/src/cmdhfmf.c @@ 
-1865,7 +1865,7 @@ static int CmdHF14AMfNestedHard(const char *Cmd) { arg_lit0("w", "wr", "Acquire nonces and UID, and write them to file `hf-mf--nonces.bin`"), arg_lit0(NULL, "in", "None (use CPU regular instruction set)"), -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) arg_lit0(NULL, "im", "MMX"), arg_lit0(NULL, "is", "SSE2"), arg_lit0(NULL, "ia", "AVX"), @@ -1925,7 +1925,7 @@ static int CmdHF14AMfNestedHard(const char *Cmd) { bool nonce_file_write = arg_get_lit(ctx, 14); bool in = arg_get_lit(ctx, 15); -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) bool im = arg_get_lit(ctx, 16); bool is = arg_get_lit(ctx, 17); bool ia = arg_get_lit(ctx, 18); @@ -1947,7 +1947,7 @@ static int CmdHF14AMfNestedHard(const char *Cmd) { SetSIMDInstr(SIMD_AVX512); #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (i2) SetSIMDInstr(SIMD_AVX2); if (ia) @@ -2071,7 +2071,7 @@ static int CmdHF14AMfAutoPWN(const char *Cmd) { arg_lit0(NULL, "4k", "MIFARE Classic 4k / S70"), arg_lit0(NULL, "in", "None (use CPU regular instruction set)"), -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) arg_lit0(NULL, "im", "MMX"), arg_lit0(NULL, "is", "SSE2"), arg_lit0(NULL, "ia", "AVX"), @@ -2124,7 +2124,7 @@ static int CmdHF14AMfAutoPWN(const char *Cmd) { bool m4 = arg_get_lit(ctx, 12); bool in = arg_get_lit(ctx, 13); -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) bool im = arg_get_lit(ctx, 14); bool is = arg_get_lit(ctx, 15); bool ia = arg_get_lit(ctx, 16); @@ -2176,7 +2176,7 @@ static int CmdHF14AMfAutoPWN(const char *Cmd) { SetSIMDInstr(SIMD_AVX512); #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) if (i2) SetSIMDInstr(SIMD_AVX2); if (ia) diff --git a/client/src/cmdhfmfhard.c b/client/src/cmdhfmfhard.c index 0ed615bd1..f4ba72cbc 100644 --- a/client/src/cmdhfmfhard.c +++ b/client/src/cmdhfmfhard.c @@ -88,7 +88,7 @@ static void get_SIMD_instruction_set(char 
*instruction_set) { strcpy(instruction_set, "AVX512F"); break; #endif -#if defined(COMPILER_HAS_SIMD) +#if defined(COMPILER_HAS_SIMD_X86) case SIMD_AVX2: strcpy(instruction_set, "AVX2"); break; From bf75841930eeb19807fa771a526b569d4f651b36 Mon Sep 17 00:00:00 2001 From: Mark Dietzer Date: Tue, 29 Mar 2022 08:19:35 -0700 Subject: [PATCH 3/3] fix compile CI windows hopefully --- client/deps/hardnested.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/deps/hardnested.cmake b/client/deps/hardnested.cmake index b9651f394..78f763039 100644 --- a/client/deps/hardnested.cmake +++ b/client/deps/hardnested.cmake @@ -10,7 +10,7 @@ target_include_directories(pm3rrg_rdv4_hardnested_nosimd PRIVATE ../../include ../src) -target_compile_definitions(pm3rrg_rdv4_hardnested_nosimd NOSIMD_BUILD) +target_compile_definitions(pm3rrg_rdv4_hardnested_nosimd PRIVATE NOSIMD_BUILD) ## CPU-specific code ## These are mostly for x86-based architectures, which is not useful for many Android devices.