From 3ecebacdd2370dc0d068fe8abc2ea78f4cf23e39 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sat, 16 Jan 2021 01:08:30 +0100 Subject: [PATCH] move thread scheduler threads.c, move opencl engine in opencl.c, minor fixes --- .../crack5opencl/ht2crack5opencl.c | 733 ++---------------- tools/hitag2crack/crack5opencl/opencl.c | 446 +++++++++++ tools/hitag2crack/crack5opencl/opencl.h | 15 +- tools/hitag2crack/crack5opencl/queue.c | 4 +- tools/hitag2crack/crack5opencl/queue.h | 2 +- tools/hitag2crack/crack5opencl/threads.c | 265 ++++++- tools/hitag2crack/crack5opencl/threads.h | 12 +- 7 files changed, 753 insertions(+), 724 deletions(-) diff --git a/tools/hitag2crack/crack5opencl/ht2crack5opencl.c b/tools/hitag2crack/crack5opencl/ht2crack5opencl.c index bdff13ffc..87fb2c765 100644 --- a/tools/hitag2crack/crack5opencl/ht2crack5opencl.c +++ b/tools/hitag2crack/crack5opencl/ht2crack5opencl.c @@ -151,7 +151,7 @@ static bool parse_arg(char *restrict in, unsigned int *out, unsigned int *out_cn unsigned int tmp_sel = (unsigned int) strtoul(next, NULL, 10); if (errno == EINVAL || errno == ERANGE || (tmp_sel < 1 || tmp_sel > 16)) { - printf("! Invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'"); + printf("Error: invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'"); return false; } @@ -165,7 +165,7 @@ static bool parse_arg(char *restrict in, unsigned int *out, unsigned int *out_cn } else { out[0] = (unsigned int) strtoul(in, NULL, 10); if (errno == EINVAL || errno == ERANGE) { - printf("! Invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'"); + printf("Error: invalid %s argument\n", (opt_type == 0) ? "'platform'" : "'device'"); return false; } *out_cnt = 1; @@ -191,7 +191,6 @@ int main(int argc, char **argv) { unsigned int profile_selected = 2; unsigned int queue_type = 0; - uint32_t target = 0; uint32_t **matches_found = NULL; uint64_t **matches = NULL; @@ -211,7 +210,7 @@ int main(int argc, char **argv) { // 0: gpu, 1: cpu, 2: all device_types_selected = (unsigned int) strtoul(optarg, NULL, 10); if (device_types_selected > 2) { - printf("! Invalid DEVICE TYPE argument (accepted values: from 0 to 2)\n"); + printf("Error: invalid DEVICE TYPE argument (accepted values: from 0 to 2)\n"); usage(argv[0]); } break; @@ -222,7 +221,7 @@ int main(int argc, char **argv) { case 'P': profile_selected = (unsigned int) strtoul(optarg, NULL, 10); if (profile_selected > 10) { - printf("! Invalid PROFILE argument (accepted valuee: from 0 to 10)\n"); + printf("Error: invalid PROFILE argument (accepted valuee: from 0 to 10)\n"); usage(argv[0]); } break; @@ -233,7 +232,7 @@ int main(int argc, char **argv) { // 0: forward, 1: reverse, 2: random queue_type = (unsigned int) strtoul(optarg, NULL, 10); if (queue_type != QUEUE_TYPE_FORWARD && queue_type != QUEUE_TYPE_REVERSE && queue_type != QUEUE_TYPE_RANDOM) { - printf("! Invalid QUEUE TYPE argument (accepted values: 0, 1 or 2)\n"); + printf("Error: invalid QUEUE TYPE argument (accepted values: 0, 1 or 2)\n"); usage(argv[0]); } break; @@ -298,13 +297,13 @@ int main(int argc, char **argv) { printf("Device types selected : %s\n", (device_types_selected == CL_DEVICE_TYPE_GPU) ? "GPU" : (device_types_selected == CL_DEVICE_TYPE_CPU) ? "CPU" : "ALL"); printf("Scheduler selected : %s\n", (thread_scheduler_type_selected == 0) ? "sequential" : "async"); - printf("Profile selected : %d\n", profile_selected); + printf("Profile selected : %u\n", profile_selected); } if (!show) { if ((argc - optind) < 5) { #if DEBUGME > 0 - printf("! Invalid extra arguments\n"); + printf("Error: invalid extra arguments\n"); #endif usage(argv[0]); } @@ -313,41 +312,41 @@ int main(int argc, char **argv) { switch (e) { case 0: // UID if (!strncmp(argv[optind], "0x", 2) || !strncmp(argv[optind], "0X", 2)) { - if (strlen(argv[optind]) != 2 + 8) { printf("! Invalid UID length\n"); usage(argv[0]); } + if (strlen(argv[optind]) != 2 + 8) { printf("Error: invalid UID length\n"); usage(argv[0]); } uid = (uint32_t) rev32(hexreversetoulong(argv[optind] + 2)); } else { - if (strlen(argv[optind]) != 8) { printf("! Invalid UID length\n"); usage(argv[0]); } + if (strlen(argv[optind]) != 8) { printf("Error: invalid UID length\n"); usage(argv[0]); } uid = (uint32_t) rev32(hexreversetoulong(argv[optind])); } break; case 1: // nR1 if (!strncmp(argv[optind], "0x", 2) || !strncmp(argv[optind], "0X", 2)) { - if (strlen(argv[optind]) != 2 + 8) { printf("! Invalid nR1 length\n"); usage(argv[0]); } + if (strlen(argv[optind]) != 2 + 8) { printf("Error: invalid nR1 length\n"); usage(argv[0]); } nR1 = (uint32_t) rev32(hexreversetoulong(argv[optind] + 2)); } else { - if (strlen(argv[optind]) != 8) { printf("! Invalid nR1 length\n"); usage(argv[0]); } + if (strlen(argv[optind]) != 8) { printf("Error: invalid nR1 length\n"); usage(argv[0]); } nR1 = (uint32_t) rev32(hexreversetoulong(argv[optind])); } break; case 2: // aR1 - if (strlen(argv[optind]) != 8) { printf("! Invalid aR1 length\n"); usage(argv[0]); } + if (strlen(argv[optind]) != 8) { printf("Error: invalid aR1 length\n"); usage(argv[0]); } aR1 = (uint32_t) strtoul(argv[optind], NULL, 16); break; case 3: // nR2 if (!strncmp(argv[optind], "0x", 2) || !strncmp(argv[optind], "0X", 2)) { - if (strlen(argv[optind]) != 2 + 8) { printf("! Invalid nR2 length\n"); usage(argv[0]); } + if (strlen(argv[optind]) != 2 + 8) { printf("Error: invalid nR2 length\n"); usage(argv[0]); } nR2 = (uint32_t) rev32(hexreversetoulong(argv[optind] + 2)); } else { - if (strlen(argv[optind]) != 8) { printf("! Invalid nR2 length\n"); usage(argv[0]); } + if (strlen(argv[optind]) != 8) { printf("Error: invalid nR2 length\n"); usage(argv[0]); } nR2 = (uint32_t) rev32(hexreversetoulong(argv[optind])); } break; case 4: // aR2 - if (strlen(argv[optind]) != 8) { printf("! Invalid aR2 length\n"); usage(argv[0]); } + if (strlen(argv[optind]) != 8) { printf("Error: invalid aR2 length\n"); usage(argv[0]); } aR2 = (uint32_t) strtoul(argv[optind], NULL, 16); break; @@ -371,7 +370,7 @@ int main(int argc, char **argv) { if (!show) { if (verbose) printf("uid: %u, aR2: %u, nR1: %u, nR2: %u\n", checks[0], checks[1], checks[2], checks[3]); - target = ~aR1; + uint32_t target = ~aR1; // bitslice inverse target bits bitslice(~target, keystream); @@ -446,389 +445,33 @@ int main(int argc, char **argv) { close(fd); } - // now discover and set up compute device(s) int err = 0; cl_uint ocl_platform_cnt = 0; - unsigned int ocl_platform_max = MAX_OPENCL_DEVICES; // 16 - - cl_platform_id *ocl_platforms = (cl_platform_id *) calloc(ocl_platform_max, sizeof(cl_platform_id)); - if (!ocl_platforms) { - printf("Error: calloc (ocl_platforms) failed (%d): %s\n", errno, strerror(errno)); - MEMORY_FREE_ALL - exit(2); - } - - MEMORY_FREE_ADD(ocl_platforms) - - // enum platforms - err = clGetPlatformIDs(ocl_platform_max, ocl_platforms, &ocl_platform_cnt); - if (err != CL_SUCCESS) { - printf("Error: clGetPlatformIDs() failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - - if (ocl_platform_cnt == 0) { - printf("No platforms found, exit\n"); - MEMORY_FREE_ALL - exit(2); - } - - // allocate memory to hold info about platforms/devices - compute_platform_ctx_t *cd_ctx = (compute_platform_ctx_t *) calloc(ocl_platform_cnt, sizeof(compute_platform_ctx_t)); - if (!cd_ctx) { - printf("Error: calloc (compute_platform_ctx_t) failed (%d): %s\n", errno, strerror(errno)); - MEMORY_FREE_ALL - exit(err); - } - - MEMORY_FREE_ADD(cd_ctx) - - cl_platform_info ocl_platforms_info[3] = { CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_VERSION }; - unsigned int ocl_platforms_info_cnt = sizeof(ocl_platforms_info) / sizeof(cl_platform_info); - - cl_device_info ocl_devices_info[8] = { CL_DEVICE_TYPE, CL_DEVICE_NAME, CL_DEVICE_VERSION, CL_DRIVER_VERSION, CL_DEVICE_VENDOR, CL_DEVICE_LOCAL_MEM_TYPE, CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MAX_COMPUTE_UNITS }; - unsigned int ocl_devices_info_cnt = sizeof(ocl_devices_info) / sizeof(cl_device_info); - - unsigned int info_idx = 0; - size_t tmp_len = 0; - char *tmp_buf = NULL; - - unsigned int global_device_id = 0; size_t selected_platforms_cnt = 0; size_t selected_devices_cnt = 0; + compute_platform_ctx_t *cd_ctx = NULL; if (show) verbose = true; - if (verbose) printf("- Found %u OpenCL Platform(s)\n", ocl_platform_cnt); - - for (cl_uint platform_idx = 0; platform_idx < ocl_platform_cnt; platform_idx++) { - cd_ctx[platform_idx].platform_id = ocl_platforms[platform_idx]; - cd_ctx[platform_idx].selected = plat_dev_enabled(platform_idx, plat_sel, plat_cnt, 0, 0); - - if (cd_ctx[platform_idx].selected) selected_platforms_cnt++; - - if (verbose) printf("\n-- Platform ID: %d\n", platform_idx + 1); - - for (info_idx = 0; info_idx < ocl_platforms_info_cnt; info_idx++) { - cl_platform_info ocl_info = ocl_platforms_info[info_idx]; - - err = clGetPlatformInfo(cd_ctx[platform_idx].platform_id, ocl_info, 0, NULL, &tmp_len); - if (err != CL_SUCCESS) { - printf("Error: clGetPlatformInfo(param size) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - - if (tmp_len > 0) { - if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) { - printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno)); - MEMORY_FREE_ALL - exit(2); - } - - MEMORY_FREE_ADD(tmp_buf) - - err = clGetPlatformInfo(cd_ctx[platform_idx].platform_id, ocl_info, tmp_len, tmp_buf, 0); - if (err != CL_SUCCESS) { - printf("Error: clGetPlatformInfo(param) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - } else { - tmp_len = 4; - if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) { - printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno)); - MEMORY_FREE_ALL - exit(2); - } - - MEMORY_FREE_ADD(tmp_buf) - - strncpy(tmp_buf, "N/A\0", tmp_len); - } - - if (verbose) { - const char *tmp_info_desc = (info_idx == 0) ? "Name" : (info_idx == 1) ? "Vendor" : "Version"; - - printf("%14s: %s\n", tmp_info_desc, tmp_buf); - } - - switch (info_idx) { - case 0: - strncpy(cd_ctx[platform_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1); - break; - case 1: - strncpy(cd_ctx[platform_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); - break; - case 2: - strncpy(cd_ctx[platform_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); - break; - } - - if (info_idx == 1) { - // todo: do the same this devices - if (!strncmp(tmp_buf, "NVIDIA", 6)) cd_ctx[platform_idx].is_nv = true; - else if (!strncmp(tmp_buf, "Apple", 5)) { cd_ctx[platform_idx].is_apple = true; cd_ctx[platform_idx].warning = true; } - else if (!strncmp(tmp_buf, "Intel", 5)) cd_ctx[platform_idx].is_intel = true; - } - - MEMORY_FREE_DEL(tmp_buf) - } - - if (!show && verbose) { - printf("%14s: %s\n", "Selected", (cd_ctx[platform_idx].selected) ? "yes" : "no"); - if (cd_ctx[platform_idx].warning) printf("\n%14s: performance will not be optimal using this platform\n\n", "=====> Warning"); - } - - // enum devices with this platform - unsigned int ocl_device_cnt = 0; - unsigned int ocl_device_max = MAX_OPENCL_DEVICES; - - cl_device_id *ocl_devices = (cl_device_id *) calloc(ocl_device_max, sizeof(cl_device_id)); - if (!ocl_devices) { - printf("Error: calloc (ocl_devices) failed (%d): %s\n", errno, strerror(errno)); - MEMORY_FREE_ALL - exit(2); - } - - MEMORY_FREE_ADD(ocl_devices) - - err = clGetDeviceIDs(cd_ctx[platform_idx].platform_id, CL_DEVICE_TYPE_ALL, ocl_device_max, ocl_devices, &ocl_device_cnt); - if (ocl_device_cnt == 0) { - if (device_types_selected == CL_DEVICE_TYPE_ALL) printf("No device(s) available with platform id %d\n", platform_idx); - cd_ctx[platform_idx].device_cnt = 0; - continue; - } - - if (err != CL_SUCCESS) { - printf("Error: clGetDeviceIDs(cnt) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - - if (verbose) printf("%14s: %u\n", "Device(s)", ocl_device_cnt); - - cd_ctx[platform_idx].device_cnt = ocl_device_cnt; - - for (unsigned int device_idx = 0; device_idx < ocl_device_cnt; device_idx++) { - memset(&cd_ctx[platform_idx].device[device_idx], 0, sizeof(compute_device_ctx_t)); - cl_device_id ocl_device = ocl_devices[device_idx]; - cd_ctx[platform_idx].device[device_idx].platform_id = cd_ctx[platform_idx].platform_id; - - if (verbose) printf("---- * ID: %u\n", global_device_id + 1); - - for (info_idx = 0; info_idx < ocl_devices_info_cnt; info_idx++) { - cl_device_info ocl_dev_info = ocl_devices_info[info_idx]; - - if (info_idx == 0) { - cl_device_type device_type; - - err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_type), &device_type, 0); - if (err != CL_SUCCESS) { - printf("Error: clGetDeviceInfo(device_type) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - - if (device_type & CL_DEVICE_TYPE_GPU) cd_ctx[platform_idx].device[device_idx].is_gpu = 1; - - if (verbose) printf("%14s: %s\n", "Device Type", (device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : (device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : "Other"); - - cd_ctx[platform_idx].device[device_idx].selected = plat_dev_enabled(global_device_id, dev_sel, dev_cnt, (unsigned int) device_type, device_types_selected); - global_device_id++; - if (cd_ctx[platform_idx].device[device_idx].selected) selected_devices_cnt++; - continue; - } else if (info_idx == 5) { - cl_device_local_mem_type local_mem_type; - - err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_local_mem_type), &local_mem_type, 0); - if (err != CL_SUCCESS) { - printf("Error: clGetDeviceInfo(local_mem_type) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - - if (local_mem_type == CL_LOCAL || local_mem_type == CL_GLOBAL) { - if (verbose) printf("%14s: %s\n", "Local Mem Type", (local_mem_type == CL_LOCAL) ? "Local" : "Global"); - - if (cd_ctx[platform_idx].is_apple) { - if (strncmp(cd_ctx[platform_idx].device[device_idx].vendor, "Intel", 5) != 0) cd_ctx[platform_idx].device[device_idx].have_local_memory = true; - } else if (cd_ctx[platform_idx].is_nv) cd_ctx[platform_idx].device[device_idx].have_local_memory = true; - /* - // swap the 'if' comment for enable local memory with apple gpu's (my Iris crash, abort 6) - // if (!(!strncmp (cd_ctx[platform_idx].device[device_idx].vendor, "Intel", 5) && cd_ctx[platform_idx].is_apple && !cd_ctx[platform_idx].device[device_idx].is_gpu)) - if (!(!strncmp (cd_ctx[platform_idx].device[device_idx].vendor, "Intel", 5) && cd_ctx[platform_idx].is_apple)) - { - cd_ctx[platform_idx].device[device_idx].have_local_memory = true; - } - */ - } else { - if (verbose) printf("%14s: None\n", "Local Mem Type"); - } - - if (verbose) printf("%14s: %s\n", "Local Mem Opt", (cd_ctx[platform_idx].device[device_idx].have_local_memory) ? "yes" : "no"); - - continue; - } else if (info_idx == 6) { - size_t wis[3] = { 0 }; - err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(size_t) * 3, wis, 0); - if (err != CL_SUCCESS) { - printf("Error: clGetDeviceInfo(work_items_size) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - - if (verbose) printf("%14s: (%zu,%zu,%zu)\n", "Max Work-Items", wis[0], wis[1], wis[2]); - -#if APPLE_GPU_BROKEN == 1 - if (wis[1] < GLOBAL_WS_1 && cd_ctx[platform_idx].device[device_idx].is_apple_gpu) { - cd_ctx[platform_idx].device[device_idx].unsupported = true; - } -#endif - continue; - } else if (info_idx == 7) { - cl_uint cores = 0; - err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_uint), &cores, 0); - if (err != CL_SUCCESS) { - printf("Error: clGetDeviceInfo(compute_units) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - - if (verbose) printf("%14s: %u\n", "Compute Units", cores); - - cd_ctx[platform_idx].device[device_idx].compute_units = cores; - continue; - } - - tmp_len = 0; - tmp_buf = NULL; - - err = clGetDeviceInfo(ocl_device, ocl_dev_info, 0, NULL, &tmp_len); - if (err != CL_SUCCESS) { - printf("Error: clGetDeviceInfo(param size) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - - if (tmp_len > 0) { - if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) { - printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno)); - MEMORY_FREE_ALL - exit(2); - } - - MEMORY_FREE_ADD(tmp_buf) - - err = clGetDeviceInfo(ocl_device, ocl_dev_info, tmp_len, tmp_buf, 0); - if (err != CL_SUCCESS) { - printf("Error: clGetDeviceInfo(param) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - } else { - tmp_len = 4; - if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) { - printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno)); - MEMORY_FREE_ALL - exit(2); - } - - MEMORY_FREE_ADD(tmp_buf) - - strncpy(tmp_buf, "N/A\0", tmp_len); - } - - if (verbose) { - const char *tmp_dev_info_desc = (info_idx == 1) ? "Name" : (info_idx == 2) ? "Version" : (info_idx == 3) ? "Driver Version" : "Vendor"; - - printf("%14s: %s\n", tmp_dev_info_desc, tmp_buf); - } - - switch (info_idx) { - case 1: - strncpy(cd_ctx[platform_idx].device[device_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1); - break; - case 2: - strncpy(cd_ctx[platform_idx].device[device_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); - break; - case 3: - strncpy(cd_ctx[platform_idx].device[device_idx].driver_version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); - break; - case 4: - strncpy(cd_ctx[platform_idx].device[device_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); - break; - } - - if (info_idx == 4) { - if (!strncmp(tmp_buf, "Intel", 5) && cd_ctx[platform_idx].is_apple) { - // disable hitag2 with apple platform and not apple device vendor (< Apple M1) - ctx.force_hitag2_opencl = false; - - cd_ctx[platform_idx].device[device_idx].is_apple_gpu = cd_ctx[platform_idx].device[device_idx].is_gpu; - } - - if (!strncmp(tmp_buf, "NVIDIA", 6) && cd_ctx[platform_idx].is_nv) { - unsigned int sm_maj = 0, sm_min = 0; - - err = clGetDeviceInfo(ocl_device, 0x4000, sizeof(unsigned int), &sm_maj, 0); - err |= clGetDeviceInfo(ocl_device, 0x4001, sizeof(unsigned int), &sm_min, 0); - - if (err != CL_SUCCESS) { - printf("Error: clGetDeviceInfo(sm_maj/sm_min) failed (%d)\n", err); - MEMORY_FREE_ALL - exit(2); - } - - cd_ctx[platform_idx].device[device_idx].sm_maj = sm_maj; - cd_ctx[platform_idx].device[device_idx].sm_min = sm_min; - - if (verbose) printf("%14s: %u%u\n", "SM", sm_maj, sm_min); - - if (sm_maj >= 5) { // >= Maxwell - // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-lop3 - // Requires sm_50 or higher. - cd_ctx[platform_idx].device[device_idx].have_lop3 = true; - } else { - cd_ctx[platform_idx].device[device_idx].warning = true; - } - - cd_ctx[platform_idx].device[device_idx].is_nv = true; - } else { - cd_ctx[platform_idx].device[device_idx].warning = true; - } - } - - MEMORY_FREE_DEL(tmp_buf) - } - - if (!show && verbose) printf("%14s: %s\n", "Selected", (cd_ctx[platform_idx].device[device_idx].selected) ? "yes" : "no"); - - if (cd_ctx[platform_idx].device[device_idx].unsupported) { - printf("\n%14s: this device was not supported, beacuse of missing resources\n\n", "=====> Warning"); - continue; - } - - if (cd_ctx[platform_idx].device[device_idx].warning) { - if (!show && verbose) printf("\n%14s: performance will not be optimal using this device\n\n", "=====> Warning"); - } - - cd_ctx[platform_idx].device[device_idx].device_id = ocl_device; - } - MEMORY_FREE_DEL(ocl_devices) + // now discover and set up compute device(s) + if ((err = discoverDevices(profile_selected, device_types_selected, &ocl_platform_cnt, &selected_platforms_cnt, &selected_devices_cnt, &cd_ctx, plat_sel, plat_cnt, dev_sel, dev_cnt, verbose, show)) != 0) + { + printf ("Error: discoverDevices() failed\n"); + if (err < -5) free (cd_ctx); + MEMORY_FREE_ALL + exit (2); } - MEMORY_FREE_DEL(ocl_platforms) - - // new selection engine, need to support multi-gpu system (with the same platform) if (verbose) printf("\n"); + // new selection engine, need to support multi-gpu system (with the same platform) if (show) { MEMORY_FREE_ALL exit(2); } + MEMORY_FREE_ADD(cd_ctx) + if (selected_platforms_cnt == 0) { printf("! No platform was selected ...\n"); MEMORY_FREE_ALL @@ -855,6 +498,11 @@ int main(int argc, char **argv) { for (q = 0; q < cd_ctx[w].device_cnt; q++) { if (!cd_ctx[w].device[q].selected) continue; + if (cd_ctx[w].is_apple && !strncmp(cd_ctx[w].device[q].vendor, "Intel", 5)) { + // disable hitag2 with apple platform and not apple device vendor (< Apple M1) + ctx.force_hitag2_opencl = false; + } + printf("%2zu - %s", z, cd_ctx[w].device[q].name); if (verbose) { printf(" (Lop3 %s, ", (cd_ctx[w].device[q].have_lop3) ? "yes" : "no"); @@ -989,14 +637,6 @@ int main(int argc, char **argv) { MEMORY_FREE_ADD(ctx.local_ws) - if (!(ctx.profiles = (int *) calloc(selected_devices_cnt, sizeof(int)))) { - printf("Error: calloc (ctx.profiles) failed (%d): %s\n", errno, strerror(errno)); - MEMORY_FREE_ALL - exit(2); - } - - MEMORY_FREE_ADD(ctx.profiles) - // show buidlog in case of error // todo: only for device models unsigned int build_errors = 0; @@ -1121,7 +761,6 @@ int main(int argc, char **argv) { if (err != CL_SUCCESS) { printf("[%zu] Error: clGetProgramBuildInfo failed (%d)\n", z, err); continue; -// exit (2); } if (len == 0) continue; @@ -1132,17 +771,13 @@ int main(int argc, char **argv) { if (!buffer) { printf("[%zu] Error: calloc (CL_PROGRAM_BUILD_LOG) failed (%d): %s\n", z, errno, strerror(errno)); continue; -// exit (2); } - MEMORY_FREE_ADD(buffer) - err = clGetProgramBuildInfo(ctx.programs[z], cd_ctx[w].device[q].device_id, CL_PROGRAM_BUILD_LOG, len, buffer, 0); if (err != CL_SUCCESS) { printf("[%zu] clGetProgramBuildInfo() failed (%d)\n", z, err); - MEMORY_FREE_DEL(buffer) + free (buffer); continue; -// exit (2); } #if DEBUGME > 0 @@ -1152,7 +787,7 @@ int main(int argc, char **argv) { printf("[%zu] Build log (len %zu):\n--------\n%s\n--------\n", z, len, buffer); } - MEMORY_FREE_DEL(buffer) + free (buffer); build_logs++; #if DEBUGME == 0 @@ -1196,76 +831,13 @@ int main(int argc, char **argv) { } } - // z is device counter, dolphin counter as well - // setup, phase 2 (select lower profile) - - int profile = 0xff; - - g = 0; - - for (w = 0; w < ocl_platform_cnt; w++) { - if (!cd_ctx[w].selected) continue; - - for (q = 0; q < cd_ctx[w].device_cnt; q++) { - if (!cd_ctx[w].device[q].selected) continue; - - ctx.profiles[g] = (int) profile_selected; // start with default - -#if DEBUGME > 1 - printf("[debug] Initial profile for device %zu: %d\n", z, ctx.profiles[g]); -#endif - - // force profile to 0 with Apple GPU's to get it stable, and 1 for CPU - if (cd_ctx[w].is_apple && !strncmp(cd_ctx[w].device[q].vendor, "Intel", 5)) { - if (cd_ctx[w].device[q].is_gpu) { - if (profile_selected > 2) ctx.profiles[g] = PROFILE_DEFAULT; // Apple-Intel GPU's, 2 is the old 0 - } else { - if (profile_selected > 3) ctx.profiles[g] = PROFILE_DEFAULT; // Apple-Intel CPU's, 3 is the old 1 - } - } - - // force profile to 0 with Intel GPU and 2 wih Intel CPU's - if (cd_ctx[w].is_intel && !strncmp(cd_ctx[w].device[q].vendor, "Intel", 5)) { - if (cd_ctx[w].device[q].is_gpu) { - ctx.profiles[g] = 0; // Intel GPU, work better with a very slow profile - } else { - if (profile_selected > 2) ctx.profiles[g] = PROFILE_DEFAULT; // Intel CPU (2 is the old 0) - } - } - - // force profile to 2 with NVIDIA GPU's with NVIDIA platform - if (cd_ctx[w].is_nv && cd_ctx[w].device[q].is_gpu && !strncmp(cd_ctx[w].device[q].vendor, "NVIDIA", 6)) { - if (profile_selected > 10) { - // NVIDIA RTX 3090 perform better with 5 - ctx.profiles[g] = (cd_ctx[w].device[q].sm_maj >= 8) ? 5 : PROFILE_DEFAULT; - } - } - - // probably unstested hw, set profile to 0 - if (profile_selected == 0xff) { - profile_selected = 0; - ctx.profiles[g] = 0; - } - - // with same devices will be selected the best - // but for different devices in the same platform we need the worst for now (todo) - if (ctx.profiles[q] < profile) profile = ctx.profiles[q]; - } - } - - // profile consistency check - if (profile < 0 || profile > 10) { - printf("! Error: the selected profile is not allowed (%d)\n", profile); - MEMORY_FREE_OPENCL(ctx, z) - MEMORY_FREE_LIST_Z(matches, z) - MEMORY_FREE_LIST_Z(matches_found, z) - MEMORY_FREE_ALL - exit(2); - } + unsigned int profile = get_smallest_profile (cd_ctx, ocl_platform_cnt); // setup, phase 3 (finis him) + // z is device counter, dolphin buggy counter as well + z = 0; for (w = 0; w < ocl_platform_cnt; w++) { @@ -1296,8 +868,7 @@ int main(int argc, char **argv) { MEMORY_FREE_ALL exit(2); } - } else { - // one + } else { // one if (!(matches[z] = (uint64_t *) calloc(1, sizeof(uint64_t)))) { printf("[%zu] Error: calloc (matches) failed (%d): %s\n", z, errno, strerror(errno)); MEMORY_FREE_OPENCL(ctx, z) @@ -1374,7 +945,7 @@ int main(int argc, char **argv) { } err = clEnqueueWriteBuffer(ctx.commands[z], ctx.candidates[z], CL_TRUE, 0, sizeof(uint16_t) * ((1 << 20) * 3), candidates, 0, NULL, NULL); -// err = clEnqueueWriteBuffer(ctx.commands[z], ctx.candidates, CL_TRUE, 0, sizeof(uint64_t) * ((1 << 20)), candidates, 0, NULL, NULL); + // err = clEnqueueWriteBuffer(ctx.commands[z], ctx.candidates, CL_TRUE, 0, sizeof(uint64_t) * ((1 << 20)), candidates, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("[%zu] Error: clEnqueueWriteBuffer(ctx.candidates) failed (%d)\n", z, err); MEMORY_FREE_OPENCL(ctx, z) @@ -1492,17 +1063,14 @@ int main(int argc, char **argv) { t_arg[z].aR2 = aR2; t_arg[z].nR1 = nR1; t_arg[z].nR2 = nR2; - t_arg[z].max_step = max_step; + t_arg[z].max_slices = max_step; t_arg[z].ocl_ctx = &ctx; t_arg[z].device_id = z; - t_arg[z].async = (ctx.thread_sched_type == THREAD_TYPE_ASYNC); t_arg[z].thread_ctx = &th_ctx; - - if (ctx.thread_sched_type == THREAD_TYPE_ASYNC) { - t_arg[z].matches = matches[z]; - t_arg[z].matches_found = matches_found[z]; - t_arg[z].status = TH_START; - } + t_arg[z].r = false; + t_arg[z].matches = matches[z]; + t_arg[z].matches_found = matches_found[z]; + t_arg[z].status = TH_START; } if (ctx.thread_sched_type == THREAD_TYPE_ASYNC) { @@ -1550,202 +1118,20 @@ int main(int argc, char **argv) { printf("Attack 5 - opencl - start (Max Slices %u, %s order", max_step, wu_queue_strdesc(ctx.queue_ctx.queue_type)); if (!verbose) printf(")\n\n"); - else printf(", Profile %d, Async Threads %s, HiTag2 key verify on device %s)\n\n", profile, (ctx.thread_sched_type == THREAD_TYPE_ASYNC) ? "yes" : "no", (force_hitag2_opencl) ? "yes" : "no"); + else printf(", Profile %u, Async Threads %s, HiTag2 key verify on device %s)\n\n", profile, (ctx.thread_sched_type == THREAD_TYPE_ASYNC) ? "yes" : "no", (force_hitag2_opencl) ? "yes" : "no"); if (gettimeofday(&cpu_t_start, NULL) == -1) { - printf("! gettimeofday(start) failed (%d): %s\n", errno, strerror(errno)); + printf("Error: gettimeofday(start) failed (%d): %s\n", errno, strerror(errno)); show_overall_time = false; } - if (ctx.thread_sched_type == THREAD_TYPE_ASYNC) { - // crack hitag key or die tryin' - unsigned int th_cnt; - - bool done = false; - - do { // master - th_cnt = 0; - - for (z = 0; z < thread_count; z++) { -#if TDEBUG >= 1 && DEBUGME == 1 - if (thread_count == 1) { printf("[%zu] get status from slave ...\n", z); fflush(stdout); } -#endif - - pthread_mutex_lock(&th_ctx.thread_mutexs[z]); - thread_status_t cur_status = t_arg[z].status; - pthread_mutex_unlock(&th_ctx.thread_mutexs[z]); - -#if TDEBUG >= 1 && DEBUGME == 1 - if (thread_count == 1) { printf("[%zu] slave status: %s\n", z, thread_status_strdesc(cur_status)); fflush(stdout); } -#endif - - if (found) { -#if TDEBUG >= 3 - printf("[%zu] Processing exit logic\n", z); - fflush(stdout); -#endif - - if (cur_status < TH_FOUND_KEY) { -#if TDEBUG >= 1 - printf("[%zu] key found from another thread, set quit\n", z); - fflush(stdout); -#endif - pthread_mutex_lock(&th_ctx.thread_mutexs[z]); - t_arg[z].status = TH_END; - t_arg[z].quit = true; - if (cur_status == TH_WAIT) pthread_cond_signal(&th_ctx.thread_conds[z]); - pthread_mutex_unlock(&th_ctx.thread_mutexs[z]); - } else { - if (thread_count == 1) { - th_cnt++; -#if TDEBUG >= 1 - printf("[%zu] Increment th_cnt: %u/%zu\n", z, th_cnt, thread_count); - fflush(stdout); -#endif - } - } - continue; - } - - if (cur_status == TH_WAIT) { - pthread_mutex_lock(&th_ctx.thread_mutexs[z]); - - if (found) { -#if TDEBUG >= 1 - printf("[%zu] key is found in another thread 1\n", z); - fflush(stdout); -#endif - t_arg[z].status = TH_END; - t_arg[z].quit = true; - pthread_mutex_unlock(&th_ctx.thread_mutexs[z]); - continue; - } - - if (wu_queue_done(&ctx.queue_ctx) != QUEUE_EMPTY) { - t_arg[z].status = TH_PROCESSING; - -#if TDEBUG >= 1 - printf("[master] slave [%zu], I give you another try (%s)\n", z, thread_status_strdesc(t_arg[z].status)); - fflush(stdout); -#endif - - pthread_cond_signal(&th_ctx.thread_conds[z]); - pthread_mutex_unlock(&th_ctx.thread_mutexs[z]); - continue; - } else { -#if TDEBUG >= 1 - printf("[master] slave [%zu], max step reached. Quit.\n", z); - fflush(stdout); -#endif - - cur_status = t_arg[z].status = TH_END; - t_arg[z].quit = true; - - pthread_cond_signal(&th_ctx.thread_conds[z]); - pthread_mutex_unlock(&th_ctx.thread_mutexs[z]); - } - } - - if (cur_status == TH_PROCESSING) { - if (th_ctx.enable_condusleep) { -#if TDEBUG >= 1 - printf("[master] before pthread_cond_wait, TH_PROCESSING\n"); -#endif - pthread_mutex_lock(&th_ctx.thread_mutex_usleep); -#if TDEBUG >= 1 - printf("[master] slave [%zu], I'm waiting you end of task, I'm in %s give me a signal.\n", z, thread_status_strdesc(t_arg[z].status)); - fflush(stdout); -#endif - pthread_cond_wait(&th_ctx.thread_cond_usleep, &th_ctx.thread_mutex_usleep); -#if TDEBUG >= 1 - printf("[master] slave [%zu], got the signal with new state: %s.\n", z, thread_status_strdesc(t_arg[z].status)); - fflush(stdout); -#endif - - if (t_arg[z].status == TH_FOUND_KEY) found = true; - - pthread_mutex_unlock(&th_ctx.thread_mutex_usleep); -#if TDEBUG >= 1 - printf("[master] after pthread_cond_wait, TH_PROCESSING\n"); -#endif - continue; - } - - if (found) { -#if TDEBUG >= 1 - printf("[master] slave [%zu], the key is found. set TH_END from TH_PROCESSING\n", z); - fflush(stdout); -#endif - - pthread_mutex_lock(&th_ctx.thread_mutexs[z]); - t_arg[z].status = TH_END; - t_arg[z].quit = true; - pthread_mutex_unlock(&th_ctx.thread_mutexs[z]); - continue; - } - } - - if (cur_status == TH_ERROR) { - // something went wrong - pthread_mutex_lock(&th_ctx.thread_mutexs[z]); - t_arg[z].status = TH_END; - t_arg[z].quit = true; - pthread_mutex_unlock(&th_ctx.thread_mutexs[z]); - continue; - } - - // todo, do more clean exit logic - if (cur_status >= TH_FOUND_KEY) { - th_cnt++; - - if (cur_status == TH_FOUND_KEY) { - thread_setEnd(&th_ctx, t_arg); - found = true; - done = true; - } - } - } - - if (th_cnt == thread_count) done = true; - - } while (!done); - - // end of async engine - } else if (ctx.thread_sched_type == THREAD_TYPE_SEQ) { - uint32_t step = 0; - bool quit = false; - - for (step = 0; step < max_step; step += thread_count) { - for (z = 0; z < thread_count; z++) { - t_arg[z].r = found; - t_arg[z].matches = matches[z]; - t_arg[z].matches_found = matches_found[z]; - } - - if ((ret = thread_start(&th_ctx, t_arg)) != 0) { - printf("Error: thread_start() failed (%d): %s\n", ret, thread_strerror(ret)); - thread_destroy(&th_ctx); - MEMORY_FREE_OPENCL(ctx, z) - MEMORY_FREE_LIST_Z(matches, z) - MEMORY_FREE_LIST_Z(matches_found, z) - MEMORY_FREE_ALL - exit(3); - } - - // waiting threads return - thread_stop(&th_ctx); - - for (z = 0; z < th_ctx.thread_count; z++) { - if (t_arg[z].r) found = true; - - if (t_arg[z].err) { - error = true; - quit = true; - } - } - - if (found || quit) break; - } + // Hokuto Hyakuretsu Ken + ret = thread_start_scheduler (&th_ctx, t_arg, &ctx.queue_ctx); + if (ret < 0) { + printf("Error: thread_start_scheduler() failed (%d): %s\n", ret, thread_strerror(ret)); + error = true; + } else if (ret == 0) { + found = true; } // if found, show the key here @@ -1755,7 +1141,7 @@ int main(int argc, char **argv) { if (thread_count > 1) printf("[%zu] ", y); - printf("Key found @ slice %lu/%lu: ", t_arg[y].slice, t_arg[y].max_step); + printf("Key found @ slice %zu/%zu: ", t_arg[y].slice, t_arg[y].max_slices); for (int i = 0; i < 6; i++) { printf("%02X", (uint8_t)(t_arg[y].key & 0xff)); t_arg[y].key = t_arg[y].key >> 8; @@ -1770,14 +1156,13 @@ int main(int argc, char **argv) { if (gettimeofday(&cpu_t_end, NULL) == 0) { timersub(&cpu_t_end, &cpu_t_start, &cpu_t_result); } else { - printf("! gettimeofday(end) failed (%d): %s\n", errno, strerror(errno)); + printf("Error. gettimeofday(end) failed (%d): %s\n", errno, strerror(errno)); show_overall_time = false; } } if (!found) { printf("\nError. %s\n", (error) ? "something went wrong :(" : "Key not found :|"); - if (error) exit(-1); } printf("\nAttack 5 - opencl - end"); @@ -1792,17 +1177,19 @@ int main(int argc, char **argv) { fflush(stdout); #endif - thread_stop(&th_ctx); + if (!error && th_ctx.type != THREAD_TYPE_SEQ) thread_stop(&th_ctx); #if DEBUGME > 1 printf("destroy threads\n"); fflush(stdout); #endif - if ((ret = thread_destroy(&th_ctx)) != 0) { + if (!error) { + if ((ret = thread_destroy(&th_ctx)) != 0) { #if DEBUGME > 0 - printf("Warning: thread_destroy() failed (%d): %s\n", ret, thread_strerror(ret)); + printf("Warning: thread_destroy() failed (%d): %s\n", ret, thread_strerror(ret)); #endif + } } #if DEBUGME > 1 diff --git a/tools/hitag2crack/crack5opencl/opencl.c b/tools/hitag2crack/crack5opencl/opencl.c index a16ce0f94..ecee691b7 100644 --- a/tools/hitag2crack/crack5opencl/opencl.c +++ b/tools/hitag2crack/crack5opencl/opencl.c @@ -40,6 +40,452 @@ bool plat_dev_enabled(unsigned int id, unsigned int *sel, unsigned int cnt, unsi return false; } +unsigned int get_smallest_profile (compute_platform_ctx_t *cd_ctx, size_t ocl_platform_cnt) +{ + unsigned int profile = 0xff; + + size_t x = 0, y = 0; + + for (x = 0; x < ocl_platform_cnt; x++) { + if (!cd_ctx[x].selected) continue; + + for (y = 0; y < cd_ctx[x].device_cnt; y++) { + if (!cd_ctx[x].device[y].selected) continue; + +#if DEBUGME > 1 + printf("[debug] Initial profile for device %zu: %d\n", z, cd_ctx[x].device[y].profile); +#endif + + // with same devices will be selected the best + // but for different devices in the same platform we need the worst for now (todo) + if (cd_ctx[x].device[y].profile < profile) profile = cd_ctx[x].device[y].profile; + } + } + + // at worst, set profile to 0 + if (profile > 10) profile = 0; + + return profile; +} + +int discoverDevices(unsigned int profile_selected, uint32_t device_types_selected, cl_uint *platform_detected_cnt, size_t *selected_platforms_cnt, size_t *selected_devices_cnt, compute_platform_ctx_t **cd_ctx, unsigned int *plat_sel, unsigned int plat_cnt, unsigned int *dev_sel, unsigned int dev_cnt, bool verbose, bool show) +{ + int err = 0; + unsigned int ocl_platform_max = MAX_OPENCL_DEVICES; // 16 + cl_uint ocl_platform_cnt; + + cl_platform_id *ocl_platforms = (cl_platform_id *) calloc(ocl_platform_max, sizeof(cl_platform_id)); + if (!ocl_platforms) { + printf("Error: calloc (ocl_platforms) failed (%d): %s\n", errno, strerror(errno)); + return -2; + } + + // enum platforms + err = clGetPlatformIDs(ocl_platform_max, ocl_platforms, &ocl_platform_cnt); + if (err != CL_SUCCESS) { + printf("Error: clGetPlatformIDs() failed (%d)\n", err); + free (ocl_platforms); + return -3; + } + + if (ocl_platform_cnt == 0) { + printf("No platforms found, exit\n"); + free (ocl_platforms); + return -4; + } + + // allocate memory to hold info about platforms/devices + *cd_ctx = (compute_platform_ctx_t *) calloc(ocl_platform_cnt, sizeof(compute_platform_ctx_t)); + if (*cd_ctx == NULL) { + printf("Error: calloc (compute_platform_ctx_t) failed (%d): %s\n", errno, strerror(errno)); + free (ocl_platforms); + return -5; + } + + cl_platform_info ocl_platforms_info[3] = { CL_PLATFORM_NAME, CL_PLATFORM_VENDOR, CL_PLATFORM_VERSION }; + unsigned int ocl_platforms_info_cnt = sizeof(ocl_platforms_info) / sizeof(cl_platform_info); + + cl_device_info ocl_devices_info[8] = { CL_DEVICE_TYPE, CL_DEVICE_NAME, CL_DEVICE_VERSION, CL_DRIVER_VERSION, CL_DEVICE_VENDOR, CL_DEVICE_LOCAL_MEM_TYPE, CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MAX_COMPUTE_UNITS }; + unsigned int ocl_devices_info_cnt = sizeof(ocl_devices_info) / sizeof(cl_device_info); + + unsigned int info_idx = 0; + size_t tmp_len = 0; + char *tmp_buf = NULL; + + unsigned int global_device_id = 0; + + if (verbose) printf("- Found %u OpenCL Platform(s)\n", ocl_platform_cnt); + + for (cl_uint platform_idx = 0; platform_idx < ocl_platform_cnt; platform_idx++) { + (*cd_ctx)[platform_idx].platform_id = ocl_platforms[platform_idx]; + (*cd_ctx)[platform_idx].selected = plat_dev_enabled(platform_idx, plat_sel, plat_cnt, 0, 0); + + if ((*cd_ctx)[platform_idx].selected) (*selected_platforms_cnt)++; + + if (verbose) printf("\n-- Platform ID: %d\n", platform_idx + 1); + + for (info_idx = 0; info_idx < ocl_platforms_info_cnt; info_idx++) { + cl_platform_info ocl_info = ocl_platforms_info[info_idx]; + + err = clGetPlatformInfo((*cd_ctx)[platform_idx].platform_id, ocl_info, 0, NULL, &tmp_len); + if (err != CL_SUCCESS) { + printf("Error: clGetPlatformInfo(param size) failed (%d)\n", err); + free (*cd_ctx); + free (ocl_platforms); + return -6; + } + + if (tmp_len > 0) { + if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) { + printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno)); + free (*cd_ctx); + free (ocl_platforms); + return -7; + } + + err = clGetPlatformInfo((*cd_ctx)[platform_idx].platform_id, ocl_info, tmp_len, tmp_buf, 0); + if (err != CL_SUCCESS) { + printf("Error: clGetPlatformInfo(param) failed (%d)\n", err); + free (tmp_buf); + free (*cd_ctx); + free (ocl_platforms); + return -8; + } + } else { + tmp_len = 4; + if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) { + printf("Error: calloc (ocl_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno)); + free (*cd_ctx); + free (ocl_platforms); + return -7; + } + + strncpy(tmp_buf, "N/A\0", tmp_len); + } + + if (verbose) { + const char *tmp_info_desc = (info_idx == 0) ? "Name" : (info_idx == 1) ? "Vendor" : "Version"; + + printf("%14s: %s\n", tmp_info_desc, tmp_buf); + } + + switch (info_idx) { + case 0: + strncpy((*cd_ctx)[platform_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1); + break; + case 1: + strncpy((*cd_ctx)[platform_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); + break; + case 2: + strncpy((*cd_ctx)[platform_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); + break; + } + + if (info_idx == 1) { + if (!strncmp(tmp_buf, "NVIDIA", 6)) (*cd_ctx)[platform_idx].is_nv = true; + else if (!strncmp(tmp_buf, "Apple", 5)) { (*cd_ctx)[platform_idx].is_apple = true; (*cd_ctx)[platform_idx].warning = true; } + else if (!strncmp(tmp_buf, "Intel", 5)) (*cd_ctx)[platform_idx].is_intel = true; + else if (!strncmp(tmp_buf, "The pocl project", 16)) (*cd_ctx)[platform_idx].is_pocl = true; + } + + free (tmp_buf); + } + + if (!show && verbose) { + printf("%14s: %s\n", "Selected", ((*cd_ctx)[platform_idx].selected) ? "yes" : "no"); + if ((*cd_ctx)[platform_idx].warning) printf("\n%14s: performance will not be optimal using this platform\n\n", "=====> Warning"); + } + + // enum devices with this platform + unsigned int ocl_device_cnt = 0; + unsigned int ocl_device_max = MAX_OPENCL_DEVICES; + + cl_device_id *ocl_devices = (cl_device_id *) calloc(ocl_device_max, sizeof(cl_device_id)); + if (!ocl_devices) { + printf("Error: calloc (ocl_devices) failed (%d): %s\n", errno, strerror(errno)); + free (*cd_ctx); + free (ocl_platforms); + return -7; + } + + err = clGetDeviceIDs((*cd_ctx)[platform_idx].platform_id, CL_DEVICE_TYPE_ALL, ocl_device_max, ocl_devices, &ocl_device_cnt); + if (ocl_device_cnt == 0) { + if (device_types_selected == CL_DEVICE_TYPE_ALL) printf("No device(s) available with platform id %d\n", platform_idx); + (*cd_ctx)[platform_idx].device_cnt = 0; + continue; + } + + if (err != CL_SUCCESS) { + printf("Error: clGetDeviceIDs(cnt) failed (%d)\n", err); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -9; + } + + if (verbose) printf("%14s: %u\n", "Device(s)", ocl_device_cnt); + + (*cd_ctx)[platform_idx].device_cnt = ocl_device_cnt; + + for (unsigned int device_idx = 0; device_idx < ocl_device_cnt; device_idx++) { + memset(&(*cd_ctx)[platform_idx].device[device_idx], 0, sizeof(compute_device_ctx_t)); + cl_device_id ocl_device = ocl_devices[device_idx]; + (*cd_ctx)[platform_idx].device[device_idx].platform_id = (*cd_ctx)[platform_idx].platform_id; + + if (verbose) printf("---- * ID: %u\n", global_device_id + 1); + + for (info_idx = 0; info_idx < ocl_devices_info_cnt; info_idx++) { + cl_device_info ocl_dev_info = ocl_devices_info[info_idx]; + + if (info_idx == 0) { + cl_device_type device_type; + + err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_type), &device_type, 0); + if (err != CL_SUCCESS) { + printf("Error: clGetDeviceInfo(device_type) failed (%d)\n", err); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -10; + } + + if (device_type & CL_DEVICE_TYPE_GPU) (*cd_ctx)[platform_idx].device[device_idx].is_gpu = 1; + else if ((device_type & CL_DEVICE_TYPE_CPU) && (*cd_ctx)[platform_idx].is_pocl) { + (*cd_ctx)[platform_idx].device[device_idx].profile = (profile_selected > 1) ? 0 : profile_selected; + } + + if (verbose) printf("%14s: %s\n", "Device Type", (device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : (device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : "Other"); + + (*cd_ctx)[platform_idx].device[device_idx].selected = plat_dev_enabled(global_device_id, dev_sel, dev_cnt, (unsigned int) device_type, device_types_selected); + global_device_id++; + if ((*cd_ctx)[platform_idx].device[device_idx].selected) (*selected_devices_cnt)++; + continue; + } else if (info_idx == 5) { + cl_device_local_mem_type local_mem_type; + + err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_device_local_mem_type), &local_mem_type, 0); + if (err != CL_SUCCESS) { + printf("Error: clGetDeviceInfo(local_mem_type) failed (%d)\n", err); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -10; + } + + if (local_mem_type == CL_LOCAL || local_mem_type == CL_GLOBAL) { + if (verbose) printf("%14s: %s\n", "Local Mem Type", (local_mem_type == CL_LOCAL) ? "Local" : "Global"); + if ((*cd_ctx)[platform_idx].is_apple) { + if (strncmp((*cd_ctx)[platform_idx].device[device_idx].vendor, "Intel", 5) != 0) { + (*cd_ctx)[platform_idx].device[device_idx].have_local_memory = true; + + if ((*cd_ctx)[platform_idx].device[device_idx].is_gpu) { + if (profile_selected > 2) (*cd_ctx)[platform_idx].device[device_idx].profile = PROFILE_DEFAULT; // Apple-Intel GPU's + } else { + if (profile_selected > 3) (*cd_ctx)[platform_idx].device[device_idx].profile = PROFILE_DEFAULT; // Apple-Intel CPU's + } + } + } else if ((*cd_ctx)[platform_idx].is_nv) { + (*cd_ctx)[platform_idx].device[device_idx].have_local_memory = true; + } + } else { + if (verbose) printf("%14s: None\n", "Local Mem Type"); + } + + if (verbose) printf("%14s: %s\n", "Local Mem Opt", ((*cd_ctx)[platform_idx].device[device_idx].have_local_memory) ? "yes" : "no"); + + continue; + } else if (info_idx == 6) { + size_t wis[3] = { 0 }; + err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(size_t) * 3, wis, 0); + if (err != CL_SUCCESS) { + printf("Error: clGetDeviceInfo(work_items_size) failed (%d)\n", err); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -10; + } + + if (verbose) printf("%14s: (%zu,%zu,%zu)\n", "Max Work-Items", wis[0], wis[1], wis[2]); + +#if APPLE_GPU_BROKEN == 1 + if (wis[1] < GLOBAL_WS_1 && (*cd_ctx)[platform_idx].device[device_idx].is_apple_gpu) { + (*cd_ctx)[platform_idx].device[device_idx].unsupported = true; + } +#endif + continue; + } else if (info_idx == 7) { + cl_uint cores = 0; + err = clGetDeviceInfo(ocl_device, ocl_dev_info, sizeof(cl_uint), &cores, 0); + if (err != CL_SUCCESS) { + printf("Error: clGetDeviceInfo(compute_units) failed (%d)\n", err); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -10; + } + + if (verbose) printf("%14s: %u\n", "Compute Units", cores); + + (*cd_ctx)[platform_idx].device[device_idx].compute_units = cores; + continue; + } + + tmp_len = 0; + tmp_buf = NULL; + + err = clGetDeviceInfo(ocl_device, ocl_dev_info, 0, NULL, &tmp_len); + if (err != CL_SUCCESS) { + printf("Error: clGetDeviceInfo(param size) failed (%d)\n", err); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -10; + } + + if (tmp_len > 0) { + if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) { + printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno)); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -7; + } + + err = clGetDeviceInfo(ocl_device, ocl_dev_info, tmp_len, tmp_buf, 0); + if (err != CL_SUCCESS) { + printf("Error: clGetDeviceInfo(param) failed (%d)\n", err); + free (tmp_buf); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -10; + } + } else { + tmp_len = 4; + if (!(tmp_buf = (char *) calloc(tmp_len, sizeof(char)))) { + printf("Error: calloc (ocl_dev_info %u) failed (%d): %s\n", info_idx, errno, strerror(errno)); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -7; + } + + strncpy(tmp_buf, "N/A\0", tmp_len); + } + + if (verbose) { + const char *tmp_dev_info_desc = (info_idx == 1) ? "Name" : (info_idx == 2) ? "Version" : (info_idx == 3) ? "Driver Version" : "Vendor"; + + printf("%14s: %s\n", tmp_dev_info_desc, tmp_buf); + } + + switch (info_idx) { + case 1: + strncpy((*cd_ctx)[platform_idx].device[device_idx].name, tmp_buf, tmp_len < 0xff ? tmp_len : 0xff - 1); + break; + case 2: + strncpy((*cd_ctx)[platform_idx].device[device_idx].version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); + break; + case 3: + strncpy((*cd_ctx)[platform_idx].device[device_idx].driver_version, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); + break; + case 4: + strncpy((*cd_ctx)[platform_idx].device[device_idx].vendor, tmp_buf, tmp_len < 0x40 ? tmp_len : 0x40 - 1); + break; + } + + if (info_idx == 1) { + // force profile to 0-1 with Jetson Nano + if (strstr(tmp_buf, "Tegra") && (*cd_ctx)[platform_idx].is_pocl) { + (*cd_ctx)[platform_idx].device[device_idx].profile = (profile_selected > 1) ? 0 : profile_selected; + } + } else if (info_idx == 4) { + if (!strncmp(tmp_buf, "Intel", 5)) { + if ((*cd_ctx)[platform_idx].is_apple) { + (*cd_ctx)[platform_idx].device[device_idx].is_apple_gpu = (*cd_ctx)[platform_idx].device[device_idx].is_gpu; + } + + // force profile to 0 with Intel GPU and 2 wih Intel CPU's + if ((*cd_ctx)[platform_idx].is_intel) { + if ((*cd_ctx)[platform_idx].device[device_idx].is_gpu) { + (*cd_ctx)[platform_idx].device[device_idx].profile = 0; // Intel GPU's, work better with a very slow profile + } else { + (*cd_ctx)[platform_idx].device[device_idx].profile = (profile_selected > 2) ? PROFILE_DEFAULT : profile_selected; // Intel CPU's + } + } + } + + if (!strncmp(tmp_buf, "NVIDIA", 6) && (*cd_ctx)[platform_idx].is_nv) { + unsigned int sm_maj = 0, sm_min = 0; + + err = clGetDeviceInfo(ocl_device, 0x4000, sizeof(unsigned int), &sm_maj, 0); + err |= clGetDeviceInfo(ocl_device, 0x4001, sizeof(unsigned int), &sm_min, 0); + + if (err != CL_SUCCESS) { + printf("Error: clGetDeviceInfo(sm_maj/sm_min) failed (%d)\n", err); + free (tmp_buf); + free (ocl_devices); + free (*cd_ctx); + free (ocl_platforms); + return -10; + } + + (*cd_ctx)[platform_idx].device[device_idx].sm_maj = sm_maj; + (*cd_ctx)[platform_idx].device[device_idx].sm_min = sm_min; + + if (verbose) printf("%14s: %u%u\n", "SM", sm_maj, sm_min); + + if (sm_maj >= 5) { // >= Maxwell + // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#logic-and-shift-instructions-lop3 + // Requires sm_50 or higher. + (*cd_ctx)[platform_idx].device[device_idx].have_lop3 = true; + } else { + (*cd_ctx)[platform_idx].device[device_idx].warning = true; + } + + (*cd_ctx)[platform_idx].device[device_idx].is_nv = true; + + if ((*cd_ctx)[platform_idx].device[device_idx].is_gpu) { + if (profile_selected > 10) { + // NVIDIA RTX 3090 perform better with 5 + (*cd_ctx)[platform_idx].device[device_idx].profile = (sm_maj >= 8) ? 5 : PROFILE_DEFAULT; + } + } + } else { + (*cd_ctx)[platform_idx].device[device_idx].warning = true; + } + } + + free (tmp_buf); + } + + if (!show && verbose) printf("%14s: %s\n", "Selected", ((*cd_ctx)[platform_idx].device[device_idx].selected) ? "yes" : "no"); + + if ((*cd_ctx)[platform_idx].device[device_idx].unsupported) { + printf("\n%14s: this device was not supported, beacuse of missing resources\n\n", "=====> Warning"); + continue; + } + + if ((*cd_ctx)[platform_idx].device[device_idx].warning) { + if (!show && verbose) printf("\n%14s: performance will not be optimal using this device\n\n", "=====> Warning"); + } + + (*cd_ctx)[platform_idx].device[device_idx].device_id = ocl_device; + } + free (ocl_devices); + ocl_devices = NULL; + } + + free (ocl_platforms); + ocl_platforms = NULL; + + *platform_detected_cnt = ocl_platform_cnt; + + if (show) free (*cd_ctx); + + return 0; +} + int runKernel(opencl_ctx_t *ctx, uint32_t cand_base, uint64_t *matches, uint32_t *matches_found, size_t id) { int err = 0; diff --git a/tools/hitag2crack/crack5opencl/opencl.h b/tools/hitag2crack/crack5opencl/opencl.h index 410a7cb5a..462dbbb4f 100644 --- a/tools/hitag2crack/crack5opencl/opencl.h +++ b/tools/hitag2crack/crack5opencl/opencl.h @@ -38,6 +38,7 @@ License: GNU General Public License v3 or any later version (see LICENSE.txt) #include #include +#include // max number of concurrent devices (tested up to 4x RTX 3090) #define MAX_OPENCL_DEVICES 16 @@ -54,9 +55,10 @@ typedef struct compute_device_ctx { bool warning, unsupported; bool selected; - bool enabled; - unsigned char pad1[4]; + unsigned char pad1[1]; + unsigned int profile; + unsigned int sm_maj; unsigned int sm_min; unsigned int compute_units; @@ -70,11 +72,11 @@ typedef struct compute_platform_ctx { unsigned int device_cnt; unsigned int compute_units_max; - bool is_nv, is_apple, is_intel; + bool is_nv, is_apple, is_intel, is_pocl; bool warning; bool selected; - unsigned char pad1[3]; + unsigned char pad1[2]; compute_device_ctx_t device[0x10]; char name[0xff]; @@ -94,7 +96,7 @@ typedef struct opencl_ctx { size_t *global_ws; size_t *local_ws; - int *profiles; + unsigned int *profiles; cl_device_id *device_ids; // compute device id's array cl_context *contexts; // compute contexts @@ -120,7 +122,8 @@ typedef struct opencl_ctx { } opencl_ctx_t; bool plat_dev_enabled(unsigned int id, unsigned int *sel, unsigned int cnt, unsigned int cur_type, unsigned int allow_type); - +unsigned int get_smallest_profile (compute_platform_ctx_t *cd_ctx, size_t ocl_platform_cnt); +int discoverDevices(unsigned int profile_selected, uint32_t device_types_selected, cl_uint *ocl_platform_cnt, size_t *selected_platforms_cnt, size_t *selected_devices_cnt, compute_platform_ctx_t **cd_ctx, unsigned int *plat_sel, unsigned int plat_cnt, unsigned int *dev_sel, unsigned int dev_cnt, bool verbose, bool show); int runKernel(opencl_ctx_t *ctx, uint32_t cand_base, uint64_t *matches, uint32_t *matches_found, size_t id); #endif // OPENCL_H diff --git a/tools/hitag2crack/crack5opencl/queue.c b/tools/hitag2crack/crack5opencl/queue.c index ee289f552..861387cb3 100644 --- a/tools/hitag2crack/crack5opencl/queue.c +++ b/tools/hitag2crack/crack5opencl/queue.c @@ -340,7 +340,7 @@ int wu_queue_pop(wu_queue_ctx_t *ctx, wu_queue_data_t *wu, short remove) { break; case QUEUE_TYPE_RANDOM: // from the head #if TEST_UNIT == 1 - fprintf(stdout, "pop id %ld\n", wu->id); + fprintf(stdout, "pop id %zu\n", wu->id); fflush(stdout); #endif if (ptrPrev == NULL) { @@ -441,9 +441,9 @@ int main(void) { wu_queue_type_t types[4] = { QUEUE_TYPE_FORWARD, QUEUE_TYPE_REVERSE, QUEUE_TYPE_RANDOM, 1234 }; int types_max = (int)(sizeof(types) / sizeof(wu_queue_type_t)); - int ret = 0; for (i = 0; i < types_max; i++) { + int ret = 0; printf("[%d] trying wu_queue_init() in %s mode\n", i, wu_queue_strdesc(types[i])); if ((ret = wu_queue_init(&ctx, types[i])) != 0) { diff --git a/tools/hitag2crack/crack5opencl/queue.h b/tools/hitag2crack/crack5opencl/queue.h index 80304b0d3..9fe3cf97d 100644 --- a/tools/hitag2crack/crack5opencl/queue.h +++ b/tools/hitag2crack/crack5opencl/queue.h @@ -95,7 +95,7 @@ typedef struct wu_queue_ctx { // mutex pthread_mutexattr_t queue_mutex_attr; -// unsigned char pad1[4]; + unsigned char pad1[4]; pthread_mutex_t queue_mutex; } wu_queue_ctx_t; diff --git a/tools/hitag2crack/crack5opencl/threads.c b/tools/hitag2crack/crack5opencl/threads.c index 5cb5a2b87..ff79a4c74 100644 --- a/tools/hitag2crack/crack5opencl/threads.c +++ b/tools/hitag2crack/crack5opencl/threads.c @@ -56,6 +56,8 @@ const char *thread_strerror(int error) { return (const char *) "GENERIC ERROR"; case THREAD_ERROR_ALLOC: return (const char *) "ALLOC FAILED"; + case THREAD_ERROR_INTERNAL: + return (const char *) "INTERNAL ERROR"; } return (const char *) "GENERIC"; @@ -174,6 +176,198 @@ int thread_init(thread_ctx_t *ctx, short type, size_t thread_count) { return 0; } +int thread_start_scheduler (thread_ctx_t *ctx, thread_args_t *t_arg, wu_queue_ctx_t *queue_ctx) +{ + size_t z = 0; + bool found = false; + bool done = false; + unsigned int th_cnt = 0; + + if (ctx->type == THREAD_TYPE_SEQ) { + bool error = false; + uint32_t slice = 0; + for (slice = 0; slice < t_arg[0].max_slices; slice += ctx->thread_count) { + int err = 0; + + if ((err = thread_start(ctx, t_arg)) != 0) { + printf("Error: thread_start() failed (%d): %s\n", err, thread_strerror(err)); + } + + // waiting threads return + if (err == 0) thread_stop(ctx); + + for (z = 0; z < ctx->thread_count; z++) { + if (t_arg[z].r) { + found = true; + break; + } + + if (t_arg[z].err) { + error = true; + } + } + + // internel err + if (error && err == 0) { + thread_destroy(ctx); + err = THREAD_ERROR_INTERNAL; + } + + if (err != 0) return err; + + if (found) break; + } + } else if (ctx->type == THREAD_TYPE_ASYNC) { + + // crack hitag key or die tryin' + do { // master + th_cnt = 0; + + for (z = 0; z < ctx->thread_count; z++) { +#if TDEBUG >= 1 && DEBUGME == 1 + if (ctx->thread_count == 1) { printf("[%zu] get status from thread ...\n", z); fflush(stdout); } +#endif + + pthread_mutex_lock(&ctx->thread_mutexs[z]); + thread_status_t cur_status = t_arg[z].status; + pthread_mutex_unlock(&ctx->thread_mutexs[z]); + +#if TDEBUG >= 1 && DEBUGME == 1 + if (ctx->thread_count == 1) { printf("[%zu] thread status: %s\n", z, thread_status_strdesc(cur_status)); fflush(stdout); } +#endif + if (found) { +#if TDEBUG >= 3 + printf("[%zu] Processing exit logic\n", z); + fflush(stdout); +#endif + + if (cur_status < TH_FOUND_KEY) { +#if TDEBUG >= 1 + printf("[%zu] key found from another thread, set quit\n", z); + fflush(stdout); +#endif + pthread_mutex_lock(&ctx->thread_mutexs[z]); + t_arg[z].status = TH_END; + t_arg[z].quit = true; + if (cur_status == TH_WAIT) pthread_cond_signal(&ctx->thread_conds[z]); + pthread_mutex_unlock(&ctx->thread_mutexs[z]); + } else { + if (ctx->thread_count == 1) { + th_cnt++; +#if TDEBUG >= 1 + printf("[%zu] Increment th_cnt: %u/%zu\n", z, th_cnt, ctx->thread_count); + fflush(stdout); +#endif + } + } + continue; + } + + if (cur_status == TH_WAIT) { + pthread_mutex_lock(&ctx->thread_mutexs[z]); + + if (found) { +#if TDEBUG >= 1 + printf("[%zu] key is found in another thread 1\n", z); + fflush(stdout); +#endif + t_arg[z].status = TH_END; + t_arg[z].quit = true; + pthread_mutex_unlock(&ctx->thread_mutexs[z]); + continue; + } + + if (wu_queue_done(queue_ctx) != QUEUE_EMPTY) { + t_arg[z].status = TH_PROCESSING; + +#if TDEBUG >= 1 + printf("[master] thread [%zu], I give you another try (%s)\n", z, thread_status_strdesc(t_arg[z].status)); + fflush(stdout); +#endif + + pthread_cond_signal(&ctx->thread_conds[z]); + pthread_mutex_unlock(&ctx->thread_mutexs[z]); + continue; + } else { +#if TDEBUG >= 1 + printf("[master] thread [%zu], max step reached. Quit.\n", z); + fflush(stdout); +#endif + + cur_status = t_arg[z].status = TH_END; + t_arg[z].quit = true; + + pthread_cond_signal(&ctx->thread_conds[z]); + pthread_mutex_unlock(&ctx->thread_mutexs[z]); + } + } + + if (cur_status == TH_PROCESSING) { + if (ctx->enable_condusleep) { +#if TDEBUG >= 1 + printf("[master] before pthread_cond_wait, TH_PROCESSING\n"); + fflush(stdout); +#endif + pthread_mutex_lock(&ctx->thread_mutex_usleep); +#if TDEBUG >= 1 + printf("[master] thread [%zu], I'm waiting you end of task, I'm in %s give me a signal.\n", z, thread_status_strdesc(t_arg[z].status)); + fflush(stdout); +#endif + pthread_cond_wait(&ctx->thread_cond_usleep, &ctx->thread_mutex_usleep); +#if TDEBUG >= 1 + printf("[master] thread [%zu], got the signal with new state: %s.\n", z, thread_status_strdesc(t_arg[z].status)); + fflush(stdout); +#endif + if (t_arg[z].status == TH_FOUND_KEY) found = true; + + pthread_mutex_unlock(&ctx->thread_mutex_usleep); +#if TDEBUG >= 1 + printf("[master] after pthread_cond_wait, TH_PROCESSING\n"); + fflush(stdout); +#endif + continue; + } + + if (found) { +#if TDEBUG >= 1 + printf("[master] thread [%zu], the key is found. set TH_END from TH_PROCESSING\n", z); + fflush(stdout); +#endif + pthread_mutex_lock(&ctx->thread_mutexs[z]); + t_arg[z].status = TH_END; + t_arg[z].quit = true; + pthread_mutex_unlock(&ctx->thread_mutexs[z]); + continue; + } + } + if (cur_status == TH_ERROR) { + // something went wrong + pthread_mutex_lock(&ctx->thread_mutexs[z]); + t_arg[z].status = TH_END; + t_arg[z].quit = true; + pthread_mutex_unlock(&ctx->thread_mutexs[z]); + continue; + } + + if (cur_status >= TH_FOUND_KEY) { + th_cnt++; + + if (cur_status == TH_FOUND_KEY) { + thread_setEnd(ctx, t_arg); + found = true; + done = true; + } + } + } + + if (th_cnt == ctx->thread_count) done = true; + + } while (!done); + } + + return (found) ? 0 : 1; +} + int thread_destroy(thread_ctx_t *ctx) { if (!ctx) return -1; if (!ctx->init) return -2; @@ -258,8 +452,6 @@ const char *thread_status_strdesc(thread_status_t s) { return (const char *) "PROCESSING"; case TH_ERROR: return (const char *) "ERROR"; - case TH_STOP: - return (const char *) "STOP"; case TH_FOUND_KEY: return (const char *) "FOUND_KEY"; case TH_END: @@ -274,11 +466,10 @@ bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg) { size_t z; - int m_ret = 0; int c_ret = 0; for (z = 0; z < ctx->thread_count; z++) { - m_ret = pthread_mutex_lock(&ctx->thread_mutexs[z]); + int m_ret = pthread_mutex_lock(&ctx->thread_mutexs[z]); if (m_ret != 0) { tprintf("[%zu] [%s] Error: pthread_mutex_lock() failed (%d): %s\n", z, __func__, m_ret, strerror(m_ret)); } @@ -296,10 +487,10 @@ bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg) { } #if DEBUGME > 0 - tprintf("[%zu] [%s] Set thread status to TH_STOP\n", z, __func__); + tprintf("[%zu] [%s] Set thread status to TH_END\n", z, __func__); #endif - t_arg[z].status = TH_STOP; + t_arg[z].status = TH_END; if (tmp == TH_WAIT) { #if DEBUGME > 0 @@ -366,7 +557,7 @@ void *computing_process(void *arg) { if (!ctx->force_hitag2_opencl) { #if DEBUGME >= 2 - printf("[slave][%zu] master, I found %5u candidates @ slice %zu\n", z, matches_found[0], a->slice + 1); + printf("[%s][%zu] master, I found %5u candidates @ slice %zu\n", __func__, z, matches_found[0], a->slice + 1); fflush(stdout); #endif @@ -378,7 +569,7 @@ void *computing_process(void *arg) { // the OpenCL kernel return only one key if found, else nothing #if TDEBUG >= 1 - printf("[slave][%zu] master, I found the key @ slice %zu\n", z, a->slice + 1); + printf("[%s][%zu] master, I found the key @ slice %zu\n", __func__, z, a->slice + 1); fflush(stdout); #endif @@ -400,32 +591,31 @@ void *computing_process_async(void *arg) { // fetching data from thread struct, I hope they are good thread_status_t status = a->status; + uint64_t *matches = a->matches; + uint32_t *matches_found = a->matches_found; uint32_t uid = a->uid; uint32_t aR2 = a->aR2; uint32_t nR1 = a->nR1; uint32_t nR2 = a->nR2; - uint64_t *matches = a->matches; - uint32_t *matches_found = a->matches_found; - size_t max_step = a->max_step; + size_t max_slices = a->max_slices; opencl_ctx_t *ctx = a->ocl_ctx; pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]); uint64_t off = 0; -// size_t slice = 0; int ret = 0; if (status == TH_START) { #if TDEBUG >= 1 - printf("[slave][%zu] plat id %d, uid %u, aR2 %u, nR1 %u, nR2 %u, Initial status: %s\n", z, ctx->id_platform, uid, aR2, nR1, nR2, thread_status_strdesc(status)); + printf("[%s][%zu] plat id %d, uid %u, aR2 %u, nR1 %u, nR2 %u, Initial status: %s\n", __func__, z, ctx->id_platform, uid, aR2, nR1, nR2, thread_status_strdesc(status)); #endif status = TH_WAIT; // proceed to next } - do { // slave + do { if (status == TH_WAIT) { pthread_mutex_lock(&a->thread_ctx->thread_mutexs[z]); @@ -433,7 +623,7 @@ void *computing_process_async(void *arg) { if (a->status == TH_END) { // other threads found the key fflush(stdout); - status = TH_END; + //status = TH_END; a->quit = true; pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]); pthread_exit(NULL); @@ -444,7 +634,7 @@ void *computing_process_async(void *arg) { pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep); pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond #if TDEBUG >= 1 - printf("[slate][%zu] after pthread_cond_signal TH_WAIT\n", z); + printf("[%s][%zu] after pthread_cond_signal TH_WAIT\n", __func__, z); fflush(stdout); #endif pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep); @@ -452,7 +642,7 @@ void *computing_process_async(void *arg) { } #if TDEBUG >= 1 - printf("[slave][%zu] master, i'm here to serve you. I'm in %s give me a signal.\n", z, thread_status_strdesc(status)); + printf("[%s][%zu] master, i'm here to serve you. I'm in %s give me a signal.\n", __func__, z, thread_status_strdesc(status)); fflush(stdout); #endif @@ -461,7 +651,7 @@ void *computing_process_async(void *arg) { status = a->status; // read new status from master #if TDEBUG >= 2 - printf("[slave][%zu] master, got the signal with new state: %s.\n", z, thread_status_strdesc(status)); + printf("[%s][%zu] master, got the signal with new state: %s.\n", __func__, z, thread_status_strdesc(status)); fflush(stdout); #endif @@ -469,7 +659,7 @@ void *computing_process_async(void *arg) { if (status == TH_WAIT) { #if TDEBUG >=1 - printf("[slave] ! Error: need to be TH_PROCESSING or TH_END, not TH_WAIT ... exit\n"); + printf("[%s] ! Error: need to be TH_PROCESSING or TH_END, not TH_WAIT ... exit\n", __func__); fflush(stdout); #endif break; @@ -478,7 +668,7 @@ void *computing_process_async(void *arg) { if (status == TH_ERROR) { #if TDEBUG >= 1 - printf("[slave][%zu] master, got error signal, proceed with exit\n", z); + printf("[%s][%zu] master, got error signal, proceed with exit\n", __func__, z); fflush(stdout); #endif pthread_exit(NULL); @@ -486,7 +676,7 @@ void *computing_process_async(void *arg) { if (status == TH_PROCESSING) { #if TDEBUG >= 2 - printf("[slave][%zu] master, got a work-unit, processing ...\n", z); + printf("[%s][%zu] master, got a work-unit, processing ...\n", __func__, z); fflush(stdout); #endif @@ -521,7 +711,7 @@ void *computing_process_async(void *arg) { a->status = TH_ERROR; pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]); #if TDEBUG >= 1 - printf("[slave][%zu] master, something is broken, exit\n", z); + printf("[%s][%zu] master, something is broken, exit\n", __func__, z); fflush(stdout); #endif @@ -529,7 +719,7 @@ void *computing_process_async(void *arg) { pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep); pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond #if TDEBUG >= 1 - printf("[slave][%zu] after pthread_cond_signal TH_ERROR\n", z); + printf("[%s][%zu] after pthread_cond_signal TH_ERROR\n", __func__, z); #endif pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep); } @@ -539,12 +729,12 @@ void *computing_process_async(void *arg) { } #if TDEBUG >= 1 - printf("[slave][%zu] master, process is done but no candidates found\n", z); + printf("[%s][%zu] master, process is done but no candidates found\n", __func__, z); fflush(stdout); #endif pthread_mutex_lock(&a->thread_ctx->thread_mutexs[z]); - if (a->slice >= max_step) a->status = TH_END; + if (a->slice >= max_slices) a->status = TH_END; else a->status = TH_WAIT; status = a->status; @@ -555,7 +745,7 @@ void *computing_process_async(void *arg) { pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep); pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond #if TDEBUG >= 1 - printf("[slave][%zu] after pthread_cond_signal TH_WAIT\n", z); + printf("[%s][%zu] after pthread_cond_signal TH_WAIT\n", __func__, z); fflush(stdout); #endif pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep); @@ -566,7 +756,7 @@ void *computing_process_async(void *arg) { if (!ctx->force_hitag2_opencl) { #if TDEBUG >= 1 - printf("[slave][%zu] master, we got %5u candidates. Proceed to validation\n", z, matches_found[0]); + printf("[%s][%zu] master, we got %5u candidates. Proceed to validation\n", __func__, z, matches_found[0]); fflush(stdout); #endif @@ -576,7 +766,7 @@ void *computing_process_async(void *arg) { a->status = TH_END; pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]); #if TDEBUG >= 1 - printf("[slave][%zu] master, Another thread found the key, quit 2 \n", z); + printf("[%s][%zu] master, Another thread found the key, quit 2 \n", __func__, z); fflush(stdout); #endif @@ -584,7 +774,8 @@ void *computing_process_async(void *arg) { pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep); pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond #if TDEBUG >= 1 - printf("[slave][%zu] after pthread_cond_signal TH_END\n", z); + printf("[%s][%zu] after pthread_cond_signal TH_END\n", __func__, z); + fflush (stdout); #endif pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep); } @@ -600,7 +791,7 @@ void *computing_process_async(void *arg) { a->quit = true; pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]); #if TDEBUG >= 1 - printf("[slave][%zu] master, I found the key ! state %" STR(OFF_FORMAT_U) ", slice %zu\n", z, a->s, a->slice + 1); + printf("[%s][%zu] master, I found the key ! state %" STR(OFF_FORMAT_U) ", slice %zu\n", __func__, z, a->s, a->slice + 1); fflush(stdout); #endif @@ -608,7 +799,7 @@ void *computing_process_async(void *arg) { pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep); pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond #if TDEBUG >= 1 - printf("[slave][%zu] after pthread_cond_signal TH_FOUND_KEY\n", z); + printf("[%s][%zu] after pthread_cond_signal TH_FOUND_KEY\n", __func__, z); #endif pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep); } @@ -622,7 +813,7 @@ void *computing_process_async(void *arg) { a->status = TH_END; pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]); #if TDEBUG >= 1 - printf("[slave][%zu] master, Another thread found the key, quit 1 \n", z); + printf("[%s][%zu] master, Another thread found the key, quit 1 \n", __func__, z); fflush(stdout); #endif @@ -630,7 +821,7 @@ void *computing_process_async(void *arg) { pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep); pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond #if TDEBUG >= 1 - printf("[slave][%zu] after pthread_cond_signal TH_END\n", z); + printf("[%s][%zu] after pthread_cond_signal TH_END\n", __func__, z); #endif pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep); } @@ -651,7 +842,7 @@ void *computing_process_async(void *arg) { a->quit = true; pthread_mutex_unlock(&a->thread_ctx->thread_mutexs[z]); #if TDEBUG >= 1 - printf("[slave][%zu] master, I found the key at slice %zu\n", z, a->slice + 1); + printf("[%s][%zu] master, I found the key at slice %zu\n", __func__, z, a->slice + 1); fflush(stdout); #endif @@ -659,7 +850,7 @@ void *computing_process_async(void *arg) { pthread_mutex_lock(&a->thread_ctx->thread_mutex_usleep); pthread_cond_signal(&a->thread_ctx->thread_cond_usleep); // unlock master/TH_PROCESSING cond #if TDEBUG >= 1 - printf("[slave][%zu] after pthread_cond_signal TH_FOUND_KEY\n", z); + printf("[%s][%zu] after pthread_cond_signal TH_FOUND_KEY\n", __func__, z); #endif pthread_mutex_unlock(&a->thread_ctx->thread_mutex_usleep); } @@ -671,10 +862,10 @@ void *computing_process_async(void *arg) { if (status >= TH_FOUND_KEY) { #if TDEBUG >= 1 if (status == TH_FOUND_KEY) { - printf("[slave][%zu] master, TH_FOUND_KEY, if you see this message, something is wrong\n", z); + printf("[%s][%zu] master, TH_FOUND_KEY, if you see this message, something is wrong\n", __func__, z); fflush(stdout); } else if (status == TH_END) { - printf("[slave][%zu] master, TH_END reached\n", z); + printf("[%s][%zu] master, TH_END reached\n", __func__, z); fflush(stdout); } #endif diff --git a/tools/hitag2crack/crack5opencl/threads.h b/tools/hitag2crack/crack5opencl/threads.h index d60c16cfc..15874a5a1 100644 --- a/tools/hitag2crack/crack5opencl/threads.h +++ b/tools/hitag2crack/crack5opencl/threads.h @@ -38,7 +38,6 @@ typedef enum thread_status { TH_START = 0, TH_WAIT, TH_PROCESSING, - TH_STOP, TH_ERROR, TH_FOUND_KEY, TH_END @@ -66,7 +65,8 @@ typedef enum thread_error { THREAD_ERROR_MUTEX_USLEEP = -11, THREAD_ERROR_COND_USLEEP = -12, THREAD_ERROR_GENERIC = -13, - THREAD_ERROR_ALLOC = -14 + THREAD_ERROR_ALLOC = -14, + THREAD_ERROR_INTERNAL = -15 } thread_error_t; @@ -92,6 +92,7 @@ typedef struct threads_ctx { pthread_attr_t attr; pthread_mutexattr_t mutex_attr; + unsigned char pad3[4]; } thread_ctx_t; // used by threads engine @@ -105,13 +106,13 @@ typedef struct thread_arg { bool r; bool err; bool quit; - bool async; + unsigned char pad2[1]; uint64_t off; uint64_t *matches; uint32_t *matches_found; size_t slice; - size_t max_step; + size_t max_slices; size_t device_id; uint64_t key; @@ -124,11 +125,12 @@ typedef struct thread_arg { int thread_init(thread_ctx_t *ctx, short type, size_t thread_count); int thread_start(thread_ctx_t *ctx, thread_args_t *args); int thread_stop(thread_ctx_t *ctx); +int thread_start_scheduler(thread_ctx_t *ctx, thread_args_t *t_arg, wu_queue_ctx_t *queue_ctx); +bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg); void tprintf(const char *restrict format, ...); const char *thread_strerror(int error); const char *thread_status_strdesc(thread_status_t s); -bool thread_setEnd(thread_ctx_t *ctx, thread_args_t *t_arg); void *computing_process(void *arg); void *computing_process_async(void *arg);