From fedeee43952e5f33a017ccfc1e9becf18522badc Mon Sep 17 00:00:00 2001 From: iperov Date: Tue, 26 Mar 2019 19:06:19 +0400 Subject: [PATCH] Colab: added ability to run programs after N secs in trainer --- main.py | 2 ++ mainscripts/Trainer.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/main.py b/main.py index 676b485..88ea0ae 100644 --- a/main.py +++ b/main.py @@ -85,6 +85,7 @@ if __name__ == "__main__": 'model_name' : arguments.model_name, 'no_preview' : arguments.no_preview, 'debug' : arguments.debug, + 'execute_programs' : [ [int(x[0]), x[1] ] for x in arguments.execute_program] } device_args = {'cpu_only' : arguments.cpu_only, 'force_gpu_idx' : arguments.force_gpu_idx, @@ -101,6 +102,7 @@ if __name__ == "__main__": p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.") p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.") p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") + p.add_argument('--execute-program', dest="execute_program", action='append', nargs='+') p.set_defaults (func=process_train) def process_convert(arguments): diff --git a/mainscripts/Trainer.py b/mainscripts/Trainer.py index e023c26..3f86f38 100644 --- a/mainscripts/Trainer.py +++ b/mainscripts/Trainer.py @@ -15,12 +15,15 @@ from interact import interact as io def trainerThread (s2c, c2s, args, device_args): while True: try: + start_time = time.time() + training_data_src_path = Path( args.get('training_data_src_dir', '') ) training_data_dst_path = Path( args.get('training_data_dst_dir', '') ) model_path = Path( args.get('model_path', '') ) model_name = args.get('model_name', '') save_interval_min = 15 debug = args.get('debug', '') + execute_programs = args.get('execute_programs', []) if not training_data_src_path.exists(): io.log_err('Training data src directory does not exist.') @@ -75,6 +78,17 @@ def trainerThread (s2c, c2s, args, device_args): for i in itertools.count(0,1): if not debug: + cur_time = time.time() + + for x in execute_programs: + prog_time, prog = x + if prog_time != 0 and (cur_time - start_time) >= prog_time: + x[0] = 0 + try: + exec(prog) + except Exception as e: + print("Unable to execute program: %s" % (prog) ) + if not is_reached_goal: iter, iter_time = model.train_one_iter()