Colab: added the ability to run programs in the trainer after N seconds

This commit is contained in:
iperov 2019-03-26 19:06:19 +04:00
parent 009af42617
commit fedeee4395
2 changed files with 16 additions and 0 deletions

View file

@@ -85,6 +85,7 @@ if __name__ == "__main__":
'model_name' : arguments.model_name, 'model_name' : arguments.model_name,
'no_preview' : arguments.no_preview, 'no_preview' : arguments.no_preview,
'debug' : arguments.debug, 'debug' : arguments.debug,
'execute_programs' : [ [int(x[0]), x[1] ] for x in arguments.execute_program]
} }
device_args = {'cpu_only' : arguments.cpu_only, device_args = {'cpu_only' : arguments.cpu_only,
'force_gpu_idx' : arguments.force_gpu_idx, 'force_gpu_idx' : arguments.force_gpu_idx,
@@ -101,6 +102,7 @@ if __name__ == "__main__":
p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.") p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.")
p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.") p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.")
p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.")
p.add_argument('--execute-program', dest="execute_program", action='append', nargs='+')
p.set_defaults (func=process_train) p.set_defaults (func=process_train)
def process_convert(arguments): def process_convert(arguments):

View file

@@ -15,12 +15,15 @@ from interact import interact as io
def trainerThread (s2c, c2s, args, device_args): def trainerThread (s2c, c2s, args, device_args):
while True: while True:
try: try:
start_time = time.time()
training_data_src_path = Path( args.get('training_data_src_dir', '') ) training_data_src_path = Path( args.get('training_data_src_dir', '') )
training_data_dst_path = Path( args.get('training_data_dst_dir', '') ) training_data_dst_path = Path( args.get('training_data_dst_dir', '') )
model_path = Path( args.get('model_path', '') ) model_path = Path( args.get('model_path', '') )
model_name = args.get('model_name', '') model_name = args.get('model_name', '')
save_interval_min = 15 save_interval_min = 15
debug = args.get('debug', '') debug = args.get('debug', '')
execute_programs = args.get('execute_programs', [])
if not training_data_src_path.exists(): if not training_data_src_path.exists():
io.log_err('Training data src directory does not exist.') io.log_err('Training data src directory does not exist.')
@@ -75,6 +78,17 @@ def trainerThread (s2c, c2s, args, device_args):
for i in itertools.count(0,1): for i in itertools.count(0,1):
if not debug: if not debug:
cur_time = time.time()
for x in execute_programs:
prog_time, prog = x
if prog_time != 0 and (cur_time - start_time) >= prog_time:
x[0] = 0
try:
exec(prog)
except Exception as e:
print("Unable to execute program: %s" % (prog) )
if not is_reached_goal: if not is_reached_goal:
iter, iter_time = model.train_one_iter() iter, iter_time = model.train_one_iter()