diff --git a/mainscripts/Trainer.py b/mainscripts/Trainer.py index 3305adf..b8d62d3 100644 --- a/mainscripts/Trainer.py +++ b/mainscripts/Trainer.py @@ -67,6 +67,10 @@ def trainerThread (s2c, c2s, e, io.log_info ("Saving....", end='\r') model.save() shared_state['after_save'] = True + + def model_backup(): + if not debug and not is_reached_goal: + model.create_backup() def send_preview(): if not debug: @@ -172,6 +176,8 @@ def trainerThread (s2c, c2s, e, op = input['op'] if op == 'save': model_save() + elif op == 'backup': + model_backup() elif op == 'preview': if is_reached_goal: model.pass_one_iter() @@ -277,7 +283,7 @@ def main(**kwargs): # HEAD head_lines = [ - '[s]:save [enter]:exit', + '[s]:save [b]:backup [enter]:exit', '[p]:update [space]:next preview [l]:change history range', 'Preview: "%s" [%d/%d]' % (selected_preview_name,selected_preview+1, len(previews) ) ] @@ -314,6 +320,8 @@ def main(**kwargs): s2c.put ( {'op': 'close'} ) elif key == ord('s'): s2c.put ( {'op': 'save'} ) + elif key == ord('b'): + s2c.put ( {'op': 'backup'} ) elif key == ord('p'): if not is_waiting_preview: is_waiting_preview = True diff --git a/models/ModelBase.py b/models/ModelBase.py index ee06f00..1c2eccf 100644 --- a/models/ModelBase.py +++ b/models/ModelBase.py @@ -345,8 +345,7 @@ class ModelBase(object): return self.onGetPreview (self.sample_for_preview)[0][1] #first preview, and bgr def save(self): - summary_path = self.get_strpath_storage_for_file('summary.txt') - Path( summary_path ).write_text( self.get_summary_text() ) + Path( self.get_summary_path() ).write_text( self.get_summary_text() ) self.onSave() @@ -360,42 +359,49 @@ class ModelBase(object): pathex.write_bytes_safe (self.model_data_path, pickle.dumps(model_data) ) if self.autobackup: - bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ] - bckp_filename_list += [ str(summary_path), str(self.model_data_path) ] - current_hour = time.localtime().tm_hour if self.autobackup_current_hour != current_hour: self.autobackup_current_hour = current_hour + self.create_backup() + + def create_backup(self): + io.log_info ("Creating backup...", end='\r') + + if not self.autobackups_path.exists(): + self.autobackups_path.mkdir(exist_ok=True) + + bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ] + bckp_filename_list += [ str(self.get_summary_path()), str(self.model_data_path) ] + + for i in range(15,0,-1): + idx_str = '%.2d' % i + next_idx_str = '%.2d' % (i+1) - for i in range(15,0,-1): - idx_str = '%.2d' % i - next_idx_str = '%.2d' % (i+1) + idx_backup_path = self.autobackups_path / idx_str + next_idx_packup_path = self.autobackups_path / next_idx_str - idx_backup_path = self.autobackups_path / idx_str - next_idx_packup_path = self.autobackups_path / next_idx_str + if idx_backup_path.exists(): + if i == 15: + pathex.delete_all_files(idx_backup_path) + else: + next_idx_packup_path.mkdir(exist_ok=True) + pathex.move_all_files (idx_backup_path, next_idx_packup_path) - if idx_backup_path.exists(): - if i == 15: - pathex.delete_all_files(idx_backup_path) - else: - next_idx_packup_path.mkdir(exist_ok=True) - pathex.move_all_files (idx_backup_path, next_idx_packup_path) + if i == 1: + idx_backup_path.mkdir(exist_ok=True) + for filename in bckp_filename_list: + shutil.copy ( str(filename), str(idx_backup_path / Path(filename).name) ) - if i == 1: - idx_backup_path.mkdir(exist_ok=True) - for filename in bckp_filename_list: - shutil.copy ( str(filename), str(idx_backup_path / Path(filename).name) ) + previews = self.get_previews() + plist = [] + for i in range(len(previews)): + name, bgr = previews[i] + plist += [ (bgr, idx_backup_path / ( ('preview_%s.jpg') % (name)) ) ] - previews = self.get_previews() - plist = [] - for i in range(len(previews)): - name, bgr = previews[i] - plist += [ (bgr, idx_backup_path / ( ('preview_%s.jpg') % (name)) ) ] - - for preview, filepath in plist: - preview_lh = ModelBase.get_loss_history_preview(self.loss_history, self.iter, preview.shape[1], preview.shape[2]) - img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) - cv2_imwrite (filepath, img ) + for preview, filepath in plist: + preview_lh = ModelBase.get_loss_history_preview(self.loss_history, self.iter, preview.shape[1], preview.shape[2]) + img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) + cv2_imwrite (filepath, img ) def debug_one_iter(self): images = [] @@ -479,6 +485,9 @@ class ModelBase(object): def get_strpath_storage_for_file(self, filename): return str( self.saved_models_path / ( self.get_model_name() + '_' + filename) ) + def get_summary_path(self): + return self.get_strpath_storage_for_file('summary.txt') + def get_summary_text(self): ###Generate text summary of model hyperparameters #Find the longest key name and value string. Used as column widths. diff --git a/models/Model_Quick96/Model.py b/models/Model_Quick96/Model.py index 3be6c53..1d430d5 100644 --- a/models/Model_Quick96/Model.py +++ b/models/Model_Quick96/Model.py @@ -163,7 +163,7 @@ class QModel(ModelBase): masked_training = True - models_opt_on_gpu = len(devices) == 1 and devices[0].total_mem_gb >= 4 + models_opt_on_gpu = len(devices) >= 1 and all([dev.total_mem_gb >= 2 for dev in devices]) models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0' optimizer_vars_on_cpu = models_opt_device=='/CPU:0' diff --git a/models/Model_SAEHD/Model.py b/models/Model_SAEHD/Model.py index d84f83a..cc2c051 100644 --- a/models/Model_SAEHD/Model.py +++ b/models/Model_SAEHD/Model.py @@ -349,7 +349,7 @@ class SAEHDModel(ModelBase): masked_training = True - models_opt_on_gpu = False if len(devices) != 1 else self.options['models_opt_on_gpu'] + models_opt_on_gpu = False if len(devices) == 0 else self.options['models_opt_on_gpu'] models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0' optimizer_vars_on_cpu = models_opt_device=='/CPU:0'