mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-07-06 21:12:07 -07:00
MultiGPU training:
Speed is significantly increased. Fixed CUDNN_STREAM errors. Trainer: added key 'b', which creates a backup even if autobackup is disabled.
This commit is contained in:
parent
a6d72b620d
commit
0251eb3490
4 changed files with 50 additions and 33 deletions
|
@ -68,6 +68,10 @@ def trainerThread (s2c, c2s, e,
|
||||||
model.save()
|
model.save()
|
||||||
shared_state['after_save'] = True
|
shared_state['after_save'] = True
|
||||||
|
|
||||||
|
def model_backup():
|
||||||
|
if not debug and not is_reached_goal:
|
||||||
|
model.create_backup()
|
||||||
|
|
||||||
def send_preview():
|
def send_preview():
|
||||||
if not debug:
|
if not debug:
|
||||||
previews = model.get_previews()
|
previews = model.get_previews()
|
||||||
|
@ -172,6 +176,8 @@ def trainerThread (s2c, c2s, e,
|
||||||
op = input['op']
|
op = input['op']
|
||||||
if op == 'save':
|
if op == 'save':
|
||||||
model_save()
|
model_save()
|
||||||
|
elif op == 'backup':
|
||||||
|
model_backup()
|
||||||
elif op == 'preview':
|
elif op == 'preview':
|
||||||
if is_reached_goal:
|
if is_reached_goal:
|
||||||
model.pass_one_iter()
|
model.pass_one_iter()
|
||||||
|
@ -277,7 +283,7 @@ def main(**kwargs):
|
||||||
|
|
||||||
# HEAD
|
# HEAD
|
||||||
head_lines = [
|
head_lines = [
|
||||||
'[s]:save [enter]:exit',
|
'[s]:save [b]:backup [enter]:exit',
|
||||||
'[p]:update [space]:next preview [l]:change history range',
|
'[p]:update [space]:next preview [l]:change history range',
|
||||||
'Preview: "%s" [%d/%d]' % (selected_preview_name,selected_preview+1, len(previews) )
|
'Preview: "%s" [%d/%d]' % (selected_preview_name,selected_preview+1, len(previews) )
|
||||||
]
|
]
|
||||||
|
@ -314,6 +320,8 @@ def main(**kwargs):
|
||||||
s2c.put ( {'op': 'close'} )
|
s2c.put ( {'op': 'close'} )
|
||||||
elif key == ord('s'):
|
elif key == ord('s'):
|
||||||
s2c.put ( {'op': 'save'} )
|
s2c.put ( {'op': 'save'} )
|
||||||
|
elif key == ord('b'):
|
||||||
|
s2c.put ( {'op': 'backup'} )
|
||||||
elif key == ord('p'):
|
elif key == ord('p'):
|
||||||
if not is_waiting_preview:
|
if not is_waiting_preview:
|
||||||
is_waiting_preview = True
|
is_waiting_preview = True
|
||||||
|
|
|
@ -345,8 +345,7 @@ class ModelBase(object):
|
||||||
return self.onGetPreview (self.sample_for_preview)[0][1] #first preview, and bgr
|
return self.onGetPreview (self.sample_for_preview)[0][1] #first preview, and bgr
|
||||||
|
|
||||||
def save(self):
|
def save(self):
|
||||||
summary_path = self.get_strpath_storage_for_file('summary.txt')
|
Path( self.get_summary_path() ).write_text( self.get_summary_text() )
|
||||||
Path( summary_path ).write_text( self.get_summary_text() )
|
|
||||||
|
|
||||||
self.onSave()
|
self.onSave()
|
||||||
|
|
||||||
|
@ -360,12 +359,19 @@ class ModelBase(object):
|
||||||
pathex.write_bytes_safe (self.model_data_path, pickle.dumps(model_data) )
|
pathex.write_bytes_safe (self.model_data_path, pickle.dumps(model_data) )
|
||||||
|
|
||||||
if self.autobackup:
|
if self.autobackup:
|
||||||
bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ]
|
|
||||||
bckp_filename_list += [ str(summary_path), str(self.model_data_path) ]
|
|
||||||
|
|
||||||
current_hour = time.localtime().tm_hour
|
current_hour = time.localtime().tm_hour
|
||||||
if self.autobackup_current_hour != current_hour:
|
if self.autobackup_current_hour != current_hour:
|
||||||
self.autobackup_current_hour = current_hour
|
self.autobackup_current_hour = current_hour
|
||||||
|
self.create_backup()
|
||||||
|
|
||||||
|
def create_backup(self):
|
||||||
|
io.log_info ("Creating backup...", end='\r')
|
||||||
|
|
||||||
|
if not self.autobackups_path.exists():
|
||||||
|
self.autobackups_path.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ]
|
||||||
|
bckp_filename_list += [ str(self.get_summary_path()), str(self.model_data_path) ]
|
||||||
|
|
||||||
for i in range(15,0,-1):
|
for i in range(15,0,-1):
|
||||||
idx_str = '%.2d' % i
|
idx_str = '%.2d' % i
|
||||||
|
@ -479,6 +485,9 @@ class ModelBase(object):
|
||||||
def get_strpath_storage_for_file(self, filename):
|
def get_strpath_storage_for_file(self, filename):
|
||||||
return str( self.saved_models_path / ( self.get_model_name() + '_' + filename) )
|
return str( self.saved_models_path / ( self.get_model_name() + '_' + filename) )
|
||||||
|
|
||||||
|
def get_summary_path(self):
|
||||||
|
return self.get_strpath_storage_for_file('summary.txt')
|
||||||
|
|
||||||
def get_summary_text(self):
|
def get_summary_text(self):
|
||||||
###Generate text summary of model hyperparameters
|
###Generate text summary of model hyperparameters
|
||||||
#Find the longest key name and value string. Used as column widths.
|
#Find the longest key name and value string. Used as column widths.
|
||||||
|
|
|
@ -163,7 +163,7 @@ class QModel(ModelBase):
|
||||||
|
|
||||||
masked_training = True
|
masked_training = True
|
||||||
|
|
||||||
models_opt_on_gpu = len(devices) == 1 and devices[0].total_mem_gb >= 4
|
models_opt_on_gpu = len(devices) >= 1 and all([dev.total_mem_gb >= 2 for dev in devices])
|
||||||
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
|
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
|
||||||
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
|
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
|
||||||
|
|
||||||
|
|
|
@ -349,7 +349,7 @@ class SAEHDModel(ModelBase):
|
||||||
|
|
||||||
masked_training = True
|
masked_training = True
|
||||||
|
|
||||||
models_opt_on_gpu = False if len(devices) != 1 else self.options['models_opt_on_gpu']
|
models_opt_on_gpu = False if len(devices) == 0 else self.options['models_opt_on_gpu']
|
||||||
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
|
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
|
||||||
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
|
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue