MultiGPU training:

Training speed is significantly increased.
Fixed CUDNN_STREAM errors.

Trainer: added key 'b', which creates a backup even if autobackup is disabled.
This commit is contained in:
Colombo 2020-01-29 10:55:51 +04:00
parent a6d72b620d
commit 0251eb3490
4 changed files with 50 additions and 33 deletions

View file

@@ -163,7 +163,7 @@ class QModel(ModelBase):
masked_training = True
models_opt_on_gpu = len(devices) == 1 and devices[0].total_mem_gb >= 4
models_opt_on_gpu = len(devices) >= 1 and all([dev.total_mem_gb >= 2 for dev in devices])
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'