Added --cpu-only mode for all stages. Updated README. Added requirements-cpu.txt

iperov 2018-12-21 16:54:22 +04:00
parent d04e8b1d91
commit 0d7387165a
6 changed files with 91 additions and 35 deletions

README.md

@@ -14,6 +14,8 @@ Based on original FaceSwap repo. **Facesets** of FaceSwap or FakeApp are **not c
 - automatic GPU manager, chooses best gpu(s) and supports --multi-gpu (only for identical cards). Warning: dont use cards in SLI mode.
+- CPU mode. An 8th-gen Intel Core CPU is able to train the H64 model in about 2 days.
 - new preview window
 - extractor in parallel
@@ -156,9 +158,15 @@ dlib==19.10.0 from pip compiled without CUDA. Therefore you have to compile DLIB
 Command line example for windows: `python setup.py install -G "Visual Studio 14 2015" --yes DLIB_USE_CUDA`
+### **CPU only mode**
+CPU mode is enabled with the --cpu-only argument for all stages. Follow requirements-cpu.txt to install the required packages.
+Do not use the DLIB extractor in CPU mode; it is too slow.
+Only the H64 model is reasonable to train on a home CPU.
 ### Prebuilt windows app:
-Windows 7,8,8.1,10 zero dependency (just install/update your GeForce Drivers) prebuilt DeepFaceLab can be downloaded from
+Windows 7,8,8.1,10 zero dependency (just install/update your GeForce Drivers) prebuilt DeepFaceLab (includes GPU and CPU versions) can be downloaded from
 1) torrent https://rutracker.org/forum/viewtopic.php?p=75318742 (magnet link inside).
 2) https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg
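Taken with the new command-line switches added to main.py below, CPU mode is opted into per stage. An illustrative extract invocation — the directory paths are placeholders, and `--output-dir` is assumed to be the existing output flag that feeds the extractor's `output_dir` parameter:

`python main.py extract --input-dir workspace/data_src --output-dir workspace/data_src/aligned --detector mt --cpu-only`

`train` and `convert` take the same `--cpu-only` switch; their remaining arguments are unchanged by this commit.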

gpufmkmgr.py

@@ -8,13 +8,14 @@ from .pynvml import *
 dlib_module = None
-def import_dlib(device_idx):
+def import_dlib(device_idx, cpu_only=False):
     global dlib_module
     if dlib_module is not None:
         raise Exception ('Multiple import of dlib is not allowed, reorganize your program.')
     import dlib
     dlib_module = dlib
-    dlib_module.cuda.set_device(device_idx)
+    if not cpu_only:
+        dlib_module.cuda.set_device(device_idx)
     return dlib_module
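A standalone sketch of the guarded dlib import above. The extra `DLIB_USE_CUDA` check is an assumption added for illustration; the commit itself only tests `cpu_only`:

```python
import dlib

def load_dlib(device_idx, cpu_only=False):
    # Bind a CUDA device only when GPU use is requested and the dlib build
    # actually has CUDA support (DLIB_USE_CUDA is False for the stock pip wheel).
    if not cpu_only and getattr(dlib, "DLIB_USE_CUDA", False):
        dlib.cuda.set_device(device_idx)
    return dlib
```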
@@ -152,6 +153,14 @@ def finalize_keras_vggface():
     global keras_vggface_module
     keras_vggface_module = None
+def hasNVML():
+    try:
+        nvmlInit()
+        nvmlShutdown()
+    except:
+        return False
+    return True
 #returns [ (device_idx, device_name), ... ]
 def getDevicesWithAtLeastFreeMemory(freememsize):
     result = []
@@ -279,6 +288,8 @@ class GPUConfig():
                         allow_growth = True,
                         cpu_only = False,
                         **in_options):
+        if not hasNVML():
+            cpu_only = True
         if cpu_only:
             self.cpu_only = cpu_only
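`hasNVML()` is what lets `GPUConfig` fall back to CPU automatically when no NVIDIA driver is present. A self-contained sketch of the same probe, assuming the standalone `pynvml` package rather than the repo's bundled copy:

```python
from pynvml import nvmlInit, nvmlShutdown

def has_nvml():
    """Return True if the NVIDIA Management Library can be initialized."""
    try:
        nvmlInit()
        nvmlShutdown()
    except Exception:  # the repo uses a bare except: any failure means no usable NVML
        return False
    return True

cpu_only = not has_nvml()  # mirrors the new GPUConfig fallback above
```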

main.py

@@ -37,8 +37,10 @@ if __name__ == "__main__":
                         face_type=arguments.face_type,
                         detector=arguments.detector,
                         multi_gpu=arguments.multi_gpu,
+                        cpu_only=arguments.cpu_only,
                         manual_fix=arguments.manual_fix,
-                        manual_window_size=arguments.manual_window_size)
+                        manual_window_size=arguments.manual_window_size
+                        )
     extract_parser = subparsers.add_parser( "extract", help="Extract the faces from a pictures.")
     extract_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
@@ -49,6 +51,8 @@ if __name__ == "__main__":
     extract_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
     extract_parser.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.")
     extract_parser.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=0, help="Manual fix window size. Example: 1368. Default: frame size.")
+    extract_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU. Forces to use MT extractor.")
     extract_parser.set_defaults (func=process_extract)
@@ -85,6 +89,7 @@ if __name__ == "__main__":
                     force_best_gpu_idx = arguments.force_best_gpu_idx,
                     multi_gpu = arguments.multi_gpu,
                     force_gpu_idxs = arguments.force_gpu_idxs,
+                    cpu_only = arguments.cpu_only
                     )
     train_parser = subparsers.add_parser( "train", help="Trainer")
@@ -101,6 +106,8 @@ if __name__ == "__main__":
     train_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best(worst).")
     train_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="MultiGPU option. It will select only same best(worst) GPU models.")
     train_parser.add_argument('--force-gpu-idxs', type=str, dest="force_gpu_idxs", default=None, help="Override final GPU idxs. Example: 0,1,2.")
+    train_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.")
     train_parser.set_defaults (func=process_train)
     def process_convert(arguments):
@@ -197,7 +204,8 @@ if __name__ == "__main__":
                     final_image_color_degrade_power = arguments.final_image_color_degrade_power,
                     transfercolor = arguments.transfercolor,
                     alpha = arguments.alpha,
-                    force_best_gpu_idx = arguments.force_best_gpu_idx
+                    force_best_gpu_idx = arguments.force_best_gpu_idx,
+                    cpu_only = arguments.cpu_only
                     )
     convert_parser = subparsers.add_parser( "convert", help="Converter")
@@ -220,6 +228,7 @@ if __name__ == "__main__":
     convert_parser.add_argument('--alpha', action="store_true", dest="alpha", default=False, help="Embeds alpha channel of face mask to final PNG. Used in manual composing video by editors such as Sony Vegas or After Effects.")
     convert_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.")
     convert_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best.")
+    convert_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.")
     convert_parser.set_defaults(func=process_convert)
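The main.py changes repeat one pattern per subcommand: an opt-in boolean flag that defaults to False and is forwarded to the stage entry point. A condensed, self-contained sketch of that wiring (names are illustrative, not the repo's):

```python
import argparse

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
for name in ("extract", "train", "convert"):
    sub = subparsers.add_parser(name)
    # Defaulting to False keeps existing GPU workflows untouched.
    sub.add_argument("--cpu-only", action="store_true", dest="cpu_only",
                     default=False, help="Run this stage on CPU.")
    sub.set_defaults(stage=name)

args = parser.parse_args(["train", "--cpu-only"])
assert args.stage == "train" and args.cpu_only is True
```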

Extractor.py

@@ -18,13 +18,14 @@ from utils.SubprocessorBase import SubprocessorBase
 class ExtractSubprocessor(SubprocessorBase):
     #override
-    def __init__(self, input_data, type, image_size, face_type, debug, multi_gpu=False, manual=False, manual_window_size=0, detector=None, output_path=None ):
+    def __init__(self, input_data, type, image_size, face_type, debug, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, detector=None, output_path=None ):
         self.input_data = input_data
         self.type = type
         self.image_size = image_size
         self.face_type = face_type
         self.debug = debug
         self.multi_gpu = multi_gpu
+        self.cpu_only = cpu_only
         self.detector = detector
         self.output_path = output_path
         self.manual = manual
@@ -59,8 +60,10 @@ class ExtractSubprocessor(SubprocessorBase):
         cv2.setMouseCallback(self.wnd_name, onMouse, self.param)
-    def get_devices_for_type (self, type, multi_gpu):
-        if (type == 'rects' or type == 'landmarks'):
+    def get_devices_for_type (self, type, multi_gpu, cpu_only):
+        if cpu_only:
+            devices = [ (0, 'CPU', 0 ) ]
+        elif (type == 'rects' or type == 'landmarks'):
             if not multi_gpu:
                 devices = [gpufmkmgr.getBestDeviceIdx()]
             else:
@@ -74,9 +77,12 @@ class ExtractSubprocessor(SubprocessorBase):
     #override
     def process_info_generator(self):
-        for (device_idx, device_name, device_total_vram_gb) in self.get_devices_for_type(self.type, self.multi_gpu):
+        for (device_idx, device_name, device_total_vram_gb) in self.get_devices_for_type(self.type, self.multi_gpu, self.cpu_only):
             num_processes = 1
             if not self.manual and self.type == 'rects' and self.detector == 'mt':
-                num_processes = int ( max (1, device_total_vram_gb / 2) )
+                if self.cpu_only:
+                    num_processes = int ( max (1, multiprocessing.cpu_count() / 2 ) )
+                else:
+                    num_processes = int ( max (1, device_total_vram_gb / 2) )
             for i in range(0, num_processes ):
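In CPU mode the device list collapses to a single pseudo-device, `(0, 'CPU', 0)`, and the MT-detector worker count is derived from core count instead of VRAM. A compact sketch of that worker heuristic (hypothetical helper name):

```python
import multiprocessing

def mt_worker_count(cpu_only, device_total_vram_gb):
    """Number of MT-detector worker processes, mirroring the hunk above."""
    if cpu_only:
        # Half the logical cores, but always at least one process.
        return max(1, multiprocessing.cpu_count() // 2)
    # GPU path keeps the old rule: roughly one worker per 2 GB of VRAM.
    return max(1, int(device_total_vram_gb / 2))
```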
@@ -84,6 +90,7 @@ class ExtractSubprocessor(SubprocessorBase):
                 yield device_name_for_process, {}, {'type' : self.type,
                                                     'device_idx' : device_idx,
                                                     'device_name' : device_name_for_process,
+                                                    'device_type' : 'CPU' if self.cpu_only else 'GPU',
                                                     'image_size': self.image_size,
                                                     'face_type': self.face_type,
                                                     'debug': self.debug,
@@ -229,6 +236,7 @@ class ExtractSubprocessor(SubprocessorBase):
         self.image_size = client_dict['image_size']
         self.face_type = client_dict['face_type']
         self.device_idx = client_dict['device_idx']
+        self.cpu_only = client_dict['device_type'] == 'CPU'
         self.output_path = Path(client_dict['output_dir']) if 'output_dir' in client_dict.keys() else None
         self.debug = client_dict['debug']
         self.detector = client_dict['detector']
@@ -242,18 +250,18 @@ class ExtractSubprocessor(SubprocessorBase):
         if self.detector is not None:
             if self.detector == 'mt':
-                self.gpu_config = gpufmkmgr.GPUConfig ( force_best_gpu_idx=self.device_idx, allow_growth=True)
+                self.gpu_config = gpufmkmgr.GPUConfig ( cpu_only=self.cpu_only, force_best_gpu_idx=self.device_idx, allow_growth=True)
                 self.tf = gpufmkmgr.import_tf ( self.gpu_config )
                 self.tf_session = gpufmkmgr.get_tf_session()
                 self.keras = gpufmkmgr.import_keras()
                 self.e = facelib.MTCExtractor(self.keras, self.tf, self.tf_session)
             elif self.detector == 'dlib':
-                self.dlib = gpufmkmgr.import_dlib( self.device_idx )
+                self.dlib = gpufmkmgr.import_dlib( self.device_idx, cpu_only=self.cpu_only )
                 self.e = facelib.DLIBExtractor(self.dlib)
             self.e.__enter__()
         elif self.type == 'landmarks':
-            self.gpu_config = gpufmkmgr.GPUConfig ( force_best_gpu_idx=self.device_idx, allow_growth=True)
+            self.gpu_config = gpufmkmgr.GPUConfig ( cpu_only=self.cpu_only, force_best_gpu_idx=self.device_idx, allow_growth=True)
             self.tf = gpufmkmgr.import_tf ( self.gpu_config )
             self.tf_session = gpufmkmgr.get_tf_session()
             self.keras = gpufmkmgr.import_keras()
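`gpufmkmgr.import_tf` (not part of this diff) is where `GPUConfig.cpu_only` ultimately takes effect. With the TF 1.x API pinned in requirements-cpu.txt, hiding the GPUs from a session typically looks like the sketch below; this is an assumption about the mechanism, not the repo's code:

```python
import tensorflow as tf  # TF 1.x API, matching the tensorflow==1.11.0 pin

def make_session(cpu_only=False, allow_growth=True):
    if cpu_only:
        # Report zero visible GPUs so every op is placed on the CPU.
        config = tf.ConfigProto(device_count={"GPU": 0})
    else:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = allow_growth
    return tf.Session(config=config)
```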
@@ -381,7 +389,7 @@ face_type
     'full_face'
     'avatar'
 '''
-def main (input_dir, output_dir, debug, detector='mt', multi_gpu=True, manual_fix=False, manual_window_size=0, image_size=256, face_type='full_face'):
+def main (input_dir, output_dir, debug, detector='mt', multi_gpu=True, cpu_only=False, manual_fix=False, manual_window_size=0, image_size=256, face_type='full_face'):
     print ("Running extractor.\r\n")
     input_path = Path(input_dir)
@@ -412,13 +420,13 @@ def main (input_dir, output_dir, debug, detector='mt', multi_gpu=True, manual_fi
     if images_found != 0:
         if detector == 'manual':
             print ('Performing manual extract...')
-            extracted_faces = ExtractSubprocessor ([ (filename,[]) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug, manual=True, manual_window_size=manual_window_size).process()
+            extracted_faces = ExtractSubprocessor ([ (filename,[]) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).process()
         else:
             print ('Performing 1st pass...')
-            extracted_rects = ExtractSubprocessor ([ (x,) for x in input_path_image_paths ], 'rects', image_size, face_type, debug, multi_gpu=multi_gpu, manual=False, detector=detector).process()
+            extracted_rects = ExtractSubprocessor ([ (x,) for x in input_path_image_paths ], 'rects', image_size, face_type, debug, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, detector=detector).process()
             print ('Performing 2nd pass...')
-            extracted_faces = ExtractSubprocessor (extracted_rects, 'landmarks', image_size, face_type, debug, multi_gpu=multi_gpu, manual=False).process()
+            extracted_faces = ExtractSubprocessor (extracted_rects, 'landmarks', image_size, face_type, debug, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).process()
     if manual_fix:
         print ('Performing manual fix...')
@@ -430,7 +438,7 @@ def main (input_dir, output_dir, debug, detector='mt', multi_gpu=True, manual_fi
     if len(extracted_faces) > 0:
         print ('Performing 3rd pass...')
-        final_imgs_paths = ExtractSubprocessor (extracted_faces, 'final', image_size, face_type, debug, multi_gpu=multi_gpu, manual=False, output_path=output_path).process()
+        final_imgs_paths = ExtractSubprocessor (extracted_faces, 'final', image_size, face_type, debug, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, output_path=output_path).process()
     faces_detected = len(final_imgs_paths)
     print('-------------------------')

ModelBase.py

@@ -118,10 +118,13 @@ class ModelBase(object):
             print ("== |== %s : %s" % (key, self.options[key]) )
         print ("== Running on:")
-        for idx in self.gpu_config.gpu_idxs:
-            print ("== |== [%d : %s]" % (idx, gpufmkmgr.getDeviceName(idx)) )
-        if self.gpu_total_vram_gb == 2:
+        if self.gpu_config.cpu_only:
+            print ("== |== [CPU]")
+        else:
+            for idx in self.gpu_config.gpu_idxs:
+                print ("== |== [%d : %s]" % (idx, gpufmkmgr.getDeviceName(idx)) )
+        if not self.gpu_config.cpu_only and self.gpu_total_vram_gb == 2:
             print ("==")
             print ("== WARNING: You are using 2GB GPU. Result quality may be significantly decreased.")
             print ("== If training does not start, close all programs and try again.")
@@ -264,6 +267,9 @@ class ModelBase(object):
             self.epoch += 1
             #............."Saving...
-            loss_string = "Training [#{0:06d}][{1:04d}ms]".format ( self.epoch, int(epoch_time*1000) % 10000 )
+            if epoch_time >= 10000:
+                loss_string = "Training [#{0:06d}][{1:03d}s]".format ( self.epoch, epoch_time / 1000 )
+            else:
+                loss_string = "Training [#{0:06d}][{1:04d}ms]".format ( self.epoch, int(epoch_time*1000) % 10000 )
             for (loss_name, loss_value) in losses:
                 loss_string += " %s:%.3f" % (loss_name, loss_value)
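The seconds branch exists because a CPU epoch can easily exceed the 9999 ms the old format could display. As extracted here, that branch passes a float (`epoch_time / 1000`) to a `{:d}` format spec; a small sketch of an equivalent formatter that avoids this, assuming `epoch_time` is measured in seconds (hypothetical helper, not the repo's code):

```python
def format_epoch(epoch, epoch_time_sec):
    # Long epochs read better in whole seconds; short ones in milliseconds.
    if epoch_time_sec >= 10:
        return "Training [#{0:06d}][{1:04d}s]".format(epoch, int(epoch_time_sec))
    return "Training [#{0:06d}][{1:04d}ms]".format(epoch, int(epoch_time_sec * 1000))

print(format_epoch(1234, 0.1234))  # Training [#001234][0123ms]
print(format_epoch(1235, 42.7))    # Training [#001235][0042s]
```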
@@ -301,6 +307,10 @@ class ModelBase(object):
         #example d = {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48}
         keys = [x for x in d.keys()]
-        if self.gpu_total_vram_gb < keys[0]:
-            raise Exception ('Sorry, this model works only on %dGB+ GPU' % ( keys[0] ) )
+        if self.gpu_config.cpu_only:
+            if self.batch_size == 0:
+                self.batch_size = 2
+        else:
+            if self.gpu_total_vram_gb < keys[0]:
+                raise Exception ('Sorry, this model works only on %dGB+ GPU' % ( keys[0] ) )
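This hunk sits inside the VRAM-to-batch-size lookup (the `d = {2:2,3:4,...}` dict in the context line). With no VRAM to key on, CPU mode simply defaults an unset batch size to 2. A sketch of the combined rule; the final table lookup is a presumed continuation, since it lies outside the hunk:

```python
VRAM_TO_BATCH = {2: 2, 3: 4, 4: 8, 5: 16, 6: 32, 7: 32, 8: 32, 9: 48}  # example table from the context line

def resolve_batch_size(requested, cpu_only, gpu_total_vram_gb, table=VRAM_TO_BATCH):
    if requested != 0:
        return requested   # an explicitly chosen batch size always wins
    if cpu_only:
        return 2           # the commit's conservative CPU default
    keys = sorted(table)
    if gpu_total_vram_gb < keys[0]:
        raise Exception("Sorry, this model works only on %dGB+ GPU" % keys[0])
    # Presumed continuation: largest table entry that fits the available VRAM.
    return table[max(k for k in keys if k <= gpu_total_vram_gb)]
```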

requirements-cpu.txt (new file)

@@ -0,0 +1,10 @@
+pathlib==1.0.1
+scandir==1.6
+h5py==2.7.1
+Keras==2.2.4
+opencv-python==3.4.0.12
+tensorflow==1.11.0
+scikit-image
+dlib==19.10.0
+tqdm
+git+https://www.github.com/keras-team/keras-contrib.git
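With this file in place, the CPU setup reduces to `pip install -r requirements-cpu.txt`. Note that it pins the stock `dlib==19.10.0` wheel, i.e. the build compiled without CUDA, consistent with the README's warning that the DLIB extractor is too slow in CPU mode.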