diff --git a/README.md b/README.md
index 974c5f2..1794088 100644
--- a/README.md
+++ b/README.md
@@ -14,6 +14,8 @@ Based on original FaceSwap repo. **Facesets** of FaceSwap or FakeApp are **not c
 
 - automatic GPU manager, chooses best gpu(s) and supports --multi-gpu (only for identical cards). Warning: dont use cards in SLI mode.
 
+- CPU mode. An 8th-gen Intel Core CPU is able to train the H64 model in 2 days.
+
 - new preview window
 
 - extractor in parallel
@@ -156,9 +158,15 @@ dlib==19.10.0 from pip compiled without CUDA. Therefore you have to compile DLIB
 
 Command line example for windows: `python setup.py install -G "Visual Studio 14 2015" --yes DLIB_USE_CUDA`
 
+### **CPU-only mode**
+
+CPU mode is enabled by the --cpu-only argument for all stages. Follow requirements-cpu.txt to install the required packages.
+Do not use the DLIB extractor in CPU mode, it is too slow.
+Only the H64 model is reasonable to train on a home CPU.
+
 ### Prebuilt windows app:
 
-Windows 7,8,8.1,10 zero dependency (just install/update your GeForce Drivers) prebuilt DeepFaceLab can be downloaded from
+Windows 7,8,8.1,10 zero dependency (just install/update your GeForce Drivers) prebuilt DeepFaceLab (includes GPU and CPU versions) can be downloaded from
 
 1) torrent https://rutracker.org/forum/viewtopic.php?p=75318742 (magnet link inside).
 2) https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg
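Illustrative usage, based on the argument parsers added in main.py below: each stage takes the new flag on its usual command line, e.g. `python main.py extract --input-dir <dir> ... --cpu-only`, `python main.py train ... --cpu-only`, or `python main.py convert ... --cpu-only`; the remaining arguments are unchanged from GPU mode.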
diff --git a/gpufmkmgr/gpufmkmgr.py b/gpufmkmgr/gpufmkmgr.py
index a8c0923..479a8f3 100644
--- a/gpufmkmgr/gpufmkmgr.py
+++ b/gpufmkmgr/gpufmkmgr.py
@@ -8,14 +8,15 @@ from .pynvml import *
 
 dlib_module = None
-def import_dlib(device_idx):
+def import_dlib(device_idx, cpu_only=False):
     global dlib_module
     if dlib_module is not None:
         raise Exception ('Multiple import of dlib is not allowed, reorganize your program.')
 
     import dlib
     dlib_module = dlib
 
-    dlib_module.cuda.set_device(device_idx)
+    if not cpu_only:
+        dlib_module.cuda.set_device(device_idx)
     return dlib_module
 
 tf_module = None
@@ -151,7 +152,15 @@ def import_keras_vggface(optional=False):
 def finalize_keras_vggface():
     global keras_vggface_module
     keras_vggface_module = None
-
+
+def hasNVML():
+    try:
+        nvmlInit()
+        nvmlShutdown()
+    except:
+        return False
+    return True
+
 #returns [ (device_idx, device_name), ... ]
 def getDevicesWithAtLeastFreeMemory(freememsize):
     result = []
@@ -279,7 +288,9 @@ class GPUConfig():
                        allow_growth = True,
                        cpu_only = False,
                        **in_options):
-
+        if not hasNVML():
+            cpu_only = True
+
         if cpu_only:
             self.cpu_only = cpu_only
         else:
diff --git a/main.py b/main.py
index 03abe7b..1ae2c29 100644
--- a/main.py
+++ b/main.py
@@ -37,8 +37,10 @@ if __name__ == "__main__":
                         face_type=arguments.face_type,
                         detector=arguments.detector,
                         multi_gpu=arguments.multi_gpu,
+                        cpu_only=arguments.cpu_only,
                         manual_fix=arguments.manual_fix,
-                        manual_window_size=arguments.manual_window_size)
+                        manual_window_size=arguments.manual_window_size
+                        )
 
     extract_parser = subparsers.add_parser( "extract", help="Extract the faces from a pictures.")
     extract_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
@@ -49,6 +51,8 @@ if __name__ == "__main__":
     extract_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
     extract_parser.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.")
     extract_parser.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=0, help="Manual fix window size. Example: 1368. Default: frame size.")
+    extract_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU. Forces to use MT extractor.")
+
     extract_parser.set_defaults (func=process_extract)
 
 
@@ -85,6 +89,7 @@ if __name__ == "__main__":
                     force_best_gpu_idx = arguments.force_best_gpu_idx,
                     multi_gpu = arguments.multi_gpu,
                     force_gpu_idxs = arguments.force_gpu_idxs,
+                    cpu_only = arguments.cpu_only
                     )
 
     train_parser = subparsers.add_parser( "train", help="Trainer")
@@ -101,6 +106,8 @@ if __name__ == "__main__":
     train_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best(worst).")
     train_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="MultiGPU option. It will select only same best(worst) GPU models.")
     train_parser.add_argument('--force-gpu-idxs', type=str, dest="force_gpu_idxs", default=None, help="Override final GPU idxs. Example: 0,1,2.")
+    train_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.")
+
     train_parser.set_defaults (func=process_train)
 
     def process_convert(arguments):
@@ -197,7 +204,8 @@ if __name__ == "__main__":
                     final_image_color_degrade_power = arguments.final_image_color_degrade_power,
                     transfercolor = arguments.transfercolor,
                     alpha = arguments.alpha,
-                    force_best_gpu_idx = arguments.force_best_gpu_idx
+                    force_best_gpu_idx = arguments.force_best_gpu_idx,
+                    cpu_only = arguments.cpu_only
                     )
 
     convert_parser = subparsers.add_parser( "convert", help="Converter")
@@ -220,6 +228,7 @@ if __name__ == "__main__":
     convert_parser.add_argument('--alpha', action="store_true", dest="alpha", default=False, help="Embeds alpha channel of face mask to final PNG. Used in manual composing video by editors such as Sony Vegas or After Effects.")
     convert_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.")
     convert_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best.")
+    convert_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.")
     convert_parser.set_defaults(func=process_convert)
 
 
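The new `hasNVML()` probe above is what flips `GPUConfig` into CPU-only mode when no usable NVIDIA driver is present. A minimal standalone sketch of the same check, assuming the PyPI `pynvml` package rather than the copy bundled under `gpufmkmgr/pynvml`:

```python
# Standalone sketch of the NVML availability probe (assumes the PyPI `pynvml` package;
# the repo itself imports a bundled copy via `from .pynvml import *`).
import pynvml

def has_nvml():
    # If the NVIDIA Management Library initializes, a usable driver/GPU is present.
    try:
        pynvml.nvmlInit()
        pynvml.nvmlShutdown()
    except Exception:   # the repo's hasNVML() uses a bare `except:` here
        return False
    return True

cpu_only = not has_nvml()   # mirrors GPUConfig.__init__: no NVML -> force CPU-only mode
print("CPU-only mode:", cpu_only)
```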
diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py
index ed03280..6cb62db 100644
--- a/mainscripts/Extractor.py
+++ b/mainscripts/Extractor.py
@@ -18,13 +18,14 @@ from utils.SubprocessorBase import SubprocessorBase
 class ExtractSubprocessor(SubprocessorBase):
 
     #override
-    def __init__(self, input_data, type, image_size, face_type, debug, multi_gpu=False, manual=False, manual_window_size=0, detector=None, output_path=None ):
+    def __init__(self, input_data, type, image_size, face_type, debug, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, detector=None, output_path=None ):
         self.input_data = input_data
         self.type = type
         self.image_size = image_size
         self.face_type = face_type
         self.debug = debug
         self.multi_gpu = multi_gpu
+        self.cpu_only = cpu_only
         self.detector = detector
         self.output_path = output_path
         self.manual = manual
@@ -59,8 +60,10 @@ class ExtractSubprocessor(SubprocessorBase):
 
             cv2.setMouseCallback(self.wnd_name, onMouse, self.param)
 
-    def get_devices_for_type (self, type, multi_gpu):
-        if (type == 'rects' or type == 'landmarks'):
+    def get_devices_for_type (self, type, multi_gpu, cpu_only):
+        if cpu_only:
+            devices = [ (0, 'CPU', 0 ) ]
+        elif (type == 'rects' or type == 'landmarks'):
             if not multi_gpu:
                 devices = [gpufmkmgr.getBestDeviceIdx()]
             else:
@@ -74,16 +77,20 @@ class ExtractSubprocessor(SubprocessorBase):
 
     #override
     def process_info_generator(self):
-        for (device_idx, device_name, device_total_vram_gb) in self.get_devices_for_type(self.type, self.multi_gpu):
+        for (device_idx, device_name, device_total_vram_gb) in self.get_devices_for_type(self.type, self.multi_gpu, self.cpu_only):
             num_processes = 1
             if not self.manual and self.type == 'rects' and self.detector == 'mt':
-                num_processes = int ( max (1, device_total_vram_gb / 2) )
+                if self.cpu_only:
+                    num_processes = int ( max (1, multiprocessing.cpu_count() / 2 ) )
+                else:
+                    num_processes = int ( max (1, device_total_vram_gb / 2) )
 
             for i in range(0, num_processes ):
                 device_name_for_process = device_name if num_processes == 1 else '%s #%d' % (device_name,i)
                 yield device_name_for_process, {}, {'type' : self.type,
                                                     'device_idx' : device_idx,
                                                     'device_name' : device_name_for_process,
+                                                    'device_type' : 'CPU' if self.cpu_only else 'GPU',
                                                     'image_size': self.image_size,
                                                     'face_type': self.face_type,
                                                     'debug': self.debug,
@@ -229,6 +236,7 @@ class ExtractSubprocessor(SubprocessorBase):
         self.image_size = client_dict['image_size']
         self.face_type = client_dict['face_type']
         self.device_idx = client_dict['device_idx']
+        self.cpu_only = client_dict['device_type'] == 'CPU'
         self.output_path = Path(client_dict['output_dir']) if 'output_dir' in client_dict.keys() else None
         self.debug = client_dict['debug']
         self.detector = client_dict['detector']
@@ -242,18 +250,18 @@ class ExtractSubprocessor(SubprocessorBase):
 
             if self.detector is not None:
                 if self.detector == 'mt':
-                    self.gpu_config = gpufmkmgr.GPUConfig ( force_best_gpu_idx=self.device_idx, allow_growth=True)
+                    self.gpu_config = gpufmkmgr.GPUConfig ( cpu_only=self.cpu_only, force_best_gpu_idx=self.device_idx, allow_growth=True)
                     self.tf = gpufmkmgr.import_tf ( self.gpu_config )
                     self.tf_session = gpufmkmgr.get_tf_session()
                     self.keras = gpufmkmgr.import_keras()
                     self.e = facelib.MTCExtractor(self.keras, self.tf, self.tf_session)
                 elif self.detector == 'dlib':
-                    self.dlib = gpufmkmgr.import_dlib( self.device_idx )
+                    self.dlib = gpufmkmgr.import_dlib( self.device_idx, cpu_only=self.cpu_only )
                     self.e = facelib.DLIBExtractor(self.dlib)
                 self.e.__enter__()
 
         elif self.type == 'landmarks':
-            self.gpu_config = gpufmkmgr.GPUConfig ( force_best_gpu_idx=self.device_idx, allow_growth=True)
+            self.gpu_config = gpufmkmgr.GPUConfig ( cpu_only=self.cpu_only, force_best_gpu_idx=self.device_idx, allow_growth=True)
             self.tf = gpufmkmgr.import_tf ( self.gpu_config )
             self.tf_session = gpufmkmgr.get_tf_session()
             self.keras = gpufmkmgr.import_keras()
@@ -381,9 +389,9 @@ face_type
     'full_face'
     'avatar'
 '''
-def main (input_dir, output_dir, debug, detector='mt', multi_gpu=True, manual_fix=False, manual_window_size=0, image_size=256, face_type='full_face'):
+def main (input_dir, output_dir, debug, detector='mt', multi_gpu=True, cpu_only=False, manual_fix=False, manual_window_size=0, image_size=256, face_type='full_face'):
     print ("Running extractor.\r\n")
-
+
     input_path = Path(input_dir)
     output_path = Path(output_dir)
     face_type = FaceType.fromString(face_type)
@@ -412,13 +420,13 @@ def main (input_dir, output_dir, debug, detector='mt', multi_gpu=True, manual_fi
     if images_found != 0:
         if detector == 'manual':
             print ('Performing manual extract...')
-            extracted_faces = ExtractSubprocessor ([ (filename,[]) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug, manual=True, manual_window_size=manual_window_size).process()
+            extracted_faces = ExtractSubprocessor ([ (filename,[]) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).process()
         else:
             print ('Performing 1st pass...')
-            extracted_rects = ExtractSubprocessor ([ (x,) for x in input_path_image_paths ], 'rects', image_size, face_type, debug, multi_gpu=multi_gpu, manual=False, detector=detector).process()
+            extracted_rects = ExtractSubprocessor ([ (x,) for x in input_path_image_paths ], 'rects', image_size, face_type, debug, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, detector=detector).process()
 
             print ('Performing 2nd pass...')
-            extracted_faces = ExtractSubprocessor (extracted_rects, 'landmarks', image_size, face_type, debug, multi_gpu=multi_gpu, manual=False).process()
+            extracted_faces = ExtractSubprocessor (extracted_rects, 'landmarks', image_size, face_type, debug, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).process()
 
         if manual_fix:
             print ('Performing manual fix...')
@@ -430,7 +438,7 @@ def main (input_dir, output_dir, debug, detector='mt', multi_gpu=True, manual_fi
 
         if len(extracted_faces) > 0:
             print ('Performing 3rd pass...')
-            final_imgs_paths = ExtractSubprocessor (extracted_faces, 'final', image_size, face_type, debug, multi_gpu=multi_gpu, manual=False, output_path=output_path).process()
+            final_imgs_paths = ExtractSubprocessor (extracted_faces, 'final', image_size, face_type, debug, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, output_path=output_path).process()
             faces_detected = len(final_imgs_paths)
 
     print('-------------------------')
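In CPU mode the extractor advertises a single pseudo-device `(0, 'CPU', 0)` and sizes the MT detector's first pass by core count instead of VRAM (the hunk assumes `multiprocessing` is already imported at the top of Extractor.py, which is not shown here). A standalone sketch of that heuristic, with an illustrative function name not taken from the repo:

```python
# Sketch of the worker-count heuristic from process_info_generator (illustrative name).
import multiprocessing

def mt_rects_worker_count(cpu_only, device_total_vram_gb=0):
    if cpu_only:
        # CPU: roughly one detector process per two logical cores
        return int(max(1, multiprocessing.cpu_count() / 2))
    # GPU: roughly one detector process per 2 GB of VRAM
    return int(max(1, device_total_vram_gb / 2))

print(mt_rects_worker_count(cpu_only=True))                           # e.g. 4 on an 8-thread CPU
print(mt_rects_worker_count(cpu_only=False, device_total_vram_gb=8))  # -> 4
```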
diff --git a/models/ModelBase.py b/models/ModelBase.py
index f660fc8..a597653 100644
--- a/models/ModelBase.py
+++ b/models/ModelBase.py
@@ -118,10 +118,13 @@ class ModelBase(object):
                 print ("== |== %s : %s" % (key, self.options[key]) )
 
         print ("== Running on:")
-        for idx in self.gpu_config.gpu_idxs:
-            print ("== |== [%d : %s]" % (idx, gpufmkmgr.getDeviceName(idx)) )
+        if self.gpu_config.cpu_only:
+            print ("== |== [CPU]")
+        else:
+            for idx in self.gpu_config.gpu_idxs:
+                print ("== |== [%d : %s]" % (idx, gpufmkmgr.getDeviceName(idx)) )
 
-        if self.gpu_total_vram_gb == 2:
+        if not self.gpu_config.cpu_only and self.gpu_total_vram_gb == 2:
             print ("==")
             print ("== WARNING: You are using 2GB GPU. Result quality may be significantly decreased.")
             print ("== If training does not start, close all programs and try again.")
@@ -264,7 +267,10 @@ class ModelBase(object):
             self.epoch += 1
 
             #............."Saving...
-            loss_string = "Training [#{0:06d}][{1:04d}ms]".format ( self.epoch, int(epoch_time*1000) % 10000 )
+            if epoch_time >= 10:
+                loss_string = "Training [#{0:06d}][{1:03d}s]".format ( self.epoch, int(epoch_time) )
+            else:
+                loss_string = "Training [#{0:06d}][{1:04d}ms]".format ( self.epoch, int(epoch_time*1000) % 10000 )
             for (loss_name, loss_value) in losses:
                 loss_string += " %s:%.3f" % (loss_name, loss_value)
 
@@ -301,14 +307,18 @@ class ModelBase(object):
         #example d = {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48}
         keys = [x for x in d.keys()]
 
-        if self.gpu_total_vram_gb < keys[0]:
-            raise Exception ('Sorry, this model works only on %dGB+ GPU' % ( keys[0] ) )
-
-        if self.batch_size == 0:
-            for x in keys:
-                if self.gpu_total_vram_gb <= x:
-                    self.batch_size = d[x]
-                    break
-
+        if self.gpu_config.cpu_only:
             if self.batch_size == 0:
-                self.batch_size = d[ keys[-1] ]
\ No newline at end of file
+                self.batch_size = 2
+        else:
+            if self.gpu_total_vram_gb < keys[0]:
+                raise Exception ('Sorry, this model works only on %dGB+ GPU' % ( keys[0] ) )
+
+            if self.batch_size == 0:
+                for x in keys:
+                    if self.gpu_total_vram_gb <= x:
+                        self.batch_size = d[x]
+                        break
+
+                if self.batch_size == 0:
+                    self.batch_size = d[ keys[-1] ]
\ No newline at end of file
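The batch-size selection in ModelBase now branches on CPU mode: a fixed small batch of 2 on CPU, otherwise the VRAM-to-batch-size table. A standalone sketch of that selection logic (the function name and signature here are illustrative, not from the repo):

```python
# Standalone sketch of the batch-size selection shown above (illustrative name/signature).
def choose_batch_size(d, gpu_total_vram_gb, cpu_only, requested=0):
    """d maps total VRAM in GB -> default batch size, e.g. {2:2, 3:4, 4:8, 5:16, 6:32, 7:32, 8:32, 9:48}."""
    if requested != 0:
        return requested            # an explicitly requested batch size wins
    if cpu_only:
        return 2                    # fixed small batch on CPU
    keys = sorted(d.keys())
    if gpu_total_vram_gb < keys[0]:
        raise Exception('Sorry, this model works only on %dGB+ GPU' % keys[0])
    for x in keys:
        if gpu_total_vram_gb <= x:
            return d[x]
    return d[keys[-1]]              # more VRAM than the largest key: use the largest batch

table = {2: 2, 3: 4, 4: 8, 5: 16, 6: 32, 7: 32, 8: 32, 9: 48}
print(choose_batch_size(table, gpu_total_vram_gb=11, cpu_only=False))  # -> 48
print(choose_batch_size(table, gpu_total_vram_gb=0, cpu_only=True))    # -> 2
```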
diff --git a/requirements-cpu.txt b/requirements-cpu.txt
new file mode 100644
index 0000000..52177a6
--- /dev/null
+++ b/requirements-cpu.txt
@@ -0,0 +1,10 @@
+pathlib==1.0.1
+scandir==1.6
+h5py==2.7.1
+Keras==2.2.4
+opencv-python==3.4.0.12
+tensorflow==1.11.0
+scikit-image
+dlib==19.10.0
+tqdm
+git+https://www.github.com/keras-team/keras-contrib.git
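requirements-cpu.txt pins the plain `tensorflow` 1.11.0 wheel, which ships without CUDA support, so Keras/TensorFlow work lands on the CPU once the flag is set. A quick sanity check after `pip install -r requirements-cpu.txt` (this snippet is an illustration, not part of the repo):

```python
# Verify that the installed TensorFlow build exposes only CPU devices (TF 1.x API).
from tensorflow.python.client import device_lib

device_types = [d.device_type for d in device_lib.list_local_devices()]
print(device_types)               # expected: ['CPU'] with the CPU-only wheel
assert 'GPU' not in device_types, "a GPU-enabled TensorFlow build is installed"
```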