diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..382cf7a --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,16 @@ +## Expected behavior + +*Describe, in some detail, what you are trying to do and what the output is that you expect from the program.* + +## Actual behavior + +*Describe, in some detail, what the program does instead. Be sure to include any error message or screenshots.* + +## Steps to reproduce + +*Describe, in some detail, the steps you tried that resulted in the behavior described above.* + +## Other relevant information +- **Command lined used (if not specified in steps to reproduce)**: main.py ... +- **Operating system and version:** Windows, macOS, Linux +- **Python version:** 3.5, 3.6.4, ... \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2a76c1e --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +* +!*.py +!*.md +!*.txt +!*.jpg +!requirements* +!doc +!facelib +!gpufmkmgr +!localization +!mainscripts +!mathlib +!models +!nnlib +!utils \ No newline at end of file diff --git a/CODEGUIDELINES b/CODEGUIDELINES new file mode 100644 index 0000000..0d40a02 --- /dev/null +++ b/CODEGUIDELINES @@ -0,0 +1,5 @@ +Please don't ruin the code and this good (as I think) architecture. + +Please follow the same logic and brevity/pithiness. + +Don't abstract the code into huge classes if you only win some lines of code in one place, because this can prevent programmers from understanding it quickly. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2967271 --- /dev/null +++ b/README.md @@ -0,0 +1,116 @@ +## **DeepFaceLab** is a tool that utilizes deep learning to recognize and swap faces in pictures and videos. + +Based on original FaceSwap repo. **Facesets** of FaceSwap or FakeApp are **not compatible** with this repo. You should to run extract again. + +### **Features**: + +- new models + +- new architecture, easy to experiment with models + +- works on 2GB old cards , such as GT730. Example of fake trained on 2GB gtx850m notebook in 18 hours https://www.youtube.com/watch?v=bprVuRxBA34 + +- face data embedded to png files + +- automatic GPU manager, chooses best gpu(s) and supports --multi-gpu + +- new preview window + +- extractor in parallel + +- converter in parallel + +- added **--debug** option for all stages + +- added **MTCNN extractor** which produce less jittered aligned face than DLIBCNN, but can produce more false faces. Comparison dlib (at left) vs mtcnn on hard case: +![](https://i.imgur.com/5qLiiOV.gif) +MTCNN produces less jitter. + +- added **Manual extractor**. You can fix missed faces manually or do full manual extract, click on video: +[![Watch the video](https://i.imgur.com/BDrPKR2.jpg)](https://webm.video/i/ogL0DL.mp4) +![Result](https://user-images.githubusercontent.com/8076202/38454756-0fa7a86c-3a7e-11e8-9065-182b4a8a7a43.gif) + +- standalone zero dependencies ready to work prebuilt binary for all windows versions, see below + +### **Model types**: + +- **H64 (2GB+)** - half face with 64 resolution. It is as original FakeApp or FaceSwap, but with new TensorFlow 1.8 DSSIM Loss func and separated mask decoder + better ConverterMasked. for 2GB and 3GB VRAM model works in reduced mode. 
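A minimal sketch of the DSSIM idea that H64 uses as its objective — not necessarily how this repo implements it, and it assumes a TensorFlow version that exposes `tf.image.ssim`:

```python
import tensorflow as tf

def dssim_loss(y_true, y_pred):
    # DSSIM = (1 - SSIM) / 2, computed per image and averaged over the batch;
    # inputs are expected to be float tensors scaled to [0, 1]
    return (1.0 - tf.image.ssim(y_true, y_pred, max_val=1.0)) / 2.0

# usage (hypothetical): model.compile(optimizer='adam', loss=dssim_loss)
```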
+* H64 Robert Downey Jr.: +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/H64_Downey_0.jpg) +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/H64_Downey_1.jpg) + +- **H128 (3GB+)** - as H64, but in 128 resolution. Better face details. for 3GB and 4GB VRAM model works in reduced mode. +* H128 Cage: +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/H128_Cage_0.jpg) +* H128 asian face on blurry target: +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/H128_Asian_0.jpg) +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/H128_Asian_1.jpg) +- **DF (5GB+)** - @dfaker model. As H128, but fullface model. +* DF example - later + +- **LIAEF128 (5GB+)** - new model. Result of combining DF, IAE, + experiments. Model tries to morph src face to dst, while keeping facial features of src face, but less agressive morphing. Model has problems with closed eyes recognizing. +* LIAEF128 Cage: +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/LIAEF128_Cage_0.jpg) +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/LIAEF128_Cage_1.jpg) +* LIAEF128 Cage video: +* [![Watch the video](https://img.youtube.com/vi/mRsexePEVco/0.jpg)](https://www.youtube.com/watch?v=mRsexePEVco) +- **LIAEF128YAW (5GB+)** - currently testing. Useful when your src faceset has too many side faces vs dst faceset. It feeds NN by sorted samples by yaw. +- **MIAEF128 (5GB+)** - as LIAEF128, but also it tries to match brightness/color features. +* MIAEF128 model diagramm: +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/MIAEF128_diagramm.png) +* MIAEF128 Ford success case: +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/MIAEF128_Ford_0.jpg) +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/MIAEF128_Ford_1.jpg) +* MIAEF128 Cage fail case: +* ![](https://github.com/iperov/OpenDeepFaceSwap/blob/master/doc/MIAEF128_Cage_fail.jpg) +- **AVATAR (4GB+)** - face controlling model. Usage: +* src - controllable face (Cage) +* dst - controller face (your face) +* converter --input-dir contains aligned dst faces in sequence to be converted, its mean you can train on 1500 dst faces, but use only 100 for convert. + +### **Sort tool**: + +`hist` groups images by similar content + +`hist-dissim` places most similar to each other images to end. + +`hist-blur` sort by blur in groups of similar content + +`brightness` + +`hue` + +`face` and `face-dissim` currently useless + +Best practice for gather src faceset: + +1) delete first unsorted aligned groups of images what you can to delete. Dont touch target face mixed with others. +2) `blur` -> delete ~half of them +3) `hist` -> delete groups of similar and leave only target face +4) `hist-blur` -> delete blurred at end of groups of similar +5) `hist-dissim` -> leave only first **1000-1500 faces**, because number of src faces can affect result. For YAW feeder model skip this step. +6) `face-yaw` -> just for finalize faceset + +Best practice for dst faces: + +1) delete first unsorted aligned groups of images what you can to delete. Dont touch target face mixed with others. +2) `hist` -> delete groups of similar and leave only target face + +### **Prebuilt binary**: + +Windows 7,8,8.1,10 zero dependency binary except NVidia Video Drivers can be downloaded from torrent. + +Torrent page: https://rutracker.org/forum/viewtopic.php?p=75318742 (magnet link inside) + +### **Facesets**: + +- Nicolas Cage. 
+ +- Cage/Trump workspace + +download from here: https://mega.nz/#F!y1ERHDaL!PPwg01PQZk0FhWLVo5_MaQ + +### **Pull requesting**: + +I understand some people want to help. But result of mass people contribution we can see in deepfakes\faceswap. +High chance I will decline PR. Therefore before PR better ask me what you want to change or add to save your time. \ No newline at end of file diff --git a/doc/H128_Asian_0.jpg b/doc/H128_Asian_0.jpg new file mode 100644 index 0000000..9ceb4bd Binary files /dev/null and b/doc/H128_Asian_0.jpg differ diff --git a/doc/H128_Asian_1.jpg b/doc/H128_Asian_1.jpg new file mode 100644 index 0000000..2606578 Binary files /dev/null and b/doc/H128_Asian_1.jpg differ diff --git a/doc/H128_Cage_0.jpg b/doc/H128_Cage_0.jpg new file mode 100644 index 0000000..1ca6745 Binary files /dev/null and b/doc/H128_Cage_0.jpg differ diff --git a/doc/H64_Downey_0.jpg b/doc/H64_Downey_0.jpg new file mode 100644 index 0000000..eb00a9b Binary files /dev/null and b/doc/H64_Downey_0.jpg differ diff --git a/doc/H64_Downey_1.jpg b/doc/H64_Downey_1.jpg new file mode 100644 index 0000000..7066f30 Binary files /dev/null and b/doc/H64_Downey_1.jpg differ diff --git a/doc/LIAEF128_Cage_0.jpg b/doc/LIAEF128_Cage_0.jpg new file mode 100644 index 0000000..cbb9859 Binary files /dev/null and b/doc/LIAEF128_Cage_0.jpg differ diff --git a/doc/LIAEF128_Cage_1.jpg b/doc/LIAEF128_Cage_1.jpg new file mode 100644 index 0000000..118f922 Binary files /dev/null and b/doc/LIAEF128_Cage_1.jpg differ diff --git a/doc/MIAEF128_Cage_fail.jpg b/doc/MIAEF128_Cage_fail.jpg new file mode 100644 index 0000000..b3c7923 Binary files /dev/null and b/doc/MIAEF128_Cage_fail.jpg differ diff --git a/doc/MIAEF128_Ford_0.jpg b/doc/MIAEF128_Ford_0.jpg new file mode 100644 index 0000000..efc5e0f Binary files /dev/null and b/doc/MIAEF128_Ford_0.jpg differ diff --git a/doc/MIAEF128_Ford_1.jpg b/doc/MIAEF128_Ford_1.jpg new file mode 100644 index 0000000..51c7f9e Binary files /dev/null and b/doc/MIAEF128_Ford_1.jpg differ diff --git a/doc/MIAEF128_diagramm.png b/doc/MIAEF128_diagramm.png new file mode 100644 index 0000000..a940d52 Binary files /dev/null and b/doc/MIAEF128_diagramm.png differ diff --git a/doc/landmarks.jpg b/doc/landmarks.jpg new file mode 100644 index 0000000..1b9c275 Binary files /dev/null and b/doc/landmarks.jpg differ diff --git a/facelib/2DFAN-4.h5 b/facelib/2DFAN-4.h5 new file mode 100644 index 0000000..8c5079a Binary files /dev/null and b/facelib/2DFAN-4.h5 differ diff --git a/facelib/DLIBExtractor.py b/facelib/DLIBExtractor.py new file mode 100644 index 0000000..fced9f2 --- /dev/null +++ b/facelib/DLIBExtractor.py @@ -0,0 +1,40 @@ +import numpy as np +import os +import cv2 + +from pathlib import Path + +class DLIBExtractor(object): + def __init__(self, dlib): + self.scale_to = 1850 + #3100 eats ~1.687GB VRAM on 2GB 730 desktop card, but >4Gb on 6GB card, + #but 3100 doesnt work on 2GB 850M notebook card, I cant understand this behaviour + #1850 works on 2GB 850M notebook card, works faster than 3100, produces good result + self.dlib = dlib + + def __enter__(self): + self.dlib_cnn_face_detector = self.dlib.cnn_face_detection_model_v1( str(Path(__file__).parent / "mmod_human_face_detector.dat") ) + self.dlib_cnn_face_detector ( np.zeros ( (self.scale_to, self.scale_to, 3), dtype=np.uint8), 0 ) + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + del self.dlib_cnn_face_detector + return False #pass exception between __enter__ and __exit__ to outter level + + def 
extract_from_bgr (self, input_image): + input_image = input_image[:,:,::-1].copy() + (h, w, ch) = input_image.shape + + detected_faces = [] + input_scale = self.scale_to / (w if w > h else h) + input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) + detected_faces = self.dlib_cnn_face_detector(input_image, 0) + + result = [] + for d_rect in detected_faces: + if type(d_rect) == self.dlib.mmod_rectangle: + d_rect = d_rect.rect + left, top, right, bottom = d_rect.left(), d_rect.top(), d_rect.right(), d_rect.bottom() + result.append ( (int(left/input_scale), int(top/input_scale), int(right/input_scale), int(bottom/input_scale)) ) + + return result diff --git a/facelib/FaceType.py b/facelib/FaceType.py new file mode 100644 index 0000000..782f39d --- /dev/null +++ b/facelib/FaceType.py @@ -0,0 +1,34 @@ +from enum import IntEnum + +class FaceType(IntEnum): + HALF = 0, + FULL = 1, + HEAD = 2, + AVATAR = 3, #centered nose only + MARK_ONLY = 4, #no align at all, just embedded faceinfo + QTY = 5 + + @staticmethod + def fromString (s): + r = from_string_dict.get (s.lower()) + if r is None: + raise Exception ('FaceType.fromString value error') + return r + + @staticmethod + def toString (face_type): + return to_string_list[face_type] + +from_string_dict = {'half_face': FaceType.HALF, + 'full_face': FaceType.FULL, + 'head' : FaceType.HEAD, + 'avatar' : FaceType.AVATAR, + 'mark_only' : FaceType.MARK_ONLY, + } +to_string_list = [ 'half_face', + 'full_face', + 'head', + 'avatar', + 'mark_only' + ] + diff --git a/facelib/LandmarksExtractor.py b/facelib/LandmarksExtractor.py new file mode 100644 index 0000000..7773364 --- /dev/null +++ b/facelib/LandmarksExtractor.py @@ -0,0 +1,133 @@ +import numpy as np +import os +import cv2 +from pathlib import Path + +from utils import std_utils + + + +def transform(point, center, scale, resolution): + pt = np.array ( [point[0], point[1], 1.0] ) + h = 200.0 * scale + m = np.eye(3) + m[0,0] = resolution / h + m[1,1] = resolution / h + m[0,2] = resolution * ( -center[0] / h + 0.5 ) + m[1,2] = resolution * ( -center[1] / h + 0.5 ) + m = np.linalg.inv(m) + return np.matmul (m, pt)[0:2] + +def crop(image, center, scale, resolution=256.0): + ul = transform([1, 1], center, scale, resolution).astype( np.int ) + br = transform([resolution, resolution], center, scale, resolution).astype( np.int ) + if image.ndim > 2: + newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32) + newImg = np.zeros(newDim, dtype=np.uint8) + else: + newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) + newImg = np.zeros(newDim, dtype=np.uint8) + ht = image.shape[0] + wd = image.shape[1] + newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32) + newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) + oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32) + oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) + newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :] + newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR) + return newImg + +def get_pts_from_predict(a, center, scale): + b = a.reshape ( (a.shape[0], a.shape[1]*a.shape[2]) ) + c = b.argmax(1).reshape ( (a.shape[0], 1) ).repeat(2, axis=1).astype(np.float) + c[:,0] %= a.shape[2] + c[:,1] = np.apply_along_axis ( lambda x: np.floor(x / a.shape[2]), 0, c[:,1] ) + + for i in 
range(a.shape[0]): + pX, pY = int(c[i,0]), int(c[i,1]) + if pX > 0 and pX < 63 and pY > 0 and pY < 63: + diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] ) + c[i] += np.sign(diff)*0.25 + + c += 0.5 + return [ transform (c[i], center, scale, a.shape[2]) for i in range(a.shape[0]) ] + + +class LandmarksExtractor(object): + def __init__ (self, keras): + self.keras = keras + K = self.keras.backend + class TorchBatchNorm2D(self.keras.engine.topology.Layer): + def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, **kwargs): + super(TorchBatchNorm2D, self).__init__(**kwargs) + self.supports_masking = True + self.axis = axis + self.momentum = momentum + self.epsilon = epsilon + + def build(self, input_shape): + dim = input_shape[self.axis] + if dim is None: + raise ValueError('Axis ' + str(self.axis) + ' of ' 'input tensor should have a defined dimension ' 'but the layer received an input with shape ' + str(input_shape) + '.') + shape = (dim,) + self.gamma = self.add_weight(shape=shape, name='gamma', initializer='ones', regularizer=None, constraint=None) + self.beta = self.add_weight(shape=shape, name='beta', initializer='zeros', regularizer=None, constraint=None) + self.moving_mean = self.add_weight(shape=shape, name='moving_mean', initializer='zeros', trainable=False) + self.moving_variance = self.add_weight(shape=shape, name='moving_variance', initializer='ones', trainable=False) + self.built = True + + def call(self, inputs, training=None): + input_shape = K.int_shape(inputs) + + broadcast_shape = [1] * len(input_shape) + broadcast_shape[self.axis] = input_shape[self.axis] + + broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape) + broadcast_moving_variance = K.reshape(self.moving_variance, broadcast_shape) + broadcast_gamma = K.reshape(self.gamma, broadcast_shape) + broadcast_beta = K.reshape(self.beta, broadcast_shape) + invstd = K.ones (shape=broadcast_shape, dtype='float32') / K.sqrt(broadcast_moving_variance + K.constant(self.epsilon, dtype='float32')) + + return (inputs - broadcast_moving_mean) * invstd * broadcast_gamma + broadcast_beta + + def get_config(self): + config = { 'axis': self.axis, 'momentum': self.momentum, 'epsilon': self.epsilon } + base_config = super(TorchBatchNorm2D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + self.TorchBatchNorm2D = TorchBatchNorm2D + + def __enter__(self): + keras_model_path = Path(__file__).parent / "2DFAN-4.h5" + if not keras_model_path.exists(): + return None + + self.keras_model = self.keras.models.load_model ( str(keras_model_path), custom_objects={'TorchBatchNorm2D': self.TorchBatchNorm2D} ) + + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + del self.keras_model + return False #pass exception between __enter__ and __exit__ to outter level + + def extract_from_bgr (self, input_image, rects): + input_image = input_image[:,:,::-1].copy() + (h, w, ch) = input_image.shape + + landmarks = [] + for (left, top, right, bottom) in rects: + + center = np.array( [ (left + right) / 2.0, (top + bottom) / 2.0] ) + center[1] -= (bottom - top) * 0.12 + scale = (right - left + bottom - top) / 195.0 + + image = crop(input_image, center, scale).transpose ( (2,0,1) ).astype(np.float32) / 255.0 + image = np.expand_dims(image, 0) + + with std_utils.suppress_stdout_stderr(): + predicted = self.keras_model.predict (image) + + pts_img = get_pts_from_predict ( predicted[-1][0], center, scale) + pts_img = [ ( int(pt[0]), int(pt[1]) ) for pt in pts_img ] 
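+            # pts_img now holds the 68 predicted landmarks mapped back to
+            # original-image pixel coordinates; keep them paired with the
+            # detection rect they were computed from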
+ landmarks.append ( ( (left, top, right, bottom),pts_img ) ) + + return landmarks diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py new file mode 100644 index 0000000..3f8975e --- /dev/null +++ b/facelib/LandmarksProcessor.py @@ -0,0 +1,193 @@ +import colorsys +import cv2 +import numpy as np +from enum import IntEnum +from mathlib.umeyama import umeyama +from utils import image_utils +from facelib import FaceType +import math + +mean_face_x = np.array([ +0.000213256, 0.0752622, 0.18113, 0.29077, 0.393397, 0.586856, 0.689483, 0.799124, +0.904991, 0.98004, 0.490127, 0.490127, 0.490127, 0.490127, 0.36688, 0.426036, +0.490127, 0.554217, 0.613373, 0.121737, 0.187122, 0.265825, 0.334606, 0.260918, +0.182743, 0.645647, 0.714428, 0.793132, 0.858516, 0.79751, 0.719335, 0.254149, +0.340985, 0.428858, 0.490127, 0.551395, 0.639268, 0.726104, 0.642159, 0.556721, +0.490127, 0.423532, 0.338094, 0.290379, 0.428096, 0.490127, 0.552157, 0.689874, +0.553364, 0.490127, 0.42689 ]) + +mean_face_y = np.array([ +0.106454, 0.038915, 0.0187482, 0.0344891, 0.0773906, 0.0773906, 0.0344891, +0.0187482, 0.038915, 0.106454, 0.203352, 0.307009, 0.409805, 0.515625, 0.587326, +0.609345, 0.628106, 0.609345, 0.587326, 0.216423, 0.178758, 0.179852, 0.231733, +0.245099, 0.244077, 0.231733, 0.179852, 0.178758, 0.216423, 0.244077, 0.245099, +0.780233, 0.745405, 0.727388, 0.742578, 0.727388, 0.745405, 0.780233, 0.864805, +0.902192, 0.909281, 0.902192, 0.864805, 0.784792, 0.778746, 0.785343, 0.778746, +0.784792, 0.824182, 0.831803, 0.824182 ]) + +landmarks_2D = np.stack( [ mean_face_x, mean_face_y ], axis=1 ) + +def get_transform_mat (image_landmarks, output_size, face_type): + if not isinstance(image_landmarks, np.ndarray): + image_landmarks = np.array (image_landmarks) + + if face_type == FaceType.AVATAR: + centroid = np.mean (image_landmarks, axis=0) + + mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] + a, c = mat[0,0], mat[1,0] + scale = math.sqrt((a * a) + (c * c)) + + padding = (output_size / 64) * 32 + + mat = np.eye ( 2,3 ) + mat[0,2] = -centroid[0] + mat[1,2] = -centroid[1] + mat = mat * scale * (output_size / 3) + mat[:,2] += output_size / 2 + else: + if face_type == FaceType.HALF: + padding = 0 + elif face_type == FaceType.FULL: + padding = (output_size / 64) * 12 + elif face_type == FaceType.HEAD: + padding = (output_size / 64) * 24 + else: + raise ValueError ('wrong face_type') + + mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2] + mat = mat * (output_size - 2 * padding) + mat[:,2] += padding + + return mat + +def transform_points(points, mat, invert=False): + if invert: + mat = cv2.invertAffineTransform (mat) + points = np.expand_dims(points, axis=1) + points = cv2.transform(points, mat, points.shape) + points = np.squeeze(points) + return points + + +def get_image_hull_mask (image, image_landmarks): + if len(image_landmarks) != 68: + raise Exception('get_image_hull_mask work only with 68 landmarks') + + hull_mask = np.zeros(image.shape[0:2]+(1,),dtype=np.float32) + + cv2.fillConvexPoly( hull_mask, cv2.convexHull( np.concatenate ( (image_landmarks[0:17], image_landmarks[48:], [image_landmarks[0]], [image_landmarks[8]], [image_landmarks[16]])) ), (1,) ) + cv2.fillConvexPoly( hull_mask, cv2.convexHull( np.concatenate ( (image_landmarks[27:31], [image_landmarks[33]]) ) ), (1,) ) + cv2.fillConvexPoly( hull_mask, cv2.convexHull( np.concatenate ( (image_landmarks[17:27], [image_landmarks[0]], [image_landmarks[27]], [image_landmarks[16]], [image_landmarks[33]])) ), (1,) ) + 
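+    # the final mask is the union of the three convex hulls filled above:
+    # the jaw line plus mouth, the nose bridge and tip, and the brow band
+    # joined down to the face corners and nose; 1.0 inside, 0.0 outside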
+ return hull_mask + +def get_image_eye_mask (image, image_landmarks): + if len(image_landmarks) != 68: + raise Exception('get_image_eye_mask work only with 68 landmarks') + + hull_mask = np.zeros(image.shape[0:2]+(1,),dtype=np.float32) + + cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[36:42]), (1,) ) + cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[42:48]), (1,) ) + + return hull_mask + +def get_image_hull_mask_3D (image, image_landmarks): + result = get_image_hull_mask(image, image_landmarks) + + return np.repeat ( result, (3,), -1 ) + +def blur_image_hull_mask (hull_mask): + + maxregion = np.argwhere(hull_mask==1.0) + miny,minx = maxregion.min(axis=0)[:2] + maxy,maxx = maxregion.max(axis=0)[:2] + lenx = maxx - minx; + leny = maxy - miny; + masky = int(minx+(lenx//2)) + maskx = int(miny+(leny//2)) + lowest_len = min (lenx, leny) + ero = int( lowest_len * 0.085 ) + blur = int( lowest_len * 0.10 ) + + hull_mask = cv2.erode(hull_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) + hull_mask = cv2.blur(hull_mask, (blur, blur) ) + hull_mask = np.expand_dims (hull_mask,-1) + + return hull_mask + +def get_blurred_image_hull_mask(image, image_landmarks): + return blur_image_hull_mask ( get_image_hull_mask(image, image_landmarks) ) + +mirror_idxs = [ + [0,16], + [1,15], + [2,14], + [3,13], + [4,12], + [5,11], + [6,10], + [7,9], + + [17,26], + [18,25], + [19,24], + [20,23], + [21,22], + + [36,45], + [37,44], + [38,43], + [39,42], + [40,47], + [41,46], + + [31,35], + [32,34], + + [50,52], + [49,53], + [48,54], + [59,55], + [58,56], + [67,65], + [60,64], + [61,63] ] + +def mirror_landmarks (landmarks, val): + result = landmarks.copy() + + for idx in mirror_idxs: + result [ idx ] = result [ idx[::-1] ] + + result[:,0] = val - result[:,0] - 1 + return result + +def draw_landmarks (image, image_landmarks, color): + for i, (x, y) in enumerate(image_landmarks): + cv2.circle(image, (x, y), 2, color, -1) + #text_color = colorsys.hsv_to_rgb ( (i%4) * (0.25), 1.0, 1.0 ) + #cv2.putText(image, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.1,text_color,1) + +def draw_rect_landmarks (image, rect, image_landmarks, face_size, face_type): + image_utils.draw_rect (image, rect, (255,0,0), 2 ) + draw_landmarks(image, image_landmarks, (0,255,0) ) + + image_to_face_mat = get_transform_mat (image_landmarks, face_size, face_type) + points = transform_points ( [ (0,0), (0,face_size-1), (face_size-1, face_size-1), (face_size-1,0) ], image_to_face_mat, True) + image_utils.draw_polygon (image, points, (0,0,255), 2) + +def calc_face_pitch(landmarks): + if not isinstance(landmarks, np.ndarray): + landmarks = np.array (landmarks) + t = ( (landmarks[6][1]-landmarks[8][1]) + (landmarks[10][1]-landmarks[8][1]) ) / 2.0 + b = landmarks[8][1] + return float(b-t) +def calc_face_yaw(landmarks): + if not isinstance(landmarks, np.ndarray): + landmarks = np.array (landmarks) + l = ( (landmarks[27][0]-landmarks[0][0]) + (landmarks[28][0]-landmarks[1][0]) + (landmarks[29][0]-landmarks[2][0]) ) / 3.0 + r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0 + return float(r-l) + \ No newline at end of file diff --git a/facelib/MTCExtractor.py b/facelib/MTCExtractor.py new file mode 100644 index 0000000..cccbd78 --- /dev/null +++ b/facelib/MTCExtractor.py @@ -0,0 +1,66 @@ +import numpy as np +import os +import cv2 + +from pathlib import Path + +from .mtcnn import * + +class MTCExtractor(object): + def __init__(self, 
keras, tf, tf_session): + self.scale_to = 1920 + self.keras = keras + self.tf = tf + self.tf_session = tf_session + + self.min_face_size = self.scale_to * 0.042 + self.thresh1 = 0.7 + self.thresh2 = 0.85 + self.thresh3 = 0.6 + self.scale_factor = 0.95 + + ''' + self.min_face_size = self.scale_to * 0.042 + self.thresh1 = 7 + self.thresh2 = 85 + self.thresh3 = 6 + self.scale_factor = 0.95 + ''' + + def __enter__(self): + with self.tf.variable_scope('pnet2'): + data = self.tf.placeholder(self.tf.float32, (None,None,None,3), 'input') + pnet2 = PNet(self.tf, {'data':data}) + pnet2.load(str(Path(__file__).parent/'det1.npy'), self.tf_session) + with self.tf.variable_scope('rnet2'): + data = self.tf.placeholder(self.tf.float32, (None,24,24,3), 'input') + rnet2 = RNet(self.tf, {'data':data}) + rnet2.load(str(Path(__file__).parent/'det2.npy'), self.tf_session) + with self.tf.variable_scope('onet2'): + data = self.tf.placeholder(self.tf.float32, (None,48,48,3), 'input') + onet2 = ONet(self.tf, {'data':data}) + onet2.load(str(Path(__file__).parent/'det3.npy'), self.tf_session) + + self.pnet_fun = self.keras.backend.function([pnet2.layers['data']],[pnet2.layers['conv4-2'], pnet2.layers['prob1']]) + self.rnet_fun = self.keras.backend.function([rnet2.layers['data']],[rnet2.layers['conv5-2'], rnet2.layers['prob1']]) + self.onet_fun = self.keras.backend.function([onet2.layers['data']],[onet2.layers['conv6-2'], onet2.layers['conv6-3'], onet2.layers['prob1']]) + + faces, pnts = detect_face ( np.zeros ( (self.scale_to, self.scale_to, 3)), self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + return False #pass exception between __enter__ and __exit__ to outter level + + def extract_from_bgr (self, input_image): + input_image = input_image[:,:,::-1].copy() + (h, w, ch) = input_image.shape + + + input_scale = self.scale_to / (w if w > h else h) + input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) + + detected_faces, pnts = detect_face ( input_image, self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) + detected_faces = [ ( int(face[0]/input_scale), int(face[1]/input_scale), int(face[2]/input_scale), int(face[3]/input_scale)) for face in detected_faces ] + + return detected_faces + diff --git a/facelib/__init__.py b/facelib/__init__.py new file mode 100644 index 0000000..c05e37b --- /dev/null +++ b/facelib/__init__.py @@ -0,0 +1,5 @@ +from .FaceType import FaceType +from .DLIBExtractor import DLIBExtractor +from .MTCExtractor import MTCExtractor +from .LandmarksExtractor import LandmarksExtractor +from .LandmarksProcessor import * \ No newline at end of file diff --git a/facelib/det1.npy b/facelib/det1.npy new file mode 100644 index 0000000..7c05a2c Binary files /dev/null and b/facelib/det1.npy differ diff --git a/facelib/det2.npy b/facelib/det2.npy new file mode 100644 index 0000000..85d5bf0 Binary files /dev/null and b/facelib/det2.npy differ diff --git a/facelib/det3.npy b/facelib/det3.npy new file mode 100644 index 0000000..90d5ba9 Binary files /dev/null and b/facelib/det3.npy differ diff --git a/facelib/mmod_human_face_detector.dat b/facelib/mmod_human_face_detector.dat new file mode 100644 index 0000000..f1f73a5 Binary files /dev/null and b/facelib/mmod_human_face_detector.dat differ diff --git 
a/facelib/mtcnn.py b/facelib/mtcnn.py new file mode 100644 index 0000000..7247954 --- /dev/null +++ b/facelib/mtcnn.py @@ -0,0 +1,761 @@ +# Source: https://github.com/davidsandberg/facenet/blob/master/src/align/ + +""" Tensorflow implementation of the face detection / alignment algorithm found at +https://github.com/kpzhang93/MTCNN_face_detection_alignment +""" +# MIT License +# +# Copyright (c) 2016 David Sandberg +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from six import string_types, iteritems + +import numpy as np +#from math import floor +import cv2 +import os + +def layer(op): + """Decorator for composable network layers.""" + + def layer_decorated(self, *args, **kwargs): + # Automatically set a name if not provided. + name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) + # Figure out the layer inputs. + if len(self.terminals) == 0: + raise RuntimeError('No input variables found for layer %s.' % name) + elif len(self.terminals) == 1: + layer_input = self.terminals[0] + else: + layer_input = list(self.terminals) + # Perform the operation and get the output. + layer_output = op(self, layer_input, *args, **kwargs) + # Add to layer LUT. + self.layers[name] = layer_output + # This output is now the input for the next layer. + self.feed(layer_output) + # Return self for chained calls. + return self + + return layer_decorated + +class Network(object): + + def __init__(self, tf, inputs, trainable=True): + # The input nodes for this network + self.tf = tf + self.inputs = inputs + # The current list of terminal nodes + self.terminals = [] + # Mapping from layer names to layers + self.layers = dict(inputs) + # If true, the resulting variables are set as trainable + self.trainable = trainable + + self.setup() + + def setup(self): + """Construct the network. """ + raise NotImplementedError('Must be implemented by the subclass.') + + def load(self, data_path, session, ignore_missing=False): + """Load network weights. + data_path: The path to the numpy-serialized network weights + session: The current TensorFlow session + ignore_missing: If true, serialized weights for missing layers are ignored. 
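+        The weights file is a numpy-pickled dict of {layer_name: {param_name: ndarray}};
+        each array is assigned to the matching TensorFlow variable inside that
+        layer's variable scope.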
+ """ + data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member + + for op_name in data_dict: + with self.tf.variable_scope(op_name, reuse=True): + for param_name, data in iteritems(data_dict[op_name]): + try: + var = self.tf.get_variable(param_name) + session.run(var.assign(data)) + except ValueError: + if not ignore_missing: + raise + + def feed(self, *args): + """Set the input(s) for the next operation by replacing the terminal nodes. + The arguments can be either layer names or the actual layers. + """ + assert len(args) != 0 + self.terminals = [] + for fed_layer in args: + if isinstance(fed_layer, string_types): + try: + fed_layer = self.layers[fed_layer] + except KeyError: + raise KeyError('Unknown layer name fed: %s' % fed_layer) + self.terminals.append(fed_layer) + return self + + def get_output(self): + """Returns the current network output.""" + return self.terminals[-1] + + def get_unique_name(self, prefix): + """Returns an index-suffixed unique name for the given prefix. + This is used for auto-generating layer names based on the type-prefix. + """ + ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 + return '%s_%d' % (prefix, ident) + + def make_var(self, name, shape): + """Creates a new TensorFlow variable.""" + return self.tf.get_variable(name, shape, trainable=self.trainable) + + def validate_padding(self, padding): + """Verifies that the padding is one of the supported ones.""" + assert padding in ('SAME', 'VALID') + + @layer + def conv(self, + inp, + k_h, + k_w, + c_o, + s_h, + s_w, + name, + relu=True, + padding='SAME', + group=1, + biased=True): + # Verify that the padding is acceptable + self.validate_padding(padding) + # Get the number of channels in the input + c_i = int(inp.get_shape()[-1]) + # Verify that the grouping parameter is valid + assert c_i % group == 0 + assert c_o % group == 0 + # Convolution for a given input and kernel + convolve = lambda i, k: self.tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) + with self.tf.variable_scope(name) as scope: + kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) + # This is the common-case. Convolve the input without any further complications. + output = convolve(inp, kernel) + # Add the biases + if biased: + biases = self.make_var('biases', [c_o]) + output = self.tf.nn.bias_add(output, biases) + if relu: + # ReLU non-linearity + output = self.tf.nn.relu(output, name=scope.name) + return output + + @layer + def prelu(self, inp, name): + with self.tf.variable_scope(name): + i = int(inp.get_shape()[-1]) + alpha = self.make_var('alpha', shape=(i,)) + output = self.tf.nn.relu(inp) + self.tf.multiply(alpha, -self.tf.nn.relu(-inp)) + return output + + @layer + def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): + self.validate_padding(padding) + return self.tf.nn.max_pool(inp, + ksize=[1, k_h, k_w, 1], + strides=[1, s_h, s_w, 1], + padding=padding, + name=name) + + @layer + def fc(self, inp, num_out, name, relu=True): + with self.tf.variable_scope(name): + input_shape = inp.get_shape() + if input_shape.ndims == 4: + # The input is spatial. Vectorize it first. 
+ dim = 1 + for d in input_shape[1:].as_list(): + dim *= int(d) + feed_in = self.tf.reshape(inp, [-1, dim]) + else: + feed_in, dim = (inp, input_shape[-1].value) + weights = self.make_var('weights', shape=[dim, num_out]) + biases = self.make_var('biases', [num_out]) + op = self.tf.nn.relu_layer if relu else self.tf.nn.xw_plus_b + fc = op(feed_in, weights, biases, name=name) + return fc + + + """ + Multi dimensional softmax, + refer to https://github.com/tensorflow/tensorflow/issues/210 + compute softmax along the dimension of target + the native softmax only supports batch_size x dimension + """ + @layer + def softmax(self, target, axis, name=None): + max_axis = self.tf.reduce_max(target, axis, keepdims=True) + target_exp = self.tf.exp(target-max_axis) + normalize = self.tf.reduce_sum(target_exp, axis, keepdims=True) + softmax = self.tf.div(target_exp, normalize, name) + return softmax + +class PNet(Network): + def setup(self): + (self.feed('data') #pylint: disable=no-value-for-parameter, no-member + .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') + .prelu(name='PReLU1') + .max_pool(2, 2, 2, 2, name='pool1') + .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') + .prelu(name='PReLU2') + .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') + .prelu(name='PReLU3') + .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') + .softmax(3,name='prob1')) + + (self.feed('PReLU3') #pylint: disable=no-value-for-parameter + .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) + +class RNet(Network): + def setup(self): + (self.feed('data') #pylint: disable=no-value-for-parameter, no-member + .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') + .prelu(name='prelu1') + .max_pool(3, 3, 2, 2, name='pool1') + .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') + .prelu(name='prelu2') + .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') + .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') + .prelu(name='prelu3') + .fc(128, relu=False, name='conv4') + .prelu(name='prelu4') + .fc(2, relu=False, name='conv5-1') + .softmax(1,name='prob1')) + + (self.feed('prelu4') #pylint: disable=no-value-for-parameter + .fc(4, relu=False, name='conv5-2')) + +class ONet(Network): + def setup(self): + (self.feed('data') #pylint: disable=no-value-for-parameter, no-member + .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') + .prelu(name='prelu1') + .max_pool(3, 3, 2, 2, name='pool1') + .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') + .prelu(name='prelu2') + .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') + .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') + .prelu(name='prelu3') + .max_pool(2, 2, 2, 2, name='pool3') + .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') + .prelu(name='prelu4') + .fc(256, relu=False, name='conv5') + .prelu(name='prelu5') + .fc(2, relu=False, name='conv6-1') + .softmax(1, name='prob1')) + + (self.feed('prelu5') #pylint: disable=no-value-for-parameter + .fc(4, relu=False, name='conv6-2')) + + (self.feed('prelu5') #pylint: disable=no-value-for-parameter + .fc(10, relu=False, name='conv6-3')) + +def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): + """Detects faces in an image, and returns bounding boxes and points for them. 
+ img: input image + minsize: minimum faces' size + pnet, rnet, onet: caffemodel + threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold + factor: the factor used to create a scaling pyramid of face sizes to detect in the image. + """ + factor_count=0 + total_boxes=np.empty((0,9)) + points=np.empty(0) + h=img.shape[0] + w=img.shape[1] + minl=np.amin([h, w]) + m=12.0/minsize + minl=minl*m + # create scale pyramid + scales=[] + while minl>=12: + scales += [m*np.power(factor, factor_count)] + minl = minl*factor + factor_count += 1 + # first stage + for scale in scales: + hs=int(np.ceil(h*scale)) + ws=int(np.ceil(w*scale)) + #print ('scale %f %d %d' % (scale, ws,hs)) + im_data = imresample(img, (hs, ws)) + im_data = (im_data-127.5)*0.0078125 + img_x = np.expand_dims(im_data, 0) + img_y = np.transpose(img_x, (0,2,1,3)) + out = pnet([img_y]) + out0 = np.transpose(out[0], (0,2,1,3)) + out1 = np.transpose(out[1], (0,2,1,3)) + + boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) + + # inter-scale nms + pick = nms(boxes.copy(), 0.5, 'Union') + if boxes.size>0 and pick.size>0: + boxes = boxes[pick,:] + total_boxes = np.append(total_boxes, boxes, axis=0) + + numbox = total_boxes.shape[0] + if numbox>0: + pick = nms(total_boxes.copy(), 0.7, 'Union') + total_boxes = total_boxes[pick,:] + regw = total_boxes[:,2]-total_boxes[:,0] + regh = total_boxes[:,3]-total_boxes[:,1] + qq1 = total_boxes[:,0]+total_boxes[:,5]*regw + qq2 = total_boxes[:,1]+total_boxes[:,6]*regh + qq3 = total_boxes[:,2]+total_boxes[:,7]*regw + qq4 = total_boxes[:,3]+total_boxes[:,8]*regh + total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) + total_boxes = rerec(total_boxes.copy()) + total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) + + numbox = total_boxes.shape[0] + if numbox>0: + # second stage + tempimg = np.zeros((24,24,3,numbox)) + for k in range(0,numbox): + tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) + tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] + if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: + tempimg[:,:,:,k] = imresample(tmp, (24, 24)) + else: + return np.empty() + tempimg = (tempimg-127.5)*0.0078125 + tempimg1 = np.transpose(tempimg, (3,1,0,2)) + out = rnet([tempimg1]) + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + score = out1[1,:] + ipass = np.where(score>threshold[1]) + total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) + mv = out0[:,ipass[0]] + if total_boxes.shape[0]>0: + pick = nms(total_boxes, 0.7, 'Union') + total_boxes = total_boxes[pick,:] + total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) + total_boxes = rerec(total_boxes.copy()) + + numbox = total_boxes.shape[0] + if numbox>0: + # third stage + total_boxes = np.fix(total_boxes).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) + tempimg = np.zeros((48,48,3,numbox)) + for k in range(0,numbox): + tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) + tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] + if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: + tempimg[:,:,:,k] = imresample(tmp, (48, 48)) + else: + return np.empty() + tempimg = (tempimg-127.5)*0.0078125 + tempimg1 = np.transpose(tempimg, (3,1,0,2)) + out = onet([tempimg1]) + out0 = np.transpose(out[0]) + 
out1 = np.transpose(out[1]) + out2 = np.transpose(out[2]) + score = out2[1,:] + points = out1 + ipass = np.where(score>threshold[2]) + points = points[:,ipass[0]] + total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) + mv = out0[:,ipass[0]] + + w = total_boxes[:,2]-total_boxes[:,0]+1 + h = total_boxes[:,3]-total_boxes[:,1]+1 + points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 + points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 + if total_boxes.shape[0]>0: + total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) + pick = nms(total_boxes.copy(), 0.7, 'Min') + total_boxes = total_boxes[pick,:] + points = points[:,pick] + + return total_boxes, points + + +def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor): + """Detects faces in a list of images + images: list containing input images + detection_window_size_ratio: ratio of minimum face size to smallest image dimension + pnet, rnet, onet: caffemodel + threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold [0-1] + factor: the factor used to create a scaling pyramid of face sizes to detect in the image. + """ + all_scales = [None] * len(images) + images_with_boxes = [None] * len(images) + + for i in range(len(images)): + images_with_boxes[i] = {'total_boxes': np.empty((0, 9))} + + # create scale pyramid + for index, img in enumerate(images): + all_scales[index] = [] + h = img.shape[0] + w = img.shape[1] + minsize = int(detection_window_size_ratio * np.minimum(w, h)) + factor_count = 0 + minl = np.amin([h, w]) + if minsize <= 12: + minsize = 12 + + m = 12.0 / minsize + minl = minl * m + while minl >= 12: + all_scales[index].append(m * np.power(factor, factor_count)) + minl = minl * factor + factor_count += 1 + + # # # # # # # # # # # # # + # first stage - fast proposal network (pnet) to obtain face candidates + # # # # # # # # # # # # # + + images_obj_per_resolution = {} + + # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images + + for index, scales in enumerate(all_scales): + h = images[index].shape[0] + w = images[index].shape[1] + + for scale in scales: + hs = int(np.ceil(h * scale)) + ws = int(np.ceil(w * scale)) + + if (ws, hs) not in images_obj_per_resolution: + images_obj_per_resolution[(ws, hs)] = [] + + im_data = imresample(images[index], (hs, ws)) + im_data = (im_data - 127.5) * 0.0078125 + img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering + images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index}) + + for resolution in images_obj_per_resolution: + images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]] + outs = pnet(images_per_resolution) + + for index in range(len(outs[0])): + scale = images_obj_per_resolution[resolution][index]['scale'] + image_index = images_obj_per_resolution[resolution][index]['index'] + out0 = np.transpose(outs[0][index], (1, 0, 2)) + out1 = np.transpose(outs[1][index], (1, 0, 2)) + + boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0]) + + # inter-scale nms + pick = nms(boxes.copy(), 0.5, 'Union') + if boxes.size > 0 and pick.size > 0: + boxes = boxes[pick, :] + images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'], + boxes, + axis=0) + + for index, image_obj 
in enumerate(images_with_boxes): + numbox = image_obj['total_boxes'].shape[0] + if numbox > 0: + h = images[index].shape[0] + w = images[index].shape[1] + pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union') + image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] + regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw + qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh + qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw + qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh + image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]])) + image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) + image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) + + numbox = image_obj['total_boxes'].shape[0] + tempimg = np.zeros((24, 24, 3, numbox)) + + if numbox > 0: + for k in range(0, numbox): + tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) + tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] + if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: + tempimg[:, :, :, k] = imresample(tmp, (24, 24)) + else: + return np.empty() + + tempimg = (tempimg - 127.5) * 0.0078125 + image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) + + # # # # # # # # # # # # # + # second stage - refinement of face candidates with rnet + # # # # # # # # # # # # # + + bulk_rnet_input = np.empty((0, 24, 24, 3)) + for index, image_obj in enumerate(images_with_boxes): + if 'rnet_input' in image_obj: + bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0) + + out = rnet(bulk_rnet_input) + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + score = out1[1, :] + + i = 0 + for index, image_obj in enumerate(images_with_boxes): + if 'rnet_input' not in image_obj: + continue + + rnet_input_count = image_obj['rnet_input'].shape[0] + score_per_image = score[i:i + rnet_input_count] + out0_per_image = out0[:, i:i + rnet_input_count] + + ipass = np.where(score_per_image > threshold[1]) + image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), + np.expand_dims(score_per_image[ipass].copy(), 1)]) + + mv = out0_per_image[:, ipass[0]] + + if image_obj['total_boxes'].shape[0] > 0: + h = images[index].shape[0] + w = images[index].shape[1] + pick = nms(image_obj['total_boxes'], 0.7, 'Union') + image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] + image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick])) + image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) + + numbox = image_obj['total_boxes'].shape[0] + + if numbox > 0: + tempimg = np.zeros((48, 48, 3, numbox)) + image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32) + dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) + + for k in range(0, numbox): + tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) + tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] + if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: + tempimg[:, :, :, k] = 
imresample(tmp, (48, 48)) + else: + return np.empty() + tempimg = (tempimg - 127.5) * 0.0078125 + image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) + + i += rnet_input_count + + # # # # # # # # # # # # # + # third stage - further refinement and facial landmarks positions with onet + # # # # # # # # # # # # # + + bulk_onet_input = np.empty((0, 48, 48, 3)) + for index, image_obj in enumerate(images_with_boxes): + if 'onet_input' in image_obj: + bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0) + + out = onet(bulk_onet_input) + + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + out2 = np.transpose(out[2]) + score = out2[1, :] + points = out1 + + i = 0 + ret = [] + for index, image_obj in enumerate(images_with_boxes): + if 'onet_input' not in image_obj: + ret.append(None) + continue + + onet_input_count = image_obj['onet_input'].shape[0] + + out0_per_image = out0[:, i:i + onet_input_count] + score_per_image = score[i:i + onet_input_count] + points_per_image = points[:, i:i + onet_input_count] + + ipass = np.where(score_per_image > threshold[2]) + points_per_image = points_per_image[:, ipass[0]] + + image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), + np.expand_dims(score_per_image[ipass].copy(), 1)]) + mv = out0_per_image[:, ipass[0]] + + w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1 + h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1 + points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile( + image_obj['total_boxes'][:, 0], (5, 1)) - 1 + points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile( + image_obj['total_boxes'][:, 1], (5, 1)) - 1 + + if image_obj['total_boxes'].shape[0] > 0: + image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv)) + pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min') + image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] + points_per_image = points_per_image[:, pick] + + ret.append((image_obj['total_boxes'], points_per_image)) + else: + ret.append(None) + + i += onet_input_count + + return ret + + +# function [boundingbox] = bbreg(boundingbox,reg) +def bbreg(boundingbox,reg): + """Calibrate bounding boxes""" + if reg.shape[1]==1: + reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) + + w = boundingbox[:,2]-boundingbox[:,0]+1 + h = boundingbox[:,3]-boundingbox[:,1]+1 + b1 = boundingbox[:,0]+reg[:,0]*w + b2 = boundingbox[:,1]+reg[:,1]*h + b3 = boundingbox[:,2]+reg[:,2]*w + b4 = boundingbox[:,3]+reg[:,3]*h + boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) + return boundingbox + +def generateBoundingBox(imap, reg, scale, t): + """Use heatmap to generate bounding boxes""" + stride=2 + cellsize=12 + + imap = np.transpose(imap) + dx1 = np.transpose(reg[:,:,0]) + dy1 = np.transpose(reg[:,:,1]) + dx2 = np.transpose(reg[:,:,2]) + dy2 = np.transpose(reg[:,:,3]) + y, x = np.where(imap >= t) + if y.shape[0]==1: + dx1 = np.flipud(dx1) + dy1 = np.flipud(dy1) + dx2 = np.flipud(dx2) + dy2 = np.flipud(dy2) + score = imap[(y,x)] + reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) + if reg.size==0: + reg = np.empty((0,3)) + bb = np.transpose(np.vstack([y,x])) + q1 = np.fix((stride*bb+1)/scale) + q2 = np.fix((stride*bb+cellsize-1+1)/scale) + boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) + return boundingbox, reg + +# function pick = nms(boxes,threshold,type) +def nms(boxes, threshold, method): 
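+    """Greedy non-maximum suppression. boxes is an Nx5 array [x1, y1, x2, y2, score];
+    the highest-scoring box is kept and any remaining box whose overlap with it
+    exceeds `threshold` is discarded, where overlap is intersection-over-union for
+    method 'Union' or intersection over the smaller box area for method 'Min'."""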
+ if boxes.size==0: + return np.empty((0,3)) + x1 = boxes[:,0] + y1 = boxes[:,1] + x2 = boxes[:,2] + y2 = boxes[:,3] + s = boxes[:,4] + area = (x2-x1+1) * (y2-y1+1) + I = np.argsort(s) + pick = np.zeros_like(s, dtype=np.int16) + counter = 0 + while I.size>0: + i = I[-1] + pick[counter] = i + counter += 1 + idx = I[0:-1] + xx1 = np.maximum(x1[i], x1[idx]) + yy1 = np.maximum(y1[i], y1[idx]) + xx2 = np.minimum(x2[i], x2[idx]) + yy2 = np.minimum(y2[i], y2[idx]) + w = np.maximum(0.0, xx2-xx1+1) + h = np.maximum(0.0, yy2-yy1+1) + inter = w * h + if method is 'Min': + o = inter / np.minimum(area[i], area[idx]) + else: + o = inter / (area[i] + area[idx] - inter) + I = I[np.where(o<=threshold)] + pick = pick[0:counter] + return pick + +# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) +def pad(total_boxes, w, h): + """Compute the padding coordinates (pad the bounding boxes to square)""" + tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) + tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) + numbox = total_boxes.shape[0] + + dx = np.ones((numbox), dtype=np.int32) + dy = np.ones((numbox), dtype=np.int32) + edx = tmpw.copy().astype(np.int32) + edy = tmph.copy().astype(np.int32) + + x = total_boxes[:,0].copy().astype(np.int32) + y = total_boxes[:,1].copy().astype(np.int32) + ex = total_boxes[:,2].copy().astype(np.int32) + ey = total_boxes[:,3].copy().astype(np.int32) + + tmp = np.where(ex>w) + edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) + ex[tmp] = w + + tmp = np.where(ey>h) + edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) + ey[tmp] = h + + tmp = np.where(x<1) + dx.flat[tmp] = np.expand_dims(2-x[tmp],1) + x[tmp] = 1 + + tmp = np.where(y<1) + dy.flat[tmp] = np.expand_dims(2-y[tmp],1) + y[tmp] = 1 + + return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph + +# function [bboxA] = rerec(bboxA) +def rerec(bboxA): + """Convert bboxA to square.""" + h = bboxA[:,3]-bboxA[:,1] + w = bboxA[:,2]-bboxA[:,0] + l = np.maximum(w, h) + bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 + bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 + bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) + return bboxA + +def imresample(img, sz): + im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_LINEAR) #@UndefinedVariable + return im_data + + # This method is kept for debugging purpose +# h=img.shape[0] +# w=img.shape[1] +# hs, ws = sz +# dx = float(w) / ws +# dy = float(h) / hs +# im_data = np.zeros((hs,ws,3)) +# for a1 in range(0,hs): +# for a2 in range(0,ws): +# for a3 in range(0,3): +# im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3] +# return im_data + diff --git a/gpufmkmgr/__init__.py b/gpufmkmgr/__init__.py new file mode 100644 index 0000000..1984bfb --- /dev/null +++ b/gpufmkmgr/__init__.py @@ -0,0 +1 @@ +from .gpufmkmgr import * \ No newline at end of file diff --git a/gpufmkmgr/gpufmkmgr.py b/gpufmkmgr/gpufmkmgr.py new file mode 100644 index 0000000..7fe6ead --- /dev/null +++ b/gpufmkmgr/gpufmkmgr.py @@ -0,0 +1,244 @@ +import os +import sys +import contextlib + +from utils import std_utils +from .pynvml import * + +dlib_module = None +def import_dlib(device_idx): + global dlib_module + if dlib_module is not None: + raise Exception ('Multiple import of dlib is not allowed, reorganize your program.') + + import dlib + dlib_module = dlib + dlib_module.cuda.set_device(device_idx) + return dlib_module + +tf_module = None +tf_session = None +keras_module = None +keras_contrib_module = None +keras_vggface_module = None + +def get_tf_session(): + global 
tf_session + return tf_session + +#allow_growth=False for keras model +#allow_growth=True for tf only model +def import_tf( device_idxs_list, allow_growth ): + global tf_module + global tf_session + + if tf_module is not None: + raise Exception ('Multiple import of tf is not allowed, reorganize your program.') + + if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': + suppressor = std_utils.suppress_stdout_stderr().__enter__() + else: + suppressor = None + + if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): + os.environ.pop('CUDA_VISIBLE_DEVICES') + + os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' + + import tensorflow as tf + tf_module = tf + + visible_device_list = '' + for idx in device_idxs_list: visible_device_list += str(idx) + ',' + visible_device_list = visible_device_list[:-1] + + config = tf_module.ConfigProto() + config.gpu_options.allow_growth = allow_growth + config.gpu_options.visible_device_list=visible_device_list + config.gpu_options.force_gpu_compatible = True + tf_session = tf_module.Session(config=config) + + if suppressor is not None: + suppressor.__exit__() + + return tf_module + +def finalize_tf(): + global tf_module + global tf_session + + tf_session.close() + tf_session = None + tf_module = None + +def import_keras(): + global keras_module + + if keras_module is not None: + raise Exception ('Multiple import of keras is not allowed, reorganize your program.') + + sess = get_tf_session() + if sess is None: + raise Exception ('No TF session found. Import TF first.') + + if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': + suppressor = std_utils.suppress_stdout_stderr().__enter__() + + import keras + + keras.backend.tensorflow_backend.set_session(sess) + + if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': + suppressor.__exit__() + + keras_module = keras + return keras_module + +def finalize_keras(): + global keras_module + keras_module.backend.clear_session() + keras_module = None + +def import_keras_contrib(): + global keras_contrib_module + + if keras_contrib_module is not None: + raise Exception ('Multiple import of keras_contrib is not allowed, reorganize your program.') + import keras_contrib + keras_contrib_module = keras_contrib + return keras_contrib_module + +def finalize_keras_contrib(): + global keras_contrib_module + keras_contrib_module = None + +def import_keras_vggface(optional=False): + global keras_vggface_module + + if keras_vggface_module is not None: + raise Exception ('Multiple import of keras_vggface_module is not allowed, reorganize your program.') + + try: + import keras_vggface + except: + if optional: + print ("Unable to import keras_vggface. It will not be used.") + else: + raise Exception ("Unable to import keras_vggface.") + keras_vggface = None + + keras_vggface_module = keras_vggface + return keras_vggface_module + +def finalize_keras_vggface(): + global keras_vggface_module + keras_vggface_module = None + +#returns [ (device_idx, device_name), ... 
] +def getDevicesWithAtLeastFreeMemory(freememsize): + result = [] + + nvmlInit() + for i in range(0, nvmlDeviceGetCount() ): + handle = nvmlDeviceGetHandleByIndex(i) + memInfo = nvmlDeviceGetMemoryInfo( handle ) + if (memInfo.total - memInfo.used) >= freememsize: + result.append (i) + + nvmlShutdown() + + return result + +def getDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb): + result = [] + + nvmlInit() + for i in range(0, nvmlDeviceGetCount() ): + handle = nvmlDeviceGetHandleByIndex(i) + memInfo = nvmlDeviceGetMemoryInfo( handle ) + if (memInfo.total) >= totalmemsize_gb*1024*1024*1024: + result.append (i) + + nvmlShutdown() + + return result +def getAllDevicesIdxsList (): + nvmlInit() + result = [ i for i in range(0, nvmlDeviceGetCount() ) ] + nvmlShutdown() + return result + +def getDeviceVRAMFree (idx): + result = 0 + nvmlInit() + if idx < nvmlDeviceGetCount(): + handle = nvmlDeviceGetHandleByIndex(idx) + memInfo = nvmlDeviceGetMemoryInfo( handle ) + result = (memInfo.total - memInfo.used) + nvmlShutdown() + return result + +def getDeviceVRAMTotalGb (idx): + result = 0 + nvmlInit() + if idx < nvmlDeviceGetCount(): + handle = nvmlDeviceGetHandleByIndex(idx) + memInfo = nvmlDeviceGetMemoryInfo( handle ) + result = memInfo.total / (1024*1024*1024) + nvmlShutdown() + return result + +def getBestDeviceIdx(): + nvmlInit() + idx = -1 + idx_mem = 0 + for i in range(0, nvmlDeviceGetCount() ): + handle = nvmlDeviceGetHandleByIndex(i) + memInfo = nvmlDeviceGetMemoryInfo( handle ) + if memInfo.total > idx_mem: + idx = i + idx_mem = memInfo.total + + nvmlShutdown() + return idx + +def getWorstDeviceIdx(): + nvmlInit() + idx = -1 + idx_mem = sys.maxsize + for i in range(0, nvmlDeviceGetCount() ): + handle = nvmlDeviceGetHandleByIndex(i) + memInfo = nvmlDeviceGetMemoryInfo( handle ) + if memInfo.total < idx_mem: + idx = i + idx_mem = memInfo.total + + nvmlShutdown() + return idx + +def isValidDeviceIdx(idx): + nvmlInit() + result = (idx < nvmlDeviceGetCount()) + nvmlShutdown() + return result + +def getDeviceIdxsEqualModel(idx): + result = [] + + nvmlInit() + idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode() + + for i in range(0, nvmlDeviceGetCount() ): + if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name: + result.append (i) + + nvmlShutdown() + return result + +def getDeviceName (idx): + result = '' + nvmlInit() + if idx < nvmlDeviceGetCount(): + result = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode() + nvmlShutdown() + return result \ No newline at end of file diff --git a/gpufmkmgr/pynvml.py b/gpufmkmgr/pynvml.py new file mode 100644 index 0000000..c4b2600 --- /dev/null +++ b/gpufmkmgr/pynvml.py @@ -0,0 +1,1701 @@ +##### +# Copyright (c) 2011-2015, NVIDIA Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the NVIDIA Corporation nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +##### + +## +# Python bindings for the NVML library +## +from ctypes import * +from ctypes.util import find_library +import sys +import os +import threading +import string + +## C Type mappings ## +## Enums +_nvmlEnableState_t = c_uint +NVML_FEATURE_DISABLED = 0 +NVML_FEATURE_ENABLED = 1 + +_nvmlBrandType_t = c_uint +NVML_BRAND_UNKNOWN = 0 +NVML_BRAND_QUADRO = 1 +NVML_BRAND_TESLA = 2 +NVML_BRAND_NVS = 3 +NVML_BRAND_GRID = 4 +NVML_BRAND_GEFORCE = 5 +NVML_BRAND_COUNT = 6 + +_nvmlTemperatureThresholds_t = c_uint +NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0 +NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1 +NVML_TEMPERATURE_THRESHOLD_COUNT = 1 + +_nvmlTemperatureSensors_t = c_uint +NVML_TEMPERATURE_GPU = 0 +NVML_TEMPERATURE_COUNT = 1 + +_nvmlComputeMode_t = c_uint +NVML_COMPUTEMODE_DEFAULT = 0 +NVML_COMPUTEMODE_EXCLUSIVE_THREAD = 1 +NVML_COMPUTEMODE_PROHIBITED = 2 +NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 +NVML_COMPUTEMODE_COUNT = 4 + +_nvmlMemoryLocation_t = c_uint +NVML_MEMORY_LOCATION_L1_CACHE = 0 +NVML_MEMORY_LOCATION_L2_CACHE = 1 +NVML_MEMORY_LOCATION_DEVICE_MEMORY = 2 +NVML_MEMORY_LOCATION_REGISTER_FILE = 3 +NVML_MEMORY_LOCATION_TEXTURE_MEMORY = 4 +NVML_MEMORY_LOCATION_COUNT = 5 + +# These are deprecated, instead use _nvmlMemoryErrorType_t +_nvmlEccBitType_t = c_uint +NVML_SINGLE_BIT_ECC = 0 +NVML_DOUBLE_BIT_ECC = 1 +NVML_ECC_ERROR_TYPE_COUNT = 2 + +_nvmlEccCounterType_t = c_uint +NVML_VOLATILE_ECC = 0 +NVML_AGGREGATE_ECC = 1 +NVML_ECC_COUNTER_TYPE_COUNT = 2 + +_nvmlMemoryErrorType_t = c_uint +NVML_MEMORY_ERROR_TYPE_CORRECTED = 0 +NVML_MEMORY_ERROR_TYPE_UNCORRECTED = 1 +NVML_MEMORY_ERROR_TYPE_COUNT = 2 + +_nvmlClockType_t = c_uint +NVML_CLOCK_GRAPHICS = 0 +NVML_CLOCK_SM = 1 +NVML_CLOCK_MEM = 2 +NVML_CLOCK_COUNT = 3 + +_nvmlDriverModel_t = c_uint +NVML_DRIVER_WDDM = 0 +NVML_DRIVER_WDM = 1 + +_nvmlPstates_t = c_uint +NVML_PSTATE_0 = 0 +NVML_PSTATE_1 = 1 +NVML_PSTATE_2 = 2 +NVML_PSTATE_3 = 3 +NVML_PSTATE_4 = 4 +NVML_PSTATE_5 = 5 +NVML_PSTATE_6 = 6 +NVML_PSTATE_7 = 7 +NVML_PSTATE_8 = 8 +NVML_PSTATE_9 = 9 +NVML_PSTATE_10 = 10 +NVML_PSTATE_11 = 11 +NVML_PSTATE_12 = 12 +NVML_PSTATE_13 = 13 +NVML_PSTATE_14 = 14 +NVML_PSTATE_15 = 15 +NVML_PSTATE_UNKNOWN = 32 + +_nvmlInforomObject_t = c_uint +NVML_INFOROM_OEM = 0 +NVML_INFOROM_ECC = 1 +NVML_INFOROM_POWER = 2 +NVML_INFOROM_COUNT = 3 + +_nvmlReturn_t = c_uint +NVML_SUCCESS = 0 +NVML_ERROR_UNINITIALIZED = 1 +NVML_ERROR_INVALID_ARGUMENT = 2 +NVML_ERROR_NOT_SUPPORTED = 3 +NVML_ERROR_NO_PERMISSION = 4 +NVML_ERROR_ALREADY_INITIALIZED = 5 +NVML_ERROR_NOT_FOUND = 6 +NVML_ERROR_INSUFFICIENT_SIZE = 7 +NVML_ERROR_INSUFFICIENT_POWER = 8 +NVML_ERROR_DRIVER_NOT_LOADED = 9 +NVML_ERROR_TIMEOUT = 10 +NVML_ERROR_IRQ_ISSUE = 11 +NVML_ERROR_LIBRARY_NOT_FOUND = 12 +NVML_ERROR_FUNCTION_NOT_FOUND = 13 
+NVML_ERROR_CORRUPTED_INFOROM = 14 +NVML_ERROR_GPU_IS_LOST = 15 +NVML_ERROR_RESET_REQUIRED = 16 +NVML_ERROR_OPERATING_SYSTEM = 17 +NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18 +NVML_ERROR_UNKNOWN = 999 + +_nvmlFanState_t = c_uint +NVML_FAN_NORMAL = 0 +NVML_FAN_FAILED = 1 + +_nvmlLedColor_t = c_uint +NVML_LED_COLOR_GREEN = 0 +NVML_LED_COLOR_AMBER = 1 + +_nvmlGpuOperationMode_t = c_uint +NVML_GOM_ALL_ON = 0 +NVML_GOM_COMPUTE = 1 +NVML_GOM_LOW_DP = 2 + +_nvmlPageRetirementCause_t = c_uint +NVML_PAGE_RETIREMENT_CAUSE_DOUBLE_BIT_ECC_ERROR = 0 +NVML_PAGE_RETIREMENT_CAUSE_MULTIPLE_SINGLE_BIT_ECC_ERRORS = 1 +NVML_PAGE_RETIREMENT_CAUSE_COUNT = 2 + +_nvmlRestrictedAPI_t = c_uint +NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0 +NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1 +NVML_RESTRICTED_API_COUNT = 2 + +_nvmlBridgeChipType_t = c_uint +NVML_BRIDGE_CHIP_PLX = 0 +NVML_BRIDGE_CHIP_BRO4 = 1 +NVML_MAX_PHYSICAL_BRIDGE = 128 + +_nvmlValueType_t = c_uint +NVML_VALUE_TYPE_DOUBLE = 0 +NVML_VALUE_TYPE_UNSIGNED_INT = 1 +NVML_VALUE_TYPE_UNSIGNED_LONG = 2 +NVML_VALUE_TYPE_UNSIGNED_LONG_LONG = 3 +NVML_VALUE_TYPE_COUNT = 4 + +_nvmlPerfPolicyType_t = c_uint +NVML_PERF_POLICY_POWER = 0 +NVML_PERF_POLICY_THERMAL = 1 +NVML_PERF_POLICY_COUNT = 2 + +_nvmlSamplingType_t = c_uint +NVML_TOTAL_POWER_SAMPLES = 0 +NVML_GPU_UTILIZATION_SAMPLES = 1 +NVML_MEMORY_UTILIZATION_SAMPLES = 2 +NVML_ENC_UTILIZATION_SAMPLES = 3 +NVML_DEC_UTILIZATION_SAMPLES = 4 +NVML_PROCESSOR_CLK_SAMPLES = 5 +NVML_MEMORY_CLK_SAMPLES = 6 +NVML_SAMPLINGTYPE_COUNT = 7 + +_nvmlPcieUtilCounter_t = c_uint +NVML_PCIE_UTIL_TX_BYTES = 0 +NVML_PCIE_UTIL_RX_BYTES = 1 +NVML_PCIE_UTIL_COUNT = 2 + +_nvmlGpuTopologyLevel_t = c_uint +NVML_TOPOLOGY_INTERNAL = 0 +NVML_TOPOLOGY_SINGLE = 10 +NVML_TOPOLOGY_MULTIPLE = 20 +NVML_TOPOLOGY_HOSTBRIDGE = 30 +NVML_TOPOLOGY_CPU = 40 +NVML_TOPOLOGY_SYSTEM = 50 + +# C preprocessor defined values +nvmlFlagDefault = 0 +nvmlFlagForce = 1 + +# buffer size +NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE = 16 +NVML_DEVICE_UUID_BUFFER_SIZE = 80 +NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE = 81 +NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE = 80 +NVML_DEVICE_NAME_BUFFER_SIZE = 64 +NVML_DEVICE_SERIAL_BUFFER_SIZE = 30 +NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE = 32 +NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE = 16 + +NVML_VALUE_NOT_AVAILABLE_ulonglong = c_ulonglong(-1) +NVML_VALUE_NOT_AVAILABLE_uint = c_uint(-1) + +## Lib loading ## +nvmlLib = None +libLoadLock = threading.Lock() +_nvmlLib_refcount = 0 # Incremented on each nvmlInit and decremented on nvmlShutdown + +## Error Checking ## +class NVMLError(Exception): + _valClassMapping = dict() + # List of currently known error codes + _errcode_to_string = { + NVML_ERROR_UNINITIALIZED: "Uninitialized", + NVML_ERROR_INVALID_ARGUMENT: "Invalid Argument", + NVML_ERROR_NOT_SUPPORTED: "Not Supported", + NVML_ERROR_NO_PERMISSION: "Insufficient Permissions", + NVML_ERROR_ALREADY_INITIALIZED: "Already Initialized", + NVML_ERROR_NOT_FOUND: "Not Found", + NVML_ERROR_INSUFFICIENT_SIZE: "Insufficient Size", + NVML_ERROR_INSUFFICIENT_POWER: "Insufficient External Power", + NVML_ERROR_DRIVER_NOT_LOADED: "Driver Not Loaded", + NVML_ERROR_TIMEOUT: "Timeout", + NVML_ERROR_IRQ_ISSUE: "Interrupt Request Issue", + NVML_ERROR_LIBRARY_NOT_FOUND: "NVML Shared Library Not Found", + NVML_ERROR_FUNCTION_NOT_FOUND: "Function Not Found", + NVML_ERROR_CORRUPTED_INFOROM: "Corrupted infoROM", + NVML_ERROR_GPU_IS_LOST: "GPU is lost", + NVML_ERROR_RESET_REQUIRED: "GPU requires restart", + NVML_ERROR_OPERATING_SYSTEM: "The operating system has blocked the 
request.", + NVML_ERROR_LIB_RM_VERSION_MISMATCH: "RM has detected an NVML/RM version mismatch.", + NVML_ERROR_UNKNOWN: "Unknown Error", + } + def __new__(typ, value): + ''' + Maps value to a proper subclass of NVMLError. + See _extractNVMLErrorsAsClasses function for more details + ''' + if typ == NVMLError: + typ = NVMLError._valClassMapping.get(value, typ) + obj = Exception.__new__(typ) + obj.value = value + return obj + def __str__(self): + try: + if self.value not in NVMLError._errcode_to_string: + NVMLError._errcode_to_string[self.value] = str(nvmlErrorString(self.value)) + return NVMLError._errcode_to_string[self.value] + except NVMLError_Uninitialized: + return "NVML Error with code %d" % self.value + def __eq__(self, other): + return self.value == other.value + +def _extractNVMLErrorsAsClasses(): + ''' + Generates a hierarchy of classes on top of NVMLError class. + + Each NVML Error gets a new NVMLError subclass. This way try,except blocks can filter appropriate + exceptions more easily. + + NVMLError is a parent class. Each NVML_ERROR_* gets it's own subclass. + e.g. NVML_ERROR_ALREADY_INITIALIZED will be turned into NVMLError_AlreadyInitialized + ''' + this_module = sys.modules[__name__] + nvmlErrorsNames = filter(lambda x: x.startswith("NVML_ERROR_"), dir(this_module)) + for err_name in nvmlErrorsNames: + # e.g. Turn NVML_ERROR_ALREADY_INITIALIZED into NVMLError_AlreadyInitialized + class_name = "NVMLError_" + string.capwords(err_name.replace("NVML_ERROR_", ""), "_").replace("_", "") + err_val = getattr(this_module, err_name) + def gen_new(val): + def new(typ): + obj = NVMLError.__new__(typ, val) + return obj + return new + new_error_class = type(class_name, (NVMLError,), {'__new__': gen_new(err_val)}) + new_error_class.__module__ = __name__ + setattr(this_module, class_name, new_error_class) + NVMLError._valClassMapping[err_val] = new_error_class +_extractNVMLErrorsAsClasses() + +def _nvmlCheckReturn(ret): + if (ret != NVML_SUCCESS): + raise NVMLError(ret) + return ret + +## Function access ## +_nvmlGetFunctionPointer_cache = dict() # function pointers are cached to prevent unnecessary libLoadLock locking +def _nvmlGetFunctionPointer(name): + global nvmlLib + + if name in _nvmlGetFunctionPointer_cache: + return _nvmlGetFunctionPointer_cache[name] + + libLoadLock.acquire() + try: + # ensure library was loaded + if (nvmlLib == None): + raise NVMLError(NVML_ERROR_UNINITIALIZED) + try: + _nvmlGetFunctionPointer_cache[name] = getattr(nvmlLib, name) + return _nvmlGetFunctionPointer_cache[name] + except AttributeError: + raise NVMLError(NVML_ERROR_FUNCTION_NOT_FOUND) + finally: + # lock is always freed + libLoadLock.release() + +## Alternative object +# Allows the object to be printed +# Allows mismatched types to be assigned +# - like None when the Structure variant requires c_uint +class nvmlFriendlyObject(object): + def __init__(self, dictionary): + for x in dictionary: + setattr(self, x, dictionary[x]) + def __str__(self): + return self.__dict__.__str__() + +def nvmlStructToFriendlyObject(struct): + d = {} + for x in struct._fields_: + key = x[0] + value = getattr(struct, key) + d[key] = value + obj = nvmlFriendlyObject(d) + return obj + +# pack the object so it can be passed to the NVML library +def nvmlFriendlyObjectToStruct(obj, model): + for x in model._fields_: + key = x[0] + value = obj.__dict__[key] + setattr(model, key, value) + return model + +## Unit structures +class struct_c_nvmlUnit_t(Structure): + pass # opaque handle +c_nvmlUnit_t = POINTER(struct_c_nvmlUnit_t) + 
+class _PrintableStructure(Structure): + """ + Abstract class that produces nicer __str__ output than ctypes.Structure. + e.g. instead of: + >>> print str(obj) + + this class will print + class_name(field_name: formatted_value, field_name: formatted_value) + + _fmt_ dictionary of -> + e.g. class that has _field_ 'hex_value', c_uint could be formatted with + _fmt_ = {"hex_value" : "%08X"} + to produce nicer output. + Default fomratting string for all fields can be set with key "" like: + _fmt_ = {"" : "%d MHz"} # e.g all values are numbers in MHz. + If not set it's assumed to be just "%s" + + Exact format of returned str from this class is subject to change in the future. + """ + _fmt_ = {} + def __str__(self): + result = [] + for x in self._fields_: + key = x[0] + value = getattr(self, key) + fmt = "%s" + if key in self._fmt_: + fmt = self._fmt_[key] + elif "" in self._fmt_: + fmt = self._fmt_[""] + result.append(("%s: " + fmt) % (key, value)) + return self.__class__.__name__ + "(" + string.join(result, ", ") + ")" + +class c_nvmlUnitInfo_t(_PrintableStructure): + _fields_ = [ + ('name', c_char * 96), + ('id', c_char * 96), + ('serial', c_char * 96), + ('firmwareVersion', c_char * 96), + ] + +class c_nvmlLedState_t(_PrintableStructure): + _fields_ = [ + ('cause', c_char * 256), + ('color', _nvmlLedColor_t), + ] + +class c_nvmlPSUInfo_t(_PrintableStructure): + _fields_ = [ + ('state', c_char * 256), + ('current', c_uint), + ('voltage', c_uint), + ('power', c_uint), + ] + +class c_nvmlUnitFanInfo_t(_PrintableStructure): + _fields_ = [ + ('speed', c_uint), + ('state', _nvmlFanState_t), + ] + +class c_nvmlUnitFanSpeeds_t(_PrintableStructure): + _fields_ = [ + ('fans', c_nvmlUnitFanInfo_t * 24), + ('count', c_uint) + ] + +## Device structures +class struct_c_nvmlDevice_t(Structure): + pass # opaque handle +c_nvmlDevice_t = POINTER(struct_c_nvmlDevice_t) + +class nvmlPciInfo_t(_PrintableStructure): + _fields_ = [ + ('busId', c_char * 16), + ('domain', c_uint), + ('bus', c_uint), + ('device', c_uint), + ('pciDeviceId', c_uint), + + # Added in 2.285 + ('pciSubSystemId', c_uint), + ('reserved0', c_uint), + ('reserved1', c_uint), + ('reserved2', c_uint), + ('reserved3', c_uint), + ] + _fmt_ = { + 'domain' : "0x%04X", + 'bus' : "0x%02X", + 'device' : "0x%02X", + 'pciDeviceId' : "0x%08X", + 'pciSubSystemId' : "0x%08X", + } + +class c_nvmlMemory_t(_PrintableStructure): + _fields_ = [ + ('total', c_ulonglong), + ('free', c_ulonglong), + ('used', c_ulonglong), + ] + _fmt_ = {'': "%d B"} + +class c_nvmlBAR1Memory_t(_PrintableStructure): + _fields_ = [ + ('bar1Total', c_ulonglong), + ('bar1Free', c_ulonglong), + ('bar1Used', c_ulonglong), + ] + _fmt_ = {'': "%d B"} + +# On Windows with the WDDM driver, usedGpuMemory is reported as None +# Code that processes this structure should check for None, I.E. 
+# +# if (info.usedGpuMemory == None): +# # TODO handle the error +# pass +# else: +# print("Using %d MiB of memory" % (info.usedGpuMemory / 1024 / 1024)) +# +# See NVML documentation for more information +class c_nvmlProcessInfo_t(_PrintableStructure): + _fields_ = [ + ('pid', c_uint), + ('usedGpuMemory', c_ulonglong), + ] + _fmt_ = {'usedGpuMemory': "%d B"} + +class c_nvmlBridgeChipInfo_t(_PrintableStructure): + _fields_ = [ + ('type', _nvmlBridgeChipType_t), + ('fwVersion', c_uint), + ] + +class c_nvmlBridgeChipHierarchy_t(_PrintableStructure): + _fields_ = [ + ('bridgeCount', c_uint), + ('bridgeChipInfo', c_nvmlBridgeChipInfo_t * 128), + ] + +class c_nvmlEccErrorCounts_t(_PrintableStructure): + _fields_ = [ + ('l1Cache', c_ulonglong), + ('l2Cache', c_ulonglong), + ('deviceMemory', c_ulonglong), + ('registerFile', c_ulonglong), + ] + +class c_nvmlUtilization_t(_PrintableStructure): + _fields_ = [ + ('gpu', c_uint), + ('memory', c_uint), + ] + _fmt_ = {'': "%d %%"} + +# Added in 2.285 +class c_nvmlHwbcEntry_t(_PrintableStructure): + _fields_ = [ + ('hwbcId', c_uint), + ('firmwareVersion', c_char * 32), + ] + +class c_nvmlValue_t(Union): + _fields_ = [ + ('dVal', c_double), + ('uiVal', c_uint), + ('ulVal', c_ulong), + ('ullVal', c_ulonglong), + ] + +class c_nvmlSample_t(_PrintableStructure): + _fields_ = [ + ('timeStamp', c_ulonglong), + ('sampleValue', c_nvmlValue_t), + ] + +class c_nvmlViolationTime_t(_PrintableStructure): + _fields_ = [ + ('referenceTime', c_ulonglong), + ('violationTime', c_ulonglong), + ] + +## Event structures +class struct_c_nvmlEventSet_t(Structure): + pass # opaque handle +c_nvmlEventSet_t = POINTER(struct_c_nvmlEventSet_t) + +nvmlEventTypeSingleBitEccError = 0x0000000000000001 +nvmlEventTypeDoubleBitEccError = 0x0000000000000002 +nvmlEventTypePState = 0x0000000000000004 +nvmlEventTypeXidCriticalError = 0x0000000000000008 +nvmlEventTypeClock = 0x0000000000000010 +nvmlEventTypeNone = 0x0000000000000000 +nvmlEventTypeAll = ( + nvmlEventTypeNone | + nvmlEventTypeSingleBitEccError | + nvmlEventTypeDoubleBitEccError | + nvmlEventTypePState | + nvmlEventTypeClock | + nvmlEventTypeXidCriticalError + ) + +## Clock Throttle Reasons defines +nvmlClocksThrottleReasonGpuIdle = 0x0000000000000001 +nvmlClocksThrottleReasonApplicationsClocksSetting = 0x0000000000000002 +nvmlClocksThrottleReasonUserDefinedClocks = nvmlClocksThrottleReasonApplicationsClocksSetting # deprecated, use nvmlClocksThrottleReasonApplicationsClocksSetting +nvmlClocksThrottleReasonSwPowerCap = 0x0000000000000004 +nvmlClocksThrottleReasonHwSlowdown = 0x0000000000000008 +nvmlClocksThrottleReasonUnknown = 0x8000000000000000 +nvmlClocksThrottleReasonNone = 0x0000000000000000 +nvmlClocksThrottleReasonAll = ( + nvmlClocksThrottleReasonNone | + nvmlClocksThrottleReasonGpuIdle | + nvmlClocksThrottleReasonApplicationsClocksSetting | + nvmlClocksThrottleReasonSwPowerCap | + nvmlClocksThrottleReasonHwSlowdown | + nvmlClocksThrottleReasonUnknown + ) + +class c_nvmlEventData_t(_PrintableStructure): + _fields_ = [ + ('device', c_nvmlDevice_t), + ('eventType', c_ulonglong), + ('eventData', c_ulonglong) + ] + _fmt_ = {'eventType': "0x%08X"} + +class c_nvmlAccountingStats_t(_PrintableStructure): + _fields_ = [ + ('gpuUtilization', c_uint), + ('memoryUtilization', c_uint), + ('maxMemoryUsage', c_ulonglong), + ('time', c_ulonglong), + ('startTime', c_ulonglong), + ('isRunning', c_uint), + ('reserved', c_uint * 5) + ] + +## C function wrappers ## +def nvmlInit(): + _LoadNvmlLibrary() + + # + # Initialize the library + # + fn 
= _nvmlGetFunctionPointer("nvmlInit_v2") + ret = fn() + _nvmlCheckReturn(ret) + + # Atomically update refcount + global _nvmlLib_refcount + libLoadLock.acquire() + _nvmlLib_refcount += 1 + libLoadLock.release() + return None + +def _LoadNvmlLibrary(): + ''' + Load the library if it isn't loaded already + ''' + global nvmlLib + + if (nvmlLib == None): + # lock to ensure only one caller loads the library + libLoadLock.acquire() + + try: + # ensure the library still isn't loaded + if (nvmlLib == None): + try: + if (sys.platform[:3] == "win"): + # cdecl calling convention + # load nvml.dll from %ProgramFiles%/NVIDIA Corporation/NVSMI/nvml.dll + nvmlLib = CDLL(os.path.join(os.getenv("ProgramFiles", "C:/Program Files"), "NVIDIA Corporation/NVSMI/nvml.dll")) + else: + # assume linux + nvmlLib = CDLL("libnvidia-ml.so.1") + except OSError as ose: + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + if (nvmlLib == None): + _nvmlCheckReturn(NVML_ERROR_LIBRARY_NOT_FOUND) + finally: + # lock is always freed + libLoadLock.release() + +def nvmlShutdown(): + # + # Leave the library loaded, but shutdown the interface + # + fn = _nvmlGetFunctionPointer("nvmlShutdown") + ret = fn() + _nvmlCheckReturn(ret) + + # Atomically update refcount + global _nvmlLib_refcount + libLoadLock.acquire() + if (0 < _nvmlLib_refcount): + _nvmlLib_refcount -= 1 + libLoadLock.release() + return None + +# Added in 2.285 +def nvmlErrorString(result): + fn = _nvmlGetFunctionPointer("nvmlErrorString") + fn.restype = c_char_p # otherwise return is an int + ret = fn(result) + return ret + +# Added in 2.285 +def nvmlSystemGetNVMLVersion(): + c_version = create_string_buffer(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlSystemGetNVMLVersion") + ret = fn(c_version, c_uint(NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlSystemGetProcessName(pid): + c_name = create_string_buffer(1024) + fn = _nvmlGetFunctionPointer("nvmlSystemGetProcessName") + ret = fn(c_uint(pid), c_name, c_uint(1024)) + _nvmlCheckReturn(ret) + return c_name.value + +def nvmlSystemGetDriverVersion(): + c_version = create_string_buffer(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlSystemGetDriverVersion") + ret = fn(c_version, c_uint(NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlSystemGetHicVersion(): + c_count = c_uint(0) + hics = None + fn = _nvmlGetFunctionPointer("nvmlSystemGetHicVersion") + + # get the count + ret = fn(byref(c_count), None) + + # this should only fail with insufficient size + if ((ret != NVML_SUCCESS) and + (ret != NVML_ERROR_INSUFFICIENT_SIZE)): + raise NVMLError(ret) + + # if there are no hics + if (c_count.value == 0): + return [] + + hic_array = c_nvmlHwbcEntry_t * c_count.value + hics = hic_array() + ret = fn(byref(c_count), hics) + _nvmlCheckReturn(ret) + return hics + +## Unit get functions +def nvmlUnitGetCount(): + c_count = c_uint() + fn = _nvmlGetFunctionPointer("nvmlUnitGetCount") + ret = fn(byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlUnitGetHandleByIndex(index): + c_index = c_uint(index) + unit = c_nvmlUnit_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetHandleByIndex") + ret = fn(c_index, byref(unit)) + _nvmlCheckReturn(ret) + return unit + +def nvmlUnitGetUnitInfo(unit): + c_info = c_nvmlUnitInfo_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetUnitInfo") + ret = fn(unit, byref(c_info)) + _nvmlCheckReturn(ret) + 
return c_info + +def nvmlUnitGetLedState(unit): + c_state = c_nvmlLedState_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetLedState") + ret = fn(unit, byref(c_state)) + _nvmlCheckReturn(ret) + return c_state + +def nvmlUnitGetPsuInfo(unit): + c_info = c_nvmlPSUInfo_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetPsuInfo") + ret = fn(unit, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlUnitGetTemperature(unit, type): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlUnitGetTemperature") + ret = fn(unit, c_uint(type), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +def nvmlUnitGetFanSpeedInfo(unit): + c_speeds = c_nvmlUnitFanSpeeds_t() + fn = _nvmlGetFunctionPointer("nvmlUnitGetFanSpeedInfo") + ret = fn(unit, byref(c_speeds)) + _nvmlCheckReturn(ret) + return c_speeds + +# added to API +def nvmlUnitGetDeviceCount(unit): + c_count = c_uint(0) + # query the unit to determine device count + fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") + ret = fn(unit, byref(c_count), None) + if (ret == NVML_ERROR_INSUFFICIENT_SIZE): + ret = NVML_SUCCESS + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlUnitGetDevices(unit): + c_count = c_uint(nvmlUnitGetDeviceCount(unit)) + device_array = c_nvmlDevice_t * c_count.value + c_devices = device_array() + fn = _nvmlGetFunctionPointer("nvmlUnitGetDevices") + ret = fn(unit, byref(c_count), c_devices) + _nvmlCheckReturn(ret) + return c_devices + +## Device get functions +def nvmlDeviceGetCount(): + c_count = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCount_v2") + ret = fn(byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlDeviceGetHandleByIndex(index): + c_index = c_uint(index) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByIndex_v2") + ret = fn(c_index, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleBySerial(serial): + c_serial = c_char_p(serial) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleBySerial") + ret = fn(c_serial, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleByUUID(uuid): + c_uuid = c_char_p(uuid) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByUUID") + ret = fn(c_uuid, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetHandleByPciBusId(pciBusId): + c_busId = c_char_p(pciBusId) + device = c_nvmlDevice_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetHandleByPciBusId_v2") + ret = fn(c_busId, byref(device)) + _nvmlCheckReturn(ret) + return device + +def nvmlDeviceGetName(handle): + c_name = create_string_buffer(NVML_DEVICE_NAME_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetName") + ret = fn(handle, c_name, c_uint(NVML_DEVICE_NAME_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_name.value + +def nvmlDeviceGetBoardId(handle): + c_id = c_uint(); + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBoardId") + ret = fn(handle, byref(c_id)) + _nvmlCheckReturn(ret) + return c_id.value + +def nvmlDeviceGetMultiGpuBoard(handle): + c_multiGpu = c_uint(); + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMultiGpuBoard") + ret = fn(handle, byref(c_multiGpu)) + _nvmlCheckReturn(ret) + return c_multiGpu.value + +def nvmlDeviceGetBrand(handle): + c_type = _nvmlBrandType_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBrand") + ret = fn(handle, byref(c_type)) + _nvmlCheckReturn(ret) + return c_type.value + +def nvmlDeviceGetSerial(handle): + c_serial = 
create_string_buffer(NVML_DEVICE_SERIAL_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSerial") + ret = fn(handle, c_serial, c_uint(NVML_DEVICE_SERIAL_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_serial.value + +def nvmlDeviceGetCpuAffinity(handle, cpuSetSize): + affinity_array = c_ulonglong * cpuSetSize + c_affinity = affinity_array() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCpuAffinity") + ret = fn(handle, cpuSetSize, byref(c_affinity)) + _nvmlCheckReturn(ret) + return c_affinity + +def nvmlDeviceSetCpuAffinity(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetCpuAffinity") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearCpuAffinity(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearCpuAffinity") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetMinorNumber(handle): + c_minor_number = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMinorNumber") + ret = fn(handle, byref(c_minor_number)) + _nvmlCheckReturn(ret) + return c_minor_number.value + +def nvmlDeviceGetUUID(handle): + c_uuid = create_string_buffer(NVML_DEVICE_UUID_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetUUID") + ret = fn(handle, c_uuid, c_uint(NVML_DEVICE_UUID_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_uuid.value + +def nvmlDeviceGetInforomVersion(handle, infoRomObject): + c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomVersion") + ret = fn(handle, _nvmlInforomObject_t(infoRomObject), + c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 4.304 +def nvmlDeviceGetInforomImageVersion(handle): + c_version = create_string_buffer(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomImageVersion") + ret = fn(handle, c_version, c_uint(NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 4.304 +def nvmlDeviceGetInforomConfigurationChecksum(handle): + c_checksum = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetInforomConfigurationChecksum") + ret = fn(handle, byref(c_checksum)) + _nvmlCheckReturn(ret) + return c_checksum.value + +# Added in 4.304 +def nvmlDeviceValidateInforom(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceValidateInforom") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetDisplayMode(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceGetDisplayActive(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDisplayActive") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + + +def nvmlDeviceGetPersistenceMode(handle): + c_state = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPersistenceMode") + ret = fn(handle, byref(c_state)) + _nvmlCheckReturn(ret) + return c_state.value + +def nvmlDeviceGetPciInfo(handle): + c_info = nvmlPciInfo_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPciInfo_v2") + ret = fn(handle, byref(c_info)) + _nvmlCheckReturn(ret) + return c_info + +def nvmlDeviceGetClockInfo(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetClockInfo") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 2.285 +def 
nvmlDeviceGetMaxClockInfo(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxClockInfo") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 4.304 +def nvmlDeviceGetApplicationsClock(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetApplicationsClock") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 5.319 +def nvmlDeviceGetDefaultApplicationsClock(handle, type): + c_clock = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDefaultApplicationsClock") + ret = fn(handle, _nvmlClockType_t(type), byref(c_clock)) + _nvmlCheckReturn(ret) + return c_clock.value + +# Added in 4.304 +def nvmlDeviceGetSupportedMemoryClocks(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedMemoryClocks") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no clocks + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + clocks_array = c_uint * c_count.value + c_clocks = clocks_array() + + # make the call again + ret = fn(handle, byref(c_count), c_clocks) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + procs.append(c_clocks[i]) + + return procs + else: + # error case + raise NVMLError(ret) + +# Added in 4.304 +def nvmlDeviceGetSupportedGraphicsClocks(handle, memoryClockMHz): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedGraphicsClocks") + ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no clocks + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + clocks_array = c_uint * c_count.value + c_clocks = clocks_array() + + # make the call again + ret = fn(handle, c_uint(memoryClockMHz), byref(c_count), c_clocks) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + procs.append(c_clocks[i]) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetFanSpeed(handle): + c_speed = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetFanSpeed") + ret = fn(handle, byref(c_speed)) + _nvmlCheckReturn(ret) + return c_speed.value + +def nvmlDeviceGetTemperature(handle, sensor): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperature") + ret = fn(handle, _nvmlTemperatureSensors_t(sensor), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +def nvmlDeviceGetTemperatureThreshold(handle, threshold): + c_temp = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTemperatureThreshold") + ret = fn(handle, _nvmlTemperatureThresholds_t(threshold), byref(c_temp)) + _nvmlCheckReturn(ret) + return c_temp.value + +# DEPRECATED use nvmlDeviceGetPerformanceState +def nvmlDeviceGetPowerState(handle): + c_pstate = _nvmlPstates_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerState") + ret = fn(handle, byref(c_pstate)) + _nvmlCheckReturn(ret) + return c_pstate.value + +def nvmlDeviceGetPerformanceState(handle): + c_pstate = _nvmlPstates_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPerformanceState") + ret = fn(handle, byref(c_pstate)) + _nvmlCheckReturn(ret) + return c_pstate.value + +def nvmlDeviceGetPowerManagementMode(handle): + c_pcapMode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementMode") + ret = fn(handle, byref(c_pcapMode)) 
+ _nvmlCheckReturn(ret) + return c_pcapMode.value + +def nvmlDeviceGetPowerManagementLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + +# Added in 4.304 +def nvmlDeviceGetPowerManagementLimitConstraints(handle): + c_minLimit = c_uint() + c_maxLimit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementLimitConstraints") + ret = fn(handle, byref(c_minLimit), byref(c_maxLimit)) + _nvmlCheckReturn(ret) + return [c_minLimit.value, c_maxLimit.value] + +# Added in 4.304 +def nvmlDeviceGetPowerManagementDefaultLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerManagementDefaultLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + + +# Added in 331 +def nvmlDeviceGetEnforcedPowerLimit(handle): + c_limit = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEnforcedPowerLimit") + ret = fn(handle, byref(c_limit)) + _nvmlCheckReturn(ret) + return c_limit.value + +def nvmlDeviceGetPowerUsage(handle): + c_watts = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPowerUsage") + ret = fn(handle, byref(c_watts)) + _nvmlCheckReturn(ret) + return c_watts.value + +# Added in 4.304 +def nvmlDeviceGetGpuOperationMode(handle): + c_currState = _nvmlGpuOperationMode_t() + c_pendingState = _nvmlGpuOperationMode_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetGpuOperationMode") + ret = fn(handle, byref(c_currState), byref(c_pendingState)) + _nvmlCheckReturn(ret) + return [c_currState.value, c_pendingState.value] + +# Added in 4.304 +def nvmlDeviceGetCurrentGpuOperationMode(handle): + return nvmlDeviceGetGpuOperationMode(handle)[0] + +# Added in 4.304 +def nvmlDeviceGetPendingGpuOperationMode(handle): + return nvmlDeviceGetGpuOperationMode(handle)[1] + +def nvmlDeviceGetMemoryInfo(handle): + c_memory = c_nvmlMemory_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryInfo") + ret = fn(handle, byref(c_memory)) + _nvmlCheckReturn(ret) + return c_memory + +def nvmlDeviceGetBAR1MemoryInfo(handle): + c_bar1_memory = c_nvmlBAR1Memory_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBAR1MemoryInfo") + ret = fn(handle, byref(c_bar1_memory)) + _nvmlCheckReturn(ret) + return c_bar1_memory + +def nvmlDeviceGetComputeMode(handle): + c_mode = _nvmlComputeMode_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceGetEccMode(handle): + c_currState = _nvmlEnableState_t() + c_pendingState = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEccMode") + ret = fn(handle, byref(c_currState), byref(c_pendingState)) + _nvmlCheckReturn(ret) + return [c_currState.value, c_pendingState.value] + +# added to API +def nvmlDeviceGetCurrentEccMode(handle): + return nvmlDeviceGetEccMode(handle)[0] + +# added to API +def nvmlDeviceGetPendingEccMode(handle): + return nvmlDeviceGetEccMode(handle)[1] + +def nvmlDeviceGetTotalEccErrors(handle, errorType, counterType): + c_count = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTotalEccErrors") + ret = fn(handle, _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +# This is deprecated, instead use nvmlDeviceGetMemoryErrorCounter +def nvmlDeviceGetDetailedEccErrors(handle, errorType, counterType): + c_counts = c_nvmlEccErrorCounts_t() + fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetDetailedEccErrors") + ret = fn(handle, _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), byref(c_counts)) + _nvmlCheckReturn(ret) + return c_counts + +# Added in 4.304 +def nvmlDeviceGetMemoryErrorCounter(handle, errorType, counterType, locationType): + c_count = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMemoryErrorCounter") + ret = fn(handle, + _nvmlMemoryErrorType_t(errorType), + _nvmlEccCounterType_t(counterType), + _nvmlMemoryLocation_t(locationType), + byref(c_count)) + _nvmlCheckReturn(ret) + return c_count.value + +def nvmlDeviceGetUtilizationRates(handle): + c_util = c_nvmlUtilization_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetUtilizationRates") + ret = fn(handle, byref(c_util)) + _nvmlCheckReturn(ret) + return c_util + +def nvmlDeviceGetEncoderUtilization(handle): + c_util = c_uint() + c_samplingPeriod = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetEncoderUtilization") + ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) + _nvmlCheckReturn(ret) + return [c_util.value, c_samplingPeriod.value] + +def nvmlDeviceGetDecoderUtilization(handle): + c_util = c_uint() + c_samplingPeriod = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDecoderUtilization") + ret = fn(handle, byref(c_util), byref(c_samplingPeriod)) + _nvmlCheckReturn(ret) + return [c_util.value, c_samplingPeriod.value] + +def nvmlDeviceGetPcieReplayCounter(handle): + c_replay = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieReplayCounter") + ret = fn(handle, byref(c_replay)) + _nvmlCheckReturn(ret) + return c_replay.value + +def nvmlDeviceGetDriverModel(handle): + c_currModel = _nvmlDriverModel_t() + c_pendingModel = _nvmlDriverModel_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetDriverModel") + ret = fn(handle, byref(c_currModel), byref(c_pendingModel)) + _nvmlCheckReturn(ret) + return [c_currModel.value, c_pendingModel.value] + +# added to API +def nvmlDeviceGetCurrentDriverModel(handle): + return nvmlDeviceGetDriverModel(handle)[0] + +# added to API +def nvmlDeviceGetPendingDriverModel(handle): + return nvmlDeviceGetDriverModel(handle)[1] + +# Added in 2.285 +def nvmlDeviceGetVbiosVersion(handle): + c_version = create_string_buffer(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetVbiosVersion") + ret = fn(handle, c_version, c_uint(NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE)) + _nvmlCheckReturn(ret) + return c_version.value + +# Added in 2.285 +def nvmlDeviceGetComputeRunningProcesses(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetComputeRunningProcesses") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no running processes + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + # oversize the array incase more processes are created + c_count.value = c_count.value * 2 + 5 + proc_array = c_nvmlProcessInfo_t * c_count.value + c_procs = proc_array() + + # make the call again + ret = fn(handle, byref(c_count), c_procs) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + # use an alternative struct for this object + obj = nvmlStructToFriendlyObject(c_procs[i]) + if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see comment above + obj.usedGpuMemory = None + procs.append(obj) + + return procs + else: + # error case + raise NVMLError(ret) + +def 
nvmlDeviceGetGraphicsRunningProcesses(handle): + # first call to get the size + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetGraphicsRunningProcesses") + ret = fn(handle, byref(c_count), None) + + if (ret == NVML_SUCCESS): + # special case, no running processes + return [] + elif (ret == NVML_ERROR_INSUFFICIENT_SIZE): + # typical case + # oversize the array incase more processes are created + c_count.value = c_count.value * 2 + 5 + proc_array = c_nvmlProcessInfo_t * c_count.value + c_procs = proc_array() + + # make the call again + ret = fn(handle, byref(c_count), c_procs) + _nvmlCheckReturn(ret) + + procs = [] + for i in range(c_count.value): + # use an alternative struct for this object + obj = nvmlStructToFriendlyObject(c_procs[i]) + if (obj.usedGpuMemory == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see comment above + obj.usedGpuMemory = None + procs.append(obj) + + return procs + else: + # error case + raise NVMLError(ret) + +def nvmlDeviceGetAutoBoostedClocksEnabled(handle): + c_isEnabled = _nvmlEnableState_t() + c_defaultIsEnabled = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAutoBoostedClocksEnabled") + ret = fn(handle, byref(c_isEnabled), byref(c_defaultIsEnabled)) + _nvmlCheckReturn(ret) + return [c_isEnabled.value, c_defaultIsEnabled.value] + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +## Set functions +def nvmlUnitSetLedState(unit, color): + fn = _nvmlGetFunctionPointer("nvmlUnitSetLedState") + ret = fn(unit, _nvmlLedColor_t(color)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetPersistenceMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetPersistenceMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetComputeMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetComputeMode") + ret = fn(handle, _nvmlComputeMode_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetEccMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetEccMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearEccErrorCounts(handle, counterType): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearEccErrorCounts") + ret = fn(handle, _nvmlEccCounterType_t(counterType)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetDriverModel(handle, model): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetDriverModel") + ret = fn(handle, _nvmlDriverModel_t(model)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceSetAutoBoostedClocksEnabled(handle, enabled): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAutoBoostedClocksEnabled") + ret = fn(handle, _nvmlEnableState_t(enabled)) + _nvmlCheckReturn(ret) + return None + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +def nvmlDeviceSetDefaultAutoBoostedClocksEnabled(handle, enabled, flags): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetDefaultAutoBoostedClocksEnabled") + ret = fn(handle, _nvmlEnableState_t(enabled), c_uint(flags)) + _nvmlCheckReturn(ret) + return None + #Throws NVML_ERROR_NOT_SUPPORTED if hardware doesn't support setting auto boosted clocks + +# Added in 4.304 +def nvmlDeviceSetApplicationsClocks(handle, maxMemClockMHz, maxGraphicsClockMHz): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetApplicationsClocks") + ret = fn(handle, c_uint(maxMemClockMHz), c_uint(maxGraphicsClockMHz)) + _nvmlCheckReturn(ret) + return None + +# 
Added in 4.304 +def nvmlDeviceResetApplicationsClocks(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceResetApplicationsClocks") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceSetPowerManagementLimit(handle, limit): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetPowerManagementLimit") + ret = fn(handle, c_uint(limit)) + _nvmlCheckReturn(ret) + return None + +# Added in 4.304 +def nvmlDeviceSetGpuOperationMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetGpuOperationMode") + ret = fn(handle, _nvmlGpuOperationMode_t(mode)) + _nvmlCheckReturn(ret) + return None + +# Added in 2.285 +def nvmlEventSetCreate(): + fn = _nvmlGetFunctionPointer("nvmlEventSetCreate") + eventSet = c_nvmlEventSet_t() + ret = fn(byref(eventSet)) + _nvmlCheckReturn(ret) + return eventSet + +# Added in 2.285 +def nvmlDeviceRegisterEvents(handle, eventTypes, eventSet): + fn = _nvmlGetFunctionPointer("nvmlDeviceRegisterEvents") + ret = fn(handle, c_ulonglong(eventTypes), eventSet) + _nvmlCheckReturn(ret) + return None + +# Added in 2.285 +def nvmlDeviceGetSupportedEventTypes(handle): + c_eventTypes = c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedEventTypes") + ret = fn(handle, byref(c_eventTypes)) + _nvmlCheckReturn(ret) + return c_eventTypes.value + +# Added in 2.285 +# raises NVML_ERROR_TIMEOUT exception on timeout +def nvmlEventSetWait(eventSet, timeoutms): + fn = _nvmlGetFunctionPointer("nvmlEventSetWait") + data = c_nvmlEventData_t() + ret = fn(eventSet, byref(data), c_uint(timeoutms)) + _nvmlCheckReturn(ret) + return data + +# Added in 2.285 +def nvmlEventSetFree(eventSet): + fn = _nvmlGetFunctionPointer("nvmlEventSetFree") + ret = fn(eventSet) + _nvmlCheckReturn(ret) + return None + +# Added in 3.295 +def nvmlDeviceOnSameBoard(handle1, handle2): + fn = _nvmlGetFunctionPointer("nvmlDeviceOnSameBoard") + onSameBoard = c_int() + ret = fn(handle1, handle2, byref(onSameBoard)) + _nvmlCheckReturn(ret) + return (onSameBoard.value != 0) + +# Added in 3.295 +def nvmlDeviceGetCurrPcieLinkGeneration(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkGeneration") + gen = c_uint() + ret = fn(handle, byref(gen)) + _nvmlCheckReturn(ret) + return gen.value + +# Added in 3.295 +def nvmlDeviceGetMaxPcieLinkGeneration(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkGeneration") + gen = c_uint() + ret = fn(handle, byref(gen)) + _nvmlCheckReturn(ret) + return gen.value + +# Added in 3.295 +def nvmlDeviceGetCurrPcieLinkWidth(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrPcieLinkWidth") + width = c_uint() + ret = fn(handle, byref(width)) + _nvmlCheckReturn(ret) + return width.value + +# Added in 3.295 +def nvmlDeviceGetMaxPcieLinkWidth(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceGetMaxPcieLinkWidth") + width = c_uint() + ret = fn(handle, byref(width)) + _nvmlCheckReturn(ret) + return width.value + +# Added in 4.304 +def nvmlDeviceGetSupportedClocksThrottleReasons(handle): + c_reasons= c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSupportedClocksThrottleReasons") + ret = fn(handle, byref(c_reasons)) + _nvmlCheckReturn(ret) + return c_reasons.value + +# Added in 4.304 +def nvmlDeviceGetCurrentClocksThrottleReasons(handle): + c_reasons= c_ulonglong() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetCurrentClocksThrottleReasons") + ret = fn(handle, byref(c_reasons)) + _nvmlCheckReturn(ret) + return c_reasons.value + +# Added in 5.319 +def nvmlDeviceGetIndex(handle): + fn = 
_nvmlGetFunctionPointer("nvmlDeviceGetIndex") + c_index = c_uint() + ret = fn(handle, byref(c_index)) + _nvmlCheckReturn(ret) + return c_index.value + +# Added in 5.319 +def nvmlDeviceGetAccountingMode(handle): + c_mode = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingMode") + ret = fn(handle, byref(c_mode)) + _nvmlCheckReturn(ret) + return c_mode.value + +def nvmlDeviceSetAccountingMode(handle, mode): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAccountingMode") + ret = fn(handle, _nvmlEnableState_t(mode)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceClearAccountingPids(handle): + fn = _nvmlGetFunctionPointer("nvmlDeviceClearAccountingPids") + ret = fn(handle) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetAccountingStats(handle, pid): + stats = c_nvmlAccountingStats_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingStats") + ret = fn(handle, c_uint(pid), byref(stats)) + _nvmlCheckReturn(ret) + if (stats.maxMemoryUsage == NVML_VALUE_NOT_AVAILABLE_ulonglong.value): + # special case for WDDM on Windows, see comment above + stats.maxMemoryUsage = None + return stats + +def nvmlDeviceGetAccountingPids(handle): + count = c_uint(nvmlDeviceGetAccountingBufferSize(handle)) + pids = (c_uint * count.value)() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingPids") + ret = fn(handle, byref(count), pids) + _nvmlCheckReturn(ret) + return map(int, pids[0:count.value]) + +def nvmlDeviceGetAccountingBufferSize(handle): + bufferSize = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAccountingBufferSize") + ret = fn(handle, byref(bufferSize)) + _nvmlCheckReturn(ret) + return int(bufferSize.value) + +def nvmlDeviceGetRetiredPages(device, sourceFilter): + c_source = _nvmlPageRetirementCause_t(sourceFilter) + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPages") + + # First call will get the size + ret = fn(device, c_source, byref(c_count), None) + + # this should only fail with insufficient size + if ((ret != NVML_SUCCESS) and + (ret != NVML_ERROR_INSUFFICIENT_SIZE)): + raise NVMLError(ret) + + # call again with a buffer + # oversize the array for the rare cases where additional pages + # are retired between NVML calls + c_count.value = c_count.value * 2 + 5 + page_array = c_ulonglong * c_count.value + c_pages = page_array() + ret = fn(device, c_source, byref(c_count), c_pages) + _nvmlCheckReturn(ret) + return map(int, c_pages[0:c_count.value]) + +def nvmlDeviceGetRetiredPagesPendingStatus(device): + c_pending = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetRetiredPagesPendingStatus") + ret = fn(device, byref(c_pending)) + _nvmlCheckReturn(ret) + return int(c_pending.value) + +def nvmlDeviceGetAPIRestriction(device, apiType): + c_permission = _nvmlEnableState_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetAPIRestriction") + ret = fn(device, _nvmlRestrictedAPI_t(apiType), byref(c_permission)) + _nvmlCheckReturn(ret) + return int(c_permission.value) + +def nvmlDeviceSetAPIRestriction(handle, apiType, isRestricted): + fn = _nvmlGetFunctionPointer("nvmlDeviceSetAPIRestriction") + ret = fn(handle, _nvmlRestrictedAPI_t(apiType), _nvmlEnableState_t(isRestricted)) + _nvmlCheckReturn(ret) + return None + +def nvmlDeviceGetBridgeChipInfo(handle): + bridgeHierarchy = c_nvmlBridgeChipHierarchy_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetBridgeChipInfo") + ret = fn(handle, byref(bridgeHierarchy)) + _nvmlCheckReturn(ret) + return bridgeHierarchy + +def nvmlDeviceGetSamples(device, sampling_type, 
timeStamp): + c_sampling_type = _nvmlSamplingType_t(sampling_type) + c_time_stamp = c_ulonglong(timeStamp) + c_sample_count = c_uint(0) + c_sample_value_type = _nvmlValueType_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetSamples") + + ## First Call gets the size + ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), None) + + # Stop if this fails + if (ret != NVML_SUCCESS): + raise NVMLError(ret) + + sampleArray = c_sample_count.value * c_nvmlSample_t + c_samples = sampleArray() + ret = fn(device, c_sampling_type, c_time_stamp, byref(c_sample_value_type), byref(c_sample_count), c_samples) + _nvmlCheckReturn(ret) + return (c_sample_value_type.value, c_samples[0:c_sample_count.value]) + +def nvmlDeviceGetViolationStatus(device, perfPolicyType): + c_perfPolicy_type = _nvmlPerfPolicyType_t(perfPolicyType) + c_violTime = c_nvmlViolationTime_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetViolationStatus") + + ## Invoke the method to get violation time + ret = fn(device, c_perfPolicy_type, byref(c_violTime)) + _nvmlCheckReturn(ret) + return c_violTime + +def nvmlDeviceGetPcieThroughput(device, counter): + c_util = c_uint() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetPcieThroughput") + ret = fn(device, _nvmlPcieUtilCounter_t(counter), byref(c_util)) + _nvmlCheckReturn(ret) + return c_util.value + +def nvmlSystemGetTopologyGpuSet(cpuNumber): + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlSystemGetTopologyGpuSet") + + # First call will get the size + ret = fn(cpuNumber, byref(c_count), None) + + if ret != NVML_SUCCESS: + raise NVMLError(ret) + print ("c_count.value") + # call again with a buffer + device_array = c_nvmlDevice_t * c_count.value + c_devices = device_array() + ret = fn(cpuNumber, byref(c_count), c_devices) + _nvmlCheckReturn(ret) + return map(None, c_devices[0:c_count.value]) + +def nvmlDeviceGetTopologyNearestGpus(device, level): + c_count = c_uint(0) + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyNearestGpus") + + # First call will get the size + ret = fn(device, level, byref(c_count), None) + + if ret != NVML_SUCCESS: + raise NVMLError(ret) + + # call again with a buffer + device_array = c_nvmlDevice_t * c_count.value + c_devices = device_array() + ret = fn(device, level, byref(c_count), c_devices) + _nvmlCheckReturn(ret) + return map(None, c_devices[0:c_count.value]) + +def nvmlDeviceGetTopologyCommonAncestor(device1, device2): + c_level = _nvmlGpuTopologyLevel_t() + fn = _nvmlGetFunctionPointer("nvmlDeviceGetTopologyCommonAncestor") + ret = fn(device1, device2, byref(c_level)) + _nvmlCheckReturn(ret) + return c_level.value diff --git a/localization/__init__.py b/localization/__init__.py new file mode 100644 index 0000000..f3bcf09 --- /dev/null +++ b/localization/__init__.py @@ -0,0 +1,2 @@ +from .localization import get_default_ttf_font_name + diff --git a/localization/localization.py b/localization/localization.py new file mode 100644 index 0000000..4ccd9c8 --- /dev/null +++ b/localization/localization.py @@ -0,0 +1,29 @@ +import sys +import locale + +system_locale = locale.getdefaultlocale()[0] +system_language = system_locale[0:2] + +windows_font_name_map = { + 'en' : 'cour', + 'ru' : 'cour', + 'zn' : 'simsun_01' +} + +darwin_font_name_map = { + 'en' : 'cour', + 'ru' : 'cour', + 'zn' : 'Apple LiSung Light' +} + +linux_font_name_map = { + 'en' : 'cour', + 'ru' : 'cour', + 'zn' : 'cour' +} + +def get_default_ttf_font_name(): + platform = sys.platform + if platform == 'win32': return 
windows_font_name_map.get(system_language, 'cour') + elif platform == 'darwin': return darwin_font_name_map.get(system_language, 'cour') + else: return linux_font_name_map.get(system_language, 'cour') diff --git a/main.py b/main.py new file mode 100644 index 0000000..c2b42d1 --- /dev/null +++ b/main.py @@ -0,0 +1,188 @@ +import os +import sys +import argparse +from utils import Path_utils +from utils import os_utils +from pathlib import Path +import numpy as np + +if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 2): + raise Exception("This program requires at least Python 3.2") + +class fixPathAction(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, os.path.abspath(os.path.expanduser(values))) + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +if __name__ == "__main__": + os_utils.set_process_lowest_prio() + + parser = argparse.ArgumentParser() + parser.add_argument('--tf-suppress-std', action="store_true", dest="tf_suppress_std", default=False, help="Suppress tensorflow initialization info. May not works on some python builds such as anaconda python 3.6.4. If you can fix it, you are welcome.") + + subparsers = parser.add_subparsers() + + def process_extract(arguments): + from mainscripts import Extractor + Extractor.main ( + input_dir=arguments.input_dir, + output_dir=arguments.output_dir, + debug=arguments.debug, + face_type=arguments.face_type, + detector=arguments.detector, + multi_gpu=arguments.multi_gpu, + manual_fix=arguments.manual_fix, + manual_window_size=arguments.manual_window_size) + + extract_parser = subparsers.add_parser( "extract", help="Extract the faces from a pictures.") + extract_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") + extract_parser.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.") + extract_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Writes debug images to [output_dir]_debug\ directory.") + extract_parser.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'") + extract_parser.add_argument('--detector', dest="detector", choices=['dlib','mt','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. 
Manual detector suitable only for dst-set.") + extract_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") + extract_parser.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.") + extract_parser.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=0, help="Manual fix window size. Example: 1368. Default: frame size.") + + extract_parser.set_defaults (func=process_extract) + + def process_sort(arguments): + from mainscripts import Sorter + Sorter.main (input_path=arguments.input_dir, sort_by_method=arguments.sort_by_method) + + sort_parser = subparsers.add_parser( "sort", help="Sort faces in a directory.") + sort_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") + sort_parser.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "hist", "hist-dissim", "hist-blur", "ssim", "brightness", "hue", "origname"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." ) + sort_parser.set_defaults (func=process_sort) + + def process_train(arguments): + + if 'DFL_TARGET_EPOCH' in os.environ.keys(): + arguments.target_epoch = int ( os.environ['DFL_TARGET_EPOCH'] ) + + if 'DFL_BATCH_SIZE' in os.environ.keys(): + arguments.batch_size = int ( os.environ['DFL_TARGET_EPOCH'] ) + + from mainscripts import Trainer + Trainer.main ( + training_data_src_dir=arguments.training_data_src_dir, + training_data_dst_dir=arguments.training_data_dst_dir, + model_path=arguments.model_dir, + model_name=arguments.model_name, + debug = arguments.debug, + #**options + batch_size = arguments.batch_size, + write_preview_history = arguments.write_preview_history, + target_epoch = arguments.target_epoch, + save_interval_min = arguments.save_interval_min, + choose_worst_gpu = arguments.choose_worst_gpu, + force_best_gpu_idx = arguments.force_best_gpu_idx, + multi_gpu = arguments.multi_gpu, + force_gpu_idxs = arguments.force_gpu_idxs, + ) + + train_parser = subparsers.add_parser( "train", help="Trainer") + train_parser.add_argument('--training-data-src-dir', required=True, action=fixPathAction, dest="training_data_src_dir", help="Dir of src-set.") + train_parser.add_argument('--training-data-dst-dir', required=True, action=fixPathAction, dest="training_data_dst_dir", help="Dir of dst-set.") + train_parser.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") + train_parser.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") + train_parser.add_argument('--write-preview-history', action="store_true", dest="write_preview_history", default=False, help="Enable write preview history.") + train_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug training.") + train_parser.add_argument('--batch-size', type=int, dest="batch_size", default=0, help="Model batch size. Default - auto. Environment variable: ODFS_BATCH_SIZE.") + train_parser.add_argument('--target-epoch', type=int, dest="target_epoch", default=0, help="Train until target epoch. Default - unlimited. 
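The train subcommand also accepts overrides from environment variables: process_train above checks DFL_TARGET_EPOCH and DFL_BATCH_SIZE before calling Trainer.main. A minimal sketch of that override pattern, with each variable feeding its matching option:

```python
import os

def apply_env_overrides(arguments):
    # DFL_TARGET_EPOCH overrides --target-epoch, DFL_BATCH_SIZE overrides --batch-size
    if 'DFL_TARGET_EPOCH' in os.environ:
        arguments.target_epoch = int(os.environ['DFL_TARGET_EPOCH'])
    if 'DFL_BATCH_SIZE' in os.environ:
        arguments.batch_size = int(os.environ['DFL_BATCH_SIZE'])
    return arguments
```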
Environment variable: ODFS_TARGET_EPOCH.") + train_parser.add_argument('--save-interval-min', type=int, dest="save_interval_min", default=10, help="Save interval in minutes. Default 10.") + train_parser.add_argument('--choose-worst-gpu', action="store_true", dest="choose_worst_gpu", default=False, help="Choose worst GPU instead of best.") + train_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best(worst).") + train_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="MultiGPU option. It will select only same best(worst) GPU models.") + train_parser.add_argument('--force-gpu-idxs', type=str, dest="force_gpu_idxs", default=None, help="Override final GPU idxs. Example: 0,1,2.") + train_parser.set_defaults (func=process_train) + + def process_convert(arguments): + if arguments.ask_for_params: + try: + mode = int ( input ("Choose mode: (1) hist match, (2) hist match bw, (3) seamless (default), (4) seamless hist match : ") ) + except: + mode = 3 + + if mode == 1: + arguments.mode = 'hist-match' + elif mode == 2: + arguments.mode = 'hist-match-bw' + elif mode == 3: + arguments.mode = 'seamless' + elif mode == 4: + arguments.mode = 'seamless-hist-match' + + if arguments.mode == 'hist-match' or arguments.mode == 'hist-match-bw': + try: + choice = int ( input ("Masked hist match? [0..1] (default - model choice) : ") ) + arguments.masked_hist_match = (choice != 0) + except: + arguments.masked_hist_match = None + + try: + arguments.erode_mask_modifier = int ( input ("Choose erode mask modifier [-100..100] (default 0) : ") ) + except: + arguments.erode_mask_modifier = 0 + + try: + arguments.blur_mask_modifier = int ( input ("Choose blur mask modifier [-100..200] (default 0) : ") ) + except: + arguments.blur_mask_modifier = 0 + + arguments.erode_mask_modifier = np.clip ( int(arguments.erode_mask_modifier), -100, 100) + arguments.blur_mask_modifier = np.clip ( int(arguments.blur_mask_modifier), -100, 200) + + from mainscripts import Converter + Converter.main ( + input_dir=arguments.input_dir, + output_dir=arguments.output_dir, + aligned_dir=arguments.aligned_dir, + model_dir=arguments.model_dir, + model_name=arguments.model_name, + debug = arguments.debug, + mode = arguments.mode, + masked_hist_match = arguments.masked_hist_match, + erode_mask_modifier = arguments.erode_mask_modifier, + blur_mask_modifier = arguments.blur_mask_modifier, + force_best_gpu_idx = arguments.force_best_gpu_idx + ) + + convert_parser = subparsers.add_parser( "convert", help="Converter") + convert_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") + convert_parser.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the converted files will be stored.") + convert_parser.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", help="Aligned directory. This is where the aligned files stored. 
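When --ask-for-params is given, process_convert above prompts for each conversion parameter, falls back to a default if the input is not a number, and clamps the result to its valid range. A condensed sketch of that prompt pattern; the helper is hypothetical, not part of main.py:

```python
import numpy as np

def ask_int(prompt, default, lo=None, hi=None):
    """Ask for an integer, fall back to `default` on bad input, clamp to [lo, hi]."""
    try:
        value = int(input(prompt))
    except (ValueError, EOFError):
        value = default
    if lo is not None and hi is not None:
        value = int(np.clip(value, lo, hi))
    return value

# erode = ask_int("Choose erode mask modifier [-100..100] (default 0) : ", 0, -100, 100)
# blur  = ask_int("Choose blur mask modifier [-100..200] (default 0) : ", 0, -100, 200)
```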
Not used in AVATAR model.") + convert_parser.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") + convert_parser.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") + convert_parser.add_argument('--ask-for-params', action="store_true", dest="ask_for_params", default=False, help="Ask for params.") + convert_parser.add_argument('--mode', dest="mode", choices=['seamless','hist-match', 'hist-match-bw','seamless-hist-match'], default='seamless', help="Face overlaying mode. Seriously affects result.") + convert_parser.add_argument('--masked-hist-match', type=str2bool, nargs='?', const=True, default=None, help="True or False. Excludes background for hist match. Default - model decide.") + convert_parser.add_argument('--erode-mask-modifier', type=int, dest="erode_mask_modifier", default=0, help="Automatic erode mask modifier. Valid range [-100..100].") + convert_parser.add_argument('--blur-mask-modifier', type=int, dest="blur_mask_modifier", default=0, help="Automatic blur mask modifier. Valid range [-100..200].") + convert_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.") + convert_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best.") + + convert_parser.set_defaults(func=process_convert) + + def bad_args(arguments): + parser.print_help() + exit(0) + parser.set_defaults(func=bad_args) + + arguments = parser.parse_args() + if arguments.tf_suppress_std: + os.environ['TF_SUPPRESS_STD'] = '1' + arguments.func(arguments) + + +''' +import code +code.interact(local=dict(globals(), **locals())) +''' \ No newline at end of file diff --git a/mainscripts/Converter.py b/mainscripts/Converter.py new file mode 100644 index 0000000..1fc27a4 --- /dev/null +++ b/mainscripts/Converter.py @@ -0,0 +1,283 @@ +import traceback +from pathlib import Path +from utils import Path_utils +import cv2 +from tqdm import tqdm +from utils.AlignedPNG import AlignedPNG +from utils import image_utils +import shutil +import numpy as np +import time +import multiprocessing +from models import ConverterBase + +class model_process_predictor(object): + def __init__(self, sq, cq, lock): + self.sq = sq + self.cq = cq + self.lock = lock + + def __call__(self, face): + self.lock.acquire() + + self.sq.put ( {'op': 'predict', 'face' : face} ) + while True: + if not self.cq.empty(): + obj = self.cq.get() + obj_op = obj['op'] + if obj_op == 'predict_result': + self.lock.release() + return obj['result'] + time.sleep(0.005) + +def model_process(model_name, model_dir, in_options, sq, cq): + try: + model_path = Path(model_dir) + + import models + model = models.import_model(model_name)(model_path, **in_options) + converter = model.get_converter(**in_options) + converter.dummy_predict() + + cq.put ( {'op':'init', 'converter' : converter.copy_and_set_predictor( None ) } ) + + closing = False + while not closing: + while not sq.empty(): + obj = sq.get() + obj_op = obj['op'] + if obj_op == 'predict': + result = converter.predictor ( obj['face'] ) + cq.put ( {'op':'predict_result', 'result':result} ) + elif obj_op == 'close': + closing = True + break + time.sleep(0.005) + + model.finalize() + + except Exception as e: + print ( 'Error: %s' % (str(e))) + traceback.print_exc() + +from utils.SubprocessorBase import SubprocessorBase +class 
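model_process_predictor and model_process above form a simple cross-process prediction service: the neural network lives in one process, and each conversion worker sends a face over a queue, holding a lock until the result comes back. A runnable, condensed sketch of that hand-off; the echo function stands in for the real model so the example needs no TensorFlow:

```python
import multiprocessing

def model_loop(sq, cq):
    """Stand-in for model_process: answer 'predict' requests until 'close'."""
    while True:
        obj = sq.get()
        if obj['op'] == 'close':
            break
        if obj['op'] == 'predict':
            cq.put({'op': 'predict_result', 'result': obj['face']})  # echo back

class QueuePredictor:
    """Stand-in for model_process_predictor: proxy calls through the queues."""
    def __init__(self, sq, cq, lock):
        self.sq, self.cq, self.lock = sq, cq, lock

    def __call__(self, face):
        with self.lock:                          # one outstanding request at a time
            self.sq.put({'op': 'predict', 'face': face})
            return self.cq.get()['result']       # block until the model answers

if __name__ == '__main__':
    sq, cq = multiprocessing.Queue(), multiprocessing.Queue()
    lock = multiprocessing.Lock()
    p = multiprocessing.Process(target=model_loop, args=(sq, cq))
    p.start()
    predictor = QueuePredictor(sq, cq, lock)
    print(predictor('face data goes here'))      # round-trips through the worker process
    sq.put({'op': 'close'})
    p.join()
```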
ConvertSubprocessor(SubprocessorBase): + + #override + def __init__(self, converter, input_path_image_paths, output_path, alignments, debug): + super().__init__('Converter') + self.converter = converter + self.input_path_image_paths = input_path_image_paths + self.output_path = output_path + self.alignments = alignments + self.debug = debug + + self.input_data = self.input_path_image_paths + self.files_processed = 0 + self.faces_processed = 0 + + #override + def process_info_generator(self): + r = [0] if self.debug else range(multiprocessing.cpu_count()) + for i in r: + yield 'CPU%d' % (i), {}, {'device_idx': i, + 'device_name': 'CPU%d' % (i), + 'converter' : self.converter, + 'output_dir' : str(self.output_path), + 'alignments' : self.alignments, + 'debug': self.debug } + + #override + def get_no_process_started_message(self): + return 'Unable to start CPU processes.' + + #override + def onHostGetProgressBarDesc(self): + return "Converting" + + #override + def onHostGetProgressBarLen(self): + return len (self.input_data) + + #override + def onHostGetData(self): + if len (self.input_data) > 0: + return self.input_data.pop(0) + return None + + #override + def onHostDataReturn (self, data): + self.input_data.insert(0, data) + + #override + def onClientInitialize(self, client_dict): + print ('Running on %s.' % (client_dict['device_name']) ) + self.device_idx = client_dict['device_idx'] + self.device_name = client_dict['device_name'] + self.converter = client_dict['converter'] + self.output_path = Path(client_dict['output_dir']) if 'output_dir' in client_dict.keys() else None + self.alignments = client_dict['alignments'] + self.debug = client_dict['debug'] + return None + + #override + def onClientFinalize(self): + pass + + #override + def onClientProcessData(self, data): + filename_path = Path(data) + + files_processed = 1 + faces_processed = 0 + + output_filename_path = self.output_path / filename_path.name + if self.converter.get_mode() == ConverterBase.MODE_FACE and filename_path.stem not in self.alignments.keys(): + if not self.debug: + print ( 'no faces found for %s, copying without faces' % (filename_path.name) ) + shutil.copy ( str(filename_path), str(output_filename_path) ) + else: + image = (cv2.imread(str(filename_path)) / 255.0).astype(np.float32) + + if self.converter.get_mode() == ConverterBase.MODE_IMAGE: + image_landmarks = None + a_png = AlignedPNG.load( str(filename_path) ) + if a_png is not None: + d = a_png.getFaceswapDictData() + if d is not None and 'landmarks' in d.keys(): + image_landmarks = np.array(d['landmarks']) + + image = self.converter.convert_image(image, image_landmarks, self.debug) + if self.debug: + for img in image: + cv2.imshow ('Debug convert', img ) + cv2.waitKey(0) + faces_processed = 1 + elif self.converter.get_mode() == ConverterBase.MODE_FACE: + faces = self.alignments[filename_path.stem] + for image_landmarks in faces: + image = self.converter.convert_face(image, image_landmarks, self.debug) + if self.debug: + for img in image: + cv2.imshow ('Debug convert', img ) + cv2.waitKey(0) + faces_processed = len(faces) + + if not self.debug: + cv2.imwrite (str(output_filename_path), (image*255).astype(np.uint8) ) + + + return (files_processed, faces_processed) + + #override + def onHostResult (self, data, result): + self.files_processed += result[0] + self.faces_processed += result[1] + return 1 + + #override + def get_start_return(self): + return self.files_processed, self.faces_processed + +def main (input_dir, output_dir, aligned_dir, model_dir, 
model_name, **in_options): + print ("Running converter.\r\n") + + debug = in_options['debug'] + + try: + input_path = Path(input_dir) + output_path = Path(output_dir) + aligned_path = Path(aligned_dir) + model_path = Path(model_dir) + + if not input_path.exists(): + print('Input directory not found. Please ensure it exists.') + return + + if output_path.exists(): + for filename in Path_utils.get_image_paths(output_path): + Path(filename).unlink() + else: + output_path.mkdir(parents=True, exist_ok=True) + + if not aligned_path.exists(): + print('Aligned directory not found. Please ensure it exists.') + return + + if not model_path.exists(): + print('Model directory not found. Please ensure it exists.') + return + + model_sq = multiprocessing.Queue() + model_cq = multiprocessing.Queue() + model_lock = multiprocessing.Lock() + + model_p = multiprocessing.Process(target=model_process, args=(model_name, model_dir, in_options, model_sq, model_cq)) + model_p.start() + + while True: + if not model_cq.empty(): + obj = model_cq.get() + obj_op = obj['op'] + if obj_op == 'init': + converter = obj['converter'] + break + + alignments = {} + if converter.get_mode() == ConverterBase.MODE_FACE: + aligned_path_image_paths = Path_utils.get_image_paths(aligned_path) + for filename in tqdm(aligned_path_image_paths, desc= "Collecting alignments" ): + a_png = AlignedPNG.load( str(filename) ) + if a_png is None: + print ( "%s - no embedded data found." % (filename) ) + continue + d = a_png.getFaceswapDictData() + if d is None or d['source_filename'] is None or d['source_rect'] is None or d['source_landmarks'] is None: + print ( "%s - no embedded data found." % (filename) ) + continue + + source_filename_stem = Path(d['source_filename']).stem + if source_filename_stem not in alignments.keys(): + alignments[ source_filename_stem ] = [] + + alignments[ source_filename_stem ].append ( np.array(d['source_landmarks']) ) + + files_processed, faces_processed = ConvertSubprocessor ( + converter = converter.copy_and_set_predictor( model_process_predictor(model_sq,model_cq,model_lock) ), + input_path_image_paths = Path_utils.get_image_paths(input_path), + output_path = output_path, + alignments = alignments, + debug = debug ).process() + + model_sq.put ( {'op':'close'} ) + model_p.join() + + ''' + if model_name == 'AVATAR': + output_path_image_paths = Path_utils.get_image_paths(output_path) + + last_ok_frame = -1 + for filename in output_path_image_paths: + filename_path = Path(filename) + stem = Path(filename).stem + try: + frame = int(stem) + except: + raise Exception ('Aligned avatars must be created from indexed sequence files.') + + if frame-last_ok_frame > 1: + start = last_ok_frame + 1 + end = frame - 1 + + print ("Filling gaps: [%d...%d]" % (start, end) ) + for i in range (start, end+1): + shutil.copy ( str(filename), str( output_path / ('%.5d%s' % (i, filename_path.suffix )) ) ) + + last_ok_frame = frame + ''' + + except Exception as e: + print ( 'Error: %s' % (str(e))) + traceback.print_exc() + + diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py new file mode 100644 index 0000000..86bb133 --- /dev/null +++ b/mainscripts/Extractor.py @@ -0,0 +1,378 @@ +import traceback +import os +import sys +import time +import multiprocessing +from tqdm import tqdm +from pathlib import Path +import numpy as np +import cv2 +from utils import Path_utils +from utils.AlignedPNG import AlignedPNG +from utils import image_utils +from facelib import FaceType +import facelib +import gpufmkmgr + +from utils.SubprocessorBase 
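The commented-out AVATAR block at the end of main() above fills holes in an indexed output sequence by copying the next available frame backwards over the missing indices. A standalone sketch of that idea; the directory and file pattern are illustrative:

```python
import shutil
from pathlib import Path

def fill_frame_gaps(output_dir):
    """Copy the frame that follows a gap over every missing index before it."""
    last_ok_frame = -1
    for filename_path in sorted(Path(output_dir).glob('*.png')):
        try:
            frame = int(filename_path.stem)
        except ValueError:
            raise Exception('Aligned avatars must be created from indexed sequence files.')
        for i in range(last_ok_frame + 1, frame):   # indices missing before this frame
            shutil.copy(str(filename_path),
                        str(Path(output_dir) / ('%.5d%s' % (i, filename_path.suffix))))
        last_ok_frame = frame
```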
import SubprocessorBase +class ExtractSubprocessor(SubprocessorBase): + + #override + def __init__(self, input_data, type, image_size, face_type, debug, multi_gpu=False, manual=False, manual_window_size=0, detector=None, output_path=None ): + self.input_data = input_data + self.type = type + self.image_size = image_size + self.face_type = face_type + self.debug = debug + self.multi_gpu = multi_gpu + self.detector = detector + self.output_path = output_path + self.manual = manual + self.manual_window_size = manual_window_size + self.result = [] + + no_response_time_sec = 60 if not self.manual else 999999 + super().__init__('Extractor', no_response_time_sec) + + #override + def onHostClientsInitialized(self): + if self.manual == True: + self.wnd_name = 'Manual pass' + cv2.namedWindow(self.wnd_name) + + self.landmarks = None + self.param_x = -1 + self.param_y = -1 + self.param_rect_size = -1 + self.param = {'x': 0, 'y': 0, 'rect_size' : 5} + + def onMouse(event, x, y, flags, param): + if event == cv2.EVENT_MOUSEWHEEL: + mod = 1 if flags > 0 else -1 + param['rect_size'] = max (5, param['rect_size'] + 10*mod) + else: + param['x'] = x + param['y'] = y + + cv2.setMouseCallback(self.wnd_name, onMouse, self.param) + + def get_devices_for_type (self, type, multi_gpu): + if (type == 'rects' or type == 'landmarks'): + if not multi_gpu: + devices = [gpufmkmgr.getBestDeviceIdx()] + else: + devices = gpufmkmgr.getDevicesWithAtLeastTotalMemoryGB(2) + devices = [ (idx, gpufmkmgr.getDeviceName(idx), gpufmkmgr.getDeviceVRAMTotalGb(idx) ) for idx in devices] + + elif type == 'final': + devices = [ (i, 'CPU%d' % (i), 0 ) for i in range(0, multiprocessing.cpu_count()) ] + + return devices + + #override + def process_info_generator(self): + for (device_idx, device_name, device_total_vram_gb) in self.get_devices_for_type(self.type, self.multi_gpu): + num_processes = 1 + if not self.manual and self.type == 'rects' and self.detector == 'mt': + num_processes = int ( max (1, device_total_vram_gb / 2) ) + + for i in range(0, num_processes ): + device_name_for_process = device_name if num_processes == 1 else '%s #%d' % (device_name,i) + yield device_name_for_process, {}, {'type' : self.type, + 'device_idx' : device_idx, + 'device_name' : device_name_for_process, + 'image_size': self.image_size, + 'face_type': self.face_type, + 'debug': self.debug, + 'output_dir': str(self.output_path), + 'detector': self.detector} + + #override + def get_no_process_started_message(self): + if (self.type == 'rects' or self.type == 'landmarks'): + print ( 'You have no capable GPUs. 
Try to close programs which can consume VRAM, and run again.') + elif self.type == 'final': + print ( 'Unable to start CPU processes.') + + #override + def onHostGetProgressBarDesc(self): + return None + + #override + def onHostGetProgressBarLen(self): + return len (self.input_data) + + #override + def onHostGetData(self): + if not self.manual: + if len (self.input_data) > 0: + return self.input_data.pop(0) + else: + while len (self.input_data) > 0: + data = self.input_data[0] + filename, faces = data + is_frame_done = False + if len(faces) == 0: + self.original_image = cv2.imread(filename) + + (h,w,c) = self.original_image.shape + self.view_scale = 1.0 if self.manual_window_size == 0 else self.manual_window_size / (w if w > h else h) + self.original_image = cv2.resize (self.original_image, ( int(w*self.view_scale), int(h*self.view_scale) ), interpolation=cv2.INTER_LINEAR) + + self.text_lines_img = (image_utils.get_draw_text_lines ( self.original_image, (0,0, self.original_image.shape[1], min(100, self.original_image.shape[0]) ), + [ 'Match landmarks with face exactly.', + '[Enter] - confirm frame', + '[Space] - skip frame', + '[Mouse wheel] - change rect' + ], (1, 1, 1) )*255).astype(np.uint8) + + while True: + key = cv2.waitKey(1) & 0xFF + + if key == ord('\r') or key == ord('\n'): + faces.append ( [(self.rect), self.landmarks] ) + is_frame_done = True + break + elif key == ord(' '): + is_frame_done = True + break + + if self.param_x != self.param['x'] / self.view_scale or \ + self.param_y != self.param['y'] / self.view_scale or \ + self.param_rect_size != self.param['rect_size']: + + self.param_x = self.param['x'] / self.view_scale + self.param_y = self.param['y'] / self.view_scale + self.param_rect_size = self.param['rect_size'] + + self.rect = (self.param_x-self.param_rect_size, self.param_y-self.param_rect_size, self.param_x+self.param_rect_size, self.param_y+self.param_rect_size) + return [filename, [self.rect]] + + else: + is_frame_done = True + + if is_frame_done: + self.result.append ( data ) + self.input_data.pop(0) + self.inc_progress_bar(1) + + return None + + #override + def onHostDataReturn (self, data): + if not self.manual: + self.input_data.insert(0, data) + + #override + def onClientInitialize(self, client_dict): + self.safe_print ('Running on %s.' 
% (client_dict['device_name']) ) + self.type = client_dict['type'] + self.image_size = client_dict['image_size'] + self.face_type = client_dict['face_type'] + self.device_idx = client_dict['device_idx'] + self.output_path = Path(client_dict['output_dir']) if 'output_dir' in client_dict.keys() else None + self.debug = client_dict['debug'] + self.detector = client_dict['detector'] + + self.keras = None + self.tf = None + self.tf_session = None + + self.e = None + if self.type == 'rects': + if self.detector is not None: + if self.detector == 'mt': + self.tf = gpufmkmgr.import_tf ([self.device_idx], allow_growth=True) + self.tf_session = gpufmkmgr.get_tf_session() + self.keras = gpufmkmgr.import_keras() + self.e = facelib.MTCExtractor(self.keras, self.tf, self.tf_session) + elif self.detector == 'dlib': + self.dlib = gpufmkmgr.import_dlib( self.device_idx ) + self.e = facelib.DLIBExtractor(self.dlib) + self.e.__enter__() + + elif self.type == 'landmarks': + self.tf = gpufmkmgr.import_tf([self.device_idx], allow_growth=True) + self.tf_session = gpufmkmgr.get_tf_session() + self.keras = gpufmkmgr.import_keras() + self.e = facelib.LandmarksExtractor(self.keras) + self.e.__enter__() + + elif self.type == 'final': + pass + + return None + + #override + def onClientFinalize(self): + if self.e is not None: + self.e.__exit__() + + #override + def onClientProcessData(self, data): + filename_path = Path( data[0] ) + + image = cv2.imread( str(filename_path) ) + if image is None: + print ( 'Failed to extract %s, reason: cv2.imread() fail.' % ( str(filename_path) ) ) + else: + if self.type == 'rects': + rects = self.e.extract_from_bgr (image) + return [str(filename_path), rects] + + elif self.type == 'landmarks': + rects = data[1] + landmarks = self.e.extract_from_bgr (image, rects) + return [str(filename_path), landmarks] + + elif self.type == 'final': + result = [] + faces = data[1] + + if self.debug: + debug_output_file = '{}_{}'.format( str(Path(str(self.output_path) + '_debug') / filename_path.stem), 'debug.png') + debug_image = image.copy() + + for (face_idx, face) in enumerate(faces): + output_file = '{}_{}{}'.format(str(self.output_path / filename_path.stem), str(face_idx), '.png') + + rect = face[0] + image_landmarks = np.array(face[1]) + + if self.debug: + facelib.LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type) + + if self.face_type == FaceType.MARK_ONLY: + face_image = image + face_image_landmarks = image_landmarks + else: + image_to_face_mat = facelib.LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type) + face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4) + face_image_landmarks = facelib.LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat) + + cv2.imwrite(output_file, face_image) + + a_png = AlignedPNG.load (output_file) + + d = { + 'face_type': FaceType.toString(self.face_type), + 'landmarks': face_image_landmarks.tolist(), + 'yaw_value': facelib.LandmarksProcessor.calc_face_yaw (face_image_landmarks), + 'pitch_value': facelib.LandmarksProcessor.calc_face_pitch (face_image_landmarks), + 'source_filename': filename_path.name, + 'source_rect': rect, + 'source_landmarks': image_landmarks.tolist() + } + a_png.setFaceswapDictData (d) + a_png.save(output_file) + + result.append (output_file) + + if self.debug: + cv2.imwrite(debug_output_file, debug_image ) + + return result + return None + + #overridable + def 
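The 'final' pass above warps each face to the aligned size and embeds a metadata dictionary (face type, aligned landmarks, yaw and pitch, source filename, rect and landmarks) directly into the output PNG. A short sketch of reading that data back with the same AlignedPNG helper; the file path is illustrative:

```python
import numpy as np
from utils.AlignedPNG import AlignedPNG

a_png = AlignedPNG.load('aligned/00001_0.png')   # illustrative path to an aligned face
if a_png is not None:
    d = a_png.getFaceswapDictData()
    if d is not None:
        landmarks = np.array(d['landmarks'])     # landmarks in aligned-face coordinates
        print(d['face_type'], d['yaw_value'], d['source_filename'])
```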
onClientGetDataName (self, data): + #return string identificator of your data + return data[0] + + #override + def onHostResult (self, data, result): + if self.manual == True: + self.landmarks = result[1][0][1] + + image = cv2.addWeighted (self.original_image,1.0,self.text_lines_img,1.0,0) + view_rect = (np.array(self.rect) * self.view_scale).astype(np.int).tolist() + view_landmarks = (np.array(self.landmarks) * self.view_scale).astype(np.int).tolist() + facelib.LandmarksProcessor.draw_rect_landmarks (image, view_rect, view_landmarks, self.image_size, self.face_type) + + cv2.imshow (self.wnd_name, image) + return 0 + else: + if self.type == 'rects': + self.result.append ( result ) + elif self.type == 'landmarks': + self.result.append ( result ) + elif self.type == 'final': + self.result += result + + return 1 + + #override + def onHostProcessEnd(self): + if self.manual == True: + cv2.destroyAllWindows() + + #override + def get_start_return(self): + return self.result + +''' +detector + 'dlib' + 'mt' + 'manual' + +face_type + 'full_face' + 'avatar' +''' +def main (input_dir, output_dir, debug, detector='mt', multi_gpu=True, manual_fix=False, manual_window_size=0, image_size=256, face_type='full_face'): + print ("Running extractor.\r\n") + + input_path = Path(input_dir) + output_path = Path(output_dir) + face_type = FaceType.fromString(face_type) + + if not input_path.exists(): + print('Input directory not found. Please ensure it exists.') + return + + if output_path.exists(): + for filename in Path_utils.get_image_paths(output_path): + Path(filename).unlink() + else: + output_path.mkdir(parents=True, exist_ok=True) + + if debug: + debug_output_path = Path(str(output_path) + '_debug') + if debug_output_path.exists(): + for filename in Path_utils.get_image_paths(debug_output_path): + Path(filename).unlink() + else: + debug_output_path.mkdir(parents=True, exist_ok=True) + + input_path_image_paths = Path_utils.get_image_unique_filestem_paths(input_path, verbose=True) + images_found = len(input_path_image_paths) + faces_detected = 0 + if images_found != 0: + if detector == 'manual': + print ('Performing manual extract...') + extracted_faces = ExtractSubprocessor ([ (filename,[]) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug, manual=True, manual_window_size=manual_window_size).process() + else: + print ('Performing 1st pass...') + extracted_rects = ExtractSubprocessor ([ (x,) for x in input_path_image_paths ], 'rects', image_size, face_type, debug, multi_gpu=multi_gpu, manual=False, detector=detector).process() + + print ('Performing 2nd pass...') + extracted_faces = ExtractSubprocessor (extracted_rects, 'landmarks', image_size, face_type, debug, multi_gpu=multi_gpu, manual=False).process() + + if manual_fix: + print ('Performing manual fix...') + + if all ( np.array ( [ len(data[1]) > 0 for data in extracted_faces] ) == True ): + print ('All faces are detected, manual fix not needed.') + else: + extracted_faces = ExtractSubprocessor (extracted_faces, 'landmarks', image_size, face_type, debug, manual=True, manual_window_size=manual_window_size).process() + + if len(extracted_faces) > 0: + print ('Performing 3rd pass...') + final_imgs_paths = ExtractSubprocessor (extracted_faces, 'final', image_size, face_type, debug, multi_gpu=multi_gpu, manual=False, output_path=output_path).process() + faces_detected = len(final_imgs_paths) + + print('-------------------------') + print('Images found: %d' % (images_found) ) + print('Faces detected: %d' % (faces_detected) ) 
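main() above runs extraction in up to three passes: 'rects' finds face rectangles, 'landmarks' fits landmarks inside them, and 'final' writes the aligned PNGs with embedded metadata, with an optional manual pass for frames where nothing was found. A hedged sketch of driving it from Python instead of the command line; directory names are illustrative:

```python
from mainscripts import Extractor

Extractor.main(input_dir='workspace/data_src',
               output_dir='workspace/data_src/aligned',
               debug=False,
               detector='mt',            # 'dlib', 'mt' or 'manual'
               multi_gpu=False,
               manual_fix=True,          # re-run a manual pass on frames with no detected face
               manual_window_size=1368)
```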
+ print('-------------------------') \ No newline at end of file diff --git a/mainscripts/Sorter.py b/mainscripts/Sorter.py new file mode 100644 index 0000000..2b1e63d --- /dev/null +++ b/mainscripts/Sorter.py @@ -0,0 +1,351 @@ +import os +import sys +import operator +import numpy as np +import cv2 +from tqdm import tqdm +from shutil import copyfile + +from pathlib import Path +from utils import Path_utils +from utils.AlignedPNG import AlignedPNG +from facelib import LandmarksProcessor + +def estimate_blur(image): + if image.ndim == 3: + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + blur_map = cv2.Laplacian(image, cv2.CV_64F) + score = np.var(blur_map) + return score + +def sort_by_brightness(input_path): + print ("Sorting by brightness...") + img_list = [ [x, np.mean ( cv2.cvtColor(cv2.imread(x), cv2.COLOR_BGR2HSV)[...,2].flatten() )] for x in tqdm( Path_utils.get_image_paths(input_path), desc="Loading") ] + print ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + return img_list + +def sort_by_hue(input_path): + print ("Sorting by hue...") + img_list = [ [x, np.mean ( cv2.cvtColor(cv2.imread(x), cv2.COLOR_BGR2HSV)[...,0].flatten() )] for x in tqdm( Path_utils.get_image_paths(input_path), desc="Loading") ] + print ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + return img_list + +def sort_by_blur(input_path): + img_list = [] + print ("Sorting by blur...") + for filepath in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"): + #never mask it by face hull, it worse than whole image blur estimate + img_list.append ( [filepath, estimate_blur (cv2.imread( filepath ))] ) + + print ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + + return img_list + +def sort_by_face(input_path): + + print ("Sorting by face similarity...") + + img_list = [] + for filepath in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"): + filepath = Path(filepath) + + if filepath.suffix != '.png': + print ("%s is not a png file required for sort_by_face" % (filepath.name) ) + continue + + a_png = AlignedPNG.load (str(filepath)) + if a_png is None: + print ("%s failed to load" % (filepath.name) ) + continue + + d = a_png.getFaceswapDictData() + + if d is None or d['landmarks'] is None: + print ("%s - no embedded data found required for sort_by_face" % (filepath.name) ) + continue + + img_list.append( [str(filepath), np.array(d['landmarks']) ] ) + + + img_list_len = len(img_list) + for i in tqdm ( range(0, img_list_len-1), desc="Sorting"): + min_score = float("inf") + j_min_score = i+1 + for j in range(i+1,len(img_list)): + + fl1 = img_list[i][1] + fl2 = img_list[j][1] + score = np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) + + if score < min_score: + min_score = score + j_min_score = j + img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] + + return img_list + +def sort_by_face_dissim(input_path): + + print ("Sorting by face dissimilarity...") + + img_list = [] + for filepath in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"): + filepath = Path(filepath) + + if filepath.suffix != '.png': + print ("%s is not a png file required for sort_by_face_dissim" % (filepath.name) ) + continue + + a_png = AlignedPNG.load (str(filepath)) + if a_png is None: + print ("%s failed to load" % (filepath.name) ) + continue + + d = a_png.getFaceswapDictData() + + if d is None or d['landmarks'] is None: + print ("%s - no embedded data found required for 
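sort_by_blur above scores sharpness as the variance of the Laplacian: sharp images keep more high-frequency detail, so the Laplacian response varies more. A standalone sketch of that measure on a single file; the path is illustrative:

```python
import cv2
import numpy as np

def estimate_blur(image):
    """Variance of the Laplacian; higher means sharper."""
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return np.var(cv2.Laplacian(image, cv2.CV_64F))

img = cv2.imread('aligned/00001_0.png')
if img is not None:
    print('blur score: %.1f' % estimate_blur(img))
```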
sort_by_face_dissim" % (filepath.name) ) + continue + + img_list.append( [str(filepath), np.array(d['landmarks']), 0 ] ) + + img_list_len = len(img_list) + for i in tqdm( range(0, img_list_len-1), desc="Sorting"): + score_total = 0 + for j in range(i+1,len(img_list)): + if i == j: + continue + fl1 = img_list[i][1] + fl2 = img_list[j][1] + score_total += np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) + + img_list[i][2] = score_total + + print ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) + + return img_list + +def sort_by_face_yaw(input_path): + print ("Sorting by face yaw...") + img_list = [] + for filepath in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"): + filepath = Path(filepath) + + if filepath.suffix != '.png': + print ("%s is not a png file required for sort_by_face_dissim" % (filepath.name) ) + continue + + a_png = AlignedPNG.load (str(filepath)) + if a_png is None: + print ("%s failed to load" % (filepath.name) ) + continue + + d = a_png.getFaceswapDictData() + + if d is None or d['yaw_value'] is None: + print ("%s - no embedded data found required for sort_by_face_dissim" % (filepath.name) ) + continue + + img_list.append( [str(filepath), np.array(d['yaw_value']) ] ) + + print ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) + + return img_list + +def sort_by_hist_blur(input_path): + + print ("Sorting by histogram similarity and blur...") + + img_list = [] + for x in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"): + img = cv2.imread(x) + img_list.append ([x, cv2.calcHist([img], [0], None, [256], [0, 256]), + cv2.calcHist([img], [1], None, [256], [0, 256]), + cv2.calcHist([img], [2], None, [256], [0, 256]), + estimate_blur(img) + ]) + + img_list_len = len(img_list) + for i in tqdm( range(0, img_list_len-1), desc="Sorting"): + min_score = float("inf") + j_min_score = i+1 + for j in range(i+1,len(img_list)): + score = cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][3], img_list[j][3], cv2.HISTCMP_BHATTACHARYYA) + if score < min_score: + min_score = score + j_min_score = j + img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] + + l = [] + for i in range(0, img_list_len-1): + score = cv2.compareHist(img_list[i][1], img_list[i+1][1], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][2], img_list[i+1][2], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][3], img_list[i+1][3], cv2.HISTCMP_BHATTACHARYYA) + l += [score] + l = np.array(l) + v = np.mean(l) + if v*2 < np.max(l): + v *= 2 + + new_img_list = [] + + start_group_i = 0 + odd_counter = 0 + for i in tqdm( range(0, img_list_len), desc="Sorting"): + end_group_i = -1 + if i < img_list_len-1: + score = cv2.compareHist(img_list[i][1], img_list[i+1][1], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][2], img_list[i+1][2], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][3], img_list[i+1][3], cv2.HISTCMP_BHATTACHARYYA) + + if score >= v: + end_group_i = i + + elif i == img_list_len-1: + end_group_i = i + + if end_group_i >= start_group_i: + odd_counter += 1 + + s = sorted(img_list[start_group_i:end_group_i+1] , key=operator.itemgetter(4), reverse=True) + if odd_counter % 2 == 0: + new_img_list = new_img_list + s + else: + new_img_list = s + new_img_list + + start_group_i = i + 1 + + return new_img_list + +def 
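The hist* sorts above compare frames by summing the Bhattacharyya distance over the per-channel histograms: near-zero scores mean near-duplicate content, which is what the grouping threshold exploits. A minimal sketch of that distance for two files; file names are illustrative:

```python
import cv2

def channel_hists(path):
    """256-bin histogram for each of the B, G and R channels."""
    img = cv2.imread(path)
    return [cv2.calcHist([img], [c], None, [256], [0, 256]) for c in range(3)]

def hist_distance(h1, h2):
    """Sum of Bhattacharyya distances over the three channels (lower = more similar)."""
    return sum(cv2.compareHist(a, b, cv2.HISTCMP_BHATTACHARYYA) for a, b in zip(h1, h2))

# score = hist_distance(channel_hists('frame_0001.png'), channel_hists('frame_0002.png'))
```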
sort_by_hist(input_path): + + print ("Sorting by histogram similarity...") + + img_list = [] + for x in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"): + img = cv2.imread(x) + img_list.append ([x, cv2.calcHist([img], [0], None, [256], [0, 256]), + cv2.calcHist([img], [1], None, [256], [0, 256]), + cv2.calcHist([img], [2], None, [256], [0, 256]) + ]) + + img_list_len = len(img_list) + for i in tqdm( range(0, img_list_len-1), desc="Sorting"): + min_score = float("inf") + j_min_score = i+1 + for j in range(i+1,len(img_list)): + score = cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][3], img_list[j][3], cv2.HISTCMP_BHATTACHARYYA) + if score < min_score: + min_score = score + j_min_score = j + img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] + + return img_list + +def sort_by_hist_dissim(input_path): + + print ("Sorting by histogram dissimilarity...") + + img_list = [] + for x in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"): + img = cv2.imread(x) + img_list.append ([x, cv2.calcHist([img], [0], None, [256], [0, 256]), + cv2.calcHist([img], [1], None, [256], [0, 256]), + cv2.calcHist([img], [2], None, [256], [0, 256]), 0 + ]) + + img_list_len = len(img_list) + for i in tqdm ( range(0, img_list_len), desc="Sorting"): + score_total = 0 + for j in range( 0, img_list_len): + if i == j: + continue + score_total += cv2.compareHist(img_list[i][1], img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][2], img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + \ + cv2.compareHist(img_list[i][3], img_list[j][3], cv2.HISTCMP_BHATTACHARYYA) + + img_list[i][4] = score_total + + + print ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(4), reverse=True) + + return img_list + +def final_rename(input_path, img_list): + for i in tqdm( range(0,len(img_list)), desc="Renaming" , leave=False): + src = Path (img_list[i][0]) + dst = input_path / ('%.5d_%s' % (i, src.name )) + try: + src.rename (dst) + except: + print ('fail to rename %s' % (src.name) ) + + for i in tqdm( range(0,len(img_list)) , desc="Renaming" ): + src = Path (img_list[i][0]) + + src = input_path / ('%.5d_%s' % (i, src.name)) + dst = input_path / ('%.5d%s' % (i, src.suffix)) + try: + src.rename (dst) + except: + print ('fail to rename %s' % (src.name) ) + +def sort_by_origname(input_path): + print ("Sort by original filename...") + + img_list = [] + for filepath in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"): + filepath = Path(filepath) + + if filepath.suffix != '.png': + print ("%s is not a png file required for sort_by_origname" % (filepath.name) ) + continue + + a_png = AlignedPNG.load (str(filepath)) + if a_png is None: + print ("%s failed to load" % (filepath.name) ) + continue + + d = a_png.getFaceswapDictData() + + if d is None or d['source_filename'] is None: + print ("%s - no embedded data found required for sort_by_origname" % (filepath.name) ) + continue + + img_list.append( [str(filepath), d['source_filename']] ) + + print ("Sorting...") + img_list = sorted(img_list, key=operator.itemgetter(1)) + return img_list + +def main (input_path, sort_by_method): + input_path = Path(input_path) + sort_by_method = sort_by_method.lower() + + print ("Running sort tool.\r\n") + + img_list = [] + + if sort_by_method == 'blur': img_list = sort_by_blur (input_path) + elif sort_by_method == 'face': img_list = 
sort_by_face (input_path) + elif sort_by_method == 'face-dissim': img_list = sort_by_face_dissim (input_path) + elif sort_by_method == 'face-yaw': img_list = sort_by_face_yaw (input_path) + elif sort_by_method == 'hist': img_list = sort_by_hist (input_path) + elif sort_by_method == 'hist-dissim': img_list = sort_by_hist_dissim (input_path) + elif sort_by_method == 'hist-blur': img_list = sort_by_hist_blur (input_path) + elif sort_by_method == 'brightness': img_list = sort_by_brightness (input_path) + elif sort_by_method == 'hue': img_list = sort_by_hue (input_path) + elif sort_by_method == 'origname': img_list = sort_by_origname (input_path) + + final_rename (input_path, img_list) \ No newline at end of file diff --git a/mainscripts/Trainer.py b/mainscripts/Trainer.py new file mode 100644 index 0000000..96b4542 --- /dev/null +++ b/mainscripts/Trainer.py @@ -0,0 +1,289 @@ +import sys +import traceback +import queue +import colorsys +import time +import numpy as np +import itertools + +from pathlib import Path +from utils import Path_utils +from utils import image_utils +import cv2 + +def trainerThread (input_queue, output_queue, training_data_src_dir, training_data_dst_dir, model_path, model_name, save_interval_min=10, debug=False, target_epoch=0, **in_options): + + while True: + try: + training_data_src_path = Path(training_data_src_dir) + training_data_dst_path = Path(training_data_dst_dir) + model_path = Path(model_path) + + if not training_data_src_path.exists(): + print( 'Training data src directory is not exists.') + return + + if not training_data_dst_path.exists(): + print( 'Training data dst directory is not exists.') + return + + if not model_path.exists(): + model_path.mkdir(exist_ok=True) + + + + import models + model = models.import_model(model_name)( + model_path, + training_data_src_path=training_data_src_path, + training_data_dst_path=training_data_dst_path, + debug=debug, + **in_options) + + is_reached_goal = (target_epoch > 0 and model.get_epoch() >= target_epoch) + + def model_save(): + if not debug and not is_reached_goal: + model.save() + + def send_preview(): + if not debug: + previews = model.get_previews() + output_queue.put ( {'op':'show', 'previews': previews, 'epoch':model.get_epoch(), 'loss_history': model.get_loss_history().copy() } ) + else: + previews = [( 'debug, press update for new', model.debug_one_epoch())] + output_queue.put ( {'op':'show', 'previews': previews} ) + + + if model.is_first_run(): + model_save() + + if target_epoch != 0: + if is_reached_goal: + print ('Model already trained to target epoch. You can use preview.') + else: + print('Starting. Target epoch: %d. Press "Enter" to stop training and save model.' % (target_epoch) ) + else: + print('Starting. 
Press "Enter" to stop training and save model.') + + last_save_time = time.time() + for i in itertools.count(0,1): + if not debug: + if not is_reached_goal: + loss_string = model.train_one_epoch() + + print (loss_string, end='\r') + if target_epoch != 0 and model.get_epoch() >= target_epoch: + print ('Reached target epoch.') + model_save() + is_reached_goal = True + print ('You can use preview now.') + + if not is_reached_goal and (time.time() - last_save_time) >= save_interval_min*60: + last_save_time = time.time() + model_save() + send_preview() + + if i==0: + if is_reached_goal: + model.pass_one_epoch() + send_preview() + + if debug: + time.sleep(0.005) + + while not input_queue.empty(): + input = input_queue.get() + op = input['op'] + if op == 'save': + model_save() + elif op == 'preview': + if is_reached_goal: + model.pass_one_epoch() + send_preview() + elif op == 'close': + model_save() + i = -1 + break + + if i == -1: + break + + + + model.finalize() + + except Exception as e: + print ('Error: %s' % (str(e))) + traceback.print_exc() + break + output_queue.put ( {'op':'close'} ) + +def previewThread (input_queue, output_queue): + + + previews = None + loss_history = None + selected_preview = 0 + update_preview = False + is_showing = False + is_waiting_preview = False + epoch = 0 + while True: + if not input_queue.empty(): + input = input_queue.get() + op = input['op'] + if op == 'show': + is_waiting_preview = False + loss_history = input['loss_history'] if 'loss_history' in input.keys() else None + previews = input['previews'] if 'previews' in input.keys() else None + epoch = input['epoch'] if 'epoch' in input.keys() else 0 + if previews is not None: + max_w = 0 + max_h = 0 + for (preview_name, preview_rgb) in previews: + (h, w, c) = preview_rgb.shape + max_h = max (max_h, h) + max_w = max (max_w, w) + + max_size = 800 + if max_h > max_size: + max_w = int( max_w / (max_h / max_size) ) + max_h = max_size + + #make all previews size equal + for preview in previews[:]: + (preview_name, preview_rgb) = preview + (h, w, c) = preview_rgb.shape + if h != max_h or w != max_w: + previews.remove(preview) + previews.append ( (preview_name, cv2.resize(preview_rgb, (max_w, max_h))) ) + selected_preview = selected_preview % len(previews) + update_preview = True + elif op == 'close': + break + + if update_preview: + update_preview = False + (h,w,c) = previews[0][1].shape + + selected_preview_name = previews[selected_preview][0] + selected_preview_rgb = previews[selected_preview][1] + + # HEAD + head_text_color = [0.8]*c + head_lines = [ + '[s]:save [enter]:exit', + '[p]:update [space]:next preview', + 'Preview: "%s" [%d/%d]' % (selected_preview_name,selected_preview+1, len(previews) ) + ] + head_line_height = 15 + head_height = len(head_lines) * head_line_height + head = np.ones ( (head_height,w,c) ) * 0.1 + + for i in range(0, len(head_lines)): + t = i*head_line_height + b = (i+1)*head_line_height + head[t:b, 0:w] += image_utils.get_text_image ( (w,head_line_height,c) , head_lines[i], color=head_text_color ) + + final = head + + if loss_history is not None: + # LOSS HISTORY + loss_history = np.array (loss_history) + + lh_height = 100 + lh_img = np.ones ( (lh_height,w,c) ) * 0.1 + loss_count = len(loss_history[0]) + lh_len = len(loss_history) + + l_per_col = lh_len / w + plist_max = [ [ max (0.0, 0.0, *[ loss_history[i_ab][p] + for i_ab in range( int(col*l_per_col), int((col+1)*l_per_col) ) + ] + ) + for p in range(0,loss_count) + ] + for col in range(0, w) + ] + + + plist_min = [ [ min 
(plist_max[col][p], + plist_max[col][p], + *[ loss_history[i_ab][p] + for i_ab in range( int(col*l_per_col), int((col+1)*l_per_col) ) + ] + ) + for p in range(0,loss_count) + ] + for col in range(0, w) + ] + plist_abs_max = np.mean(loss_history[ len(loss_history) // 5 : ]) * 2 + + if l_per_col >= 1.0: + for col in range(0, w): + for p in range(0,loss_count): + point_color = [1.0]*c + point_color[0:3] = colorsys.hsv_to_rgb ( p * (1.0/loss_count), 1.0, 1.0 ) + + ph_max = int ( (plist_max[col][p] / plist_abs_max) * (lh_height-1) ) + ph_max = np.clip( ph_max, 0, lh_height-1 ) + + ph_min = int ( (plist_min[col][p] / plist_abs_max) * (lh_height-1) ) + ph_min = np.clip( ph_min, 0, lh_height-1 ) + + for ph in range(ph_min, ph_max+1): + lh_img[ (lh_height-ph-1), col ] = point_color + + lh_lines = 5 + lh_line_height = (lh_height-1)/lh_lines + for i in range(0,lh_lines+1): + lh_img[ int(i*lh_line_height), : ] = (0.8,)*c + + last_line_t = int((lh_lines-1)*lh_line_height) + last_line_b = int(lh_lines*lh_line_height) + + if epoch != 0: + lh_text = 'Loss history. Epoch: %d' % (epoch) + else: + lh_text = 'Loss history.' + + lh_img[last_line_t:last_line_b, 0:w] += image_utils.get_text_image ( (w,last_line_b-last_line_t,c), lh_text, color=head_text_color ) + + final = np.concatenate ( [final, lh_img], axis=0 ) + + final = np.concatenate ( [final, selected_preview_rgb], axis=0 ) + + cv2.imshow ( 'Training preview', final) + is_showing = True + + if is_showing: + key = cv2.waitKey(100) + else: + time.sleep(0.1) + key = 0 + + if key == ord('\n') or key == ord('\r'): + output_queue.put ( {'op': 'close'} ) + elif key == ord('s'): + output_queue.put ( {'op': 'save'} ) + elif key == ord('p'): + if not is_waiting_preview: + is_waiting_preview = True + output_queue.put ( {'op': 'preview'} ) + elif key == ord(' '): + selected_preview = (selected_preview + 1) % len(previews) + update_preview = True + + cv2.destroyAllWindows() + +def main (training_data_src_dir, training_data_dst_dir, model_path, model_name, **in_options): + print ("Running trainer.\r\n") + + output_queue = queue.Queue() + input_queue = queue.Queue() + import threading + thread = threading.Thread(target=trainerThread, args=(output_queue, input_queue, training_data_src_dir, training_data_dst_dir, model_path, model_name), kwargs=in_options ) + thread.start() + + previewThread (input_queue, output_queue) \ No newline at end of file diff --git a/mathlib/umeyama.py b/mathlib/umeyama.py new file mode 100644 index 0000000..aad6235 --- /dev/null +++ b/mathlib/umeyama.py @@ -0,0 +1,71 @@ +import numpy as np + +def umeyama(src, dst, estimate_scale): + """Estimate N-D similarity transformation with or without scaling. + Parameters + ---------- + src : (M, N) array + Source coordinates. + dst : (M, N) array + Destination coordinates. + estimate_scale : bool + Whether to estimate scaling factor. + Returns + ------- + T : (N + 1, N + 1) + The homogeneous similarity transformation matrix. The matrix contains + NaN values only if the problem is not well-conditioned. + References + ---------- + .. [1] "Least-squares estimation of transformation parameters between two + point patterns", Shinji Umeyama, PAMI 1991, DOI: 10.1109/34.88573 + """ + + num = src.shape[0] + dim = src.shape[1] + + # Compute mean of src and dst. + src_mean = src.mean(axis=0) + dst_mean = dst.mean(axis=0) + + # Subtract mean from src and dst. + src_demean = src - src_mean + dst_demean = dst - dst_mean + + # Eq. (38). + A = np.dot(dst_demean.T, src_demean) / num + + # Eq. (39). 
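umeyama() below estimates the similarity transform (rotation, uniform scale, translation) that best maps one point set onto another and returns it as a homogeneous matrix. A hedged usage sketch with toy 2-D points; the import path is assumed from the file location:

```python
import numpy as np
from mathlib.umeyama import umeyama

src = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
dst = src * 2.0 + np.array([3.0, 5.0])            # a scaled, shifted copy of src

T = umeyama(src, dst, estimate_scale=True)         # (3, 3) homogeneous matrix
src_h = np.hstack([src, np.ones((len(src), 1))])   # homogeneous coordinates
mapped = (T @ src_h.T).T[:, :2]                    # apply the estimated transform
print(mapped)                                       # should land close to dst
```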
+ d = np.ones((dim,), dtype=np.double) + if np.linalg.det(A) < 0: + d[dim - 1] = -1 + + T = np.eye(dim + 1, dtype=np.double) + + U, S, V = np.linalg.svd(A) + + # Eq. (40) and (43). + rank = np.linalg.matrix_rank(A) + if rank == 0: + return np.nan * T + elif rank == dim - 1: + if np.linalg.det(U) * np.linalg.det(V) > 0: + T[:dim, :dim] = np.dot(U, V) + else: + s = d[dim - 1] + d[dim - 1] = -1 + T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V)) + d[dim - 1] = s + else: + T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T)) + + if estimate_scale: + # Eq. (41) and (42). + scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d) + else: + scale = 1.0 + + T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T) + T[:dim, :dim] *= scale + + return T \ No newline at end of file diff --git a/models/BaseTypes.py b/models/BaseTypes.py new file mode 100644 index 0000000..98f6136 --- /dev/null +++ b/models/BaseTypes.py @@ -0,0 +1,50 @@ +from enum import IntEnum +import cv2 +import numpy as np +from random import randint +from facelib import FaceType + + +class TrainingDataType(IntEnum): + IMAGE = 0 #raw image + + FACE_BEGIN = 1 + FACE = 1 #aligned face unsorted + FACE_YAW_SORTED = 2 #sorted by yaw + FACE_YAW_SORTED_AS_TARGET = 3 #sorted by yaw and included only yaws which exist in TARGET also automatic mirrored + FACE_END = 3 + + QTY = 4 + + +class TrainingDataSample(object): + + def __init__(self, filename=None, face_type=None, shape=None, landmarks=None, yaw=None, mirror=None, nearest_target_list=None): + self.filename = filename + self.face_type = face_type + self.shape = shape + self.landmarks = np.array(landmarks) if landmarks is not None else None + self.yaw = yaw + self.mirror = mirror + self.nearest_target_list = nearest_target_list + + def copy_and_set(self, filename=None, face_type=None, shape=None, landmarks=None, yaw=None, mirror=None, nearest_target_list=None): + return TrainingDataSample( + filename=filename if filename is not None else self.filename, + face_type=face_type if face_type is not None else self.face_type, + shape=shape if shape is not None else self.shape, + landmarks=landmarks if landmarks is not None else self.landmarks.copy(), + yaw=yaw if yaw is not None else self.yaw, + mirror=mirror if mirror is not None else self.mirror, + nearest_target_list=nearest_target_list if nearest_target_list is not None else self.nearest_target_list) + + def load_bgr(self): + img = cv2.imread (self.filename).astype(np.float32) / 255.0 + if self.mirror: + img = img[:,::-1].copy() + return img + + def get_random_nearest_target_sample(self): + if self.nearest_target_list is None: + return None + return self.nearest_target_list[randint (0, len(self.nearest_target_list)-1)] \ No newline at end of file diff --git a/models/ConverterBase.py b/models/ConverterBase.py new file mode 100644 index 0000000..21519aa --- /dev/null +++ b/models/ConverterBase.py @@ -0,0 +1,44 @@ +import copy +''' +You can implement your own Converter, check example ConverterMasked.py +''' + +class ConverterBase(object): + MODE_FACE = 0 + MODE_IMAGE = 1 + + #overridable + def __init__(self, predictor): + self.predictor = predictor + + #overridable + def get_mode(self): + #MODE_FACE calls convert_face + #MODE_IMAGE calls convert_image + return ConverterBase.MODE_FACE + + #overridable + def convert_face (self, img_bgr, img_face_landmarks, debug): + #return float32 image + #if debug , return tuple ( images of any size and channels, ...) 
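ConverterBase defines the minimal converter contract: get_mode() chooses whether convert_face or convert_image is called, dummy_predict() warms up the model, and copy_and_set_predictor() hands a new predictor to worker processes. As its docstring suggests, custom converters subclass it; ConverterMasked.py is the full example, and the pass-through sketch here is only illustrative:

```python
import numpy as np
from models import ConverterBase

class ConverterPassthrough(ConverterBase):
    """Illustrative converter: feed the whole frame to the predictor unchanged."""
    def __init__(self, predictor, predictor_input_size=128, **in_options):
        super().__init__(predictor)
        self.predictor_input_size = predictor_input_size

    def get_mode(self):
        return ConverterBase.MODE_IMAGE            # convert_image() will be called

    def dummy_predict(self):
        self.predictor(np.zeros((self.predictor_input_size,
                                 self.predictor_input_size, 3), dtype=np.float32))

    def convert_image(self, img_bgr, img_landmarks, debug):
        out = self.predictor(img_bgr)
        return (img_bgr, out) if debug else out
```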
+ return image + + #overridable + def convert_image (self, img_bgr, img_landmarks, debug): + #img_landmarks not None, if input image is png with embedded data + #return float32 image + #if debug , return tuple ( images of any size and channels, ...) + return image + + #overridable + def dummy_predict(self): + #do dummy predict here + pass + + def copy(self): + return copy.copy(self) + + def copy_and_set_predictor(self, predictor): + result = self.copy() + result.predictor = predictor + return result \ No newline at end of file diff --git a/models/ConverterImage.py b/models/ConverterImage.py new file mode 100644 index 0000000..1ccfcb9 --- /dev/null +++ b/models/ConverterImage.py @@ -0,0 +1,46 @@ +from models import ConverterBase +from facelib import LandmarksProcessor +from facelib import FaceType + +import cv2 +import numpy as np +from utils import image_utils + +''' +predictor: + input: [predictor_input_size, predictor_input_size, BGR] + output: [predictor_input_size, predictor_input_size, BGR] +''' + +class ConverterImage(ConverterBase): + + #override + def __init__(self, predictor, + predictor_input_size=0, + output_size=0, + **in_options): + + super().__init__(predictor) + + self.predictor_input_size = predictor_input_size + self.output_size = output_size + + #override + def get_mode(self): + return ConverterBase.MODE_IMAGE + + #override + def dummy_predict(self): + self.predictor ( np.zeros ( (self.predictor_input_size, self.predictor_input_size,3), dtype=np.float32) ) + + #override + def convert_image (self, img_bgr, img_landmarks, debug): + img_size = img_bgr.shape[1], img_bgr.shape[0] + + predictor_input_bgr = cv2.resize ( img_bgr, (self.predictor_input_size, self.predictor_input_size), cv2.INTER_LANCZOS4 ) + predicted_bgr = self.predictor ( predictor_input_bgr ) + + output = cv2.resize ( predicted_bgr, (self.output_size, self.output_size), cv2.INTER_LANCZOS4 ) + if debug: + return (img_bgr,output,) + return output diff --git a/models/ConverterMasked.py b/models/ConverterMasked.py new file mode 100644 index 0000000..daba910 --- /dev/null +++ b/models/ConverterMasked.py @@ -0,0 +1,194 @@ +from models import ConverterBase +from facelib import LandmarksProcessor +from facelib import FaceType +import cv2 +import numpy as np +from utils import image_utils + +''' +predictor: + input: [predictor_input_size, predictor_input_size, BGRA] + output: [predictor_input_size, predictor_input_size, BGRA] +''' + +class ConverterMasked(ConverterBase): + + #override + def __init__(self, predictor, + predictor_input_size=0, + output_size=0, + face_type=FaceType.FULL, + erode_mask = True, + blur_mask = True, + clip_border_mask_per = 0, + masked_hist_match = False, + mode='seamless', + erode_mask_modifier=0, + blur_mask_modifier=0, + **in_options): + + super().__init__(predictor) + + self.predictor_input_size = predictor_input_size + self.output_size = output_size + self.face_type = face_type + self.erode_mask = erode_mask + self.blur_mask = blur_mask + self.clip_border_mask_per = clip_border_mask_per + self.masked_hist_match = masked_hist_match + self.mode = mode + self.erode_mask_modifier = erode_mask_modifier + self.blur_mask_modifier = blur_mask_modifier + + if self.erode_mask_modifier != 0 and not self.erode_mask: + print ("Erode mask modifier not used in this model.") + + if self.blur_mask_modifier != 0 and not self.blur_mask: + print ("Blur modifier not used in this model.") + + #override + def get_mode(self): + return ConverterBase.MODE_FACE + + #override + def dummy_predict(self): + 
self.predictor ( np.zeros ( (self.predictor_input_size,self.predictor_input_size,4), dtype=np.float32 ) ) + + #override + def convert_face (self, img_bgr, img_face_landmarks, debug): + if debug: + debugs = [img_bgr.copy()] + + img_size = img_bgr.shape[1], img_bgr.shape[0] + + img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr, img_face_landmarks) + + face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, self.output_size, face_type=self.face_type) + dst_face_bgr = cv2.warpAffine( img_bgr , face_mat, (self.output_size, self.output_size), flags=cv2.INTER_LANCZOS4 ) + dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (self.output_size, self.output_size), flags=cv2.INTER_LANCZOS4 ) + + predictor_input_bgr = cv2.resize (dst_face_bgr, (self.predictor_input_size,self.predictor_input_size)) + predictor_input_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size)) + predictor_input_mask_a = np.expand_dims (predictor_input_mask_a_0, -1) + + predicted_bgra = self.predictor ( np.concatenate( (predictor_input_bgr, predictor_input_mask_a), -1) ) + + prd_face_bgr = np.clip (predicted_bgra[:,:,0:3], 0, 1.0 ) + prd_face_mask_a_0 = np.clip (predicted_bgra[:,:,3], 0.0, 1.0) + prd_face_mask_a_0[ prd_face_mask_a_0 < 0.001 ] = 0.0 + + prd_face_mask_a = np.expand_dims (prd_face_mask_a_0, axis=-1) + prd_face_mask_aaa = np.repeat (prd_face_mask_a, (3,), axis=-1) + + img_prd_face_mask_aaa = cv2.warpAffine( prd_face_mask_aaa, face_mat, img_size, np.zeros(img_bgr.shape, dtype=float), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4 ) + img_prd_face_mask_aaa = np.clip (img_prd_face_mask_aaa, 0.0, 1.0) + + img_face_mask_aaa = img_prd_face_mask_aaa + + if debug: + debugs += [img_face_mask_aaa.copy()] + + img_face_mask_aaa [ img_face_mask_aaa <= 0.1 ] = 0.0 + + img_face_mask_flatten_aaa = img_face_mask_aaa.copy() + img_face_mask_flatten_aaa[img_face_mask_flatten_aaa > 0.9] = 1.0 + + maxregion = np.argwhere(img_face_mask_flatten_aaa==1.0) + + out_img = img_bgr.copy() + if maxregion.size != 0: + miny,minx = maxregion.min(axis=0)[:2] + maxy,maxx = maxregion.max(axis=0)[:2] + lenx = maxx - minx + leny = maxy - miny + masky = int(minx+(lenx//2)) + maskx = int(miny+(leny//2)) + lowest_len = min (lenx, leny) + + if debug: + print ("lowest_len = %f" % (lowest_len) ) + + ero = int( lowest_len * ( 0.126 - lowest_len * 0.00004551365 ) * 0.01*self.erode_mask_modifier ) + blur = int( lowest_len * 0.10 * 0.01*self.blur_mask_modifier ) + + if debug: + print ("ero = %d, blur = %d" % (ero, blur) ) + + img_mask_blurry_aaa = img_face_mask_aaa + if self.erode_mask: + if ero > 0: + img_mask_blurry_aaa = cv2.erode(img_mask_blurry_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) + elif ero < 0: + img_mask_blurry_aaa = cv2.dilate(img_mask_blurry_aaa, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(-ero,-ero)), iterations = 1 ) + + if self.blur_mask and blur > 0: + img_mask_blurry_aaa = cv2.blur(img_mask_blurry_aaa, (blur, blur) ) + + img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) + + if self.clip_border_mask_per > 0: + prd_border_rect_mask_a = np.ones ( prd_face_mask_a.shape, dtype=prd_face_mask_a.dtype) + prd_border_size = int ( prd_border_rect_mask_a.shape[1] * self.clip_border_mask_per ) + + prd_border_rect_mask_a[0:prd_border_size,:,:] = 0 + prd_border_rect_mask_a[-prd_border_size:,:,:] = 0 + prd_border_rect_mask_a[:,0:prd_border_size,:] = 0 + prd_border_rect_mask_a[:,-prd_border_size:,:] = 0 + prd_border_rect_mask_a = 
np.expand_dims(cv2.blur(prd_border_rect_mask_a, (prd_border_size, prd_border_size) ),-1) + + if self.mode == 'hist-match-bw': + prd_face_bgr = cv2.cvtColor(prd_face_bgr, cv2.COLOR_BGR2GRAY) + prd_face_bgr = np.repeat( np.expand_dims (prd_face_bgr, -1), (3,), -1 ) + + if self.mode == 'hist-match' or self.mode == 'hist-match-bw': + if debug: + debugs += [ cv2.warpAffine( prd_face_bgr, face_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) ] + + hist_mask_a = np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=prd_face_bgr.dtype) + + if self.masked_hist_match: + hist_mask_a *= prd_face_mask_a + + new_prd_face_bgr = image_utils.color_hist_match(prd_face_bgr*hist_mask_a, dst_face_bgr*hist_mask_a ) + + prd_face_bgr = new_prd_face_bgr + + if self.mode == 'hist-match-bw': + prd_face_bgr = prd_face_bgr.astype(np.float32) + + out_img = cv2.warpAffine( prd_face_bgr, face_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + + if debug: + debugs += [out_img.copy()] + debugs += [img_mask_blurry_aaa.copy()] + + if self.mode == 'seamless' or self.mode == 'seamless-hist-match': + out_img = np.clip( img_bgr*(1-img_face_mask_aaa) + (out_img*img_face_mask_aaa) , 0, 1.0 ) + if debug: + debugs += [out_img.copy()] + out_img = cv2.seamlessClone( (out_img*255).astype(np.uint8), (img_bgr*255).astype(np.uint8), (img_face_mask_flatten_aaa*255).astype(np.uint8), (masky,maskx) , cv2.NORMAL_CLONE ) + out_img = out_img.astype(np.float32) / 255.0 + + if debug: + debugs += [out_img.copy()] + + if self.clip_border_mask_per > 0: + img_prd_border_rect_mask_a = cv2.warpAffine( prd_border_rect_mask_a, face_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + img_prd_border_rect_mask_a = np.expand_dims (img_prd_border_rect_mask_a, -1) + + out_img = out_img * img_prd_border_rect_mask_a + img_bgr * (1.0 - img_prd_border_rect_mask_a) + img_mask_blurry_aaa *= img_prd_border_rect_mask_a + + out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (out_img*img_mask_blurry_aaa) , 0, 1.0 ) + + if self.mode == 'seamless-hist-match': + out_face_bgr = cv2.warpAffine( out_img, face_mat, (self.output_size, self.output_size) ) + new_out_face_bgr = image_utils.color_hist_match(out_face_bgr, dst_face_bgr ) + new_out = cv2.warpAffine( new_out_face_bgr, face_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) + out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (new_out*img_mask_blurry_aaa) , 0, 1.0 ) + + if debug: + debugs += [out_img.copy()] + + return debugs if debug else out_img + \ No newline at end of file diff --git a/models/ModelBase.py b/models/ModelBase.py new file mode 100644 index 0000000..1ee368c --- /dev/null +++ b/models/ModelBase.py @@ -0,0 +1,332 @@ +import os +import time +import inspect +import operator +import pickle +from pathlib import Path +from utils import Path_utils +from utils import std_utils +from utils import image_utils +import numpy as np +import cv2 +import gpufmkmgr +from .TrainingDataGeneratorBase import TrainingDataGeneratorBase + +''' +You can implement your own model. Check examples. 
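+To add a model, create models/Model_YOURNAME/Model.py with a Model(ModelBase) subclass and override onInitialize / onTrainOneEpoch / onGetPreview / onSave (and get_converter if a custom converter is needed) - Model_H64 and Model_DF are the smallest references.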
+''' +class ModelBase(object): + + #DONT OVERRIDE + def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, + batch_size=0, + multi_gpu = False, + choose_worst_gpu = False, + force_best_gpu_idx = -1, + force_gpu_idxs = None, + write_preview_history = False, + debug = False, **in_options + ): + print ("Loading model...") + self.model_path = model_path + self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat') ) + + self.training_data_src_path = training_data_src_path + self.training_data_dst_path = training_data_dst_path + + self.src_images_paths = None + self.dst_images_paths = None + self.src_yaw_images_paths = None + self.dst_yaw_images_paths = None + self.src_data_generator = None + self.dst_data_generator = None + self.is_training_mode = (training_data_src_path is not None and training_data_dst_path is not None) + self.batch_size = batch_size + self.write_preview_history = write_preview_history + self.debug = debug + self.supress_std_once = ('TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1') + + if self.model_data_path.exists(): + model_data = pickle.loads ( self.model_data_path.read_bytes() ) + self.epoch = model_data['epoch'] + self.options = model_data['options'] + self.loss_history = model_data['loss_history'] if 'loss_history' in model_data.keys() else [] + self.generator_dict_states = model_data['generator_dict_states'] if 'generator_dict_states' in model_data.keys() else None + self.sample_for_preview = model_data['sample_for_preview'] if 'sample_for_preview' in model_data.keys() else None + else: + self.epoch = 0 + self.options = {} + self.loss_history = [] + self.generator_dict_states = None + self.sample_for_preview = None + + if self.write_preview_history: + self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) ) + + if not self.preview_history_path.exists(): + self.preview_history_path.mkdir(exist_ok=True) + else: + if self.epoch == 0: + for filename in Path_utils.get_image_paths(self.preview_history_path): + Path(filename).unlink() + + self.multi_gpu = multi_gpu + + gpu_idx = force_best_gpu_idx if (force_best_gpu_idx >= 0 and gpufmkmgr.isValidDeviceIdx(force_best_gpu_idx)) else gpufmkmgr.getBestDeviceIdx() if not choose_worst_gpu else gpufmkmgr.getWorstDeviceIdx() + gpu_total_vram_gb = gpufmkmgr.getDeviceVRAMTotalGb (gpu_idx) + is_gpu_low_mem = (gpu_total_vram_gb < 4) + + self.gpu_total_vram_gb = gpu_total_vram_gb + + if self.epoch == 0: + #first run + self.options['created_vram_gb'] = gpu_total_vram_gb + self.created_vram_gb = gpu_total_vram_gb + else: + #not first run + if 'created_vram_gb' in self.options.keys(): + self.created_vram_gb = self.options['created_vram_gb'] + else: + self.options['created_vram_gb'] = gpu_total_vram_gb + self.created_vram_gb = gpu_total_vram_gb + + if force_gpu_idxs is not None: + self.gpu_idxs = [ int(x) for x in force_gpu_idxs.split(',') ] + else: + if self.multi_gpu: + self.gpu_idxs = gpufmkmgr.getDeviceIdxsEqualModel( gpu_idx ) + if len(self.gpu_idxs) <= 1: + self.multi_gpu = False + else: + self.gpu_idxs = [gpu_idx] + + self.tf = gpufmkmgr.import_tf(self.gpu_idxs,allow_growth=False) + self.tf_sess = gpufmkmgr.get_tf_session() + self.keras = gpufmkmgr.import_keras() + self.keras_contrib = gpufmkmgr.import_keras_contrib() + + self.onInitialize(**in_options) + + if self.debug or self.batch_size == 0: + self.batch_size = 1 + + if self.is_training_mode: + if self.generator_list is None: + raise Exception( 'You didnt 
set_training_data_generators()') + else: + for i, generator in enumerate(self.generator_list): + if not isinstance(generator, TrainingDataGeneratorBase): + raise Exception('training data generator is not subclass of TrainingDataGeneratorBase') + + if self.generator_dict_states is not None and i < len(self.generator_dict_states): + generator.set_dict_state ( self.generator_dict_states[i] ) + + if self.sample_for_preview is None: + self.sample_for_preview = self.generate_next_sample() + + print ("===== Model summary =====") + print ("== Model name: " + self.get_model_name()) + print ("==") + print ("== Current epoch: " + str(self.epoch) ) + print ("==") + print ("== Options:") + print ("== |== batch_size : %s " % (self.batch_size) ) + print ("== |== multi_gpu : %s " % (self.multi_gpu) ) + for key in self.options.keys(): + print ("== |== %s : %s" % (key, self.options[key]) ) + + print ("== Running on:") + for idx in self.gpu_idxs: + print ("== |== [%d : %s]" % (idx, gpufmkmgr.getDeviceName(idx)) ) + + if self.gpu_total_vram_gb == 2: + print ("==") + print ("== WARNING: You are using 2GB GPU. Result quality may be significantly decreased.") + print ("== If training does not start, close all programs and try again.") + print ("== Also you can disable Windows Aero Desktop to get extra free VRAM.") + print ("==") + + print ("=========================") + + #overridable + def onInitialize(self, **in_options): + ''' + initialize your keras models + + store and retrieve your model options in self.options[''] + + check example + ''' + pass + + #overridable + def onSave(self): + #save your keras models here + pass + + #overridable + def onTrainOneEpoch(self, sample): + #train your keras models here + + #return array of losses + return ( ('loss_src', 0), ('loss_dst', 0) ) + + #overridable + def onGetPreview(self, sample): + #you can return multiple previews + #return [ ('preview_name',preview_rgb), ... 
] + return [] + + #overridable if you want model name differs from folder name + def get_model_name(self): + return Path(inspect.getmodule(self).__file__).parent.name.rsplit("_", 1)[1] + + #overridable + def get_converter(self, **in_options): + #return existing or your own converter which derived from base + from .ConverterBase import ConverterBase + return ConverterBase(self, **in_options) + + def to_multi_gpu_model_if_possible (self, models_list): + if len(self.gpu_idxs) > 1: + #make batch_size to divide on GPU count without remainder + self.batch_size = int( self.batch_size / len(self.gpu_idxs) ) + if self.batch_size == 0: + self.batch_size = 1 + self.batch_size *= len(self.gpu_idxs) + + result = [] + for model in models_list: + for i in range( len(model.output_names) ): + model.output_names = 'output_%d' % (i) + result += [ self.keras.utils.multi_gpu_model( model, self.gpu_idxs ) ] + + return result + else: + return models_list + + def get_previews(self): + return self.onGetPreview ( self.last_sample ) + + def get_static_preview(self): + return self.onGetPreview (self.sample_for_preview)[0][1] #first preview, and bgr + + def save(self): + print ("Saving...") + + if self.supress_std_once: + supressor = std_utils.suppress_stdout_stderr() + supressor.__enter__() + + self.onSave() + + if self.supress_std_once: + supressor.__exit__() + + model_data = { + 'epoch': self.epoch, + 'options': self.options, + 'loss_history': self.loss_history, + 'generator_dict_states' : [generator.get_dict_state() for generator in self.generator_list], + 'sample_for_preview' : self.sample_for_preview + } + self.model_data_path.write_bytes( pickle.dumps(model_data) ) + + def save_weights_safe(self, model_filename_list): + for model, filename in model_filename_list: + model.save_weights( filename + '.tmp' ) + + for model, filename in model_filename_list: + source_filename = Path(filename+'.tmp') + target_filename = Path(filename) + if target_filename.exists(): + target_filename.unlink() + + source_filename.rename ( str(target_filename) ) + + def debug_one_epoch(self): + images = [] + for generator in self.generator_list: + for i,batch in enumerate(next(generator)): + images.append( batch[0] ) + + return image_utils.equalize_and_stack_square (images) + + def generate_next_sample(self): + return [next(generator) for generator in self.generator_list] + + def train_one_epoch(self): + if self.supress_std_once: + supressor = std_utils.suppress_stdout_stderr() + supressor.__enter__() + + self.last_sample = self.generate_next_sample() + + epoch_time = time.time() + + losses = self.onTrainOneEpoch(self.last_sample) + + epoch_time = time.time() - epoch_time + + self.loss_history.append ( [float(loss[1]) for loss in losses] ) + + if self.supress_std_once: + supressor.__exit__() + self.supress_std_once = False + + if self.write_preview_history: + if self.epoch % 10 == 0: + img = (self.get_static_preview() * 255).astype(np.uint8) + cv2.imwrite ( str (self.preview_history_path / ('%.6d.jpg' %( self.epoch) )), img ) + + self.epoch += 1 + + #............."Saving... 
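+        # build a compact status line: zero-padded epoch counter, epoch time in milliseconds, then every named loss returned by onTrainOneEpoch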
+ loss_string = "Training [#{0:06d}][{1:04d}ms]".format ( self.epoch, int(epoch_time*1000) % 10000 ) + for (loss_name, loss_value) in losses: + loss_string += " %s:%.3f" % (loss_name, loss_value) + + return loss_string + + def pass_one_epoch(self): + self.last_sample = self.generate_next_sample() + + def finalize(self): + gpufmkmgr.finalize_keras() + + def is_first_run(self): + return self.epoch == 0 + + def is_debug(self): + return self.debug + + def get_epoch(self): + return self.epoch + + def get_loss_history(self): + return self.loss_history + + def set_training_data_generators (self, generator_list): + self.generator_list = generator_list + + def get_training_data_generators (self): + return self.generator_list + + def get_strpath_storage_for_file(self, filename): + return str( self.model_path / (self.get_model_name() + '_' + filename) ) + + def set_vram_batch_requirements (self, d): + #example d = {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48} + keys = [x for x in d.keys()] + + if self.gpu_total_vram_gb < keys[0]: + raise Exception ('Sorry, this model works only on %dGB+ GPU' % ( keys[0] ) ) + + if self.batch_size == 0: + for x in keys: + if self.gpu_total_vram_gb <= x: + self.batch_size = d[x] + break + + if self.batch_size == 0: + self.batch_size = d[ keys[-1] ] \ No newline at end of file diff --git a/models/Model_AVATAR/Model.py b/models/Model_AVATAR/Model.py new file mode 100644 index 0000000..6557726 --- /dev/null +++ b/models/Model_AVATAR/Model.py @@ -0,0 +1,223 @@ +from models import ModelBase +from models import TrainingDataType +import numpy as np +import cv2 +from nnlib import tf_dssim +from nnlib import conv +from nnlib import upscale + +class Model(ModelBase): + + encoder64H5 = 'encoder64.h5' + decoder64_srcH5 = 'decoder64_src.h5' + decoder64_dstH5 = 'decoder64_dst.h5' + encoder128H5 = 'encoder128.h5' + decoder128_srcH5 = 'decoder128_src.h5' + + #override + def onInitialize(self, **in_options): + tf = self.tf + keras = self.keras + K = keras.backend + + self.set_vram_batch_requirements( {4:8,5:16,6:20,7:24,8:32,9:48} ) + + self.encoder64, self.decoder64_src, self.decoder64_dst, self.encoder128, self.decoder128_src = self.BuildAE() + img_shape64 = (64,64,1) + img_shape128 = (256,256,3) + + if not self.is_first_run(): + self.encoder64.load_weights (self.get_strpath_storage_for_file(self.encoder64H5)) + self.decoder64_src.load_weights (self.get_strpath_storage_for_file(self.decoder64_srcH5)) + self.decoder64_dst.load_weights (self.get_strpath_storage_for_file(self.decoder64_dstH5)) + self.encoder128.load_weights (self.get_strpath_storage_for_file(self.encoder128H5)) + self.decoder128_src.load_weights (self.get_strpath_storage_for_file(self.decoder128_srcH5)) + + if self.is_training_mode: + self.encoder64, self.decoder64_src, self.decoder64_dst, self.encoder128, self.decoder128_src = self.to_multi_gpu_model_if_possible ( [self.encoder64, self.decoder64_src, self.decoder64_dst, self.encoder128, self.decoder128_src] ) + + input_src_64 = keras.layers.Input(img_shape64) + input_src_target64 = keras.layers.Input(img_shape64) + input_src_target128 = keras.layers.Input(img_shape128) + input_dst_64 = keras.layers.Input(img_shape64) + input_dst_target64 = keras.layers.Input(img_shape64) + + src_code64 = self.encoder64(input_src_64) + dst_code64 = self.encoder64(input_dst_64) + + rec_src64 = self.decoder64_src(src_code64) + rec_dst64 = self.decoder64_dst(dst_code64) + + src64_loss = tf_dssim(tf, input_src_target64, rec_src64) + dst64_loss = tf_dssim(tf, input_dst_target64, rec_dst64) + 
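+        # the two 64px DSSIM losses are summed and minimized jointly over encoder64 and both 64px decoders via the Adam get_updates call below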
total64_loss = src64_loss + dst64_loss + + self.ed64_train = K.function ([input_src_64, input_src_target64, input_dst_64, input_dst_target64],[K.mean(total64_loss)], + self.keras.optimizers.Adam(lr=5e-5, beta_1=0.5, beta_2=0.999).get_updates(total64_loss, self.encoder64.trainable_weights + self.decoder64_src.trainable_weights + self.decoder64_dst.trainable_weights) + ) + + src_code128 = self.encoder128(input_src_64) + rec_src128 = self.decoder128_src(src_code128) + src128_loss = tf_dssim(tf, input_src_target128, rec_src128) + + self.ed128_train = K.function ([input_src_64, input_src_target128],[K.mean(src128_loss)], + self.keras.optimizers.Adam(lr=5e-5, beta_1=0.5, beta_2=0.999).get_updates(src128_loss, self.encoder128.trainable_weights + self.decoder128_src.trainable_weights) + ) + + src_code128 = self.encoder128(rec_src64) + rec_src128 = self.decoder128_src(src_code128) + + self.src128_view = K.function ([input_src_64], [rec_src128]) + + if self.is_training_mode: + from models import TrainingDataGenerator + f = TrainingDataGenerator.SampleTypeFlags + self.set_training_data_generators ([ + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ + [f.WARPED_TRANSFORMED | f.HALF_FACE | f.MODE_G, 64], + [f.TRANSFORMED | f.HALF_FACE | f.MODE_G, 64], + [f.TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 256], + [f.SOURCE | f.HALF_FACE | f.MODE_G, 64], + [f.SOURCE | f.HALF_FACE | f.MODE_GGG, 256] ] ), + + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ + [f.WARPED_TRANSFORMED | f.HALF_FACE | f.MODE_G, 64], + [f.TRANSFORMED | f.HALF_FACE | f.MODE_G, 64], + [f.SOURCE | f.HALF_FACE | f.MODE_G, 64], + [f.SOURCE | f.HALF_FACE | f.MODE_GGG, 256] ] ) + ]) + #override + def onSave(self): + self.save_weights_safe( [[self.encoder64, self.get_strpath_storage_for_file(self.encoder64H5)], + [self.decoder64_src, self.get_strpath_storage_for_file(self.decoder64_srcH5)], + [self.decoder64_dst, self.get_strpath_storage_for_file(self.decoder64_dstH5)], + [self.encoder128, self.get_strpath_storage_for_file(self.encoder128H5)], + [self.decoder128_src, self.get_strpath_storage_for_file(self.decoder128_srcH5)], + ] ) + + #override + def onTrainOneEpoch(self, sample): + warped_src64, target_src64, target_src128, target_src_source64_G, target_src_source128_GGG = sample[0] + warped_dst64, target_dst64, target_dst_source64_G, target_dst_source128_GGG = sample[1] + + loss64, = self.ed64_train ([warped_src64, target_src64, warped_dst64, target_dst64]) + loss256, = self.ed128_train ([warped_src64, target_src128]) + + return ( ('loss64', loss64), ('loss256', loss256) ) + + #override + def onGetPreview(self, sample): + n_samples = 4 + test_B = sample[1][2][0:n_samples] + test_B128 = sample[1][3][0:n_samples] + + BB, = self.src128_view ([test_B]) + + st = [] + for i in range(n_samples // 2): + st.append ( np.concatenate ( ( + test_B128[i*2+0], BB[i*2+0], test_B128[i*2+1], BB[i*2+1], + ), axis=1) ) + return [ ('AVATAR', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, img): + x, = self.src128_view ([ np.expand_dims(img, 0) ])[0] + return x + + #override + def get_converter(self, **in_options): + return ConverterAvatar(self.predictor_func, predictor_input_size=64, output_size=256, **in_options) + + def BuildAE(self): + keras, K = self.keras, self.keras.backend + + def Encoder(_input): + x = keras.layers.convolutional.Conv2D(90, 
kernel_size=5, strides=1, padding='same')(_input) + x = keras.layers.convolutional.Conv2D(90, kernel_size=5, strides=1, padding='same')(x) + x = keras.layers.MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x) + + x = keras.layers.convolutional.Conv2D(180, kernel_size=3, strides=1, padding='same')(x) + x = keras.layers.convolutional.Conv2D(180, kernel_size=3, strides=1, padding='same')(x) + x = keras.layers.MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x) + + x = keras.layers.convolutional.Conv2D(360, kernel_size=3, strides=1, padding='same')(x) + x = keras.layers.convolutional.Conv2D(360, kernel_size=3, strides=1, padding='same')(x) + x = keras.layers.MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x) + + x = keras.layers.Dense (1024)(x) + x = keras.layers.advanced_activations.LeakyReLU(0.1)(x) + x = keras.layers.Dropout(0.5)(x) + + x = keras.layers.Dense (1024)(x) + x = keras.layers.advanced_activations.LeakyReLU(0.1)(x) + x = keras.layers.Dropout(0.5)(x) + x = keras.layers.Flatten()(x) + x = keras.layers.Dense (64)(x) + return keras.models.Model (_input, x) + + encoder128 = Encoder( keras.layers.Input ( (64, 64, 1) ) ) + encoder64 = Encoder( keras.layers.Input ( (64, 64, 1) ) ) + + def decoder128_3(encoder): + decoder_input = keras.layers.Input ( K.int_shape(encoder.outputs[0])[1:] ) + x = decoder_input + x = self.keras.layers.Dense(16 * 16 * 720)(x) + x = keras.layers.Reshape ( (16, 16, 720) )(x) + x = upscale(keras, x, 720) + x = upscale(keras, x, 360) + x = upscale(keras, x, 180) + x = upscale(keras, x, 90) + x = keras.layers.convolutional.Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + return keras.models.Model(decoder_input, x) + + def decoder64_1(encoder): + decoder_input = keras.layers.Input ( K.int_shape(encoder.outputs[0])[1:] ) + x = decoder_input + x = self.keras.layers.Dense(8 * 8 * 720)(x) + x = keras.layers.Reshape ( (8,8,720) )(x) + x = upscale(keras, x, 360) + x = upscale(keras, x, 180) + x = upscale(keras, x, 90) + x = keras.layers.convolutional.Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(x) + return keras.models.Model(decoder_input, x) + + return encoder64, decoder64_1(encoder64), decoder64_1(encoder64), encoder128, decoder128_3(encoder128) + +from models import ConverterBase +from facelib import FaceType +from facelib import LandmarksProcessor +class ConverterAvatar(ConverterBase): + + #override + def __init__(self, predictor, + predictor_input_size=0, + output_size=0, + **in_options): + + super().__init__(predictor) + + self.predictor_input_size = predictor_input_size + self.output_size = output_size + + #override + def get_mode(self): + return ConverterBase.MODE_IMAGE + + #override + def dummy_predict(self): + self.predictor ( np.zeros ( (self.predictor_input_size, self.predictor_input_size,1), dtype=np.float32) ) + + #override + def convert_image (self, img_bgr, img_face_landmarks, debug): + img_size = img_bgr.shape[1], img_bgr.shape[0] + + face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, self.predictor_input_size, face_type=FaceType.HALF ) + predictor_input_bgr = cv2.warpAffine( img_bgr, face_mat, (self.predictor_input_size, self.predictor_input_size), flags=cv2.INTER_LANCZOS4 ) + predictor_input_g = np.expand_dims(cv2.cvtColor(predictor_input_bgr, cv2.COLOR_BGR2GRAY),-1) + + predicted_bgr = self.predictor ( predictor_input_g ) + + output = cv2.resize ( predicted_bgr, (self.output_size, self.output_size), cv2.INTER_LANCZOS4 ) + if debug: + return (img_bgr,output,) + return 
output \ No newline at end of file diff --git a/models/Model_AVATAR/__init__.py b/models/Model_AVATAR/__init__.py new file mode 100644 index 0000000..cdb3fe7 --- /dev/null +++ b/models/Model_AVATAR/__init__.py @@ -0,0 +1 @@ +from .Model import Model \ No newline at end of file diff --git a/models/Model_DF/Model.py b/models/Model_DF/Model.py new file mode 100644 index 0000000..ce907a2 --- /dev/null +++ b/models/Model_DF/Model.py @@ -0,0 +1,153 @@ +from models import ModelBase +from models import TrainingDataType +import numpy as np +import cv2 + +from nnlib import DSSIMMaskLossClass +from nnlib import conv +from nnlib import upscale +from facelib import FaceType + +class Model(ModelBase): + + encoderH5 = 'encoder.h5' + decoder_srcH5 = 'decoder_src.h5' + decoder_dstH5 = 'decoder_dst.h5' + + #override + def onInitialize(self, **in_options): + self.set_vram_batch_requirements( {5:16,6:16,7:16,8:24,9:24,10:32,11:32,12:32,13:48} ) + + ae_input_layer = self.keras.layers.Input(shape=(128, 128, 3)) + mask_layer = self.keras.layers.Input(shape=(128, 128, 1)) #same as output + + self.encoder = self.Encoder(ae_input_layer) + self.decoder_src = self.Decoder() + self.decoder_dst = self.Decoder() + + if not self.is_first_run(): + self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5)) + self.decoder_src.load_weights (self.get_strpath_storage_for_file(self.decoder_srcH5)) + self.decoder_dst.load_weights (self.get_strpath_storage_for_file(self.decoder_dstH5)) + + self.autoencoder_src = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder_src(self.encoder(ae_input_layer))) + self.autoencoder_dst = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder_dst(self.encoder(ae_input_layer))) + + if self.is_training_mode: + self.autoencoder_src, self.autoencoder_dst = self.to_multi_gpu_model_if_possible ( [self.autoencoder_src, self.autoencoder_dst] ) + + optimizer = self.keras.optimizers.Adam(lr=5e-5, beta_1=0.5, beta_2=0.999) + dssimloss = DSSIMMaskLossClass(self.tf)([mask_layer]) + self.autoencoder_src.compile(optimizer=optimizer, loss=[dssimloss, 'mse'] ) + self.autoencoder_dst.compile(optimizer=optimizer, loss=[dssimloss, 'mse'] ) + + if self.is_training_mode: + from models import TrainingDataGenerator + f = TrainingDataGenerator.SampleTypeFlags + self.set_training_data_generators ([ + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_M | f.MASK_FULL, 128] ], random_flip=True ), + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_M | f.MASK_FULL, 128] ], random_flip=True ) + ]) + #override + def onSave(self): + self.save_weights_safe( [[self.encoder, self.get_strpath_storage_for_file(self.encoderH5)], + [self.decoder_src, self.get_strpath_storage_for_file(self.decoder_srcH5)], + [self.decoder_dst, self.get_strpath_storage_for_file(self.decoder_dstH5)]] ) + + #override + def onTrainOneEpoch(self, sample): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], 
[target_src, target_src_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) + AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) + BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('DF', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + + face_128_bgr = face[...,0:3] + face_128_mask = np.expand_dims(face[...,3],-1) + + x, mx = self.autoencoder_src.predict ( [ np.expand_dims(face_128_bgr,0), np.expand_dims(face_128_mask,0) ] ) + x, mx = x[0], mx[0] + + return np.concatenate ( (x,mx), -1 ) + + #override + def get_converter(self, **in_options): + from models import ConverterMasked + + if 'masked_hist_match' not in in_options.keys() or in_options['masked_hist_match'] is None: + in_options['masked_hist_match'] = True + + if 'erode_mask_modifier' not in in_options.keys(): + in_options['erode_mask_modifier'] = 0 + in_options['erode_mask_modifier'] += 30 + + if 'blur_mask_modifier' not in in_options.keys(): + in_options['blur_mask_modifier'] = 0 + + return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.FULL, clip_border_mask_per=0.046875, **in_options) + + def Encoder(self, input_layer): + x = input_layer + x = conv(self.keras, x, 128) + x = conv(self.keras, x, 256) + x = conv(self.keras, x, 512) + x = conv(self.keras, x, 1024) + + x = self.keras.layers.Dense(512)(self.keras.layers.Flatten()(x)) + x = self.keras.layers.Dense(8 * 8 * 512)(x) + x = self.keras.layers.Reshape((8, 8, 512))(x) + x = upscale(self.keras, x, 512) + + return self.keras.models.Model(input_layer, x) + + def Decoder(self): + input_ = self.keras.layers.Input(shape=(16, 16, 512)) + x = input_ + x = upscale(self.keras, x, 512) + x = upscale(self.keras, x, 256) + x = upscale(self.keras, x, 128) + + y = input_ #mask decoder + y = upscale(self.keras, y, 512) + y = upscale(self.keras, y, 256) + y = upscale(self.keras, y, 128) + + x = self.keras.layers.convolutional.Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + y = self.keras.layers.convolutional.Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + + return self.keras.models.Model(input_, [x,y]) diff --git a/models/Model_DF/__init__.py b/models/Model_DF/__init__.py new file mode 100644 index 0000000..cdb3fe7 --- /dev/null +++ b/models/Model_DF/__init__.py @@ -0,0 +1 @@ +from .Model import Model \ No newline at end of file diff --git a/models/Model_H128/Model.py b/models/Model_H128/Model.py new file mode 100644 index 0000000..1b6ab97 --- /dev/null +++ b/models/Model_H128/Model.py @@ -0,0 +1,174 @@ +from models import ModelBase +from models import TrainingDataType +import numpy as np + +from nnlib import DSSIMMaskLossClass +from nnlib import conv +from nnlib import upscale +from facelib import FaceType + +import cv2 + +class Model(ModelBase): + + 
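+    # H128: half-face autoencoder at 128x128 resolution; Encoder/Decoder below switch to smaller layer sizes when the model was created on a card with less than 5GB VRAM (self.created_vram_gb)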
encoderH5 = 'encoder.h5' + decoder_srcH5 = 'decoder_src.h5' + decoder_dstH5 = 'decoder_dst.h5' + + #override + def onInitialize(self, **in_options): + self.set_vram_batch_requirements( {3:2,4:2,4:4,5:8,6:8,7:16,8:16,9:24,10:24,11:32,12:32,13:48} ) + + ae_input_layer = self.keras.layers.Input(shape=(128, 128, 3)) + mask_layer = self.keras.layers.Input(shape=(128, 128, 1)) #same as output + + self.encoder = self.Encoder(ae_input_layer, self.created_vram_gb) + self.decoder_src = self.Decoder(self.created_vram_gb) + self.decoder_dst = self.Decoder(self.created_vram_gb) + + if not self.is_first_run(): + self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5)) + self.decoder_src.load_weights (self.get_strpath_storage_for_file(self.decoder_srcH5)) + self.decoder_dst.load_weights (self.get_strpath_storage_for_file(self.decoder_dstH5)) + + self.autoencoder_src = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder_src(self.encoder(ae_input_layer))) + self.autoencoder_dst = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder_dst(self.encoder(ae_input_layer))) + + if self.is_training_mode: + self.autoencoder_src, self.autoencoder_dst = self.to_multi_gpu_model_if_possible ( [self.autoencoder_src, self.autoencoder_dst] ) + + optimizer = self.keras.optimizers.Adam(lr=5e-5, beta_1=0.5, beta_2=0.999) + dssimloss = DSSIMMaskLossClass(self.tf)([mask_layer]) + self.autoencoder_src.compile(optimizer=optimizer, loss=[dssimloss, 'mae']) + self.autoencoder_dst.compile(optimizer=optimizer, loss=[dssimloss, 'mae']) + + if self.is_training_mode: + from models import TrainingDataGenerator + f = TrainingDataGenerator.SampleTypeFlags + self.set_training_data_generators ([ + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.HALF_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.HALF_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.HALF_FACE | f.MODE_M | f.MASK_FULL, 128] ], random_flip=True ), + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.HALF_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.HALF_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.HALF_FACE | f.MODE_M | f.MASK_FULL, 128] ], random_flip=True ) + ]) + + #override + def onSave(self): + self.save_weights_safe( [[self.encoder, self.get_strpath_storage_for_file(self.encoderH5)], + [self.decoder_src, self.get_strpath_storage_for_file(self.decoder_srcH5)], + [self.decoder_dst, self.get_strpath_storage_for_file(self.decoder_dstH5)]]) + + #override + def onTrainOneEpoch(self, sample): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) + AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) + BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) + + mAA = np.repeat ( mAA, (3,), 
-1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('H128', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + face_128_bgr = face[...,0:3] + face_128_mask = np.expand_dims(face[...,3],-1) + + x, mx = self.autoencoder_src.predict ( [ np.expand_dims(face_128_bgr,0), np.expand_dims(face_128_mask,0) ] ) + x, mx = x[0], mx[0] + + return np.concatenate ( (x,mx), -1 ) + + #override + def get_converter(self, **in_options): + from models import ConverterMasked + + if 'masked_hist_match' not in in_options.keys() or in_options['masked_hist_match'] is None: + in_options['masked_hist_match'] = True + + if 'erode_mask_modifier' not in in_options.keys(): + in_options['erode_mask_modifier'] = 0 + in_options['erode_mask_modifier'] += 100 + + if 'blur_mask_modifier' not in in_options.keys(): + in_options['blur_mask_modifier'] = 0 + in_options['blur_mask_modifier'] += 100 + + return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.HALF, **in_options) + + def Encoder(self, input_layer, created_vram_gb): + x = input_layer + + if created_vram_gb >= 5: + x = conv(self.keras, x, 128) + x = conv(self.keras, x, 256) + x = conv(self.keras, x, 512) + x = conv(self.keras, x, 1024) + x = self.keras.layers.Dense(512)(self.keras.layers.Flatten()(x)) + x = self.keras.layers.Dense(8 * 8 * 512)(x) + x = self.keras.layers.Reshape((8, 8, 512))(x) + x = upscale(self.keras, x, 512) + else: + x = conv(self.keras, x, 128) + x = conv(self.keras, x, 256) + x = conv(self.keras, x, 512) + x = conv(self.keras, x, 1024) + x = self.keras.layers.Dense(256)(self.keras.layers.Flatten()(x)) + x = self.keras.layers.Dense(8 * 8 * 256)(x) + x = self.keras.layers.Reshape((8, 8, 256))(x) + x = upscale(self.keras, x, 256) + + return self.keras.models.Model(input_layer, x) + + def Decoder(self, created_vram_gb): + if created_vram_gb >= 5: + input_ = self.keras.layers.Input(shape=(16, 16, 512)) + x = input_ + x = upscale(self.keras, x, 512) + x = upscale(self.keras, x, 256) + x = upscale(self.keras, x, 128) + + y = input_ #mask decoder + y = upscale(self.keras, y, 512) + y = upscale(self.keras, y, 256) + y = upscale(self.keras, y, 128) + else: + input_ = self.keras.layers.Input(shape=(16, 16, 256)) + x = input_ + x = upscale(self.keras, x, 256) + x = upscale(self.keras, x, 128) + x = upscale(self.keras, x, 64) + + y = input_ #mask decoder + y = upscale(self.keras, y, 256) + y = upscale(self.keras, y, 128) + y = upscale(self.keras, y, 64) + + x = self.keras.layers.convolutional.Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + y = self.keras.layers.convolutional.Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + return self.keras.models.Model(input_, [x,y]) diff --git a/models/Model_H128/__init__.py b/models/Model_H128/__init__.py new file mode 100644 index 0000000..cdb3fe7 --- /dev/null +++ b/models/Model_H128/__init__.py @@ -0,0 +1 @@ +from .Model import Model \ No newline at end of file diff --git a/models/Model_H64/Model.py b/models/Model_H64/Model.py new file mode 100644 index 0000000..611532f --- /dev/null +++ b/models/Model_H64/Model.py @@ -0,0 +1,167 @@ +from models import ModelBase +from models import TrainingDataType +import numpy as np + +from nnlib import DSSIMMaskLossClass +from nnlib import conv +from 
nnlib import upscale +from facelib import FaceType + +class Model(ModelBase): + + encoderH5 = 'encoder.h5' + decoder_srcH5 = 'decoder_src.h5' + decoder_dstH5 = 'decoder_dst.h5' + + #override + def onInitialize(self, **in_options): + self.set_vram_batch_requirements( {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48} ) + + ae_input_layer = self.keras.layers.Input(shape=(64, 64, 3)) + mask_layer = self.keras.layers.Input(shape=(64, 64, 1)) #same as output + + self.encoder = self.Encoder(ae_input_layer, self.created_vram_gb) + self.decoder_src = self.Decoder(self.created_vram_gb) + self.decoder_dst = self.Decoder(self.created_vram_gb) + + if not self.is_first_run(): + self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5)) + self.decoder_src.load_weights (self.get_strpath_storage_for_file(self.decoder_srcH5)) + self.decoder_dst.load_weights (self.get_strpath_storage_for_file(self.decoder_dstH5)) + + self.autoencoder_src = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder_src(self.encoder(ae_input_layer))) + self.autoencoder_dst = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder_dst(self.encoder(ae_input_layer))) + + if self.is_training_mode: + self.autoencoder_src, self.autoencoder_dst = self.to_multi_gpu_model_if_possible ( [self.autoencoder_src, self.autoencoder_dst] ) + + optimizer = self.keras.optimizers.Adam(lr=5e-5, beta_1=0.5, beta_2=0.999) + dssimloss = DSSIMMaskLossClass(self.tf)([mask_layer]) + self.autoencoder_src.compile(optimizer=optimizer, loss=[dssimloss, 'mae']) + self.autoencoder_dst.compile(optimizer=optimizer, loss=[dssimloss, 'mae']) + + if self.is_training_mode: + from models import TrainingDataGenerator + f = TrainingDataGenerator.SampleTypeFlags + self.set_training_data_generators ([ + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.HALF_FACE | f.MODE_BGR, 64], [f.TRANSFORMED | f.HALF_FACE | f.MODE_BGR, 64], [f.TRANSFORMED | f.HALF_FACE | f.MODE_M | f.MASK_FULL, 64] ], random_flip=True ), + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.HALF_FACE | f.MODE_BGR, 64], [f.TRANSFORMED | f.HALF_FACE | f.MODE_BGR, 64], [f.TRANSFORMED | f.HALF_FACE | f.MODE_M | f.MASK_FULL, 64] ], random_flip=True ) + ]) + + #override + def onSave(self): + self.save_weights_safe( [[self.encoder, self.get_strpath_storage_for_file(self.encoderH5)], + [self.decoder_src, self.get_strpath_storage_for_file(self.decoder_srcH5)], + [self.decoder_dst, self.get_strpath_storage_for_file(self.decoder_dstH5)]] ) + + #override + def onTrainOneEpoch(self, sample): + warped_src, target_src, target_src_full_mask = sample[0] + warped_dst, target_dst, target_dst_full_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_full_mask], [target_src, target_src_full_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_full_mask], [target_dst, target_dst_full_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) + AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) + BB, mBB = 
self.autoencoder_dst.predict([test_B, test_B_m]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('H64', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + + face_64_bgr = face[...,0:3] + face_64_mask = np.expand_dims(face[...,3],-1) + + x, mx = self.autoencoder_src.predict ( [ np.expand_dims(face_64_bgr,0), np.expand_dims(face_64_mask,0) ] ) + x, mx = x[0], mx[0] + + return np.concatenate ( (x,mx), -1 ) + + #override + def get_converter(self, **in_options): + from models import ConverterMasked + + if 'masked_hist_match' not in in_options.keys() or in_options['masked_hist_match'] is None: + in_options['masked_hist_match'] = True + + if 'erode_mask_modifier' not in in_options.keys(): + in_options['erode_mask_modifier'] = 0 + in_options['erode_mask_modifier'] += 100 + + if 'blur_mask_modifier' not in in_options.keys(): + in_options['blur_mask_modifier'] = 0 + in_options['blur_mask_modifier'] += 100 + + return ConverterMasked(self.predictor_func, predictor_input_size=64, output_size=64, face_type=FaceType.HALF, **in_options) + + def Encoder(self, input_layer, created_vram_gb): + x = input_layer + if created_vram_gb >= 4: + x = conv(self.keras, x, 128) + x = conv(self.keras, x, 256) + x = conv(self.keras, x, 512) + x = conv(self.keras, x, 1024) + x = self.keras.layers.Dense(1024)(self.keras.layers.Flatten()(x)) + x = self.keras.layers.Dense(4 * 4 * 1024)(x) + x = self.keras.layers.Reshape((4, 4, 1024))(x) + x = upscale(self.keras, x, 512) + else: + x = conv(self.keras, x, 128 ) + x = conv(self.keras, x, 256 ) + x = conv(self.keras, x, 512 ) + x = conv(self.keras, x, 768 ) + x = self.keras.layers.Dense(512)(self.keras.layers.Flatten()(x)) + x = self.keras.layers.Dense(4 * 4 * 512)(x) + x = self.keras.layers.Reshape((4, 4, 512))(x) + x = upscale(self.keras, x, 256) + + return self.keras.models.Model(input_layer, x) + + def Decoder(self, created_vram_gb): + if created_vram_gb >= 4: + input_ = self.keras.layers.Input(shape=(8, 8, 512)) + else: + input_ = self.keras.layers.Input(shape=(8, 8, 256)) + + x = input_ + x = upscale(self.keras, x, 256) + x = upscale(self.keras, x, 128) + x = upscale(self.keras, x, 64) + + y = input_ #mask decoder + y = upscale(self.keras, y, 256) + y = upscale(self.keras, y, 128) + y = upscale(self.keras, y, 64) + + x = self.keras.layers.convolutional.Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + y = self.keras.layers.convolutional.Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + + + return self.keras.models.Model(input_, [x,y]) diff --git a/models/Model_H64/__init__.py b/models/Model_H64/__init__.py new file mode 100644 index 0000000..cdb3fe7 --- /dev/null +++ b/models/Model_H64/__init__.py @@ -0,0 +1 @@ +from .Model import Model \ No newline at end of file diff --git a/models/Model_LIAEF128/Model.py b/models/Model_LIAEF128/Model.py new file mode 100644 index 0000000..df5105c --- /dev/null +++ b/models/Model_LIAEF128/Model.py @@ -0,0 +1,164 @@ +from models import ModelBase +from models import TrainingDataType +import numpy as np +import cv2 + +from nnlib import DSSIMMaskLossClass +from nnlib import conv +from nnlib import upscale +from facelib import FaceType + +class Model(ModelBase): + + encoderH5 = 'encoder.h5' + decoderH5 = 'decoder.h5' + 
inter_BH5 = 'inter_B.h5' + inter_ABH5 = 'inter_AB.h5' + + #override + def onInitialize(self, **in_options): + self.set_vram_batch_requirements( {5:4,6:8,7:12,8:16,9:20,10:24,11:24,12:32,13:48} ) + + ae_input_layer = self.keras.layers.Input(shape=(128, 128, 3)) + mask_layer = self.keras.layers.Input(shape=(128, 128, 1)) #same as output + + self.encoder = self.Encoder(ae_input_layer) + self.decoder = self.Decoder() + self.inter_B = self.Intermediate () + self.inter_AB = self.Intermediate () + + if not self.is_first_run(): + self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5)) + self.decoder.load_weights (self.get_strpath_storage_for_file(self.decoderH5)) + self.inter_B.load_weights (self.get_strpath_storage_for_file(self.inter_BH5)) + self.inter_AB.load_weights (self.get_strpath_storage_for_file(self.inter_ABH5)) + + code = self.encoder(ae_input_layer) + AB = self.inter_AB(code) + B = self.inter_B(code) + self.autoencoder_src = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder(self.keras.layers.Concatenate()([AB, AB])) ) + self.autoencoder_dst = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder(self.keras.layers.Concatenate()([B, AB])) ) + + if self.is_training_mode: + self.autoencoder_src, self.autoencoder_dst = self.to_multi_gpu_model_if_possible ( [self.autoencoder_src, self.autoencoder_dst] ) + + optimizer = self.keras.optimizers.Adam(lr=5e-5, beta_1=0.5, beta_2=0.999) + dssimloss = DSSIMMaskLossClass(self.tf)([mask_layer]) + self.autoencoder_src.compile(optimizer=optimizer, loss=[dssimloss, 'mse'] ) + self.autoencoder_dst.compile(optimizer=optimizer, loss=[dssimloss, 'mse'] ) + + if self.is_training_mode: + from models import TrainingDataGenerator + f = TrainingDataGenerator.SampleTypeFlags + self.set_training_data_generators ([ + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_M | f.MASK_FULL, 128] ], random_flip=True ), + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_M | f.MASK_FULL, 128] ], random_flip=True ) + ]) + + #override + def onSave(self): + self.save_weights_safe( [[self.encoder, self.get_strpath_storage_for_file(self.encoderH5)], + [self.decoder, self.get_strpath_storage_for_file(self.decoderH5)], + [self.inter_B, self.get_strpath_storage_for_file(self.inter_BH5)], + [self.inter_AB, self.get_strpath_storage_for_file(self.inter_ABH5)]] ) + + #override + def onTrainOneEpoch(self, sample): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.autoencoder_src.predict([test_A, 
test_A_m]) + AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) + BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('LIAEF128', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + + face_128_bgr = face[...,0:3] + face_128_mask = np.expand_dims(face[...,3],-1) + + x, mx = self.autoencoder_src.predict ( [ np.expand_dims(face_128_bgr,0), np.expand_dims(face_128_mask,0) ] ) + x, mx = x[0], mx[0] + + return np.concatenate ( (x,mx), -1 ) + + #override + def get_converter(self, **in_options): + from models import ConverterMasked + + if 'masked_hist_match' not in in_options.keys() or in_options['masked_hist_match'] is None: + in_options['masked_hist_match'] = True + + if 'erode_mask_modifier' not in in_options.keys(): + in_options['erode_mask_modifier'] = 0 + in_options['erode_mask_modifier'] += 30 + + if 'blur_mask_modifier' not in in_options.keys(): + in_options['blur_mask_modifier'] = 0 + + return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.FULL, clip_border_mask_per=0.046875, **in_options) + + def Encoder(self, input_layer,): + x = input_layer + x = conv(self.keras, x, 128) + x = conv(self.keras, x, 256) + x = conv(self.keras, x, 512) + x = conv(self.keras, x, 1024) + x = self.keras.layers.Flatten()(x) + return self.keras.models.Model(input_layer, x) + + def Intermediate(self): + input_layer = self.keras.layers.Input(shape=(None, 8 * 8 * 1024)) + x = input_layer + x = self.keras.layers.Dense(256)(x) + x = self.keras.layers.Dense(8 * 8 * 512)(x) + x = self.keras.layers.Reshape((8, 8, 512))(x) + x = upscale(self.keras, x, 512) + return self.keras.models.Model(input_layer, x) + + def Decoder(self): + input_ = self.keras.layers.Input(shape=(16, 16, 1024)) + x = input_ + x = upscale(self.keras, x, 512) + x = upscale(self.keras, x, 256) + x = upscale(self.keras, x, 128) + x = self.keras.layers.convolutional.Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + + y = input_ #mask decoder + y = upscale(self.keras, y, 512) + y = upscale(self.keras, y, 256) + y = upscale(self.keras, y, 128) + y = self.keras.layers.convolutional.Conv2D(1, kernel_size=5, padding='same', activation='sigmoid' )(y) + + return self.keras.models.Model(input_, [x,y]) diff --git a/models/Model_LIAEF128/__init__.py b/models/Model_LIAEF128/__init__.py new file mode 100644 index 0000000..cdb3fe7 --- /dev/null +++ b/models/Model_LIAEF128/__init__.py @@ -0,0 +1 @@ +from .Model import Model \ No newline at end of file diff --git a/models/Model_LIAEF128YAW/Model.py b/models/Model_LIAEF128YAW/Model.py new file mode 100644 index 0000000..aa7f668 --- /dev/null +++ b/models/Model_LIAEF128YAW/Model.py @@ -0,0 +1,164 @@ +from models import ModelBase +from models import TrainingDataType +import numpy as np +import cv2 + +from nnlib import DSSIMMaskLossClass +from nnlib import conv +from nnlib import upscale +from facelib import FaceType + +class Model(ModelBase): + + encoderH5 = 'encoder.h5' + decoderH5 = 'decoder.h5' + inter_BH5 = 'inter_B.h5' + inter_ABH5 = 'inter_AB.h5' + + #override + def onInitialize(self, **in_options): + self.set_vram_batch_requirements( {5:4,6:8,7:12,8:16,9:20,10:24,11:24,12:32,13:48} ) + + 
ae_input_layer = self.keras.layers.Input(shape=(128, 128, 3)) + mask_layer = self.keras.layers.Input(shape=(128, 128, 1)) #same as output + + self.encoder = self.Encoder(ae_input_layer) + self.decoder = self.Decoder() + self.inter_B = self.Intermediate () + self.inter_AB = self.Intermediate () + + if not self.is_first_run(): + self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5)) + self.decoder.load_weights (self.get_strpath_storage_for_file(self.decoderH5)) + self.inter_B.load_weights (self.get_strpath_storage_for_file(self.inter_BH5)) + self.inter_AB.load_weights (self.get_strpath_storage_for_file(self.inter_ABH5)) + + code = self.encoder(ae_input_layer) + AB = self.inter_AB(code) + B = self.inter_B(code) + self.autoencoder_src = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder(self.keras.layers.Concatenate()([AB, AB])) ) + self.autoencoder_dst = self.keras.models.Model([ae_input_layer,mask_layer], self.decoder(self.keras.layers.Concatenate()([B, AB])) ) + + if self.is_training_mode: + self.autoencoder_src, self.autoencoder_dst = self.to_multi_gpu_model_if_possible ( [self.autoencoder_src, self.autoencoder_dst] ) + + optimizer = self.keras.optimizers.Adam(lr=5e-5, beta_1=0.5, beta_2=0.999) + dssimloss = DSSIMMaskLossClass(self.tf)([mask_layer]) + self.autoencoder_src.compile(optimizer=optimizer, loss=[dssimloss, 'mse'] ) + self.autoencoder_dst.compile(optimizer=optimizer, loss=[dssimloss, 'mse'] ) + + if self.is_training_mode: + from models import TrainingDataGenerator + f = TrainingDataGenerator.SampleTypeFlags + self.set_training_data_generators ([ + TrainingDataGenerator(TrainingDataType.FACE_YAW_SORTED_AS_TARGET, self.training_data_src_path, target_training_data_path=self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_M | f.MASK_FULL, 128] ], random_flip=True ), + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_M | f.MASK_FULL, 128] ], random_flip=True ) + ]) + + #override + def onSave(self): + self.save_weights_safe( [[self.encoder, self.get_strpath_storage_for_file(self.encoderH5)], + [self.decoder, self.get_strpath_storage_for_file(self.decoderH5)], + [self.inter_B, self.get_strpath_storage_for_file(self.inter_BH5)], + [self.inter_AB, self.get_strpath_storage_for_file(self.inter_ABH5)]] ) + + #override + def onTrainOneEpoch(self, sample): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) + AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) + BB, mBB = self.autoencoder_dst.predict([test_B, 
test_B_m]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + test_A[i,:,:,0:3], + AA[i], + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('LIAEF128YAW', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + + face_128_bgr = face[...,0:3] + face_128_mask = np.expand_dims(face[...,3],-1) + + x, mx = self.autoencoder_src.predict ( [ np.expand_dims(face_128_bgr,0), np.expand_dims(face_128_mask,0) ] ) + x, mx = x[0], mx[0] + + return np.concatenate ( (x,mx), -1 ) + + #override + def get_converter(self, **in_options): + from models import ConverterMasked + + if 'masked_hist_match' not in in_options.keys() or in_options['masked_hist_match'] is None: + in_options['masked_hist_match'] = True + + if 'erode_mask_modifier' not in in_options.keys(): + in_options['erode_mask_modifier'] = 0 + in_options['erode_mask_modifier'] += 30 + + if 'blur_mask_modifier' not in in_options.keys(): + in_options['blur_mask_modifier'] = 0 + + return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.FULL, clip_border_mask_per=0.046875, **in_options) + + def Encoder(self, input_layer,): + x = input_layer + x = conv(self.keras, x, 128) + x = conv(self.keras, x, 256) + x = conv(self.keras, x, 512) + x = conv(self.keras, x, 1024) + x = self.keras.layers.Flatten()(x) + return self.keras.models.Model(input_layer, x) + + def Intermediate(self): + input_layer = self.keras.layers.Input(shape=(None, 8 * 8 * 1024)) + x = input_layer + x = self.keras.layers.Dense(256)(x) + x = self.keras.layers.Dense(8 * 8 * 512)(x) + x = self.keras.layers.Reshape((8, 8, 512))(x) + x = upscale(self.keras, x, 512) + return self.keras.models.Model(input_layer, x) + + def Decoder(self): + input_ = self.keras.layers.Input(shape=(16, 16, 1024)) + x = input_ + x = upscale(self.keras, x, 512) + x = upscale(self.keras, x, 256) + x = upscale(self.keras, x, 128) + x = self.keras.layers.convolutional.Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + + y = input_ #mask decoder + y = upscale(self.keras, y, 512) + y = upscale(self.keras, y, 256) + y = upscale(self.keras, y, 128) + y = self.keras.layers.convolutional.Conv2D(1, kernel_size=5, padding='same', activation='sigmoid' )(y) + + return self.keras.models.Model(input_, [x,y]) diff --git a/models/Model_LIAEF128YAW/__init__.py b/models/Model_LIAEF128YAW/__init__.py new file mode 100644 index 0000000..cdb3fe7 --- /dev/null +++ b/models/Model_LIAEF128YAW/__init__.py @@ -0,0 +1 @@ +from .Model import Model \ No newline at end of file diff --git a/models/Model_MIAEF128/Model.py b/models/Model_MIAEF128/Model.py new file mode 100644 index 0000000..9c09fe0 --- /dev/null +++ b/models/Model_MIAEF128/Model.py @@ -0,0 +1,217 @@ +from models import ModelBase +from models import TrainingDataType +import numpy as np +import cv2 + +from nnlib import DSSIMMaskLossClass +from nnlib import conv +from nnlib import upscale +from facelib import FaceType + +class Model(ModelBase): + + encoderH5 = 'encoder.h5' + decoderMaskH5 = 'decoderMask.h5' + decoderCommonAH5 = 'decoderCommonA.h5' + decoderCommonBH5 = 'decoderCommonB.h5' + decoderRGBH5 = 'decoderRGB.h5' + decoderBWH5 = 'decoderBW.h5' + inter_BH5 = 'inter_B.h5' + inter_AH5 = 'inter_A.h5' + + #override + def onInitialize(self, **in_options): + self.set_vram_batch_requirements( 
{5:4,6:8,7:12,8:16,9:20,10:24,11:24,12:32,13:48} ) + + ae_input_layer = self.keras.layers.Input(shape=(128, 128, 3)) + mask_layer = self.keras.layers.Input(shape=(128, 128, 1)) #same as output + + self.encoder = self.Encoder(ae_input_layer) + self.decoderMask = self.DecoderMask() + self.decoderCommonA = self.DecoderCommon() + self.decoderCommonB = self.DecoderCommon() + self.decoderRGB = self.DecoderRGB() + self.decoderBW = self.DecoderBW() + self.inter_A = self.Intermediate () + self.inter_B = self.Intermediate () + + if not self.is_first_run(): + self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5)) + self.decoderMask.load_weights (self.get_strpath_storage_for_file(self.decoderMaskH5)) + self.decoderCommonA.load_weights (self.get_strpath_storage_for_file(self.decoderCommonAH5)) + self.decoderCommonB.load_weights (self.get_strpath_storage_for_file(self.decoderCommonBH5)) + self.decoderRGB.load_weights (self.get_strpath_storage_for_file(self.decoderRGBH5)) + self.decoderBW.load_weights (self.get_strpath_storage_for_file(self.decoderBWH5)) + self.inter_A.load_weights (self.get_strpath_storage_for_file(self.inter_AH5)) + self.inter_B.load_weights (self.get_strpath_storage_for_file(self.inter_BH5)) + + code = self.encoder(ae_input_layer) + A = self.inter_A(code) + B = self.inter_B(code) + + inter_A_A = self.keras.layers.Concatenate()([A, A]) + inter_B_A = self.keras.layers.Concatenate()([B, A]) + + x1,m1 = self.decoderCommonA (inter_A_A) + x2,m2 = self.decoderCommonA (inter_A_A) + self.autoencoder_src = self.keras.models.Model([ae_input_layer,mask_layer], + [ self.decoderBW (self.keras.layers.Concatenate()([x1,x2]) ), + self.decoderMask(self.keras.layers.Concatenate()([m1,m2]) ) + ]) + + x1,m1 = self.decoderCommonA (inter_A_A) + x2,m2 = self.decoderCommonB (inter_A_A) + self.autoencoder_src_RGB = self.keras.models.Model([ae_input_layer,mask_layer], + [ self.decoderRGB (self.keras.layers.Concatenate()([x1,x2]) ), + self.decoderMask (self.keras.layers.Concatenate()([m1,m2]) ) + ]) + + x1,m1 = self.decoderCommonA (inter_B_A) + x2,m2 = self.decoderCommonB (inter_B_A) + self.autoencoder_dst = self.keras.models.Model([ae_input_layer,mask_layer], + [ self.decoderRGB (self.keras.layers.Concatenate()([x1,x2]) ), + self.decoderMask (self.keras.layers.Concatenate()([m1,m2]) ) + ]) + + if self.is_training_mode: + self.autoencoder_src, self.autoencoder_dst = self.to_multi_gpu_model_if_possible ( [self.autoencoder_src, self.autoencoder_dst] ) + + optimizer = self.keras.optimizers.Adam(lr=5e-5, beta_1=0.5, beta_2=0.999) + dssimloss = DSSIMMaskLossClass(self.tf)([mask_layer]) + self.autoencoder_src.compile(optimizer=optimizer, loss=[dssimloss, 'mse'] ) + self.autoencoder_dst.compile(optimizer=optimizer, loss=[dssimloss, 'mse'] ) + + if self.is_training_mode: + from models import TrainingDataGenerator + f = TrainingDataGenerator.SampleTypeFlags + self.set_training_data_generators ([ + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FULL_FACE | f.MODE_GGG, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_G , 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_M | f.MASK_FULL, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_GGG, 128] ], random_flip=True ), + TrainingDataGenerator(TrainingDataType.FACE, self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | 
f.FULL_FACE | f.MODE_BGR, 128], [f.TRANSFORMED | f.FULL_FACE | f.MODE_M | f.MASK_FULL, 128]], random_flip=True ) + ]) + #override + def onSave(self): + self.save_weights_safe( [[self.encoder, self.get_strpath_storage_for_file(self.encoderH5)], + [self.decoderMask, self.get_strpath_storage_for_file(self.decoderMaskH5)], + [self.decoderCommonA, self.get_strpath_storage_for_file(self.decoderCommonAH5)], + [self.decoderCommonB, self.get_strpath_storage_for_file(self.decoderCommonBH5)], + [self.decoderRGB, self.get_strpath_storage_for_file(self.decoderRGBH5)], + [self.decoderBW, self.get_strpath_storage_for_file(self.decoderBWH5)], + [self.inter_A, self.get_strpath_storage_for_file(self.inter_AH5)], + [self.inter_B, self.get_strpath_storage_for_file(self.inter_BH5)]] ) + + + #override + def onTrainOneEpoch(self, sample): + warped_src, target_src, target_src_mask, target_src_GGG = sample[0] + warped_dst, target_dst, target_dst_mask = sample[1] + + loss_src = self.autoencoder_src.train_on_batch( [ warped_src, target_src_mask], [ target_src, target_src_mask] ) + loss_dst = self.autoencoder_dst.train_on_batch( [ warped_dst, target_dst_mask], [ target_dst, target_dst_mask] ) + + return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) + + #override + def onGetPreview(self, sample): + test_A = sample[0][3][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) + AB, mAB = self.autoencoder_src_RGB.predict([test_B, test_B_m]) + BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) + + mAA = np.repeat ( mAA, (3,), -1) + mAB = np.repeat ( mAB, (3,), -1) + mBB = np.repeat ( mBB, (3,), -1) + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + np.repeat (np.expand_dims (test_A[i,:,:,0],-1), (3,), -1) , + np.repeat (AA[i], (3,), -1), + #mAA[i], + test_B[i,:,:,0:3], + BB[i], + #mBB[i], + AB[i], + #mAB[i] + ), axis=1) ) + + return [ ('MIAEF128', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + face_128_bgr = face[...,0:3] + face_128_mask = np.expand_dims(face[...,-1],-1) + + x, mx = self.autoencoder_src_RGB.predict ( [ np.expand_dims(face_128_bgr,0), np.expand_dims(face_128_mask,0) ] ) + x, mx = x[0], mx[0] + + return np.concatenate ( (x,mx), -1 ) + + #override + def get_converter(self, **in_options): + from models import ConverterMasked + + if 'masked_hist_match' not in in_options.keys() or in_options['masked_hist_match'] is None: + in_options['masked_hist_match'] = False + + if 'erode_mask_modifier' not in in_options.keys(): + in_options['erode_mask_modifier'] = 0 + in_options['erode_mask_modifier'] += 30 + + if 'blur_mask_modifier' not in in_options.keys(): + in_options['blur_mask_modifier'] = 0 + + return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.FULL, clip_border_mask_per=0.046875, **in_options) + + + def Encoder(self, input_layer,): + x = input_layer + x = conv(self.keras, x, 128) + x = conv(self.keras, x, 256) + x = conv(self.keras, x, 512) + x = conv(self.keras, x, 1024) + x = self.keras.layers.Flatten()(x) + return self.keras.models.Model(input_layer, x) + + def Intermediate(self): + input_layer = self.keras.layers.Input(shape=(None, 8 * 8 * 1024)) + x = input_layer + x = self.keras.layers.Dense(256)(x) + x = self.keras.layers.Dense(8 * 8 * 512)(x) + x = self.keras.layers.Reshape((8, 8, 512))(x) + x = upscale(self.keras, x, 512) + return 
self.keras.models.Model(input_layer, x) + + def DecoderCommon(self): + input_ = self.keras.layers.Input(shape=(16, 16, 1024)) + x = input_ + x = upscale(self.keras, x, 512) + x = upscale(self.keras, x, 256) + x = upscale(self.keras, x, 128) + + y = input_ + y = upscale(self.keras, y, 256) + y = upscale(self.keras, y, 128) + y = upscale(self.keras, y, 64) + + return self.keras.models.Model(input_, [x,y]) + + def DecoderRGB(self): + input_ = self.keras.layers.Input(shape=(128, 128, 256)) + x = input_ + x = self.keras.layers.convolutional.Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) + return self.keras.models.Model(input_, [x]) + + def DecoderBW(self): + input_ = self.keras.layers.Input(shape=(128, 128, 256)) + x = input_ + x = self.keras.layers.convolutional.Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(x) + return self.keras.models.Model(input_, [x]) + + def DecoderMask(self): + input_ = self.keras.layers.Input(shape=(128, 128, 128)) + y = input_ + y = self.keras.layers.convolutional.Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) + return self.keras.models.Model(input_, [y]) diff --git a/models/Model_MIAEF128/__init__.py b/models/Model_MIAEF128/__init__.py new file mode 100644 index 0000000..cdb3fe7 --- /dev/null +++ b/models/Model_MIAEF128/__init__.py @@ -0,0 +1 @@ +from .Model import Model \ No newline at end of file diff --git a/models/TrainingDataGenerator.py b/models/TrainingDataGenerator.py new file mode 100644 index 0000000..624abfa --- /dev/null +++ b/models/TrainingDataGenerator.py @@ -0,0 +1,149 @@ +from facelib import FaceType +from facelib import LandmarksProcessor +import cv2 +import numpy as np +from models import TrainingDataGeneratorBase +from utils import image_utils +from utils import random_utils +from enum import IntEnum +from models import TrainingDataType + +class TrainingDataGenerator(TrainingDataGeneratorBase): + class SampleTypeFlags(IntEnum): + SOURCE = 0x000001, + WARPED = 0x000002, + WARPED_TRANSFORMED = 0x000004, + TRANSFORMED = 0x000008, + + HALF_FACE = 0x000010, + FULL_FACE = 0x000020, + HEAD_FACE = 0x000040, + AVATAR_FACE = 0x000080, + MARK_ONLY_FACE = 0x000100, + + MODE_BGR = 0x001000, #BGR + MODE_G = 0x002000, #Grayscale + MODE_GGG = 0x004000, #3xGrayscale + MODE_M = 0x008000, #mask only + MODE_BGR_SHUFFLE = 0x010000, #BGR shuffle + + MASK_FULL = 0x100000, + MASK_EYES = 0x200000, + + #overrided + def onInitialize(self, random_flip=False, normalize_tanh=False, rotation_range=[-10,10], scale_range=[-0.05, 0.05], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05], output_sample_types=[], **kwargs): + self.random_flip = random_flip + self.normalize_tanh = normalize_tanh + self.output_sample_types = output_sample_types + self.rotation_range = rotation_range + self.scale_range = scale_range + self.tx_range = tx_range + self.ty_range = ty_range + + #overrided + def onProcessSample(self, sample, debug): + source = sample.load_bgr() + h,w,c = source.shape + + is_face_sample = self.trainingdatatype >= TrainingDataType.FACE_BEGIN and self.trainingdatatype <= TrainingDataType.FACE_END + + if debug and is_face_sample: + LandmarksProcessor.draw_landmarks (source, sample.landmarks, (0, 1, 0)) + + params = image_utils.gen_warp_params(source, self.random_flip, rotation_range=self.rotation_range, scale_range=self.scale_range, tx_range=self.tx_range, ty_range=self.ty_range ) + + images = [[None]*3 for _ in range(4)] + + outputs = [] + for t,size in self.output_sample_types: + if t & self.SampleTypeFlags.SOURCE != 0: + 
img_type = 0 + elif t & self.SampleTypeFlags.WARPED != 0: + img_type = 1 + elif t & self.SampleTypeFlags.WARPED_TRANSFORMED != 0: + img_type = 2 + elif t & self.SampleTypeFlags.TRANSFORMED != 0: + img_type = 3 + else: + raise ValueError ('expected SampleTypeFlags type') + + mask_type = 0 + if t & self.SampleTypeFlags.MASK_FULL != 0: + mask_type = 1 + elif t & self.SampleTypeFlags.MASK_EYES != 0: + mask_type = 2 + + if images[img_type][mask_type] is None: + img = source + if is_face_sample: + if mask_type == 1: + img = np.concatenate( (img, LandmarksProcessor.get_image_hull_mask (source, sample.landmarks) ), -1 ) + elif mask_type == 2: + mask = LandmarksProcessor.get_image_eye_mask (source, sample.landmarks) + mask = np.expand_dims (cv2.blur (mask, ( w // 32, w // 32 ) ), -1) + mask[mask > 0.0] = 1.0 + img = np.concatenate( (img, mask ), -1 ) + + images[img_type][mask_type] = image_utils.warp_by_params (params, img, (img_type==1 or img_type==2), (img_type==2 or img_type==3), img_type != 0) + + img = images[img_type][mask_type] + + target_face_type = -1 + if t & self.SampleTypeFlags.HALF_FACE != 0: + target_face_type = FaceType.HALF + elif t & self.SampleTypeFlags.FULL_FACE != 0: + target_face_type = FaceType.FULL + elif t & self.SampleTypeFlags.HEAD_FACE != 0: + target_face_type = FaceType.HEAD + elif t & self.SampleTypeFlags.AVATAR_FACE != 0: + target_face_type = FaceType.AVATAR + elif t & self.SampleTypeFlags.MARK_ONLY_FACE != 0: + target_face_type = FaceType.MARK_ONLY + + if is_face_sample and target_face_type != -1 and target_face_type != FaceType.MARK_ONLY: + if target_face_type > sample.face_type: + raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.' % (sample.filename, sample.face_type, target_face_type) ) + + img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, size, target_face_type), (size,size), flags=cv2.INTER_LANCZOS4 ) + else: + img = cv2.resize( img, (size,size), cv2.INTER_LANCZOS4 ) + + img_bgr = img[...,0:3] + img_mask = img[...,3:4] + + if t & self.SampleTypeFlags.MODE_BGR != 0: + img = img + elif t & self.SampleTypeFlags.MODE_BGR_SHUFFLE != 0: + img_bgr = np.take (img_bgr, np.random.permutation(img_bgr.shape[-1]), axis=-1) + img = np.concatenate ( (img_bgr,img_mask) , -1 ) + elif t & self.SampleTypeFlags.MODE_G != 0: + img = np.concatenate ( (np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1),img_mask) , -1 ) + elif t & self.SampleTypeFlags.MODE_GGG != 0: + img = np.concatenate ( ( np.repeat ( np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1), (3,), -1), img_mask), -1) + elif is_face_sample and t & self.SampleTypeFlags.MODE_M != 0: + if mask_type== 0: + raise ValueError ('no mask mode defined') + img = img_mask + else: + raise ValueError ('expected SampleTypeFlags mode') + + if not debug and self.normalize_tanh: + img = img * 2.0 - 1.0 + + outputs.append ( img ) + + if debug: + result = () + + for output in outputs: + if output.shape[2] < 4: + result += (output,) + elif output.shape[2] == 4: + result += (output[...,0:3]*output[...,3:4],) + + return result + else: + return outputs + + + \ No newline at end of file diff --git a/models/TrainingDataGeneratorBase.py b/models/TrainingDataGeneratorBase.py new file mode 100644 index 0000000..f6a1df0 --- /dev/null +++ b/models/TrainingDataGeneratorBase.py @@ -0,0 +1,245 @@ +import traceback +import random +from pathlib import Path +from tqdm import tqdm +import numpy as np +import cv2 +from utils.AlignedPNG import 
AlignedPNG +from utils import iter_utils +from utils import Path_utils +from .BaseTypes import TrainingDataType +from .BaseTypes import TrainingDataSample +from facelib import FaceType +from facelib import LandmarksProcessor + +''' +You can implement your own TrainingDataGenerator +''' +class TrainingDataGeneratorBase(object): + cache = dict() + + #DONT OVERRIDE + #use YourOwnTrainingDataGenerator (..., your_opt=1) + #and then this opt will be passed in YourOwnTrainingDataGenerator.onInitialize ( your_opt ) + def __init__ (self, trainingdatatype, training_data_path, target_training_data_path=None, debug=False, batch_size=1, **kwargs): + if not isinstance(trainingdatatype, TrainingDataType): + raise Exception('TrainingDataGeneratorBase() trainingdatatype is not TrainingDataType') + + if training_data_path is None: + raise Exception('training_data_path is None') + + self.training_data_path = Path(training_data_path) + self.target_training_data_path = Path(target_training_data_path) if target_training_data_path is not None else None + + self.debug = debug + self.batch_size = 1 if self.debug else batch_size + self.trainingdatatype = trainingdatatype + self.data = TrainingDataGeneratorBase.load (trainingdatatype, self.training_data_path, self.target_training_data_path) + + if self.debug: + self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, self.data)] + else: + if len(self.data) > 1: + self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, self.data[0::2] ), + iter_utils.SubprocessGenerator ( self.batch_func, self.data[1::2] )] + else: + self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, self.data )] + + self.generator_counter = -1 + self.onInitialize(**kwargs) + + #overridable + def onInitialize(self, **kwargs): + #your TrainingDataGenerator initialization here + pass + + #overridable + def onProcessSample(self, sample, debug): + #process sample and return tuple of images for your model in onTrainOneEpoch + return ( np.zeros( (64,64,4), dtype=np.float32 ), ) + + def __iter__(self): + return self + + def __next__(self): + self.generator_counter += 1 + generator = self.generators[self.generator_counter % len(self.generators) ] + x = next(generator) + return x + + def batch_func(self, data): + data_len = len(data) + if data_len == 0: + raise ValueError('No training data provided.') + + if self.trainingdatatype == TrainingDataType.FACE_YAW_SORTED or self.trainingdatatype == TrainingDataType.FACE_YAW_SORTED_AS_TARGET: + if all ( [ x == None for x in data] ): + raise ValueError('Not enough training data. 
Gather more faces!') + + if self.trainingdatatype == TrainingDataType.IMAGE or self.trainingdatatype == TrainingDataType.FACE: + shuffle_idxs = [] + elif self.trainingdatatype == TrainingDataType.FACE_YAW_SORTED or self.trainingdatatype == TrainingDataType.FACE_YAW_SORTED_AS_TARGET: + shuffle_idxs = [] + shuffle_idxs_2D = [[]]*data_len + + while True: + + batches = None + for n_batch in range(0, self.batch_size): + while True: + sample = None + + if self.trainingdatatype == TrainingDataType.IMAGE or self.trainingdatatype == TrainingDataType.FACE: + if len(shuffle_idxs) == 0: + shuffle_idxs = [ i for i in range(0, data_len) ] + random.shuffle(shuffle_idxs) + idx = shuffle_idxs.pop() + sample = data[ idx ] + elif self.trainingdatatype == TrainingDataType.FACE_YAW_SORTED or self.trainingdatatype == TrainingDataType.FACE_YAW_SORTED_AS_TARGET: + if len(shuffle_idxs) == 0: + shuffle_idxs = [ i for i in range(0, data_len) ] + random.shuffle(shuffle_idxs) + + idx = shuffle_idxs.pop() + if data[idx] != None: + if len(shuffle_idxs_2D[idx]) == 0: + shuffle_idxs_2D[idx] = [ i for i in range(0, len(data[idx])) ] + random.shuffle(shuffle_idxs_2D[idx]) + + idx2 = shuffle_idxs_2D[idx].pop() + sample = data[idx][idx2] + + if sample is not None: + try: + x = self.onProcessSample (sample, self.debug) + except: + raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) + + if type(x) != tuple and type(x) != list: + raise Exception('TrainingDataGenerator.onProcessSample() returns NOT tuple/list') + + x_len = len(x) + if batches is None: + batches = [ [] for _ in range(0,x_len) ] + + for i in range(0,x_len): + batches[i].append ( x[i] ) + + break + + yield [ np.array(batch) for batch in batches] + + def get_dict_state(self): + return {} + + def set_dict_state(self, state): + pass + + @staticmethod + def load(trainingdatatype, training_data_path, target_training_data_path=None): + cache = TrainingDataGeneratorBase.cache + + if str(training_data_path) not in cache.keys(): + cache[str(training_data_path)] = [None]*TrainingDataType.QTY + + if target_training_data_path is not None and str(target_training_data_path) not in cache.keys(): + cache[str(target_training_data_path)] = [None]*TrainingDataType.QTY + + datas = cache[str(training_data_path)] + + if trainingdatatype == TrainingDataType.IMAGE: + if datas[trainingdatatype] is None: + datas[trainingdatatype] = [ TrainingDataSample(filename=filename) for filename in tqdm( Path_utils.get_image_paths(training_data_path), desc="Loading" ) ] + + elif trainingdatatype == TrainingDataType.FACE: + if datas[trainingdatatype] is None: + datas[trainingdatatype] = X_LOAD( [ TrainingDataSample(filename=filename) for filename in Path_utils.get_image_paths(training_data_path) ] ) + + elif trainingdatatype == TrainingDataType.FACE_YAW_SORTED: + if datas[trainingdatatype] is None: + datas[trainingdatatype] = X_YAW_SORTED( TrainingDataGeneratorBase.load(TrainingDataType.FACE, training_data_path) ) + + elif trainingdatatype == TrainingDataType.FACE_YAW_SORTED_AS_TARGET: + if datas[trainingdatatype] is None: + if target_training_data_path is None: + raise Exception('target_training_data_path is None for FACE_YAW_SORTED_AS_TARGET') + datas[trainingdatatype] = X_YAW_AS_Y_SORTED( TrainingDataGeneratorBase.load(TrainingDataType.FACE_YAW_SORTED, training_data_path), TrainingDataGeneratorBase.load(TrainingDataType.FACE_YAW_SORTED, target_training_data_path) ) + + return datas[trainingdatatype] + +def X_LOAD ( RAWS ): + sample_list = [] + + 
for s in tqdm( RAWS, desc="Loading" ): + + s_filename_path = Path(s.filename) + if s_filename_path.suffix != '.png': + print ("%s is not a png file required for training" % (s_filename_path.name) ) + continue + + a_png = AlignedPNG.load ( str(s_filename_path) ) + if a_png is None: + print ("%s failed to load" % (s_filename_path.name) ) + continue + + d = a_png.getFaceswapDictData() + if d is None or d['landmarks'] is None or d['yaw_value'] is None: + print ("%s - no embedded faceswap info found required for training" % (s_filename_path.name) ) + continue + + face_type = d['face_type'] if 'face_type' in d.keys() else 'full_face' + face_type = FaceType.fromString (face_type) + sample_list.append( s.copy_and_set(face_type=face_type, shape=a_png.get_shape(), landmarks=d['landmarks'], yaw=d['yaw_value']) ) + + return sample_list + +def X_YAW_SORTED( YAW_RAWS ): + + lowest_yaw, highest_yaw = -32, +32 + gradations = 64 + diff_rot_per_grad = abs(highest_yaw-lowest_yaw) / gradations + + yaws_sample_list = [None]*gradations + + for i in tqdm( range(0, gradations), desc="Sorting" ): + yaw = lowest_yaw + i*diff_rot_per_grad + next_yaw = lowest_yaw + (i+1)*diff_rot_per_grad + + yaw_samples = [] + for s in YAW_RAWS: + s_yaw = s.yaw + if (i == 0 and s_yaw < next_yaw) or \ + (i < gradations-1 and s_yaw >= yaw and s_yaw < next_yaw) or \ + (i == gradations-1 and s_yaw >= yaw): + yaw_samples.append ( s ) + + if len(yaw_samples) > 0: + yaws_sample_list[i] = yaw_samples + + return yaws_sample_list + +def X_YAW_AS_Y_SORTED (s, t): + l = len(s) + if l != len(t): + raise Exception('X_YAW_AS_Y_SORTED() s_len != t_len') + b = l // 2 + + s_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in s] ) == 1 )[:,0] + t_idxs = np.argwhere ( np.array ( [ 1 if x != None else 0 for x in t] ) == 1 )[:,0] + + new_s = [None]*l + + for t_idx in t_idxs: + search_idxs = [] + for i in range(0,l): + search_idxs += [t_idx - i, (l-t_idx-1) - i, t_idx + i, (l-t_idx-1) + i] + + for search_idx in search_idxs: + if search_idx in s_idxs: + mirrored = ( t_idx != search_idx and ((t_idx < b and search_idx >= b) or (search_idx < b and t_idx >= b)) ) + new_s[t_idx] = [ sample.copy_and_set(mirror=True, yaw=-sample.yaw, landmarks=LandmarksProcessor.mirror_landmarks (sample.landmarks, sample.shape[1] )) + for sample in s[search_idx] + ] if mirrored else s[search_idx] + break + + return new_s diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..0515eb8 --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,13 @@ +from .BaseTypes import TrainingDataType +from .BaseTypes import TrainingDataSample + +from .ModelBase import ModelBase +from .ConverterBase import ConverterBase +from .ConverterMasked import ConverterMasked +from .ConverterImage import ConverterImage +from .TrainingDataGeneratorBase import TrainingDataGeneratorBase +from .TrainingDataGenerator import TrainingDataGenerator + +def import_model(name): + module = __import__('Model_'+name, globals(), locals(), [], 1) + return getattr(module, 'Model') \ No newline at end of file diff --git a/nnlib/__init__.py b/nnlib/__init__.py new file mode 100644 index 0000000..69fe140 --- /dev/null +++ b/nnlib/__init__.py @@ -0,0 +1,198 @@ +def tf_image_histogram (tf, input): + x = input + x += 1 / 255.0 + + output = [] + for i in range(256, 0, -1): + v = i / 255.0 + y = (x - v) * 1000 + + y = tf.clip_by_value (y, -1.0, 0.0) + 1 + + output.append ( tf.reduce_sum (y) ) + x -= y*v + + return tf.stack ( output[::-1] ) + +def tf_dssim(tf, t1, t2): + return (1.0 - 
tf.image.ssim (t1, t2, 1.0)) / 2.0 + +def tf_ssim(tf, t1, t2): + return tf.image.ssim (t1, t2, 1.0) + +def DSSIMMaskLossClass(tf): + class DSSIMMaskLoss(object): + def __init__(self, mask_list, is_tanh=False): + self.mask_list = mask_list + self.is_tanh = is_tanh + + def __call__(self,y_true, y_pred): + total_loss = None + for mask in self.mask_list: + + if not self.is_tanh: + loss = (1.0 - tf.image.ssim (y_true*mask, y_pred*mask, 1.0)) / 2.0 + else: + loss = (1.0 - tf.image.ssim ( (y_true/2+0.5)*(mask/2+0.5), (y_pred/2+0.5)*(mask/2+0.5), 1.0)) / 2.0 + + if total_loss is None: + total_loss = loss + else: + total_loss += loss + + return total_loss + + return DSSIMMaskLoss + +def MSEMaskLossClass(keras): + class MSEMaskLoss(object): + def __init__(self, mask_list, is_tanh=False): + self.mask_list = mask_list + self.is_tanh = is_tanh + + def __call__(self,y_true, y_pred): + K = keras.backend + + total_loss = None + for mask in self.mask_list: + + if not self.is_tanh: + loss = K.mean(K.square(y_true*mask - y_pred*mask)) + else: + loss = K.mean(K.square( (y_true/2+0.5)*(mask/2+0.5) - (y_pred/2+0.5)*(mask/2+0.5) )) + + if total_loss is None: + total_loss = loss + else: + total_loss += loss + + return total_loss + + return MSEMaskLoss + +def PixelShufflerClass(keras): + class PixelShuffler(keras.engine.topology.Layer): + def __init__(self, size=(2, 2), data_format=None, **kwargs): + super(PixelShuffler, self).__init__(**kwargs) + self.data_format = keras.utils.conv_utils.normalize_data_format(data_format) + self.size = keras.utils.conv_utils.normalize_tuple(size, 2, 'size') + + def call(self, inputs): + + input_shape = keras.backend.int_shape(inputs) + if len(input_shape) != 4: + raise ValueError('Inputs should have rank ' + + str(4) + + '; Received input shape:', str(input_shape)) + + if self.data_format == 'channels_first': + batch_size, c, h, w = input_shape + if batch_size is None: + batch_size = -1 + rh, rw = self.size + oh, ow = h * rh, w * rw + oc = c // (rh * rw) + + out = keras.backend.reshape(inputs, (batch_size, rh, rw, oc, h, w)) + out = keras.backend.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) + out = keras.backend.reshape(out, (batch_size, oc, oh, ow)) + return out + + elif self.data_format == 'channels_last': + batch_size, h, w, c = input_shape + if batch_size is None: + batch_size = -1 + rh, rw = self.size + oh, ow = h * rh, w * rw + oc = c // (rh * rw) + + out = keras.backend.reshape(inputs, (batch_size, h, w, rh, rw, oc)) + out = keras.backend.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) + out = keras.backend.reshape(out, (batch_size, oh, ow, oc)) + return out + + def compute_output_shape(self, input_shape): + + if len(input_shape) != 4: + raise ValueError('Inputs should have rank ' + + str(4) + + '; Received input shape:', str(input_shape)) + + if self.data_format == 'channels_first': + height = input_shape[2] * self.size[0] if input_shape[2] is not None else None + width = input_shape[3] * self.size[1] if input_shape[3] is not None else None + channels = input_shape[1] // self.size[0] // self.size[1] + + if channels * self.size[0] * self.size[1] != input_shape[1]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + channels, + height, + width) + + elif self.data_format == 'channels_last': + height = input_shape[1] * self.size[0] if input_shape[1] is not None else None + width = input_shape[2] * self.size[1] if input_shape[2] is not None else None + channels = input_shape[3] // self.size[0] // self.size[1] + + if channels * 
self.size[0] * self.size[1] != input_shape[3]: + raise ValueError('channels of input and size are incompatible') + + return (input_shape[0], + height, + width, + channels) + + def get_config(self): + config = {'size': self.size, + 'data_format': self.data_format} + base_config = super(PixelShuffler, self).get_config() + + return dict(list(base_config.items()) + list(config.items())) + return PixelShuffler + +def conv(keras, input_tensor, filters): + x = input_tensor + x = keras.layers.convolutional.Conv2D(filters, kernel_size=5, strides=2, padding='same')(x) + x = keras.layers.advanced_activations.LeakyReLU(0.1)(x) + return x + +def upscale(keras, input_tensor, filters, k_size=3): + x = input_tensor + x = keras.layers.convolutional.Conv2D(filters * 4, kernel_size=k_size, padding='same')(x) + x = keras.layers.advanced_activations.LeakyReLU(0.1)(x) + x = PixelShufflerClass(keras)()(x) + return x + +def upscale4(keras, input_tensor, filters): + x = input_tensor + x = keras.layers.convolutional.Conv2D(filters * 16, kernel_size=3, padding='same')(x) + x = keras.layers.advanced_activations.LeakyReLU(0.1)(x) + x = PixelShufflerClass(keras)(size=(4, 4))(x) + return x + +def res(keras, input_tensor, filters): + x = input_tensor + x = keras.layers.convolutional.Conv2D(filters, kernel_size=3, kernel_initializer=keras.initializers.RandomNormal(0, 0.02), use_bias=False, padding="same")(x) + x = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)(x) + x = keras.layers.convolutional.Conv2D(filters, kernel_size=3, kernel_initializer=keras.initializers.RandomNormal(0, 0.02), use_bias=False, padding="same")(x) + x = keras.layers.Add()([x, input_tensor]) + x = keras.layers.advanced_activations.LeakyReLU(alpha=0.2)(x) + return x + +def resize_like(tf, keras, ref_tensor, input_tensor): + def func(input_tensor, ref_tensor): + H, W = ref_tensor.get_shape()[1], ref_tensor.get_shape()[2] + return tf.image.resize_bilinear(input_tensor, [H.value, W.value]) + + return keras.layers.Lambda(func, arguments={'ref_tensor':ref_tensor})(input_tensor) + +def total_variation_loss(keras, x): + K = keras.backend + assert K.ndim(x) == 4 + B,H,W,C = K.int_shape(x) + a = K.square(x[:, :H - 1, :W - 1, :] - x[:, 1:, :W - 1, :]) + b = K.square(x[:, :H - 1, :W - 1, :] - x[:, :H - 1, 1:, :]) + + return K.mean (a+b) \ No newline at end of file diff --git a/requirements-gpu-cuda9-cudnn7.txt b/requirements-gpu-cuda9-cudnn7.txt new file mode 100644 index 0000000..5e10dd0 --- /dev/null +++ b/requirements-gpu-cuda9-cudnn7.txt @@ -0,0 +1,10 @@ +pathlib==1.0.1 +scandir==1.6 +h5py==2.7.1 +Keras==2.1.6 +opencv-python==3.4.0.12 +tensorflow-gpu==1.8.0 +scikit-image +dlib==19.10.0 +tqdm +git+https://www.github.com/keras-team/keras-contrib.git \ No newline at end of file diff --git a/utils/AlignedPNG.py b/utils/AlignedPNG.py new file mode 100644 index 0000000..6167cab --- /dev/null +++ b/utils/AlignedPNG.py @@ -0,0 +1,296 @@ +PNG_HEADER = b"\x89PNG\r\n\x1a\n" + +import string +import struct +import zlib +import pickle + +class Chunk(object): + def __init__(self, name=None, data=None): + self.length = 0 + self.crc = 0 + self.name = name if name else "noNe" + self.data = data if data else b"" + + @classmethod + def load(cls, data): + """Load a chunk including header and footer""" + inst = cls() + if len(data) < 12: + msg = "Chunk-data too small" + raise ValueError(msg) + + # chunk header & data + (inst.length, raw_name) = struct.unpack("!I4s", data[0:8]) + inst.data = data[8:-4] + inst.verify_length() + inst.name = raw_name.decode("ascii") + 
inst.verify_name() + + # chunk crc + inst.crc = struct.unpack("!I", data[8+inst.length:8+inst.length+4])[0] + inst.verify_crc() + + return inst + + def dump(self, auto_crc=True, auto_length=True): + """Return the chunk including header and footer""" + if auto_length: self.update_length() + if auto_crc: self.update_crc() + self.verify_name() + return struct.pack("!I", self.length) + self.get_raw_name() + self.data + struct.pack("!I", self.crc) + + def verify_length(self): + if len(self.data) != self.length: + msg = "Data length ({}) does not match length in chunk header ({})".format(len(self.data), self.length) + raise ValueError(msg) + return True + + def verify_name(self): + for c in self.name: + if c not in string.ascii_letters: + msg = "Invalid character in chunk name: {}".format(repr(self.name)) + raise ValueError(msg) + return True + + def verify_crc(self): + calculated_crc = self.get_crc() + if self.crc != calculated_crc: + msg = "CRC mismatch: {:08X} (header), {:08X} (calculated)".format(self.crc, calculated_crc) + raise ValueError(msg) + return True + + def update_length(self): + self.length = len(self.data) + + def update_crc(self): + self.crc = self.get_crc() + + def get_crc(self): + return zlib.crc32(self.get_raw_name() + self.data) + + def get_raw_name(self): + return self.name if isinstance(self.name, bytes) else self.name.encode("ascii") + + # name helper methods + + def ancillary(self, set=None): + """Set and get ancillary=True/critical=False bit""" + if set is True: + self.name[0] = self.name[0].lower() + elif set is False: + self.name[0] = self.name[0].upper() + return self.name[0].islower() + + def private(self, set=None): + """Set and get private=True/public=False bit""" + if set is True: + self.name[1] = self.name[1].lower() + elif set is False: + self.name[1] = self.name[1].upper() + return self.name[1].islower() + + def reserved(self, set=None): + """Set and get reserved_valid=True/invalid=False bit""" + if set is True: + self.name[2] = self.name[2].upper() + elif set is False: + self.name[2] = self.name[2].lower() + return self.name[2].isupper() + + def safe_to_copy(self, set=None): + """Set and get save_to_copy=True/unsafe=False bit""" + if set is True: + self.name[3] = self.name[3].lower() + elif set is False: + self.name[3] = self.name[3].upper() + return self.name[3].islower() + + def __str__(self): + return "".format(**self.__dict__) + +class IHDR(Chunk): + """IHDR Chunk + width, height, bit_depth, color_type, compression_method, + filter_method, interlace_method contain the data extracted + from the chunk. Modify those and use and build() to recreate + the chunk. 
Valid values for bit_depth depend on the color_type + and can be looked up in color_types or in the PNG specification + + See: + http://www.libpng.org/pub/png/spec/1.2/PNG-Chunks.html#C.IHDR + """ + # color types with name & allowed bit depths + COLOR_TYPE_GRAY = 0 + COLOR_TYPE_RGB = 2 + COLOR_TYPE_PLTE = 3 + COLOR_TYPE_GRAYA = 4 + COLOR_TYPE_RGBA = 6 + color_types = { + COLOR_TYPE_GRAY: ("Grayscale", (1,2,4,8,16)), + COLOR_TYPE_RGB: ("RGB", (8,16)), + COLOR_TYPE_PLTE: ("Palette", (1,2,4,8)), + COLOR_TYPE_GRAYA: ("Greyscale+Alpha", (8,16)), + COLOR_TYPE_RGBA: ("RGBA", (8,16)), + } + + def __init__(self, width=0, height=0, bit_depth=8, color_type=2, \ + compression_method=0, filter_method=0, interlace_method=0): + self.width = width + self.height = height + self.bit_depth = bit_depth + self.color_type = color_type + self.compression_method = compression_method + self.filter_method = filter_method + self.interlace_method = interlace_method + super().__init__("IHDR") + + @classmethod + def load(cls, data): + inst = super().load(data) + fields = struct.unpack("!IIBBBBB", inst.data) + inst.width = fields[0] + inst.height = fields[1] + inst.bit_depth = fields[2] # per channel + inst.color_type = fields[3] # see specs + inst.compression_method = fields[4] # always 0(=deflate/inflate) + inst.filter_method = fields[5] # always 0(=adaptive filtering with 5 methods) + inst.interlace_method = fields[6] # 0(=no interlace) or 1(=Adam7 interlace) + return inst + + def dump(self): + self.data = struct.pack("!IIBBBBB", \ + self.width, self.height, self.bit_depth, self.color_type, \ + self.compression_method, self.filter_method, self.interlace_method) + return super().dump() + + def __str__(self): + return "" \ + .format(self.color_types[self.color_type][0], **self.__dict__) + +class IEND(Chunk): + def __init__(self): + super().__init__("IEND") + + def dump(self): + if len(self.data) != 0: + msg = "IEND has data which is not allowed" + raise ValueError(msg) + if self.length != 0: + msg = "IEND data lenght is not 0 which is not allowed" + raise ValueError(msg) + return super().dump() + + def __str__(self): + return "".format(**self.__dict__) + +class FaceswapChunk(Chunk): + def __init__(self, dict_data=None): + super().__init__("fcWp") + self.dict_data = dict_data + + def setDictData(self, dict_data): + self.dict_data = dict_data + + def getDictData(self): + return self.dict_data + + @classmethod + def load(cls, data): + inst = super().load(data) + inst.dict_data = pickle.loads( inst.data ) + return inst + + def dump(self): + self.data = pickle.dumps (self.dict_data) + return super().dump() + +chunk_map = { + b"IHDR": IHDR, + b"fcWp": FaceswapChunk, + b"IEND": IEND +} + +class AlignedPNG(object): + def __init__(self): + self.data = b"" + self.length = 0 + self.chunks = [] + + @staticmethod + def load(data): + + try: + with open(data, "rb") as f: + data = f.read() + except: + raise FileNotFoundError(data) + + inst = AlignedPNG() + inst.data = data + inst.length = len(data) + + if data[0:8] != PNG_HEADER: + msg = "No Valid PNG header" + raise ValueError(msg) + + chunk_start = 8 + while chunk_start < inst.length: + (chunk_length, chunk_name) = struct.unpack("!I4s", data[chunk_start:chunk_start+8]) + chunk_end = chunk_start + chunk_length + 12 + + chunk = chunk_map.get(chunk_name, Chunk).load(data[chunk_start:chunk_end]) + inst.chunks.append(chunk) + chunk_start = chunk_end + + return inst + + + def save(self, filename): + try: + with open(filename, "wb") as f: + f.write ( self.dump() ) + except: + raise 
Exception( 'cannot save %s' % (filename) ) + + def dump(self): + data = PNG_HEADER + for chunk in self.chunks: + data += chunk.dump() + return data + + def get_shape(self): + for chunk in self.chunks: + if type(chunk) == IHDR: + c = 3 if chunk.color_type == IHDR.COLOR_TYPE_RGB else 4 + w = chunk.width + h = chunk.height + return (h,w,c) + return (0,0,0) + + def get_height(self): + for chunk in self.chunks: + if type(chunk) == IHDR: + return chunk.height + return 0 + + def getFaceswapDictData(self): + for chunk in self.chunks: + if type(chunk) == FaceswapChunk: + return chunk.getDictData() + return None + + def setFaceswapDictData (self, dict_data=None): + for chunk in self.chunks: + if type(chunk) == FaceswapChunk: + self.chunks.remove(chunk) + break + + if not dict_data is None: + chunk = FaceswapChunk(dict_data) + self.chunks.insert(-1, chunk) + + + + def __str__(self): + return "".format(len(self.chunks), **self.__dict__) diff --git a/utils/Path_utils.py b/utils/Path_utils.py new file mode 100644 index 0000000..390fdb4 --- /dev/null +++ b/utils/Path_utils.py @@ -0,0 +1,40 @@ +from pathlib import Path +from scandir import scandir + +image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] + +def get_image_paths(dir_path): + dir_path = Path (dir_path) + + result = [] + if dir_path.exists(): + for x in list(scandir(str(dir_path))): + if any([x.name.lower().endswith(ext) for ext in image_extensions]): + result.append(x.path) + return result + +def get_image_unique_filestem_paths(dir_path, verbose=False): + result = get_image_paths(dir_path) + result_dup = set() + + for f in result[:]: + f_stem = Path(f).stem + if f_stem in result_dup: + result.remove(f) + if verbose: + print ("Duplicate filenames are not allowed, skipping: %s" % Path(f).name ) + continue + result_dup.add(f_stem) + + return result + +def get_all_dir_names_startswith (dir_path, startswith): + dir_path = Path (dir_path) + startswith = startswith.lower() + + result = [] + if dir_path.exists(): + for x in list(scandir(str(dir_path))): + if x.name.lower().startswith(startswith): + result.append ( x.name[len(startswith):] ) + return result diff --git a/utils/SubprocessorBase.py b/utils/SubprocessorBase.py new file mode 100644 index 0000000..c47a1c6 --- /dev/null +++ b/utils/SubprocessorBase.py @@ -0,0 +1,246 @@ +import traceback +from tqdm import tqdm +import multiprocessing +import time +import sys + +class SubprocessorBase(object): + + #overridable + def __init__(self, name, no_response_time_sec = 60): + self.name = name + self.no_response_time_sec = no_response_time_sec + + #overridable + def process_info_generator(self): + #yield name, host_dict, client_dict - per process + yield 'first process', {}, {} + + #overridable + def get_no_process_started_message(self): + return "No process started." 
+ + #overridable + def onHostGetProgressBarDesc(self): + return "Processing" + + #overridable + def onHostGetProgressBarLen(self): + return 0 + + #overridable + def onHostGetData(self): + #return data here + return None + + #overridable + def onHostDataReturn (self, data): + #input_data.insert(0, obj['data']) + pass + + #overridable + def onClientInitialize(self, client_dict): + #return fail message or None if ok + return None + + #overridable + def onClientFinalize(self): + pass + + #overridable + def onClientProcessData(self, data): + #return result object + return None + + #overridable + def onClientGetDataName (self, data): + #return string identificator of your data + return "undefined" + + #overridable + def onHostClientsInitialized(self): + pass + + #overridable + def onHostResult (self, data, result): + #return count of progress bar update + return 1 + + #overridable + def onHostProcessEnd(self): + pass + + #overridable + def get_start_return(self): + return None + + def inc_progress_bar(self, c): + self.progress_bar.update(c) + + def safe_print(self, msg): + self.print_lock.acquire() + print (msg) + self.print_lock.release() + + def process(self): + #returns start_return + + self.processes = [] + + self.print_lock = multiprocessing.Lock() + for name, host_dict, client_dict in self.process_info_generator(): + sq = multiprocessing.Queue() + cq = multiprocessing.Queue() + + client_dict.update ( {'print_lock' : self.print_lock} ) + + p = multiprocessing.Process(target=self.subprocess, args=(sq,cq,client_dict)) + p.daemon = True + p.start() + self.processes.append ( { 'process' : p, + 'sq' : sq, + 'cq' : cq, + 'state' : 'busy', + 'sent_time': time.time(), + 'name': name, + 'host_dict' : host_dict + } ) + + while True: + for p in self.processes[:]: + while not p['cq'].empty(): + obj = p['cq'].get() + obj_op = obj['op'] + + if obj_op == 'init_ok': + p['state'] = 'free' + elif obj_op == 'error': + if obj['close'] == True: + p['process'].terminate() + p['process'].join() + self.processes.remove(p) + break + + if all ([ p['state'] == 'free' for p in self.processes ] ): + break + + if len(self.processes) == 0: + print ( self.get_no_process_started_message() ) + return self.get_start_return() + + self.onHostClientsInitialized() + + self.progress_bar = tqdm( total=self.onHostGetProgressBarLen(), desc=self.onHostGetProgressBarDesc() ) + + try: + while True: + for p in self.processes[:]: + while not p['cq'].empty(): + obj = p['cq'].get() + obj_op = obj['op'] + + if obj_op == 'success': + data = obj['data'] + result = obj['result'] + + c = self.onHostResult (data, result) + if c > 0: + self.progress_bar.update(c) + + p['state'] = 'free' + + elif obj_op == 'error': + if 'data' in obj.keys(): + self.onHostDataReturn ( obj['data'] ) + + if obj['close'] == True: + p['sq'].put ( {'op': 'close'} ) + p['process'].join() + self.processes.remove(p) + break + p['state'] = 'free' + + for p in self.processes[:]: + if p['state'] == 'free': + data = self.onHostGetData() + if data is not None: + p['sq'].put ( {'op': 'data', 'data' : data} ) + p['sent_time'] = time.time() + p['sent_data'] = data + p['state'] = 'busy' + + elif p['state'] == 'busy': + if (time.time() - p['sent_time']) > self.no_response_time_sec: + print ( '%s doesnt response, terminating it.' 
% (p['name']) ) + self.onHostDataReturn ( p['sent_data'] ) + p['sq'].put ( {'op': 'close'} ) + p['process'].join() + self.processes.remove(p) + + if all ([p['state'] == 'free' for p in self.processes]): + break + + time.sleep(0.005) + except: + print ("Exception occured in Subprocessor.start(): %s" % (traceback.format_exc()) ) + + self.progress_bar.close() + + for p in self.processes[:]: + p['sq'].put ( {'op': 'close'} ) + + while True: + for p in self.processes[:]: + while not p['cq'].empty(): + obj = p['cq'].get() + obj_op = obj['op'] + if obj_op == 'finalized': + p['state'] = 'finalized' + + if all ([p['state'] == 'finalized' for p in self.processes]): + break + + for p in self.processes[:]: + p['process'].terminate() + + self.onHostProcessEnd() + + return self.get_start_return() + + def subprocess(self, sq, cq, client_dict): + self.print_lock = client_dict['print_lock'] + + try: + fail_message = self.onClientInitialize(client_dict) + except: + fail_message = 'Exception while initialization: %s' % (traceback.format_exc()) + + if fail_message is None: + cq.put ( {'op': 'init_ok'} ) + else: + print (fail_message) + cq.put ( {'op': 'error', 'close': True} ) + return + + while True: + obj = sq.get() + obj_op = obj['op'] + + if obj_op == 'data': + data = obj['data'] + try: + result = self.onClientProcessData (data) + cq.put ( {'op': 'success', 'data' : data, 'result' : result} ) + except: + + print ( 'Exception while process data [%s]: %s' % (self.onClientGetDataName(data), traceback.format_exc()) ) + cq.put ( {'op': 'error', 'close': True, 'data' : data } ) + elif obj_op == 'close': + break + + time.sleep(0.005) + + self.onClientFinalize() + cq.put ( {'op': 'finalized'} ) + while True: + time.sleep(0.1) \ No newline at end of file diff --git a/utils/image_utils.py b/utils/image_utils.py new file mode 100644 index 0000000..6954f35 --- /dev/null +++ b/utils/image_utils.py @@ -0,0 +1,264 @@ +import sys +from utils import random_utils +import numpy as np +import cv2 +import localization +from scipy.spatial import Delaunay +from PIL import Image, ImageDraw, ImageFont + +def channel_hist_match(source, template, mask=None): + # Code borrowed from: + # https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x + masked_source = source + masked_template = template + + if mask is not None: + masked_source = source * mask + masked_template = template * mask + + oldshape = source.shape + source = source.ravel() + template = template.ravel() + masked_source = masked_source.ravel() + masked_template = masked_template.ravel() + s_values, bin_idx, s_counts = np.unique(source, return_inverse=True, + return_counts=True) + t_values, t_counts = np.unique(template, return_counts=True) + ms_values, mbin_idx, ms_counts = np.unique(source, return_inverse=True, + return_counts=True) + mt_values, mt_counts = np.unique(template, return_counts=True) + + s_quantiles = np.cumsum(s_counts).astype(np.float64) + s_quantiles /= s_quantiles[-1] + t_quantiles = np.cumsum(t_counts).astype(np.float64) + t_quantiles /= t_quantiles[-1] + interp_t_values = np.interp(s_quantiles, t_quantiles, t_values) + + return interp_t_values[bin_idx].reshape(oldshape) + +def color_hist_match(src_im, tar_im, mask=None): + h,w,c = src_im.shape + matched_R = channel_hist_match(src_im[:,:,0], tar_im[:,:,0], mask) + matched_G = channel_hist_match(src_im[:,:,1], tar_im[:,:,1], mask) + matched_B = channel_hist_match(src_im[:,:,2], tar_im[:,:,2], mask) + + to_stack = (matched_R, matched_G, matched_B) + for i in range(3, 
c): + to_stack += ( src_im[:,:,i],) + + + matched = np.stack(to_stack, axis=-1).astype(src_im.dtype) + return matched + + +pil_fonts = {} +def _get_pil_font (font, size): + global pil_fonts + try: + font_str_id = '%s_%d' % (font, size) + if font_str_id not in pil_fonts.keys(): + pil_fonts[font_str_id] = ImageFont.truetype(font + ".ttf", size=size, encoding="unic") + pil_font = pil_fonts[font_str_id] + return pil_font + except: + return ImageFont.load_default() + +def get_text_image( shape, text, color=(1,1,1), border=0.2, font=None): + try: + size = shape[1] + pil_font = _get_pil_font( localization.get_default_ttf_font_name() , size) + text_width, text_height = pil_font.getsize(text) + + canvas = Image.new('RGB', shape[0:2], (0,0,0) ) + draw = ImageDraw.Draw(canvas) + offset = ( 0, 0) + draw.text(offset, text, font=pil_font, fill=tuple((np.array(color)*255).astype(np.int)) ) + + result = np.asarray(canvas) / 255 + if shape[2] != 3: + result = np.concatenate ( (result, np.ones ( (shape[1],) + (shape[0],) + (shape[2]-3,)) ), axis=2 ) + + return result + except: + return np.zeros ( (shape[1], shape[0], shape[2]), dtype=np.float32 ) + +def draw_text( image, rect, text, color=(1,1,1), border=0.2, font=None): + h,w,c = image.shape + + l,t,r,b = rect + l = np.clip (l, 0, w-1) + r = np.clip (r, 0, w-1) + t = np.clip (t, 0, h-1) + b = np.clip (b, 0, h-1) + + image[t:b, l:r] += get_text_image ( (r-l,b-t,c) , text, color, border, font ) + +def draw_text_lines (image, rect, text_lines, color=(1,1,1), border=0.2, font=None): + text_lines_len = len(text_lines) + if text_lines_len == 0: + return + + l,t,r,b = rect + h = b-t + h_per_line = h // text_lines_len + + for i in range(0, text_lines_len): + draw_text (image, (l, i*h_per_line, r, (i+1)*h_per_line), text_lines[i], color, border, font) + +def get_draw_text_lines ( image, rect, text_lines, color=(1,1,1), border=0.2, font=None): + image = np.zeros ( image.shape, dtype=np.float ) + draw_text_lines ( image, rect, text_lines, color, border, font) + return image + + +def draw_polygon (image, points, color, thickness = 1): + points_len = len(points) + for i in range (0, points_len): + p0 = tuple( points[i] ) + p1 = tuple( points[ (i+1) % points_len] ) + cv2.line (image, p0, p1, color, thickness=thickness) + +def draw_rect(image, rect, color, thickness=1): + l,t,r,b = rect + draw_polygon (image, [ (l,t), (r,t), (r,b), (l,b ) ], color, thickness) + +def rectContains(rect, point) : + return not (point[0] < rect[0] or point[0] >= rect[2] or point[1] < rect[1] or point[1] >= rect[3]) + +def applyAffineTransform(src, srcTri, dstTri, size) : + warpMat = cv2.getAffineTransform( np.float32(srcTri), np.float32(dstTri) ) + return cv2.warpAffine( src, warpMat, (size[0], size[1]), None, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101 ) + +def morphTriangle(dst_img, src_img, st, dt) : + (h,w,c) = dst_img.shape + sr = np.array( cv2.boundingRect(np.float32(st)) ) + dr = np.array( cv2.boundingRect(np.float32(dt)) ) + sRect = st - sr[0:2] + dRect = dt - dr[0:2] + d_mask = np.zeros((dr[3], dr[2], c), dtype = np.float32) + cv2.fillConvexPoly(d_mask, np.int32(dRect), (1.0,)*c, 8, 0); + imgRect = src_img[sr[1]:sr[1] + sr[3], sr[0]:sr[0] + sr[2]] + size = (dr[2], dr[3]) + warpImage1 = applyAffineTransform(imgRect, sRect, dRect, size) + dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]] = dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]]*(1-d_mask) + warpImage1 * d_mask + +def morph_by_points (image, sp, dp): + if sp.shape != dp.shape: + raise ValueError ('morph_by_points() 
sp.shape != dp.shape') + (h,w,c) = image.shape + + result_image = np.zeros(image.shape, dtype = image.dtype) + + for tri in Delaunay(dp).simplices: + morphTriangle(result_image, image, sp[tri], dp[tri]) + + return result_image + +def equalize_and_stack_square (images, axis=1): + max_c = max ([ 1 if len(image.shape) == 2 else image.shape[2] for image in images ] ) + + target_wh = 99999 + for i,image in enumerate(images): + if len(image.shape) == 2: + h,w = image.shape + c = 1 + else: + h,w,c = image.shape + + if h < target_wh: + target_wh = h + + if w < target_wh: + target_wh = w + + for i,image in enumerate(images): + if len(image.shape) == 2: + h,w = image.shape + c = 1 + else: + h,w,c = image.shape + + if c < max_c: + if c == 1: + if len(image.shape) == 2: + image = np.expand_dims ( image, -1 ) + image = np.concatenate ( (image,)*max_c, -1 ) + elif c == 2: #GA + image = np.expand_dims ( image[...,0], -1 ) + image = np.concatenate ( (image,)*max_c, -1 ) + else: + image = np.concatenate ( (image, np.ones((h,w,max_c - c))), -1 ) + + if h != target_wh or w != target_wh: + image = cv2.resize ( image, (target_wh, target_wh) ) + h,w,c = image.shape + + images[i] = image + + return np.concatenate ( images, axis = 1 ) + +def bgr2hsv (img): + return cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + +def hsv2bgr (img): + return cv2.cvtColor(img, cv2.COLOR_HSV2BGR) + +def bgra2hsva (img): + return np.concatenate ( (cv2.cvtColor(img[...,0:3], cv2.COLOR_BGR2HSV ), np.expand_dims (img[...,3], -1)), -1 ) + +def bgra2hsva_list (imgs): + return [ bgra2hsva(img) for img in imgs ] + +def hsva2bgra (img): + return np.concatenate ( (cv2.cvtColor(img[...,0:3], cv2.COLOR_HSV2BGR ), np.expand_dims (img[...,3], -1)), -1 ) + +def hsva2bgra_list (imgs): + return [ hsva2bgra(img) for img in imgs ] + +def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ): + h,w,c = source.shape + if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024): + raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.') + + rotation = np.random.uniform( rotation_range[0], rotation_range[1] ) + scale = np.random.uniform(1 +scale_range[0], 1 +scale_range[1]) + tx = np.random.uniform( tx_range[0], tx_range[1] ) + ty = np.random.uniform( ty_range[0], ty_range[1] ) + + #random warp by grid + cell_size = [ w // (2**i) for i in range(1,4) ] [ np.random.randint(3) ] + cell_count = w // cell_size + 1 + + grid_points = np.linspace( 0, w, cell_count) + mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy() + mapy = mapx.T + + mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + + half_cell_size = cell_size // 2 + + mapx = cv2.resize(mapx, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) + mapy = cv2.resize(mapy, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32) + + #random transform + random_transform_mat = cv2.getRotationMatrix2D((w // 2, w // 2), rotation, scale) + random_transform_mat[:, 2] += (tx*w, ty*w) + + params = dict() + params['mapx'] = mapx + params['mapy'] = mapy + params['rmat'] = random_transform_mat + params['w'] = w + params['flip'] = flip and np.random.randint(10) < 4 + + return params + +def warp_by_params (params, 
img, warp, transform, flip): + if warp: + img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_LANCZOS4 ) + if transform: + img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_LANCZOS4 ) + if flip and params['flip']: + img = img[:,::-1,:] + return img \ No newline at end of file diff --git a/utils/iter_utils.py b/utils/iter_utils.py new file mode 100644 index 0000000..a1a55e6 --- /dev/null +++ b/utils/iter_utils.py @@ -0,0 +1,63 @@ +import threading +import queue as Queue +import multiprocessing +import time + + +class ThisThreadGenerator(object): + def __init__(self, generator_func, user_param=None): + super().__init__() + self.generator_func = generator_func + self.user_param = user_param + self.initialized = False + + def __iter__(self): + return self + + def __next__(self): + if not self.initialized: + self.initialized = True + self.generator_func = self.generator_func(self.user_param) + + return next(self.generator_func) + +class SubprocessGenerator(object): + def __init__(self, generator_func, user_param=None, prefetch=2): + super().__init__() + self.prefetch = prefetch + self.generator_func = generator_func + self.user_param = user_param + self.sc_queue = multiprocessing.Queue() + self.cs_queue = multiprocessing.Queue() + self.p = None + + def process_func(self): + self.generator_func = self.generator_func(self.user_param) + while True: + while self.prefetch > -1: + try: + gen_data = next (self.generator_func) + except StopIteration: + self.cs_queue.put (None) + return + self.cs_queue.put (gen_data) + self.prefetch -= 1 + self.sc_queue.get() + self.prefetch += 1 + + def __iter__(self): + return self + + def __next__(self): + if self.p == None: + self.p = multiprocessing.Process(target=self.process_func, args=()) + self.p.daemon = True + self.p.start() + + gen_data = self.cs_queue.get() + if gen_data is None: + self.p.terminate() + self.p.join() + raise StopIteration() + self.sc_queue.put (1) + return gen_data \ No newline at end of file diff --git a/utils/os_utils.py b/utils/os_utils.py new file mode 100644 index 0000000..87074fa --- /dev/null +++ b/utils/os_utils.py @@ -0,0 +1,18 @@ +import sys + +if sys.platform[0:3] == 'win': + from ctypes import windll + from ctypes import wintypes + +def set_process_lowest_prio(): + if sys.platform[0:3] == 'win': + GetCurrentProcess = windll.kernel32.GetCurrentProcess + GetCurrentProcess.restype = wintypes.HANDLE + + SetPriorityClass = windll.kernel32.SetPriorityClass + SetPriorityClass.argtypes = (wintypes.HANDLE, wintypes.DWORD) + SetPriorityClass ( GetCurrentProcess(), 0x00000040 ) + +def set_process_dpi_aware(): + if sys.platform[0:3] == 'win': + windll.user32.SetProcessDPIAware(True) \ No newline at end of file diff --git a/utils/random_utils.py b/utils/random_utils.py new file mode 100644 index 0000000..0bbc6f7 --- /dev/null +++ b/utils/random_utils.py @@ -0,0 +1,14 @@ +import numpy as np + +def random_normal( size=(1,), trunc_val = 2.5 ): + len = np.array(size).prod() + result = np.empty ( (len,) , dtype=np.float32) + + for i in range (len): + while True: + x = np.random.normal() + if x >= -trunc_val and x <= trunc_val: + break + result[i] = (x / trunc_val) + + return result.reshape ( size ) \ No newline at end of file diff --git a/utils/std_utils.py b/utils/std_utils.py new file mode 100644 index 0000000..81af906 --- /dev/null +++ b/utils/std_utils.py @@ -0,0 +1,36 @@ +import os +import sys + +class suppress_stdout_stderr(object): + def __enter__(self): + 
self.outnull_file = open(os.devnull, 'w') + self.errnull_file = open(os.devnull, 'w') + + self.old_stdout_fileno_undup = sys.stdout.fileno() + self.old_stderr_fileno_undup = sys.stderr.fileno() + + self.old_stdout_fileno = os.dup ( sys.stdout.fileno() ) + self.old_stderr_fileno = os.dup ( sys.stderr.fileno() ) + + self.old_stdout = sys.stdout + self.old_stderr = sys.stderr + + os.dup2 ( self.outnull_file.fileno(), self.old_stdout_fileno_undup ) + os.dup2 ( self.errnull_file.fileno(), self.old_stderr_fileno_undup ) + + sys.stdout = self.outnull_file + sys.stderr = self.errnull_file + return self + + def __exit__(self, *_): + sys.stdout = self.old_stdout + sys.stderr = self.old_stderr + + os.dup2 ( self.old_stdout_fileno, self.old_stdout_fileno_undup ) + os.dup2 ( self.old_stderr_fileno, self.old_stderr_fileno_undup ) + + os.close ( self.old_stdout_fileno ) + os.close ( self.old_stderr_fileno ) + + self.outnull_file.close() + self.errnull_file.close() \ No newline at end of file
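
TrainingDataGeneratorBase is meant to be subclassed: keyword arguments passed to the constructor are forwarded to onInitialize(), and onProcessSample() returns the tuple of per-sample images that batch_func() stacks into numpy batches. A minimal sketch of such a subclass, assuming an aligned faceset produced by the extractor; the GrayscaleDataGenerator name, the resolution option and the data_src/aligned path are illustrative only, not part of the repository code:

```python
# Illustrative sketch only: a custom generator built on TrainingDataGeneratorBase.
# Extra constructor kwargs arrive in onInitialize(), and onProcessSample()
# returns a tuple of per-sample images that the base class stacks into batches.
import cv2
import numpy as np
from models import TrainingDataGeneratorBase, TrainingDataType

class GrayscaleDataGenerator(TrainingDataGeneratorBase):       # hypothetical name
    def onInitialize(self, resolution=64, **kwargs):           # 'resolution' is an illustrative option
        self.resolution = resolution

    def onProcessSample(self, sample, debug):
        bgr = sample.load_bgr()                                 # aligned face image
        bgr = cv2.resize(bgr, (self.resolution, self.resolution))
        gray = cv2.cvtColor(bgr.astype(np.float32), cv2.COLOR_BGR2GRAY)
        return (bgr, np.expand_dims(gray, -1))

# generator = GrayscaleDataGenerator(TrainingDataType.FACE, 'data_src/aligned',  # hypothetical path
#                                    batch_size=4, resolution=64)
# batch_bgr, batch_gray = next(generator)                       # two arrays of shape (4, 64, 64, C)
```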
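
The upscale() helper widens the channels with Conv2D(filters * 4) and then applies PixelShuffler, which rearranges a (N, H, W, C·r²) tensor into (N, H·r, W·r, C). A small numpy re-implementation of the channels_last branch, shown only to make that reshape/permute explicit:

```python
# Numpy equivalent of PixelShuffler's channels_last branch, for illustration:
# reshape to (n, h, w, rh, rw, oc), permute to (n, h, rh, w, rw, oc),
# then collapse to (n, h*rh, w*rw, oc).
import numpy as np

def pixel_shuffle(x, r=2):
    n, h, w, c = x.shape
    oc = c // (r * r)
    x = x.reshape(n, h, w, r, r, oc)
    x = x.transpose(0, 1, 3, 2, 4, 5)
    return x.reshape(n, h * r, w * r, oc)

x = np.random.rand(1, 8, 8, 2048).astype(np.float32)
print(pixel_shuffle(x).shape)   # (1, 16, 16, 512) - how upscale(..., 512) doubles resolution
```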
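
Every aligned face is a normal PNG with an extra private 'fcWp' chunk holding a pickled dict (landmarks, yaw value, face type), which is what X_LOAD() reads back for training. A short sketch of reading and re-embedding that data; the file path is illustrative:

```python
# Illustrative only: reading / writing the embedded faceswap dict of an aligned PNG.
from utils.AlignedPNG import AlignedPNG

png = AlignedPNG.load('data_dst/aligned/00001.png')     # hypothetical path
d = png.getFaceswapDictData()
if d is not None:
    print(d.get('face_type', 'full_face'), d['yaw_value'], len(d['landmarks']))

# modify the dict and embed it again:
# png.setFaceswapDictData(d)
# png.save('data_dst/aligned/00001.png')
```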
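
SubprocessorBase drives the parallel extractor and converter: the host hands out work items through onHostGetData(), each worker created by process_info_generator() handles them in onClientProcessData(), and onHostResult() advances the shared progress bar. A deliberately tiny sketch of that protocol; the SquareNumbers job and its methods' contents are hypothetical:

```python
# Illustrative sketch of the SubprocessorBase host/worker protocol.
from utils.SubprocessorBase import SubprocessorBase

class SquareNumbers(SubprocessorBase):                  # hypothetical example job
    def __init__(self, numbers):
        self.numbers = list(numbers)
        self.results = []
        super().__init__('SquareNumbers', no_response_time_sec=10)

    def process_info_generator(self):                   # one entry per worker process
        for i in range(2):
            yield 'worker %d' % i, {}, {}

    def onHostGetProgressBarLen(self):
        return len(self.numbers)

    def onHostGetData(self):                            # None means "no more work"
        return self.numbers.pop() if len(self.numbers) > 0 else None

    def onHostDataReturn(self, data):                   # failed work goes back to the queue
        self.numbers.insert(0, data)

    def onClientProcessData(self, data):                # runs in the worker process
        return data * data

    def onHostResult(self, data, result):               # runs on the host; returns progress increment
        self.results.append(result)
        return 1

    def get_start_return(self):
        return self.results

# squares = SquareNumbers([1, 2, 3, 4]).process()
```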