removed default yaw_value from DFLIMG files,

added a better pitch/yaw estimator based on the 68 face landmarks,
improving face yaw accuracy for sorting and trainers,
added sort by face-pitch
iperov 2019-02-12 21:31:37 +04:00
parent 535041f7bb
commit 06fe1314d8
13 changed files with 182 additions and 37 deletions


@@ -8,6 +8,10 @@
`hist-blur` sort by blur in groups of similar content
`face-pitch` sort by face pitch direction
`face-yaw` sort by face yaw direction
`brightness`
`hue`
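
With this commit, the sort command accepts `face-pitch` the same way as `face-yaw`. Assuming the usual main.py entry point and a placeholder aligned-faces directory, the new mode is invoked as:

python main.py sort --input-dir <aligned_dir> --by face-pitch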


@@ -2,6 +2,7 @@ import colorsys
import cv2
import numpy as np
from enum import IntEnum
import mathlib
from mathlib.umeyama import umeyama
from utils import image_utils
from facelib import FaceType
@@ -36,6 +37,77 @@ landmarks_68_pt = { "mouth": (48,68),
"nose": (27, 36), # missed one point
"jaw": (0, 17) }
landmarks_68_3D = np.array( [
[-73.393523 , -29.801432 , 47.667532 ],
[-72.775014 , -10.949766 , 45.909403 ],
[-70.533638 , 7.929818 , 44.842580 ],
[-66.850058 , 26.074280 , 43.141114 ],
[-59.790187 , 42.564390 , 38.635298 ],
[-48.368973 , 56.481080 , 30.750622 ],
[-34.121101 , 67.246992 , 18.456453 ],
[-17.875411 , 75.056892 , 3.609035 ],
[0.098749 , 77.061286 , -0.881698 ],
[17.477031 , 74.758448 , 5.181201 ],
[32.648966 , 66.929021 , 19.176563 ],
[46.372358 , 56.311389 , 30.770570 ],
[57.343480 , 42.419126 , 37.628629 ],
[64.388482 , 25.455880 , 40.886309 ],
[68.212038 , 6.990805 , 42.281449 ],
[70.486405 , -11.666193 , 44.142567 ],
[71.375822 , -30.365191 , 47.140426 ],
[-61.119406 , -49.361602 , 14.254422 ],
[-51.287588 , -58.769795 , 7.268147 ],
[-37.804800 , -61.996155 , 0.442051 ],
[-24.022754 , -61.033399 , -6.606501 ],
[-11.635713 , -56.686759 , -11.967398 ],
[12.056636 , -57.391033 , -12.051204 ],
[25.106256 , -61.902186 , -7.315098 ],
[38.338588 , -62.777713 , -1.022953 ],
[51.191007 , -59.302347 , 5.349435 ],
[60.053851 , -50.190255 , 11.615746 ],
[0.653940 , -42.193790 , -13.380835 ],
[0.804809 , -30.993721 , -21.150853 ],
[0.992204 , -19.944596 , -29.284036 ],
[1.226783 , -8.414541 , -36.948060 ],
[-14.772472 , 2.598255 , -20.132003 ],
[-7.180239 , 4.751589 , -23.536684 ],
[0.555920 , 6.562900 , -25.944448 ],
[8.272499 , 4.661005 , -23.695741 ],
[15.214351 , 2.643046 , -20.858157 ],
[-46.047290 , -37.471411 , 7.037989 ],
[-37.674688 , -42.730510 , 3.021217 ],
[-27.883856 , -42.711517 , 1.353629 ],
[-19.648268 , -36.754742 , -0.111088 ],
[-28.272965 , -35.134493 , -0.147273 ],
[-38.082418 , -34.919043 , 1.476612 ],
[19.265868 , -37.032306 , -0.665746 ],
[27.894191 , -43.342445 , 0.247660 ],
[37.437529 , -43.110822 , 1.696435 ],
[45.170805 , -38.086515 , 4.894163 ],
[38.196454 , -35.532024 , 0.282961 ],
[28.764989 , -35.484289 , -1.172675 ],
[-28.916267 , 28.612716 , -2.240310 ],
[-17.533194 , 22.172187 , -15.934335 ],
[-6.684590 , 19.029051 , -22.611355 ],
[0.381001 , 20.721118 , -23.748437 ],
[8.375443 , 19.035460 , -22.721995 ],
[18.876618 , 22.394109 , -15.610679 ],
[28.794412 , 28.079924 , -3.217393 ],
[19.057574 , 36.298248 , -14.987997 ],
[8.956375 , 39.634575 , -22.554245 ],
[0.381549 , 40.395647 , -23.591626 ],
[-7.428895 , 39.836405 , -22.406106 ],
[-18.160634 , 36.677899 , -15.121907 ],
[-24.377490 , 28.677771 , -4.785684 ],
[-6.897633 , 25.475976 , -20.893742 ],
[0.340663 , 26.014269 , -22.220479 ],
[8.444722 , 25.326198 , -21.025520 ],
[24.474473 , 28.323008 , -5.712776 ],
[8.449166 , 30.596216 , -20.671489 ],
[0.205322 , 31.408738 , -21.903670 ],
[-7.198266 , 30.844876 , -20.328022 ] ], dtype=np.float32)
def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
if not isinstance(image_landmarks, np.ndarray):
image_landmarks = np.array (image_landmarks)
@@ -214,6 +286,7 @@ def calc_face_pitch(landmarks):
t = ( (landmarks[6][1]-landmarks[8][1]) + (landmarks[10][1]-landmarks[8][1]) ) / 2.0
b = landmarks[8][1]
return float(b-t)
def calc_face_yaw(landmarks):
if not isinstance(landmarks, np.ndarray):
landmarks = np.array (landmarks)
@@ -221,3 +294,23 @@ def calc_face_yaw(landmarks):
r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0
return float(r-l)
#returns pitch,yaw [-1...+1]
def estimate_pitch_yaw(aligned_256px_landmarks):
shape = (256,256)
focal_length = shape[1]
camera_center = (shape[1] / 2, shape[0] / 2)
camera_matrix = np.array(
[[focal_length, 0, camera_center[0]],
[0, focal_length, camera_center[1]],
[0, 0, 1]], dtype=np.float32)
(_, rotation_vector, translation_vector) = cv2.solvePnP(
landmarks_68_3D,
aligned_256px_landmarks.astype(np.float32),
camera_matrix,
np.zeros((4, 1)) )
pitch, yaw, _ = mathlib.rotationMatrixToEulerAngles( cv2.Rodrigues(rotation_vector)[0] )
pitch = np.clip ( pitch*1.25, -1.0, 1.0 )
yaw = np.clip ( yaw*1.25, -1.0, 1.0 )
return pitch, yaw
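
A minimal usage sketch for the new estimator, mirroring how the sorter below calls it. The import path for DFLJPG and the image path are assumptions; get_landmarks() returns landmarks in the 256x256 aligned-image coordinates that estimate_pitch_yaw expects:

from utils.DFLJPG import DFLJPG          # assumed module path
from facelib import LandmarksProcessor

dflimg = DFLJPG.load("aligned/00001.jpg", print_on_no_embedded_data=True)  # placeholder file
pitch, yaw = LandmarksProcessor.estimate_pitch_yaw(dflimg.get_landmarks())
print(pitch, yaw)  # both clipped to [-1.0, +1.0]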


@@ -61,7 +61,7 @@ if __name__ == "__main__":
sort_parser = subparsers.add_parser( "sort", help="Sort faces in a directory.")
sort_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
sort_parser.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "final", "test"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." )
sort_parser.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "face-pitch", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "final", "test"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." )
sort_parser.set_defaults (func=process_sort)
def process_util(arguments):


@@ -334,8 +334,6 @@ class ExtractSubprocessor(SubprocessorBase):
DFLJPG.embed_data(output_file, face_type = FaceType.toString(self.face_type),
landmarks = face_image_landmarks.tolist(),
yaw_value = LandmarksProcessor.calc_face_yaw (face_image_landmarks),
pitch_value = LandmarksProcessor.calc_face_pitch (face_image_landmarks),
source_filename = filename_path.name,
source_rect= rect,
source_landmarks = image_landmarks.tolist()


@@ -237,7 +237,32 @@ def sort_by_face_yaw(input_path):
print ("%s is not a dfl image file" % (filepath.name) )
continue
img_list.append( [str(filepath), np.array( dflimg.get_yaw_value() ) ] )
pitch, yaw = LandmarksProcessor.estimate_pitch_yaw ( dflimg.get_landmarks() )
img_list.append( [str(filepath), yaw ] )
print ("Sorting...")
img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True)
return img_list
def sort_by_face_pitch(input_path):
print ("Sorting by face pitch...")
img_list = []
for filepath in tqdm( Path_utils.get_image_paths(input_path), desc="Loading", ascii=True):
filepath = Path(filepath)
if filepath.suffix == '.png':
dflimg = DFLPNG.load( str(filepath), print_on_no_embedded_data=True )
elif filepath.suffix == '.jpg':
dflimg = DFLJPG.load ( str(filepath), print_on_no_embedded_data=True )
else:
print ("%s is not a dfl image file" % (filepath.name) )
continue
pitch, yaw = LandmarksProcessor.estimate_pitch_yaw ( dflimg.get_landmarks() )
img_list.append( [str(filepath), pitch ] )
print ("Sorting...")
img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True)
@@ -543,12 +568,14 @@ class FinalLoaderSubprocessor(SubprocessorBase):
gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
gray_masked = ( gray * LandmarksProcessor.get_image_hull_mask (bgr.shape, dflimg.get_landmarks() )[:,:,0] ).astype(np.uint8)
sharpness = estimate_sharpness(gray_masked)
pitch, yaw = LandmarksProcessor.estimate_pitch_yaw ( dflimg.get_landmarks() )
hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
except Exception as e:
print (e)
return [ 1, [str(filepath)] ]
return [ 0, [str(filepath), sharpness, hist, dflimg.get_yaw_value() ] ]
return [ 0, [str(filepath), sharpness, hist, yaw ] ]
#override
@@ -577,7 +604,7 @@ def sort_final(input_path):
grads = 128
imgs_per_grad = 15
grads_space = np.linspace (-255,255,grads)
grads_space = np.linspace (-1.0,1.0,grads)
yaws_sample_list = [None]*grads
for g in tqdm ( range(grads), desc="Sort by yaw", ascii=True ):
@@ -732,6 +759,7 @@ def main (input_path, sort_by_method):
elif sort_by_method == 'face': img_list = sort_by_face (input_path)
elif sort_by_method == 'face-dissim': img_list = sort_by_face_dissim (input_path)
elif sort_by_method == 'face-yaw': img_list = sort_by_face_yaw (input_path)
elif sort_by_method == 'face-pitch': img_list = sort_by_face_pitch (input_path)
elif sort_by_method == 'hist': img_list = sort_by_hist (input_path)
elif sort_by_method == 'hist-dissim': img_list = sort_by_hist_dissim (input_path)
elif sort_by_method == 'brightness': img_list = sort_by_brightness (input_path)


@@ -34,8 +34,6 @@ def convert_png_to_jpg_file (filepath):
DFLJPG.embed_data( new_filepath,
face_type=dfl_dict.get('face_type', None),
landmarks=dfl_dict.get('landmarks', None),
yaw_value=dfl_dict.get('yaw_value', None),
pitch_value=dfl_dict.get('pitch_value', None),
source_filename=dfl_dict.get('source_filename', None),
source_rect=dfl_dict.get('source_rect', None),
source_landmarks=dfl_dict.get('source_landmarks', None) )


@@ -1,3 +1,5 @@
import numpy as np
import math
from .umeyama import umeyama
def get_power_of_two(x):
@@ -5,3 +7,16 @@ def get_power_of_two(x):
while (1 << i) < x:
i += 1
return i
def rotationMatrixToEulerAngles(R) :
sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
singular = sy < 1e-6
if not singular :
x = math.atan2(R[2,1] , R[2,2])
y = math.atan2(-R[2,0], sy)
z = math.atan2(R[1,0], R[0,0])
else :
x = math.atan2(-R[1,2], R[1,1])
y = math.atan2(-R[2,0], sy)
z = 0
return np.array([x, y, z])
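
A quick sanity check of the conversion, assuming only cv2 and the new mathlib helper: a pure rotation about the camera z axis should come back as (0, 0, angle) in radians.

import cv2
import numpy as np
import mathlib

R = cv2.Rodrigues(np.array([[0.0], [0.0], [0.3]]))[0]   # rotation of 0.3 rad about z
x, y, z = mathlib.rotationMatrixToEulerAngles(R)
print(x, y, z)   # approximately 0.0 0.0 0.3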


@@ -85,6 +85,7 @@ tanh = keras.layers.Activation('tanh')
sigmoid = keras.layers.Activation('sigmoid')
Dropout = keras.layers.Dropout
Lambda = keras.layers.Lambda
Add = keras.layers.Add
Concatenate = keras.layers.Concatenate


@@ -16,23 +16,25 @@ class SampleType(IntEnum):
QTY = 5
class Sample(object):
def __init__(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, yaw=None, mirror=None, close_target_list=None):
def __init__(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, pitch=None, yaw=None, mirror=None, close_target_list=None):
self.sample_type = sample_type if sample_type is not None else SampleType.IMAGE
self.filename = filename
self.face_type = face_type
self.shape = shape
self.landmarks = np.array(landmarks) if landmarks is not None else None
self.pitch = pitch
self.yaw = yaw
self.mirror = mirror
self.close_target_list = close_target_list
def copy_and_set(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, yaw=None, mirror=None, close_target_list=None):
def copy_and_set(self, sample_type=None, filename=None, face_type=None, shape=None, landmarks=None, pitch=None, yaw=None, mirror=None, close_target_list=None):
return Sample(
sample_type=sample_type if sample_type is not None else self.sample_type,
filename=filename if filename is not None else self.filename,
face_type=face_type if face_type is not None else self.face_type,
shape=shape if shape is not None else self.shape,
landmarks=landmarks if landmarks is not None else self.landmarks.copy(),
pitch=pitch if pitch is not None else self.pitch,
yaw=yaw if yaw is not None else self.yaw,
mirror=mirror if mirror is not None else self.mirror,
close_target_list=close_target_list if close_target_list is not None else self.close_target_list)


@@ -4,6 +4,7 @@ import random
import cv2
import multiprocessing
from utils import iter_utils
from facelib import LandmarksProcessor
from samples import SampleType
from samples import SampleProcessor
@@ -18,11 +19,13 @@ output_sample_types = [
]
'''
class SampleGeneratorFace(SampleGeneratorBase):
def __init__ (self, samples_path, debug, batch_size, sort_by_yaw=False, sort_by_yaw_target_samples_path=None, with_close_to_self=False, sample_process_options=SampleProcessor.Options(), output_sample_types=[], add_sample_idx=False, generators_count=2, **kwargs):
def __init__ (self, samples_path, debug, batch_size, sort_by_yaw=False, sort_by_yaw_target_samples_path=None, with_close_to_self=False, sample_process_options=SampleProcessor.Options(), output_sample_types=[], add_sample_idx=False, add_pitch=False, add_yaw=False, generators_count=2, **kwargs):
super().__init__(samples_path, debug, batch_size)
self.sample_process_options = sample_process_options
self.output_sample_types = output_sample_types
self.add_sample_idx = add_sample_idx
self.add_pitch = add_pitch
self.add_yaw = add_yaw
if sort_by_yaw_target_samples_path is not None:
self.sample_type = SampleType.FACE_YAW_SORTED_AS_TARGET
@@ -136,12 +139,28 @@ class SampleGeneratorFace(SampleGeneratorBase):
batches = [ [] for _ in range(len(x)) ]
if self.add_sample_idx:
batches += [ [] ]
i_sample_idx = len(batches)-1
if self.add_pitch:
batches += [ [] ]
i_pitch = len(batches)-1
if self.add_yaw:
batches += [ [] ]
i_yaw = len(batches)-1
for i in range(len(x)):
batches[i].append ( x[i] )
if self.add_sample_idx:
batches[-1].append (idx)
batches[i_sample_idx].append (idx)
if self.add_pitch or self.add_yaw:
pitch, yaw = LandmarksProcessor.estimate_pitch_yaw (sample.landmarks)
if self.add_pitch:
batches[i_pitch].append (pitch)
if self.add_yaw:
batches[i_yaw].append (yaw)
break
yield [ np.array(batch) for batch in batches]
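
The yielded list therefore grows by one extra array per enabled flag, appended after the regular output_sample_types entries in the order sample_idx, pitch, yaw. A self-contained sketch of that layout, with illustrative values only:

# stand-ins for two per-sample outputs and all three flags enabled
x = [0.1, 0.2]
add_sample_idx, add_pitch, add_yaw = True, True, True

batches = [[] for _ in range(len(x))]
if add_sample_idx:
    batches += [[]]; i_sample_idx = len(batches) - 1
if add_pitch:
    batches += [[]]; i_pitch = len(batches) - 1
if add_yaw:
    batches += [[]]; i_yaw = len(batches) - 1

for i in range(len(x)):
    batches[i].append(x[i])
if add_sample_idx:
    batches[i_sample_idx].append(7)       # sample index
if add_pitch:
    batches[i_pitch].append(-0.15)        # estimated pitch in [-1, 1]
if add_yaw:
    batches[i_yaw].append(0.42)           # estimated yaw in [-1, 1]

print(batches)   # [[0.1], [0.2], [7], [-0.15], [0.42]]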


@@ -68,11 +68,14 @@ class SampleLoader:
print ("%s is not a dfl image file required for training" % (s_filename_path.name) )
continue
pitch, yaw = LandmarksProcessor.estimate_pitch_yaw ( dflimg.get_landmarks() )
sample_list.append( s.copy_and_set(sample_type=SampleType.FACE,
face_type=FaceType.fromString (dflimg.get_face_type()),
shape=dflimg.get_shape(),
landmarks=dflimg.get_landmarks(),
yaw=dflimg.get_yaw_value()) )
pitch=pitch,
yaw=yaw) )
except:
print ("Unable to load %s , error: %s" % (str(s_filename_path), traceback.format_exc() ) )
@@ -114,7 +117,7 @@
@staticmethod
def upgradeToFaceYawSortedSamples( samples ):
lowest_yaw, highest_yaw = -256, +256
lowest_yaw, highest_yaw = -1.0, 1.0
gradations = 64
diff_rot_per_grad = abs(highest_yaw-lowest_yaw) / gradations
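
With yaw now in [-1, 1] instead of the old [-256, 256] pixel-difference range, each of the 64 gradations spans 2/64 = 0.03125 of yaw. A hedged sketch of the corresponding bucket index (the actual grouping code is outside this hunk):

lowest_yaw, highest_yaw = -1.0, 1.0
gradations = 64
diff_rot_per_grad = abs(highest_yaw - lowest_yaw) / gradations   # 0.03125

def yaw_to_grad(yaw):
    # clamp into range, then map to a bucket in 0..gradations-1 (illustrative only)
    yaw = min(max(yaw, lowest_yaw), highest_yaw - 1e-6)
    return int((yaw - lowest_yaw) / diff_rot_per_grad)

print(yaw_to_grad(0.0))   # 32: a frontal face lands in the middle bucket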


@@ -152,8 +152,6 @@ class DFLJPG(object):
@staticmethod
def embed_data(filename, face_type=None,
landmarks=None,
yaw_value=None,
pitch_value=None,
source_filename=None,
source_rect=None,
source_landmarks=None
@@ -163,8 +161,6 @@ class DFLJPG(object):
inst.setDFLDictData ({
'face_type': face_type,
'landmarks': landmarks,
'yaw_value': yaw_value,
'pitch_value': pitch_value,
'source_filename': source_filename,
'source_rect': source_rect,
'source_landmarks': source_landmarks
@@ -226,8 +222,6 @@ class DFLJPG(object):
def get_face_type(self): return self.dfl_dict['face_type']
def get_landmarks(self): return np.array ( self.dfl_dict['landmarks'] )
def get_yaw_value(self): return self.dfl_dict['yaw_value']
def get_pitch_value(self): return self.dfl_dict['pitch_value']
def get_source_filename(self): return self.dfl_dict['source_filename']
def get_source_rect(self): return self.dfl_dict['source_rect']
def get_source_landmarks(self): return np.array ( self.dfl_dict['source_landmarks'] )


@@ -267,8 +267,6 @@ class DFLPNG(object):
@staticmethod
def embed_data(filename, face_type=None,
landmarks=None,
yaw_value=None,
pitch_value=None,
source_filename=None,
source_rect=None,
source_landmarks=None
@@ -278,8 +276,6 @@ class DFLPNG(object):
inst.setDFLDictData ({
'face_type': face_type,
'landmarks': landmarks,
'yaw_value': yaw_value,
'pitch_value': pitch_value,
'source_filename': source_filename,
'source_rect': source_rect,
'source_landmarks': source_landmarks
@@ -334,12 +330,6 @@ class DFLPNG(object):
def get_landmarks(self):
return np.array ( self.fcwp_dict['landmarks'] )
def get_yaw_value(self):
return self.fcwp_dict['yaw_value']
def get_pitch_value(self):
return self.fcwp_dict['pitch_value']
def get_source_filename(self):
return self.fcwp_dict['source_filename']