From 9926dc626a7ea63a0b546caed1e009b3447d0d91 Mon Sep 17 00:00:00 2001
From: iperov <lepersorium@gmail.com>
Date: Thu, 20 Dec 2018 12:43:00 +0400
Subject: [PATCH] refactorings, improved sort by hist-dissim

---
 main.py                             |   2 +-
 mainscripts/Converter.py            |  22 ++----
 mainscripts/Extractor.py            |  25 +++----
 mainscripts/Sorter.py               | 102 ++++++++++------------------
 models/TrainingDataGeneratorBase.py |  19 ++----
 utils/{AlignedPNG.py => DFLPNG.py}  |  85 ++++++++++++++++++-----
 6 files changed, 128 insertions(+), 127 deletions(-)
 rename utils/{AlignedPNG.py => DFLPNG.py} (76%)

diff --git a/main.py b/main.py
index 37dfe6b..fcbc024 100644
--- a/main.py
+++ b/main.py
@@ -58,7 +58,7 @@ if __name__ == "__main__":
         
     sort_parser = subparsers.add_parser( "sort", help="Sort faces in a directory.")     
     sort_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
-    sort_parser.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "hist", "hist-dissim", "hist-blur", "ssim", "brightness", "hue", "black", "origname"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." )
+    sort_parser.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "hist", "hist-dissim", "hist-blur", "brightness", "hue", "black", "origname"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." )
     sort_parser.set_defaults (func=process_sort)
     
     def process_train(arguments):      
diff --git a/mainscripts/Converter.py b/mainscripts/Converter.py
index 9bd092b..b71027f 100644
--- a/mainscripts/Converter.py
+++ b/mainscripts/Converter.py
@@ -3,7 +3,7 @@ from pathlib import Path
 from utils import Path_utils
 import cv2
 from tqdm import tqdm
-from utils.AlignedPNG import AlignedPNG
+from utils.DFLPNG import DFLPNG
 from utils import image_utils
 import shutil
 import numpy as np
@@ -156,12 +156,7 @@ class ConvertSubprocessor(SubprocessorBase):
             image = (cv2.imread(str(filename_path)) / 255.0).astype(np.float32)
 
             if self.converter.get_mode() == ConverterBase.MODE_IMAGE:
-                image_landmarks = None
-                a_png = AlignedPNG.load( str(filename_path) )
-                if a_png is not None:                 
-                    d = a_png.getFaceswapDictData()
-                    if d is not None and 'landmarks' in d.keys():
-                        image_landmarks = np.array(d['landmarks'])
+                image_landmarks = DFLPNG.load( str(filename_path), throw_on_no_embedded_data=True ).get_landmarks()
                         
                 image = self.converter.convert_image(image, image_landmarks, self.debug)
                 if self.debug:
@@ -258,20 +253,15 @@ def main (input_dir, output_dir, model_dir, model_name, aligned_dir=None, **in_o
             
             aligned_path_image_paths = Path_utils.get_image_paths(aligned_path)
             for filename in tqdm(aligned_path_image_paths, desc= "Collecting alignments" ):
-                a_png = AlignedPNG.load( str(filename) )
-                if a_png is None:
-                    print ( "%s - no embedded data found." % (filename) )
-                    continue
-                d = a_png.getFaceswapDictData()
-                if d is None or d['source_filename'] is None or d['source_rect'] is None or d['source_landmarks'] is None:
-                    print ( "%s - no embedded data found." % (filename) )
+                dflpng = DFLPNG.load( str(filename), print_on_no_embedded_data=True )                
+                if dflpng is None:
                     continue
                 
-                source_filename_stem = Path(d['source_filename']).stem
+                source_filename_stem = Path( dflpng.get_source_filename() ).stem
                 if source_filename_stem not in alignments.keys():
                     alignments[ source_filename_stem ] = []
 
-                alignments[ source_filename_stem ].append ( np.array(d['source_landmarks']) )
+                alignments[ source_filename_stem ].append (dflpng.get_source_landmarks())
         
         
         files_processed, faces_processed = ConvertSubprocessor ( 
diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py
index e2d0f07..ed03280 100644
--- a/mainscripts/Extractor.py
+++ b/mainscripts/Extractor.py
@@ -8,7 +8,7 @@ from pathlib import Path
 import numpy as np
 import cv2
 from utils import Path_utils
-from utils.AlignedPNG import AlignedPNG
+from utils.DFLPNG import DFLPNG
 from utils import image_utils
 from facelib import FaceType
 import facelib 
@@ -313,20 +313,15 @@ class ExtractSubprocessor(SubprocessorBase):
                         face_image_landmarks = facelib.LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat)
                     
                     cv2.imwrite(output_file, face_image)
-                    
-                    a_png = AlignedPNG.load (output_file)
-                    
-                    d = {
-                      'face_type': FaceType.toString(self.face_type),
-                      'landmarks': face_image_landmarks.tolist(),
-                      'yaw_value': facelib.LandmarksProcessor.calc_face_yaw (face_image_landmarks),
-                      'pitch_value': facelib.LandmarksProcessor.calc_face_pitch (face_image_landmarks),
-                      'source_filename': filename_path.name,
-                      'source_rect': rect,
-                      'source_landmarks': image_landmarks.tolist()
-                    }
-                    a_png.setFaceswapDictData (d)
-                    a_png.save(output_file)  
+
+                    DFLPNG.embed_data(output_file, face_type = FaceType.toString(self.face_type),
+                                                   landmarks = face_image_landmarks.tolist(),
+                                                   yaw_value = facelib.LandmarksProcessor.calc_face_yaw (face_image_landmarks),
+                                                   pitch_value = facelib.LandmarksProcessor.calc_face_pitch (face_image_landmarks),
+                                                   source_filename = filename_path.name,
+                                                   source_rect=  rect,
+                                                   source_landmarks = image_landmarks.tolist()
+                                        )  
                         
                     result.append (output_file)
                     
diff --git a/mainscripts/Sorter.py b/mainscripts/Sorter.py
index 3c17299..514be2a 100644
--- a/mainscripts/Sorter.py
+++ b/mainscripts/Sorter.py
@@ -8,7 +8,8 @@ from shutil import copyfile
 
 from pathlib import Path
 from utils import Path_utils
-from utils.AlignedPNG import AlignedPNG
+from utils import image_utils
+from utils.DFLPNG import DFLPNG
 from facelib import LandmarksProcessor
 from utils.SubprocessorBase import SubprocessorBase
 import multiprocessing
@@ -86,22 +87,16 @@ class BlurEstimatorSubprocessor(SubprocessorBase):
     #override
     def onClientProcessData(self, data):
         filename_path = Path( data[0] )
-        image = cv2.imread( str(filename_path) )
-        face_mask = None        
-        
-        a_png = AlignedPNG.load( str(filename_path) )        
-        if a_png is not None:
-            d = a_png.getFaceswapDictData()
-            if (d is not None) and (d['landmarks'] is not None):            
-                face_mask = LandmarksProcessor.get_image_hull_mask (image, np.array(d['landmarks']))
-        
-        if face_mask is not None:
-            image = (image*face_mask).astype(np.uint8)
+
+        dflpng = DFLPNG.load( str(filename_path), print_on_no_embedded_data=True )        
+        if dflpng is not None:
+            image = cv2.imread( str(filename_path) )
+            image = ( image * \
+                      LandmarksProcessor.get_image_hull_mask (image, dflpng.get_landmarks()) \
+                     ).astype(np.uint8)
+            return [ str(filename_path), estimate_sharpness( image ) ]
         else:
-            print ( "%s - no embedded data found." % (str(filename_path)) ) 
             return [ str(filename_path), 0 ]
-        
-        return [ str(filename_path), estimate_sharpness( image ) ]
 
     #override
     def onClientGetDataName (self, data):
@@ -164,18 +159,11 @@ def sort_by_face(input_path):
             print ("%s is not a png file required for sort_by_face" % (filepath.name) ) 
             continue
         
-        a_png = AlignedPNG.load (str(filepath))
-        if a_png is None:
-            print ("%s failed to load" % (filepath.name) ) 
-            continue
-            
-        d = a_png.getFaceswapDictData()
-        
-        if d is None or d['landmarks'] is None:          
-            print ("%s - no embedded data found required for sort_by_face" % (filepath.name) )
+        dflpng = DFLPNG.load (str(filepath), print_on_no_embedded_data=True)
+        if dflpng is None:
             continue
         
-        img_list.append( [str(filepath), np.array(d['landmarks']) ] )
+        img_list.append( [str(filepath), dflpng.get_landmarks()] )
         
 
     img_list_len = len(img_list)
@@ -207,18 +195,11 @@ def sort_by_face_dissim(input_path):
             print ("%s is not a png file required for sort_by_face_dissim" % (filepath.name) ) 
             continue
         
-        a_png = AlignedPNG.load (str(filepath))
-        if a_png is None:
-            print ("%s failed to load" % (filepath.name) ) 
-            continue
-            
-        d = a_png.getFaceswapDictData()
-        
-        if d is None or d['landmarks'] is None:          
-            print ("%s - no embedded data found required for sort_by_face_dissim" % (filepath.name) )
+        dflpng = DFLPNG.load (str(filepath), print_on_no_embedded_data=True)
+        if dflpng is None:
             continue
         
-        img_list.append( [str(filepath), np.array(d['landmarks']), 0 ] )
+        img_list.append( [str(filepath), dflpng.get_landmarks(), 0 ] )
         
     img_list_len = len(img_list)
     for i in tqdm( range(0, img_list_len-1), desc="Sorting"):
@@ -247,18 +228,11 @@ def sort_by_face_yaw(input_path):
             print ("%s is not a png file required for sort_by_face_dissim" % (filepath.name) ) 
             continue
         
-        a_png = AlignedPNG.load (str(filepath))
-        if a_png is None:
-            print ("%s failed to load" % (filepath.name) ) 
-            continue
-            
-        d = a_png.getFaceswapDictData()
-        
-        if d is None or d['yaw_value'] is None:          
-            print ("%s - no embedded data found required for sort_by_face_dissim" % (filepath.name) )
+        dflpng = DFLPNG.load (str(filepath), print_on_no_embedded_data=True)
+        if dflpng is None:
             continue
         
-        img_list.append( [str(filepath), np.array(d['yaw_value']) ] )
+        img_list.append( [str(filepath), np.array( dflpng.get_yaw_value() ) ] )
 
     print ("Sorting...")
     img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True)
@@ -423,9 +397,7 @@ class HistDissimSubprocessor(SubprocessorBase):
         for j in range( 0, self.img_list_len):
             if i == j:
                 continue
-            score_total += cv2.compareHist(self.img_list[i][1], self.img_list[j][1], cv2.HISTCMP_BHATTACHARYYA) + \
-                           cv2.compareHist(self.img_list[i][2], self.img_list[j][2], cv2.HISTCMP_BHATTACHARYYA) + \
-                           cv2.compareHist(self.img_list[i][3], self.img_list[j][3], cv2.HISTCMP_BHATTACHARYYA)
+            score_total += cv2.compareHist(self.img_list[i][1], self.img_list[j][1], cv2.HISTCMP_BHATTACHARYYA)
 
         return score_total
 
@@ -436,7 +408,7 @@ class HistDissimSubprocessor(SubprocessorBase):
         
     #override
     def onHostResult (self, data, result):
-        self.img_list[data[0]][4] = result
+        self.img_list[data[0]][2] = result
         return 1
     
     #override    
@@ -451,17 +423,20 @@ def sort_by_hist_dissim(input_path):
     print ("Sorting by histogram dissimilarity...")
 
     img_list = []
-    for x in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"):
-        img = cv2.imread(x)    
-        img_list.append ([x, cv2.calcHist([img], [0], None, [256], [0, 256]),
-                             cv2.calcHist([img], [1], None, [256], [0, 256]),
-                             cv2.calcHist([img], [2], None, [256], [0, 256]), 0
-                         ])
+    for filename_path in tqdm( Path_utils.get_image_paths(input_path), desc="Loading"):
+        image = cv2.imread(filename_path)
+        
+        dflpng = DFLPNG.load( str(filename_path), print_on_no_embedded_data=True )        
+        if dflpng is not None:        
+            face_mask = LandmarksProcessor.get_image_hull_mask (image, dflpng.get_landmarks())
+            image = (image*face_mask).astype(np.uint8)
+
+        img_list.append ([filename_path, cv2.calcHist([cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)], [0], None, [256], [0, 256]), 0 ])
 
     img_list = HistDissimSubprocessor(img_list).process()
                          
     print ("Sorting...")
-    img_list = sorted(img_list, key=operator.itemgetter(4), reverse=True)
+    img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True)
 
     return img_list
     
@@ -508,18 +483,11 @@ def sort_by_origname(input_path):
             print ("%s is not a png file required for sort_by_origname" % (filepath.name) ) 
             continue
         
-        a_png = AlignedPNG.load (str(filepath))
-        if a_png is None:
-            print ("%s failed to load" % (filepath.name) ) 
-            continue
-            
-        d = a_png.getFaceswapDictData()
-        
-        if d is None or d['source_filename'] is None:          
-            print ("%s - no embedded data found required for sort_by_origname" % (filepath.name) )
+        dflpng = DFLPNG.load (str(filepath), print_on_no_embedded_data=True)
+        if dflpng is None:
             continue
 
-        img_list.append( [str(filepath), d['source_filename']] )
+        img_list.append( [str(filepath), dflpng.get_source_filename()] )
 
     print ("Sorting...")
     img_list = sorted(img_list, key=operator.itemgetter(1))
@@ -545,4 +513,4 @@ def main (input_path, sort_by_method):
     elif sort_by_method == 'black':         img_list = sort_by_black (input_path)    
     elif sort_by_method == 'origname':      img_list = sort_by_origname (input_path)       
     
-    final_rename (input_path, img_list)
\ No newline at end of file
+    final_rename (input_path, img_list)
diff --git a/models/TrainingDataGeneratorBase.py b/models/TrainingDataGeneratorBase.py
index f6a1df0..2f78685 100644
--- a/models/TrainingDataGeneratorBase.py
+++ b/models/TrainingDataGeneratorBase.py
@@ -4,7 +4,7 @@ from pathlib import Path
 from tqdm import tqdm
 import numpy as np
 import cv2
-from utils.AlignedPNG import AlignedPNG
+from utils.DFLPNG import DFLPNG
 from utils import iter_utils
 from utils import Path_utils
 from .BaseTypes import TrainingDataType
@@ -177,19 +177,14 @@ def X_LOAD ( RAWS ):
             print ("%s is not a png file required for training" % (s_filename_path.name) ) 
             continue
         
-        a_png = AlignedPNG.load ( str(s_filename_path) )
-        if a_png is None:
-            print ("%s failed to load" % (s_filename_path.name) )
+        dflpng = DFLPNG.load ( str(s_filename_path), print_on_no_embedded_data=True )
+        if dflpng is None:
             continue
 
-        d = a_png.getFaceswapDictData()
-        if d is None or d['landmarks'] is None or d['yaw_value'] is None:
-            print ("%s - no embedded faceswap info found required for training" % (s_filename_path.name) ) 
-            continue
-            
-        face_type = d['face_type'] if 'face_type' in d.keys() else 'full_face'        
-        face_type = FaceType.fromString (face_type) 
-        sample_list.append( s.copy_and_set(face_type=face_type, shape=a_png.get_shape(), landmarks=d['landmarks'], yaw=d['yaw_value']) )
+        sample_list.append( s.copy_and_set(face_type=FaceType.fromString (dflpng.get_face_type()),
+                                            shape=dflpng.get_shape(), 
+                                            landmarks=dflpng.get_landmarks(),
+                                            yaw=dflpng.get_yaw_value()) )
         
     return sample_list
     
diff --git a/utils/AlignedPNG.py b/utils/DFLPNG.py
similarity index 76%
rename from utils/AlignedPNG.py
rename to utils/DFLPNG.py
index 6167cab..48ecf41 100644
--- a/utils/AlignedPNG.py
+++ b/utils/DFLPNG.py
@@ -4,6 +4,7 @@ import string
 import struct
 import zlib
 import pickle
+import numpy as np
 
 class Chunk(object):
     def __init__(self, name=None, data=None):
@@ -184,7 +185,7 @@ class IEND(Chunk):
     def __str__(self):
         return "<Chunk:IEND>".format(**self.__dict__)
 
-class FaceswapChunk(Chunk):
+class DFLChunk(Chunk):
     def __init__(self, dict_data=None):
         super().__init__("fcWp")
         self.dict_data = dict_data       
@@ -207,26 +208,26 @@ class FaceswapChunk(Chunk):
         
 chunk_map = {
     b"IHDR": IHDR,
-    b"fcWp": FaceswapChunk,
+    b"fcWp": DFLChunk,
     b"IEND": IEND
 }
 
-class AlignedPNG(object):
+class DFLPNG(object):
     def __init__(self):
         self.data = b""
         self.length = 0
         self.chunks = []
-
+        self.fcwp_dict = None
+        
     @staticmethod
-    def load(data):
-
+    def load_raw(filename):  # parse the PNG at `filename` into chunks; does not require embedded DFL data
         try:
-            with open(data, "rb") as f:
+            with open(filename, "rb") as f:
                 data = f.read()
         except:
-            raise FileNotFoundError(data)
+            raise FileNotFoundError(filename)  # report the path: `data` is still unbound when open()/read() fails
     
-        inst = AlignedPNG()
+        inst = DFLPNG()
         inst.data = data
         inst.length = len(data)
         
@@ -242,14 +243,47 @@ class AlignedPNG(object):
             chunk = chunk_map.get(chunk_name, Chunk).load(data[chunk_start:chunk_end])
             inst.chunks.append(chunk)
             chunk_start = chunk_end
-
+        
         return inst
         
+    @staticmethod
+    def load(filename, print_on_no_embedded_data=False, throw_on_no_embedded_data=False):
+        inst = DFLPNG.load_raw (filename)  # chunk-level parse of the file
+        inst.fcwp_dict = inst.getDFLDictData()  # None when no DFL dict data is found
         
-    def save(self, filename):
+        if inst.fcwp_dict is None:
+            if print_on_no_embedded_data:
+                print ( "No DFL data found in %s" % (filename) )
+            if throw_on_no_embedded_data:
+                raise ValueError("No DFL data found in %s" % (filename) )
+            return None
+        # Embedded data is present: hand back the populated instance.
+        return inst
+        
+    @staticmethod
+    def embed_data(filename, face_type=None,
+                             landmarks=None,
+                             yaw_value=None,
+                             pitch_value=None,
+                             source_filename=None,
+                             source_rect=None,
+                             source_landmarks=None
+                   ):
+        # Parse the existing PNG, then replace its DFL chunk with one holding the metadata passed in.
+        inst = DFLPNG.load_raw (filename)
+        inst.setDFLDictData ({
+                                'face_type': face_type,
+                                'landmarks': landmarks,
+                                'yaw_value': yaw_value,
+                                'pitch_value': pitch_value,
+                                'source_filename': source_filename,
+                                'source_rect': source_rect,
+                                'source_landmarks': source_landmarks
+                             })
+        # Write the re-serialised chunks back over the same file.
         try:
             with open(filename, "wb") as f:
-                f.write ( self.dump() )
+                f.write ( inst.dump() )
         except:
             raise Exception( 'cannot save %s' % (filename) )
 
@@ -274,23 +308,42 @@ class AlignedPNG(object):
                 return chunk.height
         return 0
         
-    def getFaceswapDictData(self):        
+    def getDFLDictData(self):        
         for chunk in self.chunks:
-            if type(chunk) == FaceswapChunk:
+            if isinstance(chunk, DFLChunk):  # first DFL chunk found supplies the dict
                 return chunk.getDictData()
         return None
                 
-    def setFaceswapDictData (self, dict_data=None):
+    def setDFLDictData (self, dict_data=None):
         for chunk in self.chunks:
-            if type(chunk) == FaceswapChunk:
+            if isinstance(chunk, DFLChunk):  # drop the existing DFL chunk before inserting the new one
                 self.chunks.remove(chunk)
                 break
     
         if not dict_data is None:
-            chunk = FaceswapChunk(dict_data)
+            chunk = DFLChunk(dict_data)
             self.chunks.insert(-1, chunk)
+       
+    def get_face_type(self):  # PNGs without a 'face_type' key fall back to 'full_face', as the replaced loader code did
+        return self.fcwp_dict.get('face_type', 'full_face')
+
+    def get_landmarks(self):  # landmarks stored for the face image, returned as an ndarray
+        return np.array ( self.fcwp_dict['landmarks'] )
+
+    def get_yaw_value(self):  # stored 'yaw_value' entry, returned as-is
+        return self.fcwp_dict['yaw_value']
+
+    def get_pitch_value(self):  # stored 'pitch_value' entry, returned as-is
+        return self.fcwp_dict['pitch_value']
+
+    def get_source_filename(self):  # stored 'source_filename' entry, returned as-is
+        return self.fcwp_dict['source_filename']
+
+    def get_source_rect(self):  # stored 'source_rect' entry, returned as-is
+        return self.fcwp_dict['source_rect']
+
+    def get_source_landmarks(self):  # stored 'source_landmarks' entry, returned as an ndarray
+        return np.array ( self.fcwp_dict['source_landmarks'] )
+
     def __str__(self):
         return "<PNG length={length} chunks={}>".format(len(self.chunks), **self.__dict__)