Added interactive converter.

With the interactive converter you can change any parameter of any frame and see the result in real time.

Converter: added motion_blur_power param.
Motion blur is applied using precomputed motion vectors,
so the moving face looks more realistic.
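
A minimal sketch of the idea, assuming a per-frame motion vector (dx, dy) has already been estimated for the face region; the function and parameter names below are illustrative, not the converter's actual API:

    import cv2
    import numpy as np

    def apply_motion_blur(frame, dx, dy, motion_blur_power):
        # Illustrative: the blur streak's length follows the motion vector's
        # magnitude, scaled by the user-tunable motion_blur_power.
        size = max(1, int(np.hypot(dx, dy) * motion_blur_power))
        if size <= 1:
            return frame  # motion too small to blur
        angle = np.degrees(np.arctan2(dy, dx))
        # Directional kernel: a horizontal line through the kernel center,
        # rotated to match the motion direction.
        k = np.zeros((size, size), dtype=np.float32)
        k[(size - 1) // 2, :] = 1.0
        rot = cv2.getRotationMatrix2D((size / 2 - 0.5, size / 2 - 0.5), angle, 1.0)
        k = cv2.warpAffine(k, rot, (size, size))
        if np.sum(k) == 0:
            return frame
        return cv2.filter2D(frame, -1, k / np.sum(k))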

Removed the RecycleGAN model.

Added experimental AVATAR model. Minimum required VRAM is 6GB (NVIDIA) or 12GB (AMD).
Usage:
1) Place data_src.mp4: a 10-20 minute, square-resolution video of a news reporter sitting at a table with a static background;
   no other faces should appear in the frames.
2) Run "extract images from video data_src.bat" with FULL fps.
3) Place data_dst.mp4: a video of the face that will control the src face.
4) Run "extract images from video data_dst FULL FPS.bat".
5) Run "data_src mark faces S3FD best GPU.bat".
6) Run "data_dst extract unaligned faces S3FD best GPU.bat".
7) Run "train AVATAR.bat", stage 1; tune the batch size to the maximum for your card (32 for 6GB) and train to 50k+ iterations.
8) Run "train AVATAR.bat", stage 2; tune the batch size to the maximum for your card (4 for 6GB) and train until the output is decently sharp. A rough batch-size heuristic is sketched after this list.
9) Run "convert AVATAR.bat".
10) Run "converted to mp4.bat".
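
The batch-size figures in steps 7-8 scale with available VRAM. A minimal sketch of that rule of thumb, assuming linear scaling from the 6GB numbers above (the helper is hypothetical, not a script in this repo):

    def max_batch_size(vram_gb, stage):
        # Hypothetical linear-scaling heuristic from the figures above:
        # stage 1 fits ~32 samples per 6GB of VRAM, stage 2 fits ~4.
        per_6gb = 32 if stage == 1 else 4
        return max(1, int(per_6gb * vram_gb / 6))

    print(max_batch_size(6, 1))   # 32
    print(max_batch_size(6, 2))   # 4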

Updated versions of modules.
Commit 407ce3b1ca by iperov, 2019-08-24 12:57:29 +04:00
46 changed files with 2394 additions and 1659 deletions


@@ -1,26 +1,21 @@
 from .estimate_sharpness import estimate_sharpness
 from .equalize_and_stack_square import equalize_and_stack_square
-from .text import get_text_image
-from .text import get_draw_text_lines
+from .text import get_text_image, get_draw_text_lines
-from .draw import draw_polygon
-from .draw import draw_rect
+from .draw import draw_polygon, draw_rect
 from .morph import morph_by_points
-from .warp import gen_warp_params
-from .warp import warp_by_params
+from .warp import gen_warp_params, warp_by_params
 from .reduce_colors import reduce_colors
-from .color_transfer import color_hist_match
-from .color_transfer import reinhard_color_transfer
-from .color_transfer import linear_color_transfer
+from .color_transfer import color_hist_match, reinhard_color_transfer, linear_color_transfer
 from .DCSCN import DCSCN
-from .common import normalize_channels
+from .common import normalize_channels, overlay_alpha_image
 from .IEPolys import IEPolys


@@ -1,143 +1,9 @@
-import math
 import cv2
 import numpy as np
-from PIL import Image
-from scipy.signal import convolve2d
-from skimage.draw import line
-
-class LineDictionary:
-    def __init__(self):
-        self.lines = {}
-        self.Create3x3Lines()
-        self.Create5x5Lines()
-        self.Create7x7Lines()
-        self.Create9x9Lines()
-        return
-
-    def Create3x3Lines(self):
-        lines = {}
-        lines[0]   = [1,0,1,2]
-        lines[45]  = [2,0,0,2]
-        lines[90]  = [0,1,2,1]
-        lines[135] = [0,0,2,2]
-        self.lines[3] = lines
-        return
-
-    def Create5x5Lines(self):
-        lines = {}
-        lines[0]     = [2,0,2,4]
-        lines[22.5]  = [3,0,1,4]
-        lines[45]    = [0,4,4,0]
-        lines[67.5]  = [0,3,4,1]
-        lines[90]    = [0,2,4,2]
-        lines[112.5] = [0,1,4,3]
-        lines[135]   = [0,0,4,4]
-        lines[157.5] = [1,0,3,4]
-        self.lines[5] = lines
-        return
-
-    def Create7x7Lines(self):
-        lines = {}
-        lines[0]   = [3,0,3,6]
-        lines[15]  = [4,0,2,6]
-        lines[30]  = [5,0,1,6]
-        lines[45]  = [6,0,0,6]
-        lines[60]  = [6,1,0,5]
-        lines[75]  = [6,2,0,4]
-        lines[90]  = [0,3,6,3]
-        lines[105] = [0,2,6,4]
-        lines[120] = [0,1,6,5]
-        lines[135] = [0,0,6,6]
-        lines[150] = [1,0,5,6]
-        lines[165] = [2,0,4,6]
-        self.lines[7] = lines
-        return
-
-    def Create9x9Lines(self):
-        lines = {}
-        lines[0]      = [4,0,4,8]
-        lines[11.25]  = [5,0,3,8]
-        lines[22.5]   = [6,0,2,8]
-        lines[33.75]  = [7,0,1,8]
-        lines[45]     = [8,0,0,8]
-        lines[56.25]  = [8,1,0,7]
-        lines[67.5]   = [8,2,0,6]
-        lines[78.75]  = [8,3,0,5]
-        lines[90]     = [8,4,0,4]
-        lines[101.25] = [0,3,8,5]
-        lines[112.5]  = [0,2,8,6]
-        lines[123.75] = [0,1,8,7]
-        lines[135]    = [0,0,8,8]
-        lines[146.25] = [1,0,7,8]
-        lines[157.5]  = [2,0,6,8]
-        lines[168.75] = [3,0,5,8]
-        self.lines[9] = lines
-        return
-
-lineLengths = [3,5,7,9]
-lineTypes = ["full", "right", "left"]
-
-lineDict = LineDictionary()
-
-def LinearMotionBlur_random(img):
-    lineLengthIdx = np.random.randint(0, len(lineLengths))
-    lineTypeIdx = np.random.randint(0, len(lineTypes))
-    lineLength = lineLengths[lineLengthIdx]
-    lineType = lineTypes[lineTypeIdx]
-    lineAngle = randomAngle(lineLength)
-    return LinearMotionBlur(img, lineLength, lineAngle, lineType)
-
-def LinearMotionBlur(img, dim, angle, linetype='full'):
-    if len(img.shape) == 2:
-        h, w = img.shape
-        c = 1
-        img = img[...,np.newaxis]
-    elif len(img.shape) == 3:
-        h, w, c = img.shape
-    else:
-        raise ValueError('unsupported img.shape')
-    kernel = LineKernel(dim, angle, linetype)
-    imgs = []
-    for i in range(c):
-        imgs.append( convolve2d(img[...,i], kernel, mode='same') )
-    img = np.stack(imgs, axis=-1)
-    img = np.squeeze(img)
-    return img
-
-def LineKernel(dim, angle, linetype):
-    kernelwidth = dim
-    kernelCenter = int(math.floor(dim/2))
-    angle = SanitizeAngleValue(kernelCenter, angle)
-    kernel = np.zeros((kernelwidth, kernelwidth), dtype=np.float32)
-    lineAnchors = lineDict.lines[dim][angle]
-    if linetype == 'right':
-        lineAnchors[0] = kernelCenter
-        lineAnchors[1] = kernelCenter
-    if linetype == 'left':
-        lineAnchors[2] = kernelCenter
-        lineAnchors[3] = kernelCenter
-    rr, cc = line(lineAnchors[0], lineAnchors[1], lineAnchors[2], lineAnchors[3])
-    kernel[rr, cc] = 1
-    normalizationFactor = np.count_nonzero(kernel)
-    kernel = kernel / normalizationFactor
-    return kernel
-
-def SanitizeAngleValue(kernelCenter, angle):
-    numDistinctLines = kernelCenter * 4
-    angle = math.fmod(angle, 180.0)
-    validLineAngles = np.linspace(0, 180, numDistinctLines, endpoint=False)
-    angle = nearestValue(angle, validLineAngles)
-    return angle
-
-def nearestValue(theta, validAngles):
-    idx = (np.abs(validAngles - theta)).argmin()
-    return validAngles[idx]
-
-def randomAngle(kerneldim):
-    kernelCenter = int(math.floor(kerneldim/2))
-    numDistinctLines = kernelCenter * 4
-    validLineAngles = np.linspace(0, 180, numDistinctLines, endpoint=False)
-    angleIdx = np.random.randint(0, len(validLineAngles))
-    return int(validLineAngles[angleIdx])
+
+def LinearMotionBlur(image, size, angle):
+    k = np.zeros((size, size), dtype=np.float32)
+    k[(size-1) // 2, :] = np.ones(size, dtype=np.float32)
+    k = cv2.warpAffine(k, cv2.getRotationMatrix2D((size / 2 - 0.5, size / 2 - 0.5), angle, 1.0), (size, size))
+    k = k * (1.0 / np.sum(k))
+    return cv2.filter2D(image, -1, k)
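
For reference, a quick usage sketch of the new implementation above; the image path is illustrative, and LinearMotionBlur is assumed to be in scope:

    import cv2

    img = cv2.imread('frame.png')             # illustrative input
    blurred = LinearMotionBlur(img, 15, 45)   # 15x15 kernel, 45-degree streak
    cv2.imwrite('frame_blurred.png', blurred)

Unlike the removed scipy/skimage version, the new code builds the kernel by rotating a horizontal line with cv2.warpAffine, so it handles arbitrary angles without a precomputed line dictionary.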


@@ -9,13 +9,28 @@ def normalize_channels(img, target_channels):
         h, w, c = img.shape
     else:
         raise ValueError("normalize: incorrect image dimensions.")
 
     if c == 0 and target_channels > 0:
         img = img[...,np.newaxis]
     if c == 1 and target_channels > 1:
         img = np.repeat (img, target_channels, -1)
     if c > target_channels:
         img = img[...,0:target_channels]
         c = target_channels
     return img
+
+def overlay_alpha_image(img_target, img_source, xy_offset=(0,0) ):
+    (h,w,c) = img_source.shape
+    if c != 4:
+        raise ValueError("overlay_alpha_image, img_source must have 4 channels")
+
+    x1, x2 = xy_offset[0], xy_offset[0] + w
+    y1, y2 = xy_offset[1], xy_offset[1] + h
+
+    alpha_s = img_source[:, :, 3] / 255.0
+    alpha_l = 1.0 - alpha_s
+
+    for c in range(0, 3):
+        img_target[y1:y2, x1:x2, c] = (alpha_s * img_source[:, :, c] +
+                                       alpha_l * img_target[y1:y2, x1:x2, c])
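
A quick usage sketch of the new overlay_alpha_image helper: it alpha-blends a 4-channel (BGRA) source onto a 3-channel target in place. The file paths here are illustrative:

    import cv2

    target = cv2.imread('background.png')                      # BGR, 3 channels
    overlay = cv2.imread('overlay.png', cv2.IMREAD_UNCHANGED)  # BGRA, 4 channels
    overlay_alpha_image(target, overlay, xy_offset=(10, 10))   # modifies target in place
    cv2.imwrite('result.png', target)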