mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-08-14 02:37:00 -07:00
added support for AMD video cards
added Intel's plaidML backend to use the OpenCL engine. Check the new requirements. Smart choosing of the backend in device.py; the env var 'force_plaidML' can be set to force the use of plaidML. All tf functions transferred to pure keras. MTCNN transferred to pure keras, but it works slowly on plaidML (forced to CPU in this case). The default batch size for all models and VRAMs is now 4; feel free to adjust it on your own. SAE: default style options are now ZERO, because there are no best values for all scenes; set them on your own. SAE: returned the option pixel_loss; feel free to enable it on your own. SAE: added option multiscale_decoder, default is true, but you can disable it to get 100% the same behaviour as the H, DF, LIAEF models. Fixed converter output to .png. Added linux fork reference to doc/doc_build_and_repository_info.md
This commit is contained in:
parent
3a9d450281
commit
72ba6b103c
24 changed files with 2694 additions and 1489 deletions
|
@ -3,15 +3,11 @@ import os
|
|||
import cv2
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .mtcnn import *
|
||||
from nnlib import nnlib
|
||||
|
||||
class MTCExtractor(object):
|
||||
def __init__(self, keras, tf, tf_session):
|
||||
def __init__(self):
|
||||
self.scale_to = 1920
|
||||
self.keras = keras
|
||||
self.tf = tf
|
||||
self.tf_session = tf_session
|
||||
|
||||
self.min_face_size = self.scale_to * 0.042
|
||||
self.thresh1 = 0.7
|
||||
|
@ -19,25 +15,72 @@ class MTCExtractor(object):
|
|||
self.thresh3 = 0.6
|
||||
self.scale_factor = 0.95
|
||||
|
||||
exec( nnlib.import_all(), locals(), globals() )
|
||||
PNet_Input = Input ( (None, None,3) )
|
||||
x = PNet_Input
|
||||
x = Conv2D (10, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="PReLU1" )(x)
|
||||
x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x)
|
||||
x = Conv2D (16, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="PReLU2" )(x)
|
||||
x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="PReLU3" )(x)
|
||||
prob = Conv2D (2, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv41")(x)
|
||||
prob = Softmax()(prob)
|
||||
x = Conv2D (4, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv42")(x)
|
||||
|
||||
PNet_model = Model(PNet_Input, [x,prob] )
|
||||
PNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_pnet.h5').__str__() )
|
||||
|
||||
RNet_Input = Input ( (24, 24, 3) )
|
||||
x = RNet_Input
|
||||
x = Conv2D (28, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="prelu1" )(x)
|
||||
x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x)
|
||||
x = Conv2D (48, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="prelu2" )(x)
|
||||
x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x)
|
||||
x = Conv2D (64, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv3")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="prelu3" )(x)
|
||||
x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x)
|
||||
x = Dense (128, name='conv4')(x)
|
||||
x = PReLU (name="prelu4" )(x)
|
||||
prob = Dense (2, name='conv51')(x)
|
||||
prob = Softmax()(prob)
|
||||
x = Dense (4, name='conv52')(x)
|
||||
RNet_model = Model(RNet_Input, [x,prob] )
|
||||
RNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_rnet.h5').__str__() )
|
||||
|
||||
ONet_Input = Input ( (48, 48, 3) )
|
||||
x = ONet_Input
|
||||
x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="prelu1" )(x)
|
||||
x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x)
|
||||
x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="prelu2" )(x)
|
||||
x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x)
|
||||
x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="prelu3" )(x)
|
||||
x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x)
|
||||
x = Conv2D (128, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv4")(x)
|
||||
x = PReLU (shared_axes=[1,2], name="prelu4" )(x)
|
||||
x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x)
|
||||
x = Dense (256, name='conv5')(x)
|
||||
x = PReLU (name="prelu5" )(x)
|
||||
prob = Dense (2, name='conv61')(x)
|
||||
prob = Softmax()(prob)
|
||||
x1 = Dense (4, name='conv62')(x)
|
||||
x2 = Dense (10, name='conv63')(x)
|
||||
ONet_model = Model(ONet_Input, [x1,x2,prob] )
|
||||
ONet_model.load_weights ( (Path(__file__).parent / 'mtcnn_onet.h5').__str__() )
|
||||
|
||||
self.pnet_fun = K.function ( PNet_model.inputs, PNet_model.outputs )
|
||||
self.rnet_fun = K.function ( RNet_model.inputs, RNet_model.outputs )
|
||||
self.onet_fun = K.function ( ONet_model.inputs, ONet_model.outputs )
|
||||
|
||||
def __enter__(self):
|
||||
with self.tf.variable_scope('pnet2'):
|
||||
data = self.tf.placeholder(self.tf.float32, (None,None,None,3), 'input')
|
||||
pnet2 = PNet(self.tf, {'data':data})
|
||||
pnet2.load(str(Path(__file__).parent/'det1.npy'), self.tf_session)
|
||||
with self.tf.variable_scope('rnet2'):
|
||||
data = self.tf.placeholder(self.tf.float32, (None,24,24,3), 'input')
|
||||
rnet2 = RNet(self.tf, {'data':data})
|
||||
rnet2.load(str(Path(__file__).parent/'det2.npy'), self.tf_session)
|
||||
with self.tf.variable_scope('onet2'):
|
||||
data = self.tf.placeholder(self.tf.float32, (None,48,48,3), 'input')
|
||||
onet2 = ONet(self.tf, {'data':data})
|
||||
onet2.load(str(Path(__file__).parent/'det3.npy'), self.tf_session)
|
||||
|
||||
self.pnet_fun = self.keras.backend.function([pnet2.layers['data']],[pnet2.layers['conv4-2'], pnet2.layers['prob1']])
|
||||
self.rnet_fun = self.keras.backend.function([rnet2.layers['data']],[rnet2.layers['conv5-2'], rnet2.layers['prob1']])
|
||||
self.onet_fun = self.keras.backend.function([onet2.layers['data']],[onet2.layers['conv6-2'], onet2.layers['conv6-3'], onet2.layers['prob1']])
|
||||
|
||||
faces, pnts = detect_face ( np.zeros ( (self.scale_to, self.scale_to, 3)), self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor )
|
||||
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type=None, exc_value=None, traceback=None):
|
||||
|
@ -47,7 +90,6 @@ class MTCExtractor(object):
|
|||
input_image = input_image[:,:,::-1].copy()
|
||||
(h, w, ch) = input_image.shape
|
||||
|
||||
|
||||
input_scale = self.scale_to / (w if w > h else h)
|
||||
input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
|
@ -56,3 +98,249 @@ class MTCExtractor(object):
|
|||
|
||||
return detected_faces
|
||||
|
||||
def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
    """Detect faces in an image and return bounding boxes and landmark points.

    The image is run through a scale pyramid and three successive networks.

    img: input image, indexed as (rows, cols, channels).
    minsize: minimum face size; the first pyramid scale is 12.0 / minsize.
    pnet, rnet, onet: callables for the three stages. Each is called with a
        one-element list holding the input batch and must return a list of
        arrays: [regression, probability] for pnet and rnet, and
        [regression, landmarks, probability] for onet.
    threshold: [th1, th2, th3], the score thresholds of the three stages.
    factor: multiplicative step between consecutive pyramid scales.

    Returns a (total_boxes, points) tuple. If no candidate survives the first
    stage, total_boxes is an empty (0, 9) array and points is empty. After
    the later stages total_boxes has five columns (four box coordinates and
    the score) and points has ten rows with one column per detection.
    """
    factor_count = 0
    total_boxes = np.empty((0, 9))
    points = np.empty(0)
    h = img.shape[0]
    w = img.shape[1]
    minl = np.amin([h, w])
    m = 12.0 / minsize
    minl = minl * m

    # create scale pyramid
    scales = []
    while minl >= 12:
        scales += [m * np.power(factor, factor_count)]
        minl = minl * factor
        factor_count += 1

    # first stage
    for scale in scales:
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))
        im_data = imresample(img, (hs, ws))
        im_data = (im_data - 127.5) * 0.0078125
        img_x = np.expand_dims(im_data, 0)
        # The first network is fed the image with rows and columns swapped;
        # its outputs are swapped back below.
        img_y = np.transpose(img_x, (0, 2, 1, 3))
        out = pnet([img_y])
        out0 = np.transpose(out[0], (0, 2, 1, 3))
        out1 = np.transpose(out[1], (0, 2, 1, 3))

        boxes, _ = generateBoundingBox(out1[0, :, :, 1].copy(), out0[0, :, :, :].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size > 0 and pick.size > 0:
            boxes = boxes[pick, :]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        pick = nms(total_boxes.copy(), 0.7, 'Union')
        total_boxes = total_boxes[pick, :]
        regw = total_boxes[:, 2] - total_boxes[:, 0]
        regh = total_boxes[:, 3] - total_boxes[:, 1]
        # Columns 5..8 hold the first-stage regression offsets.
        qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
        qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
        qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
        qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # second stage
        tempimg = np.zeros((24, 24, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = img[y[k]-1:ey[k], x[k]-1:ex[k], :]
            if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                tempimg[:, :, :, k] = imresample(tmp, (24, 24))
            else:
                # The original `return np.empty()` raised TypeError (shape is
                # a required argument) and callers unpack two values, so hand
                # back the same empty pair as the "nothing found" path.
                return np.empty((0, 9)), np.empty(0)
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = rnet([tempimg1])
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1, :]
        ipass = np.where(score > threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]
        if total_boxes.shape[0] > 0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick, :]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # third stage
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48, 48, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = img[y[k]-1:ey[k], x[k]-1:ex[k], :]
            if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                tempimg[:, :, :, k] = imresample(tmp, (48, 48))
            else:
                # See the second stage: return an unpackable empty result.
                return np.empty((0, 9)), np.empty(0)
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = onet([tempimg1])
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1, :]
        points = out1
        ipass = np.where(score > threshold[2])
        points = points[:, ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]

        # Separate names so the image width/height (w, h) are not rebound.
        box_w = total_boxes[:, 2] - total_boxes[:, 0] + 1
        box_h = total_boxes[:, 3] - total_boxes[:, 1] + 1
        # Rows 0..4 are scaled by box width, rows 5..9 by box height.
        points[0:5, :] = np.tile(box_w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
        points[5:10, :] = np.tile(box_h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1
        if total_boxes.shape[0] > 0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, 'Min')
            total_boxes = total_boxes[pick, :]
            points = points[:, pick]

    return total_boxes, points
|
||||
|
||||
|
||||
# function [boundingbox] = bbreg(boundingbox,reg)
|
||||
def bbreg(boundingbox,reg):
    """Shift each box edge by its regression offset scaled by the box size.

    boundingbox: array whose first four columns are the box coordinates; it
        is updated in place and also returned.
    reg: one row of four offsets per box. An input whose second dimension
        is 1 is first reshaped to (reg.shape[2], reg.shape[3]).
    """
    if reg.shape[1] == 1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    box_w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    box_h = boundingbox[:, 3] - boundingbox[:, 1] + 1
    # Columns 0 and 2 move by a fraction of the width, 1 and 3 of the height.
    extents = (box_w, box_h, box_w, box_h)
    shifted = [boundingbox[:, col] + reg[:, col] * extents[col] for col in range(4)]
    boundingbox[:, 0:4] = np.transpose(np.vstack(shifted))
    return boundingbox
|
||||
|
||||
def generateBoundingBox(imap, reg, scale, t):
    """Use a score heatmap to generate candidate bounding boxes.

    imap: 2-D score map; cells with a score >= t become candidates.
    reg: 3-D array whose last axis holds four regression offsets per cell.
    scale: pyramid scale the heatmap was computed at; box corners are
        divided by it.
    t: score threshold.

    Returns (boundingbox, reg). boundingbox has nine columns per candidate:
    two corner pairs, the score and the four offsets. reg holds just the
    four offsets. With no candidates the results are empty with shapes
    (0, 9) and (0, 4).
    """
    stride = 2
    cellsize = 12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:, :, 0])
    dy1 = np.transpose(reg[:, :, 1])
    dx2 = np.transpose(reg[:, :, 2])
    dy2 = np.transpose(reg[:, :, 3])
    y, x = np.where(imap >= t)
    if y.shape[0] == 1:
        # NOTE(review): with exactly one hit the offset maps are flipped
        # vertically before the lookup, so the offsets come from the mirrored
        # row. This looks like a porting artifact; confirm before changing,
        # because removing it alters detections.
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)
    score = imap[(y, x)]
    reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]]))
    if reg.size == 0:
        # Four offset columns, so the empty result keeps the same nine-column
        # layout as a non-empty one (the original used three columns here).
        reg = np.empty((0, 4))
    bb = np.transpose(np.vstack([y, x]))
    q1 = np.fix((stride * bb + 1) / scale)
    q2 = np.fix((stride * bb + cellsize - 1 + 1) / scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])
    return boundingbox, reg
|
||||
|
||||
# function pick = nms(boxes,threshold,type)
|
||||
def nms(boxes, threshold, method):
    """Greedy non-maximum suppression.

    boxes: array with one row per box; columns 0..3 are the box coordinates
        and column 4 is the score.
    threshold: a remaining box is dropped when its overlap with the box just
        picked is greater than this value.
    method: 'Min' divides the intersection by the smaller of the two areas;
        any other value divides it by the union.

    Returns the indices of the kept boxes, highest score first. For an empty
    input an empty array is returned (its size is 0).
    """
    if boxes.size == 0:
        return np.empty((0, 3))
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = np.argsort(s)
    # np.intp instead of int16: indices above 32767 must not overflow.
    pick = np.zeros(s.shape[0], dtype=np.intp)
    counter = 0
    while order.size > 0:
        i = order[-1]
        pick[counter] = i
        counter += 1
        idx = order[:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        # Compare by value: `is 'Min'` only matched when the caller happened
        # to pass the very same string object.
        if method == 'Min':
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        order = idx[o <= threshold]
    return pick[:counter]
|
||||
|
||||
# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
|
||||
def pad(total_boxes, w, h):
    """Compute the padding coordinates (pad the bounding boxes to square).

    For every box this returns the 1-based source range inside a w-by-h
    image (x..ex, y..ey) and the matching 1-based destination range inside a
    tmpw-by-tmph patch (dx..edx, dy..edy), with the parts of the box that
    fall outside the image left out of both ranges.

    Returns dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph as int32 arrays.
    """
    tmpw = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
    tmph = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
    count = total_boxes.shape[0]

    # Source ranges start out as the raw box corners.
    x, y, ex, ey = (total_boxes[:, col].copy().astype(np.int32) for col in range(4))

    # Destination ranges start out covering the whole patch.
    dx = np.ones((count), dtype=np.int32)
    dy = np.ones((count), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    # Boxes running past the right edge.
    beyond_right = np.where(ex > w)
    edx[beyond_right] = -ex[beyond_right] + w + tmpw[beyond_right]
    ex[beyond_right] = w

    # Boxes running past the bottom edge.
    beyond_bottom = np.where(ey > h)
    edy[beyond_bottom] = -ey[beyond_bottom] + h + tmph[beyond_bottom]
    ey[beyond_bottom] = h

    # Boxes starting left of the image.
    before_left = np.where(x < 1)
    dx[before_left] = 2 - x[before_left]
    x[before_left] = 1

    # Boxes starting above the image.
    before_top = np.where(y < 1)
    dy[before_top] = 2 - y[before_top]
    y[before_top] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
|
||||
|
||||
# function [bboxA] = rerec(bboxA)
|
||||
def rerec(bboxA):
    """Convert bboxA to square.

    Each box is grown around its centre to the longer of its two sides. The
    first four columns of bboxA are overwritten in place and bboxA itself is
    returned.
    """
    width = bboxA[:, 2] - bboxA[:, 0]
    height = bboxA[:, 3] - bboxA[:, 1]
    side = np.maximum(width, height)

    # Move the first corner so the centre stays where it was.
    bboxA[:, 0] = bboxA[:, 0] + width * 0.5 - side * 0.5
    bboxA[:, 1] = bboxA[:, 1] + height * 0.5 - side * 0.5
    # The opposite corner sits one side length away on both axes.
    bboxA[:, 2:4] = bboxA[:, 0:2] + side[:, np.newaxis]
    return bboxA
|
||||
|
||||
def imresample(img, sz):
    """Resize img with bilinear interpolation.

    sz is indexed as (sz[0], sz[1]) and passed to cv2.resize in swapped
    order, i.e. as (sz[1], sz[0]).
    """
    target_size = (sz[1], sz[0])
    return cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) #@UndefinedVariable
|
||||
|
|
BIN
facelib/det1.npy
BIN
facelib/det1.npy
Binary file not shown.
761
facelib/mtcnn.py
761
facelib/mtcnn.py
|
@ -1,761 +0,0 @@
|
|||
# Source: https://github.com/davidsandberg/facenet/blob/master/src/align/
|
||||
|
||||
""" Tensorflow implementation of the face detection / alignment algorithm found at
|
||||
https://github.com/kpzhang93/MTCNN_face_detection_alignment
|
||||
"""
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2016 David Sandberg
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from six import string_types, iteritems
|
||||
|
||||
import numpy as np
|
||||
#from math import floor
|
||||
import cv2
|
||||
import os
|
||||
|
||||
def layer(op):
    """Decorator for composable network layers.

    The wrapped method is called as op(self, layer_input, *args, **kwargs),
    where layer_input is the single current terminal, or a list of them when
    there are several. Its result is stored in self.layers under the layer
    name, fed back as the new terminal, and self is returned so calls can be
    chained.

    Raises RuntimeError when the network has no terminals to use as input.
    """
    # Imported locally so this block does not rely on the file's import list.
    import functools

    # functools.wraps keeps the wrapped method's __name__ and docstring.
    @functools.wraps(op)
    def layer_decorated(self, *args, **kwargs):
        # Automatically set a name if not provided.
        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
        # Figure out the layer inputs.
        terminal_count = len(self.terminals)
        if terminal_count == 0:
            raise RuntimeError('No input variables found for layer %s.' % name)
        if terminal_count == 1:
            layer_input = self.terminals[0]
        else:
            layer_input = list(self.terminals)
        # Perform the operation and get the output.
        layer_output = op(self, layer_input, *args, **kwargs)
        # Add to layer LUT.
        self.layers[name] = layer_output
        # This output is now the input for the next layer.
        self.feed(layer_output)
        # Return self for chained calls.
        return self

    return layer_decorated
|
||||
|
||||
class Network(object):
    """Base class for networks assembled by chaining layer calls.

    Subclasses implement setup() and build the graph with calls such as
    self.feed('data').conv(...).prelu(...). Every layer method is wrapped by
    the `layer` decorator, which records its output in self.layers.
    """

    def __init__(self, tf, inputs, trainable=True):
        # The TensorFlow module, injected by the caller.
        self.tf = tf
        # The input nodes for this network
        self.inputs = inputs
        # The current list of terminal nodes
        self.terminals = []
        # Mapping from layer names to layers
        self.layers = dict(inputs)
        # If true, the resulting variables are set as trainable
        self.trainable = trainable

        self.setup()

    def setup(self):
        """Construct the network. """
        raise NotImplementedError('Must be implemented by the subclass.')

    def load(self, data_path, session, ignore_missing=False):
        """Load network weights.

        data_path: The path to the numpy-serialized network weights
        session: The current TensorFlow session
        ignore_missing: If true, serialized weights for missing layers are ignored.
        """
        # The file stores a pickled dict, which NumPy >= 1.16.3 refuses to
        # load unless allow_pickle=True is passed explicitly.
        # NOTE(review): unpickling runs arbitrary code; only load weight
        # files from a trusted source.
        data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item() #pylint: disable=no-member

        for op_name in data_dict:
            with self.tf.variable_scope(op_name, reuse=True):
                for param_name, data in iteritems(data_dict[op_name]):
                    try:
                        var = self.tf.get_variable(param_name)
                        session.run(var.assign(data))
                    except ValueError:
                        if not ignore_missing:
                            raise

    def feed(self, *args):
        """Set the input(s) for the next operation by replacing the terminal nodes.

        The arguments can be either layer names or the actual layers.
        Raises KeyError for a layer name that has not been registered.
        """
        assert len(args) != 0
        self.terminals = []
        for fed_layer in args:
            if isinstance(fed_layer, string_types):
                try:
                    fed_layer = self.layers[fed_layer]
                except KeyError:
                    raise KeyError('Unknown layer name fed: %s' % fed_layer)
            self.terminals.append(fed_layer)
        return self

    def get_output(self):
        """Returns the current network output."""
        return self.terminals[-1]

    def get_unique_name(self, prefix):
        """Returns an index-suffixed unique name for the given prefix.

        This is used for auto-generating layer names based on the type-prefix.
        """
        ident = sum(layer_name.startswith(prefix) for layer_name in self.layers) + 1
        return '%s_%d' % (prefix, ident)

    def make_var(self, name, shape):
        """Creates a new TensorFlow variable."""
        return self.tf.get_variable(name, shape, trainable=self.trainable)

    def validate_padding(self, padding):
        """Verifies that the padding is one of the supported ones."""
        assert padding in ('SAME', 'VALID')

    @layer
    def conv(self,
             inp,
             k_h,
             k_w,
             c_o,
             s_h,
             s_w,
             name,
             relu=True,
             padding='SAME',
             group=1,
             biased=True):
        """2-D convolution with a k_h x k_w kernel, c_o outputs and strides s_h, s_w.

        A bias is added when `biased` is true and a ReLU is applied when
        `relu` is true. `group` is only validated against the channel counts.
        """
        # Verify that the padding is acceptable
        self.validate_padding(padding)
        # Get the number of channels in the input
        c_i = int(inp.get_shape()[-1])
        # Verify that the grouping parameter is valid
        assert c_i % group == 0
        assert c_o % group == 0
        # Convolution for a given input and kernel
        convolve = lambda i, k: self.tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
        with self.tf.variable_scope(name) as scope:
            kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
            # This is the common-case. Convolve the input without any further complications.
            output = convolve(inp, kernel)
            # Add the biases
            if biased:
                biases = self.make_var('biases', [c_o])
                output = self.tf.nn.bias_add(output, biases)
            if relu:
                # ReLU non-linearity
                output = self.tf.nn.relu(output, name=scope.name)
            return output

    @layer
    def prelu(self, inp, name):
        """Parametric ReLU with one `alpha` value per input channel."""
        with self.tf.variable_scope(name):
            i = int(inp.get_shape()[-1])
            alpha = self.make_var('alpha', shape=(i,))
            # relu(x) keeps the positive part; -relu(-x) is the negative part.
            output = self.tf.nn.relu(inp) + self.tf.multiply(alpha, -self.tf.nn.relu(-inp))
        return output

    @layer
    def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
        """Max pooling with a k_h x k_w window and strides s_h, s_w."""
        self.validate_padding(padding)
        return self.tf.nn.max_pool(inp,
                                   ksize=[1, k_h, k_w, 1],
                                   strides=[1, s_h, s_w, 1],
                                   padding=padding,
                                   name=name)

    @layer
    def fc(self, inp, num_out, name, relu=True):
        """Fully connected layer with num_out outputs, optionally followed by ReLU."""
        with self.tf.variable_scope(name):
            input_shape = inp.get_shape()
            if input_shape.ndims == 4:
                # The input is spatial. Vectorize it first.
                dim = 1
                for d in input_shape[1:].as_list():
                    dim *= int(d)
                feed_in = self.tf.reshape(inp, [-1, dim])
            else:
                feed_in, dim = (inp, input_shape[-1].value)
            weights = self.make_var('weights', shape=[dim, num_out])
            biases = self.make_var('biases', [num_out])
            op = self.tf.nn.relu_layer if relu else self.tf.nn.xw_plus_b
            fc = op(feed_in, weights, biases, name=name)
            return fc

    @layer
    def softmax(self, target, axis, name=None):
        """Multi dimensional softmax.

        Refer to https://github.com/tensorflow/tensorflow/issues/210
        Computes softmax along the given axis of target; the native softmax
        only supports batch_size x dimension.
        """
        # Subtract the per-axis maximum before exponentiating.
        max_axis = self.tf.reduce_max(target, axis, keepdims=True)
        target_exp = self.tf.exp(target-max_axis)
        normalize = self.tf.reduce_sum(target_exp, axis, keepdims=True)
        softmax = self.tf.div(target_exp, normalize, name)
        return softmax
|
||||
|
||||
class PNet(Network):
    """First-stage network: three conv/PReLU blocks with two 1x1 conv heads."""

    def setup(self):
        # Every layer call returns self and consumes the current terminal,
        # so the chain can be written as consecutive statements.
        self.feed('data') #pylint: disable=no-value-for-parameter, no-member
        self.conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
        self.prelu(name='PReLU1')
        self.max_pool(2, 2, 2, 2, name='pool1')
        self.conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
        self.prelu(name='PReLU2')
        self.conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
        self.prelu(name='PReLU3')
        # Head 1: two-channel map with softmax over axis 3.
        self.conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
        self.softmax(3,name='prob1')

        # Head 2: four-channel map, branching from PReLU3.
        self.feed('PReLU3') #pylint: disable=no-value-for-parameter
        self.conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')
|
||||
|
||||
class RNet(Network):
    """Second-stage network: conv/PReLU/pool trunk, a 128-unit fc layer and two fc heads."""

    def setup(self):
        # Every layer call returns self and consumes the current terminal,
        # so the chain can be written as consecutive statements.
        self.feed('data') #pylint: disable=no-value-for-parameter, no-member
        self.conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
        self.prelu(name='prelu1')
        self.max_pool(3, 3, 2, 2, name='pool1')
        self.conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
        self.prelu(name='prelu2')
        self.max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
        self.conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
        self.prelu(name='prelu3')
        self.fc(128, relu=False, name='conv4')
        self.prelu(name='prelu4')
        # Head 1: two outputs with softmax over axis 1.
        self.fc(2, relu=False, name='conv5-1')
        self.softmax(1,name='prob1')

        # Head 2: four outputs, branching from prelu4.
        self.feed('prelu4') #pylint: disable=no-value-for-parameter
        self.fc(4, relu=False, name='conv5-2')
|
||||
|
||||
class ONet(Network):
    """Third-stage network: conv/PReLU/pool trunk, a 256-unit fc layer and three fc heads."""

    def setup(self):
        # Every layer call returns self and consumes the current terminal,
        # so the chain can be written as consecutive statements.
        self.feed('data') #pylint: disable=no-value-for-parameter, no-member
        self.conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
        self.prelu(name='prelu1')
        self.max_pool(3, 3, 2, 2, name='pool1')
        self.conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
        self.prelu(name='prelu2')
        self.max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
        self.conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
        self.prelu(name='prelu3')
        self.max_pool(2, 2, 2, 2, name='pool3')
        self.conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
        self.prelu(name='prelu4')
        self.fc(256, relu=False, name='conv5')
        self.prelu(name='prelu5')
        # Head 1: two outputs with softmax over axis 1.
        self.fc(2, relu=False, name='conv6-1')
        self.softmax(1, name='prob1')

        # Head 2: four outputs, branching from prelu5.
        self.feed('prelu5') #pylint: disable=no-value-for-parameter
        self.fc(4, relu=False, name='conv6-2')

        # Head 3: ten outputs, branching from prelu5.
        self.feed('prelu5') #pylint: disable=no-value-for-parameter
        self.fc(10, relu=False, name='conv6-3')
|
||||
|
||||
def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
    """Detect faces in an image and return bounding boxes and landmark points.

    The image is run through a scale pyramid and three successive networks.

    img: input image, indexed as (rows, cols, channels).
    minsize: minimum face size; the first pyramid scale is 12.0 / minsize.
    pnet, rnet, onet: callables for the three stages. Each is called with a
        one-element list holding the input batch and must return a list of
        arrays: [regression, probability] for pnet and rnet, and
        [regression, landmarks, probability] for onet.
    threshold: [th1, th2, th3], the score thresholds of the three stages.
    factor: multiplicative step between consecutive pyramid scales.

    Returns a (total_boxes, points) tuple. If no candidate survives the first
    stage, total_boxes is an empty (0, 9) array and points is empty. After
    the later stages total_boxes has five columns (four box coordinates and
    the score) and points has ten rows with one column per detection.
    """
    factor_count = 0
    total_boxes = np.empty((0, 9))
    points = np.empty(0)
    h = img.shape[0]
    w = img.shape[1]
    minl = np.amin([h, w])
    m = 12.0 / minsize
    minl = minl * m

    # create scale pyramid
    scales = []
    while minl >= 12:
        scales += [m * np.power(factor, factor_count)]
        minl = minl * factor
        factor_count += 1

    # first stage
    for scale in scales:
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))
        im_data = imresample(img, (hs, ws))
        im_data = (im_data - 127.5) * 0.0078125
        img_x = np.expand_dims(im_data, 0)
        # The first network is fed the image with rows and columns swapped;
        # its outputs are swapped back below.
        img_y = np.transpose(img_x, (0, 2, 1, 3))
        out = pnet([img_y])
        out0 = np.transpose(out[0], (0, 2, 1, 3))
        out1 = np.transpose(out[1], (0, 2, 1, 3))

        boxes, _ = generateBoundingBox(out1[0, :, :, 1].copy(), out0[0, :, :, :].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size > 0 and pick.size > 0:
            boxes = boxes[pick, :]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        pick = nms(total_boxes.copy(), 0.7, 'Union')
        total_boxes = total_boxes[pick, :]
        regw = total_boxes[:, 2] - total_boxes[:, 0]
        regh = total_boxes[:, 3] - total_boxes[:, 1]
        # Columns 5..8 hold the first-stage regression offsets.
        qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
        qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
        qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
        qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # second stage
        tempimg = np.zeros((24, 24, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = img[y[k]-1:ey[k], x[k]-1:ex[k], :]
            if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                tempimg[:, :, :, k] = imresample(tmp, (24, 24))
            else:
                # The original `return np.empty()` raised TypeError (shape is
                # a required argument) and callers unpack two values, so hand
                # back the same empty pair as the "nothing found" path.
                return np.empty((0, 9)), np.empty(0)
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = rnet([tempimg1])
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1, :]
        ipass = np.where(score > threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]
        if total_boxes.shape[0] > 0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick, :]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # third stage
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48, 48, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = img[y[k]-1:ey[k], x[k]-1:ex[k], :]
            if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                tempimg[:, :, :, k] = imresample(tmp, (48, 48))
            else:
                # See the second stage: return an unpackable empty result.
                return np.empty((0, 9)), np.empty(0)
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = onet([tempimg1])
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1, :]
        points = out1
        ipass = np.where(score > threshold[2])
        points = points[:, ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]

        # Separate names so the image width/height (w, h) are not rebound.
        box_w = total_boxes[:, 2] - total_boxes[:, 0] + 1
        box_h = total_boxes[:, 3] - total_boxes[:, 1] + 1
        # Rows 0..4 are scaled by box width, rows 5..9 by box height.
        points[0:5, :] = np.tile(box_w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
        points[5:10, :] = np.tile(box_h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1
        if total_boxes.shape[0] > 0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, 'Min')
            total_boxes = total_boxes[pick, :]
            points = points[:, pick]

    return total_boxes, points
|
||||
|
||||
|
||||
def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
    """Detect faces in a list of images with the pnet -> rnet -> onet cascade.

    images: list containing input images (shape[0] is read as height,
        shape[1] as width).
    detection_window_size_ratio: ratio of minimum face size to smallest image dimension.
    pnet, rnet, onet: callables for the three networks. pnet is called with a
        list of equally sized images; rnet and onet are called with one stacked
        batch of 24x24 and 48x48 crops respectively.
    threshold: threshold=[th1 th2 th3], th1-3 are the score thresholds of the three stages [0-1].
    factor: the factor used to create a scaling pyramid of face sizes to detect in the image.

    Returns a list with one entry per input image: None when no candidate
    survives, otherwise a tuple (total_boxes, points) where total_boxes has
    five columns (four box coordinates followed by the onet score) and points
    holds ten rows per box (rows 0-4 are offset by the box x origin, rows 5-9
    by the box y origin).

    If a candidate crop degenerates to exactly one zero-sized dimension the
    whole call gives up and returns [None] * len(images). The previous code
    tried to `return np.empty()` there, which raises TypeError because
    np.empty requires a shape.
    """
    all_scales = [None] * len(images)
    images_with_boxes = [{'total_boxes': np.empty((0, 9))} for _ in images]

    # create scale pyramid
    for index, img in enumerate(images):
        all_scales[index] = []
        h = img.shape[0]
        w = img.shape[1]
        minsize = int(detection_window_size_ratio * np.minimum(w, h))
        factor_count = 0
        minl = np.amin([h, w])
        if minsize <= 12:
            minsize = 12

        m = 12.0 / minsize
        minl = minl * m
        while minl >= 12:
            all_scales[index].append(m * np.power(factor, factor_count))
            minl = minl * factor
            factor_count += 1

    # # # # # # # # # # # # #
    # first stage - fast proposal network (pnet) to obtain face candidates
    # # # # # # # # # # # # #

    # Pyramid levels are grouped by (width, height) so that every group can be
    # sent to pnet in a single call.
    images_obj_per_resolution = {}

    # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images

    for index, scales in enumerate(all_scales):
        h = images[index].shape[0]
        w = images[index].shape[1]

        for scale in scales:
            hs = int(np.ceil(h * scale))
            ws = int(np.ceil(w * scale))

            if (ws, hs) not in images_obj_per_resolution:
                images_obj_per_resolution[(ws, hs)] = []

            im_data = imresample(images[index], (hs, ws))
            im_data = (im_data - 127.5) * 0.0078125
            img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses different dimensions ordering
            images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})

    for resolution in images_obj_per_resolution:
        images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
        outs = pnet(images_per_resolution)

        for index in range(len(outs[0])):
            scale = images_obj_per_resolution[resolution][index]['scale']
            image_index = images_obj_per_resolution[resolution][index]['index']
            out0 = np.transpose(outs[0][index], (1, 0, 2))
            out1 = np.transpose(outs[1][index], (1, 0, 2))

            boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])

            # inter-scale nms
            pick = nms(boxes.copy(), 0.5, 'Union')
            if boxes.size > 0 and pick.size > 0:
                boxes = boxes[pick, :]
                images_with_boxes[image_index]['total_boxes'] = np.append(
                    images_with_boxes[image_index]['total_boxes'],
                    boxes,
                    axis=0)

    for index, image_obj in enumerate(images_with_boxes):
        numbox = image_obj['total_boxes'].shape[0]
        if numbox > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            # Apply the regression offsets stored in columns 5-8 to the box corners.
            regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
            regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
            qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
            qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
            qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
            qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
            image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
            image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

            numbox = image_obj['total_boxes'].shape[0]
            tempimg = np.zeros((24, 24, 3, numbox))

            if numbox > 0:
                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                    if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
                    else:
                        # Degenerate crop (exactly one zero-sized dimension): give up.
                        return [None] * len(images)

                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

    # # # # # # # # # # # # #
    # second stage - refinement of face candidates with rnet
    # # # # # # # # # # # # #

    bulk_rnet_input = np.empty((0, 24, 24, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' in image_obj:
            bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)

    out = rnet(bulk_rnet_input)
    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    score = out1[1, :]

    # i is the offset of the current image's crops inside the stacked rnet batch.
    i = 0
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' not in image_obj:
            continue

        rnet_input_count = image_obj['rnet_input'].shape[0]
        score_per_image = score[i:i + rnet_input_count]
        out0_per_image = out0[:, i:i + rnet_input_count]

        ipass = np.where(score_per_image > threshold[1])
        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])

        mv = out0_per_image[:, ipass[0]]

        if image_obj['total_boxes'].shape[0] > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'], 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())

            numbox = image_obj['total_boxes'].shape[0]

            if numbox > 0:
                tempimg = np.zeros((48, 48, 3, numbox))
                image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
                    if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
                    else:
                        # Degenerate crop (exactly one zero-sized dimension): give up.
                        return [None] * len(images)
                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

        i += rnet_input_count

    # # # # # # # # # # # # #
    # third stage - further refinement and facial landmarks positions with onet
    # # # # # # # # # # # # #

    bulk_onet_input = np.empty((0, 48, 48, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' in image_obj:
            bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)

    out = onet(bulk_onet_input)

    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    out2 = np.transpose(out[2])
    score = out2[1, :]
    points = out1

    # i is the offset of the current image's crops inside the stacked onet batch.
    i = 0
    ret = []
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' not in image_obj:
            ret.append(None)
            continue

        onet_input_count = image_obj['onet_input'].shape[0]

        out0_per_image = out0[:, i:i + onet_input_count]
        score_per_image = score[i:i + onet_input_count]
        points_per_image = points[:, i:i + onet_input_count]

        ipass = np.where(score_per_image > threshold[2])
        points_per_image = points_per_image[:, ipass[0]]

        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
        mv = out0_per_image[:, ipass[0]]

        # Scale the landmark rows by the box size and shift them by the box origin.
        w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
        h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
        points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
            image_obj['total_boxes'][:, 0], (5, 1)) - 1
        points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
            image_obj['total_boxes'][:, 1], (5, 1)) - 1

        if image_obj['total_boxes'].shape[0] > 0:
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            points_per_image = points_per_image[:, pick]

            ret.append((image_obj['total_boxes'], points_per_image))
        else:
            ret.append(None)

        i += onet_input_count

    return ret
|
||||
|
||||
|
||||
# function [boundingbox] = bbreg(boundingbox,reg)
|
||||
def bbreg(boundingbox, reg):
    """Calibrate bounding boxes with per-box regression offsets.

    Each of the first four columns of `boundingbox` is shifted by the matching
    column of `reg`, scaled by the box width (columns 0 and 2) or the box
    height (columns 1 and 3). `boundingbox` is modified in place and returned.
    """
    if reg.shape[1] == 1:
        # NOTE(review): looks like a leftover for 4-D network output; it reads
        # reg.shape[2] and reg.shape[3], so it only works for 4-D input.
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    width = boundingbox[:, 2] - boundingbox[:, 0] + 1
    height = boundingbox[:, 3] - boundingbox[:, 1] + 1
    # Column c is scaled by width for x coordinates and by height for y coordinates.
    extents = (width, height, width, height)
    corrected = [boundingbox[:, col] + reg[:, col] * extents[col] for col in range(4)]
    boundingbox[:, 0:4] = np.transpose(np.vstack(corrected))
    return boundingbox
|
||||
|
||||
def generateBoundingBox(imap, reg, scale, t):
    """Use heatmap to generate bounding boxes.

    imap: 2-D score map; every cell with a score >= t yields one box.
    reg: 3-D array whose last axis holds the four regression offsets per cell.
    scale: pyramid scale of the map; box coordinates are divided by it.
    t: score threshold.

    Returns (boundingbox, reg). boundingbox has nine columns: the two corner
    points (four values), the score, and the four regression offsets. reg
    holds the four offsets of the selected cells. When no cell passes the
    threshold both results are empty but keep their column counts, (0, 9) and
    (0, 4); the previous code substituted a (0, 3) array for reg there, which
    produced an inconsistent eight-column box array.
    """
    stride = 2
    cellsize = 12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:, :, 0])
    dy1 = np.transpose(reg[:, :, 1])
    dx2 = np.transpose(reg[:, :, 2])
    dy2 = np.transpose(reg[:, :, 3])
    y, x = np.where(imap >= t)
    if y.shape[0] == 1:
        # NOTE(review): the offset maps are flipped vertically when exactly one
        # cell passes; kept as-is to preserve existing behaviour - confirm intent.
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)
    score = imap[(y, x)]
    reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]]))
    if reg.size == 0:
        # Keep four columns so the hstack below always yields nine columns.
        reg = np.empty((0, 4))
    bb = np.transpose(np.vstack([y, x]))
    # Map cell indices back to coordinates at the original scale.
    q1 = np.fix((stride * bb + 1) / scale)
    q2 = np.fix((stride * bb + cellsize - 1 + 1) / scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])
    return boundingbox, reg
|
||||
|
||||
# function pick = nms(boxes,threshold,type)
|
||||
def nms(boxes, threshold, method):
    """Non-maximum suppression over scored boxes.

    boxes: 2-D array; columns 0-3 are x1, y1, x2, y2 and column 4 is the score.
    threshold: boxes whose overlap with an already picked box exceeds this
        value are discarded.
    method: 'Min' divides the intersection by the smaller of the two areas;
        any other value uses intersection over union.

    Returns the indices of the kept boxes, highest score first. For empty
    input an empty array (size 0) is returned.
    """
    if boxes.size == 0:
        return np.empty((0, 3))
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    I = np.argsort(s)
    # np.intp instead of int16: int16 wraps around once there are more than
    # 32767 candidate boxes and would then return wrong indices.
    pick = np.zeros_like(s, dtype=np.intp)
    counter = 0
    while I.size > 0:
        # Take the best remaining box and compare it with all the others.
        i = I[-1]
        pick[counter] = i
        counter += 1
        idx = I[0:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        # Compare by value: `is` only matched interned string objects.
        if method == 'Min':
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        I = I[np.where(o <= threshold)]
    pick = pick[0:counter]
    return pick
|
||||
|
||||
# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
|
||||
def pad(total_boxes, w, h):
    """Compute the padding coordinates (pad the bounding boxes to square).

    Boxes that stick out of the [1, w] x [1, h] range are clipped to it, and
    the matching start/end offsets inside the crop buffer are adjusted so the
    clipped region lands at the right place.

    Returns dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph as int32 arrays with
    one entry per box: (dy:edy, dx:edx) is the target range in the crop,
    (y:ey, x:ex) the clipped source range, tmpw/tmph the crop size.
    """
    numbox = total_boxes.shape[0]
    tmpw = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
    tmph = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)

    # Source coordinates start as the integer box corners.
    x, y, ex, ey = (total_boxes[:, col].copy().astype(np.int32) for col in range(4))

    # Target coordinates start as the full crop.
    dx = np.ones((numbox), dtype=np.int32)
    dy = np.ones((numbox), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    # Right edge beyond the image width.
    past_right = ex > w
    edx[past_right] = -ex[past_right] + w + tmpw[past_right]
    ex[past_right] = w

    # Bottom edge beyond the image height.
    past_bottom = ey > h
    edy[past_bottom] = -ey[past_bottom] + h + tmph[past_bottom]
    ey[past_bottom] = h

    # Left edge before the first column.
    before_left = x < 1
    dx[before_left] = 2 - x[before_left]
    x[before_left] = 1

    # Top edge before the first row.
    before_top = y < 1
    dy[before_top] = 2 - y[before_top]
    y[before_top] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
|
||||
|
||||
# function [bboxA] = rerec(bboxA)
|
||||
def rerec(bboxA):
    """Convert bboxA to square.

    Every box is replaced by a square whose side is the longer of its width
    and height, keeping the box centre. Only columns 0-3 are touched; bboxA
    is modified in place and returned.
    """
    height = bboxA[:, 3] - bboxA[:, 1]
    width = bboxA[:, 2] - bboxA[:, 0]
    side = np.maximum(width, height)
    # Move the top-left corner so the square stays centred on the old box.
    bboxA[:, 0] = bboxA[:, 0] + width * 0.5 - side * 0.5
    bboxA[:, 1] = bboxA[:, 1] + height * 0.5 - side * 0.5
    # The bottom-right corner is the top-left corner plus the side length.
    bboxA[:, 2:4] = bboxA[:, 0:2] + side[:, np.newaxis]
    return bboxA
|
||||
|
||||
def imresample(img, sz):
    """Resize img to sz = (height, width) with bilinear interpolation."""
    target_height = sz[0]
    target_width = sz[1]
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, (target_width, target_height), interpolation=cv2.INTER_LINEAR)  #@UndefinedVariable
|
||||
|
||||
# This method is kept for debugging purpose
|
||||
# h=img.shape[0]
|
||||
# w=img.shape[1]
|
||||
# hs, ws = sz
|
||||
# dx = float(w) / ws
|
||||
# dy = float(h) / hs
|
||||
# im_data = np.zeros((hs,ws,3))
|
||||
# for a1 in range(0,hs):
|
||||
# for a2 in range(0,ws):
|
||||
# for a3 in range(0,3):
|
||||
# im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
|
||||
# return im_data
|
||||
|
Binary file not shown.
BIN
facelib/mtcnn_pnet.h5
Normal file
BIN
facelib/mtcnn_pnet.h5
Normal file
Binary file not shown.
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue