This commit is contained in:
iperov 2019-04-30 17:14:02 +04:00
parent 1f1f94848b
commit 659aa5705a
2 changed files with 107 additions and 102 deletions

View file

@ -38,22 +38,32 @@ class PoseEstimator(object):
self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) )
self.input_bgr_shape = (resolution, resolution, 3)
def ResamplerFunc(input):
mean_t, logvar_t = input
return mean_t + K.exp(0.5*logvar_t)*K.random_normal(K.shape(mean_t))
self.BVAEResampler = Lambda ( lambda x: x[0] + K.exp(0.5*x[1])*K.random_normal(K.shape(x[0])),
output_shape=K.int_shape(self.encoder.outputs[0])[1:] )
inp_t = Input (self.input_bgr_shape)
inp_mask_t = Input ( (resolution, resolution, 1) )
inp_real_t = Input (self.input_bgr_shape)
inp_pitch_t = Input ( (1,) )
inp_yaw_t = Input ( (1,) )
inp_roll_t = Input ( (1,) )
mean_t, logvar_t = self.encoder(inp_t)
latent_t = self.BVAEResampler([mean_t, logvar_t])
if training:
latent_t = self.encoder(inp_t)
bgr_t = self.decoder (latent_t)
pyrs_t = self.model_l(latent_t)
else:
self.model = Model(inp_t, self.model_l(self.encoder(inp_t)) )
self.model = Model(inp_t, self.model_l(latent_t) )
pyrs_t = self.model(inp_t)
if load_weights:
if training:
self.encoder.load_weights (str(self.encoder_weights_path))
@ -88,19 +98,31 @@ class PoseEstimator(object):
a = self.alpha_cat_losses[i]
pyr_loss += [ a*K.mean( K.square ( inp_pyrs_t[i] - pyrs_t[i]) ) ]
bgr_loss = K.mean( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( inp_real_t*inp_mask_t, bgr_t*inp_mask_t) )
def BVAELoss(beta=4):
#keep in mind loss per sample, not per minibatch
def func(input):
mean_t, logvar_t = input
return beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True )
return func
BVAE_loss = BVAELoss(4)([mean_t, logvar_t])#beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True )
bgr_loss = K.mean(K.square(inp_real_t-bgr_t), axis=0, keepdims=True)
#train_loss = BVAE_loss + bgr_loss
pyr_loss = sum(pyr_loss)
self.train = K.function ([inp_t, inp_real_t, inp_mask_t],
[bgr_loss], Adam(lr=2e-4, beta_1=0.5, beta_2=0.999).get_updates( bgr_loss, self.encoder.trainable_weights+self.decoder.trainable_weights ) )
self.train = K.function ([inp_t, inp_real_t],
[ K.mean (BVAE_loss)+K.mean(bgr_loss) ], Adam(lr=0.0005, beta_1=0.9, beta_2=0.999).get_updates( [BVAE_loss, bgr_loss], self.encoder.trainable_weights+self.decoder.trainable_weights ) )
self.train_l = K.function ([inp_t] + inp_pyrs_t,
[pyr_loss], Adam(lr=0.0001).get_updates( pyr_loss, self.model_l.trainable_weights) )
self.view = K.function ([inp_t], [ pyrs_t[0] ] )
self.view = K.function ([inp_t], [ bgr_t, pyrs_t[0] ] )
def __enter__(self):
return self
@ -114,21 +136,25 @@ class PoseEstimator(object):
self.model_l.save_weights (str(self.l_weights_path))
inp_t = Input (self.input_bgr_shape)
Model(inp_t, self.model_l(self.encoder(inp_t)) ).save_weights (str(self.model_weights_path))
def train_on_batch(self, warps, imgs, masks, pitch_yaw_roll, skip_bgr_train=False):
Model(inp_t, self.model_l(self.BVAEResampler(self.encoder(inp_t))) ).save_weights (str(self.model_weights_path))
def train_on_batch(self, warps, imgs, pyr_tanh, skip_bgr_train=False):
if not skip_bgr_train:
bgr_loss, = self.train( [warps, imgs, masks] )
bgr_loss, = self.train( [warps, imgs] )
pyr_loss = 0
else:
bgr_loss = 0
feed = [imgs]
for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)):
c = np.round( np.round(pitch_yaw_roll * angle) / angle ) #.astype(K.floatx())
a = angle / 2
c = np.round( (pyr_tanh+1) * a ) / a -1 #.astype(K.floatx())
feed += [c]
pyr_loss, = self.train_l(feed)
return bgr_loss, pyr_loss
def extract (self, input_image, is_input_tanh=False):
@ -139,26 +165,27 @@ class PoseEstimator(object):
if input_shape_len == 3:
input_image = input_image[np.newaxis,...]
result, = self.view( [input_image] )
bgr, result, = self.view( [input_image] )
#result = np.clip ( result / (self.angles[0] / 2) - 1, 0.0, 1.0 )
if input_shape_len == 3:
bgr = bgr[0]
result = result[0]
return result
return bgr, result
@staticmethod
def BuildModels ( resolution, class_nums):
def BuildModels ( resolution, class_nums, ae_dims=128):
exec( nnlib.import_all(), locals(), globals() )
x = inp = Input ( (resolution,resolution,3) )
x = PoseEstimator.EncFlow()(x)
x = PoseEstimator.EncFlow(ae_dims)(x)
encoder = Model(inp,x)
x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) )
x = PoseEstimator.DecFlow(resolution)(x)
x = PoseEstimator.DecFlow(resolution, ae_dims)(x)
decoder = Model(inp,x)
x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) )
@ -168,61 +195,52 @@ class PoseEstimator(object):
return encoder, decoder, model_l
@staticmethod
def EncFlow():
def EncFlow(ae_dims):
exec( nnlib.import_all(), locals(), globals() )
XConv2D = partial(Conv2D, padding='zero')
def Act(lrelu_alpha=0.1):
return LeakyReLU(alpha=lrelu_alpha)
def downscale (dim, **kwargs):
def func(x):
return Act() ( XConv2D(dim, kernel_size=5, strides=2)(x))
return ReLU() ( ( XConv2D(dim, kernel_size=4, strides=2)(x)) )
return func
def upscale (dim, **kwargs):
def func(x):
return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
return func
def to_bgr (output_nc, **kwargs):
def func(x):
return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x)
return func
upscale = partial(upscale)
downscale = partial(downscale)
ae_dims = 512
ed_ch_dims = 128
def func(input):
x = input
x = downscale(64)(x)
x = downscale(128)(x)
x = downscale(256)(x)
x = downscale(512)(x)
x = Dense(ae_dims, name="latent", use_bias=False)(Flatten()(x))
x = Lambda ( lambda x: x + 0.1*K.random_normal(K.shape(x), 0, 1) , output_shape=(None,ae_dims) ) (x)
return x
x = Flatten()(x)
x = Dense(256)(x)
x = ReLU()(x)
x = Dense(256)(x)
x = ReLU()(x)
mean = Dense(ae_dims)(x)
logvar = Dense(ae_dims)(x)
return mean, logvar
return func
@staticmethod
def DecFlow(resolution):
def DecFlow(resolution, ae_dims):
exec( nnlib.import_all(), locals(), globals() )
XConv2D = partial(Conv2D, padding='zero')
def Act(lrelu_alpha=0.1):
return LeakyReLU(alpha=lrelu_alpha)
def downscale (dim, **kwargs):
def upscale (dim, strides=2, **kwargs):
def func(x):
return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
return func
def upscale (dim, **kwargs):
def func(x):
return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
return ReLU()( ( Conv2DTranspose(dim, kernel_size=4, strides=strides, padding='same')(x)) )
return func
def to_bgr (output_nc, **kwargs):
@ -231,21 +249,29 @@ class PoseEstimator(object):
return func
upscale = partial(upscale)
downscale = partial(downscale)
lowest_dense_res = resolution // 16
def func(input):
x = input
x = Dense(lowest_dense_res * lowest_dense_res * 256, use_bias=False)(x)
x = Dense(256)(x)
x = ReLU()(x)
x = Dense(256)(x)
x = ReLU()(x)
x = Dense( (lowest_dense_res*lowest_dense_res*256) ) (x)
x = ReLU()(x)
x = Reshape( (lowest_dense_res,lowest_dense_res,256) )(x)
x = upscale(512)(x)
x = upscale(256)(x)
x = upscale(128)(x)
x = upscale(64)(x)
bgr = to_bgr(3)(x)
return [bgr]
x = to_bgr(3)(x)
return x
return func
@staticmethod
@ -254,39 +280,18 @@ class PoseEstimator(object):
XConv2D = partial(Conv2D, padding='zero')
def Act(lrelu_alpha=0.1):
return LeakyReLU(alpha=lrelu_alpha)
def downscale (dim, **kwargs):
def func(x):
return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
return func
def upscale (dim, **kwargs):
def func(x):
return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
return func
def to_bgr (output_nc, **kwargs):
def func(x):
return XConv2D(output_nc, kernel_size=5, use_bias=True, activation='sigmoid')(x)
return func
upscale = partial(upscale)
downscale = partial(downscale)
def func(latent):
x = latent
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(2048, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(4096, activation='relu')(x)
x = Dense(1024, activation='relu')(x)
# x = Dropout(0.5)(x)
# x = Dense(4096, activation='relu')(x)
output = []
for class_num in class_nums:
pyr = Dense(3, activation='sigmoid')(x)
pyr = Dense(3, activation='tanh')(x)
output += [pyr]
return output

View file

@ -37,7 +37,7 @@ class Model(ModelBase):
#override
def onInitialize(self):
exec(nnlib.import_all(), locals(), globals())
self.set_vram_batch_requirements( {4:32} )
self.set_vram_batch_requirements( {4:64} )
self.resolution = 128
self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF
@ -58,14 +58,13 @@ class Model(ModelBase):
sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) },
{'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution },
{'types': (t.IMG_TRANSFORMED, face_type, t.MODE_M, t.FACE_MASK_FULL), 'resolution':self.resolution },
{'types': (t.IMG_PITCH_YAW_ROLL_SIGMOID,)}
{'types': (t.IMG_PITCH_YAW_ROLL,)}
]),
SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4,
sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution },
{'types': (t.IMG_PITCH_YAW_ROLL_SIGMOID,)}
output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':self.resolution },
{'types': (t.IMG_PITCH_YAW_ROLL,)}
])
])
@ -75,16 +74,16 @@ class Model(ModelBase):
#override
def onTrainOneIter(self, generators_samples, generators_list):
target_srcw, target_src, target_srcm, pitch_yaw_roll = generators_samples[0]
target_srcw, target_src, pitch_yaw_roll = generators_samples[0]
bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, target_srcm, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] )
bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] )
return ( ('bgr_loss', bgr_loss), ('pyr_loss', pyr_loss), )
#override
def onGetPreview(self, generators_samples):
test_src = generators_samples[0][1][0:4] #first 4 samples
test_pyr_src = generators_samples[0][3][0:4]
test_pyr_src = generators_samples[0][2][0:4]
test_dst = generators_samples[1][0][0:4]
test_pyr_dst = generators_samples[1][1][0:4]
@ -94,7 +93,7 @@ class Model(ModelBase):
result = []
for name, img, pyr in [ ['training data', test_src, test_pyr_src], \
['evaluating data',test_dst, test_pyr_dst] ]:
pyr_pred = self.pose_est.extract(img)
bgr_pred, pyr_pred = self.pose_est.extract(img)
hor_imgs = []
for i in range(len(img)):
@ -112,6 +111,7 @@ class Model(ModelBase):
hor_imgs.append ( np.concatenate ( (
img[i,:,:,0:3],
bgr_pred[i],
img_info
), axis=1) )