This commit is contained in:
iperov 2019-04-23 17:31:58 +04:00
parent 268b402513
commit 2809d495c2
7 changed files with 37 additions and 23 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@@ -19,7 +19,7 @@ class PoseEstimator(object):
def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False):
exec( nnlib.import_all(), locals(), globals() ) exec( nnlib.import_all(), locals(), globals() )
self.class_num = 180 self.class_num = 91
self.model = PoseEstimator.BuildModel(resolution, class_num=self.class_num) self.model = PoseEstimator.BuildModel(resolution, class_num=self.class_num)
@@ -35,9 +35,7 @@ class PoseEstimator(object):
idx_tensor = np.array([idx for idx in range(self.class_num)], dtype=K.floatx() ) idx_tensor = np.array([idx for idx in range(self.class_num)], dtype=K.floatx() )
idx_tensor = K.constant(idx_tensor) idx_tensor = K.constant(idx_tensor)
#inp_t = Input ( (resolution,resolution,3) )
inp_t, = self.model.inputs inp_t, = self.model.inputs
pitch_bins_t, yaw_bins_t, roll_bins_t = self.model.outputs pitch_bins_t, yaw_bins_t, roll_bins_t = self.model.outputs
@@ -51,22 +49,26 @@ class PoseEstimator(object):
inp_roll_bins_t = Input ( (self.class_num,) ) inp_roll_bins_t = Input ( (self.class_num,) )
inp_roll_t = Input ( (1,) ) inp_roll_t = Input ( (1,) )
alpha = 0.001
pitch_loss = K.categorical_crossentropy(inp_pitch_bins_t, pitch_bins_t) \ pitch_loss = K.categorical_crossentropy(inp_pitch_bins_t, pitch_bins_t) \
+ 0.001 * K.mean(K.square( inp_pitch_t - pitch_t), -1) + alpha * K.mean(K.square( inp_pitch_t - pitch_t), -1)
yaw_loss = K.categorical_crossentropy(inp_yaw_bins_t, yaw_bins_t) \ yaw_loss = K.categorical_crossentropy(inp_yaw_bins_t, yaw_bins_t) \
+ 0.001 * K.mean(K.square( inp_yaw_t - yaw_t), -1) + alpha * K.mean(K.square( inp_yaw_t - yaw_t), -1)
roll_loss = K.categorical_crossentropy(inp_roll_bins_t, roll_bins_t) \ roll_loss = K.categorical_crossentropy(inp_roll_bins_t, roll_bins_t) \
+ 0.001 * K.mean(K.square( inp_roll_t - roll_t), -1) + alpha * K.mean(K.square( inp_roll_t - roll_t), -1)
loss = K.mean( pitch_loss + yaw_loss + roll_loss ) loss = K.mean( pitch_loss + yaw_loss + roll_loss )
opt = Adam(lr=0.001, tf_cpu_mode=2)
if training: if training:
self.train = K.function ([inp_t, inp_pitch_bins_t, inp_pitch_t, inp_yaw_bins_t, inp_yaw_t, inp_roll_bins_t, inp_roll_t], self.train = K.function ([inp_t, inp_pitch_bins_t, inp_pitch_t, inp_yaw_bins_t, inp_yaw_t, inp_roll_bins_t, inp_roll_t],
[loss], Adam(tf_cpu_mode=2).get_updates(loss, self.model.trainable_weights) ) [loss], opt.get_updates(loss, self.model.trainable_weights) )
self.view = K.function ([inp_t], [pitch_t, yaw_t, roll_t] ) self.view = K.function ([inp_t], [pitch_t, yaw_t, roll_t] )
@@ -80,7 +82,7 @@ class PoseEstimator(object):
self.model.save_weights (str(self.weights_path)) self.model.save_weights (str(self.weights_path))
def train_on_batch(self, imgs, pitch_yaw_roll): def train_on_batch(self, imgs, pitch_yaw_roll):
c = ( (pitch_yaw_roll+1) * 90.0 ).astype(np.int).astype(K.floatx()) c = ( (pitch_yaw_roll+1) * 45.0 ).astype(np.int).astype(K.floatx())
inp_pitch = c[:,0:1] inp_pitch = c[:,0:1]
inp_yaw = c[:,1:2] inp_yaw = c[:,1:2]
@@ -104,7 +106,7 @@ class PoseEstimator(object):
pitch, yaw, roll = self.view( [input_image] ) pitch, yaw, roll = self.view( [input_image] )
result = np.concatenate( (pitch[...,np.newaxis], yaw[...,np.newaxis], roll[...,np.newaxis]), -1 ) result = np.concatenate( (pitch[...,np.newaxis], yaw[...,np.newaxis], roll[...,np.newaxis]), -1 )
result = np.clip ( result / 90.0 - 1, -1, 1 ) result = np.clip ( result / 45.0 - 1, -1.0, 1.0 )
if input_shape_len == 3: if input_shape_len == 3:
result = result[0] result = result[0]
@@ -126,7 +128,15 @@ class PoseEstimator(object):
def func(input): def func(input):
x = input x = input
# resnet50 = keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=K.int_shape(x)[1:], pooling='avg')
# x = resnet50(x)
# pitch = Dense(class_num, activation='softmax', name='pitch')(x)
# yaw = Dense(class_num, activation='softmax', name='yaw')(x)
# roll = Dense(class_num, activation='softmax', name='roll')(x)
# return [pitch, yaw, roll]
x = Conv2D(64, kernel_size=11, strides=4, padding='same', activation='relu')(x) x = Conv2D(64, kernel_size=11, strides=4, padding='same', activation='relu')(x)
x = MaxPooling2D( (3,3), strides=2 )(x) x = MaxPooling2D( (3,3), strides=2 )(x)
@@ -139,11 +149,9 @@ class PoseEstimator(object):
x = MaxPooling2D( (3,3), strides=2 )(x) x = MaxPooling2D( (3,3), strides=2 )(x)
x = Flatten()(x) x = Flatten()(x)
x = Dense(4096, activation='relu')(x) x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x) x = Dropout(0.5)(x)
x = Dense(4096, activation='relu')(x) x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(1000, activation='relu')(x)
pitch = Dense(class_num, activation='softmax', name='pitch')(x) pitch = Dense(class_num, activation='softmax', name='pitch')(x)
yaw = Dense(class_num, activation='softmax', name='yaw')(x) yaw = Dense(class_num, activation='softmax', name='yaw')(x)
@@ -151,5 +159,4 @@ class PoseEstimator(object):
return [pitch, yaw, roll] return [pitch, yaw, roll]
return func return func

View file

@@ -29,9 +29,9 @@ class Model(ModelBase):
#override #override
def onInitialize(self): def onInitialize(self):
exec(nnlib.import_all(), locals(), globals()) exec(nnlib.import_all(), locals(), globals())
self.set_vram_batch_requirements( {4:64} ) self.set_vram_batch_requirements( {4:32} )
self.resolution = 227 self.resolution = 128
self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF
@@ -45,16 +45,17 @@ class Model(ModelBase):
f = SampleProcessor.TypeFlags f = SampleProcessor.TypeFlags
face_type = f.FACE_TYPE_FULL if self.options['face_type'] == 'f' else f.FACE_TYPE_HALF face_type = f.FACE_TYPE_FULL if self.options['face_type'] == 'f' else f.FACE_TYPE_HALF
normalize_vgg = False
self.set_training_data_generators ([ self.set_training_data_generators ([
SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size,
sample_process_options=SampleProcessor.Options( motion_blur = [25, 1] ), #random_flip=True, sample_process_options=SampleProcessor.Options( motion_blur = [25, 1] ), #random_flip=True,
output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE | f.OPT_APPLY_MOTION_BLUR, self.resolution], output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE | f.OPT_APPLY_MOTION_BLUR, self.resolution, {'normalize_vgg':normalize_vgg} ],
[f.PITCH_YAW_ROLL], [f.PITCH_YAW_ROLL],
]), ]),
SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size,
sample_process_options=SampleProcessor.Options(), #random_flip=True, sample_process_options=SampleProcessor.Options(), #random_flip=True,
output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE, self.resolution], output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE, self.resolution, {'normalize_vgg':normalize_vgg} ],
[f.PITCH_YAW_ROLL], [f.PITCH_YAW_ROLL],
]) ])
]) ])

View file

@@ -85,7 +85,8 @@ class SampleProcessor(object):
random_sub_size = opts.get('random_sub_size', 0) random_sub_size = opts.get('random_sub_size', 0)
normalize_std_dev = opts.get('normalize_std_dev', False) normalize_std_dev = opts.get('normalize_std_dev', False)
normalize_vgg = opts.get('normalize_vgg', False)
if f & SPTF.SOURCE != 0: if f & SPTF.SOURCE != 0:
img_type = 0 img_type = 0
elif f & SPTF.WARPED != 0: elif f & SPTF.WARPED != 0:
@@ -228,7 +229,12 @@ class SampleProcessor(object):
if normalize_std_dev: if normalize_std_dev:
img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) ) img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) )
elif normalize_vgg:
img_bgr = np.clip(img_bgr*255, 0, 255)
img_bgr[:,:,0] -= 103.939
img_bgr[:,:,1] -= 116.779
img_bgr[:,:,2] -= 123.68
if f & SPTF.MODE_BGR != 0: if f & SPTF.MODE_BGR != 0:
img = img_bgr img = img_bgr
elif f & SPTF.MODE_BGR_SHUFFLE != 0: elif f & SPTF.MODE_BGR_SHUFFLE != 0: