diff --git a/doc/manual_en_google_translated.docx b/doc/manual_en_google_translated.docx index cd6c76d..31673a4 100644 Binary files a/doc/manual_en_google_translated.docx and b/doc/manual_en_google_translated.docx differ diff --git a/doc/manual_en_google_translated.pdf b/doc/manual_en_google_translated.pdf index 828349d..0bb7ec5 100644 Binary files a/doc/manual_en_google_translated.pdf and b/doc/manual_en_google_translated.pdf differ diff --git a/doc/manual_ru.pdf b/doc/manual_ru.pdf index 6712983..93037c5 100644 Binary files a/doc/manual_ru.pdf and b/doc/manual_ru.pdf differ diff --git a/doc/manual_ru_source.docx b/doc/manual_ru_source.docx index d733115..9a2f3e2 100644 Binary files a/doc/manual_ru_source.docx and b/doc/manual_ru_source.docx differ diff --git a/facelib/PoseEstimator.py b/facelib/PoseEstimator.py index ac8dce5..97abccb 100644 --- a/facelib/PoseEstimator.py +++ b/facelib/PoseEstimator.py @@ -19,7 +19,7 @@ class PoseEstimator(object): def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): exec( nnlib.import_all(), locals(), globals() ) - self.class_num = 180 + self.class_num = 91 self.model = PoseEstimator.BuildModel(resolution, class_num=self.class_num) @@ -35,9 +35,7 @@ class PoseEstimator(object): idx_tensor = np.array([idx for idx in range(self.class_num)], dtype=K.floatx() ) idx_tensor = K.constant(idx_tensor) - - #inp_t = Input ( (resolution,resolution,3) ) - + inp_t, = self.model.inputs pitch_bins_t, yaw_bins_t, roll_bins_t = self.model.outputs @@ -51,22 +49,26 @@ class PoseEstimator(object): inp_roll_bins_t = Input ( (self.class_num,) ) inp_roll_t = Input ( (1,) ) + + alpha = 0.001 pitch_loss = K.categorical_crossentropy(inp_pitch_bins_t, pitch_bins_t) \ - + 0.001 * K.mean(K.square( inp_pitch_t - pitch_t), -1) + + alpha * K.mean(K.square( inp_pitch_t - pitch_t), -1) yaw_loss = K.categorical_crossentropy(inp_yaw_bins_t, yaw_bins_t) \ - + 0.001 * K.mean(K.square( inp_yaw_t - yaw_t), -1) + + alpha * K.mean(K.square( inp_yaw_t - yaw_t), -1) roll_loss = K.categorical_crossentropy(inp_roll_bins_t, roll_bins_t) \ - + 0.001 * K.mean(K.square( inp_roll_t - roll_t), -1) + + alpha * K.mean(K.square( inp_roll_t - roll_t), -1) loss = K.mean( pitch_loss + yaw_loss + roll_loss ) - + + opt = Adam(lr=0.001, tf_cpu_mode=2) + if training: self.train = K.function ([inp_t, inp_pitch_bins_t, inp_pitch_t, inp_yaw_bins_t, inp_yaw_t, inp_roll_bins_t, inp_roll_t], - [loss], Adam(tf_cpu_mode=2).get_updates(loss, self.model.trainable_weights) ) + [loss], opt.get_updates(loss, self.model.trainable_weights) ) self.view = K.function ([inp_t], [pitch_t, yaw_t, roll_t] ) @@ -80,7 +82,7 @@ class PoseEstimator(object): self.model.save_weights (str(self.weights_path)) def train_on_batch(self, imgs, pitch_yaw_roll): - c = ( (pitch_yaw_roll+1) * 90.0 ).astype(np.int).astype(K.floatx()) + c = ( (pitch_yaw_roll+1) * 45.0 ).astype(np.int).astype(K.floatx()) inp_pitch = c[:,0:1] inp_yaw = c[:,1:2] @@ -104,7 +106,7 @@ class PoseEstimator(object): pitch, yaw, roll = self.view( [input_image] ) result = np.concatenate( (pitch[...,np.newaxis], yaw[...,np.newaxis], roll[...,np.newaxis]), -1 ) - result = np.clip ( result / 90.0 - 1, -1, 1 ) + result = np.clip ( result / 45.0 - 1, -1.0, 1.0 ) if input_shape_len == 3: result = result[0] @@ -126,7 +128,15 @@ class PoseEstimator(object): def func(input): x = input - + + # resnet50 = keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=K.int_shape(x)[1:], pooling='avg') + # x = resnet50(x) + # pitch = Dense(class_num, activation='softmax', name='pitch')(x) + # yaw = Dense(class_num, activation='softmax', name='yaw')(x) + # roll = Dense(class_num, activation='softmax', name='roll')(x) + + # return [pitch, yaw, roll] + x = Conv2D(64, kernel_size=11, strides=4, padding='same', activation='relu')(x) x = MaxPooling2D( (3,3), strides=2 )(x) @@ -139,11 +149,9 @@ class PoseEstimator(object): x = MaxPooling2D( (3,3), strides=2 )(x) x = Flatten()(x) - x = Dense(4096, activation='relu')(x) + x = Dense(1024, activation='relu')(x) x = Dropout(0.5)(x) - x = Dense(4096, activation='relu')(x) - x = Dropout(0.5)(x) - x = Dense(1000, activation='relu')(x) + x = Dense(1024, activation='relu')(x) pitch = Dense(class_num, activation='softmax', name='pitch')(x) yaw = Dense(class_num, activation='softmax', name='yaw')(x) @@ -151,5 +159,4 @@ class PoseEstimator(object): return [pitch, yaw, roll] - return func diff --git a/models/Model_DEV_POSEEST/Model.py b/models/Model_DEV_POSEEST/Model.py index c6267d9..f74abda 100644 --- a/models/Model_DEV_POSEEST/Model.py +++ b/models/Model_DEV_POSEEST/Model.py @@ -29,9 +29,9 @@ class Model(ModelBase): #override def onInitialize(self): exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4:64} ) + self.set_vram_batch_requirements( {4:32} ) - self.resolution = 227 + self.resolution = 128 self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF @@ -45,16 +45,17 @@ class Model(ModelBase): f = SampleProcessor.TypeFlags face_type = f.FACE_TYPE_FULL if self.options['face_type'] == 'f' else f.FACE_TYPE_HALF + normalize_vgg = False self.set_training_data_generators ([ SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, sample_process_options=SampleProcessor.Options( motion_blur = [25, 1] ), #random_flip=True, - output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE | f.OPT_APPLY_MOTION_BLUR, self.resolution], + output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE | f.OPT_APPLY_MOTION_BLUR, self.resolution, {'normalize_vgg':normalize_vgg} ], [f.PITCH_YAW_ROLL], ]), SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, sample_process_options=SampleProcessor.Options(), #random_flip=True, - output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE, self.resolution], + output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE, self.resolution, {'normalize_vgg':normalize_vgg} ], [f.PITCH_YAW_ROLL], ]) ]) diff --git a/samplelib/SampleProcessor.py b/samplelib/SampleProcessor.py index 39d953f..7939aaa 100644 --- a/samplelib/SampleProcessor.py +++ b/samplelib/SampleProcessor.py @@ -85,7 +85,8 @@ class SampleProcessor(object): random_sub_size = opts.get('random_sub_size', 0) normalize_std_dev = opts.get('normalize_std_dev', False) - + normalize_vgg = opts.get('normalize_vgg', False) + if f & SPTF.SOURCE != 0: img_type = 0 elif f & SPTF.WARPED != 0: @@ -228,7 +229,12 @@ class SampleProcessor(object): if normalize_std_dev: img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) ) - + elif normalize_vgg: + img_bgr = np.clip(img_bgr*255, 0, 255) + img_bgr[:,:,0] -= 103.939 + img_bgr[:,:,1] -= 116.779 + img_bgr[:,:,2] -= 123.68 + if f & SPTF.MODE_BGR != 0: img = img_bgr elif f & SPTF.MODE_BGR_SHUFFLE != 0: