mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-07-06 21:12:07 -07:00
Model file names will be prefixed with the GPU index if a GPU is chosen explicitly at train/convert start. If you leave the GPU idx choice at its default, the best GPU idx will be chosen and model file names will not contain an index prefix. This makes it possible to train the same fake with various models or options on multiple GPUs. H64 and H128: you can now choose 'Lighter autoencoder'; it is the same as vram gb <= 4 before this update. Added archived_models.zip, which contains old experiments. RecycleGAN: archived. devicelib: if your system has no NVML installed (some old cards), it will work with gpu_idx=0 as 'Generic GeForce GPU' with 2GB vram. Refactorings.
203 lines
9.1 KiB
Python
203 lines
9.1 KiB
Python
import numpy as np
|
|
|
|
from nnlib import nnlib
|
|
from models import ModelBase
|
|
from facelib import FaceType
|
|
from samples import *
|
|
|
|
class Model(ModelBase):
    """Autoencoder face model working on 128x128 half-face crops.

    One shared encoder feeds two decoders (source and destination). Each
    decoder outputs a 3-channel image and a 1-channel mask.
    """

    # File names under which the three sub-networks' weights are stored.
    encoderH5 = 'encoder.h5'
    decoder_srcH5 = 'decoder_src.h5'
    decoder_dstH5 = 'decoder_dst.h5'

    #override
    def onInitializeOptions(self, is_first_run, ask_override):
        """Fill in ``self.options['lighter_ae']``.

        On the first run the user is asked. Otherwise the stored value is
        kept, falling back to a default derived from the legacy
        'created_vram_gb' option, which is removed from the options.

        ``ask_override`` is accepted but not used by this model.
        """
        if is_first_run:
            self.options['lighter_ae'] = input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. If your GPU VRAM <= 4, you should to choose this option.")
        else:
            # Temporary support for old models, to be deprecated in future:
            # consume the legacy key (if any) and turn it into the default.
            default_lighter_ae = self.options.pop('created_vram_gb', 99) <= 4
            self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae)

    #override
    def onInitialize(self, **in_options):
        """Build the networks, load saved weights, compile the trainer.

        Also creates the ``src_view`` / ``dst_view`` prediction functions and,
        in training mode, registers the source and destination sample
        generators.
        """
        # exec(object, globals, locals): names bound by the executed code end
        # up in the mapping passed as locals, i.e. this module's globals().
        # Presumably this provides Input, Adam, K, DSSIMMaskLoss, ... used
        # below -- defined in nnlib, not visible here.
        exec(nnlib.import_all(), locals(), globals())

        # Passed as-is to the base class; presumably VRAM (GB) -> batch size.
        self.set_vram_batch_requirements(
            {2.5: 2, 3: 2, 4: 4, 5: 8, 6: 12, 7: 16, 8: 16, 9: 24, 10: 24, 11: 32, 12: 32, 13: 48}
        )

        bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.options['lighter_ae'])

        if not self.is_first_run():
            self.encoder.load_weights(self.get_strpath_storage_for_file(self.encoderH5))
            self.decoder_src.load_weights(self.get_strpath_storage_for_file(self.decoder_srcH5))
            self.decoder_dst.load_weights(self.get_strpath_storage_for_file(self.decoder_dstH5))

        input_src_bgr = Input(bgr_shape)
        input_src_mask = Input(mask_shape)
        input_dst_bgr = Input(bgr_shape)
        input_dst_mask = Input(mask_shape)

        rec_src_bgr, rec_src_mask = self.decoder_src(self.encoder(input_src_bgr))
        rec_dst_bgr, rec_dst_mask = self.decoder_dst(self.encoder(input_dst_bgr))

        # NOTE(review): `Model` is called here with (inputs, outputs), which
        # is not this class's interface; presumably the exec above rebinds the
        # module-level name to the nnlib/Keras model class -- confirm.
        self.ae = Model(
            [input_src_bgr, input_src_mask, input_dst_bgr, input_dst_mask],
            [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask],
        )

        # One loss per output, in output order: image outputs use the masked
        # DSSIM loss built from the matching mask input, mask outputs use MAE.
        self.ae.compile(
            optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999),
            loss=[DSSIMMaskLoss([input_src_mask]), 'mae', DSSIMMaskLoss([input_dst_mask]), 'mae'],
        )

        self.src_view = K.function([input_src_bgr], [rec_src_bgr, rec_src_mask])
        self.dst_view = K.function([input_dst_bgr], [rec_dst_bgr, rec_dst_mask])

        if self.is_training_mode:
            f = SampleProcessor.TypeFlags
            self.set_training_data_generators([
                SampleGeneratorFace(
                    self.training_data_src_path,
                    sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None,
                    debug=self.is_debug(),
                    batch_size=self.batch_size,
                    sample_process_options=SampleProcessor.Options(
                        random_flip=self.random_flip,
                        scale_range=np.array([-0.05, 0.05]) + self.src_scale_mod / 100.0,
                    ),
                    output_sample_types=[
                        [f.WARPED_TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 128],
                        [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 128],
                        [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_M | f.FACE_MASK_FULL, 128],
                    ],
                ),
                SampleGeneratorFace(
                    self.training_data_dst_path,
                    debug=self.is_debug(),
                    batch_size=self.batch_size,
                    sample_process_options=SampleProcessor.Options(random_flip=self.random_flip),
                    output_sample_types=[
                        [f.WARPED_TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 128],
                        [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 128],
                        [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_M | f.FACE_MASK_FULL, 128],
                    ],
                ),
            ])

    #override
    def onSave(self):
        """Save the encoder and both decoders to their storage files."""
        self.save_weights_safe([
            [self.encoder, self.get_strpath_storage_for_file(self.encoderH5)],
            [self.decoder_src, self.get_strpath_storage_for_file(self.decoder_srcH5)],
            [self.decoder_dst, self.get_strpath_storage_for_file(self.decoder_dstH5)],
        ])

    #override
    def onTrainOneEpoch(self, sample):
        """Train on one batch and return the two image losses.

        ``sample[0]`` and ``sample[1]`` are the source and destination
        batches, each unpacked as (warped, target, target_mask).

        Returns ``(('loss_src', value), ('loss_dst', value))``.
        """
        warped_src, target_src, target_src_mask = sample[0]
        warped_dst, target_dst, target_dst_mask = sample[1]

        # train_on_batch yields the total followed by one loss per output;
        # only the two image losses are reported.
        _, loss_src_bgr, _, loss_dst_bgr, _ = self.ae.train_on_batch(
            [warped_src, target_src_mask, warped_dst, target_dst_mask],
            [target_src, target_src_mask, target_dst, target_dst_mask],
        )

        return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) )

    #override
    def onGetPreview(self, sample):
        """Build the preview image from the first 4 samples of each batch.

        Each row is, left to right: source target, its reconstruction,
        destination target, its reconstruction, and the destination face
        passed through the source decoder. Predicted masks are not shown.

        Returns ``[('H128', image)]``.
        """
        test_A = sample[0][1][0:4]  # first 4 source targets
        test_B = sample[1][1][0:4]  # first 4 destination targets

        AA, _ = self.src_view([test_A])
        AB, _ = self.src_view([test_B])
        BB, _ = self.dst_view([test_B])

        rows = [
            np.concatenate(
                (test_A[i, :, :, 0:3], AA[i], test_B[i, :, :, 0:3], BB[i], AB[i]),
                axis=1,
            )
            for i in range(len(test_A))
        ]

        return [ ('H128', np.concatenate(rows, axis=0)) ]

    def predictor_func (self, face):
        """Run one face through ``src_view`` and return image + mask.

        Only the first three channels of ``face`` are fed to the network.
        The predicted image and mask are concatenated on the last axis.
        """
        face_128_bgr = face[..., 0:3]

        # src_view works on batches: add a batch axis, then strip it again.
        x, mx = self.src_view([np.expand_dims(face_128_bgr, 0)])
        x, mx = x[0], mx[0]

        return np.concatenate((x, mx), -1)

    #override
    def get_converter(self, **in_options):
        """Return a ConverterMasked wired to ``predictor_func``.

        Extra keyword arguments are forwarded to ConverterMasked.
        """
        from models import ConverterMasked
        return ConverterMasked(self.predictor_func,
                               predictor_input_size=128,
                               output_size=128,
                               face_type=FaceType.HALF,
                               base_erode_mask_modifier=100,
                               base_blur_mask_modifier=100,
                               **in_options)

    def Build(self, lighter_ae):
        """Create the encoder and two decoders.

        ``lighter_ae`` selects a bottleneck/decoder width of 256 instead of
        512.

        Returns ``(bgr_shape, mask_shape, encoder, decoder, decoder)`` where
        the two decoders are separate instances of the same architecture.
        """
        # NOTE(review): onInitialize calls nnlib.import_all() while this uses
        # the nnlib.code_import_all attribute -- confirm both are intended.
        exec(nnlib.code_import_all, locals(), globals())

        bgr_shape = (128, 128, 3)
        mask_shape = (128, 128, 1)

        # The light and full variants differ only in this width.
        ae_dims = 256 if lighter_ae else 512

        def downscale (dim):
            def func(x):
                return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x))
            return func

        def upscale (dim):
            def func(x):
                return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x)))
            return func

        def Encoder(input_shape):
            input_layer = Input(input_shape)
            x = input_layer
            for dim in (128, 256, 512, 1024):
                x = downscale(dim)(x)
            x = Dense(ae_dims)(Flatten()(x))
            x = Dense(8 * 8 * ae_dims)(x)
            x = Reshape((8, 8, ae_dims))(x)
            x = upscale(ae_dims)(x)
            return Model(input_layer, x)

        def Decoder():
            input_ = Input(shape=(16, 16, ae_dims))

            def upscale_stack(t):
                for dim in (ae_dims, ae_dims // 2, ae_dims // 4):
                    t = upscale(dim)(t)
                return t

            # Layers are created in the same order as before (image branch,
            # mask branch, then the two output convolutions) so previously
            # saved weights keep matching the model.
            x = upscale_stack(input_)
            y = upscale_stack(input_)  # mask decoder

            x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x)
            y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y)

            return Model(input_, [x, y])

        return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder()
|