diff options
 -rw-r--r--  become_yukarin/config/config.py    | 10
 -rw-r--r--  become_yukarin/config/sr_config.py |  9
 -rw-r--r--  become_yukarin/model/model.py      | 94
 -rw-r--r--  become_yukarin/model/sr_model.py   | 97
4 files changed, 145 insertions(+), 65 deletions(-)
diff --git a/become_yukarin/config/config.py b/become_yukarin/config/config.py index f49b185..68ba1bd 100644 --- a/become_yukarin/config/config.py +++ b/become_yukarin/config/config.py @@ -30,6 +30,11 @@ class DatasetConfig(NamedTuple): class ModelConfig(NamedTuple): in_channels: int out_channels: int + generator_base_channels: int + generator_extensive_layers: int + discriminator_base_channels: int + discriminator_extensive_layers: int + weak_discriminator: bool class LossConfig(NamedTuple): @@ -103,6 +108,11 @@ def create_from_json(s: Union[str, Path]): model=ModelConfig( in_channels=d['model']['in_channels'], out_channels=d['model']['out_channels'], + generator_base_channels=d['model']['generator_base_channels'], + generator_extensive_layers=d['model']['generator_extensive_layers'], + discriminator_base_channels=d['model']['discriminator_base_channels'], + discriminator_extensive_layers=d['model']['discriminator_extensive_layers'], + weak_discriminator=d['model']['weak_discriminator'], ), loss=LossConfig( mse=d['loss']['mse'], diff --git a/become_yukarin/config/sr_config.py b/become_yukarin/config/sr_config.py index 4f980a2..75cf6ff 100644 --- a/become_yukarin/config/sr_config.py +++ b/become_yukarin/config/sr_config.py @@ -20,7 +20,10 @@ class SRDatasetConfig(NamedTuple): class SRModelConfig(NamedTuple): - pass + generator_base_channels: int + generator_extensive_layers: int + discriminator_base_channels: int + discriminator_extensive_layers: int class SRLossConfig(NamedTuple): @@ -85,6 +88,10 @@ def create_from_json(s: Union[str, Path]): num_test=d['dataset']['num_test'], ), model=SRModelConfig( + generator_base_channels=d['model']['generator_base_channels'], + generator_extensive_layers=d['model']['generator_extensive_layers'], + discriminator_base_channels=d['model']['discriminator_base_channels'], + discriminator_extensive_layers=d['model']['discriminator_extensive_layers'], ), loss=SRLossConfig( mse=d['loss']['mse'], diff --git 
a/become_yukarin/model/model.py b/become_yukarin/model/model.py index 56870d9..42333d0 100644 --- a/become_yukarin/model/model.py +++ b/become_yukarin/model/model.py @@ -52,8 +52,10 @@ class CBR(chainer.Chain): with self.init_scope(): if sample == 'down': self.c = Convolution1D(ch0, ch1, 4, 2, 1, initialW=w) - else: + elif sample == 'up': self.c = Deconvolution1D(ch0, ch1, 4, 2, 1, initialW=w) + else: + self.c = Convolution1D(ch0, ch1, 1, 1, 0, initialW=w) if bn: self.batchnorm = L.BatchNormalization(ch1) @@ -69,18 +71,23 @@ class CBR(chainer.Chain): class Encoder(chainer.Chain): - def __init__(self, in_ch) -> None: + def __init__(self, in_ch, base=64, extensive_layers=8) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0 = Convolution1D(in_ch, 64, 3, 1, 1, initialW=w) - self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c4 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c5 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c6 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c7 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) + if extensive_layers > 0: + self.c0 = Convolution1D(in_ch, base * 1, 3, 1, 1, initialW=w) + else: + self.c0 = Convolution1D(in_ch, base * 1, 1, 1, 0, initialW=w) + + _choose = lambda i: 'down' if i < extensive_layers else 'same' + self.c1 = CBR(base * 1, base * 2, bn=True, sample=_choose(1), activation=F.leaky_relu, dropout=False) + self.c2 = CBR(base * 2, base * 4, bn=True, sample=_choose(2), activation=F.leaky_relu, dropout=False) + self.c3 = CBR(base * 4, base * 8, bn=True, sample=_choose(3), activation=F.leaky_relu, dropout=False) 
+ self.c4 = CBR(base * 8, base * 8, bn=True, sample=_choose(4), activation=F.leaky_relu, dropout=False) + self.c5 = CBR(base * 8, base * 8, bn=True, sample=_choose(5), activation=F.leaky_relu, dropout=False) + self.c6 = CBR(base * 8, base * 8, bn=True, sample=_choose(6), activation=F.leaky_relu, dropout=False) + self.c7 = CBR(base * 8, base * 8, bn=True, sample=_choose(7), activation=F.leaky_relu, dropout=False) def __call__(self, x): hs = [F.leaky_relu(self.c0(x))] @@ -90,18 +97,23 @@ class Encoder(chainer.Chain): class Decoder(chainer.Chain): - def __init__(self, out_ch) -> None: + def __init__(self, out_ch, base=64, extensive_layers=8) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0 = CBR(512, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c1 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c2 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c3 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=False) - self.c4 = CBR(1024, 256, bn=True, sample='up', activation=F.relu, dropout=False) - self.c5 = CBR(512, 128, bn=True, sample='up', activation=F.relu, dropout=False) - self.c6 = CBR(256, 64, bn=True, sample='up', activation=F.relu, dropout=False) - self.c7 = Convolution1D(128, out_ch, 3, 1, 1, initialW=w) + _choose = lambda i: 'up' if i >= 8 - extensive_layers else 'same' + self.c0 = CBR(base * 8, base * 8, bn=True, sample=_choose(0), activation=F.relu, dropout=True) + self.c1 = CBR(base * 16, base * 8, bn=True, sample=_choose(1), activation=F.relu, dropout=True) + self.c2 = CBR(base * 16, base * 8, bn=True, sample=_choose(2), activation=F.relu, dropout=True) + self.c3 = CBR(base * 16, base * 8, bn=True, sample=_choose(3), activation=F.relu, dropout=False) + self.c4 = CBR(base * 16, base * 4, bn=True, sample=_choose(4), activation=F.relu, dropout=False) + self.c5 = CBR(base * 8, base * 2, bn=True, sample=_choose(5), 
activation=F.relu, dropout=False) + self.c6 = CBR(base * 4, base * 1, bn=True, sample=_choose(6), activation=F.relu, dropout=False) + + if extensive_layers > 0: + self.c7 = Convolution1D(base * 2, out_ch, 3, 1, 1, initialW=w) + else: + self.c7 = Convolution1D(base * 2, out_ch, 1, 1, 0, initialW=w) def __call__(self, hs): h = self.c0(hs[-1]) @@ -115,27 +127,32 @@ class Decoder(chainer.Chain): class Predictor(chainer.Chain): - def __init__(self, in_ch, out_ch) -> None: + def __init__(self, in_ch, out_ch, base=64, extensive_layers=8) -> None: super().__init__() with self.init_scope(): - self.encoder = Encoder(in_ch) - self.decoder = Decoder(out_ch) + self.encoder = Encoder(in_ch, base=base, extensive_layers=extensive_layers) + self.decoder = Decoder(out_ch, base=base, extensive_layers=extensive_layers) def __call__(self, x): return self.decoder(self.encoder(x)) class Discriminator(chainer.Chain): - def __init__(self, in_ch, out_ch) -> None: + def __init__(self, in_ch, out_ch, base=32, extensive_layers=5, is_weak=False) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0_0 = CBR(in_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False) - self.c0_1 = CBR(out_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False) - self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c4 = Convolution1D(512, 1, 3, 1, 1, initialW=w) + _choose = lambda i: 'down' if i < extensive_layers else 'same' + self.c0_0 = CBR(in_ch, base * 1, bn=False, sample=_choose(0), activation=F.leaky_relu, dropout=is_weak) + self.c0_1 = CBR(out_ch, base * 1, bn=False, sample=_choose(0), activation=F.leaky_relu, dropout=is_weak) + self.c1 = CBR(base * 2, base * 4, bn=True, sample=_choose(1), 
activation=F.leaky_relu, dropout=is_weak) + self.c2 = CBR(base * 4, base * 8, bn=True, sample=_choose(2), activation=F.leaky_relu, dropout=is_weak) + self.c3 = CBR(base * 8, base * 16, bn=True, sample=_choose(3), activation=F.leaky_relu, dropout=is_weak) + + if extensive_layers > 4: + self.c4 = Convolution1D(base * 16, 1, 3, 1, 1, initialW=w) + else: + self.c4 = Convolution1D(base * 16, 1, 1, 1, 0, initialW=w) def __call__(self, x_0, x_1): h = F.concat([self.c0_0(x_0), self.c0_1(x_1)]) @@ -148,10 +165,25 @@ class Discriminator(chainer.Chain): def create_predictor(config: ModelConfig): - return Predictor(in_ch=config.in_channels, out_ch=config.out_channels) + return Predictor( + in_ch=config.in_channels, + out_ch=config.out_channels, + base=config.generator_base_channels, + extensive_layers=config.generator_extensive_layers, + ) + + +def create_discriminator(config: ModelConfig): + return Discriminator( + in_ch=config.in_channels, + out_ch=config.out_channels, + base=config.discriminator_base_channels, + extensive_layers=config.discriminator_extensive_layers, + is_weak=config.weak_discriminator, + ) def create(config: ModelConfig): predictor = create_predictor(config) - discriminator = Discriminator(in_ch=config.in_channels, out_ch=config.out_channels) + discriminator = create_discriminator(config) return predictor, discriminator diff --git a/become_yukarin/model/sr_model.py b/become_yukarin/model/sr_model.py index f8e55d6..12863a7 100644 --- a/become_yukarin/model/sr_model.py +++ b/become_yukarin/model/sr_model.py @@ -16,8 +16,10 @@ class CBR(chainer.Chain): with self.init_scope(): if sample == 'down': self.c = L.Convolution2D(ch0, ch1, 4, 2, 1, initialW=w) - else: + elif sample == 'up': self.c = L.Deconvolution2D(ch0, ch1, 4, 2, 1, initialW=w) + else: + self.c = L.Convolution2D(ch0, ch1, 1, 1, 0, initialW=w) if bn: self.batchnorm = L.BatchNormalization(ch1) @@ -32,19 +34,24 @@ class CBR(chainer.Chain): return h -class Encoder(chainer.Chain): - def __init__(self, 
in_ch) -> None: +class SREncoder(chainer.Chain): + def __init__(self, in_ch, base=64, extensive_layers=8) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0 = L.Convolution2D(in_ch, 64, 3, 1, 1, initialW=w) - self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c4 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c5 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c6 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c7 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) + if extensive_layers > 0: + self.c0 = L.Convolution2D(in_ch, base * 1, 3, 1, 1, initialW=w) + else: + self.c0 = L.Convolution2D(in_ch, base * 1, 1, 1, 0, initialW=w) + + _choose = lambda i: 'down' if i < extensive_layers else 'same' + self.c1 = CBR(base * 1, base * 2, bn=True, sample=_choose(1), activation=F.leaky_relu, dropout=False) + self.c2 = CBR(base * 2, base * 4, bn=True, sample=_choose(2), activation=F.leaky_relu, dropout=False) + self.c3 = CBR(base * 4, base * 8, bn=True, sample=_choose(3), activation=F.leaky_relu, dropout=False) + self.c4 = CBR(base * 8, base * 8, bn=True, sample=_choose(4), activation=F.leaky_relu, dropout=False) + self.c5 = CBR(base * 8, base * 8, bn=True, sample=_choose(5), activation=F.leaky_relu, dropout=False) + self.c6 = CBR(base * 8, base * 8, bn=True, sample=_choose(6), activation=F.leaky_relu, dropout=False) + self.c7 = CBR(base * 8, base * 8, bn=True, sample=_choose(7), activation=F.leaky_relu, dropout=False) def __call__(self, x): hs = [F.leaky_relu(self.c0(x))] @@ -53,19 +60,24 @@ class Encoder(chainer.Chain): return hs -class Decoder(chainer.Chain): - def 
__init__(self, out_ch) -> None: +class SRDecoder(chainer.Chain): + def __init__(self, out_ch, base=64, extensive_layers=8) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0 = CBR(512, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c1 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c2 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c3 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=False) - self.c4 = CBR(1024, 256, bn=True, sample='up', activation=F.relu, dropout=False) - self.c5 = CBR(512, 128, bn=True, sample='up', activation=F.relu, dropout=False) - self.c6 = CBR(256, 64, bn=True, sample='up', activation=F.relu, dropout=False) - self.c7 = L.Convolution2D(128, out_ch, 3, 1, 1, initialW=w) + _choose = lambda i: 'up' if i >= 8 - extensive_layers else 'same' + self.c0 = CBR(base * 8, base * 8, bn=True, sample=_choose(0), activation=F.relu, dropout=True) + self.c1 = CBR(base * 16, base * 8, bn=True, sample=_choose(1), activation=F.relu, dropout=True) + self.c2 = CBR(base * 16, base * 8, bn=True, sample=_choose(2), activation=F.relu, dropout=True) + self.c3 = CBR(base * 16, base * 8, bn=True, sample=_choose(3), activation=F.relu, dropout=False) + self.c4 = CBR(base * 16, base * 4, bn=True, sample=_choose(4), activation=F.relu, dropout=False) + self.c5 = CBR(base * 8, base * 2, bn=True, sample=_choose(5), activation=F.relu, dropout=False) + self.c6 = CBR(base * 4, base * 1, bn=True, sample=_choose(6), activation=F.relu, dropout=False) + + if extensive_layers > 0: + self.c7 = L.Convolution2D(base * 2, out_ch, 3, 1, 1, initialW=w) + else: + self.c7 = L.Convolution2D(base * 2, out_ch, 1, 1, 0, initialW=w) def __call__(self, hs): h = self.c0(hs[-1]) @@ -79,27 +91,32 @@ class Decoder(chainer.Chain): class SRPredictor(chainer.Chain): - def __init__(self, in_ch, out_ch) -> None: + def __init__(self, in_ch, out_ch, base, 
extensive_layers) -> None: super().__init__() with self.init_scope(): - self.encoder = Encoder(in_ch) - self.decoder = Decoder(out_ch) + self.encoder = SREncoder(in_ch, base=base, extensive_layers=extensive_layers) + self.decoder = SRDecoder(out_ch, base=base, extensive_layers=extensive_layers) def __call__(self, x): return self.decoder(self.encoder(x)) class SRDiscriminator(chainer.Chain): - def __init__(self, in_ch, out_ch) -> None: + def __init__(self, in_ch, out_ch, base=32, extensive_layers=5) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0_0 = CBR(in_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False) - self.c0_1 = CBR(out_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False) - self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c4 = L.Convolution2D(512, 1, 3, 1, 1, initialW=w) + _choose = lambda i: 'down' if i < extensive_layers else 'same' + self.c0_0 = CBR(in_ch, base * 1, bn=False, sample=_choose(0), activation=F.leaky_relu, dropout=False) + self.c0_1 = CBR(out_ch, base * 1, bn=False, sample=_choose(0), activation=F.leaky_relu, dropout=False) + self.c1 = CBR(base * 2, base * 4, bn=True, sample=_choose(1), activation=F.leaky_relu, dropout=False) + self.c2 = CBR(base * 4, base * 8, bn=True, sample=_choose(2), activation=F.leaky_relu, dropout=False) + self.c3 = CBR(base * 8, base * 16, bn=True, sample=_choose(3), activation=F.leaky_relu, dropout=False) + + if extensive_layers > 4: + self.c4 = L.Convolution2D(base * 16, 1, 3, 1, 1, initialW=w) + else: + self.c4 = L.Convolution2D(base * 16, 1, 1, 1, 0, initialW=w) def __call__(self, x_0, x_1): h = F.concat([self.c0_0(x_0), self.c0_1(x_1)]) @@ -112,10 +129,24 @@ class SRDiscriminator(chainer.Chain): def 
create_predictor_sr(config: SRModelConfig): - return SRPredictor(in_ch=1, out_ch=1) + return SRPredictor( + in_ch=1, + out_ch=1, + base=config.generator_base_channels, + extensive_layers=config.generator_extensive_layers, + ) + + +def create_discriminator_sr(config: SRModelConfig): + return SRDiscriminator( + in_ch=1, + out_ch=1, + base=config.discriminator_base_channels, + extensive_layers=config.discriminator_extensive_layers, + ) def create_sr(config: SRModelConfig): predictor = create_predictor_sr(config) - discriminator = SRDiscriminator(in_ch=1, out_ch=1) + discriminator = create_discriminator_sr(config) return predictor, discriminator
