diff options
 -rw-r--r--  become_yukarin/config/config.py    | 10
 -rw-r--r--  become_yukarin/config/sr_config.py |  9
 -rw-r--r--  become_yukarin/model/model.py      | 94
 -rw-r--r--  become_yukarin/model/sr_model.py   | 97
4 files changed, 145 insertions(+), 65 deletions(-)
diff --git a/become_yukarin/config/config.py b/become_yukarin/config/config.py index f49b185..68ba1bd 100644 --- a/become_yukarin/config/config.py +++ b/become_yukarin/config/config.py @@ -30,6 +30,11 @@ class DatasetConfig(NamedTuple): class ModelConfig(NamedTuple): in_channels: int out_channels: int + generator_base_channels: int + generator_extensive_layers: int + discriminator_base_channels: int + discriminator_extensive_layers: int + weak_discriminator: bool class LossConfig(NamedTuple): @@ -103,6 +108,11 @@ def create_from_json(s: Union[str, Path]): model=ModelConfig( in_channels=d['model']['in_channels'], out_channels=d['model']['out_channels'], + generator_base_channels=d['model']['generator_base_channels'], + generator_extensive_layers=d['model']['generator_extensive_layers'], + discriminator_base_channels=d['model']['discriminator_base_channels'], + discriminator_extensive_layers=d['model']['discriminator_extensive_layers'], + weak_discriminator=d['model']['weak_discriminator'], ), loss=LossConfig( mse=d['loss']['mse'], diff --git a/become_yukarin/config/sr_config.py b/become_yukarin/config/sr_config.py index 4f980a2..75cf6ff 100644 --- a/become_yukarin/config/sr_config.py +++ b/become_yukarin/config/sr_config.py @@ -20,7 +20,10 @@ class SRDatasetConfig(NamedTuple): class SRModelConfig(NamedTuple): - pass + generator_base_channels: int + generator_extensive_layers: int + discriminator_base_channels: int + discriminator_extensive_layers: int class SRLossConfig(NamedTuple): @@ -85,6 +88,10 @@ def create_from_json(s: Union[str, Path]): num_test=d['dataset']['num_test'], ), model=SRModelConfig( + generator_base_channels=d['model']['generator_base_channels'], + generator_extensive_layers=d['model']['generator_extensive_layers'], + discriminator_base_channels=d['model']['discriminator_base_channels'], + discriminator_extensive_layers=d['model']['discriminator_extensive_layers'], ), loss=SRLossConfig( mse=d['loss']['mse'], diff --git 
a/become_yukarin/model/model.py b/become_yukarin/model/model.py index 56870d9..42333d0 100644 --- a/become_yukarin/model/model.py +++ b/become_yukarin/model/model.py @@ -52,8 +52,10 @@ class CBR(chainer.Chain): with self.init_scope(): if sample == 'down': self.c = Convolution1D(ch0, ch1, 4, 2, 1, initialW=w) - else: + elif sample == 'up': self.c = Deconvolution1D(ch0, ch1, 4, 2, 1, initialW=w) + else: + self.c = Convolution1D(ch0, ch1, 1, 1, 0, initialW=w) if bn: self.batchnorm = L.BatchNormalization(ch1) @@ -69,18 +71,23 @@ class CBR(chainer.Chain): class Encoder(chainer.Chain): - def __init__(self, in_ch) -> None: + def __init__(self, in_ch, base=64, extensive_layers=8) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0 = Convolution1D(in_ch, 64, 3, 1, 1, initialW=w) - self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c4 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c5 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c6 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c7 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) + if extensive_layers > 0: + self.c0 = Convolution1D(in_ch, base * 1, 3, 1, 1, initialW=w) + else: + self.c0 = Convolution1D(in_ch, base * 1, 1, 1, 0, initialW=w) + + _choose = lambda i: 'down' if i < extensive_layers else 'same' + self.c1 = CBR(base * 1, base * 2, bn=True, sample=_choose(1), activation=F.leaky_relu, dropout=False) + self.c2 = CBR(base * 2, base * 4, bn=True, sample=_choose(2), activation=F.leaky_relu, dropout=False) + self.c3 = CBR(base * 4, base * 8, bn=True, sample=_choose(3), activation=F.leaky_relu, dropout=False) 
+ self.c4 = CBR(base * 8, base * 8, bn=True, sample=_choose(4), activation=F.leaky_relu, dropout=False) + self.c5 = CBR(base * 8, base * 8, bn=True, sample=_choose(5), activation=F.leaky_relu, dropout=False) + self.c6 = CBR(base * 8, base * 8, bn=True, sample=_choose(6), activation=F.leaky_relu, dropout=False) + self.c7 = CBR(base * 8, base * 8, bn=True, sample=_choose(7), activation=F.leaky_relu, dropout=False) def __call__(self, x): hs = [F.leaky_relu(self.c0(x))] @@ -90,18 +97,23 @@ class Encoder(chainer.Chain): class Decoder(chainer.Chain): - def __init__(self, out_ch) -> None: + def __init__(self, out_ch, base=64, extensive_layers=8) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0 = CBR(512, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c1 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c2 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c3 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=False) - self.c4 = CBR(1024, 256, bn=True, sample='up', activation=F.relu, dropout=False) - self.c5 = CBR(512, 128, bn=True, sample='up', activation=F.relu, dropout=False) - self.c6 = CBR(256, 64, bn=True, sample='up', activation=F.relu, dropout=False) - self.c7 = Convolution1D(128, out_ch, 3, 1, 1, initialW=w) + _choose = lambda i: 'up' if i >= 8 - extensive_layers else 'same' + self.c0 = CBR(base * 8, base * 8, bn=True, sample=_choose(0), activation=F.relu, dropout=True) + self.c1 = CBR(base * 16, base * 8, bn=True, sample=_choose(1), activation=F.relu, dropout=True) + self.c2 = CBR(base * 16, base * 8, bn=True, sample=_choose(2), activation=F.relu, dropout=True) + self.c3 = CBR(base * 16, base * 8, bn=True, sample=_choose(3), activation=F.relu, dropout=False) + self.c4 = CBR(base * 16, base * 4, bn=True, sample=_choose(4), activation=F.relu, dropout=False) + self.c5 = CBR(base * 8, base * 2, bn=True, sample=_choose(5), 
activation=F.relu, dropout=False) + self.c6 = CBR(base * 4, base * 1, bn=True, sample=_choose(6), activation=F.relu, dropout=False) + + if extensive_layers > 0: + self.c7 = Convolution1D(base * 2, out_ch, 3, 1, 1, initialW=w) + else: + self.c7 = Convolution1D(base * 2, out_ch, 1, 1, 0, initialW=w) def __call__(self, hs): h = self.c0(hs[-1]) @@ -115,27 +127,32 @@ class Decoder(chainer.Chain): class Predictor(chainer.Chain): - def __init__(self, in_ch, out_ch) -> None: + def __init__(self, in_ch, out_ch, base=64, extensive_layers=8) -> None: super().__init__() with self.init_scope(): - self.encoder = Encoder(in_ch) - self.decoder = Decoder(out_ch) + self.encoder = Encoder(in_ch, base=base, extensive_layers=extensive_layers) + self.decoder = Decoder(out_ch, base=base, extensive_layers=extensive_layers) def __call__(self, x): return self.decoder(self.encoder(x)) class Discriminator(chainer.Chain): - def __init__(self, in_ch, out_ch) -> None: + def __init__(self, in_ch, out_ch, base=32, extensive_layers=5, is_weak=False) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0_0 = CBR(in_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False) - self.c0_1 = CBR(out_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False) - self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c4 = Convolution1D(512, 1, 3, 1, 1, initialW=w) + _choose = lambda i: 'down' if i < extensive_layers else 'same' + self.c0_0 = CBR(in_ch, base * 1, bn=False, sample=_choose(0), activation=F.leaky_relu, dropout=is_weak) + self.c0_1 = CBR(out_ch, base * 1, bn=False, sample=_choose(0), activation=F.leaky_relu, dropout=is_weak) + self.c1 = CBR(base * 2, base * 4, bn=True, sample=_choose(1), 
activation=F.leaky_relu, dropout=is_weak) + self.c2 = CBR(base * 4, base * 8, bn=True, sample=_choose(2), activation=F.leaky_relu, dropout=is_weak) + self.c3 = CBR(base * 8, base * 16, bn=True, sample=_choose(3), activation=F.leaky_relu, dropout=is_weak) + + if extensive_layers > 4: + self.c4 = Convolution1D(base * 16, 1, 3, 1, 1, initialW=w) + else: + self.c4 = Convolution1D(base * 16, 1, 1, 1, 0, initialW=w) def __call__(self, x_0, x_1): h = F.concat([self.c0_0(x_0), self.c0_1(x_1)]) @@ -148,10 +165,25 @@ class Discriminator(chainer.Chain): def create_predictor(config: ModelConfig): - return Predictor(in_ch=config.in_channels, out_ch=config.out_channels) + return Predictor( + in_ch=config.in_channels, + out_ch=config.out_channels, + base=config.generator_base_channels, + extensive_layers=config.generator_extensive_layers, + ) + + +def create_discriminator(config: ModelConfig): + return Discriminator( + in_ch=config.in_channels, + out_ch=config.out_channels, + base=config.discriminator_base_channels, + extensive_layers=config.discriminator_extensive_layers, + is_weak=config.weak_discriminator, + ) def create(config: ModelConfig): predictor = create_predictor(config) - discriminator = Discriminator(in_ch=config.in_channels, out_ch=config.out_channels) + discriminator = create_discriminator(config) return predictor, discriminator diff --git a/become_yukarin/model/sr_model.py b/become_yukarin/model/sr_model.py index f8e55d6..12863a7 100644 --- a/become_yukarin/model/sr_model.py +++ b/become_yukarin/model/sr_model.py @@ -16,8 +16,10 @@ class CBR(chainer.Chain): with self.init_scope(): if sample == 'down': self.c = L.Convolution2D(ch0, ch1, 4, 2, 1, initialW=w) - else: + elif sample == 'up': self.c = L.Deconvolution2D(ch0, ch1, 4, 2, 1, initialW=w) + else: + self.c = L.Convolution2D(ch0, ch1, 1, 1, 0, initialW=w) if bn: self.batchnorm = L.BatchNormalization(ch1) @@ -32,19 +34,24 @@ class CBR(chainer.Chain): return h -class Encoder(chainer.Chain): - def __init__(self, 
in_ch) -> None: +class SREncoder(chainer.Chain): + def __init__(self, in_ch, base=64, extensive_layers=8) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0 = L.Convolution2D(in_ch, 64, 3, 1, 1, initialW=w) - self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c4 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c5 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c6 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c7 = CBR(512, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) + if extensive_layers > 0: + self.c0 = L.Convolution2D(in_ch, base * 1, 3, 1, 1, initialW=w) + else: + self.c0 = L.Convolution2D(in_ch, base * 1, 1, 1, 0, initialW=w) + + _choose = lambda i: 'down' if i < extensive_layers else 'same' + self.c1 = CBR(base * 1, base * 2, bn=True, sample=_choose(1), activation=F.leaky_relu, dropout=False) + self.c2 = CBR(base * 2, base * 4, bn=True, sample=_choose(2), activation=F.leaky_relu, dropout=False) + self.c3 = CBR(base * 4, base * 8, bn=True, sample=_choose(3), activation=F.leaky_relu, dropout=False) + self.c4 = CBR(base * 8, base * 8, bn=True, sample=_choose(4), activation=F.leaky_relu, dropout=False) + self.c5 = CBR(base * 8, base * 8, bn=True, sample=_choose(5), activation=F.leaky_relu, dropout=False) + self.c6 = CBR(base * 8, base * 8, bn=True, sample=_choose(6), activation=F.leaky_relu, dropout=False) + self.c7 = CBR(base * 8, base * 8, bn=True, sample=_choose(7), activation=F.leaky_relu, dropout=False) def __call__(self, x): hs = [F.leaky_relu(self.c0(x))] @@ -53,19 +60,24 @@ class Encoder(chainer.Chain): return hs -class Decoder(chainer.Chain): - def 
__init__(self, out_ch) -> None: +class SRDecoder(chainer.Chain): + def __init__(self, out_ch, base=64, extensive_layers=8) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0 = CBR(512, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c1 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c2 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=True) - self.c3 = CBR(1024, 512, bn=True, sample='up', activation=F.relu, dropout=False) - self.c4 = CBR(1024, 256, bn=True, sample='up', activation=F.relu, dropout=False) - self.c5 = CBR(512, 128, bn=True, sample='up', activation=F.relu, dropout=False) - self.c6 = CBR(256, 64, bn=True, sample='up', activation=F.relu, dropout=False) - self.c7 = L.Convolution2D(128, out_ch, 3, 1, 1, initialW=w) + _choose = lambda i: 'up' if i >= 8 - extensive_layers else 'same' + self.c0 = CBR(base * 8, base * 8, bn=True, sample=_choose(0), activation=F.relu, dropout=True) + self.c1 = CBR(base * 16, base * 8, bn=True, sample=_choose(1), activation=F.relu, dropout=True) + self.c2 = CBR(base * 16, base * 8, bn=True, sample=_choose(2), activation=F.relu, dropout=True) + self.c3 = CBR(base * 16, base * 8, bn=True, sample=_choose(3), activation=F.relu, dropout=False) + self.c4 = CBR(base * 16, base * 4, bn=True, sample=_choose(4), activation=F.relu, dropout=False) + self.c5 = CBR(base * 8, base * 2, bn=True, sample=_choose(5), activation=F.relu, dropout=False) + self.c6 = CBR(base * 4, base * 1, bn=True, sample=_choose(6), activation=F.relu, dropout=False) + + if extensive_layers > 0: + self.c7 = L.Convolution2D(base * 2, out_ch, 3, 1, 1, initialW=w) + else: + self.c7 = L.Convolution2D(base * 2, out_ch, 1, 1, 0, initialW=w) def __call__(self, hs): h = self.c0(hs[-1]) @@ -79,27 +91,32 @@ class Decoder(chainer.Chain): class SRPredictor(chainer.Chain): - def __init__(self, in_ch, out_ch) -> None: + def __init__(self, in_ch, out_ch, base, 
extensive_layers) -> None: super().__init__() with self.init_scope(): - self.encoder = Encoder(in_ch) - self.decoder = Decoder(out_ch) + self.encoder = SREncoder(in_ch, base=base, extensive_layers=extensive_layers) + self.decoder = SRDecoder(out_ch, base=base, extensive_layers=extensive_layers) def __call__(self, x): return self.decoder(self.encoder(x)) class SRDiscriminator(chainer.Chain): - def __init__(self, in_ch, out_ch) -> None: + def __init__(self, in_ch, out_ch, base=32, extensive_layers=5) -> None: super().__init__() w = chainer.initializers.Normal(0.02) with self.init_scope(): - self.c0_0 = CBR(in_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False) - self.c0_1 = CBR(out_ch, 32, bn=False, sample='down', activation=F.leaky_relu, dropout=False) - self.c1 = CBR(64, 128, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c2 = CBR(128, 256, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c3 = CBR(256, 512, bn=True, sample='down', activation=F.leaky_relu, dropout=False) - self.c4 = L.Convolution2D(512, 1, 3, 1, 1, initialW=w) + _choose = lambda i: 'down' if i < extensive_layers else 'same' + self.c0_0 = CBR(in_ch, base * 1, bn=False, sample=_choose(0), activation=F.leaky_relu, dropout=False) + self.c0_1 = CBR(out_ch, base * 1, bn=False, sample=_choose(0), activation=F.leaky_relu, dropout=False) + self.c1 = CBR(base * 2, base * 4, bn=True, sample=_choose(1), activation=F.leaky_relu, dropout=False) + self.c2 = CBR(base * 4, base * 8, bn=True, sample=_choose(2), activation=F.leaky_relu, dropout=False) + self.c3 = CBR(base * 8, base * 16, bn=True, sample=_choose(3), activation=F.leaky_relu, dropout=False) + + if extensive_layers > 4: + self.c4 = L.Convolution2D(base * 16, 1, 3, 1, 1, initialW=w) + else: + self.c4 = L.Convolution2D(base * 16, 1, 1, 1, 0, initialW=w) def __call__(self, x_0, x_1): h = F.concat([self.c0_0(x_0), self.c0_1(x_1)]) @@ -112,10 +129,24 @@ class SRDiscriminator(chainer.Chain): def 
create_predictor_sr(config: SRModelConfig): - return SRPredictor(in_ch=1, out_ch=1) + return SRPredictor( + in_ch=1, + out_ch=1, + base=config.generator_base_channels, + extensive_layers=config.generator_extensive_layers, + ) + + +def create_discriminator_sr(config: SRModelConfig): + return SRDiscriminator( + in_ch=1, + out_ch=1, + base=config.discriminator_base_channels, + extensive_layers=config.discriminator_extensive_layers, + ) def create_sr(config: SRModelConfig): predictor = create_predictor_sr(config) - discriminator = SRDiscriminator(in_ch=1, out_ch=1) + discriminator = create_discriminator_sr(config) return predictor, discriminator
