atr subset scripts

author: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-24 06:43:24 +0900
committer: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-28 15:32:38 +0700
commit: cf71104424ca3234da06abf4ce6e5794fda594c5 (patch)
tree: 51332c400fda036c3f16c1863ac0491e6ad00673
parent: 9f87a74de09e38f9d8f3e7ebb5fd26fac44a3b0e (diff)
6 files changed, 141 insertions, 33 deletions
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index 786dd7e..73b9b3b 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -23,7 +23,16 @@ class AcousticFeature(NamedTuple):
             spectrogram=self.spectrogram.astype(dtype),
             aperiodicity=self.aperiodicity.astype(dtype),
             mfcc=self.mfcc.astype(dtype),
-            voiced=self.mfcc.astype(dtype),
+            voiced=self.voiced.astype(dtype),
+        )
+
+    def astype_only_float(self, dtype):
+        return AcousticFeature(
+            f0=self.f0.astype(dtype),
+            spectrogram=self.spectrogram.astype(dtype),
+            aperiodicity=self.aperiodicity.astype(dtype),
+            mfcc=self.mfcc.astype(dtype),
+            voiced=self.voiced,
         )
 
     def validate(self):
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 329226c..dae2324 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -126,21 +126,28 @@ class AcousticFeatureLoadProcess(BaseDataProcess):
 
 
 class AcousticFeatureSaveProcess(BaseDataProcess):
-    def __init__(self, validate=False):
+    def __init__(self, validate=False, ignore: List[str] = None):
         self._validate = validate
+        self._ignore = ignore if ignore is not None else []
 
     def __call__(self, data: Dict[str, any], test=None):
         path = data['path']  # type: Path
         feature = data['feature']  # type: AcousticFeature
         if self._validate:
             feature.validate()
-        numpy.save(path.absolute(), dict(
+
+        d = dict(
             f0=feature.f0,
             spectrogram=feature.spectrogram,
             aperiodicity=feature.aperiodicity,
             mfcc=feature.mfcc,
             voiced=feature.voiced,
-        ))
+        )
+        for k in self._ignore:
+            assert k in d
+            d[k] = numpy.nan
+
+        numpy.save(path.absolute(), d)
 
 
 class DistillateUsingFeatureProcess(BaseDataProcess):
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index efde9b1..eba3253 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -30,7 +30,9 @@ parser.add_argument('--top_db', type=float, default=base_voice_param.top_db)
 parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period)
 parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order)
 parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
+parser.add_argument('--ignore_feature', nargs='+', default=['spectrogram', 'aperiodicity'])
 parser.add_argument('--disable_alignment', action='store_true')
+parser.add_argument('--enable_overwrite', action='store_true')
 arguments = parser.parse_args()
 
 
@@ -48,7 +50,12 @@ def make_feature(
 
 
 def generate_feature(path1, path2):
-    # load wave and padding
+    out1 = Path(arguments.output1_directory, path1.stem + '.npy')
+    out2 = Path(arguments.output2_directory, path2.stem + '.npy')
+    if out1.exists() and out2.exists() and not arguments.enable_overwrite:
+        return
+
+    # load wave and padding
     wave_file_load_process = WaveFileLoadProcess(
         sample_rate=arguments.sample_rate,
         top_db=arguments.top_db,
@@ -62,8 +69,8 @@ def generate_feature(path1, path2):
         order=arguments.order,
         alpha=arguments.alpha,
     )
-    f1 = acoustic_feature_process(wave1, test=True)
-    f2 = acoustic_feature_process(wave2, test=True)
+    f1 = acoustic_feature_process(wave1, test=True).astype_only_float(numpy.float32)
+    f2 = acoustic_feature_process(wave2, test=True).astype_only_float(numpy.float32)
 
     # alignment
     if not arguments.disable_alignment:
@@ -94,28 +101,12 @@ def generate_feature(path1, path2):
         f2.validate()
 
     # save
-    acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True)
-    path = Path(arguments.output1_directory, path1.stem + '.npy')
-    feature = AcousticFeature(
-        f0=f1.f0,
-        spectrogram=f1.spectrogram,
-        aperiodicity=f1.aperiodicity,
-        mfcc=f1.mfcc,
-        voiced=f1.voiced,
-    )
-    acoustic_feature_save_process({'path': path, 'feature': feature})
-    print('saved!', path)
+    acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True, ignore=arguments.ignore_feature)
+    acoustic_feature_save_process({'path': out1, 'feature': f1})
+    print('saved!', out1)
 
-    path = Path(arguments.output2_directory, path2.stem + '.npy')
-    feature = AcousticFeature(
-        f0=f2.f0,
-        spectrogram=f2.spectrogram,
-        aperiodicity=f2.aperiodicity,
-        mfcc=f2.mfcc,
-        voiced=f2.voiced,
-    )
-    acoustic_feature_save_process({'path': path, 'feature': feature})
-    print('saved!', path)
+    acoustic_feature_save_process({'path': out2, 'feature': f2})
+    print('saved!', out2)
 
 
 def generate_mean_var(path_directory: Path):
@@ -126,7 +117,7 @@ def generate_mean_var(path_directory: Path):
     if var_mean.exists():
         var_mean.unlink()
 
-    acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=True)
+    acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=False)
     acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False)
 
     f0_list = []
@@ -135,10 +126,10 @@ def generate_mean_var(path_directory: Path):
     mfcc_list = []
     for path in path_directory.glob('*'):
         feature = acoustic_feature_load_process(path)
-        f0_list.append(feature.f0[feature.voiced].ravel())  # remove unvoiced
-        spectrogram_list.append(feature.spectrogram.ravel())
-        aperiodicity_list.append(feature.aperiodicity.ravel())
-        mfcc_list.append(feature.mfcc.ravel())
+        f0_list.append(numpy.ravel(feature.f0[feature.voiced]))  # remove unvoiced
+        spectrogram_list.append(numpy.ravel(feature.spectrogram))
+        aperiodicity_list.append(numpy.ravel(feature.aperiodicity))
+        mfcc_list.append(numpy.ravel(feature.mfcc))
 
     f0_list = numpy.concatenate(f0_list)
     spectrogram_list = numpy.concatenate(spectrogram_list)
diff --git a/scripts/ln_apply_subset.py b/scripts/ln_apply_subset.py
new file mode 100644
index 0000000..92345dd
--- /dev/null
+++ b/scripts/ln_apply_subset.py
@@ -0,0 +1,48 @@
+"""
+ある話者のATR503サブセットを、他の話者に対応するようにコピーする。
+targetは、拡張子前3文字がATR503サブセットでないといけない。
+"""
+
+import argparse
+from pathlib import Path
+import re
+from itertools import chain, groupby
+
+parser = argparse.ArgumentParser()
+parser.add_argument('source', type=Path)
+parser.add_argument('target', type=Path)
+parser.add_argument('output', type=Path)
+parser.add_argument('--prefix', default='')
+argument = parser.parse_args()
+
+source = argument.source  # type: Path
+target = argument.target  # type: Path
+output = argument.output  # type: Path
+
+# source
+sources = list(sorted(source.glob('*')))
+assert len(sources) == 503
+
+names = ['{}{:02d}'.format(s, n + 1) for s in 'ABCDEFGHIJ' for n in range(50)]
+names += ['J51', 'J52', 'J53']
+
+assert all(n in s.name for s, n in zip(sources, names))
+
+map_source = {n: s for s, n in zip(sources, names)}
+
+# target
+keyfunc = lambda t: t.stem[-3:]
+targets = list(target.glob('*'))
+map_targets = {n: list(vs) for n, vs in groupby(sorted(targets, key=keyfunc), key=keyfunc)}
+
+assert all(n in names for n in map_targets.keys())
+assert len(list(chain.from_iterable(map_targets.values()))) == len(targets)
+
+# output
+output.mkdir(exist_ok=True)
+
+for n in names:
+    s = map_source[n]
+    for t in map_targets[n]:
+        out = output / (argument.prefix + t.stem + s.suffix)
+        out.symlink_to(s)
diff --git a/scripts/ln_atr503_to_subset.py b/scripts/ln_atr503_to_subset.py
new file mode 100644
index 0000000..bb3c4d2
--- /dev/null
+++ b/scripts/ln_atr503_to_subset.py
@@ -0,0 +1,23 @@
+import argparse
+from pathlib import Path
+
+parser = argparse.ArgumentParser()
+parser.add_argument('input', type=Path)
+parser.add_argument('output', type=Path)
+parser.add_argument('--prefix', default='')
+argument = parser.parse_args()
+
+input = argument.input  # type: Path
+output = argument.output  # type: Path
+
+paths = list(sorted(input.glob('*'), key=lambda p: int(''.join(filter(str.isdigit, p.name)))))
+assert len(paths) == 503
+
+output.mkdir(exist_ok=True)
+
+names = ['{}{:02d}'.format(s, n + 1) for s in 'ABCDEFGHIJ' for n in range(50)]
+names += ['J51', 'J52', 'J53']
+
+for p, n in zip(paths, names):
+    out = output / (argument.prefix + n + p.suffix)
+    out.symlink_to(p)
diff --git a/scripts/ln_jnas_subset.py b/scripts/ln_jnas_subset.py
new file mode 100644
index 0000000..e5aba5c
--- /dev/null
+++ b/scripts/ln_jnas_subset.py
@@ -0,0 +1,30 @@
+import argparse
+import multiprocessing
+from pathlib import Path
+
+from jnas_metadata_loader import load_from_directory
+from jnas_metadata_loader.jnas_metadata import JnasMetadata
+
+parser = argparse.ArgumentParser()
+parser.add_argument('jnas', type=Path)
+parser.add_argument('output', type=Path)
+parser.add_argument('--format', default='{sex}{text_id}_{mic}_atr_{subset}{sen_id}.wav')
+argument = parser.parse_args()
+
+jnas = argument.jnas  # type: Path
+output = argument.output  # type: Path
+
+jnas_list = load_from_directory(str(jnas))
+atr_list = jnas_list.subset_news_or_atr('B')
+
+output.mkdir(exist_ok=True)
+
+
+def process(d: JnasMetadata):
+    p = d.path
+    out = output / argument.format.format(**d._asdict())
+    out.symlink_to(p)
+
+
+pool = multiprocessing.Pool()
+pool.map(process, atr_list)
author	Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>	2017-11-24 06:43:24 +0900
committer	Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp>	2017-11-28 15:32:38 +0700
commit	cf71104424ca3234da06abf4ce6e5794fda594c5 (patch)
tree	51332c400fda036c3f16c1863ac0491e6ad00673
parent	9f87a74de09e38f9d8f3e7ebb5fd26fac44a3b0e (diff)