summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-24 06:43:24 +0900
committer Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-28 15:32:38 +0700
commit cf71104424ca3234da06abf4ce6e5794fda594c5 (patch)
tree 51332c400fda036c3f16c1863ac0491e6ad00673
parent 9f87a74de09e38f9d8f3e7ebb5fd26fac44a3b0e (diff)
atr subset scripts
-rw-r--r-- become_yukarin/data_struct.py 11
-rw-r--r-- become_yukarin/dataset/dataset.py 13
-rw-r--r-- scripts/extract_acoustic_feature.py 49
-rw-r--r-- scripts/ln_apply_subset.py 48
-rw-r--r-- scripts/ln_atr503_to_subset.py 23
-rw-r--r-- scripts/ln_jnas_subset.py 30
6 files changed, 141 insertions, 33 deletions
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index 786dd7e..73b9b3b 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -23,7 +23,16 @@ class AcousticFeature(NamedTuple):
spectrogram=self.spectrogram.astype(dtype),
aperiodicity=self.aperiodicity.astype(dtype),
mfcc=self.mfcc.astype(dtype),
- voiced=self.mfcc.astype(dtype),
+ voiced=self.voiced.astype(dtype),
+ )
+
+ def astype_only_float(self, dtype):
+ return AcousticFeature(
+ f0=self.f0.astype(dtype),
+ spectrogram=self.spectrogram.astype(dtype),
+ aperiodicity=self.aperiodicity.astype(dtype),
+ mfcc=self.mfcc.astype(dtype),
+ voiced=self.voiced,
)
def validate(self):
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 329226c..dae2324 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -126,21 +126,28 @@ class AcousticFeatureLoadProcess(BaseDataProcess):
class AcousticFeatureSaveProcess(BaseDataProcess):
- def __init__(self, validate=False):
+ def __init__(self, validate=False, ignore: List[str] = None):
self._validate = validate
+ self._ignore = ignore if ignore is not None else []
def __call__(self, data: Dict[str, any], test=None):
path = data['path'] # type: Path
feature = data['feature'] # type: AcousticFeature
if self._validate:
feature.validate()
- numpy.save(path.absolute(), dict(
+
+ d = dict(
f0=feature.f0,
spectrogram=feature.spectrogram,
aperiodicity=feature.aperiodicity,
mfcc=feature.mfcc,
voiced=feature.voiced,
- ))
+ )
+ for k in self._ignore:
+ assert k in d
+ d[k] = numpy.nan
+
+ numpy.save(path.absolute(), d)
class DistillateUsingFeatureProcess(BaseDataProcess):
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index efde9b1..eba3253 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -30,7 +30,9 @@ parser.add_argument('--top_db', type=float, default=base_voice_param.top_db)
parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period)
parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order)
parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
+parser.add_argument('--ignore_feature', nargs='+', default=['spectrogram', 'aperiodicity'])
parser.add_argument('--disable_alignment', action='store_true')
+parser.add_argument('--enable_overwrite', action='store_true')
arguments = parser.parse_args()
@@ -48,7 +50,12 @@ def make_feature(
def generate_feature(path1, path2):
- # load wave and padding
+ out1 = Path(arguments.output1_directory, path1.stem + '.npy')
+ out2 = Path(arguments.output2_directory, path2.stem + '.npy')
+ if out1.exists() and out2.exists() and not arguments.enable_overwrite:
+ return
+
+ # load wave and padding
wave_file_load_process = WaveFileLoadProcess(
sample_rate=arguments.sample_rate,
top_db=arguments.top_db,
@@ -62,8 +69,8 @@ def generate_feature(path1, path2):
order=arguments.order,
alpha=arguments.alpha,
)
- f1 = acoustic_feature_process(wave1, test=True)
- f2 = acoustic_feature_process(wave2, test=True)
+ f1 = acoustic_feature_process(wave1, test=True).astype_only_float(numpy.float32)
+ f2 = acoustic_feature_process(wave2, test=True).astype_only_float(numpy.float32)
# alignment
if not arguments.disable_alignment:
@@ -94,28 +101,12 @@ def generate_feature(path1, path2):
f2.validate()
# save
- acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True)
- path = Path(arguments.output1_directory, path1.stem + '.npy')
- feature = AcousticFeature(
- f0=f1.f0,
- spectrogram=f1.spectrogram,
- aperiodicity=f1.aperiodicity,
- mfcc=f1.mfcc,
- voiced=f1.voiced,
- )
- acoustic_feature_save_process({'path': path, 'feature': feature})
- print('saved!', path)
+ acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True, ignore=arguments.ignore_feature)
+ acoustic_feature_save_process({'path': out1, 'feature': f1})
+ print('saved!', out1)
- path = Path(arguments.output2_directory, path2.stem + '.npy')
- feature = AcousticFeature(
- f0=f2.f0,
- spectrogram=f2.spectrogram,
- aperiodicity=f2.aperiodicity,
- mfcc=f2.mfcc,
- voiced=f2.voiced,
- )
- acoustic_feature_save_process({'path': path, 'feature': feature})
- print('saved!', path)
+ acoustic_feature_save_process({'path': out2, 'feature': f2})
+ print('saved!', out2)
def generate_mean_var(path_directory: Path):
@@ -126,7 +117,7 @@ def generate_mean_var(path_directory: Path):
if var_mean.exists():
var_mean.unlink()
- acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=True)
+ acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=False)
acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False)
f0_list = []
@@ -135,10 +126,10 @@ def generate_mean_var(path_directory: Path):
mfcc_list = []
for path in path_directory.glob('*'):
feature = acoustic_feature_load_process(path)
- f0_list.append(feature.f0[feature.voiced].ravel()) # remove unvoiced
- spectrogram_list.append(feature.spectrogram.ravel())
- aperiodicity_list.append(feature.aperiodicity.ravel())
- mfcc_list.append(feature.mfcc.ravel())
+ f0_list.append(numpy.ravel(feature.f0[feature.voiced])) # remove unvoiced
+ spectrogram_list.append(numpy.ravel(feature.spectrogram))
+ aperiodicity_list.append(numpy.ravel(feature.aperiodicity))
+ mfcc_list.append(numpy.ravel(feature.mfcc))
f0_list = numpy.concatenate(f0_list)
spectrogram_list = numpy.concatenate(spectrogram_list)
diff --git a/scripts/ln_apply_subset.py b/scripts/ln_apply_subset.py
new file mode 100644
index 0000000..92345dd
--- /dev/null
+++ b/scripts/ln_apply_subset.py
@@ -0,0 +1,48 @@
+"""
+ある話者のATR503サブセットを、他の話者に対応するようにコピーする。
+targetは、拡張子前3文字がATR503サブセットでないといけない。
+"""
+
+import argparse
+from pathlib import Path
+import re
+from itertools import chain, groupby
+
+parser = argparse.ArgumentParser()
+parser.add_argument('source', type=Path)
+parser.add_argument('target', type=Path)
+parser.add_argument('output', type=Path)
+parser.add_argument('--prefix', default='')
+argument = parser.parse_args()
+
+source = argument.source # type: Path
+target = argument.target # type: Path
+output = argument.output # type: Path
+
+# source
+sources = list(sorted(source.glob('*')))
+assert len(sources) == 503
+
+names = ['{}{:02d}'.format(s, n + 1) for s in 'ABCDEFGHIJ' for n in range(50)]
+names += ['J51', 'J52', 'J53']
+
+assert all(n in s.name for s, n in zip(sources, names))
+
+map_source = {n: s for s, n in zip(sources, names)}
+
+# target
+keyfunc = lambda t: t.stem[-3:]
+targets = list(target.glob('*'))
+map_targets = {n: list(vs) for n, vs in groupby(sorted(targets, key=keyfunc), key=keyfunc)}
+
+assert all(n in names for n in map_targets.keys())
+assert len(list(chain.from_iterable(map_targets.values()))) == len(targets)
+
+# output
+output.mkdir(exist_ok=True)
+
+for n in names:
+ s = map_source[n]
+ for t in map_targets[n]:
+ out = output / (argument.prefix + t.stem + s.suffix)
+ out.symlink_to(s)
diff --git a/scripts/ln_atr503_to_subset.py b/scripts/ln_atr503_to_subset.py
new file mode 100644
index 0000000..bb3c4d2
--- /dev/null
+++ b/scripts/ln_atr503_to_subset.py
@@ -0,0 +1,23 @@
+import argparse
+from pathlib import Path
+
+parser = argparse.ArgumentParser()
+parser.add_argument('input', type=Path)
+parser.add_argument('output', type=Path)
+parser.add_argument('--prefix', default='')
+argument = parser.parse_args()
+
+input = argument.input # type: Path
+output = argument.output # type: Path
+
+paths = list(sorted(input.glob('*'), key=lambda p: int(''.join(filter(str.isdigit, p.name)))))
+assert len(paths) == 503
+
+output.mkdir(exist_ok=True)
+
+names = ['{}{:02d}'.format(s, n + 1) for s in 'ABCDEFGHIJ' for n in range(50)]
+names += ['J51', 'J52', 'J53']
+
+for p, n in zip(paths, names):
+ out = output / (argument.prefix + n + p.suffix)
+ out.symlink_to(p)
diff --git a/scripts/ln_jnas_subset.py b/scripts/ln_jnas_subset.py
new file mode 100644
index 0000000..e5aba5c
--- /dev/null
+++ b/scripts/ln_jnas_subset.py
@@ -0,0 +1,30 @@
+import argparse
+import multiprocessing
+from pathlib import Path
+
+from jnas_metadata_loader import load_from_directory
+from jnas_metadata_loader.jnas_metadata import JnasMetadata
+
+parser = argparse.ArgumentParser()
+parser.add_argument('jnas', type=Path)
+parser.add_argument('output', type=Path)
+parser.add_argument('--format', default='{sex}{text_id}_{mic}_atr_{subset}{sen_id}.wav')
+argument = parser.parse_args()
+
+jnas = argument.jnas # type: Path
+output = argument.output # type: Path
+
+jnas_list = load_from_directory(str(jnas))
+atr_list = jnas_list.subset_news_or_atr('B')
+
+output.mkdir(exist_ok=True)
+
+
+def process(d: JnasMetadata):
+ p = d.path
+ out = output / argument.format.format(**d._asdict())
+ out.symlink_to(p)
+
+
+pool = multiprocessing.Pool()
+pool.map(process, atr_list)