diff options
| -rw-r--r-- | become_yukarin/data_struct.py | 11 | ||||
| -rw-r--r-- | become_yukarin/dataset/dataset.py | 13 | ||||
| -rw-r--r-- | scripts/extract_acoustic_feature.py | 49 | ||||
| -rw-r--r-- | scripts/ln_apply_subset.py | 48 | ||||
| -rw-r--r-- | scripts/ln_atr503_to_subset.py | 23 | ||||
| -rw-r--r-- | scripts/ln_jnas_subset.py | 30 |
6 files changed, 141 insertions, 33 deletions
diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py index 786dd7e..73b9b3b 100644 --- a/become_yukarin/data_struct.py +++ b/become_yukarin/data_struct.py @@ -23,7 +23,16 @@ class AcousticFeature(NamedTuple): spectrogram=self.spectrogram.astype(dtype), aperiodicity=self.aperiodicity.astype(dtype), mfcc=self.mfcc.astype(dtype), - voiced=self.mfcc.astype(dtype), + voiced=self.voiced.astype(dtype), + ) + + def astype_only_float(self, dtype): + return AcousticFeature( + f0=self.f0.astype(dtype), + spectrogram=self.spectrogram.astype(dtype), + aperiodicity=self.aperiodicity.astype(dtype), + mfcc=self.mfcc.astype(dtype), + voiced=self.voiced, ) def validate(self): diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py index 329226c..dae2324 100644 --- a/become_yukarin/dataset/dataset.py +++ b/become_yukarin/dataset/dataset.py @@ -126,21 +126,28 @@ class AcousticFeatureLoadProcess(BaseDataProcess): class AcousticFeatureSaveProcess(BaseDataProcess): - def __init__(self, validate=False): + def __init__(self, validate=False, ignore: List[str] = None): self._validate = validate + self._ignore = ignore if ignore is not None else [] def __call__(self, data: Dict[str, any], test=None): path = data['path'] # type: Path feature = data['feature'] # type: AcousticFeature if self._validate: feature.validate() - numpy.save(path.absolute(), dict( + + d = dict( f0=feature.f0, spectrogram=feature.spectrogram, aperiodicity=feature.aperiodicity, mfcc=feature.mfcc, voiced=feature.voiced, - )) + ) + for k in self._ignore: + assert k in d + d[k] = numpy.nan + + numpy.save(path.absolute(), d) class DistillateUsingFeatureProcess(BaseDataProcess): diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py index efde9b1..eba3253 100644 --- a/scripts/extract_acoustic_feature.py +++ b/scripts/extract_acoustic_feature.py @@ -30,7 +30,9 @@ parser.add_argument('--top_db', type=float, default=base_voice_param.top_db) parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period) parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order) parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha) +parser.add_argument('--ignore_feature', nargs='+', default=['spectrogram', 'aperiodicity']) parser.add_argument('--disable_alignment', action='store_true') +parser.add_argument('--enable_overwrite', action='store_true') arguments = parser.parse_args() @@ -48,7 +50,12 @@ def make_feature( def generate_feature(path1, path2): - # load wave and padding + out1 = Path(arguments.output1_directory, path1.stem + '.npy') + out2 = Path(arguments.output2_directory, path2.stem + '.npy') + if out1.exists() and out2.exists() and not arguments.enable_overwrite: + return + + # load wave and padding wave_file_load_process = WaveFileLoadProcess( sample_rate=arguments.sample_rate, top_db=arguments.top_db, @@ -62,8 +69,8 @@ def generate_feature(path1, path2): order=arguments.order, alpha=arguments.alpha, ) - f1 = acoustic_feature_process(wave1, test=True) - f2 = acoustic_feature_process(wave2, test=True) + f1 = acoustic_feature_process(wave1, test=True).astype_only_float(numpy.float32) + f2 = acoustic_feature_process(wave2, test=True).astype_only_float(numpy.float32) # alignment if not arguments.disable_alignment: @@ -94,28 +101,12 @@ def generate_feature(path1, path2): f2.validate() # save - acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True) - path = Path(arguments.output1_directory, path1.stem + '.npy') - feature = AcousticFeature( - f0=f1.f0, - spectrogram=f1.spectrogram, - aperiodicity=f1.aperiodicity, - mfcc=f1.mfcc, - voiced=f1.voiced, - ) - acoustic_feature_save_process({'path': path, 'feature': feature}) - print('saved!', path) + acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=True, ignore=arguments.ignore_feature) + acoustic_feature_save_process({'path': out1, 'feature': f1}) + print('saved!', out1) - path = Path(arguments.output2_directory, path2.stem + '.npy') - feature = AcousticFeature( - f0=f2.f0, - spectrogram=f2.spectrogram, - aperiodicity=f2.aperiodicity, - mfcc=f2.mfcc, - voiced=f2.voiced, - ) - acoustic_feature_save_process({'path': path, 'feature': feature}) - print('saved!', path) + acoustic_feature_save_process({'path': out2, 'feature': f2}) + print('saved!', out2) def generate_mean_var(path_directory: Path): @@ -126,7 +117,7 @@ def generate_mean_var(path_directory: Path): if var_mean.exists(): var_mean.unlink() - acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=True) + acoustic_feature_load_process = AcousticFeatureLoadProcess(validate=False) acoustic_feature_save_process = AcousticFeatureSaveProcess(validate=False) f0_list = [] @@ -135,10 +126,10 @@ def generate_mean_var(path_directory: Path): mfcc_list = [] for path in path_directory.glob('*'): feature = acoustic_feature_load_process(path) - f0_list.append(feature.f0[feature.voiced].ravel()) # remove unvoiced - spectrogram_list.append(feature.spectrogram.ravel()) - aperiodicity_list.append(feature.aperiodicity.ravel()) - mfcc_list.append(feature.mfcc.ravel()) + f0_list.append(numpy.ravel(feature.f0[feature.voiced])) # remove unvoiced + spectrogram_list.append(numpy.ravel(feature.spectrogram)) + aperiodicity_list.append(numpy.ravel(feature.aperiodicity)) + mfcc_list.append(numpy.ravel(feature.mfcc)) f0_list = numpy.concatenate(f0_list) spectrogram_list = numpy.concatenate(spectrogram_list) diff --git a/scripts/ln_apply_subset.py b/scripts/ln_apply_subset.py new file mode 100644 index 0000000..92345dd --- /dev/null +++ b/scripts/ln_apply_subset.py @@ -0,0 +1,48 @@ +""" +ある話者のATR503サブセットを、他の話者に対応するようにコピーする。 +targetは、拡張子前3文字がATR503サブセットでないといけない。 +""" + +import argparse +from pathlib import Path +import re +from itertools import chain, groupby + +parser = argparse.ArgumentParser() +parser.add_argument('source', type=Path) +parser.add_argument('target', type=Path) +parser.add_argument('output', type=Path) +parser.add_argument('--prefix', default='') +argument = parser.parse_args() + +source = argument.source # type: Path +target = argument.target # type: Path +output = argument.output # type: Path + +# source +sources = list(sorted(source.glob('*'))) +assert len(sources) == 503 + +names = ['{}{:02d}'.format(s, n + 1) for s in 'ABCDEFGHIJ' for n in range(50)] +names += ['J51', 'J52', 'J53'] + +assert all(n in s.name for s, n in zip(sources, names)) + +map_source = {n: s for s, n in zip(sources, names)} + +# target +keyfunc = lambda t: t.stem[-3:] +targets = list(target.glob('*')) +map_targets = {n: list(vs) for n, vs in groupby(sorted(targets, key=keyfunc), key=keyfunc)} + +assert all(n in names for n in map_targets.keys()) +assert len(list(chain.from_iterable(map_targets.values()))) == len(targets) + +# output +output.mkdir(exist_ok=True) + +for n in names: + s = map_source[n] + for t in map_targets[n]: + out = output / (argument.prefix + t.stem + s.suffix) + out.symlink_to(s) diff --git a/scripts/ln_atr503_to_subset.py b/scripts/ln_atr503_to_subset.py new file mode 100644 index 0000000..bb3c4d2 --- /dev/null +++ b/scripts/ln_atr503_to_subset.py @@ -0,0 +1,23 @@ +import argparse +from pathlib import Path + +parser = argparse.ArgumentParser() +parser.add_argument('input', type=Path) +parser.add_argument('output', type=Path) +parser.add_argument('--prefix', default='') +argument = parser.parse_args() + +input = argument.input # type: Path +output = argument.output # type: Path + +paths = list(sorted(input.glob('*'), key=lambda p: int(''.join(filter(str.isdigit, p.name))))) +assert len(paths) == 503 + +output.mkdir(exist_ok=True) + +names = ['{}{:02d}'.format(s, n + 1) for s in 'ABCDEFGHIJ' for n in range(50)] +names += ['J51', 'J52', 'J53'] + +for p, n in zip(paths, names): + out = output / (argument.prefix + n + p.suffix) + out.symlink_to(p) diff --git a/scripts/ln_jnas_subset.py b/scripts/ln_jnas_subset.py new file mode 100644 index 0000000..e5aba5c --- /dev/null +++ b/scripts/ln_jnas_subset.py @@ -0,0 +1,30 @@ +import argparse +import multiprocessing +from pathlib import Path + +from jnas_metadata_loader import load_from_directory +from jnas_metadata_loader.jnas_metadata import JnasMetadata + +parser = argparse.ArgumentParser() +parser.add_argument('jnas', type=Path) +parser.add_argument('output', type=Path) +parser.add_argument('--format', default='{sex}{text_id}_{mic}_atr_{subset}{sen_id}.wav') +argument = parser.parse_args() + +jnas = argument.jnas # type: Path +output = argument.output # type: Path + +jnas_list = load_from_directory(str(jnas)) +atr_list = jnas_list.subset_news_or_atr('B') + +output.mkdir(exist_ok=True) + + +def process(d: JnasMetadata): + p = d.path + out = output / argument.format.format(**d._asdict()) + out.symlink_to(p) + + +pool = multiprocessing.Pool() +pool.map(process, atr_list) |
