From f279994afdba8e08fc5e042a25f50db548ddbae3 Mon Sep 17 00:00:00 2001
From: Hiroshiba Kazuyuki
Date: Fri, 9 Mar 2018 02:52:24 +0900
Subject: リファクタリング
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (reader annotations, not part of the original commit; free text
in this section is not part of the diff):

* The subject line is Japanese for "Refactoring".
* The AcousticFeature fields now default to numpy.nan, which acts as a
  "feature absent" sentinel: concatenate() and pick() treat a field as
  present only when `not numpy.any(numpy.isnan(a))` holds. A real array that
  contains even a single NaN is therefore handled as absent.
* concatenate() decides presence from fs[0] only and fills only the fields
  named in `keys`; every other field keeps the numpy.nan default.
* silent() fills column 0 of `mfcc` with _min_mc (-18.3) and the remaining
  columns with zeros; every other requested feature is all zeros.
* dtypes() and the assert shown as context in the third hunk both use the
  `numpy.bool` alias. NOTE(review): that alias was deprecated in NumPy 1.20
  and removed in NumPy 1.24 - confirm which NumPy version the project pins.
* The same `is_target` lambda is defined twice, once in concatenate() and
  once in pick().
* NOTE(review): the line breaks and indentation of this patch were
  reconstructed from the hunk headers. The position of the blank context
  lines in the first and third hunks is inferred - verify against the
  repository before applying.

 become_yukarin/data_struct.py | 64 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 7 deletions(-)

(limited to 'become_yukarin/data_struct.py')

diff --git a/become_yukarin/data_struct.py b/become_yukarin/data_struct.py
index 78c8cf3..4474331 100644
--- a/become_yukarin/data_struct.py
+++ b/become_yukarin/data_struct.py
@@ -1,8 +1,9 @@
-from typing import NamedTuple
+from typing import NamedTuple, Dict, List
 
+import numpy
 import pyworld
-import numpy
 
+_min_mc = -18.3
 
 
 class Wave(NamedTuple):
@@ -11,11 +12,21 @@ class Wave(NamedTuple):
 
 
 class AcousticFeature(NamedTuple):
-    f0: numpy.ndarray
-    spectrogram: numpy.ndarray
-    aperiodicity: numpy.ndarray
-    mfcc: numpy.ndarray
-    voiced: numpy.ndarray
+    f0: numpy.ndarray = numpy.nan
+    spectrogram: numpy.ndarray = numpy.nan
+    aperiodicity: numpy.ndarray = numpy.nan
+    mfcc: numpy.ndarray = numpy.nan
+    voiced: numpy.ndarray = numpy.nan
+
+    @staticmethod
+    def dtypes():
+        return dict(
+            f0=numpy.float32,
+            spectrogram=numpy.float32,
+            aperiodicity=numpy.float32,
+            mfcc=numpy.float32,
+            voiced=numpy.bool,
+        )
 
     def astype(self, dtype):
         return AcousticFeature(
@@ -50,6 +61,45 @@ class AcousticFeature(NamedTuple):
 
         assert self.voiced.dtype == numpy.bool
 
+    @staticmethod
+    def silent(length: int, sizes: Dict[str, int], keys: List[str]):
+        d = {}
+        if 'f0' in keys:
+            d['f0'] = numpy.zeros((length, sizes['f0']), dtype=AcousticFeature.dtypes()['f0'])
+        if 'spectrogram' in keys:
+            d['spectrogram'] = numpy.zeros((length, sizes['spectrogram']),
+                                           dtype=AcousticFeature.dtypes()['spectrogram'])
+        if 'aperiodicity' in keys:
+            d['aperiodicity'] = numpy.zeros((length, sizes['aperiodicity']),
+                                            dtype=AcousticFeature.dtypes()['aperiodicity'])
+        if 'mfcc' in keys:
+            d['mfcc'] = numpy.hstack((
+                numpy.ones((length, 1), dtype=AcousticFeature.dtypes()['mfcc']) * _min_mc,
+                numpy.zeros((length, sizes['mfcc'] - 1), dtype=AcousticFeature.dtypes()['mfcc'])
+            ))
+        if 'voiced' in keys:
+            d['voiced'] = numpy.zeros((length, sizes['voiced']), dtype=AcousticFeature.dtypes()['voiced'])
+        feature = AcousticFeature(**d)
+        return feature
+
+    @staticmethod
+    def concatenate(fs: List['AcousticFeature'], keys: List[str]):
+        is_target = lambda a: not numpy.any(numpy.isnan(a))
+        return AcousticFeature(**{
+            key: numpy.concatenate([getattr(f, key) for f in fs]) if is_target(getattr(fs[0], key)) else numpy.nan
+            for key in keys
+        })
+
+    def pick(self, first: int, last: int):
+        is_target = lambda a: not numpy.any(numpy.isnan(a))
+        return AcousticFeature(
+            f0=self.f0[first:last] if is_target(self.f0) else numpy.nan,
+            spectrogram=self.spectrogram[first:last] if is_target(self.spectrogram) else numpy.nan,
+            aperiodicity=self.aperiodicity[first:last] if is_target(self.aperiodicity) else numpy.nan,
+            mfcc=self.mfcc[first:last] if is_target(self.mfcc) else numpy.nan,
+            voiced=self.voiced[first:last] if is_target(self.voiced) else numpy.nan,
+        )
+
     @staticmethod
     def get_sizes(sampling_rate: int, order: int):
         fft_size = pyworld.get_cheaptrick_fft_size(fs=sampling_rate)
-- 
cgit v1.2.3-70-g09d2