summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2018-01-25 20:10:01 +0900
committer Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2018-01-25 20:10:01 +0900
commit c44e1ec9b24a70cc30de5682bf1855afe5eb0485 (patch)
tree dea956f863f5d04a5206a43dbc8fc62eb6c90275
parent 3ecf878ec5f35c8242b0b5e488d8f8d1f50e9aaf (diff)
mypy & harvest追加
-rw-r--r-- become_yukarin/acoustic_converter.py 17
-rw-r--r-- become_yukarin/dataset/dataset.py 66
-rw-r--r-- become_yukarin/dataset/utility.py 7
-rw-r--r-- become_yukarin/model/model.py 14
-rw-r--r-- become_yukarin/model/sr_model.py 10
-rw-r--r-- become_yukarin/param.py 1
-rw-r--r-- become_yukarin/super_resolution.py 13
-rw-r--r-- become_yukarin/updater/sr_updater.py 2
-rw-r--r-- become_yukarin/updater/updater.py 2
-rw-r--r-- become_yukarin/voice_changer.py 2
-rw-r--r-- scripts/extract_acoustic_feature.py 22
-rw-r--r-- scripts/voice_conversion_test.py 4
12 files changed, 95 insertions, 65 deletions
diff --git a/become_yukarin/acoustic_converter.py b/become_yukarin/acoustic_converter.py
index 13e6225..498bdb1 100644
--- a/become_yukarin/acoustic_converter.py
+++ b/become_yukarin/acoustic_converter.py
@@ -21,7 +21,7 @@ from become_yukarin.model.model import create_predictor
class AcousticConverter(object):
- def __init__(self, config: Config, model_path: Path, gpu: int = None):
+ def __init__(self, config: Config, model_path: Path, gpu: int = None) -> None:
self.config = config
self.model_path = model_path
self.gpu = gpu
@@ -40,6 +40,7 @@ class AcousticConverter(object):
frame_period=param.acoustic_feature_param.frame_period,
order=param.acoustic_feature_param.order,
alpha=param.acoustic_feature_param.alpha,
+ f0_estimating_method=param.acoustic_feature_param.f0_estimating_method,
)
self._acoustic_feature_load_process = acoustic_feature_load_process = AcousticFeatureLoadProcess()
@@ -114,14 +115,14 @@ class AcousticConverter(object):
).astype(numpy.float64)
return out
- def convert_from_audio_path(self, input: Path, out_sampling_rate: Optional[int] = None):
- input = self._wave_process(str(input), test=True)
- input = self._feature_process(input, test=True)
- return self.convert_from_feature(input, out_sampling_rate)
+ def convert_from_audio_path(self, path: Path, out_sampling_rate: Optional[int] = None):
+ wave = self._wave_process(str(path), test=True)
+ feature = self._feature_process(wave, test=True)
+ return self.convert_from_feature(feature, out_sampling_rate)
- def convert_from_feature_path(self, input: Path, out_sampling_rate: Optional[int] = None):
- input = self._acoustic_feature_load_process(input, test=True)
- return self.convert_from_feature(input, out_sampling_rate)
+ def convert_from_feature_path(self, path: Path, out_sampling_rate: Optional[int] = None):
+ feature = self._acoustic_feature_load_process(path, test=True)
+ return self.convert_from_feature(feature, out_sampling_rate)
def convert_from_feature(self, input: AcousticFeature, out_sampling_rate: Optional[int] = None):
if out_sampling_rate is None:
diff --git a/become_yukarin/dataset/dataset.py b/become_yukarin/dataset/dataset.py
index 90dcd4a..1a1438a 100644
--- a/become_yukarin/dataset/dataset.py
+++ b/become_yukarin/dataset/dataset.py
@@ -4,6 +4,7 @@ import typing
from abc import ABCMeta, abstractmethod
from collections import defaultdict
from pathlib import Path
+from typing import Any
from typing import Callable
from typing import Dict
from typing import List
@@ -28,7 +29,7 @@ class BaseDataProcess(metaclass=ABCMeta):
class LambdaProcess(BaseDataProcess):
- def __init__(self, process: Callable[[any, bool], any]):
+ def __init__(self, process: Callable[[Any, bool], Any]) -> None:
self._process = process
def __call__(self, data, test):
@@ -36,15 +37,15 @@ class LambdaProcess(BaseDataProcess):
class DictKeyReplaceProcess(BaseDataProcess):
- def __init__(self, key_map: Dict[str, str]):
+ def __init__(self, key_map: Dict[str, str]) -> None:
self._key_map = key_map
- def __call__(self, data: Dict[str, any], test):
+ def __call__(self, data: Dict[str, Any], test):
return {key_after: data[key_before] for key_after, key_before in self._key_map}
class ChainProcess(BaseDataProcess):
- def __init__(self, process: typing.Iterable[BaseDataProcess]):
+ def __init__(self, process: typing.Iterable[BaseDataProcess]) -> None:
self._process = list(process)
def __call__(self, data, test):
@@ -57,7 +58,7 @@ class ChainProcess(BaseDataProcess):
class SplitProcess(BaseDataProcess):
- def __init__(self, process: typing.Dict[str, typing.Optional[BaseDataProcess]]):
+ def __init__(self, process: typing.Dict[str, typing.Optional[BaseDataProcess]]) -> None:
self._process = process
def __call__(self, data, test):
@@ -69,7 +70,7 @@ class SplitProcess(BaseDataProcess):
class WaveFileLoadProcess(BaseDataProcess):
- def __init__(self, sample_rate: int, top_db: float = None, pad_second: float = 0, dtype=numpy.float32):
+ def __init__(self, sample_rate: int, top_db: float = None, pad_second: float = 0, dtype=numpy.float32) -> None:
self._sample_rate = sample_rate
self._top_db = top_db
self._pad_second = pad_second
@@ -86,17 +87,24 @@ class WaveFileLoadProcess(BaseDataProcess):
class AcousticFeatureProcess(BaseDataProcess):
- def __init__(self, frame_period, order, alpha, dtype=numpy.float32):
+ def __init__(self, frame_period, order, alpha, f0_estimating_method, f0_floor=71, f0_ceil=800, dtype=numpy.float32) -> None:
self._frame_period = frame_period
self._order = order
self._alpha = alpha
+ self._f0_estimating_method = f0_estimating_method
+ self._f0_floor = f0_floor
+ self._f0_ceil = f0_ceil
self._dtype = dtype
def __call__(self, data: Wave, test=None):
x = data.wave.astype(numpy.float64)
fs = data.sampling_rate
- _f0, t = pyworld.dio(x, fs, frame_period=self._frame_period)
+ if self._f0_estimating_method == 'dio':
+ _f0, t = pyworld.dio(x, fs, frame_period=self._frame_period, f0_floor=self._f0_floor, f0_ceil=self._f0_ceil)
+ else:
+ _f0, t = pyworld.harvest(x, fs, frame_period=self._frame_period, f0_floor=self._f0_floor,
+ f0_ceil=self._f0_ceil)
f0 = pyworld.stonemask(x, _f0, t, fs)
spectrogram = pyworld.cheaptrick(x, f0, t, fs)
aperiodicity = pyworld.d4c(x, f0, t, fs)
@@ -116,11 +124,12 @@ class AcousticFeatureProcess(BaseDataProcess):
class LowHighSpectrogramFeatureProcess(BaseDataProcess):
- def __init__(self, frame_period, order, alpha, dtype=numpy.float32):
+ def __init__(self, frame_period, order, alpha, f0_estimating_method, dtype=numpy.float32) -> None:
self._acoustic_feature_process = AcousticFeatureProcess(
frame_period=frame_period,
order=order,
alpha=alpha,
+ f0_estimating_method=f0_estimating_method,
)
self._dtype = dtype
self._alpha = alpha
@@ -145,7 +154,7 @@ class LowHighSpectrogramFeatureProcess(BaseDataProcess):
class AcousticFeatureLoadProcess(BaseDataProcess):
- def __init__(self, validate=False):
+ def __init__(self, validate=False) -> None:
self._validate = validate
def __call__(self, path: Path, test=None):
@@ -163,7 +172,7 @@ class AcousticFeatureLoadProcess(BaseDataProcess):
class LowHighSpectrogramFeatureLoadProcess(BaseDataProcess):
- def __init__(self, validate=False):
+ def __init__(self, validate=False) -> None:
self._validate = validate
def __call__(self, path: Path, test=None):
@@ -178,11 +187,11 @@ class LowHighSpectrogramFeatureLoadProcess(BaseDataProcess):
class AcousticFeatureSaveProcess(BaseDataProcess):
- def __init__(self, validate=False, ignore: List[str] = None):
+ def __init__(self, validate=False, ignore: List[str] = None) -> None:
self._validate = validate
self._ignore = ignore if ignore is not None else []
- def __call__(self, data: Dict[str, any], test=None):
+ def __call__(self, data: Dict[str, Any], test=None):
path = data['path'] # type: Path
feature = data['feature'] # type: AcousticFeature
if self._validate:
@@ -203,7 +212,7 @@ class AcousticFeatureSaveProcess(BaseDataProcess):
class DistillateUsingFeatureProcess(BaseDataProcess):
- def __init__(self, targets: List[str]):
+ def __init__(self, targets: List[str]) -> None:
self._targets = targets
def __call__(self, feature: AcousticFeature, test=None):
@@ -218,7 +227,7 @@ class DistillateUsingFeatureProcess(BaseDataProcess):
class MakeMaskProcess(BaseDataProcess):
- def __init__(self):
+ def __init__(self) -> None:
pass
def __call__(self, feature: AcousticFeature, test=None):
@@ -232,7 +241,7 @@ class MakeMaskProcess(BaseDataProcess):
class AcousticFeatureNormalizeProcess(BaseDataProcess):
- def __init__(self, mean: AcousticFeature, var: AcousticFeature):
+ def __init__(self, mean: AcousticFeature, var: AcousticFeature) -> None:
self._mean = mean
self._var = var
@@ -249,7 +258,7 @@ class AcousticFeatureNormalizeProcess(BaseDataProcess):
class AcousticFeatureDenormalizeProcess(BaseDataProcess):
- def __init__(self, mean: AcousticFeature, var: AcousticFeature):
+ def __init__(self, mean: AcousticFeature, var: AcousticFeature) -> None:
self._mean = mean
self._var = var
@@ -266,7 +275,7 @@ class AcousticFeatureDenormalizeProcess(BaseDataProcess):
class EncodeFeatureProcess(BaseDataProcess):
- def __init__(self, targets: List[str]):
+ def __init__(self, targets: List[str]) -> None:
self._targets = targets
def __call__(self, data: AcousticFeature, test):
@@ -276,7 +285,7 @@ class EncodeFeatureProcess(BaseDataProcess):
class DecodeFeatureProcess(BaseDataProcess):
- def __init__(self, targets: List[str], sizes: Dict[str, int]):
+ def __init__(self, targets: List[str], sizes: Dict[str, int]) -> None:
assert all(t in sizes for t in targets)
self._targets = targets
self._sizes = sizes
@@ -312,11 +321,11 @@ class ShapeAlignProcess(BaseDataProcess):
class RandomPaddingProcess(BaseDataProcess):
- def __init__(self, min_size: int, time_axis: int = 1):
+ def __init__(self, min_size: int, time_axis: int = 1) -> None:
self._min_size = min_size
self._time_axis = time_axis
- def __call__(self, datas: Dict[str, any], test=True):
+ def __call__(self, datas: Dict[str, Any], test=True):
assert not test
data, seed = datas['data'], datas['seed']
@@ -333,7 +342,7 @@ class RandomPaddingProcess(BaseDataProcess):
class LastPaddingProcess(BaseDataProcess):
- def __init__(self, min_size: int, time_axis: int = 1):
+ def __init__(self, min_size: int, time_axis: int = 1) -> None:
assert time_axis == 1
self._min_size = min_size
self._time_axis = time_axis
@@ -347,11 +356,11 @@ class LastPaddingProcess(BaseDataProcess):
class RandomCropProcess(BaseDataProcess):
- def __init__(self, crop_size: int, time_axis: int = 1):
+ def __init__(self, crop_size: int, time_axis: int = 1) -> None:
self._crop_size = crop_size
self._time_axis = time_axis
- def __call__(self, datas: Dict[str, any], test=True):
+ def __call__(self, datas: Dict[str, Any], test=True):
assert not test
data, seed = datas['data'], datas['seed']
@@ -365,7 +374,7 @@ class RandomCropProcess(BaseDataProcess):
class FirstCropProcess(BaseDataProcess):
- def __init__(self, crop_size: int, time_axis: int = 1):
+ def __init__(self, crop_size: int, time_axis: int = 1) -> None:
self._crop_size = crop_size
self._time_axis = time_axis
@@ -374,7 +383,7 @@ class FirstCropProcess(BaseDataProcess):
class AddNoiseProcess(BaseDataProcess):
- def __init__(self, p_global: float = None, p_local: float = None):
+ def __init__(self, p_global: float = None, p_local: float = None) -> None:
assert p_global is None or 0 <= p_global
assert p_local is None or 0 <= p_local
self._p_global = p_global
@@ -389,7 +398,7 @@ class AddNoiseProcess(BaseDataProcess):
class DataProcessDataset(chainer.dataset.DatasetMixin):
- def __init__(self, data: typing.List, data_process: BaseDataProcess):
+ def __init__(self, data: typing.List, data_process: BaseDataProcess) -> None:
self._data = data
self._data_process = data_process
@@ -431,7 +440,8 @@ def create(config: DatasetConfig):
)),
]),
)),
- LambdaProcess(lambda d, test: dict(input=d['input'], target=d['target']['feature'], mask=d['target']['mask'])),
+ LambdaProcess(
+ lambda d, test: dict(input=d['input'], target=d['target']['feature'], mask=d['target']['mask'])),
ShapeAlignProcess(),
])
diff --git a/become_yukarin/dataset/utility.py b/become_yukarin/dataset/utility.py
index 9797818..ca68acf 100644
--- a/become_yukarin/dataset/utility.py
+++ b/become_yukarin/dataset/utility.py
@@ -9,7 +9,7 @@ class DTWAligner(object):
from https://github.com/r9y9/nnmnkwii/blob/4cade86b5c35b4e35615a2a8162ddc638018af0e/nnmnkwii/preprocessing/alignment.py#L14
"""
- def __init__(self, x, y, dist=lambda x, y: numpy.linalg.norm(x - y), radius=1):
+ def __init__(self, x, y, dist=lambda x, y: numpy.linalg.norm(x - y), radius=1) -> None:
assert x.ndim == 2 and y.ndim == 2
_, path = fastdtw.fastdtw(x, y, radius=radius, dist=dist)
@@ -40,10 +40,11 @@ class DTWAligner(object):
class MFCCAligner(DTWAligner):
- def __init__(self, x, y, *args, **kwargs):
+ def __init__(self, x, y, *args, **kwargs) -> None:
x = self._calc_aligner_feature(x)
y = self._calc_aligner_feature(y)
- super().__init__(x, y, *args, dist=nnmnkwii.metrics.melcd, **kwargs)
+ kwargs.update(dist=nnmnkwii.metrics.melcd)
+ super().__init__(x, y, *args, **kwargs)
@classmethod
def _calc_delta(cls, x):
diff --git a/become_yukarin/model/model.py b/become_yukarin/model/model.py
index fc2d722..71fb805 100644
--- a/become_yukarin/model/model.py
+++ b/become_yukarin/model/model.py
@@ -70,7 +70,7 @@ class ConvHighway(chainer.link.Chain):
class PreNet(chainer.link.Chain):
- def __init__(self, in_channels: int, hidden_channels: int, out_channels: int):
+ def __init__(self, in_channels: int, hidden_channels: int, out_channels: int) -> None:
super().__init__()
with self.init_scope():
self.conv1 = Convolution1D(in_channels, hidden_channels, 1)
@@ -84,7 +84,7 @@ class PreNet(chainer.link.Chain):
class Conv1DBank(chainer.link.Chain):
- def __init__(self, in_channels: int, out_channels: int, k: int):
+ def __init__(self, in_channels: int, out_channels: int, k: int) -> None:
super().__init__()
self.stacked_channels = out_channels * k
self.pads = [
@@ -106,7 +106,7 @@ class Conv1DBank(chainer.link.Chain):
class Conv1DProjections(chainer.link.Chain):
- def __init__(self, in_channels: int, hidden_channels: int, out_channels: int):
+ def __init__(self, in_channels: int, hidden_channels: int, out_channels: int) -> None:
super().__init__()
with self.init_scope():
@@ -133,7 +133,7 @@ class CBHG(chainer.link.Chain):
highway_layers: int,
out_channels: int,
disable_last_rnn: bool,
- ):
+ ) -> None:
super().__init__()
self.max_pooling_padding = partial(
chainer.functions.pad,
@@ -182,7 +182,7 @@ class CBHG(chainer.link.Chain):
class Predictor(chainer.link.Chain):
- def __init__(self, network, out_size: int):
+ def __init__(self, network, out_size: int) -> None:
super().__init__()
with self.init_scope():
self.network = network
@@ -196,7 +196,7 @@ class Predictor(chainer.link.Chain):
class Aligner(chainer.link.Chain):
- def __init__(self, in_size: int, out_time_length: int):
+ def __init__(self, in_size: int, out_time_length: int) -> None:
super().__init__()
with self.init_scope():
self.gru = chainer.links.NStepBiGRU(
@@ -222,7 +222,7 @@ class Aligner(chainer.link.Chain):
class Discriminator(chainer.link.Chain):
- def __init__(self, in_channels: int, hidden_channels_list: List[int]):
+ def __init__(self, in_channels: int, hidden_channels_list: List[int]) -> None:
super().__init__()
with self.init_scope():
self.convs = chainer.link.ChainList(*(
diff --git a/become_yukarin/model/sr_model.py b/become_yukarin/model/sr_model.py
index 2e83526..f8e55d6 100644
--- a/become_yukarin/model/sr_model.py
+++ b/become_yukarin/model/sr_model.py
@@ -6,7 +6,7 @@ from become_yukarin.config.sr_config import SRModelConfig
class CBR(chainer.Chain):
- def __init__(self, ch0, ch1, bn=True, sample='down', activation=F.relu, dropout=False):
+ def __init__(self, ch0, ch1, bn=True, sample='down', activation=F.relu, dropout=False) -> None:
super().__init__()
self.bn = bn
self.activation = activation
@@ -33,7 +33,7 @@ class CBR(chainer.Chain):
class Encoder(chainer.Chain):
- def __init__(self, in_ch):
+ def __init__(self, in_ch) -> None:
super().__init__()
w = chainer.initializers.Normal(0.02)
with self.init_scope():
@@ -54,7 +54,7 @@ class Encoder(chainer.Chain):
class Decoder(chainer.Chain):
- def __init__(self, out_ch):
+ def __init__(self, out_ch) -> None:
super().__init__()
w = chainer.initializers.Normal(0.02)
with self.init_scope():
@@ -79,7 +79,7 @@ class Decoder(chainer.Chain):
class SRPredictor(chainer.Chain):
- def __init__(self, in_ch, out_ch):
+ def __init__(self, in_ch, out_ch) -> None:
super().__init__()
with self.init_scope():
self.encoder = Encoder(in_ch)
@@ -90,7 +90,7 @@ class SRPredictor(chainer.Chain):
class SRDiscriminator(chainer.Chain):
- def __init__(self, in_ch, out_ch):
+ def __init__(self, in_ch, out_ch) -> None:
super().__init__()
w = chainer.initializers.Normal(0.02)
with self.init_scope():
diff --git a/become_yukarin/param.py b/become_yukarin/param.py
index 5a43d74..101c960 100644
--- a/become_yukarin/param.py
+++ b/become_yukarin/param.py
@@ -11,6 +11,7 @@ class AcousticFeatureParam(NamedTuple):
frame_period: int = 5
order: int = 8
alpha: float = 0.466
+ f0_estimating_method: str = 'harvest' # dio / harvest
class Param(NamedTuple):
diff --git a/become_yukarin/super_resolution.py b/become_yukarin/super_resolution.py
index 163057d..7c53b9d 100644
--- a/become_yukarin/super_resolution.py
+++ b/become_yukarin/super_resolution.py
@@ -15,7 +15,7 @@ from become_yukarin.model.sr_model import create_predictor_sr
class SuperResolution(object):
- def __init__(self, config: SRConfig, model_path: Path, gpu: int = None):
+ def __init__(self, config: SRConfig, model_path: Path, gpu: int = None) -> None:
self.config = config
self.model_path = model_path
self.gpu = gpu
@@ -34,6 +34,7 @@ class SuperResolution(object):
frame_period=param.acoustic_feature_param.frame_period,
order=param.acoustic_feature_param.order,
alpha=param.acoustic_feature_param.alpha,
+ f0_estimating_method=param.acoustic_feature_param.f0_estimating_method,
)
self._low_high_spectrogram_load_process = LowHighSpectrogramFeatureLoadProcess(
validate=True,
@@ -76,13 +77,13 @@ class SuperResolution(object):
return Wave(out, sampling_rate=sampling_rate)
def convert_from_audio_path(self, input: Path):
- input = self._wave_process(str(input), test=True)
- input = self._low_high_spectrogram_process(input, test=True)
- return self.convert(input.low)
+ wave = self._wave_process(str(input), test=True)
+ feature = self._low_high_spectrogram_process(wave, test=True)
+ return self.convert(feature.low)
def convert_from_feature_path(self, input: Path):
- input = self._low_high_spectrogram_load_process(input, test=True)
- return self.convert(input.low)
+ feature = self._low_high_spectrogram_load_process(input, test=True)
+ return self.convert(feature.low)
def __call__(
self,
diff --git a/become_yukarin/updater/sr_updater.py b/become_yukarin/updater/sr_updater.py
index 6e2b400..88f4bb3 100644
--- a/become_yukarin/updater/sr_updater.py
+++ b/become_yukarin/updater/sr_updater.py
@@ -14,7 +14,7 @@ class SRUpdater(chainer.training.StandardUpdater):
discriminator: SRDiscriminator,
*args,
**kwargs,
- ):
+ ) -> None:
super().__init__(*args, **kwargs)
self.loss_config = loss_config
self.predictor = predictor
diff --git a/become_yukarin/updater/updater.py b/become_yukarin/updater/updater.py
index ef77e77..8dcb215 100644
--- a/become_yukarin/updater/updater.py
+++ b/become_yukarin/updater/updater.py
@@ -17,7 +17,7 @@ class Updater(chainer.training.StandardUpdater):
discriminator: Discriminator = None,
*args,
**kwargs,
- ):
+ ) -> None:
super().__init__(*args, **kwargs)
self.loss_config = loss_config
self.predictor = predictor
diff --git a/become_yukarin/voice_changer.py b/become_yukarin/voice_changer.py
index 30fbf28..7269053 100644
--- a/become_yukarin/voice_changer.py
+++ b/become_yukarin/voice_changer.py
@@ -10,7 +10,7 @@ class VoiceChanger(object):
acoustic_converter: AcousticConverter,
super_resolution: SuperResolution,
output_sampling_rate: int = None,
- ):
+ ) -> None:
if output_sampling_rate is None:
output_sampling_rate = super_resolution.config.dataset.param.voice_param.sample_rate
diff --git a/scripts/extract_acoustic_feature.py b/scripts/extract_acoustic_feature.py
index d6e7711..7015f2a 100644
--- a/scripts/extract_acoustic_feature.py
+++ b/scripts/extract_acoustic_feature.py
@@ -36,6 +36,11 @@ parser.add_argument('--pad_second', type=float, default=base_voice_param.pad_sec
parser.add_argument('--frame_period', type=int, default=base_acoustic_feature_param.frame_period)
parser.add_argument('--order', type=int, default=base_acoustic_feature_param.order)
parser.add_argument('--alpha', type=float, default=base_acoustic_feature_param.alpha)
+parser.add_argument('--f0_estimating_method', type=str, default=base_acoustic_feature_param.f0_estimating_method)
+parser.add_argument('--f0_floor1', type=float, default=71)
+parser.add_argument('--f0_ceil1', type=float, default=800)
+parser.add_argument('--f0_floor2', type=float, default=71)
+parser.add_argument('--f0_ceil2', type=float, default=800)
parser.add_argument('--ignore_feature', nargs='+', default=['spectrogram', 'aperiodicity'])
parser.add_argument('--disable_alignment', action='store_true')
parser.add_argument('--enable_overwrite', action='store_true')
@@ -67,13 +72,24 @@ def generate_feature(path1, path2):
wave2 = wave_file_load_process(path2, test=True)
# make acoustic feature
- acoustic_feature_process = AcousticFeatureProcess(
+ acoustic_feature_process1 = AcousticFeatureProcess(
frame_period=arguments.frame_period,
order=arguments.order,
alpha=arguments.alpha,
+ f0_estimating_method=arguments.f0_estimating_method,
+ f0_floor=arguments.f0_floor1,
+ f0_ceil=arguments.f0_ceil1,
)
- f1 = acoustic_feature_process(wave1, test=True).astype_only_float(numpy.float32)
- f2 = acoustic_feature_process(wave2, test=True).astype_only_float(numpy.float32)
+ acoustic_feature_process2 = AcousticFeatureProcess(
+ frame_period=arguments.frame_period,
+ order=arguments.order,
+ alpha=arguments.alpha,
+ f0_estimating_method=arguments.f0_estimating_method,
+ f0_floor=arguments.f0_floor2,
+ f0_ceil=arguments.f0_ceil2,
+ )
+ f1 = acoustic_feature_process1(wave1, test=True).astype_only_float(numpy.float32)
+ f2 = acoustic_feature_process2(wave2, test=True).astype_only_float(numpy.float32)
# pre convert
if pre_convert:
diff --git a/scripts/voice_conversion_test.py b/scripts/voice_conversion_test.py
index 43c66d5..d96d5ce 100644
--- a/scripts/voice_conversion_test.py
+++ b/scripts/voice_conversion_test.py
@@ -34,8 +34,8 @@ def extract_number(f):
def process(p: Path, acoustic_converter: AcousticConverter):
try:
if p.suffix in ['.npy', '.npz']:
- p = glob.glob(str(input_wave_directory / p.stem) + '.*')[0]
- p = Path(p)
+ fn = glob.glob(str(input_wave_directory / p.stem) + '.*')[0]
+ p = Path(fn)
wave = acoustic_converter(p)
librosa.output.write_wav(str(output / p.stem) + '.wav', wave.wave, wave.sampling_rate, norm=True)
except: