summaryrefslogtreecommitdiff
path: root/scripts/ln_jnas_subset.py
blob: e5aba5caba869c5341f34fc0f381a93f837be13a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import argparse
import multiprocessing
from pathlib import Path

from jnas_metadata_loader import load_from_directory
from jnas_metadata_loader.jnas_metadata import JnasMetadata

# Command line: two positional paths plus an optional template for link names.
parser = argparse.ArgumentParser()
parser.add_argument("jnas", type=Path)
parser.add_argument("output", type=Path)
parser.add_argument(
    "--format",
    default="{sex}{text_id}_{mic}_atr_{subset}{sen_id}.wav",
)
argument = parser.parse_args()

# Expose the two path arguments as module-level names.
jnas, output = argument.jnas, argument.output  # type: Path, Path

# Load the metadata found under `jnas`, then keep whatever
# subset_news_or_atr("B") selects from it.
jnas_list = load_from_directory(str(jnas))
atr_list = jnas_list.subset_news_or_atr("B")

# Create the output directory; an already existing one is accepted.
output.mkdir(exist_ok=True)


def process(d: JnasMetadata):
    """
    Create one symlink inside the output directory that points at ``d.path``.

    The link's file name is built by filling the ``--format`` template with
    the fields of ``d`` (as returned by ``d._asdict()``).

    :param d: metadata entry; it must provide ``path`` and ``_asdict()``.
    :raises FileExistsError: propagated from ``symlink_to`` when the link
        name is already taken.
    """
    # A relative symlink target is interpreted relative to the directory that
    # contains the link, not the current working directory, so a relative
    # ``d.path`` would yield a dangling link. Anchor it to the working
    # directory; joining leaves an already absolute path unchanged.
    target = Path.cwd() / d.path
    link = output / argument.format.format(**d._asdict())
    link.symlink_to(target)


# Only start the pool when this file is run as the main program. Under the
# "spawn" start method every worker re-imports this file, and without the
# guard each worker would try to start a pool of its own.
if __name__ == '__main__':
    # The context manager shuts the pool down once map() has returned,
    # instead of leaving the worker processes to interpreter exit.
    with multiprocessing.Pool() as pool:
        pool.map(process, atr_list)