summary refs log tree commit diff
path: root/scripts/ln_jnas_subset.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/ln_jnas_subset.py')
-rw-r--r-- scripts/ln_jnas_subset.py 30
1 files changed, 30 insertions, 0 deletions
diff --git a/scripts/ln_jnas_subset.py b/scripts/ln_jnas_subset.py
new file mode 100644
index 0000000..e5aba5c
--- /dev/null
+++ b/scripts/ln_jnas_subset.py
@@ -0,0 +1,30 @@
+import argparse
+import multiprocessing
+from pathlib import Path
+
+from jnas_metadata_loader import load_from_directory
+from jnas_metadata_loader.jnas_metadata import JnasMetadata
+
+# Command-line interface. --format is a str.format template for each link's
+# file name; process() fills it from the fields of a metadata record.
+parser = argparse.ArgumentParser(
+    description='Symlink a subset of a JNAS corpus into one directory.',
+)
+parser.add_argument('jnas', type=Path, help='directory passed to load_from_directory')
+parser.add_argument('output', type=Path, help='directory that receives the links')
+parser.add_argument(
+    '--format',
+    default='{sex}{text_id}_{mic}_atr_{subset}{sen_id}.wav',
+    help='link file-name template (default: %(default)s)',
+)
+argument = parser.parse_args()
+
+jnas = argument.jnas  # type: Path
+output = argument.output  # type: Path
+
+# NOTE(review): 'B' presumably selects the ATR sentences (cf. the "_atr_" in
+# the default template) -- confirm against jnas_metadata_loader.
+jnas_list = load_from_directory(str(jnas))
+atr_list = jnas_list.subset_news_or_atr('B')
+
+# parents=True so a nested, not-yet-existing output path works as well.
+output.mkdir(parents=True, exist_ok=True)
+
+
+def process(d: JnasMetadata):
+    """Create one symbolic link in the output directory for record *d*.
+
+    The link name is the ``--format`` template filled with the fields of
+    ``d`` (``d._asdict()``); the link points at ``d.path``.
+
+    Raises whatever ``Path.symlink_to`` raises, e.g. ``FileExistsError``
+    when the link name is already taken.
+    """
+    # A relative link target is resolved against the link's own directory,
+    # not the current working directory, so anchor it first. An already
+    # absolute path is left unchanged.
+    target = Path(d.path).absolute()
+    out = output / argument.format.format(**d._asdict())
+    out.symlink_to(target)
+
+
+# Guard the entry point: with the "spawn" start method every worker imports
+# this module again, and an unguarded Pool() would be created recursively.
+if __name__ == '__main__':
+    # The context manager releases the worker processes once map() returns.
+    with multiprocessing.Pool() as pool:
+        pool.map(process, atr_list)