summary | refs | log | tree | commit | diff
path: root/scripts/ln_apply_subset.py
diff options
context:
space:
mode:
author: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-24 06:43:24 +0900
committer: Hiroshiba Kazuyuki <kazuyuki_hiroshiba@dwango.co.jp> 2017-11-28 15:32:38 +0700
commit: cf71104424ca3234da06abf4ce6e5794fda594c5 (patch)
tree: 51332c400fda036c3f16c1863ac0491e6ad00673 /scripts/ln_apply_subset.py
parent: 9f87a74de09e38f9d8f3e7ebb5fd26fac44a3b0e (diff)
atr subset scripts
Diffstat (limited to 'scripts/ln_apply_subset.py')
-rw-r--r-- scripts/ln_apply_subset.py 48
1 files changed, 48 insertions, 0 deletions
diff --git a/scripts/ln_apply_subset.py b/scripts/ln_apply_subset.py
new file mode 100644
index 0000000..92345dd
--- /dev/null
+++ b/scripts/ln_apply_subset.py
@@ -0,0 +1,48 @@
+"""
+Symlink one speaker's ATR503 files under names that match another speaker's files.
+
+Each target file's stem must end with a 3-character ATR503 subset name ('A01'..'J53').
+"""
+
+import argparse
+from pathlib import Path
+import re
+from itertools import chain, groupby
+
+parser = argparse.ArgumentParser()
+parser.add_argument('source', type=Path)
+parser.add_argument('target', type=Path)
+parser.add_argument('output', type=Path)
+parser.add_argument('--prefix', default='')
+argument = parser.parse_args()
+
+source = argument.source  # type: Path
+target = argument.target  # type: Path
+output = argument.output  # type: Path
+
+# ATR503 subset names: A01..A50, B01..B50, ..., J01..J50, then J51..J53 (503 total).
+names = ['{}{:02d}'.format(s, n + 1) for s in 'ABCDEFGHIJ' for n in range(50)]
+names += ['J51', 'J52', 'J53']
+
+# source: the sorted entries must line up one-to-one with the names above.
+sources = sorted(source.glob('*'))
+if len(sources) != len(names):  # explicit checks: `assert` is stripped under -O
+    parser.error('source must hold exactly 503 entries, found {}'.format(len(sources)))
+if not all(n in s.name for s, n in zip(sources, names)):
+    parser.error('sorted source entries do not match the ATR503 names A01..J53')
+map_source = dict(zip(names, sources))
+
+# target: group the files by the last three characters of their stem.
+keyfunc = lambda t: t.stem[-3:]
+targets = list(target.glob('*'))
+map_targets = {n: list(vs) for n, vs in groupby(sorted(targets, key=keyfunc), key=keyfunc)}
+unknown = sorted(set(map_targets) - set(names))
+if unknown:
+    parser.error('target stems must end with an ATR503 name, got: {}'.format(unknown))
+
+# output: link to absolute paths, since a relative link resolves from the link's own dir.
+output.mkdir(parents=True, exist_ok=True)
+for n in names:
+    for t in map_targets.get(n, ()):  # names with no target file are skipped
+        out = output / (argument.prefix + t.stem + map_source[n].suffix)
+        out.symlink_to(map_source[n].resolve())