summaryrefslogtreecommitdiff
path: root/split-csv.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2018-11-25 22:19:15 +0100
committer Jules Laplace <julescarbon@gmail.com> 2018-11-25 22:19:15 +0100
commit ee3d0d98e19f1d8177d85af1866fd0ee431fe9ea (patch)
tree 41372528e78d4328bc2a47bbbabac7e809c58894 /split-csv.py
parent 255b8178af1e25a71fd23703d30c0d1f74911f47 (diff)
moving stuff
Diffstat (limited to 'split-csv.py')
-rw-r--r-- split-csv.py 50
1 files changed, 0 insertions, 50 deletions
diff --git a/split-csv.py b/split-csv.py
deleted file mode 100644
index 122d2ddc..00000000
--- a/split-csv.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import os
-import sys
-import csv
-from math import ceil
-import subprocess
-import random
-from util import *
-
-import click
-
@click.command()
@click.option('--count', '-c', default=2, help='Number of subdivisions.')
@click.option('--has_keys/--no_keys', '-k', default=False, help='Whether to split off the keys.')
@click.option('--shuffle/--no_shuffle', default=False, help='Whether to shuffle.')
@click.argument('filename')
def split_csv(count, has_keys, shuffle, filename):
    """Split a CSV into groups."""
    # NOTE: the docstring above is printed by click as the --help text, so the
    # detailed contract lives in these comments instead.
    #
    # Reads `filename`, optionally peels off the first row as a header
    # (`has_keys`), optionally shuffles the remaining rows in place
    # (`shuffle`), then writes `count` files named "<stem>-<i><ext>" for
    # i = 1..count, each holding a consecutive slice of at most
    # ceil(rows / count) rows. When there are fewer rows than slices the
    # trailing files contain no data rows (only the header, if any).
    #
    # Raises click.BadParameter when `count` is smaller than 1.
    if count < 1:
        # Previously 0 crashed with ZeroDivisionError and negative values
        # silently produced no output at all.
        raise click.BadParameter('count must be at least 1', param_hint='--count')

    # newline='' lets the csv module interpret line endings itself, which is
    # required for quoted fields containing embedded newlines.
    with open(filename, 'r', newline='') as f:
        # unfussy_reader comes from the project's util module; presumably it
        # wraps the reader and skips rows that fail to parse -- TODO confirm.
        lines = list(unfussy_reader(csv.reader(f)))

    keys = None
    if has_keys and lines:
        # Guarded by `lines` so an empty input no longer raises IndexError.
        keys, lines = lines[0], lines[1:]

    fn, ext = os.path.splitext(filename)
    if shuffle:
        random.shuffle(lines)

    # At least one row per slice so the slicing below always advances.
    n = max(1, ceil(len(lines) / count))
    for index in range(count):
        m = index * n
        chunk = lines[m:m + n]
        if chunk:
            # Progress output: first row of each slice. Empty slices occur
            # when the rows run out before `count` slices are filled.
            print(chunk[0])
        out_fn = fn + '-' + str(index + 1) + ext
        write_csv(out_fn, keys, chunk)
-
# Write a CSV
def write_csv(fn, keys, chunk):
    """Write rows to the CSV file `fn`, optionally preceded by a header row.

    Args:
        fn: Path of the file to create or overwrite.
        keys: Header row to write first, or None to write no header.
        chunk: Iterable of rows, each an iterable of field values.

    The path is printed to stdout before the file is written.
    """
    print(fn)
    # newline='' stops the text layer from translating the csv writer's own
    # '\r\n' terminator, which would otherwise yield blank rows on Windows.
    with open(fn, 'w', newline='') as f:
        writer = csv.writer(f)
        if keys is not None:
            writer.writerow(keys)
        writer.writerows(chunk)
-
-
# Script entry point: invoke the click command (which parses sys.argv itself)
# only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    split_csv()