summary | refs | log | tree | commit | diff
path: root/split-csv.py
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2018-11-03 18:28:26 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2018-11-03 18:28:26 +0100
commit: fde14c19ef77f1bbe67f4cac7cadddbd9d3129b3 (patch)
tree: 7784e3fbfa9531b8a7a245f733ec2d68e2057912 /split-csv.py
parent: 07518b3542ed4344421baf7b62391d39c36aee25 (diff)
fix missing
Diffstat (limited to 'split-csv.py')
-rw-r--r-- split-csv.py 21
1 files changed, 12 insertions, 9 deletions
diff --git a/split-csv.py b/split-csv.py
index 62dc1597..2db45d85 100644
--- a/split-csv.py
+++ b/split-csv.py
@@ -9,23 +9,30 @@ import click
@click.command()
@click.option('--count', '-c', default=2, help='Number of subdivisions.')
+@click.option('--has_keys/--has_no_keys', '-k', default=False, help='Whether to split off the keys.')
@click.option('--shuffle/--no_shuffle', default=False, help='Whether to shuffle.')
@click.argument('filename')
-def split_csv(count, shuffle, filename):
+def split_csv(count, has_keys, shuffle, filename):
"""Split a CSV into groups."""
with open(filename, 'r') as f:
reader = csv.reader(f)
lines = list(reader)
- keys = lines[0]
- lines = lines[1:]
+ if has_keys:
+ keys = lines[0]
+ lines = lines[1:]
+ else:
+ keys = None
fn, ext = os.path.splitext(filename)
if shuffle:
random.shuffle(lines)
- for index, chunk in enumerate(chunks(lines, count)):
+ n = max(1, ceil(len(lines) / count))
+ for index in range(count):
+ m = index * n
+ chunk = lines[m:m+n]
+ print(chunk[0])
out_fn = fn + '-' + str(index+1) + ext
write_csv(out_fn, keys, chunk)
- # sys.exit(1)
# Write a CSV
def write_csv(fn, keys, chunk):
@@ -37,10 +44,6 @@ def write_csv(fn, keys, chunk):
for row in chunk:
writer.writerow(row)
-# Split an array into chunks
-def chunks(l, n):
- n = max(1, ceil(len(l) / n))
- return (l[i:i+n] for i in range(0, len(l), n))
if __name__ == '__main__':
split_csv()