# Provenance: added as scraper/check-counts.py in commit
# ee3d0d98e19f1d8177d85af1866fd0ee431fe9ea ("moving stuff",
# Jules Laplace, 2018-11-25).
import os
import sys
import csv
from math import ceil
import subprocess
import random

import click

# Directory scanned for pipe-delimited entry files, and the CSV that the
# first row of each of those files is merged into.
ENTRIES_DIR = './datasets/scholar/entries/'
OUT_FN = './datasets/scholar_entries.csv'

# NOTE(review): the original opened files in binary mode and never decoded
# them; UTF-8 is assumed here -- confirm against the scraper that writes
# the entry files.
ENCODING = 'utf-8'


@click.command()
def check_counts():
    """Merge the first row of every entry file into a single CSV.

    Every regular file directly inside ``ENTRIES_DIR`` is parsed as a
    pipe-delimited CSV. Its first row is collected, and all collected rows
    are written (without a header) to ``OUT_FN`` via ``write_csv``.

    Files are visited in sorted name order so the output is reproducible
    (``os.listdir`` returns entries in arbitrary order). Files that contain
    no rows are reported on stderr and skipped instead of raising
    ``IndexError``.
    """
    recs = []
    for name in sorted(os.listdir(ENTRIES_DIR)):
        path = os.path.join(ENTRIES_DIR, name)
        if not os.path.isfile(path):
            continue
        print(path)
        # newline='' lets the csv module do its own newline handling, as
        # the csv documentation requires for file objects.
        with open(path, 'r', newline='', encoding=ENCODING) as fh:
            reader = csv.reader(fh, delimiter='|')
            # Only the first row is needed; do not read the whole file.
            rec = next(reader, None)
        if rec is None:
            print('skipping empty file: ' + path, file=sys.stderr)
            continue
        recs.append(rec)

    write_csv(OUT_FN, keys=None, chunk=recs)


# Write a CSV
def write_csv(fn, keys, chunk):
    """Write rows to a comma-delimited CSV file, replacing any existing file.

    Args:
        fn: Path of the file to write; it is also printed to stdout.
        keys: Header row to write first, or ``None`` for no header.
        chunk: Iterable of rows, each an iterable of field values.
    """
    print(fn)
    # newline='' prevents the extra blank line between rows on platforms
    # that translate '\n' to '\r\n'.
    with open(fn, 'w', newline='', encoding=ENCODING) as f:
        writer = csv.writer(f)
        if keys is not None:
            writer.writerow(keys)
        writer.writerows(chunk)


if __name__ == '__main__':
    check_counts()