From aacdf0fa056b51000ff88479da479ded3f36b59c Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Tue, 6 Nov 2018 15:05:40 +0100 Subject: we geocoding --- util.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'util.py') diff --git a/util.py b/util.py index 400c7ee3..b38cfec1 100644 --- a/util.py +++ b/util.py @@ -14,15 +14,20 @@ def read_citation_list(index=0): lines = lines[1:] return keys, lines -def read_csv(fn, keys=True): - with open(fn, 'r') as f: - reader = csv.reader(f) - lines = list(reader) - if keys: - keys = lines[0] - lines = lines[1:] - return keys, lines - return lines +def read_csv(fn, keys=True, create=False): + try: + with open(fn, 'r') as f: + reader = csv.reader( (line.replace('\0','') for line in f) ) + lines = list(reader) + if keys: + keys = lines[0] + lines = lines[1:] + return keys, lines + return lines + except: + if create: + return [] + raise def read_json(fn): with open(fn, 'r') as json_file: @@ -41,6 +46,7 @@ def write_csv(fn, keys, rows): writer.writerow(row) def write_report(fn, title=None, keys=None, rows=[]): + count = 0 with open(fn, 'w') as f: f.write("") f.write("") @@ -57,6 +63,9 @@ def write_report(fn, title=None, keys=None, rows=[]): for key in keys: f.write("{}".format(key)) for row in rows: + if row is None: + return + count += 1 f.write("") for cell in row: if isinstance(cell, list) or isinstance(cell, tuple): @@ -67,6 +76,7 @@ def write_report(fn, title=None, keys=None, rows=[]): f.write("") f.write("") f.write("") + print("{} {}".format(fn, count)) def paper_path(key='papers', paper_id=''): return '{}/{}/{}/{}/paper.json'.format('./datasets/s2', key, paper_id[0:2], paper_id) @@ -100,12 +110,11 @@ class RawPaper(object): return [ (author[0]['ids'][0], author[0]['name']) for author in self.data['authors'] ] def load_paper(paper_id): - print('_______________') if os.path.exists(paper_path('db_papers', paper_id)): - print('db paper') + # print('db paper') return DbPaper(paper_id) if os.path.exists(paper_path('raw_papers', paper_id)): - print('raw paper') + # print('raw paper') return RawPaper(paper_id) print('no paper') return None -- cgit v1.2.3-70-g09d2