diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-11-06 15:05:40 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-11-06 15:05:40 +0100 |
| commit | aacdf0fa056b51000ff88479da479ded3f36b59c (patch) | |
| tree | de9e221ffd9bf8c67ef54607d6267f00b5233312 /util.py | |
| parent | 002e72bb172c34bb71756f9e6c23294913f1ef85 (diff) | |
we geocoding
Diffstat (limited to 'util.py')
| -rw-r--r-- | util.py | 33 |
1 files changed, 21 insertions, 12 deletions
@@ -14,15 +14,20 @@ def read_citation_list(index=0): lines = lines[1:] return keys, lines -def read_csv(fn, keys=True): - with open(fn, 'r') as f: - reader = csv.reader(f) - lines = list(reader) - if keys: - keys = lines[0] - lines = lines[1:] - return keys, lines - return lines +def read_csv(fn, keys=True, create=False): + try: + with open(fn, 'r') as f: + reader = csv.reader( (line.replace('\0','') for line in f) ) + lines = list(reader) + if keys: + keys = lines[0] + lines = lines[1:] + return keys, lines + return lines + except: + if create: + return [] + raise def read_json(fn): with open(fn, 'r') as json_file: @@ -41,6 +46,7 @@ def write_csv(fn, keys, rows): writer.writerow(row) def write_report(fn, title=None, keys=None, rows=[]): + count = 0 with open(fn, 'w') as f: f.write("<!doctype html>") f.write("<html>") @@ -57,6 +63,9 @@ def write_report(fn, title=None, keys=None, rows=[]): for key in keys: f.write("<th>{}</th>".format(key)) for row in rows: + if row is None: + return + count += 1 f.write("<tr>") for cell in row: if isinstance(cell, list) or isinstance(cell, tuple): @@ -67,6 +76,7 @@ def write_report(fn, title=None, keys=None, rows=[]): f.write("</table>") f.write("</body>") f.write("</html>") + print("{} {}".format(fn, count)) def paper_path(key='papers', paper_id=''): return '{}/{}/{}/{}/paper.json'.format('./datasets/s2', key, paper_id[0:2], paper_id) @@ -100,12 +110,11 @@ class RawPaper(object): return [ (author[0]['ids'][0], author[0]['name']) for author in self.data['authors'] ] def load_paper(paper_id): - print('_______________') if os.path.exists(paper_path('db_papers', paper_id)): - print('db paper') + # print('db paper') return DbPaper(paper_id) if os.path.exists(paper_path('raw_papers', paper_id)): - print('raw paper') + # print('raw paper') return RawPaper(paper_id) print('no paper') return None |
