diff options
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/s2-final-report.py | 2 | ||||
| -rw-r--r-- | scraper/util.py | 20 |
2 files changed, 10 insertions, 12 deletions
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py index c0e005be..a3209d24 100644 --- a/scraper/s2-final-report.py +++ b/scraper/s2-final-report.py @@ -94,7 +94,7 @@ def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_ paper_institutions = load_institutions(paper.paper_id, paper_location_lookup) paper_address = None for inst in sorted(paper_institutions, key=operator.itemgetter(1)): - # print(inst[1]) + print(inst[1]) institution = inst[1] if paper_address is None: paper_address = addresses.findObject(institution) diff --git a/scraper/util.py b/scraper/util.py index 0cffd5d2..0c3e2169 100644 --- a/scraper/util.py +++ b/scraper/util.py @@ -343,24 +343,22 @@ def load_institutions(paperId, paper_location_lookup={}): items = paper_location_lookup[paperId] result = [] if items['institution_1']: - result.append([items['institution_1'], '', '']) + result.append(['', items['institution_1'], '', '']) if items['institution_2']: - result.append([items['institution_2'], '', '']) + result.append(['', items['institution_2'], '', '']) if items['institution_3']: - result.append([items['institution_3'], '', '']) + result.append(['', items['institution_3'], '', '']) if items['institution_4']: - result.append([items['institution_4'], '', '']) + result.append(['', items['institution_4'], '', '']) if items['institution_5']: - result.append([items['institution_5'], '', '']) + result.append(['', items['institution_5'], '', '']) if items['institution_6']: - result.append([items['institution_6'], '', '']) + result.append(['', items['institution_6'], '', '']) if items['institution_7']: - result.append([items['institution_7'], '', '']) + result.append(['', items['institution_7'], '', '']) if items['institution_8']: - result.append([items['institution_8'], '', '']) - return { - 'institutions': result - } + result.append(['', items['institution_8'], '', '']) + return result if os.path.exists(file_path('pdf', paperId, 'institutions.json')): return read_json(file_path('pdf', paperId, 'institutions.json'))['institutions'] elif os.path.exists(file_path('doi', paperId, 'institutions.json')): |
