diff options
| author | jules@lens <julescarbon@gmail.com> | 2019-02-18 14:07:19 +0100 |
|---|---|---|
| committer | jules@lens <julescarbon@gmail.com> | 2019-02-18 14:07:19 +0100 |
| commit | 362c0ce0cfb7eaaee77510356b3b3a31771e5768 (patch) | |
| tree | 3ce4c30e173aef79daae865cd16c52b283575948 /scraper | |
| parent | 3fc5bb42b0dd94b56d0f11b1568d30a1ff835629 (diff) | |
adding our papers
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/s2-final-report.py | 2 | ||||
| -rw-r--r-- | scraper/util.py | 20 |
2 files changed, 10 insertions, 12 deletions
```diff
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index c0e005be..a3209d24 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -94,7 +94,7 @@ def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_
     paper_institutions = load_institutions(paper.paper_id, paper_location_lookup)
     paper_address = None
     for inst in sorted(paper_institutions, key=operator.itemgetter(1)):
-      # print(inst[1])
+      print(inst[1])
       institution = inst[1]
       if paper_address is None:
         paper_address = addresses.findObject(institution)
diff --git a/scraper/util.py b/scraper/util.py
index 0cffd5d2..0c3e2169 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -343,24 +343,22 @@ def load_institutions(paperId, paper_location_lookup={}):
     items = paper_location_lookup[paperId]
     result = []
     if items['institution_1']:
-      result.append([items['institution_1'], '', ''])
+      result.append(['', items['institution_1'], '', ''])
     if items['institution_2']:
-      result.append([items['institution_2'], '', ''])
+      result.append(['', items['institution_2'], '', ''])
     if items['institution_3']:
-      result.append([items['institution_3'], '', ''])
+      result.append(['', items['institution_3'], '', ''])
     if items['institution_4']:
-      result.append([items['institution_4'], '', ''])
+      result.append(['', items['institution_4'], '', ''])
     if items['institution_5']:
-      result.append([items['institution_5'], '', ''])
+      result.append(['', items['institution_5'], '', ''])
     if items['institution_6']:
-      result.append([items['institution_6'], '', ''])
+      result.append(['', items['institution_6'], '', ''])
     if items['institution_7']:
-      result.append([items['institution_7'], '', ''])
+      result.append(['', items['institution_7'], '', ''])
     if items['institution_8']:
-      result.append([items['institution_8'], '', ''])
-    return {
-      'institutions': result
-    }
+      result.append(['', items['institution_8'], '', ''])
+    return result
   if os.path.exists(file_path('pdf', paperId, 'institutions.json')):
     return read_json(file_path('pdf', paperId, 'institutions.json'))['institutions']
   elif os.path.exists(file_path('doi', paperId, 'institutions.json')):
```
