summary | refs | log | tree | commit | diff
path: root/scraper
diff options
context:
space:
mode:
author: jules@lens <julescarbon@gmail.com> 2019-02-18 14:07:19 +0100
committer: jules@lens <julescarbon@gmail.com> 2019-02-18 14:07:19 +0100
commit: 362c0ce0cfb7eaaee77510356b3b3a31771e5768 (patch)
tree: 3ce4c30e173aef79daae865cd16c52b283575948 /scraper
parent: 3fc5bb42b0dd94b56d0f11b1568d30a1ff835629 (diff)
adding our papers
Diffstat (limited to 'scraper')
-rw-r--r--  scraper/s2-final-report.py  2
-rw-r--r--  scraper/util.py  20
2 files changed, 10 insertions, 12 deletions
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index c0e005be..a3209d24 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -94,7 +94,7 @@ def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_
paper_institutions = load_institutions(paper.paper_id, paper_location_lookup)
paper_address = None
for inst in sorted(paper_institutions, key=operator.itemgetter(1)):
- # print(inst[1])
+ print(inst[1])
institution = inst[1]
if paper_address is None:
paper_address = addresses.findObject(institution)
diff --git a/scraper/util.py b/scraper/util.py
index 0cffd5d2..0c3e2169 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -343,24 +343,22 @@ def load_institutions(paperId, paper_location_lookup={}):
items = paper_location_lookup[paperId]
result = []
if items['institution_1']:
- result.append([items['institution_1'], '', ''])
+ result.append(['', items['institution_1'], '', ''])
if items['institution_2']:
- result.append([items['institution_2'], '', ''])
+ result.append(['', items['institution_2'], '', ''])
if items['institution_3']:
- result.append([items['institution_3'], '', ''])
+ result.append(['', items['institution_3'], '', ''])
if items['institution_4']:
- result.append([items['institution_4'], '', ''])
+ result.append(['', items['institution_4'], '', ''])
if items['institution_5']:
- result.append([items['institution_5'], '', ''])
+ result.append(['', items['institution_5'], '', ''])
if items['institution_6']:
- result.append([items['institution_6'], '', ''])
+ result.append(['', items['institution_6'], '', ''])
if items['institution_7']:
- result.append([items['institution_7'], '', ''])
+ result.append(['', items['institution_7'], '', ''])
if items['institution_8']:
- result.append([items['institution_8'], '', ''])
- return {
- 'institutions': result
- }
+ result.append(['', items['institution_8'], '', ''])
+ return result
if os.path.exists(file_path('pdf', paperId, 'institutions.json')):
return read_json(file_path('pdf', paperId, 'institutions.json'))['institutions']
elif os.path.exists(file_path('doi', paperId, 'institutions.json')):