author     jules@lens <julescarbon@gmail.com>  2019-02-16 15:19:13 +0100
committer  jules@lens <julescarbon@gmail.com>  2019-02-16 15:19:13 +0100
commit     68a3a4a479f326fc1446d565510f01cb9d6fa86b (patch)
tree       86fdaa683206ac89b75458bd5e405437946d62dd /scraper/s2-doi-report.py
parent     1ed1e140cbdff2eb1168d35a043f67cbb897264d (diff)
updating doi report
Diffstat (limited to 'scraper/s2-doi-report.py')
-rw-r--r--  scraper/s2-doi-report.py | 28
1 file changed, 17 insertions, 11 deletions
diff --git a/scraper/s2-doi-report.py b/scraper/s2-doi-report.py
index b10b5da1..1d7bf44a 100644
--- a/scraper/s2-doi-report.py
+++ b/scraper/s2-doi-report.py
@@ -38,6 +38,10 @@ def doi_report():
         domain = url_info['domain']
         paper_id = url_info['paper_id']
         paper = load_paper(paper_id)
+        if paper is None:
+            continue
+        if paper.data is None:
+            continue
         doi_fn = fn.replace('.url', '.doi')
         address = None
         if domain in domains:
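
A note on the hunk above: the two new guards make doi_report() skip URL records whose paper could not be loaded or carries no data, instead of failing on attribute access further down. A minimal sketch of the pattern, assuming load_paper and the url_info records behave as the diff implies (the real loop also derives fn and domain from .url files on disk):

def report_loop(url_infos):
    # Sketch only: load_paper is the scraper's own helper, assumed here;
    # the real doi_report() builds its records while walking .url files.
    for url_info in url_infos:
        paper = load_paper(url_info['paper_id'])
        if paper is None or paper.data is None:
            continue  # unloadable or empty papers are skipped, not fatal
        # ... domain and DOI handling continues as in the diff
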
@@ -127,11 +131,13 @@ def load_ieee(paper, fn):
         except:
             print('ieee: could not read data')
             return None
-    affiliations = [ author['affiliation'] for author in data['authors'] ]
-    institutions = [ [ paper.paper_id, author['affiliation'], author['affiliation'] ] for author in data['authors'] ]
-    # print(affiliations)
-    write_json('{}/{}'.format(paper_path(paper.paper_id), 'institutions.json'), { 'institutions': institutions })
-    return affiliations
+    if 'authors' in data:
+        affiliations = [ author['affiliation'] for author in data['authors'] if 'affiliation' in author ]
+        institutions = [ [ paper.paper_id, author['affiliation'], author['affiliation'] ] for author in data['authors'] if 'affiliation' in author ]
+        # print(affiliations)
+        write_json('{}/{}'.format(paper_path(paper.paper_id), 'institutions.json'), { 'institutions': institutions })
+        return affiliations
+    return None
 
 def load_springer(paper, fn):
     # print('springer: {}'.format(paper.paper_id))
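
The load_ieee() change guards two dict lookups: metadata without an 'authors' key now returns None, and authors without an 'affiliation' are filtered out of both lists. A self-contained sketch of the same guards, with a fake record standing in for the parsed IEEE JSON:

# Fake metadata for illustration; the real 'data' is IEEE JSON read from fn.
data = {'authors': [{'name': 'A. Author', 'affiliation': 'Example Univ.'},
                    {'name': 'B. Author'}]}  # second author: no affiliation
affiliations = None
if 'authors' in data:
    affiliations = [a['affiliation'] for a in data['authors']
                    if 'affiliation' in a]
print(affiliations)  # -> ['Example Univ.']; missing keys no longer raise
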
@@ -216,12 +222,12 @@ def load_elsevier(paper, fn):
             print('elsevier: could not read data')
             return None
     item = soup.find_all("input", attrs={"name": 'redirectURL'})[0]
-    new_url = unquote(item['value'])
-    if new_url:
-        print(new_url)
-        doi.fetch_doi(paper.paper_id, new_url, replace=True)
-    else:
-        print("missing redirect url: {}".format(paper.paper_id))
+    #new_url = unquote(item['value'])
+    #if new_url:
+    #    print(new_url)
+    #    doi.fetch_doi(paper.paper_id, new_url, replace=True)
+    #else:
+    #    print("missing redirect url: {}".format(paper.paper_id))
     # print('elsevier: {}'.format(paper.paper_id))
     # with open(fn, 'r') as f:
     #     try:
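
The elsevier hunk comments the redirect-following out rather than deleting it. For reference, a hedged re-wrapping of the disabled logic as a standalone function; follow_elsevier_redirect is a hypothetical name, soup is assumed to be a BeautifulSoup tree of Elsevier's interstitial page, and doi.fetch_doi mirrors the helper used in the diff:

from urllib.parse import unquote

def follow_elsevier_redirect(soup, paper, doi):
    # Read the hidden redirectURL input that Elsevier's interstitial page
    # carries, decode it, and re-fetch the paper at the decoded target.
    item = soup.find_all("input", attrs={"name": "redirectURL"})[0]
    new_url = unquote(item['value'])
    if new_url:
        print(new_url)
        doi.fetch_doi(paper.paper_id, new_url, replace=True)
    else:
        print("missing redirect url: {}".format(paper.paper_id))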