author     jules@lens <julescarbon@gmail.com>  2019-02-16 15:19:13 +0100
committer  jules@lens <julescarbon@gmail.com>  2019-02-16 15:19:13 +0100
commit     68a3a4a479f326fc1446d565510f01cb9d6fa86b (patch)
tree       86fdaa683206ac89b75458bd5e405437946d62dd /scraper/s2-doi-report.py
parent     1ed1e140cbdff2eb1168d35a043f67cbb897264d (diff)
updating doi report
Diffstat (limited to 'scraper/s2-doi-report.py')
-rw-r--r--  scraper/s2-doi-report.py | 28
1 file changed, 17 insertions, 11 deletions
diff --git a/scraper/s2-doi-report.py b/scraper/s2-doi-report.py
index b10b5da1..1d7bf44a 100644
--- a/scraper/s2-doi-report.py
+++ b/scraper/s2-doi-report.py
@@ -38,6 +38,10 @@ def doi_report():
         domain = url_info['domain']
         paper_id = url_info['paper_id']
         paper = load_paper(paper_id)
+        if paper is None:
+            continue
+        if paper.data is None:
+            continue
         doi_fn = fn.replace('.url', '.doi')
         address = None
         if domain in domains:
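
A note on the hunk above: the two new guards make doi_report() skip URL records whose paper could not be loaded or carries no data, instead of failing on attribute access further down. A minimal sketch of the pattern, assuming load_paper and the url_info records behave as the diff implies (the real loop also derives fn and domain from .url files on disk):

def report_loop(url_infos):
    # Sketch only: load_paper is the scraper's own helper, assumed here;
    # the real doi_report() builds its records while walking .url files.
    for url_info in url_infos:
        paper = load_paper(url_info['paper_id'])
        if paper is None or paper.data is None:
            continue  # unloadable or empty papers are skipped, not fatal
        # ... domain and DOI handling continues as in the diff
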
@@ -127,11 +131,13 @@ def load_ieee(paper, fn):
         except:
             print('ieee: could not read data')
             return None
-    affiliations = [ author['affiliation'] for author in data['authors'] ]
-    institutions = [ [ paper.paper_id, author['affiliation'], author['affiliation'] ] for author in data['authors'] ]
-    # print(affiliations)
-    write_json('{}/{}'.format(paper_path(paper.paper_id), 'institutions.json'), { 'institutions': institutions })
-    return affiliations
+    if 'authors' in data:
+        affiliations = [ author['affiliation'] for author in data['authors'] if 'affiliation' in author ]
+        institutions = [ [ paper.paper_id, author['affiliation'], author['affiliation'] ] for author in data['authors'] if 'affiliation' in author ]
+        # print(affiliations)
+        write_json('{}/{}'.format(paper_path(paper.paper_id), 'institutions.json'), { 'institutions': institutions })
+        return affiliations
+    return None
 
 def load_springer(paper, fn):
     # print('springer: {}'.format(paper.paper_id))
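
The load_ieee() change guards two dict lookups: metadata without an 'authors' key now returns None, and authors without an 'affiliation' are filtered out of both lists. A self-contained sketch of the same guards, with a fake record standing in for the parsed IEEE JSON:

# Fake metadata for illustration; the real 'data' is IEEE JSON read from fn.
data = {'authors': [{'name': 'A. Author', 'affiliation': 'Example Univ.'},
                    {'name': 'B. Author'}]}  # second author: no affiliation
affiliations = None
if 'authors' in data:
    affiliations = [a['affiliation'] for a in data['authors']
                    if 'affiliation' in a]
print(affiliations)  # -> ['Example Univ.']; missing keys no longer raise
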
@@ -216,12 +222,12 @@ def load_elsevier(paper, fn):
             print('elsevier: could not read data')
             return None
     item = soup.find_all("input", attrs={"name": 'redirectURL'})[0]
-    new_url = unquote(item['value'])
-    if new_url:
-        print(new_url)
-        doi.fetch_doi(paper.paper_id, new_url, replace=True)
-    else:
-        print("missing redirect url: {}".format(paper.paper_id))
+    #new_url = unquote(item['value'])
+    #if new_url:
+    #    print(new_url)
+    #    doi.fetch_doi(paper.paper_id, new_url, replace=True)
+    #else:
+    #    print("missing redirect url: {}".format(paper.paper_id))
     # print('elsevier: {}'.format(paper.paper_id))
     # with open(fn, 'r') as f:
     #     try:
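
The elsevier hunk comments the redirect-following out rather than deleting it. For reference, a hedged re-wrapping of the disabled logic as a standalone function; follow_elsevier_redirect is a hypothetical name, soup is assumed to be a BeautifulSoup tree of Elsevier's interstitial page, and doi.fetch_doi mirrors the helper used in the diff:

from urllib.parse import unquote

def follow_elsevier_redirect(soup, paper, doi):
    # Read the hidden redirectURL input that Elsevier's interstitial page
    # carries, decode it, and re-fetch the paper at the decoded target.
    item = soup.find_all("input", attrs={"name": "redirectURL"})[0]
    new_url = unquote(item['value'])
    if new_url:
        print(new_url)
        doi.fetch_doi(paper.paper_id, new_url, replace=True)
    else:
        print("missing redirect url: {}".format(paper.paper_id))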