diff options
Diffstat (limited to 'scraper/s2-doi-report.py')
| -rw-r--r-- | scraper/s2-doi-report.py | 28 |
1 file changed, 17 insertions, 11 deletions
diff --git a/scraper/s2-doi-report.py b/scraper/s2-doi-report.py index b10b5da1..1d7bf44a 100644 --- a/scraper/s2-doi-report.py +++ b/scraper/s2-doi-report.py @@ -38,6 +38,10 @@ def doi_report(): domain = url_info['domain'] paper_id = url_info['paper_id'] paper = load_paper(paper_id) + if paper is None: + continue + if paper.data is None: + continue doi_fn = fn.replace('.url', '.doi') address = None if domain in domains: @@ -127,11 +131,13 @@ def load_ieee(paper, fn): except: print('ieee: could not read data') return None - affiliations = [ author['affiliation'] for author in data['authors'] ] - institutions = [ [ paper.paper_id, author['affiliation'], author['affiliation'] ] for author in data['authors'] ] - # print(affiliations) - write_json('{}/{}'.format(paper_path(paper.paper_id), 'institutions.json'), { 'institutions': institutions }) - return affiliations + if 'authors' in data: + affiliations = [ author['affiliation'] for author in data['authors'] if 'affiliation' in author ] + institutions = [ [ paper.paper_id, author['affiliation'], author['affiliation'] ] for author in data['authors'] if 'affiliation' in author ] + # print(affiliations) + write_json('{}/{}'.format(paper_path(paper.paper_id), 'institutions.json'), { 'institutions': institutions }) + return affiliations + return None def load_springer(paper, fn): # print('springer: {}'.format(paper.paper_id)) @@ -216,12 +222,12 @@ def load_elsevier(paper, fn): print('elsevier: could not read data') return None item = soup.find_all("input", attrs={"name": 'redirectURL'})[0] - new_url = unquote(item['value']) - if new_url: - print(new_url) - doi.fetch_doi(paper.paper_id, new_url, replace=True) - else: - print("missing redirect url: {}".format(paper.paper_id)) + #new_url = unquote(item['value']) + #if new_url: + # print(new_url) + # doi.fetch_doi(paper.paper_id, new_url, replace=True) + #else: + # print("missing redirect url: {}".format(paper.paper_id)) # print('elsevier: {}'.format(paper.paper_id)) # with 
open(fn, 'r') as f: # try: |
