From 4f1d44719221bb8195e32b8f1e97feb4c3e14991 Mon Sep 17 00:00:00 2001
From: "jules@lens"
Date: Thu, 30 May 2019 14:30:39 +0200
Subject: fetching verified papers

---
 scraper/s2-doi-report.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/scraper/s2-doi-report.py b/scraper/s2-doi-report.py
index ea708de2..c715b647 100644
--- a/scraper/s2-doi-report.py
+++ b/scraper/s2-doi-report.py
@@ -129,7 +129,7 @@ def load_ieee(paper, fn):
             write_json(fn.replace('paper.doi', 'ieee.json'), data)
             # print(data)
         except:
-            print('ieee: could not read data')
+            #print('ieee: could not read data')
             return None
     if 'authors' in data:
         affiliations = [ author['affiliation'] for author in data['authors'] if 'affiliation' in author ]
@@ -145,7 +145,7 @@ def load_springer(paper, fn):
         try:
             soup = BeautifulSoup(f.read(), 'html.parser')
         except:
-            print('springer: could not read data')
+            # print('springer: could not read data')
             return None
     items = soup.find_all(class_='affiliation__item')
     affiliations = [ ', '.join(item.strings) for item in items ]
@@ -159,7 +159,7 @@ def load_sciencedirect(paper, fn):
         try:
             soup = BeautifulSoup(f.read(), 'html.parser')
         except:
-            print('sciencedirect: could not read data')
+            # print('sciencedirect: could not read data')
             return None
 
     items = soup.find_all("script", type='application/json', limit=1)
@@ -171,7 +171,7 @@ def load_sciencedirect(paper, fn):
             write_json(fn.replace('paper.doi', 'sciencedirect.json'), data)
             # print(data)
         except:
-            print('sciencedirect: json error')
+            # print('sciencedirect: json error')
             return None
 
     affiliations = [value['$$'][0]['_'] for value in data['authors']['affiliations'].values()]
@@ -186,7 +186,7 @@ def load_acm(paper, fn):
         try:
             soup = BeautifulSoup(f.read(), 'html.parser')
         except:
-            print('acm: could not read data')
+            #print('acm: could not read data')
             return None
     items = soup.find_all("a", title='Institutional Profile Page')
     affiliations = [ item.string for item in items ]
@@ -213,13 +213,13 @@ def load_computerorg(paper, fn):
 #     return affiliations
 
 def load_elsevier(paper, fn):
-    print('elsevier: {}'.format(paper.paper_id))
+    #print('elsevier: {}'.format(paper.paper_id))
     if not os.path.exists(doi.old_doi_fn(paper.paper_id)):
         with open(fn, 'r') as f:
             try:
                 soup = BeautifulSoup(f.read(), 'html.parser')
             except:
-                print('elsevier: could not read data')
+                #print('elsevier: could not read data')
                 return None
     item = soup.find_all("input", attrs={"name": 'redirectURL'})[0]
     #new_url = unquote(item['value'])
-- 
cgit v1.2.3-70-g09d2