summary | refs | log | tree | commit | diff
path: root/scraper/s2-doi-report.py
diff options
context:
space:
mode:
author jules@lens <julescarbon@gmail.com> 2019-05-30 14:30:39 +0200
committer jules@lens <julescarbon@gmail.com> 2019-05-30 14:30:39 +0200
commit 4f1d44719221bb8195e32b8f1e97feb4c3e14991 (patch)
tree b147a5186f18413f43c2dfaf0c065aa1e5edc04d /scraper/s2-doi-report.py
parent ec239bc69e292a0c629e019a1edc3aea53109ab1 (diff)
fetching verified papers
Diffstat (limited to 'scraper/s2-doi-report.py')
-rw-r--r-- scraper/s2-doi-report.py | 14
1 file changed, 7 insertions, 7 deletions
diff --git a/scraper/s2-doi-report.py b/scraper/s2-doi-report.py
index ea708de2..c715b647 100644
--- a/scraper/s2-doi-report.py
+++ b/scraper/s2-doi-report.py
@@ -129,7 +129,7 @@ def load_ieee(paper, fn):
write_json(fn.replace('paper.doi', 'ieee.json'), data)
# print(data)
except:
- print('ieee: could not read data')
+ #print('ieee: could not read data')
return None
if 'authors' in data:
affiliations = [ author['affiliation'] for author in data['authors'] if 'affiliation' in author ]
@@ -145,7 +145,7 @@ def load_springer(paper, fn):
try:
soup = BeautifulSoup(f.read(), 'html.parser')
except:
- print('springer: could not read data')
+ # print('springer: could not read data')
return None
items = soup.find_all(class_='affiliation__item')
affiliations = [ ', '.join(item.strings) for item in items ]
@@ -159,7 +159,7 @@ def load_sciencedirect(paper, fn):
try:
soup = BeautifulSoup(f.read(), 'html.parser')
except:
- print('sciencedirect: could not read data')
+ # print('sciencedirect: could not read data')
return None
items = soup.find_all("script", type='application/json', limit=1)
@@ -171,7 +171,7 @@ def load_sciencedirect(paper, fn):
write_json(fn.replace('paper.doi', 'sciencedirect.json'), data)
# print(data)
except:
- print('sciencedirect: json error')
+ # print('sciencedirect: json error')
return None
affiliations = [value['$$'][0]['_'] for value in data['authors']['affiliations'].values()]
@@ -186,7 +186,7 @@ def load_acm(paper, fn):
try:
soup = BeautifulSoup(f.read(), 'html.parser')
except:
- print('acm: could not read data')
+ #print('acm: could not read data')
return None
items = soup.find_all("a", title='Institutional Profile Page')
affiliations = [ item.string for item in items ]
@@ -213,13 +213,13 @@ def load_computerorg(paper, fn):
# return affiliations
def load_elsevier(paper, fn):
- print('elsevier: {}'.format(paper.paper_id))
+ #print('elsevier: {}'.format(paper.paper_id))
if not os.path.exists(doi.old_doi_fn(paper.paper_id)):
with open(fn, 'r') as f:
try:
soup = BeautifulSoup(f.read(), 'html.parser')
except:
- print('elsevier: could not read data')
+ #print('elsevier: could not read data')
return None
item = soup.find_all("input", attrs={"name": 'redirectURL'})[0]
#new_url = unquote(item['value'])