summary refs log tree commit diff
path: root/scraper/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/util.py')
-rw-r--r-- scraper/util.py 30
1 files changed, 21 insertions, 9 deletions
diff --git a/scraper/util.py b/scraper/util.py
index 6c671cec..1b1a0a9b 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -195,25 +195,37 @@ class RawPaper(object):
@property
def authors(self):
return [ (author[0]['ids'][0] if len(author[0]['ids']) else '', author[0]['name']) for author in self.data['authors'] ]
- @property
- def pdf_link(self):
- if 'primaryPaperLink' in self.data:
- link = self.data['primaryPaperLink']
+ def paper_links(self):
+ def url_part(link):
if type(link) == dict and 'url' in link:
return link['url']
return link
- return None
+ paper_links = []
+ if 'primaryPaperLink' in self.data:
+ paper_links.append(url_part(self.data['primaryPaperLink']))
+ if 'alternatePaperLinks' in self.data:
+ for link in self.data['alternatePaperLinks']:
+ paper_links.append(url_part(link))
+ def pdf_links(self):
+ return [ link for link in self.paper_links() if 'pdf' in link ]
+ def doi_links(self):
+ return [ link for link in self.paper_links() if 'pdf' not in link ]
+ @property
+ def pdf_link(self):
+ links = self.pdf_links()
+ return links[0] if len(links) else None
def record(self):
return [ self.paper_id, self.title, self.journal, self.year ]
def load_paper(paper_id):
- if os.path.exists(paper_path('db_papers', paper_id)):
- # print('db paper')
- return DbPaper(paper_id)
+ # no longer using DB papers :p
+ # if os.path.exists(paper_path('db_papers', paper_id))
+ # print('db paper')
+ # return DbPaper(paper_id)
if os.path.exists(paper_path('raw_papers', paper_id)):
# print('raw paper')
return RawPaper(paper_id)
- print('no paper')
+ print('no raw paper: {}'.format(paper_id))
return None
def dedupe(a):