diff options
Diffstat (limited to 'scraper/util.py')
| -rw-r--r-- | scraper/util.py | 30 |
1 files changed, 21 insertions, 9 deletions
diff --git a/scraper/util.py b/scraper/util.py
index 6c671cec..1b1a0a9b 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -195,25 +195,37 @@ class RawPaper(object):
     @property
     def authors(self):
         return [ (author[0]['ids'][0] if len(author[0]['ids']) else '', author[0]['name']) for author in self.data['authors'] ]
-    @property
-    def pdf_link(self):
-        if 'primaryPaperLink' in self.data:
-            link = self.data['primaryPaperLink']
+    def paper_links(self):
+        def url_part(link):
             if type(link) == dict and 'url' in link:
                 return link['url']
             return link
-        return None
+        paper_links = []
+        if 'primaryPaperLink' in self.data:
+            paper_links.append(url_part(self.data['primaryPaperLink']))
+        if 'alternatePaperLinks' in self.data:
+            for link in self.data['alternatePaperLinks']:
+                paper_links.append(url_part(link))
+    def pdf_links(self):
+        return [ link for link in self.paper_links() if 'pdf' in link ]
+    def doi_links(self):
+        return [ link for link in self.paper_links() if 'pdf' not in link ]
+    @property
+    def pdf_link(self):
+        links = self.pdf_links()
+        return links[0] if len(links) else None
     def record(self):
         return [ self.paper_id, self.title, self.journal, self.year ]
 
 def load_paper(paper_id):
-    if os.path.exists(paper_path('db_papers', paper_id)):
-        # print('db paper')
-        return DbPaper(paper_id)
+    # no longer using DB papers :p
+    # if os.path.exists(paper_path('db_papers', paper_id))
+    # print('db paper')
+    # return DbPaper(paper_id)
     if os.path.exists(paper_path('raw_papers', paper_id)):
         # print('raw paper')
         return RawPaper(paper_id)
-    print('no paper')
+    print('no raw paper: {}'.format(paper_id))
     return None
 
 def dedupe(a):
