From 2ede97c42b203c37a8a9f98784af4f31f01961f4 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 31 Oct 2018 03:41:37 +0100 Subject: merge --- vendor/scholar.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'vendor') diff --git a/vendor/scholar.py b/vendor/scholar.py index 13ccd439..8070ff0d 100755 --- a/vendor/scholar.py +++ b/vendor/scholar.py @@ -241,11 +241,11 @@ class ScholarConf(object): VERSION = '2.10' LOG_LEVEL = 1 MAX_PAGE_RESULTS = 10 # Current default for per-page results - SCHOLAR_SITE = 'http://scholar.google.com' + SCHOLAR_SITE = 'https://scholar.google.de' # USER_AGENT = 'Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.2.9) Gecko/20100913 Firefox/3.6.9' # Let's update at this point (3/14): - USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:27.0) Gecko/20100101 Firefox/27.0' + USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:63.0) Gecko/20100101 Firefox/63.0' # If set, we will use this file to read/save cookies to enable # cookie use across sessions. @@ -514,6 +514,8 @@ class ScholarArticleParser(object): """Helper, returns full URL in case path isn't one.""" if path.startswith('http://'): return path + if path.startswith('https://'): + return path if not path.startswith('/'): path = '/' + path return self.site + path @@ -1022,6 +1024,8 @@ class ScholarQuerier(object): log_msg='dump of query response HTML', err_msg='results retrieval failed') if html is None: + print(query.get_url()) + print("html is none") return self.parse(html) @@ -1100,10 +1104,9 @@ class ScholarQuerier(object): ScholarUtils.log('debug', 'headers:\n' + str(hdl.info())) ScholarUtils.log('debug', 'data:\n' + html.decode('utf-8')) # For Python 3 ScholarUtils.log('debug', '<<<<' + '-'*68) - - return html except Exception as err: ScholarUtils.log('info', err_msg + ': %s' % err) + print(err.read()) return None -- cgit v1.2.3-70-g09d2