summary refs log tree commit diff
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2018-10-31 03:41:37 +0100
committer Jules Laplace <julescarbon@gmail.com> 2018-10-31 03:41:37 +0100
commit 2ede97c42b203c37a8a9f98784af4f31f01961f4 (patch)
tree 70bec449d9d8af3238efb1ae995dbda922469b89
parent a16c3cf801b70670dffc7041d92f7ccec56a0e18 (diff)
merge
-rw-r--r-- datasets/scholar/entries/.csv (renamed from datasets/scholar/entries/Pushing the frontiers of unconstrained face detection and recognition: IARPA Janus Benchmark A.csv) 0
-rw-r--r-- datasets/scholar/entries/Pushing the frontiers of unconstrained face detection and recognition: IARPA Janus Benchmark A .csv 0
-rw-r--r-- fetch-entries.py 14
-rwxr-xr-x vendor/scholar.py 11
4 files changed, 15 insertions, 10 deletions
diff --git a/datasets/scholar/entries/Pushing the frontiers of unconstrained face detection and recognition: IARPA Janus Benchmark A.csv b/datasets/scholar/entries/.csv
index e69de29b..e69de29b 100644
--- a/datasets/scholar/entries/Pushing the frontiers of unconstrained face detection and recognition: IARPA Janus Benchmark A.csv
+++ b/datasets/scholar/entries/.csv
diff --git a/datasets/scholar/entries/Pushing the frontiers of unconstrained face detection and recognition: IARPA Janus Benchmark A .csv b/datasets/scholar/entries/Pushing the frontiers of unconstrained face detection and recognition: IARPA Janus Benchmark A .csv
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/datasets/scholar/entries/Pushing the frontiers of unconstrained face detection and recognition: IARPA Janus Benchmark A .csv
diff --git a/fetch-entries.py b/fetch-entries.py
index 37cd37f9..8c50d9a1 100644
--- a/fetch-entries.py
+++ b/fetch-entries.py
@@ -16,22 +16,24 @@ def fetch_entries(index):
label = line[0]
title = line[1]
entries_fn = './datasets/scholar/entries/{}.csv'.format(title)
- print(entries_fn)
+ # print(entries_fn)
if not os.path.exists(entries_fn):
with open(entries_fn, 'w') as f:
- t = re.sub(r'\W+', '', title)
+ t = re.sub(r'[^-0-9a-zA-Z ]+', '', title)
+ print(t)
subprocess.call([
'./vendor/scholar.py',
'-t', '-A', t, '--csv',
], stdout=f)
- time.sleep(random.randint(20, 32))
+ # time.sleep(random.randint(20, 32))
def read_citation_list(index):
filename = './datasets/citations.csv'
- fn, ext = os.path.splitext(filename)
- in_fn = fn + '-' + str(index) + ext
- with open(in_fn, 'r') as f:
+ # fn, ext = os.path.splitext(filename)
+ # in_fn = fn + '-' + str(index) + ext
+ # with open(in_fn, 'r') as f:
+ with open(filename, 'r') as f:
reader = csv.reader(f)
lines = list(reader)
keys = lines[0]
diff --git a/vendor/scholar.py b/vendor/scholar.py
index 13ccd439..8070ff0d 100755
--- a/vendor/scholar.py
+++ b/vendor/scholar.py
@@ -241,11 +241,11 @@ class ScholarConf(object):
VERSION = '2.10'
LOG_LEVEL = 1
MAX_PAGE_RESULTS = 10 # Current default for per-page results
- SCHOLAR_SITE = 'http://scholar.google.com'
+ SCHOLAR_SITE = 'https://scholar.google.de'
# USER_AGENT = 'Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.9.2.9) Gecko/20100913 Firefox/3.6.9'
# Let's update at this point (3/14):
- USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:27.0) Gecko/20100101 Firefox/27.0'
+ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:63.0) Gecko/20100101 Firefox/63.0'
# If set, we will use this file to read/save cookies to enable
# cookie use across sessions.
@@ -514,6 +514,8 @@ class ScholarArticleParser(object):
"""Helper, returns full URL in case path isn't one."""
if path.startswith('http://'):
return path
+ if path.startswith('https://'):
+ return path
if not path.startswith('/'):
path = '/' + path
return self.site + path
@@ -1022,6 +1024,8 @@ class ScholarQuerier(object):
log_msg='dump of query response HTML',
err_msg='results retrieval failed')
if html is None:
+ print(query.get_url())
+ print("html is none")
return
self.parse(html)
@@ -1100,10 +1104,9 @@ class ScholarQuerier(object):
ScholarUtils.log('debug', 'headers:\n' + str(hdl.info()))
ScholarUtils.log('debug', 'data:\n' + html.decode('utf-8')) # For Python 3
ScholarUtils.log('debug', '<<<<' + '-'*68)
-
- return html
except Exception as err:
ScholarUtils.log('info', err_msg + ': %s' % err)
+ print(err.read())
return None