path: root/scraper
author     Jules Laplace <julescarbon@gmail.com>  2019-02-11 13:50:14 +0100
committer  Jules Laplace <julescarbon@gmail.com>  2019-02-11 13:50:14 +0100
commit     178381871ce0b8b2be7946320f90b0568ba58911 (patch)
tree       edb25065c07444474391d9e59ca223293988cbff /scraper
parent     730a32a51cac1d1b70fdade93d0986b8b4e1ac69 (diff)
update
Diffstat (limited to 'scraper')
-rw-r--r--  scraper/datasets/citation_lookup.csv   4
-rw-r--r--  scraper/s2-final-report.py            14
-rw-r--r--  scraper/util.py                       14
3 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/scraper/datasets/citation_lookup.csv b/scraper/datasets/citation_lookup.csv
index d48c1025..6738bc3f 100644
--- a/scraper/datasets/citation_lookup.csv
+++ b/scraper/datasets/citation_lookup.csv
@@ -116,8 +116,8 @@ ifad,IFAD,Indian Face Age Database: A Database for Face Recognition with Age Var
ifdb,IFDB,"Iranian Face Database with age, pose and expression",066d71fcd997033dce4ca58df924397dfe0b5fd1
ifdb,IFDB,Iranian Face Database and Evaluation with a New Detection Algorithm,066d71fcd997033dce4ca58df924397dfe0b5fd1
iit_dehli_ear,IIT Dehli Ear,Automated human identification using ear imaging,faf40ce28857aedf183e193486f5b4b0a8c478a2
-ijb_a,IJB-A,Pushing the Frontiers of Unconstrained Face Detection and Recognition: IARPA Janus Benchmark A,140c95e53c619eac594d70f6369f518adfea12ef
-ijb_b,IJB-B,IARPA Janus Benchmark-B Face Dataset,0cb2dd5f178e3a297a0c33068961018659d0f443
+ijb_c,IJB-A,Pushing the Frontiers of Unconstrained Face Detection and Recognition: IARPA Janus Benchmark A,140c95e53c619eac594d70f6369f518adfea12ef
+ijb_c,IJB-B,IARPA Janus Benchmark-B Face Dataset,0cb2dd5f178e3a297a0c33068961018659d0f443
ijb_c,IJB-C,IARPA Janus Benchmark C,57178b36c21fd7f4529ac6748614bb3374714e91
ilids_mcts,,"Imagery Library for Intelligent Detection Systems:
The i-LIDS User Guide",0297448f3ed948e136bb06ceff10eccb34e5bb77
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index c99edaee..e8307b8d 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -34,7 +34,6 @@ def process_paper(row, addresses):
address = res['address']
if not len(papers):
return
- for papers[0]['address']
with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
json.dump({
'id': papers[0]['paper_id'],
@@ -189,18 +188,5 @@ def load_megapixels_lookup():
# recs.append(rec)
return lookup
-def load_institutions(paperId):
- if os.path.exists(file_path('pdf', paperId, 'institutions.json')):
- return read_json(file_path('pdf', paperId, 'institutions.json'))['institutions']
- elif os.path.exists(file_path('doi', paperId, 'institutions.json')):
- return read_json(file_path('doi', paperId, 'institutions.json'))['institutions']
- else:
- return []
-
-def data_path(key, paper_id):
- return 'datasets/s2/{}/{}/{}'.format(key, paper_id[0:2], paper_id)
-def file_path(key, paper_id, fn):
- return os.path.join(data_path(key, paper_id), fn)
-
if __name__ == '__main__':
s2_final_report()
diff --git a/scraper/util.py b/scraper/util.py
index d3f4e751..788caa3f 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -303,6 +303,20 @@ class AddressBook (object):
}
return None
+
+def load_institutions(paperId):
+ if os.path.exists(file_path('pdf', paperId, 'institutions.json')):
+ return read_json(file_path('pdf', paperId, 'institutions.json'))['institutions']
+ elif os.path.exists(file_path('doi', paperId, 'institutions.json')):
+ return read_json(file_path('doi', paperId, 'institutions.json'))['institutions']
+ else:
+ return []
+
+def data_path(key, paper_id):
+ return 'datasets/s2/{}/{}/{}'.format(key, paper_id[0:2], paper_id)
+def file_path(key, paper_id, fn):
+ return os.path.join(data_path(key, paper_id), fn)
+
def parallelize(func, rows):
print("Fetching {} items".format(len(rows)))
chunksize = 3
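
For context on the helpers moved into scraper/util.py above, here is a minimal usage sketch of how they might compose when pulling institution metadata for a paper. It assumes the datasets/s2/ layout implied by data_path() and the read_json helper already present in util.py; the paper ID shown is just an illustrative Semantic Scholar hash taken from citation_lookup.csv.

    # sketch.py -- illustrative only, not part of the commit
    from util import load_institutions

    # Example Semantic Scholar paper ID (the IJB-A entry from citation_lookup.csv)
    paper_id = "140c95e53c619eac594d70f6369f518adfea12ef"

    # load_institutions() checks the pdf-derived path first, then the doi-derived one,
    # e.g. datasets/s2/pdf/14/140c95e.../institutions.json, and returns [] if neither exists.
    for inst in load_institutions(paper_id):
        print(inst)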