summary | refs | log | tree | commit | diff
path: root/scraper
diff options
context:
space:
mode:
author: Jules Laplace <julescarbon@gmail.com> 2019-02-25 16:15:28 +0100
committer: Jules Laplace <julescarbon@gmail.com> 2019-02-25 16:15:28 +0100
commit: 1d3e450b5204baf72765f2a319adc8146ba460ad (patch)
tree: df2bc9f4f1ff905376141b2ed8e795a1fbb1fdaf /scraper
parent: 13d7a450affe8ea4f368a97ea2014faa17702a4c (diff)
check ft field
Diffstat (limited to 'scraper')
-rw-r--r-- scraper/s2-final-report.py 3
-rw-r--r-- scraper/s2-scrape.sh 4
2 files changed, 4 insertions, 3 deletions
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 4b74750a..fb7a78dc 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -19,7 +19,7 @@ def s2_final_report():
megapixels = load_megapixels_lookup()
items = []
for key, item in megapixels.items():
- if item['dataset']['ft_share'] == '1':
+ if 'ft_share' in item['dataset'] and item['dataset']['ft_share'] == 'Y':
items.append((item,))
parallelize(process_paper, items)
@@ -184,6 +184,7 @@ def load_megapixels_lookup():
lookup[paper_key]['dataset'] = dataset_lookup[paper_key]
else:
print("not in datasets lookup:", paper_key)
+ lookup[paper_key]['dataset'] = {}
# recs.append(rec)
return lookup
diff --git a/scraper/s2-scrape.sh b/scraper/s2-scrape.sh
index 3f548e6e..8c5632a0 100644
--- a/scraper/s2-scrape.sh
+++ b/scraper/s2-scrape.sh
@@ -8,8 +8,8 @@ python s2-papers.py
echo ">> s2-dump-ids.py"
python s2-dump-ids.py
-echo ">> s2-dump-ids.py"
-#python s2-extract-papers.py
+# echo ">> s2-extract-papers.py"
+# python s2-extract-papers.py
echo ">> s2-dump-missing-paper-ids.py"
python s2-dump-missing-paper-ids.py