summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: jules@lens <julescarbon@gmail.com> 2018-12-07 18:50:56 +0100
committer: jules@lens <julescarbon@gmail.com> 2018-12-07 18:50:56 +0100
commit: 2db3a85c09b5c578b3b62c3e3615b89db6ed0f00 (patch)
tree: ac8afc3c65c988edae70085b3abc5c2a172d48e1
parent: afc2b03c1fd9151a7b4c75f164e2785fc62441b9 (diff)
parent: 588c96ab6d38f30bbef3aa773163b36838538355 (diff)
Merge branch 'master' of asdf.us:megapixels_dev
-rw-r--r-- scraper/README.md 8
-rw-r--r-- scraper/s2-extract-papers.py 2
-rw-r--r-- scraper/s2-raw-papers.py 4
-rw-r--r-- site/assets/js/app/face.js 14
-rw-r--r-- site/templates/home.html 1
5 files changed, 25 insertions, 4 deletions
diff --git a/scraper/README.md b/scraper/README.md
index 782fa30a..318bba9a 100644
--- a/scraper/README.md
+++ b/scraper/README.md
@@ -36,18 +36,24 @@ The Open Research Corpus (ORC) is produced by the Allen Institute / Semantic Sch
http://labs.semanticscholar.org/corpus/
+We do a two-stage fetch process as only about 66% of their papers are in this dataset.
+
### s2-search.py
Loads titles from citations file and queries the S2 search API to get paper IDs, then uses the paper IDs from the search entries to query the S2 papers API to get first-degree citations, authors, etc.
### s2-dump-ids.py
-Extract all the paper IDs and citation IDs from the queried papers.
+Dump all the paper IDs and citation IDs from the queried papers.
### s2-extract-papers.py
Extracts papers from the ORC dataset which have been queried from the API.
+### s2-dump-missing-paper-ids.py
+
+Dump the citation IDs that were not found in the ORC dataset.
+
### s2-raw-papers.py
Some papers are not in the ORC dataset and must be scraped from S2 directly.
diff --git a/scraper/s2-extract-papers.py b/scraper/s2-extract-papers.py
index 85a85fa3..1969643a 100644
--- a/scraper/s2-extract-papers.py
+++ b/scraper/s2-extract-papers.py
@@ -5,7 +5,7 @@ import click
from util import *
S2_DIR = '/media/blue/undisclosed/semantic-scholar/corpus-2018-05-03'
-DATA_DIR = '/home/lens/undisclosed/megapixels_dev/datasets/s2/db_papers'
+DATA_DIR = '/home/lens/undisclosed/megapixels_dev/scraper/datasets/s2/db_papers'
@click.command()
@click.option('--fn', '-f', default='ids.json', help='List of IDs to extract from the big dataset.')
diff --git a/scraper/s2-raw-papers.py b/scraper/s2-raw-papers.py
index 2323ec63..089055da 100644
--- a/scraper/s2-raw-papers.py
+++ b/scraper/s2-raw-papers.py
@@ -29,10 +29,10 @@ def fetch_raw_paper(paper_id):
paper = s2.raw_paper(paper_id)
if paper is None:
print("Got empty paper?? {}".format(paper_id))
- time.sleep(random.randint(5, 10))
+ # time.sleep(random.randint(5, 10))
return None
write_json(paper_fn, paper)
- time.sleep(random.randint(5, 10))
+ # time.sleep(random.randint(5, 10))
return paper
def make_raw_paper_path(paper_id):
diff --git a/site/assets/js/app/face.js b/site/assets/js/app/face.js
index e8bcd313..bdaa0313 100644
--- a/site/assets/js/app/face.js
+++ b/site/assets/js/app/face.js
@@ -61,6 +61,20 @@ var face = (function(){
container.appendChild(renderer.domElement)
document.body.addEventListener('mousemove', onMouseMove)
// renderer.domElement.addEventListener('mousedown', swap)
+ // oktween.add({
+ // obj: el.style,
+ // units: "px",
+ // from: { left: 0 },
+ // to: { left: 100 },
+ // duration: 1000,
+ // easing: oktween.easing.circ_out,
+ // update: function(obj){
+ // console.log(obj.left)
+ // }
+ // finished: function(){
+ // console.log("done")
+ // }
+ // })
setInterval(swap, 5000)
}
function build(points) {
diff --git a/site/templates/home.html b/site/templates/home.html
index 436c1ddf..59f8cf76 100644
--- a/site/templates/home.html
+++ b/site/templates/home.html
@@ -28,5 +28,6 @@
{% block scripts %}
<script src="/assets/js/vendor/three.min.js"></script>
<script src="/assets/js/vendor/three.meshline.js"></script>
+<script src="/assets/js/vendor/oktween.js"></script>
<script src="/assets/js/app/face.js"></script>
{% endblock %}