summary | refs | log | tree | commit | diff
path: root/scraper
diff options
context:
space:
mode:
Diffstat (limited to 'scraper')
-rw-r--r-- scraper/reports/geocode_papers.html | 1
-rw-r--r-- scraper/s2-final-report.py | 8
-rw-r--r-- scraper/s2-geocode-server.py | 40
-rw-r--r-- scraper/util.py | 3
4 files changed, 31 insertions, 21 deletions
diff --git a/scraper/reports/geocode_papers.html b/scraper/reports/geocode_papers.html
index 529ee9c7..84ffe356 100644
--- a/scraper/reports/geocode_papers.html
+++ b/scraper/reports/geocode_papers.html
@@ -33,5 +33,6 @@ html,body { margin: 0; padding: 0; width: 100%; height: 100%; }
<div id="container">
</div>
</body>
+<script src="/reports/geocode-app.js"></script>
</html>
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 675709ea..58ac481f 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -32,8 +32,8 @@ def process_paper(row, addresses):
res = process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_citations)
if res:
papers.append(res)
- if res['address']:
- address = res['address']
+ if res['address']:
+ address = res['address']
if not len(papers):
return
with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
@@ -48,9 +48,7 @@ def process_paper(row, addresses):
json.dump({
'id': papers[0]['paper_id'],
'paper': papers[0],
- 'address': address,
- 'additional_papers': papers[1:],
- 'citations': [aggregate_citations[key] for key in aggregate_citations.keys()],
+ 'citations': [unknown_citations[key] for key in unknown_citations.keys()],
}, f)
def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_citations):
diff --git a/scraper/s2-geocode-server.py b/scraper/s2-geocode-server.py
index 1c624a52..0b1b0937 100644
--- a/scraper/s2-geocode-server.py
+++ b/scraper/s2-geocode-server.py
@@ -13,15 +13,19 @@ load_dotenv()
from util import *
-app = Flask(__name__, static_url_path="/reports/", static_folder="reports")
+locations_worksheet = fetch_worksheet('paper_locations')
+
+app = Flask(__name__, static_url_path="/reports", static_folder=os.path.abspath("reports"))
# static api route
@app.route('/', methods=['GET'])
def index():
- return app.send_static_file('reports/geocode-papers.html')
+ return app.send_static_file('geocode_papers.html')
+
+@app.errorhandler(404)
+def page_not_found(e):
+ return app.send_static_file('geocode_papers.html')
-# route to list the papers
-# route to get all the un-geocoded citations for a paper
# route to get all the manually geocoded IDs (to dedupe)
# route to add a geocoding for a paper
@@ -36,23 +40,29 @@ def list_locations():
@app.route('/api/papers', methods=['GET'])
def list_papers():
lookup_keys, lines = fetch_google_sheet('citation_lookup')
- lookup = {}
+ paper_lookup = {}
for line in lines:
- lookup[line[0]] = line
- return jsonify({
- 'papers': papers,
- })
-
-@app.route('/api/papers/:citation/', methods=['GET'])
-def list_citations(citation):
+ paper_lookup[line[0]] = line
return jsonify({
+ 'papers': paper_lookup,
})
-@app.route('/api/geocode', methods=['POST'])
-def geocode_paper():
+@app.route('/api/address', methods=['POST'])
+def add_address():
+ # id, title, institution_1, institution_2, institution_3, institution_4, notes
+ locations_worksheet.insert_row([
+ request.form['paper_id'],
+ request.form['title'],
+ request.form['institution_1'],
+ request.form['institution_2'],
+ request.form['institution_3'],
+ request.form['institution_4'],
+ request.form['notes'],
+ ])
return jsonify({
+ 'status': 'ok'
})
if __name__=="__main__":
- app.run("0.0.0.0", debug=False)
+ app.run("0.0.0.0", debug=False)
diff --git a/scraper/util.py b/scraper/util.py
index 6f3fc08b..9b47510a 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -343,7 +343,8 @@ def fetch_paper(s2, paper_id):
def fetch_spreadsheet():
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
- credentials = ServiceAccountCredentials.from_json_keyfile_name('./.creds/Megapixels-ef28f91112a9.json', scope)
+ path = os.path.dirname(os.path.abspath(__file__))
+ credentials = ServiceAccountCredentials.from_json_keyfile_name(os.path.join(path, '.creds/Megapixels-ef28f91112a9.json'), scope)
docid = "1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc"
client = gspread.authorize(credentials)
spreadsheet = client.open_by_key(docid)