diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-13 02:00:50 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-13 02:00:50 +0100 |
| commit | dc1889f15ab1b1338c557cda0b1bcd989e1fdf9b (patch) | |
| tree | 9b230694e37de6a8c18e5af311e1bf6bef6398e1 /scraper | |
| parent | 63133aa1bda1ef101772b1d85e27b8d320d32412 (diff) | |
update
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/reports/geocode_papers.html | 1 |
| -rw-r--r-- | scraper/s2-final-report.py | 8 |
| -rw-r--r-- | scraper/s2-geocode-server.py | 40 |
| -rw-r--r-- | scraper/util.py | 3 |
4 files changed, 31 insertions, 21 deletions
diff --git a/scraper/reports/geocode_papers.html b/scraper/reports/geocode_papers.html index 529ee9c7..84ffe356 100644 --- a/scraper/reports/geocode_papers.html +++ b/scraper/reports/geocode_papers.html @@ -33,5 +33,6 @@ html,body { margin: 0; padding: 0; width: 100%; height: 100%; } <div id="container"> </div> </body> +<script src="/reports/geocode-app.js"></script> </html> diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py index 675709ea..58ac481f 100644 --- a/scraper/s2-final-report.py +++ b/scraper/s2-final-report.py @@ -32,8 +32,8 @@ def process_paper(row, addresses): res = process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_citations) if res: papers.append(res) - if res['address']: - address = res['address'] + if res['address']: + address = res['address'] if not len(papers): return with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f: @@ -48,9 +48,7 @@ def process_paper(row, addresses): json.dump({ 'id': papers[0]['paper_id'], 'paper': papers[0], - 'address': address, - 'additional_papers': papers[1:], - 'citations': [aggregate_citations[key] for key in aggregate_citations.keys()], + 'citations': [unknown_citations[key] for key in unknown_citations.keys()], }, f) def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_citations): diff --git a/scraper/s2-geocode-server.py b/scraper/s2-geocode-server.py index 1c624a52..0b1b0937 100644 --- a/scraper/s2-geocode-server.py +++ b/scraper/s2-geocode-server.py @@ -13,15 +13,19 @@ load_dotenv() from util import * -app = Flask(__name__, static_url_path="/reports/", static_folder="reports") +locations_worksheet = fetch_worksheet('paper_locations') + +app = Flask(__name__, static_url_path="/reports", static_folder=os.path.abspath("reports")) # static api route @app.route('/', methods=['GET']) def index(): - return app.send_static_file('reports/geocode-papers.html') + return app.send_static_file('geocode_papers.html') + 
+@app.errorhandler(404) +def page_not_found(e): + return app.send_static_file('geocode_papers.html') -# route to list the papers -# route to get all the un-geocoded citations for a paper # route to get all the manually geocoded IDs (to dedupe) # route to add a geocoding for a paper @@ -36,23 +40,29 @@ def list_locations(): @app.route('/api/papers', methods=['GET']) def list_papers(): lookup_keys, lines = fetch_google_sheet('citation_lookup') - lookup = {} + paper_lookup = {} for line in lines: - lookup[line[0]] = line - return jsonify({ - 'papers': papers, - }) - -@app.route('/api/papers/:citation/', methods=['GET']) -def list_citations(citation): + paper_lookup[line[0]] = line return jsonify({ + 'papers': paper_lookup, }) -@app.route('/api/geocode', methods=['POST']) -def geocode_paper(): +@app.route('/api/address', methods=['POST']) +def add_address(): + # id, title, institution_1, institution_2, institution_3, institution_4, notes + locations_worksheet.insert_row([ + request.form['paper_id'], + request.form['title'], + request.form['institution_1'], + request.form['institution_2'], + request.form['institution_3'], + request.form['institution_4'], + request.form['notes'], + ]) return jsonify({ + 'status': 'ok' }) if __name__=="__main__": - app.run("0.0.0.0", debug=False) + app.run("0.0.0.0", debug=False) diff --git a/scraper/util.py b/scraper/util.py index 6f3fc08b..9b47510a 100644 --- a/scraper/util.py +++ b/scraper/util.py @@ -343,7 +343,8 @@ def fetch_paper(s2, paper_id): def fetch_spreadsheet(): scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive'] - credentials = ServiceAccountCredentials.from_json_keyfile_name('./.creds/Megapixels-ef28f91112a9.json', scope) + path = os.path.dirname(os.path.abspath(__file__)) + credentials = ServiceAccountCredentials.from_json_keyfile_name(os.path.join(path, '.creds/Megapixels-ef28f91112a9.json'), scope) docid = "1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc" client = 
gspread.authorize(credentials) spreadsheet = client.open_by_key(docid) |
