summary | refs | log | tree | commit | diff
path: root/scraper/util.py
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2018-12-16 15:02:59 +0100
committer Jules Laplace <julescarbon@gmail.com> 2018-12-16 15:02:59 +0100
commit 110f3a34f1f36d0ea999d4aa34bbe66d5f2a01da (patch)
tree f21fbeccb6a7e8d3af5d5c537ed5931ecbd62d7e /scraper/util.py
parent 2cb31d4999649a22a0ac659a59a0aa0a0f7a241e (diff)
skip empty, pull citations again
Diffstat (limited to 'scraper/util.py')
-rw-r--r-- scraper/util.py 16
1 files changed, 10 insertions, 6 deletions
diff --git a/scraper/util.py b/scraper/util.py
index a435f91a..c02f018c 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -259,7 +259,7 @@ class AddressBook (object):
def __init__(self):
entities = {}
lookup = {}
- data = fetch_google_sheet()
+ keys, data = fetch_google_sheet()
# keys, data = read_csv('reports/pdf_institutions_deduped.csv', keys=True)
for index, line in enumerate(data):
if line[0] == line[1] or line[0] not in entities:
@@ -283,16 +283,20 @@ class AddressBook (object):
return self.data[index]
return None
-def fetch_worksheet():
+def fetch_spreadsheet():
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name('./.creds/Megapixels-ef28f91112a9.json', scope)
docid = "1denb7TjYsN9igHyvYah7fQ0daABW32Z30lwV7QrDJQc"
client = gspread.authorize(credentials)
spreadsheet = client.open_by_key(docid)
- return spreadsheet.worksheet("institutions")
+ return spreadsheet
-def fetch_google_sheet():
- rows = fetch_worksheet().get_all_values()
+def fetch_worksheet(name="institutions"):
+ spreadsheet = fetch_spreadsheet()
+ return spreadsheet.worksheet(name)
+
+def fetch_google_sheet(name="institutions"):
+ rows = fetch_worksheet(name).get_all_values()
keys = rows[0]
lines = rows[1:]
- return lines
+ return keys, lines