summary | refs | log | tree | commit | diff
path: root/scraper
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2019-02-20 16:19:08 +0100
committer Jules Laplace <julescarbon@gmail.com> 2019-02-20 16:19:08 +0100
commit 881d559cb0491c532264b151ed922c401f30db96 (patch)
tree c3857062d1b155afc71e33ddcfc07b53fcc0bd82 /scraper
parent 7885a180e1b3ddc37ef2192c74a897b911e48a14 (diff)
avoid adding very short cnames
Diffstat (limited to 'scraper')
-rw-r--r-- scraper/s2-geocode-spreadsheet.py 2
-rw-r--r-- scraper/s2-geocode.py 2
2 files changed, 3 insertions, 1 deletions
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py
index b21a8453..98baf4b5 100644
--- a/scraper/s2-geocode-spreadsheet.py
+++ b/scraper/s2-geocode-spreadsheet.py
@@ -26,6 +26,8 @@ def s2_geocode_spreadsheet():
for i, row in enumerate(rows):
# row_tuples.append((i, row,))
cname, name, address, lat, lng, org_type, extra_address, country = row
+ if len(cname) < 3:
+ print("very short cname: {}".format(cname))
if cname == name or cname not in cname_lookup:
cname_lookup[cname] = i
print("built lookup")
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py
index 989c17bf..705f3a17 100644
--- a/scraper/s2-geocode.py
+++ b/scraper/s2-geocode.py
@@ -30,7 +30,7 @@ def s2_geocode(fn):
for i, row in enumerate(rows):
name = row[2]
name = remove_department_name(name)
- if not name:
+ if not name or len(name) < 2:
continue
try:
location = geolocator.geocode(name)