diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 16:19:08 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-02-20 16:19:08 +0100 |
| commit | 881d559cb0491c532264b151ed922c401f30db96 (patch) | |
| tree | c3857062d1b155afc71e33ddcfc07b53fcc0bd82 /scraper | |
| parent | 7885a180e1b3ddc37ef2192c74a897b911e48a14 (diff) | |
avoid adding very short cnames
Diffstat (limited to 'scraper')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | scraper/s2-geocode-spreadsheet.py | 2 |
| -rw-r--r-- | scraper/s2-geocode.py | 2 |
2 files changed, 3 insertions, 1 deletion
```diff
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py
index b21a8453..98baf4b5 100644
--- a/scraper/s2-geocode-spreadsheet.py
+++ b/scraper/s2-geocode-spreadsheet.py
@@ -26,6 +26,8 @@ def s2_geocode_spreadsheet():
   for i, row in enumerate(rows):
     # row_tuples.append((i, row,))
     cname, name, address, lat, lng, org_type, extra_address, country = row
+    if len(cname) < 3:
+      print("very short cname: {}".format(cname))
     if cname == name or cname not in cname_lookup:
       cname_lookup[cname] = i
   print("built lookup")
diff --git a/scraper/s2-geocode.py b/scraper/s2-geocode.py
index 989c17bf..705f3a17 100644
--- a/scraper/s2-geocode.py
+++ b/scraper/s2-geocode.py
@@ -30,7 +30,7 @@ def s2_geocode(fn):
   for i, row in enumerate(rows):
     name = row[2]
     name = remove_department_name(name)
-    if not name:
+    if not name or len(name) < 2:
       continue
     try:
       location = geolocator.geocode(name)
```
