diff options
| field | value | date |
|---|---|---|
| author | Jules Laplace <julescarbon@gmail.com> | 2019-03-08 17:19:36 +0100 |
| committer | Jules Laplace <julescarbon@gmail.com> | 2019-03-08 17:19:36 +0100 |
| commit | 1dcb61dab45b4891b1b737877d3d95e1211f33b3 (patch) | |
| tree | a13ee4541e0a55dcbdfe5701b79ddd8da6ed8ca9 /scraper | |
| parent | 6ddf79260f153aa52d2b444772c03cae502f4d35 (diff) | |
sortable dataset list
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/s2-final-report.py | 7 | ||||
| -rw-r--r-- | scraper/s2-geocode-spreadsheet.py | 3 |
2 files changed, 9 insertions, 1 deletions
```diff
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index ec4ad25b..123dd201 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -23,6 +23,13 @@ def s2_final_report():
 if 'ft_share' in item['dataset'] and item['dataset']['ft_share'] == 'Y':
 items.append((item,))
 parallelize(process_paper, items)
+ # key name_short name_full purpose url
+ # wild indoor outdoor campus cyberspace parent
+ # child source usernames names year_start year_end year_published
+ # ongoing images videos identities img_per_person num_cameras
+ # faces_persons female male landmarks width height color gray
+ # derivative_of tags size_gb agreement
+ # citations_count
 subprocess.call([
 "s3cmd", "put", "-P", "--recursive",
 DIR_PUBLIC_CITATIONS + '/',
diff --git a/scraper/s2-geocode-spreadsheet.py b/scraper/s2-geocode-spreadsheet.py
index 375c8fde..32d7c669 100644
--- a/scraper/s2-geocode-spreadsheet.py
+++ b/scraper/s2-geocode-spreadsheet.py
@@ -38,7 +38,8 @@ def s2_geocode_spreadsheet():
 if (i % 1000) == 0:
 print("{}...".format(i))
 if row[1] in seen:
- continue
+ pass
+ # continue
 seen[row[1]] = True
 hit_api = s2_geocode_row(i, row)
 if hit_api:
```
