summary refs log tree commit diff
path: root/scraper/s2-merge-csv.py
blob: 301f47ae307700628d49f64801b66951992ca1ae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import os
import glob
import time
import simplejson as json
import click
import operator
from util import *
import random

@click.command()
@click.option('--path', '-d', default='report/institutions_geocoded', help='Path to CSVs')
def s2_merge_csv(path):
  """Merge every CSV found in PATH into one de-duplicated file, PATH.csv.

  Rows are keyed on their first column and the first row seen for a key
  wins. Input files are visited in sorted filename order so the winning
  row is reproducible. The merged rows are written sorted by key.
  """
  print(path)
  lookup = {}
  # glob returns matches in filesystem-dependent order; sorting makes the
  # "first row wins" de-duplication deterministic across runs and machines.
  for fn in sorted(glob.iglob('{}/*.csv'.format(path))):
    # NOTE(review): read_csv is not defined in this file (presumably it comes
    # from `util`); it is assumed to return an iterable of indexable rows
    # when called with keys=False -- confirm against the helper.
    for row in read_csv(fn, keys=False):
      if not row:
        # Blank lines parse to empty rows; they carry no key, so skip them
        # rather than crash on row[0].
        continue
      # setdefault keeps the first row stored for each key.
      lookup.setdefault(row[0], row)
  deduped = [lookup[key] for key in sorted(lookup)]
  # Drop trailing path separators so `--path dir/` writes `dir.csv` next to
  # the directory instead of a hidden `dir/.csv` inside it.
  separators = os.sep + (os.altsep or '')
  out_base = path.rstrip(separators) or path
  write_csv('{}.csv'.format(out_base), keys=None, rows=deduped)

# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  s2_merge_csv()