from util import *  # provides read_json
import glob

old_urls = []
new_urls = []
url_list = {}

# Load the existing list of known URLs; each line is "<url> <title>".
with open('embassy-list-4.txt', 'r') as f:
    for line in f.readlines():
        line = line.strip()
        if not line:
            continue
        old_urls.append(line)
        url = line.split(' ')[0]
        url_list[url] = ''

# Scan the per-country link dumps for Flickr photostream URLs
# that are not already in the list.
for f in glob.iglob('./embassy/by_country/*.txt'):
    links = read_json(f)
    for link in links:
        # e.g. https://www.flickr.com/photos/us_mission_canada/37316818631
        url = link['url']
        # Keep only scheme, host, "photos" segment, and username.
        url_partz = url.split('/')[:5]
        if len(url_partz) < 5:
            continue
        username = url_partz[4]
        # Skip numeric Flickr user IDs such as 12345678@N00.
        if '@' in username:
            continue
        url_fix = '/'.join(url_partz)
        if url_fix not in url_list:
            new_urls.append(url_fix + ' ' + link['title'])
            url_list[url_fix] = link['title']

# Print the old list first, then the newly discovered URLs.
for line in old_urls:
    print(line)

for url in sorted(new_urls):
    print(url)
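
# ---------------------------------------------------------------------------
# Note: read_json is assumed to come from util via the wildcard import above;
# its real implementation lives in util.py. A minimal sketch of what it is
# assumed to do (hypothetical, for reference only):
#
#   import json
#
#   def read_json(path):
#       """Load and return the JSON document stored at path."""
#       with open(path, 'r') as fh:
#           return json.load(fh)
# ---------------------------------------------------------------------------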