# Capture browser screenshots of the media source pages listed in a license
# CSV, using headless Chrome driven by Selenium.
#
# WebDriver install notes:
# Chrome
# wget https://chromedriver.storage.googleapis.com/73.0.3683.20/chromedriver_linux64.zip
# Firefox
# wget https://github.com/mozilla/geckodriver/releases/download/v0.24.0/geckodriver-v0.24.0-linux64.tar.gz
# PhantomJS
# npm install -g phantomjs

from os.path import splitext

import click

from app.settings import app_cfg

#/data_store/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs3_media.csv


def _with_scheme(url):
    """Return `url` stripped of surrounding whitespace, prefixed with
    'http://' unless it already starts with an http(s) scheme."""
    url = url.strip()
    if url.startswith(('http://', 'https://')):
        return url
    return 'http://' + url


def _screenshot_relpath(media_id):
    """Return `media_id` with its file extension (if any) swapped for '.png'."""
    # splitext only touches the trailing extension; a str.replace on the
    # suffix would rewrite every occurrence (and explode on an empty suffix).
    return splitext(media_id)[0] + '.png'


@click.command()
@click.option('-i', '--input', 'opt_fp_in', required=True,
              help='Input license data CSV')
@click.option('-o', '--output', 'opt_fp_out', required=True,
              help='Output directory')
@click.pass_context
def cli(ctx, opt_fp_in, opt_fp_out):
    """IJB-C screenshot sources"""
    # NOTE: the docstring above is shown verbatim by click as the command
    # help text, so the details live in comments instead.
    #
    # Reads the CSV at `opt_fp_in` (columns 'Media ID' and 'Media URL'),
    # loads each URL in headless Chrome and saves a PNG screenshot under
    # `opt_fp_out`, at the 'Media ID' path with its extension replaced by
    # '.png'. Rows whose 'Media ID' contains 'frames/' or 'img/' are skipped.
    # Rows with missing values or pages that fail to load are logged and
    # skipped so a single bad entry does not abort the whole run.

    # Heavy imports are kept function-local, as in the original file.
    import time
    from os.path import join

    import pandas as pd
    from tqdm import tqdm
    from selenium import webdriver
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.wait import WebDriverWait
    from selenium.webdriver.common.by import By

    from app.utils import file_utils, logger_utils

    log = logger_utils.Logger.getLogger()

    # Read the CSV before launching the browser so an unreadable input fails
    # fast without spawning a Chrome process.
    df_licenses = pd.read_csv(opt_fp_in)
    log.info(f'{len(df_licenses)} rows')

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-dev-shm-usage')
    # `options=` replaces the deprecated `chrome_options=` keyword.
    driver = webdriver.Chrome(options=chrome_options)

    try:
        driver.set_window_size(1920, 1080)

        for _, df_license in tqdm(df_licenses.iterrows(), total=len(df_licenses)):
            filepath = df_license['Media ID']
            if not isinstance(filepath, str):
                # Empty CSV cells are read by pandas as NaN (a float).
                log.debug(f'skipping row with invalid Media ID: {filepath}')
                continue
            if 'frames/' in filepath or 'img/' in filepath:
                continue

            url = df_license['Media URL']
            if not isinstance(url, str):
                log.debug(f'skipping {filepath}, invalid Media URL: {url}')
                continue
            url = _with_scheme(url)

            log.debug(f'getting: {url}')
            try:
                driver.get(url)
            except WebDriverException as e:
                # Dead or unreachable page: record it and move on.
                log.debug(f'failed to load {url}: {e}')
                continue

            if 'youtube.com' in url:
                try:
                    # Wait (up to 3 s) for the video info panel to be visible.
                    WebDriverWait(driver, 3).until(
                        EC.visibility_of_element_located(
                            (By.CLASS_NAME, 'ytd-video-secondary-info-renderer')))
                except WebDriverException as e:
                    # Best effort: includes TimeoutException; still screenshot.
                    log.debug(f'error: {e}')
                else:
                    time.sleep(1)  # wait for element
            else:
                time.sleep(1)  # wait for element

            fp_out = join(opt_fp_out, _screenshot_relpath(filepath))
            # Project helper; presumably creates the parent directories of
            # fp_out -- confirm against app.utils.file_utils.
            file_utils.mkdirs(fp_out)
            log.debug(f'save to: {fp_out}')
            if not driver.get_screenshot_as_file(fp_out):
                log.debug(f'could not write screenshot: {fp_out}')
    finally:
        # Always shut the browser down, even if the loop raised.
        driver.quit()


# Earlier wait/DOM experiments, kept for reference:
#wait = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-watch-next-secondary-results-renderer')))
#wait = WebDriverWait(driver,3).until(EC.text_to_be_present_in_element_value((By.CLASS_NAME,'yt-next-continuation'), 'show'))
#wait = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-video-secondary-info-renderer')))
#driver.execute_script("document.getElementById('related').style.display = 'None';")

# Selenium expected_conditions, for reference:
#   title_is
#   title_contains
#   presence_of_element_located
#   visibility_of_element_located
#   visibility_of
#   presence_of_all_elements_located
#   text_to_be_present_in_element
#   text_to_be_present_in_element_value
#   frame_to_be_available_and_switch_to_it
#   invisibility_of_element_located
#   element_to_be_clickable - it is Displayed and Enabled.
#   staleness_of
#   element_to_be_selected
#   element_located_to_be_selected
#   element_selection_state_to_be
#   element_located_selection_state_to_be
#   alert_is_present