From 37da49ae72b78bc771441b73e155a7cd20371430 Mon Sep 17 00:00:00 2001 From: adamhrv Date: Sun, 10 Feb 2019 16:23:15 +0100 Subject: update --- megapixels/commands/datasets/ijb_screenshot.py | 105 ++++++++++++++ megapixels/commands/datasets/ijb_screenshot_mt.py | 156 +++++++++++++++++++++ megapixels/commands/demo/face_3ddfa.py | 3 + .../commands/processor/_old_files_to_face_rois.py | 2 +- megapixels/commands/visualize/weasyprinter.py | 54 +++++++ 5 files changed, 319 insertions(+), 1 deletion(-) create mode 100644 megapixels/commands/datasets/ijb_screenshot.py create mode 100644 megapixels/commands/datasets/ijb_screenshot_mt.py create mode 100644 megapixels/commands/visualize/weasyprinter.py (limited to 'megapixels/commands') diff --git a/megapixels/commands/datasets/ijb_screenshot.py b/megapixels/commands/datasets/ijb_screenshot.py new file mode 100644 index 00000000..e6940d88 --- /dev/null +++ b/megapixels/commands/datasets/ijb_screenshot.py @@ -0,0 +1,105 @@ +# Chrome +# wget https://chromedriver.storage.googleapis.com/73.0.3683.20/chromedriver_linux64.zip +# Firefox +# wget https://github.com/mozilla/geckodriver/releases/download/v0.24.0/geckodriver-v0.24.0-linux64.tar.gz +# PhantomJS +# npm install -g phantomjs + +import click + +from app.settings import app_cfg + +#/data_store/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs3_media.csv + +@click.command() +@click.option('-i', '--input', 'opt_fp_in', required=True, + help='Input license data CSV') +@click.option('-o', '--output', 'opt_fp_out', required=True, + help='Output directory') +@click.pass_context +def cli(ctx, opt_fp_in, opt_fp_out): + """IJB-C screenshot sources""" + + import sys + from glob import glob + from os.path import join + from pathlib import Path + import time + + import pandas as pd + import cv2 as cv + from tqdm import tqdm + + from selenium import webdriver + from selenium.webdriver.support import expected_conditions as EC + from selenium.webdriver.support.wait import WebDriverWait + from selenium.webdriver.common.by import By + + from app.utils import file_utils, im_utils, logger_utils + + log = logger_utils.Logger.getLogger() + + chrome_options = webdriver.ChromeOptions() + chrome_options.add_argument('--no-sandbox') + chrome_options.add_argument('--headless') + chrome_options.add_argument('--disable-dev-shm-usage') + driver = webdriver.Chrome(chrome_options=chrome_options) + driver.set_window_size(1920,1080) + + df_licenses = pd.read_csv(opt_fp_in) + log.info(f'{len(df_licenses)} rows') + + for df_idx, df_license in tqdm(df_licenses.iterrows(), total=len(df_licenses)): + filepath = df_license['Media ID'] + if 'frames/' in filepath or 'img/' in filepath: + continue + url = df_license['Media URL'] + if not ('http://' in url or 'https://' in url): + url = 'http://' + url + log.debug(f'getting: {url}') + driver.get(url) + if 'youtube.com' in url: + try: + wait = WebDriverWait(driver,3).until(EC.visibility_of_element_located((By.CLASS_NAME,'ytd-video-secondary-info-renderer'))) + time.sleep(1) # wait for element + except Exception as e: + log.debug(f'error: {e}') + pass + else: + wait = WebDriverWait(driver,5) + time.sleep(1) # wait for element + + fp_media = filepath.replace(Path(filepath).suffix, '.png') + fp_out = join(opt_fp_out, fp_media) + file_utils.mkdirs(fp_out) + log.debug(f'save to: {fp_out}') + driver.get_screenshot_as_file(fp_out) + + driver.quit() + + + +#wait = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-watch-next-secondary-results-renderer'))) +#wait = WebDriverWait(driver,3).until(EC.text_to_be_present_in_element_value((By.CLASS_NAME,'yt-next-continuation'), 'show')) +#wait = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-video-secondary-info-renderer'))) +#driver.execute_script("document.getElementById('related').style.display = 'None';") + +''' +title_is +title_contains +presence_of_element_located +visibility_of_element_located +visibility_of +presence_of_all_elements_located +text_to_be_present_in_element +text_to_be_present_in_element_value +frame_to_be_available_and_switch_to_it +invisibility_of_element_located +element_to_be_clickable - it is Displayed and Enabled. +staleness_of +element_to_be_selected +element_located_to_be_selected +element_selection_state_to_be +element_located_selection_state_to_be +alert_is_present +''' \ No newline at end of file diff --git a/megapixels/commands/datasets/ijb_screenshot_mt.py b/megapixels/commands/datasets/ijb_screenshot_mt.py new file mode 100644 index 00000000..616893c7 --- /dev/null +++ b/megapixels/commands/datasets/ijb_screenshot_mt.py @@ -0,0 +1,156 @@ +"""Create screenshots for YouTube.com URLs in the IJB dataset + +TODO +- grey out boxes in sidebar +- resize driver screenshot area to include author text + +Installing webdrivers: + +Chrome +wget https://chromedriver.storage.googleapis.com/73.0.3683.20/chromedriver_linux64.zip + +Firefox +wget https://github.com/mozilla/geckodriver/releases/download/v0.24.0/geckodriver-v0.24.0-linux64.tar.gz + +PhantomJS +npm install -g phantomjs +""" + +import click + +from app.settings import app_cfg + +#/data_store/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs3_media.csv + +@click.command() +@click.option('-i', '--input', 'opt_fp_in', required=True, + help='Input license data CSV') +@click.option('-o', '--output', 'opt_fp_out', required=True, + help='Output directory') +@click.option('-t', '--threads', 'opt_threads', default=20, + help='Number of threads') +@click.pass_context +def cli(ctx, opt_fp_in, opt_fp_out, opt_threads): + """IJB-C screenshot sources""" + + import sys + from glob import glob + from os.path import join + from pathlib import Path + import time + from functools import partial + from multiprocessing.dummy import Pool as ThreadPool + + import pandas as pd + import cv2 as cv + from tqdm import tqdm + + from selenium import webdriver + from selenium.webdriver.support import expected_conditions as EC + from selenium.webdriver.support.wait import WebDriverWait + from selenium.webdriver.common.by import By + + from app.utils import file_utils, im_utils, logger_utils + + log = logger_utils.Logger.getLogger() + + chrome_options = webdriver.ChromeOptions() + chrome_options.add_argument('--no-sandbox') + chrome_options.add_argument('--headless') + chrome_options.add_argument('--disable-dev-shm-usage') + + + def pool_process(route, chrome_options): + # Threaded image resize function + try: + pbar.update(1) + + driver = webdriver.Chrome(chrome_options=chrome_options) + driver.set_window_size(1920,1080) + + url = route['url'] + fp_out = route['dst'] + log.debug(f'url: {url}, dst: {fp_out}') + driver.get(url) + + if 'youtube.com' in url: + try: + wait = WebDriverWait(driver,30).until(EC.visibility_of_element_located((By.CLASS_NAME,'ytd-video-secondary-info-renderer'))) + except Exception as e: + log.debug(f'error: {e}') + pass + else: + wait = WebDriverWait(driver,10) + time.sleep(1) # wait for element + + time.sleep(10) # wait for element + file_utils.mkdirs(fp_out) + log.debug(f'save to: {fp_out}') + driver.get_screenshot_as_file(fp_out) + driver.quit() + + return True + except: + return False + + # load + routes = [] + df_licenses = pd.read_csv(opt_fp_in) + log.info(f'{len(df_licenses)} rows') + for df_idx, df_license in tqdm(df_licenses.iterrows(), total=len(df_licenses)): + filepath = df_license['Media ID'] + if not 'video/' in filepath: + continue + url = str(df_license['Media URL']) + if not ('http://' in url or 'https://' in url): + url = 'http://' + url + fp_media = filepath.replace(Path(filepath).suffix, '.png') + fp_out = join(opt_fp_out, fp_media) + obj = {'url': url, 'dst': fp_out} + routes.append(obj) + + # setup multithreading + for route in routes: + log.debug(f'url: {route["url"]}, dst: {route["dst"]}') + + return + results = [] + pbar = tqdm(total=len(routes)) + pool_process = partial(pool_process, chrome_options=chrome_options) + pool = ThreadPool(opt_threads) + with tqdm(total=len(routes)) as pbar: + results = pool.map(pool_process, routes) + pbar.close() + + + + + + + + + +#wait = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-watch-next-secondary-results-renderer'))) +#wait = WebDriverWait(driver,3).until(EC.text_to_be_present_in_element_value((By.CLASS_NAME,'yt-next-continuation'), 'show')) +#wait = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-video-secondary-info-renderer'))) +#driver.execute_script("document.getElementById('related').style.display = 'None';") + +''' +title_is +title_contains +presence_of_element_located +visibility_of_element_located +visibility_of +presence_of_all_elements_located +text_to_be_present_in_element +text_to_be_present_in_element_value +frame_to_be_available_and_switch_to_it +invisibility_of_element_located +element_to_be_clickable - it is Displayed and Enabled. +staleness_of +element_to_be_selected +element_located_to_be_selected +element_selection_state_to_be +element_located_selection_state_to_be +alert_is_present +''' \ No newline at end of file diff --git a/megapixels/commands/demo/face_3ddfa.py b/megapixels/commands/demo/face_3ddfa.py index 90359159..23bbcf7e 100644 --- a/megapixels/commands/demo/face_3ddfa.py +++ b/megapixels/commands/demo/face_3ddfa.py @@ -300,6 +300,9 @@ def cli(ctx, opt_fp_in, opt_dir_out, opt_gpu, opt_bbox_init, fp_out = join(fpp_out, f'{fpp_in.stem}.obj') write_obj_with_colors(fp_out, vertices_orig, triangles, colors) + fp_out = join(fpp_out, f'{fpp_in.stem}.txt') + np.savetxt(fp_out, pts68, fmt='%.3f') + # ---------------------------------------------------------------------------- # display diff --git a/megapixels/commands/processor/_old_files_to_face_rois.py b/megapixels/commands/processor/_old_files_to_face_rois.py index d92cbd74..895f4718 100644 --- a/megapixels/commands/processor/_old_files_to_face_rois.py +++ b/megapixels/commands/processor/_old_files_to_face_rois.py @@ -1,4 +1,4 @@ -""" + """ Crop images to prepare for training """ diff --git a/megapixels/commands/visualize/weasyprinter.py b/megapixels/commands/visualize/weasyprinter.py new file mode 100644 index 00000000..4e9cd1ac --- /dev/null +++ b/megapixels/commands/visualize/weasyprinter.py @@ -0,0 +1,54 @@ +""" +Crop images to prepare for training +""" + +import click +# from PIL import Image, ImageOps, ImageFilter, ImageDraw + +from app.settings import types +from app.utils import click_utils +from app.settings import app_cfg as cfg + +color_filters = {'color': 1, 'gray': 2, 'all': 3} + +@click.command() +@click.option('--html', 'opt_fp_in_html', required=True, + help='Input HTML') +@click.option('--css', 'opt_fp_in_css', required=True, + help='Input CSS') +@click.option('-o', '--output', 'opt_fp_out', required=True, + help='Output PDF') +@click.option('-f', '--force', 'opt_force', is_flag=True, + help='Force overwrite file') +@click.pass_context +def cli(ctx, opt_fp_in_html, opt_fp_in_css, opt_fp_out, opt_force): + """Generates PDF files from HTML, CSS""" + + import sys + import os + from os.path import join + from pathlib import Path + + from tqdm import tqdm + import numpy as np + import pandas as pd + from weasyprint import HTML, CSS + from weasyprint.fonts import FontConfiguration + + from app.utils import logger_utils, file_utils, im_utils, display_utils, draw_utils + from app.utils import plot_utils + from app.models.data_store import DataStore + + # ------------------------------------------------- + # init here + + log = logger_utils.Logger.getLogger() + + + font_config = FontConfiguration() + html = HTML(filename=opt_fp_in_html) + css = CSS(filename=opt_fp_in_css) + document = html.render(stylesheets=[css], font_config=font_config) + document.resolve_links() + document.make_bookmark_tree() + document.write_pdf(opt_fp_out) \ No newline at end of file -- cgit v1.2.3-70-g09d2