From 37da49ae72b78bc771441b73e155a7cd20371430 Mon Sep 17 00:00:00 2001
From: adamhrv <adam@ahprojects.com>
Date: Sun, 10 Feb 2019 16:23:15 +0100
Subject: update

---
 megapixels/commands/datasets/ijb_screenshot.py     | 105 ++++++++++++++
 megapixels/commands/datasets/ijb_screenshot_mt.py  | 156 +++++++++++++++++++++
 megapixels/commands/demo/face_3ddfa.py             |   3 +
 .../commands/processor/_old_files_to_face_rois.py  |   2 +-
 megapixels/commands/visualize/weasyprinter.py      |  54 +++++++
 5 files changed, 319 insertions(+), 1 deletion(-)
 create mode 100644 megapixels/commands/datasets/ijb_screenshot.py
 create mode 100644 megapixels/commands/datasets/ijb_screenshot_mt.py
 create mode 100644 megapixels/commands/visualize/weasyprinter.py

(limited to 'megapixels/commands')

diff --git a/megapixels/commands/datasets/ijb_screenshot.py b/megapixels/commands/datasets/ijb_screenshot.py
new file mode 100644
index 00000000..e6940d88
--- /dev/null
+++ b/megapixels/commands/datasets/ijb_screenshot.py
@@ -0,0 +1,105 @@
+# Chrome
+# wget https://chromedriver.storage.googleapis.com/73.0.3683.20/chromedriver_linux64.zip
+# Firefox
+# wget https://github.com/mozilla/geckodriver/releases/download/v0.24.0/geckodriver-v0.24.0-linux64.tar.gz
+# PhantomJS
+# npm install -g phantomjs
+
+import click
+
+from app.settings import app_cfg
+
+#/data_store/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs3_media.csv
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input license data CSV')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output directory')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out):
+  """IJB-C screenshot sources
+
+  Skips rows whose 'Media ID' contains 'frames/' or 'img/' and saves each
+  other 'Media URL' to <opt_fp_out>/<Media ID with a .png suffix>.
+  """
+  from os.path import join
+  from pathlib import Path
+  import time
+
+  import pandas as pd
+  from tqdm import tqdm
+
+  from selenium import webdriver
+  from selenium.common.exceptions import WebDriverException
+  from selenium.webdriver.support import expected_conditions as EC
+  from selenium.webdriver.support.wait import WebDriverWait
+  from selenium.webdriver.common.by import By
+
+  from app.utils import file_utils, logger_utils
+
+  log = logger_utils.Logger.getLogger()
+
+  df_licenses = pd.read_csv(opt_fp_in)
+  log.info(f'{len(df_licenses)} rows')
+
+  chrome_options = webdriver.ChromeOptions()
+  chrome_options.add_argument('--no-sandbox')
+  chrome_options.add_argument('--headless')
+  chrome_options.add_argument('--disable-dev-shm-usage')
+  driver = webdriver.Chrome(chrome_options=chrome_options)
+  try:
+    driver.set_window_size(1920,1080)
+    for _, df_license in tqdm(df_licenses.iterrows(), total=len(df_licenses)):
+      filepath = df_license['Media ID']
+      if 'frames/' in filepath or 'img/' in filepath:
+        continue
+      if pd.isna(df_license['Media URL']):
+        continue  # no URL recorded; a NaN would fail the substring tests below
+      url = str(df_license['Media URL'])
+      if not ('http://' in url or 'https://' in url):
+        url = 'http://' + url
+      log.debug(f'getting: {url}')
+      try:
+        driver.get(url)
+      except WebDriverException as e:
+        log.error(f'could not load {url}: {e}')
+        continue  # one unreachable page must not abort the whole run
+      if 'youtube.com' in url:
+        try:
+          WebDriverWait(driver,3).until(EC.visibility_of_element_located((By.CLASS_NAME,'ytd-video-secondary-info-renderer')))
+        except WebDriverException as e:
+          log.debug(f'error: {e}')  # e.g. wait timed out; capture anyway
+      time.sleep(1)  # let the page settle before capturing
+      # with_suffix swaps only the extension; str.replace rewrote every match
+      fp_out = join(opt_fp_out, str(Path(filepath).with_suffix('.png')))
+      file_utils.mkdirs(fp_out)
+      log.debug(f'save to: {fp_out}')
+      driver.get_screenshot_as_file(fp_out)
+  finally:
+    driver.quit()  # always release the browser, even after an error
+
+#wait = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-watch-next-secondary-results-renderer')))
+#wait = WebDriverWait(driver,3).until(EC.text_to_be_present_in_element_value((By.CLASS_NAME,'yt-next-continuation'), 'show'))
+#wait = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-video-secondary-info-renderer')))
+#driver.execute_script("document.getElementById('related').style.display = 'None';")
+
+'''
+title_is
+title_contains
+presence_of_element_located
+visibility_of_element_located
+visibility_of
+presence_of_all_elements_located
+text_to_be_present_in_element
+text_to_be_present_in_element_value
+frame_to_be_available_and_switch_to_it
+invisibility_of_element_located
+element_to_be_clickable - it is Displayed and Enabled.
+staleness_of
+element_to_be_selected
+element_located_to_be_selected
+element_selection_state_to_be
+element_located_selection_state_to_be
+alert_is_present
+'''
\ No newline at end of file
diff --git a/megapixels/commands/datasets/ijb_screenshot_mt.py b/megapixels/commands/datasets/ijb_screenshot_mt.py
new file mode 100644
index 00000000..616893c7
--- /dev/null
+++ b/megapixels/commands/datasets/ijb_screenshot_mt.py
@@ -0,0 +1,156 @@
+"""Create screenshots for YouTube.com URLs in the IJB dataset
+
+TODO
+- grey out boxes in sidebar
+- resize driver screenshot area to include author text
+
+Installing webdrivers:
+
+Chrome
+wget https://chromedriver.storage.googleapis.com/73.0.3683.20/chromedriver_linux64.zip
+
+Firefox
+wget https://github.com/mozilla/geckodriver/releases/download/v0.24.0/geckodriver-v0.24.0-linux64.tar.gz
+
+PhantomJS
+npm install -g phantomjs
+"""
+
+import click
+
+from app.settings import app_cfg
+
+#/data_store/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs3_media.csv
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+  help='Input license data CSV')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output directory')
+@click.option('-t', '--threads', 'opt_threads', default=20,
+  help='Number of threads')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_threads):
+  """IJB-C screenshot sources
+
+  Builds one url/dst route per license-CSV row whose 'Media ID' contains
+  'video/', then lets opt_threads workers, each with its own headless
+  Chrome, save a PNG to <opt_fp_out>/<Media ID with a .png suffix>.
+  """
+  from os.path import join
+  from pathlib import Path
+  import time
+  from multiprocessing.dummy import Pool as ThreadPool
+
+  import pandas as pd
+  from tqdm import tqdm
+
+  from selenium import webdriver
+  from selenium.common.exceptions import WebDriverException
+  from selenium.webdriver.support import expected_conditions as EC
+  from selenium.webdriver.support.wait import WebDriverWait
+  from selenium.webdriver.common.by import By
+
+  from app.utils import file_utils, logger_utils
+
+  log = logger_utils.Logger.getLogger()
+
+  chrome_options = webdriver.ChromeOptions()
+  chrome_options.add_argument('--no-sandbox')
+  chrome_options.add_argument('--headless')
+  chrome_options.add_argument('--disable-dev-shm-usage')
+
+
+  def pool_process(route):
+    """Screenshot route['url'] to route['dst'] in a browser of its own.
+
+    Returns True when the PNG was written, False on any failure.
+    """
+    url, fp_out = route['url'], route['dst']
+    driver = None
+    try:
+      log.debug(f'url: {url}, dst: {fp_out}')
+      driver = webdriver.Chrome(chrome_options=chrome_options)
+      driver.set_window_size(1920,1080)
+      driver.get(url)
+      if 'youtube.com' in url:
+        try:
+          WebDriverWait(driver,30).until(EC.visibility_of_element_located((By.CLASS_NAME,'ytd-video-secondary-info-renderer')))
+        except WebDriverException as e:
+          # info panel never became visible; capture the page anyway
+          log.debug(f'error: {e}')
+      else:
+        time.sleep(1)  # wait for element
+      time.sleep(10)  # wait for element
+      file_utils.mkdirs(fp_out)
+      log.debug(f'save to: {fp_out}')
+      # get_screenshot_as_file reports a failed write by returning False
+      return driver.get_screenshot_as_file(fp_out)
+    except Exception as e:
+      # worker boundary: report the failure instead of hiding it, so one
+      # bad URL neither kills the pool nor disappears silently
+      log.error(f'failed: {url}: {e}')
+      return False
+    finally:
+      if driver is not None:
+        driver.quit()  # never leave a Chrome process behind
+      pbar.update(1)  # count the route once it is finished
+
+  # load
+  routes = []
+  df_licenses = pd.read_csv(opt_fp_in)
+  log.info(f'{len(df_licenses)} rows')
+  for _, df_license in tqdm(df_licenses.iterrows(), total=len(df_licenses)):
+    filepath = df_license['Media ID']
+    if 'video/' not in filepath:
+      continue
+    if pd.isna(df_license['Media URL']):
+      continue  # str(NaN) would otherwise queue the bogus 'http://nan'
+    url = str(df_license['Media URL'])
+    if not ('http://' in url or 'https://' in url):
+      url = 'http://' + url
+    # with_suffix swaps only the extension; str.replace rewrote every match
+    fp_out = join(opt_fp_out, str(Path(filepath).with_suffix('.png')))
+    routes.append({'url': url, 'dst': fp_out})
+
+  for route in routes:
+    log.debug(f'url: {route["url"]}, dst: {route["dst"]}')
+
+  # setup multithreading
+  # pool.map blocks until every route has been processed by a worker
+  with ThreadPool(opt_threads) as pool, tqdm(total=len(routes)) as pbar:
+    results = pool.map(pool_process, routes)
+  log.info(f'{sum(results)} of {len(routes)} screenshots saved')
+
+
+
+
+
+  
+
+
+
+#wait = WebDriverWait(driver,3).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-watch-next-secondary-results-renderer')))
+#wait = WebDriverWait(driver,3).until(EC.text_to_be_present_in_element_value((By.CLASS_NAME,'yt-next-continuation'), 'show'))
+#wait = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CLASS_NAME,'ytd-video-secondary-info-renderer')))
+#driver.execute_script("document.getElementById('related').style.display = 'None';")
+
+'''
+title_is
+title_contains
+presence_of_element_located
+visibility_of_element_located
+visibility_of
+presence_of_all_elements_located
+text_to_be_present_in_element
+text_to_be_present_in_element_value
+frame_to_be_available_and_switch_to_it
+invisibility_of_element_located
+element_to_be_clickable - it is Displayed and Enabled.
+staleness_of
+element_to_be_selected
+element_located_to_be_selected
+element_selection_state_to_be
+element_located_selection_state_to_be
+alert_is_present
+'''
\ No newline at end of file
diff --git a/megapixels/commands/demo/face_3ddfa.py b/megapixels/commands/demo/face_3ddfa.py
index 90359159..23bbcf7e 100644
--- a/megapixels/commands/demo/face_3ddfa.py
+++ b/megapixels/commands/demo/face_3ddfa.py
@@ -300,6 +300,9 @@ def cli(ctx, opt_fp_in, opt_dir_out, opt_gpu, opt_bbox_init,
     fp_out = join(fpp_out, f'{fpp_in.stem}.obj')
     write_obj_with_colors(fp_out, vertices_orig, triangles, colors)
 
+    fp_out = join(fpp_out, f'{fpp_in.stem}.txt')
+    np.savetxt(fp_out, pts68, fmt='%.3f')
+
 
   # ----------------------------------------------------------------------------
   # display
diff --git a/megapixels/commands/processor/_old_files_to_face_rois.py b/megapixels/commands/processor/_old_files_to_face_rois.py
index d92cbd74..895f4718 100644
--- a/megapixels/commands/processor/_old_files_to_face_rois.py
+++ b/megapixels/commands/processor/_old_files_to_face_rois.py
@@ -1,4 +1,4 @@
-"""
+"""
 Crop images to prepare for training
 """
 
diff --git a/megapixels/commands/visualize/weasyprinter.py b/megapixels/commands/visualize/weasyprinter.py
new file mode 100644
index 00000000..4e9cd1ac
--- /dev/null
+++ b/megapixels/commands/visualize/weasyprinter.py
@@ -0,0 +1,54 @@
+"""
+Generate a PDF from an HTML file and a CSS stylesheet using WeasyPrint
+"""
+
+import click
+# from PIL import Image, ImageOps, ImageFilter, ImageDraw
+
+from app.settings import types
+from app.utils import click_utils
+from app.settings import app_cfg as cfg
+
+color_filters = {'color': 1, 'gray': 2, 'all': 3}  # NOTE(review): not referenced in this module; possibly template residue, confirm before removing
+
+@click.command()
+@click.option('--html', 'opt_fp_in_html', required=True,
+  help='Input HTML')
+@click.option('--css', 'opt_fp_in_css', required=True,
+  help='Input CSS')
+@click.option('-o', '--output', 'opt_fp_out', required=True,
+  help='Output PDF')
+@click.option('-f', '--force', 'opt_force', is_flag=True,
+  help='Force overwrite file')
+@click.pass_context
+def cli(ctx, opt_fp_in_html, opt_fp_in_css, opt_fp_out, opt_force):
+  """Generates PDF files from HTML, CSS
+
+  Renders opt_fp_in_html with the opt_fp_in_css stylesheet and writes the
+  PDF to opt_fp_out; an existing file is replaced only when opt_force is set.
+  """
+  from pathlib import Path
+
+  from weasyprint import HTML, CSS
+  from weasyprint.fonts import FontConfiguration
+
+  from app.utils import logger_utils
+
+  # -------------------------------------------------
+  # init here
+
+  log = logger_utils.Logger.getLogger()
+
+  # -f/--force used to be accepted but ignored; honour its documented meaning
+  if Path(opt_fp_out).exists() and not opt_force:
+    log.error(f'{opt_fp_out} exists. Use "-f / --force" to overwrite')
+    return
+
+  font_config = FontConfiguration()
+  html = HTML(filename=opt_fp_in_html)
+  # the stylesheet needs the font config too, or its @font-face rules are lost
+  css  = CSS(filename=opt_fp_in_css, font_config=font_config)
+  document = html.render(stylesheets=[css], font_config=font_config)
+  document.resolve_links()
+  document.make_bookmark_tree()
+  document.write_pdf(opt_fp_out)
\ No newline at end of file
-- 
cgit v1.2.3-70-g09d2