From c8e7a10be948c2405d46d8c3caf4a8c6675eee29 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 27 Feb 2019 19:35:54 +0100 Subject: rebuild --- megapixels/app/site/parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'megapixels/app/site/parser.py') diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index f739315a..d6705214 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -127,6 +127,7 @@ def parse_research_index(research_posts): """ content = "
" for post in research_posts: + print(post) s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, post['path']) if 'image' in post: post_image = s3_path + post['image'] @@ -240,7 +241,7 @@ def read_post_index(basedir): Generate an index of posts """ posts = [] - for fn in sorted(glob.glob('../site/content/{}/*/index.md'.format(basedir))): + for fn in sorted(glob.glob(os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md'))): metadata, valid_sections = read_metadata(fn) if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft': continue -- cgit v1.2.3-70-g09d2 From 67896d3cdde877de940a282bebacd10ca1c56499 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 27 Feb 2019 20:29:08 +0100 Subject: site watcher / loader --- README.md | 2 +- megapixels/app/site/builder.py | 22 ++-- megapixels/app/site/loader.py | 123 +++++++++++++++++++ megapixels/app/site/parser.py | 204 ++++++++----------------------- megapixels/commands/site/watch.py | 44 +++++++ site/assets/css/css.css | 1 + site/content/pages/datasets/lfw/index.md | 55 ++++----- site/public/datasets/lfw/index.html | 43 ++----- 8 files changed, 266 insertions(+), 228 deletions(-) create mode 100644 megapixels/app/site/loader.py create mode 100644 megapixels/commands/site/watch.py (limited to 'megapixels/app/site/parser.py') diff --git a/README.md b/README.md index e1a2c1d0..e46a6289 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ pip install numpy Pillow pip install dlib pip install requests simplejson click pdfminer.six pip install urllib3 flask flask_sqlalchemy mysql-connector -pip install pymediainfo tqdm opencv-python imutils +pip install pymediainfo tqdm opencv-python imutils watchdog pip install scikit-image python-dotenv imagehash scikit-learn colorlog pip install celery keras tensorflow pip install python.app # OSX only! 
needed for matplotlib diff --git a/megapixels/app/site/builder.py b/megapixels/app/site/builder.py index 188fbc25..15055110 100644 --- a/megapixels/app/site/builder.py +++ b/megapixels/app/site/builder.py @@ -7,6 +7,7 @@ from jinja2 import Environment, FileSystemLoader, select_autoescape import app.settings.app_cfg as cfg import app.site.s3 as s3 +import app.site.loader as loader import app.site.parser as parser env = Environment( @@ -21,7 +22,7 @@ def build_page(fn, research_posts, datasets): - syncs any assets with s3 - handles certain index pages... """ - metadata, sections = parser.read_metadata(fn) + metadata, sections = loader.read_metadata(fn) if metadata is None: print("{} has no metadata".format(fn)) @@ -55,7 +56,7 @@ def build_page(fn, research_posts, datasets): if 'index.md' in fn: s3.sync_directory(dirname, s3_dir, metadata) - content = parser.parse_markdown(sections, s3_path, skip_h1=skip_h1) + content = parser.parse_markdown(metadata, sections, s3_path, skip_h1=skip_h1) html = template.render( metadata=metadata, @@ -73,11 +74,11 @@ def build_index(key, research_posts, datasets): """ build the index of research (blog) posts """ - metadata, sections = parser.read_metadata(os.path.join(cfg.DIR_SITE_CONTENT, key, 'index.md')) + metadata, sections = loader.read_metadata(os.path.join(cfg.DIR_SITE_CONTENT, key, 'index.md')) template = env.get_template("page.html") s3_path = s3.make_s3_path(cfg.S3_SITE_PATH, metadata['path']) - content = parser.parse_markdown(sections, s3_path, skip_h1=False) - content += parser.parse_research_index(research_posts) + content = parser.parse_markdown(metadata, sections, s3_path, skip_h1=False) + content += loader.parse_research_index(research_posts) html = template.render( metadata=metadata, content=content, @@ -93,8 +94,8 @@ def build_site(): """ build the site! 
=^) """ - research_posts = parser.read_research_post_index() - datasets = parser.read_datasets_index() + research_posts = loader.read_research_post_index() + datasets = loader.read_datasets_index() for fn in glob.iglob(os.path.join(cfg.DIR_SITE_CONTENT, "**/*.md"), recursive=True): build_page(fn, research_posts, datasets) build_index('research', research_posts, datasets) @@ -103,7 +104,8 @@ def build_file(fn): """ build just one page from a filename! =^) """ - research_posts = parser.read_research_post_index() - datasets = parser.read_datasets_index() - fn = os.path.join(cfg.DIR_SITE_CONTENT, fn) + research_posts = loader.read_research_post_index() + datasets = loader.read_datasets_index() + if cfg.DIR_SITE_CONTENT not in fn: + fn = os.path.join(cfg.DIR_SITE_CONTENT, fn) build_page(fn, research_posts, datasets) diff --git a/megapixels/app/site/loader.py b/megapixels/app/site/loader.py new file mode 100644 index 00000000..691efb25 --- /dev/null +++ b/megapixels/app/site/loader.py @@ -0,0 +1,123 @@ +import os +import re +import glob +import simplejson as json + +import app.settings.app_cfg as cfg + +def read_metadata(fn): + """ + Read in read a markdown file and extract the metadata + """ + with open(fn, "r") as file: + data = file.read() + data = data.replace("\n ", "\n") + if "\n" in data: + data = data.replace("\r", "") + else: + data = data.replace("\r", "\n") + sections = data.split("\n\n") + return parse_metadata(fn, sections) + + +default_metadata = { + 'status': 'published', + 'title': 'Untitled Page', + 'desc': '', + 'slug': '', + 'published': '2018-12-31', + 'updated': '2018-12-31', + 'authors': 'Adam Harvey', + 'sync': 'true', + 'tagline': '', +} + +def parse_metadata(fn, sections): + """ + parse the metadata headers in a markdown file + (everything before the second ---------) + also generates appropriate urls for this page :) + """ + found_meta = False + metadata = {} + valid_sections = [] + for section in sections: + if not found_meta and ': ' in 
section: + found_meta = True + parse_metadata_section(metadata, section) + continue + if '-----' in section: + continue + if found_meta: + valid_sections.append(section) + + if 'title' not in metadata: + print('warning: {} has no title'.format(fn)) + for key in default_metadata: + if key not in metadata: + metadata[key] = default_metadata[key] + + basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, '')) + basename = os.path.basename(fn) + if basedir == '/': + metadata['path'] = '/' + metadata['url'] = '/' + elif basename == 'index.md': + metadata['path'] = basedir + '/' + metadata['url'] = metadata['path'] + else: + metadata['path'] = basedir + '/' + metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/' + + if metadata['status'] == 'published|draft|private': + metadata['status'] = 'published' + + metadata['sync'] = metadata['sync'] != 'false' + + metadata['author_html'] = '
'.join(metadata['authors'].split(',')) + + return metadata, valid_sections + +def parse_metadata_section(metadata, section): + """ + parse a metadata key: value pair + """ + for line in section.split("\n"): + if ': ' not in line: + continue + key, value = line.split(': ', 1) + metadata[key.lower()] = value + + +def read_research_post_index(): + """ + Generate an index of the research (blog) posts + """ + return read_post_index('research') + + +def read_datasets_index(): + """ + Generate an index of the datasets + """ + return read_post_index('datasets') + + +def read_post_index(basedir): + """ + Generate an index of posts + """ + posts = [] + for fn in sorted(glob.glob(os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md'))): + metadata, valid_sections = read_metadata(fn) + if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft': + continue + posts.append(metadata) + if not len(posts): + posts.append({ + 'title': 'Placeholder', + 'slug': 'placeholder', + 'date': 'Placeholder', + 'url': '/', + }) + return posts diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index d6705214..3792e6f1 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -10,6 +10,49 @@ import app.site.s3 as s3 renderer = mistune.Renderer(escape=False) markdown = mistune.Markdown(renderer=renderer) +def parse_markdown(metadata, sections, s3_path, skip_h1=False): + """ + parse page into sections, preprocess the markdown to handle our modifications + """ + groups = [] + current_group = [] + for section in sections: + if skip_h1 and section.startswith('# '): + continue + elif section.strip().startswith('```'): + groups.append(format_section(current_group, s3_path)) + current_group = [] + current_group.append(section) + if section.strip().endswith('```'): + groups.append(format_applet("\n\n".join(current_group), s3_path)) + current_group = [] + elif section.strip().endswith('```'): + current_group.append(section) + 
groups.append(format_applet("\n\n".join(current_group), s3_path)) + current_group = [] + elif section.startswith('+ '): + groups.append(format_section(current_group, s3_path)) + groups.append(format_metadata(section)) + current_group = [] + elif '![fullwidth:' in section: + groups.append(format_section(current_group, s3_path)) + groups.append(format_section([section], s3_path, type='fullwidth')) + current_group = [] + elif '![wide:' in section: + groups.append(format_section(current_group, s3_path)) + groups.append(format_section([section], s3_path, type='wide')) + current_group = [] + elif '![' in section: + groups.append(format_section(current_group, s3_path)) + groups.append(format_section([section], s3_path, type='images')) + current_group = [] + else: + current_group.append(section) + groups.append(format_section(current_group, s3_path)) + content = "".join(groups) + return content + + def fix_images(lines, s3_path): """ do our own tranformation of the markdown around images to handle wide images etc @@ -32,6 +75,7 @@ def fix_images(lines, s3_path): real_lines.append(line) return "\n".join(real_lines) + def format_section(lines, s3_path, type=''): """ format a normal markdown section @@ -44,6 +88,7 @@ def format_section(lines, s3_path, type=''): return "
" + markdown(lines) + "
" return "" + def format_metadata(section): """ format a metadata section (+ key: value pairs) @@ -54,7 +99,11 @@ def format_metadata(section): meta.append("
{}
{}
".format(key, value)) return "
{}
".format(''.join(meta)) + def format_applet(section, s3_path): + """ + Format the applets, which load javascript modules like the map and CSVs + """ # print(section) payload = section.strip('```').strip().strip('```').strip().split('\n') applet = {} @@ -79,47 +128,6 @@ def format_applet(section, s3_path): applet['fields'] = payload[1:] return "
".format(json.dumps(applet)) -def parse_markdown(sections, s3_path, skip_h1=False): - """ - parse page into sections, preprocess the markdown to handle our modifications - """ - groups = [] - current_group = [] - for section in sections: - if skip_h1 and section.startswith('# '): - continue - elif section.strip().startswith('```'): - groups.append(format_section(current_group, s3_path)) - current_group = [] - current_group.append(section) - if section.strip().endswith('```'): - groups.append(format_applet("\n\n".join(current_group), s3_path)) - current_group = [] - elif section.strip().endswith('```'): - current_group.append(section) - groups.append(format_applet("\n\n".join(current_group), s3_path)) - current_group = [] - elif section.startswith('+ '): - groups.append(format_section(current_group, s3_path)) - groups.append(format_metadata(section)) - current_group = [] - elif '![fullwidth:' in section: - groups.append(format_section(current_group, s3_path)) - groups.append(format_section([section], s3_path, type='fullwidth')) - current_group = [] - elif '![wide:' in section: - groups.append(format_section(current_group, s3_path)) - groups.append(format_section([section], s3_path, type='wide')) - current_group = [] - elif '![' in section: - groups.append(format_section(current_group, s3_path)) - groups.append(format_section([section], s3_path, type='images')) - current_group = [] - else: - current_group.append(section) - groups.append(format_section(current_group, s3_path)) - content = "".join(groups) - return content def parse_research_index(research_posts): """ @@ -141,117 +149,3 @@ def parse_research_index(research_posts): content += row content += '
' return content - -def read_metadata(fn): - """ - Read in read a markdown file and extract the metadata - """ - with open(fn, "r") as file: - data = file.read() - data = data.replace("\n ", "\n") - if "\n" in data: - data = data.replace("\r", "") - else: - data = data.replace("\r", "\n") - sections = data.split("\n\n") - return parse_metadata(fn, sections) - -default_metadata = { - 'status': 'published', - 'title': 'Untitled Page', - 'desc': '', - 'slug': '', - 'published': '2018-12-31', - 'updated': '2018-12-31', - 'authors': 'Adam Harvey', - 'sync': 'true', - 'tagline': '', -} - -def parse_metadata_section(metadata, section): - """ - parse a metadata key: value pair - """ - for line in section.split("\n"): - if ': ' not in line: - continue - key, value = line.split(': ', 1) - metadata[key.lower()] = value - -def parse_metadata(fn, sections): - """ - parse the metadata headers in a markdown file - (everything before the second ---------) - also generates appropriate urls for this page :) - """ - found_meta = False - metadata = {} - valid_sections = [] - for section in sections: - if not found_meta and ': ' in section: - found_meta = True - parse_metadata_section(metadata, section) - continue - if '-----' in section: - continue - if found_meta: - valid_sections.append(section) - - if 'title' not in metadata: - print('warning: {} has no title'.format(fn)) - for key in default_metadata: - if key not in metadata: - metadata[key] = default_metadata[key] - - basedir = os.path.dirname(fn.replace(cfg.DIR_SITE_CONTENT, '')) - basename = os.path.basename(fn) - if basedir == '/': - metadata['path'] = '/' - metadata['url'] = '/' - elif basename == 'index.md': - metadata['path'] = basedir + '/' - metadata['url'] = metadata['path'] - else: - metadata['path'] = basedir + '/' - metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/' - - if metadata['status'] == 'published|draft|private': - metadata['status'] = 'published' - - metadata['sync'] = metadata['sync'] != 
'false' - - metadata['author_html'] = '
'.join(metadata['authors'].split(',')) - - return metadata, valid_sections - -def read_research_post_index(): - """ - Generate an index of the research (blog) posts - """ - return read_post_index('research') - -def read_datasets_index(): - """ - Generate an index of the datasets - """ - return read_post_index('datasets') - -def read_post_index(basedir): - """ - Generate an index of posts - """ - posts = [] - for fn in sorted(glob.glob(os.path.join(cfg.DIR_SITE_CONTENT, basedir, '*/index.md'))): - metadata, valid_sections = read_metadata(fn) - if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft': - continue - posts.append(metadata) - if not len(posts): - posts.append({ - 'title': 'Placeholder', - 'slug': 'placeholder', - 'date': 'Placeholder', - 'url': '/', - }) - return posts - diff --git a/megapixels/commands/site/watch.py b/megapixels/commands/site/watch.py new file mode 100644 index 00000000..7fd3ba7c --- /dev/null +++ b/megapixels/commands/site/watch.py @@ -0,0 +1,44 @@ +""" +Watch for changes in the static site and build them +""" + +import click +import time +from watchdog.observers import Observer +from watchdog.events import PatternMatchingEventHandler + +import app.settings.app_cfg as cfg +from app.site.builder import build_site, build_file + +class SiteBuilder(PatternMatchingEventHandler): + """ + Handler for filesystem changes to the content path + """ + patterns = ["*.md"] + + def on_modified(self, event): + print(event.src_path, event.event_type) + build_file(event.src_path) + + def on_created(self, event): + print(event.src_path, event.event_type) + build_file(event.src_path) + +@click.command() +@click.pass_context +def cli(ctx): + """ + Run the observer and start watching for changes + """ + print("{} is now being watched for changes.".format(cfg.DIR_SITE_CONTENT)) + observer = Observer() + observer.schedule(SiteBuilder(), path=cfg.DIR_SITE_CONTENT, recursive=True) + observer.start() + + try: + while True: + 
time.sleep(1) + except KeyboardInterrupt: + observer.stop() + + observer.join() diff --git a/site/assets/css/css.css b/site/assets/css/css.css index 858d98eb..7b2e19fc 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -346,6 +346,7 @@ section.wide .image { } section.fullwidth { width: 100%; + background-size: contain; } section.fullwidth .image { max-width: 100%; diff --git a/site/content/pages/datasets/lfw/index.md b/site/content/pages/datasets/lfw/index.md index 8b37f035..48d86e1f 100644 --- a/site/content/pages/datasets/lfw/index.md +++ b/site/content/pages/datasets/lfw/index.md @@ -4,6 +4,8 @@ status: published title: Labeled Faces in The Wild desc: Labeled Faces in The Wild (LFW) is a database of face photographs designed for studying the problem of unconstrained face recognition subdesc: It includes 13,456 images of 4,432 people’s images copied from the Internet during 2002-2004. +image: lfw_index.gif +caption: Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms. slug: lfw published: 2019-2-23 updated: 2019-2-23 @@ -12,22 +14,13 @@ authors: Adam Harvey ------------ -# LFW +### Statistics + Years: 2002-2004 + Images: 13,233 + Identities: 5,749 + Origin: Yahoo News Images -+ Funding: (Possibly, partially CIA*) - -![fullwidth:Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.](assets/lfw_index.gif) - -*Labeled Faces in The Wild* (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition[^lfw_www]. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. 
According to BiometricUpdate.com[^lfw_pingan], LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong." - -The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. LFW is a subset of *Names of Faces* and is part of the first facial recognition training dataset created entirely from images appearing on the Internet. The people appearing in LFW are... - -The *Names and Faces* dataset was the first face recognition dataset created entire from online photos. However, *Names and Faces* and *LFW* are not the first face recognition dataset created entirely "in the wild". That title belongs to the [UCD dataset](/datasets/ucd_faces/). Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer. - ++ Funding: (Possibly, partially CIA) ### Analysis @@ -39,25 +32,35 @@ The *Names and Faces* dataset was the first face recognition dataset created ent - In all 3 of the LFW publications [^lfw_original_paper], [^lfw_survey], [^lfw_tech_report] the words "ethics", "consent", and "privacy" appear 0 times - The word "future" appears 71 times +## Labeled Faces in the Wild + +*Labeled Faces in The Wild* (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition[^lfw_www]. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. According to BiometricUpdate.com[^lfw_pingan], LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong." 
+ +The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. LFW is a subset of *Names of Faces* and is part of the first facial recognition training dataset created entirely from images appearing on the Internet. The people appearing in LFW are... + +The *Names and Faces* dataset was the first face recognition dataset created entire from online photos. However, *Names and Faces* and *LFW* are not the first face recognition dataset created entirely "in the wild". That title belongs to the [UCD dataset](/datasets/ucd_faces/). Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer. + ### Synthetic Faces To visualize the types of photos in the dataset without explicitly publishing individual's identities a generative adversarial network (GAN) was trained on the entire dataset. The images in this video show a neural network learning the visual latent space and then interpolating between archetypical identities within the LFW dataset. ![fullwidth:](assets/lfw_synthetic.jpg) - ### Biometric Trade Routes To understand how this dataset has been used, its citations have been geocoded to show an approximate geographic digital trade route of the biometric data. Lines indicate an organization (education, commercial, or governmental) that has cited the LFW dataset in their research. Data is compiled from [SemanticScholar](https://www.semanticscholar.org). -[add map here] +``` +map +``` ### Citations Browse or download the geocoded citation data collected for the LFW dataset. -[add citations table here] - +``` +citations +``` ### Additional Information @@ -69,27 +72,14 @@ Browse or download the geocoded citation data collected for the LFW dataset. 
- The faces in the LFW dataset were detected using the Viola-Jones haarcascade face detector [^lfw_website] [^lfw-survey] - The LFW dataset is used by several of the largest tech companies in the world including "Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong." [^lfw_pingan] - All images in the LFW dataset were copied from Yahoo News between 2002 - 2004 -<<<<<<< HEAD -- In 2014, two of the four original authors of the LFW dataset received funding from IARPA and ODNI for their follow up paper [Labeled Faces in the Wild: Updates and New Reporting Procedures](https://www.semanticscholar.org/paper/Labeled-Faces-in-the-Wild-%3A-Updates-and-New-Huang-Learned-Miller/2d3482dcff69c7417c7b933f22de606a0e8e42d4) via IARPA contract number 2014-14071600010 +- In 2014, two of the four original authors of the LFW dataset received funding from IARPA and ODNI for their followup paper [Labeled Faces in the Wild: Updates and New Reporting Procedures](https://www.semanticscholar.org/paper/Labeled-Faces-in-the-Wild-%3A-Updates-and-New-Huang-Learned-Miller/2d3482dcff69c7417c7b933f22de606a0e8e42d4) via IARPA contract number 2014-14071600010 - The dataset includes 2 images of [George Tenet](http://vis-www.cs.umass.edu/lfw/person/George_Tenet.html), the former Director of Central Intelligence (DCI) for the Central Intelligence Agency whose facial biometrics were eventually used to help train facial recognition software in China and Russia -======= -- In 2014, 2/4 of the original authors of the LFW dataset received funding from IARPA and ODNI for their follow up paper "Labeled Faces in the Wild: Updates and New Reporting Procedures" via IARPA contract number 2014-14071600010 -- The LFW dataset was used Center for Intelligent Information Retrieval, the Central Intelligence Agency, the National Security Agency and National - -TODO (need citations for the following) - -- SenseTime, who has relied on LFW for benchmarking their 
facial recognition performance, is one the leading provider of surveillance to the Chinese Government [need citation for this fact. is it the most? or is that Tencent?] -- Two out of 4 of the original authors received funding from the Office of Director of National Intelligence and IARPA for their 2016 LFW survey follow up report - ->>>>>>> 13d7a450affe8ea4f368a97ea2014faa17702a4c ![Person with the most face images in LFW: former President George W. Bush](assets/lfw_montage_top1_640.jpg) ![Persons with the next most face images in LFW: Colin Powell (236), Tony Blair (144), and Donald Rumsfeld (121)](assets/lfw_montage_top2_4_640.jpg) ![All 5,379 faces in the Labeled Faces in The Wild Dataset](assets/lfw_montage_all_crop.jpg) - - ## Code The LFW dataset is so widely used that a popular code library called Sci-Kit Learn includes a function called `fetch_lfw_people` to download the faces in the LFW dataset. @@ -133,7 +123,6 @@ imageio.imwrite('lfw_montage_960.jpg', montage) ### Supplementary Material - ``` load_file assets/lfw_commercial_use.csv name_display, company_url, example_url, country, description @@ -141,14 +130,13 @@ name_display, company_url, example_url, country, description Text and graphics ©Adam Harvey / megapixels.cc - ------- Ignore text below these lines ------- -Research +### Research - "In our experiments, we used 10000 images and associated captions from the Faces in the wilddata set [3]." - "This work was supported in part by the Center for Intelligent Information Retrieval, the Central Intelligence Agency, the National Security Agency and National Science Foundation under CAREER award IIS-0546666 and grant IIS-0326249." @@ -159,6 +147,9 @@ Research - This research is based upon work supported in part by the Office of the Director of National Intelligence (ODNI), Intelligence Advanced Research Projects Activity (IARPA), via contract number 2014-14071600010. 
- From "Labeled Faces in the Wild: Updates and New Reporting Procedures" +### Footnotes + [^lfw_www]: [^lfw_baidu]: Jingtuo Liu, Yafeng Deng, Tao Bai, Zhengping Wei, Chang Huang. Targeting Ultimate Accuracy: Face Recognition via Deep Embedding. [^lfw_pingan]: Lee, Justin. "PING AN Tech facial recognition receives high score in latest LFW test results". BiometricUpdate.com. Feb 13, 2017. + diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index f83d8a66..86f49c52 100644 --- a/site/public/datasets/lfw/index.html +++ b/site/public/datasets/lfw/index.html @@ -27,11 +27,8 @@
-

LFW

-
Years
2002-2004
Images
13,233
Identities
5,749
Origin
Yahoo News Images
Funding
(Possibly, partially CIA*)
Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.
Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.

Labeled Faces in The Wild (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition[^lfw_www]. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. According to BiometricUpdate.com[^lfw_pingan], LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

-

The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. LFW is a subset of Names of Faces and is part of the first facial recognition training dataset created entirely from images appearing on the Internet. The people appearing in LFW are...

-

The Names and Faces dataset was the first face recognition dataset created entire from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

-

Analysis

+

Statistics

+
Years
2002-2004
Images
13,233
Identities
5,749
Origin
Yahoo News Images
Funding
(Possibly, partially CIA)

Analysis

  • There are about 3 men for every 1 woman (4,277 men and 1,472 women) in the LFW dataset[^lfw_www]
  • The person with the most images is George W. Bush with 530
  • @@ -41,15 +38,17 @@
  • In all 3 of the LFW publications [^lfw_original_paper], [^lfw_survey], [^lfw_tech_report] the words "ethics", "consent", and "privacy" appear 0 times
  • The word "future" appears 71 times
+

Labeled Faces in the Wild

+

Labeled Faces in The Wild (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition[^lfw_www]. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. According to BiometricUpdate.com[^lfw_pingan], LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

+

The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. LFW is a subset of Names of Faces and is part of the first facial recognition training dataset created entirely from images appearing on the Internet. The people appearing in LFW are...

+

The Names and Faces dataset was the first face recognition dataset created entire from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

Synthetic Faces

To visualize the types of photos in the dataset without explicitly publishing individual's identities a generative adversarial network (GAN) was trained on the entire dataset. The images in this video show a neural network learning the visual latent space and then interpolating between archetypical identities within the LFW dataset.

Biometric Trade Routes

To understand how this dataset has been used, its citations have been geocoded to show an approximate geographic digital trade route of the biometric data. Lines indicate an organization (education, commercial, or governmental) that has cited the LFW dataset in their research. Data is compiled from SemanticScholar.

-

[add map here]

-

Citations

+

Citations

Browse or download the geocoded citation data collected for the LFW dataset.

-

[add citations table here]

-

Additional Information

+

Additional Information

(tweet-sized snippets go here)

  • The LFW dataset is considered the "most popular benchmark for face recognition" [^lfw_baidu]
  • @@ -57,27 +56,10 @@
  • All images in LFW dataset were obtained "in the wild" meaning without any consent from the subject or from the photographer
  • The faces in the LFW dataset were detected using the Viola-Jones haarcascade face detector [^lfw_website] [^lfw-survey]
  • The LFW dataset is used by several of the largest tech companies in the world including "Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong." [^lfw_pingan]
  • -
  • All images in the LFW dataset were copied from Yahoo News between 2002 - 2004 -<<<<<<< HEAD
  • -
  • In 2014, two of the four original authors of the LFW dataset received funding from IARPA and ODNI for their follow up paper Labeled Faces in the Wild: Updates and New Reporting Procedures via IARPA contract number 2014-14071600010
  • -
  • The dataset includes 2 images of George Tenet, the former Director of Central Intelligence (DCI) for the Central Intelligence Agency whose facial biometrics were eventually used to help train facial recognition software in China and Russia

    -
  • -
  • In 2014, 2/4 of the original authors of the LFW dataset received funding from IARPA and ODNI for their follow up paper "Labeled Faces in the Wild: Updates and New Reporting Procedures" via IARPA contract number 2014-14071600010
  • -
  • The LFW dataset was used Center for Intelligent Information Retrieval, the Central Intelligence Agency, the National Security Agency and National
  • -
-

TODO (need citations for the following)

-
    -
  • SenseTime, who has relied on LFW for benchmarking their facial recognition performance, is one the leading provider of surveillance to the Chinese Government [need citation for this fact. is it the most? or is that Tencent?]
  • -
  • Two out of 4 of the original authors received funding from the Office of Director of National Intelligence and IARPA for their 2016 LFW survey follow up report
  • +
  • All images in the LFW dataset were copied from Yahoo News between 2002 - 2004
  • +
  • In 2014, two of the four original authors of the LFW dataset received funding from IARPA and ODNI for their followup paper Labeled Faces in the Wild: Updates and New Reporting Procedures via IARPA contract number 2014-14071600010
  • +
  • The dataset includes 2 images of George Tenet, the former Director of Central Intelligence (DCI) for the Central Intelligence Agency whose facial biometrics were eventually used to help train facial recognition software in China and Russia
-

> 13d7a450affe8ea4f368a97ea2014faa17702a4c

-
-
-
-
-
-
-
 former President George W. Bush
former President George W. Bush
 Colin Powell (236), Tony Blair (144), and Donald Rumsfeld (121)
Colin Powell (236), Tony Blair (144), and Donald Rumsfeld (121)
All 5,379 faces in the Labeled Faces in The Wild Dataset
All 5,379 faces in the Labeled Faces in The Wild Dataset

Code

The LFW dataset is so widely used that a popular code library called Sci-Kit Learn includes a function called fetch_lfw_people to download the faces in the LFW dataset.

@@ -113,7 +95,7 @@ imageio.imwrite('lfw_montage_960.jpg', montage)

Supplementary Material

Text and graphics ©Adam Harvey / megapixels.cc

Ignore text below these lines

-

Research

+

Research

  • "In our experiments, we used 10000 images and associated captions from the Faces in the wilddata set [3]."
  • "This work was supported in part by the Center for Intelligent Information Retrieval, the Central Intelligence Agency, the National Security Agency and National Science Foundation under CAREER award IIS-0546666 and grant IIS-0326249."
  • @@ -125,6 +107,7 @@ imageio.imwrite('lfw_montage_960.jpg', montage)
  • From "Labeled Faces in the Wild: Updates and New Reporting Procedures"
+

Footnotes


    -- cgit v1.2.3-70-g09d2 From 9bac173e85865e4f0d1dba5071b40eb7ebe3dd1a Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 27 Feb 2019 22:15:03 +0100 Subject: new intro header for datasets page and sidebar --- client/index.js | 6 +-- megapixels/app/site/parser.py | 70 ++++++++++++++++++++++++++---- megapixels/commands/site/watch.py | 2 + site/assets/css/css.css | 72 ++++++++++++++++++++++++++----- site/assets/css/tabulator.css | 2 +- site/content/pages/datasets/lfw/index.md | 25 +++++------ site/content/pages/datasets/uccs/index.md | 2 +- site/public/datasets/lfw/index.html | 36 ++++------------ 8 files changed, 152 insertions(+), 63 deletions(-) (limited to 'megapixels/app/site/parser.py') diff --git a/client/index.js b/client/index.js index c9335f14..37906f30 100644 --- a/client/index.js +++ b/client/index.js @@ -110,9 +110,9 @@ function runApplets() { function main() { const paras = document.querySelectorAll('section p') - if (paras.length) { - paras[0].classList.add('first_paragraph') - } + // if (paras.length) { + // paras[0].classList.add('first_paragraph') + // } toArray(document.querySelectorAll('header .links a')).forEach(tag => { if (window.location.href.match(tag.href)) { tag.classList.add('active') diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index 3792e6f1..dc53177b 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -16,9 +16,30 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): """ groups = [] current_group = [] + in_stats = False + + if 'desc' in metadata and 'subdesc' in metadata: + groups.append(intro_section(metadata, s3_path)) + for section in sections: if skip_h1 and section.startswith('# '): continue + elif section.strip().startswith('---'): + continue + elif section.lower().strip().startswith('ignore text'): + break + elif '### Statistics' in section: + if len(current_group): + groups.append(format_section(current_group, s3_path)) + current_group = [] + 
current_group.append(section) + in_stats = True + elif in_stats and not section.strip().startswith('## '): + current_group.append(section) + elif in_stats and section.strip().startswith('## '): + current_group = [format_section(current_group, s3_path, 'right-sidebar', tag='div')] + current_group.append(section) + in_stats = False elif section.strip().startswith('```'): groups.append(format_section(current_group, s3_path)) current_group = [] @@ -32,7 +53,7 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): current_group = [] elif section.startswith('+ '): groups.append(format_section(current_group, s3_path)) - groups.append(format_metadata(section)) + groups.append('
    ' + format_metadata(section) + '
    ') current_group = [] elif '![fullwidth:' in section: groups.append(format_section(current_group, s3_path)) @@ -52,6 +73,32 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): content = "".join(groups) return content +def intro_section(metadata, s3_path): + """ + Build the intro section for datasets + """ + + section = "
    ".format(s3_path + metadata['image']) + section += "
    " + + parts = [] + if 'desc' in metadata: + desc = metadata['desc'] + if 'color' in metadata and metadata['title'] in desc: + desc = desc.replace(metadata['title'], "{}".format(metadata['color'], metadata['title'])) + section += "
    {}
    ".format(desc, desc) + + if 'subdesc' in metadata: + subdesc = markdown(metadata['subdesc']).replace('

    ', '').replace('

    ', '') + section += "
    {}
    ".format(subdesc, subdesc) + + section += "
    " + section += "
    " + + if 'caption' in metadata: + section += "
    {}
    ".format(metadata['caption']) + + return section def fix_images(lines, s3_path): """ @@ -75,19 +122,26 @@ def fix_images(lines, s3_path): real_lines.append(line) return "\n".join(real_lines) - -def format_section(lines, s3_path, type=''): +def format_section(lines, s3_path, type='', tag='section'): """ format a normal markdown section """ if len(lines): + lines = fix_meta(lines) lines = fix_images(lines, s3_path) if type: - return "
    {}
    ".format(type, markdown(lines)) + return "<{} class='{}'>{}".format(tag, type, markdown(lines), tag) else: - return "
    " + markdown(lines) + "
    " + return "<{}>{}".format(tag, markdown(lines), tag) return "" +def fix_meta(lines): + new_lines = [] + for line in lines: + if line.startswith('+ '): + line = format_metadata(line) + new_lines.append(line) + return new_lines def format_metadata(section): """ @@ -97,8 +151,7 @@ def format_metadata(section): for line in section.split('\n'): key, value = line[2:].split(': ', 1) meta.append("
    {}
    {}
    ".format(key, value)) - return "
    {}
    ".format(''.join(meta)) - + return "
    {}
    ".format(''.join(meta)) def format_applet(section, s3_path): """ @@ -107,12 +160,13 @@ def format_applet(section, s3_path): # print(section) payload = section.strip('```').strip().strip('```').strip().split('\n') applet = {} - print(payload) + # print(payload) if ': ' in payload[0]: command, opt = payload[0].split(': ') else: command = payload[0] opt = None + print(command) if command == 'python' or command == 'javascript' or command == 'code': return format_section([ section ], s3_path) if command == '': diff --git a/megapixels/commands/site/watch.py b/megapixels/commands/site/watch.py index 7fd3ba7c..7bd71038 100644 --- a/megapixels/commands/site/watch.py +++ b/megapixels/commands/site/watch.py @@ -35,6 +35,8 @@ def cli(ctx): observer.schedule(SiteBuilder(), path=cfg.DIR_SITE_CONTENT, recursive=True) observer.start() + build_file(cfg.DIR_SITE_CONTENT + "/datasets/lfw/index.md") + try: while True: time.sleep(1) diff --git a/site/assets/css/css.css b/site/assets/css/css.css index 7b2e19fc..fed381a7 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -4,12 +4,12 @@ html, body { padding: 0; width: 100%; min-height: 100%; - font-family: 'Roboto', sans-serif; - color: #b8b8b8; + font-family: 'Roboto Mono', sans-serif; + color: #eee; overflow-x: hidden; } html { - background: #191919; + background: #111111; } .content { @@ -146,8 +146,8 @@ h2 { h3 { margin: 0 0 20px 0; padding: 0; - font-size: 11pt; - font-weight: 500; + font-size: 14pt; + font-weight: 600; transition: color 0.2s cubic-bezier(0,0,1,1); } h4 { @@ -165,8 +165,15 @@ h4 { color: #fff; text-decoration: underline; } +.right-sidebar h3 { + margin: 0; + padding: 0 0 10px 0; + font-family: 'Roboto Mono'; + text-transform: uppercase; + letter-spacing: 2px; +} -th, .gray, h3, h4 { +th, .gray { font-family: 'Roboto Mono', monospace; font-weight: 400; text-transform: uppercase; @@ -201,6 +208,7 @@ section { } p { margin: 0 0 20px 0; + line-height: 2; } .content a { color: #ddd; @@ -229,10 
+237,13 @@ p { } .right-sidebar { float: right; - width: 200px; + width: 240px; margin-left: 20px; + padding-top: 10px; padding-left: 20px; border-left: 1px solid #444; + font-family: 'Roboto'; + font-size: 14px; } .right-sidebar .meta { flex-direction: column; @@ -240,6 +251,9 @@ p { .right-sidebar .meta > div { margin-bottom: 10px; } +.right-sidebar ul { + margin-bottom: 10px; +} /* lists */ @@ -346,17 +360,17 @@ section.wide .image { } section.fullwidth { width: 100%; - background-size: contain; } section.fullwidth .image { max-width: 100%; } .caption { - text-align: center; + text-align: left; font-size: 9pt; - color: #888; - max-width: 620px; + color: #bbb; + max-width: 960px; margin: 10px auto 0 auto; + font-family: 'Roboto'; } /* blog index */ @@ -499,3 +513,39 @@ section.fullwidth .image { .dataset-list a:nth-child(3n+3) { background-color: rgba(255, 255, 0, 0.1); } .desktop .dataset-list .dataset:nth-child(3n+3):hover { background-color: rgba(255, 255, 0, 0.2); } + + +/* intro section for datasets */ + +section.intro_section { + font-family: 'Roboto Mono'; + width: 100%; + background-size: cover; + background-position: bottom left; + padding: 50px 0; + min-height: 60vh; + display: flex; + justify-content: center; + align-items: center; + background-color: #111111; +} +.intro_section .inner { + max-width: 960px; + margin: 0 auto; +} +.intro_section .hero_desc { + font-size: 38px; + line-height: 60px; + margin-bottom: 30px; + color: #fff; +} +.intro_section .hero_subdesc { + font-size: 18px; + line-height: 36px; + max-width: 640px; + color: #ddd; +} +.intro_section span { + box-shadow: -10px -10px #000, 10px -10px #000, 10px 10px #000, -10px 10px #000; + background: #000; +} \ No newline at end of file diff --git a/site/assets/css/tabulator.css b/site/assets/css/tabulator.css index 200f0c5c..63abf050 100755 --- a/site/assets/css/tabulator.css +++ b/site/assets/css/tabulator.css @@ -493,7 +493,7 @@ display: inline-block; position: relative; box-sizing: 
border-box; - padding: 4px; + padding: 10px; border-right: 1px solid #333; vertical-align: middle; white-space: nowrap; diff --git a/site/content/pages/datasets/lfw/index.md b/site/content/pages/datasets/lfw/index.md index 48d86e1f..1995e1f9 100644 --- a/site/content/pages/datasets/lfw/index.md +++ b/site/content/pages/datasets/lfw/index.md @@ -2,14 +2,14 @@ status: published title: Labeled Faces in The Wild -desc: Labeled Faces in The Wild (LFW) is a database of face photographs designed for studying the problem of unconstrained face recognition +desc: Labeled Faces in The Wild (LFW) is a database of face photographs designed for studying the problem of unconstrained face recognition. subdesc: It includes 13,456 images of 4,432 people’s images copied from the Internet during 2002-2004. -image: lfw_index.gif +image: assets/lfw_feature.jpg caption: Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms. slug: lfw published: 2019-2-23 updated: 2019-2-23 -color: #00FF00 +color: #ff0000 authors: Adam Harvey ------------ @@ -22,12 +22,11 @@ authors: Adam Harvey + Origin: Yahoo News Images + Funding: (Possibly, partially CIA) -### Analysis +### INSIGHTS - There are about 3 men for every 1 woman (4,277 men and 1,472 women) in the LFW dataset[^lfw_www] - The person with the most images is [George W. Bush](http://vis-www.cs.umass.edu/lfw/person/George_W_Bush_comp.html) with 530 - There are about 3 George W. 
Bush's for every 1 [Tony Blair](http://vis-www.cs.umass.edu/lfw/person/Tony_Blair.html) -- 70% of people in the dataset have only 1 image and 29% have 2 or more images - The LFW dataset includes over 500 actors, 30 models, 10 presidents, 124 basketball players, 24 football players, 11 kings, 7 queens, and 1 [Moby](http://vis-www.cs.umass.edu/lfw/person/Moby.html) - In all 3 of the LFW publications [^lfw_original_paper], [^lfw_survey], [^lfw_tech_report] the words "ethics", "consent", and "privacy" appear 0 times - The word "future" appears 71 times @@ -40,20 +39,20 @@ The LFW dataset includes 13,233 images of 5,749 people that were collected betwe The *Names and Faces* dataset was the first face recognition dataset created entire from online photos. However, *Names and Faces* and *LFW* are not the first face recognition dataset created entirely "in the wild". That title belongs to the [UCD dataset](/datasets/ucd_faces/). Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer. -### Synthetic Faces - -To visualize the types of photos in the dataset without explicitly publishing individual's identities a generative adversarial network (GAN) was trained on the entire dataset. The images in this video show a neural network learning the visual latent space and then interpolating between archetypical identities within the LFW dataset. - -![fullwidth:](assets/lfw_synthetic.jpg) - ### Biometric Trade Routes -To understand how this dataset has been used, its citations have been geocoded to show an approximate geographic digital trade route of the biometric data. Lines indicate an organization (education, commercial, or governmental) that has cited the LFW dataset in their research. Data is compiled from [SemanticScholar](https://www.semanticscholar.org). +To understand how this dataset has been used, its citations have been geocoded to show an approximate geographic digital trade route of the biometric data. 
Lines indicate an organization (education, commercial, or governmental) that has cited the LFW dataset in their research. Data is compiled from [Semantic Scholar](https://www.semanticscholar.org). ``` map ``` +### Synthetic Faces + +To visualize the types of photos in the dataset without explicitly publishing individual's identities a generative adversarial network (GAN) was trained on the entire dataset. The images in this video show a neural network learning the visual latent space and then interpolating between archetypical identities within the LFW dataset. + +![fullwidth:](assets/lfw_synthetic.jpg) + ### Citations Browse or download the geocoded citation data collected for the LFW dataset. @@ -136,6 +135,7 @@ Ignore text below these lines ------- + ### Research - "In our experiments, we used 10000 images and associated captions from the Faces in the wilddata set [3]." @@ -146,6 +146,7 @@ Ignore text below these lines - This research is based upon work supported in part by the Office of the Director of National Intelligence (ODNI), Intelligence Advanced Research Projects Activity (IARPA), via contract number 2014-14071600010. - From "Labeled Faces in the Wild: Updates and New Reporting Procedures" +- 70% of people in the dataset have only 1 image and 29% have 2 or more images ### Footnotes diff --git a/site/content/pages/datasets/uccs/index.md b/site/content/pages/datasets/uccs/index.md index d40dce22..be1d2474 100644 --- a/site/content/pages/datasets/uccs/index.md +++ b/site/content/pages/datasets/uccs/index.md @@ -68,7 +68,7 @@ The more recent UCCS version of the dataset received funding from [^funding_uccs - You are welcomed to use these images for academic and journalistic use including for research papers, news stories, presentations. - Please use the following citation: -```MegaPixels.cc Adam Harvey 2013-2109.``` +```MegaPixels.cc Adam Harvey 2013-2019.``` [^funding_sb]: Sapkota, Archana and Boult, Terrance. 
"Large Scale Unconstrained Open Set Face Database." 2013. [^funding_uccs]: Günther, M. et. al. "Unconstrained Face Detection and Open-Set Face Recognition Challenge," 2018. Arxiv 1708.02337v3. \ No newline at end of file diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index 86f49c52..1242df0c 100644 --- a/site/public/datasets/lfw/index.html +++ b/site/public/datasets/lfw/index.html @@ -4,7 +4,7 @@ MegaPixels - + @@ -27,26 +27,26 @@
    -

    Statistics

    -
    Years
    2002-2004
    Images
    13,233
    Identities
    5,749
    Origin
    Yahoo News Images
    Funding
    (Possibly, partially CIA)

    Analysis

    +
    Labeled Faces in The Wild (LFW) is a database of face photographs designed for studying the problem of unconstrained face recognition.
    It includes 13,456 images of 4,432 people’s images copied from the Internet during 2002-2004. +
    Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.

    Labeled Faces in the Wild

    Labeled Faces in The Wild (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition[^lfw_www]. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. According to BiometricUpdate.com[^lfw_pingan], LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

    The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. LFW is a subset of Names of Faces and is part of the first facial recognition training dataset created entirely from images appearing on the Internet. The people appearing in LFW are...

    The Names and Faces dataset was the first face recognition dataset created entire from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

    -

    Synthetic Faces

    +

    Biometric Trade Routes

    +

    To understand how this dataset has been used, its citations have been geocoded to show an approximate geographic digital trade route of the biometric data. Lines indicate an organization (education, commercial, or governmental) that has cited the LFW dataset in their research. Data is compiled from Semantic Scholar.

    +

    Synthetic Faces

    To visualize the types of photos in the dataset without explicitly publishing individual's identities a generative adversarial network (GAN) was trained on the entire dataset. The images in this video show a neural network learning the visual latent space and then interpolating between archetypical identities within the LFW dataset.

    -

    Biometric Trade Routes

    -

    To understand how this dataset has been used, its citations have been geocoded to show an approximate geographic digital trade route of the biometric data. Lines indicate an organization (education, commercial, or governmental) that has cited the LFW dataset in their research. Data is compiled from SemanticScholar.

    -

    Citations

    +

    Citations

    Browse or download the geocoded citation data collected for the LFW dataset.

    Additional Information

    (tweet-sized snippets go here)

    @@ -94,24 +94,6 @@ imageio.imwrite('lfw_montage_960.jpg', montage)

    Supplementary Material

    Text and graphics ©Adam Harvey / megapixels.cc

    -

    Ignore text below these lines

    -

    Research

    -
      -
    • "In our experiments, we used 10000 images and associated captions from the Faces in the wilddata set [3]."
    • -
    • "This work was supported in part by the Center for Intelligent Information Retrieval, the Central Intelligence Agency, the National Security Agency and National Science Foundation under CAREER award IIS-0546666 and grant IIS-0326249."
    • -
    • From: "People-LDA: Anchoring Topics to People using Face Recognition" https://www.semanticscholar.org/paper/People-LDA%3A-Anchoring-Topics-to-People-using-Face-Jain-Learned-Miller/10f17534dba06af1ddab96c4188a9c98a020a459 and https://ieeexplore.ieee.org/document/4409055
    • -
    • This paper was presented at IEEE 11th ICCV conference Oct 14-21 and the main LFW paper "Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments" was also published that same year
    • -
    • 10f17534dba06af1ddab96c4188a9c98a020a459

      -
    • -
    • This research is based upon work supported in part by the Office of the Director of National Intelligence (ODNI), Intelligence Advanced Research Projects Activity (IARPA), via contract number 2014-14071600010.

      -
    • -
    • From "Labeled Faces in the Wild: Updates and New Reporting Procedures"
    • -
    -

    Footnotes

    -
    -
    -
      -
      -- cgit v1.2.3-70-g09d2 From 421adbea75c5a4282630a7399f8b1018c4f0dd90 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 27 Feb 2019 23:02:17 +0100 Subject: parser --- megapixels/app/site/parser.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'megapixels/app/site/parser.py') diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index dc53177b..98d9f284 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -16,7 +16,9 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): """ groups = [] current_group = [] + footnotes = [] in_stats = False + ignoring = False if 'desc' in metadata and 'subdesc' in metadata: groups.append(intro_section(metadata, s3_path)) @@ -27,7 +29,16 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): elif section.strip().startswith('---'): continue elif section.lower().strip().startswith('ignore text'): - break + ignoring = True + continue + elif section.strip().startswith('### Footnotes'): + groups.append(format_section(current_group, s3_path)) + footnotes = [] + in_footnotes = True + elif in_footnotes: + footnotes.append(section) + elif ignoring: + continue elif '### Statistics' in section: if len(current_group): groups.append(format_section(current_group, s3_path)) @@ -70,9 +81,14 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): else: current_group.append(section) groups.append(format_section(current_group, s3_path)) + + if len(footnotes): + groups.append(format_footnotes(footnotes, s3_path)) + content = "".join(groups) return content + def intro_section(metadata, s3_path): """ Build the intro section for datasets @@ -100,6 +116,7 @@ def intro_section(metadata, s3_path): return section + def fix_images(lines, s3_path): """ do our own tranformation of the markdown around images to handle wide images etc @@ -122,6 +139,7 @@ def fix_images(lines, s3_path): real_lines.append(line) return 
"\n".join(real_lines) + def format_section(lines, s3_path, type='', tag='section'): """ format a normal markdown section @@ -135,6 +153,7 @@ def format_section(lines, s3_path, type='', tag='section'): return "<{}>{}".format(tag, markdown(lines), tag) return "" + def fix_meta(lines): new_lines = [] for line in lines: @@ -143,6 +162,7 @@ def fix_meta(lines): new_lines.append(line) return new_lines + def format_metadata(section): """ format a metadata section (+ key: value pairs) @@ -153,6 +173,13 @@ def format_metadata(section): meta.append("
      {}
      {}
      ".format(key, value)) return "
      {}
      ".format(''.join(meta)) +def format_footnotes(footnotes): + footnotes = '\n'.join(footnotes).split('\n') + for footnote in footnotes: + if not len(footnote) or '[^' not in footnote: + continue + key, footnote = footnotes.split(': ') + def format_applet(section, s3_path): """ Format the applets, which load javascript modules like the map and CSVs -- cgit v1.2.3-70-g09d2 From 1b008e4b4d11def9b13dc0a800b0d068624d43ae Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 27 Feb 2019 23:48:35 +0100 Subject: half of a footnote implementation --- megapixels/app/site/parser.py | 35 +++++++++++++++++++++++++++++------ site/assets/css/css.css | 34 ++++++++++++++++++++++++++++++++++ site/public/datasets/lfw/index.html | 15 +++++++++------ 3 files changed, 72 insertions(+), 12 deletions(-) (limited to 'megapixels/app/site/parser.py') diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index 98d9f284..ef83b655 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -18,6 +18,7 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): current_group = [] footnotes = [] in_stats = False + in_footnotes = False ignoring = False if 'desc' in metadata and 'subdesc' in metadata: @@ -33,6 +34,7 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): continue elif section.strip().startswith('### Footnotes'): groups.append(format_section(current_group, s3_path)) + current_group = [] footnotes = [] in_footnotes = True elif in_footnotes: @@ -82,10 +84,18 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): current_group.append(section) groups.append(format_section(current_group, s3_path)) + footnote_txt = '' + footnote_lookup = {} + if len(footnotes): - groups.append(format_footnotes(footnotes, s3_path)) + footnote_txt, footnote_lookup = format_footnotes(footnotes, s3_path) content = "".join(groups) + + if footnote_lookup: + for key, index in footnote_lookup.items(): + content = content.replace(key, 
'{}'.format(key, index, index)) + content += footnote_txt return content @@ -153,8 +163,10 @@ def format_section(lines, s3_path, type='', tag='section'): return "<{}>{}".format(tag, markdown(lines), tag) return "" - def fix_meta(lines): + """ + Format metadata sections before passing to markdown + """ new_lines = [] for line in lines: if line.startswith('+ '): @@ -162,7 +174,6 @@ def fix_meta(lines): new_lines.append(line) return new_lines - def format_metadata(section): """ format a metadata section (+ key: value pairs) @@ -173,12 +184,24 @@ def format_metadata(section): meta.append("
      {}
      {}
      ".format(key, value)) return "
      {}
      ".format(''.join(meta)) -def format_footnotes(footnotes): +def format_footnotes(footnotes, s3_path): + """ + Format the footnotes section separately and produce a lookup we can use to update the main site + """ footnotes = '\n'.join(footnotes).split('\n') + index = 1 + footnote_index_lookup = {} + footnote_list = [] for footnote in footnotes: if not len(footnote) or '[^' not in footnote: continue - key, footnote = footnotes.split(': ') + key, note = footnote.split(': ', 1) + footnote_index_lookup[key] = index + footnote_list.append('^'.format(key) + markdown(note)) + index += 1 + + footnote_txt = '
      • ' + '
      • '.join(footnote_list) + '
      ' + return footnote_txt, footnote_index_lookup def format_applet(section, s3_path): """ @@ -189,7 +212,7 @@ def format_applet(section, s3_path): applet = {} # print(payload) if ': ' in payload[0]: - command, opt = payload[0].split(': ') + command, opt = payload[0].split(': ', 1) else: command = payload[0] opt = None diff --git a/site/assets/css/css.css b/site/assets/css/css.css index fed381a7..8b4241ea 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -548,4 +548,38 @@ section.intro_section { .intro_section span { box-shadow: -10px -10px #000, 10px -10px #000, 10px 10px #000, -10px 10px #000; background: #000; +} + +/* footnotes */ + +a.footnote { + font-size: 10px; + position: relative; + display: inline-block; + bottom: 10px; + text-decoration: none; + color: #ff0; + left: 2px; +} +.right-sidebar a.footnote { + bottom: 8px; +} +.desktop a.footnote:hover { + background-color: #ff0; + color: #000; +} +a.footnote_anchor { + font-weight: bold; + color: #ff0; + margin-right: 10px; + text-decoration: underline; + cursor: pointer; +} +ul.footnotes { + list-style-type: decimal; + margin-left: 30px; +} +li p { + margin: 0; padding: 0; + display: inline; } \ No newline at end of file diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index 1242df0c..54b6aa22 100644 --- a/site/public/datasets/lfw/index.html +++ b/site/public/datasets/lfw/index.html @@ -31,7 +31,7 @@
      Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.

      Labeled Faces in the Wild

      -

      Labeled Faces in The Wild (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition[^lfw_www]. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. According to BiometricUpdate.com[^lfw_pingan], LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

      +

      Labeled Faces in The Wild (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition1. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. According to BiometricUpdate.com3, LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

      The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. LFW is a subset of Names of Faces and is part of the first facial recognition training dataset created entirely from images appearing on the Internet. The people appearing in LFW are...

      The Names and Faces dataset was the first face recognition dataset created entire from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

      Biometric Trade Routes

      @@ -51,11 +51,11 @@

      Additional Information

      (tweet-sized snippets go here)

        -
      • The LFW dataset is considered the "most popular benchmark for face recognition" [^lfw_baidu]
      • -
      • The LFW dataset is "the most widely used evaluation set in the field of facial recognition" [^lfw_pingan]
      • +
      • The LFW dataset is considered the "most popular benchmark for face recognition" 2
      • +
      • The LFW dataset is "the most widely used evaluation set in the field of facial recognition" 3
      • All images in LFW dataset were obtained "in the wild" meaning without any consent from the subject or from the photographer
      • The faces in the LFW dataset were detected using the Viola-Jones haarcascade face detector [^lfw_website] [^lfw-survey]
      • -
      • The LFW dataset is used by several of the largest tech companies in the world including "Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong." [^lfw_pingan]
      • +
      • The LFW dataset is used by several of the largest tech companies in the world including "Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong." 3
      • All images in the LFW dataset were copied from Yahoo News between 2002 - 2004
      • In 2014, two of the four original authors of the LFW dataset received funding from IARPA and ODNI for their followup paper Labeled Faces in the Wild: Updates and New Reporting Procedures via IARPA contract number 2014-14071600010
      • The dataset includes 2 images of George Tenet, the former Director of Central Intelligence (DCI) for the Central Intelligence Agency whose facial biometrics were eventually used to help train facial recognition software in China and Russia
      • @@ -94,7 +94,10 @@ imageio.imwrite('lfw_montage_960.jpg', montage)

      Supplementary Material

      Text and graphics ©Adam Harvey / megapixels.cc

      -
      +
      -- cgit v1.2.3-70-g09d2 From 6711fb0c58e969284e3fcf94bb163c77445e2e13 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Thu, 28 Feb 2019 15:56:04 +0100 Subject: footnote back and forth navigation --- client/util/index.js | 4 ++ megapixels/app/site/parser.py | 17 +++++++- site/assets/css/css.css | 66 +++++++++++++++++++++++--------- site/content/pages/datasets/lfw/index.md | 2 +- site/public/datasets/lfw/index.html | 18 ++++----- 5 files changed, 77 insertions(+), 30 deletions(-) (limited to 'megapixels/app/site/parser.py') diff --git a/client/util/index.js b/client/util/index.js index d0db0d98..0792e24e 100644 --- a/client/util/index.js +++ b/client/util/index.js @@ -5,12 +5,16 @@ export const isiPad = !!(navigator.userAgent.match(/iPad/i)) export const isAndroid = !!(navigator.userAgent.match(/Android/i)) export const isMobile = isiPhone || isiPad || isAndroid export const isDesktop = !isMobile +export const isFirefox = typeof InstallTrigger !== 'undefined' export const toArray = a => Array.prototype.slice.apply(a) export const choice = a => a[Math.floor(Math.random() * a.length)] const htmlClassList = document.body.parentNode.classList htmlClassList.add(isDesktop ? 
'desktop' : 'mobile') +if (isFirefox) { + htmlClassList.add('firefox') +} /* Default image dimensions */ diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index ef83b655..9e904e00 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -10,6 +10,8 @@ import app.site.s3 as s3 renderer = mistune.Renderer(escape=False) markdown = mistune.Markdown(renderer=renderer) +footnote_count = 0 + def parse_markdown(metadata, sections, s3_path, skip_h1=False): """ parse page into sections, preprocess the markdown to handle our modifications @@ -94,7 +96,18 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): if footnote_lookup: for key, index in footnote_lookup.items(): - content = content.replace(key, '{}'.format(key, index, index)) + global footnote_count + footnote_count = 0 + letters = "abcdefghijklmnopqrstuvwxyz" + footnote_backlinks = [] + def footnote_tag(match): + global footnote_count + footnote_count += 1 + footnote_backlinks.append('{}'.format(key, footnote_count, letters[footnote_count-1])) + return ' {}'.format(key, footnote_count, key, index, index) + key_regex = re.compile(key.replace('[', '\\[').replace('^', '\\^').replace(']', '\\]')) + content = key_regex.sub(footnote_tag, content) + footnote_txt = footnote_txt.replace("{}_BACKLINKS".format(index), "".join(footnote_backlinks)) content += footnote_txt return content @@ -197,7 +210,7 @@ def format_footnotes(footnotes, s3_path): continue key, note = footnote.split(': ', 1) footnote_index_lookup[key] = index - footnote_list.append('^'.format(key) + markdown(note)) + footnote_list.append('{}_BACKLINKS'.format(key, index) + markdown(note)) index += 1 footnote_txt = '
      • ' + '
      • '.join(footnote_list) + '
      ' diff --git a/site/assets/css/css.css b/site/assets/css/css.css index 0afa3725..4b42657b 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -16,7 +16,8 @@ html { opacity: 0; transition: opacity 0.2s cubic-bezier(0,1,1,1); } -html.desktop .content, html.mobile .content { +html.desktop .content, +html.mobile .content { opacity: 1; } @@ -28,7 +29,7 @@ header { left: 0; width: 100%; height: 70px; - z-index: 2; + z-index: 9999; background: #1e1e1e; display: flex; flex-direction: row; @@ -53,8 +54,10 @@ header .logo { height: 30px; } header .site_name { + font-family: 'Roboto', sans-serif; font-weight: bold; color: #fff; + font-size: 14px; } header .sub { margin-left: 4px; @@ -148,7 +151,7 @@ h3 { margin: 0 0 20px 0; padding: 0; font-size: 14pt; - font-weight: 600; + font-weight: 500; transition: color 0.2s cubic-bezier(0,0,1,1); } h4 { @@ -170,6 +173,8 @@ h4 { margin: 0; padding: 0 0 10px 0; font-family: 'Roboto Mono'; + font-weight: 400; + font-size: 11px; text-transform: uppercase; letter-spacing: 2px; } @@ -210,13 +215,17 @@ section { p { margin: 0 0 20px 0; line-height: 2; + font-size: 15px; + font-weight: 400; } .content a { - color: #ff0; + color: #fff; + text-decoration: none; + border-bottom: 1px dashed; transition: color 0.2s cubic-bezier(0,0,1,1); } -.content a:hover { - color: #fff; +.desktop .content a:hover { + color: #ff8; } /* top of post metadata */ @@ -368,7 +377,7 @@ section.fullwidth .image { .caption { text-align: left; font-size: 9pt; - color: #bbb; + color: #999; max-width: 960px; margin: 10px auto 0 auto; font-family: 'Roboto'; @@ -538,17 +547,22 @@ section.intro_section { font-size: 38px; line-height: 60px; margin-bottom: 30px; - color: #fff; + color: #ddd; + font-weight: 300; } .intro_section .hero_subdesc { font-size: 18px; line-height: 36px; max-width: 640px; + font-weight: 300; color: #ddd; } -.intro_section span { - box-shadow: -10px -10px #000, 10px -10px #000, 10px 10px #000, -10px 10px #000; - background: #000; 
+.intro_section div > span { + box-shadow: -10px -10px #1e1e1e, 10px -10px #1e1e1e, 10px 10px #1e1e1e, -10px 10px #1e1e1e; + background: #1e1e1e; +} +.firefox .intro_section div > span { + box-decoration-break: clone; } /* footnotes */ @@ -559,22 +573,38 @@ a.footnote { display: inline-block; bottom: 10px; text-decoration: none; - color: #ff0; + color: #ff8; + border: 0; left: 2px; + transition-duration: 0s; +} +a.footnote_shim { + display: inline-block; + width: 1px; height: 1px; + overflow: hidden; + position: relative; + top: -90px; + visibility: hidden; } .right-sidebar a.footnote { bottom: 8px; } .desktop a.footnote:hover { - background-color: #ff0; + background-color: #ff8; color: #000; } -a.footnote_anchor { - font-weight: bold; - color: #ff0; +.backlinks { margin-right: 10px; - text-decoration: underline; - cursor: pointer; +} +.content .backlinks a { + color: #ff8; + font-size: 10px; + text-decoration: none; + border: 0; + font-weight: bold; + position: relative; + bottom: 5px; + margin-right: 2px; } ul.footnotes { list-style-type: decimal; diff --git a/site/content/pages/datasets/lfw/index.md b/site/content/pages/datasets/lfw/index.md index 1995e1f9..972fafe2 100644 --- a/site/content/pages/datasets/lfw/index.md +++ b/site/content/pages/datasets/lfw/index.md @@ -5,7 +5,7 @@ title: Labeled Faces in The Wild desc: Labeled Faces in The Wild (LFW) is a database of face photographs designed for studying the problem of unconstrained face recognition. subdesc: It includes 13,456 images of 4,432 people’s images copied from the Internet during 2002-2004. image: assets/lfw_feature.jpg -caption: Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms. +caption: A few of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms. 
slug: lfw published: 2019-2-23 updated: 2019-2-23 diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index 54b6aa22..08ec8ee3 100644 --- a/site/public/datasets/lfw/index.html +++ b/site/public/datasets/lfw/index.html @@ -28,10 +28,10 @@
      Labeled Faces in The Wild (LFW) is a database of face photographs designed for studying the problem of unconstrained face recognition.
      It includes 13,456 images of 4,432 people’s images copied from the Internet during 2002-2004. -
      Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.
      A few of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.

      Labeled Faces in the Wild

      -

      Labeled Faces in The Wild (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition1. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. According to BiometricUpdate.com3, LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

      +

      Labeled Faces in The Wild (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition 1. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. According to BiometricUpdate.com 3, LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

      The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. LFW is a subset of Names of Faces and is part of the first facial recognition training dataset created entirely from images appearing on the Internet. The people appearing in LFW are...

      The Names and Faces dataset was the first face recognition dataset created entire from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

      Biometric Trade Routes

      @@ -51,11 +51,11 @@

      Additional Information

      (tweet-sized snippets go here)

        -
      • The LFW dataset is considered the "most popular benchmark for face recognition" 2
      • -
      • The LFW dataset is "the most widely used evaluation set in the field of facial recognition" 3
      • +
      • The LFW dataset is considered the "most popular benchmark for face recognition" 2
      • +
      • The LFW dataset is "the most widely used evaluation set in the field of facial recognition" 3
      • All images in LFW dataset were obtained "in the wild" meaning without any consent from the subject or from the photographer
      • The faces in the LFW dataset were detected using the Viola-Jones haarcascade face detector [^lfw_website] [^lfw-survey]
      • -
      • The LFW dataset is used by several of the largest tech companies in the world including "Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong." 3
      • +
      • The LFW dataset is used by several of the largest tech companies in the world including "Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong." 3
      • All images in the LFW dataset were copied from Yahoo News between 2002 - 2004
      • In 2014, two of the four original authors of the LFW dataset received funding from IARPA and ODNI for their followup paper Labeled Faces in the Wild: Updates and New Reporting Procedures via IARPA contract number 2014-14071600010
      • The dataset includes 2 images of George Tenet, the former Director of Central Intelligence (DCI) for the Central Intelligence Agency whose facial biometrics were eventually used to help train facial recognition software in China and Russia
      • @@ -94,9 +94,9 @@ imageio.imwrite('lfw_montage_960.jpg', montage)

      Supplementary Material

      Text and graphics ©Adam Harvey / megapixels.cc

      -
      -- cgit v1.2.3-70-g09d2 From c33406d4da0f03a986db62b0d6b75c5a70114abe Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Thu, 28 Feb 2019 17:29:16 +0100 Subject: sidebar on about pages --- megapixels/app/site/parser.py | 12 +++++++----- site/assets/css/css.css | 4 ++++ site/content/pages/about/credits.md | 11 +++++++++++ site/content/pages/about/disclaimer.md | 11 +++++++++++ site/content/pages/about/index.md | 24 +++++++++++++++++++----- site/content/pages/about/press.md | 11 +++++++++++ site/content/pages/about/privacy.md | 10 ++++++++++ site/content/pages/about/terms.md | 12 +++++++++++- site/public/about/credits/index.html | 10 +++++++++- site/public/about/disclaimer/index.html | 10 +++++++++- site/public/about/index.html | 15 ++++++++++++--- site/public/about/press/index.html | 10 +++++++++- site/public/about/privacy/index.html | 11 +++++++++-- site/public/about/terms/index.html | 12 ++++++++++-- 14 files changed, 142 insertions(+), 21 deletions(-) (limited to 'megapixels/app/site/parser.py') diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index 9e904e00..b8bbf289 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -43,17 +43,19 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): footnotes.append(section) elif ignoring: continue - elif '### Statistics' in section: + elif '### statistics' in section.lower() or '### sidebar' in section.lower(): if len(current_group): groups.append(format_section(current_group, s3_path)) current_group = [] - current_group.append(section) + if 'sidebar' not in section.lower(): + current_group.append(section) in_stats = True - elif in_stats and not section.strip().startswith('## '): + elif in_stats and not section.strip().startswith('## ') and 'end sidebar' not in section.lower(): current_group.append(section) - elif in_stats and section.strip().startswith('## '): + elif in_stats and section.strip().startswith('## ') or 'end sidebar' in section.lower(): 
current_group = [format_section(current_group, s3_path, 'right-sidebar', tag='div')] - current_group.append(section) + if 'end sidebar' not in section.lower(): + current_group.append(section) in_stats = False elif section.strip().startswith('```'): groups.append(format_section(current_group, s3_path)) diff --git a/site/assets/css/css.css b/site/assets/css/css.css index d710b3a8..ee99e13e 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -265,6 +265,10 @@ p { margin-bottom: 10px; color: #aaa; } +.right-sidebar ul:first-child a { + text-decoration: none; + border-bottom: 1px solid; +} /* lists */ diff --git a/site/content/pages/about/credits.md b/site/content/pages/about/credits.md index 2d16155c..3cd0b05b 100644 --- a/site/content/pages/about/credits.md +++ b/site/content/pages/about/credits.md @@ -12,6 +12,17 @@ authors: Adam Harvey # Credits +### Sidebar + +- [About](/about/) +- [Press](/about/press/) +- [Credits](/about/credits/) +- [Disclaimer](/about/disclaimer/) +- [Terms and Conditions](/about/terms/) +- [Privacy Policy](/about/privacy/) + +## End Sidebar + - MegaPixels by Adam Harvey - Made with support from Mozilla - Site developed by Jules Laplace diff --git a/site/content/pages/about/disclaimer.md b/site/content/pages/about/disclaimer.md index 64ce9f21..27cf6760 100644 --- a/site/content/pages/about/disclaimer.md +++ b/site/content/pages/about/disclaimer.md @@ -12,6 +12,17 @@ authors: Adam Harvey # Disclaimer +### Sidebar + +- [About](/about/) +- [Press](/about/press/) +- [Credits](/about/credits/) +- [Disclaimer](/about/disclaimer/) +- [Terms and Conditions](/about/terms/) +- [Privacy Policy](/about/privacy/) + +## End Sidebar + Last updated: December 04, 2018 The information contained on MegaPixels.cc website (the "Service") is for academic and artistic purposes only. 
diff --git a/site/content/pages/about/index.md b/site/content/pages/about/index.md index f9c6f83a..d3f5874d 100644 --- a/site/content/pages/about/index.md +++ b/site/content/pages/about/index.md @@ -12,18 +12,32 @@ authors: Adam Harvey # About MegaPixels -MegaPixels aims to answers to these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla. +### Sidebar -MegaPixels aims to answers to these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla. +- [Press](/about/press/) +- [Credits](/about/credits/) +- [Disclaimer](/about/disclaimer/) +- [Terms and Conditions](/about/terms/) +- [Privacy Policy](/about/privacy/) -+ Years: 2002-2004 ++ Years: 2002-2019 + Datasets Analyzed: 325 + Author: Adam Harvey + Development: Jules LaPlace + Research Assistance: Berit Gilma -![Adam Harvey](assets/adam-harvey.jpg) **Adam Harvey** is an American artist and researcher based in Berlin. His previous projects (CV Dazzle, Stealth Wear, and SkyLift) explore the potential for countersurveillance as artwork. He is the founder of VFRAME (visual forensics software for human rights groups), the recipient of 2 PrototypeFund awards, and is currently a researcher in residence at Karlsruhe HfG studying artifical intelligence and datasets. +## End Sidebar -![Adam Harvey](assets/jules-laplace.jpg) **Jules LaPlace** is an American technologist and artist also based in Berlin. 
He was previously the CTO for a NYC digital agency and currently works at VFRAME, developing computer vision for human rights groups, and as a freelance technologists for artists. +MegaPixels aims to answer to these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla. + +MegaPixels aims to answer to these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla. + +![Adam Harvey](assets/adam-harvey.jpg) + +**Adam Harvey** is an American artist and researcher based in Berlin. His previous projects (CV Dazzle, Stealth Wear, and SkyLift) explore the potential for countersurveillance as artwork. He is the founder of VFRAME (visual forensics software for human rights groups), the recipient of 2 PrototypeFund awards, and is currently a researcher in residence at Karlsruhe HfG studying artifical intelligence and datasets. + +![Jules LaPlace](assets/jules-laplace.jpg) + +**Jules LaPlace** is an American artist and technologist also based in Berlin. He was previously the CTO of a NYC digital agency and currently works at VFRAME, developing computer vision for human rights groups, and building creative software for artists. **Mozilla** is a free software community founded in 1998 by members of Netscape. The Mozilla community uses, develops, spreads and supports Mozilla products, thereby promoting exclusively free software and open standards, with only minor exceptions. The community is supported institutionally by the not-for-profit Mozilla Foundation and its tax-paying subsidiary, the Mozilla Corporation. 
\ No newline at end of file diff --git a/site/content/pages/about/press.md b/site/content/pages/about/press.md index 2e3fa9a7..0e3124d0 100644 --- a/site/content/pages/about/press.md +++ b/site/content/pages/about/press.md @@ -13,6 +13,17 @@ authors: Adam Harvey # Press +### Sidebar + +- [About](/about/) +- [Press](/about/press/) +- [Credits](/about/credits/) +- [Disclaimer](/about/disclaimer/) +- [Terms and Conditions](/about/terms/) +- [Privacy Policy](/about/privacy/) + +## End Sidebar + ![alt text](assets/test.jpg) - Aug 22, 2018: "Transgender YouTubers had their videos grabbed to train facial recognition software" by James Vincent diff --git a/site/content/pages/about/privacy.md b/site/content/pages/about/privacy.md index 17d1b707..9685a189 100644 --- a/site/content/pages/about/privacy.md +++ b/site/content/pages/about/privacy.md @@ -12,6 +12,16 @@ authors: Adam Harvey # Privacy Policy +### Sidebar + +- [About](/about/) +- [Press](/about/press/) +- [Credits](/about/credits/) +- [Disclaimer](/about/disclaimer/) +- [Terms and Conditions](/about/terms/) +- [Privacy Policy](/about/privacy/) + +## End Sidebar A summary of our privacy policy is as follows: diff --git a/site/content/pages/about/terms.md b/site/content/pages/about/terms.md index 3735ff08..6ad03bc1 100644 --- a/site/content/pages/about/terms.md +++ b/site/content/pages/about/terms.md @@ -11,8 +11,18 @@ authors: Adam Harvey ------------ -Terms and Conditions ("Terms") +# Terms and Conditions ("Terms") +### Sidebar + +- [About](/about/) +- [Press](/about/press/) +- [Credits](/about/credits/) +- [Disclaimer](/about/disclaimer/) +- [Terms and Conditions](/about/terms/) +- [Privacy Policy](/about/privacy/) + +## End Sidebar Last updated: December 04, 2018 diff --git a/site/public/about/credits/index.html b/site/public/about/credits/index.html index fecc6c7b..6e4f06c1 100644 --- a/site/public/about/credits/index.html +++ b/site/public/about/credits/index.html @@ -28,7 +28,15 @@

      Credits

      -
        +
      • MegaPixels by Adam Harvey
      • Made with support from Mozilla
      • Site developed by Jules Laplace
      • diff --git a/site/public/about/disclaimer/index.html b/site/public/about/disclaimer/index.html index a108baa0..b93194fa 100644 --- a/site/public/about/disclaimer/index.html +++ b/site/public/about/disclaimer/index.html @@ -28,7 +28,15 @@

        Disclaimer

        -

        Last updated: December 04, 2018

        +

        Last updated: December 04, 2018

        The information contained on MegaPixels.cc website (the "Service") is for academic and artistic purposes only.

        MegaPixels.cc assumes no responsibility for errors or omissions in the contents on the Service.

        In no event shall MegaPixels.cc be liable for any special, direct, indirect, consequential, or incidental damages or any damages whatsoever, whether in an action of contract, negligence or other tort, arising out of or in connection with the use of the Service or the contents of the Service. MegaPixels.cc reserves the right to make additions, deletions, or modification to the contents on the Service at any time without prior notice.

        diff --git a/site/public/about/index.html b/site/public/about/index.html index 4a5ca926..2a0bc6c3 100644 --- a/site/public/about/index.html +++ b/site/public/about/index.html @@ -28,9 +28,18 @@

        About MegaPixels

        -

        MegaPixels aims to answers to these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla.

        -

        MegaPixels aims to answers to these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla.

        -
        Years
        2002-2004
        Datasets Analyzed
        325
        Author
        Adam Harvey
        Development
        Jules LaPlace
        Research Assistance
        Berit Gilma
        Adam Harvey
        Adam Harvey
        Adam Harvey
        Adam Harvey

        Mozilla is a free software community founded in 1998 by members of Netscape. The Mozilla community uses, develops, spreads and supports Mozilla products, thereby promoting exclusively free software and open standards, with only minor exceptions. The community is supported institutionally by the not-for-profit Mozilla Foundation and its tax-paying subsidiary, the Mozilla Corporation.

        +

        MegaPixels aims to answer to these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla.

        +

        MegaPixels aims to answer to these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla.

        +
        Adam Harvey
        Adam Harvey

        Adam Harvey is an American artist and researcher based in Berlin. His previous projects (CV Dazzle, Stealth Wear, and SkyLift) explore the potential for countersurveillance as artwork. He is the founder of VFRAME (visual forensics software for human rights groups), the recipient of 2 PrototypeFund awards, and is currently a researcher in residence at Karlsruhe HfG studying artifical intelligence and datasets.

        +
        Jules LaPlace
        Jules LaPlace

        Jules LaPlace is an American artist and technologist also based in Berlin. He was previously the CTO of a NYC digital agency and currently works at VFRAME, developing computer vision for human rights groups, and building creative software for artists.

        +

        Mozilla is a free software community founded in 1998 by members of Netscape. The Mozilla community uses, develops, spreads and supports Mozilla products, thereby promoting exclusively free software and open standards, with only minor exceptions. The community is supported institutionally by the not-for-profit Mozilla Foundation and its tax-paying subsidiary, the Mozilla Corporation.

        diff --git a/site/public/about/press/index.html b/site/public/about/press/index.html index a1d9d4f5..d36b6bc6 100644 --- a/site/public/about/press/index.html +++ b/site/public/about/press/index.html @@ -28,7 +28,15 @@

        Press

        -
        alt text
        alt text
          +
        alt text
        alt text
        • Aug 22, 2018: "Transgender YouTubers had their videos grabbed to train facial recognition software" by James Vincent https://www.theverge.com/2017/8/22/16180080/transgender-youtubers-ai-facial-recognition-dataset
        • Aug 22, 2018: "Transgender YouTubers had their videos grabbed to train facial recognition software" by James Vincent https://www.theverge.com/2017/8/22/16180080/transgender-youtubers-ai-facial-recognition-dataset
        • Aug 22, 2018: "Transgender YouTubers had their videos grabbed to train facial recognition software" by James Vincent https://www.theverge.com/2017/8/22/16180080/transgender-youtubers-ai-facial-recognition-dataset diff --git a/site/public/about/privacy/index.html b/site/public/about/privacy/index.html index 92a1b9a8..1b3b9d2f 100644 --- a/site/public/about/privacy/index.html +++ b/site/public/about/privacy/index.html @@ -28,10 +28,17 @@

          Privacy Policy

          -

          A summary of our privacy policy is as follows:

          +

          A summary of our privacy policy is as follows:

          The MegaPixels site does not use any analytics programs or collect any data besides the necessary IP address of your connection, which are deleted every 30 days and used only for security and to prevent misuse.

          The image processing sections of the site do not collect any data whatsoever. All processing takes place in temporary memory (RAM) and then is displayed back to the user over a SSL secured HTTPS connection. It is the sole responsibility of the user whether they discard, by closing the page, or share their analyzed information and any potential consequences that may arise from doing so.

          -

          A more complete legal version is below:

          This is a boilerplate Privacy policy from https://termsfeed.com/

          Needs to be reviewed

          diff --git a/site/public/about/terms/index.html b/site/public/about/terms/index.html index fd17b4d9..8bd6e738 100644 --- a/site/public/about/terms/index.html +++ b/site/public/about/terms/index.html @@ -27,8 +27,16 @@
          -

          Terms and Conditions ("Terms")

          -

          Last updated: December 04, 2018

          +

          Terms and Conditions ("Terms")

          +

          Last updated: December 04, 2018

          Please read these Terms and Conditions ("Terms", "Terms and Conditions") carefully before using the MegaPixels website (the "Service") operated by megapixels.cc ("us", "we", or "our").

          Your access to and use of the Service is conditioned on your acceptance of and compliance with these Terms.

          By accessing or using the Service you agree to be bound by these Terms. If you disagree with any part of the terms then you may not access the Service.

          -- cgit v1.2.3-70-g09d2 From 18e595bdaf64417622d12fcbe9b5af96ac935ab3 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Thu, 28 Feb 2019 17:39:22 +0100 Subject: special case adam/jules sideimages --- megapixels/app/site/parser.py | 9 ++++++--- site/assets/css/css.css | 11 ++++++++++- site/content/pages/about/index.md | 8 +++----- site/public/about/index.html | 6 +++--- 4 files changed, 22 insertions(+), 12 deletions(-) (limited to 'megapixels/app/site/parser.py') diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index b8bbf289..c17d3b8a 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -144,7 +144,7 @@ def intro_section(metadata, s3_path): def fix_images(lines, s3_path): """ - do our own tranformation of the markdown around images to handle wide images etc + do our own transformation of the markdown around images to handle wide images etc lines: markdown lines """ real_lines = [] @@ -154,10 +154,13 @@ def fix_images(lines, s3_path): line = line.replace('![', '') alt_text, tail = line.split('](', 1) url, tail = tail.split(')', 1) + tag = '' if ':' in alt_text: - tail, alt_text = alt_text.split(':', 1) + tag, alt_text = alt_text.split(':', 1) img_tag = "{}".format(s3_path + url, alt_text.replace("'", "")) - if len(alt_text): + if 'sideimage' in tag: + line = "
          {}
          {}
          ".format(img_tag, markdown(tail)) + elif len(alt_text): line = "
          {}
          {}
          ".format(img_tag, alt_text) else: line = "
          {}
          ".format(img_tag, alt_text) diff --git a/site/assets/css/css.css b/site/assets/css/css.css index ee99e13e..29833be7 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -387,7 +387,16 @@ section.fullwidth .image { margin: 10px auto 0 auto; font-family: 'Roboto'; } - +.sideimage { + margin: 10px 0; + display: flex; + flex-direction: row; + justify-content: flex-start; + align-items: flex-start; +} +.sideimage img { + margin-right: 10px; +} /* blog index */ .research_index { diff --git a/site/content/pages/about/index.md b/site/content/pages/about/index.md index d3f5874d..861cfd07 100644 --- a/site/content/pages/about/index.md +++ b/site/content/pages/about/index.md @@ -32,12 +32,10 @@ MegaPixels aims to answer to these questions and reveal the stories behind the m MegaPixels aims to answer to these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla. -![Adam Harvey](assets/adam-harvey.jpg) +![sideimage:Adam Harvey](assets/adam-harvey.jpg) **Adam Harvey** is an American artist and researcher based in Berlin. His previous projects (CV Dazzle, Stealth Wear, and SkyLift) explore the potential for countersurveillance as artwork. He is the founder of VFRAME (visual forensics software for human rights groups), the recipient of 2 PrototypeFund awards, and is currently a researcher in residence at Karlsruhe HfG studying artifical intelligence and datasets. -**Adam Harvey** is an American artist and researcher based in Berlin. His previous projects (CV Dazzle, Stealth Wear, and SkyLift) explore the potential for countersurveillance as artwork. 
He is the founder of VFRAME (visual forensics software for human rights groups), the recipient of 2 PrototypeFund awards, and is currently a researcher in residence at Karlsruhe HfG studying artifical intelligence and datasets. +![sideimage:Jules LaPlace](assets/jules-laplace.jpg) **Jules LaPlace** is an American artist and technologist also based in Berlin. He was previously the CTO of a NYC digital agency and currently works at VFRAME, developing computer vision for human rights groups, and building creative software for artists. -![Jules LaPlace](assets/jules-laplace.jpg) +**Mozilla** is a free software community founded in 1998 by members of Netscape. The Mozilla community uses, develops, spreads and supports Mozilla products, thereby promoting exclusively free software and open standards, with only minor exceptions. The community is supported institutionally by the not-for-profit Mozilla Foundation and its tax-paying subsidiary, the Mozilla Corporation. -**Jules LaPlace** is an American artist and technologist also based in Berlin. He was previously the CTO of a NYC digital agency and currently works at VFRAME, developing computer vision for human rights groups, and building creative software for artists. -**Mozilla** is a free software community founded in 1998 by members of Netscape. The Mozilla community uses, develops, spreads and supports Mozilla products, thereby promoting exclusively free software and open standards, with only minor exceptions. The community is supported institutionally by the not-for-profit Mozilla Foundation and its tax-paying subsidiary, the Mozilla Corporation. \ No newline at end of file diff --git a/site/public/about/index.html b/site/public/about/index.html index 2a0bc6c3..8583fd96 100644 --- a/site/public/about/index.html +++ b/site/public/about/index.html @@ -37,9 +37,9 @@
        Years
        2002-2019
        Datasets Analyzed
        325
        Author
        Adam Harvey
        Development
        Jules LaPlace
        Research Assistance
        Berit Gilma

        MegaPixels aims to answer these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla.

        MegaPixels aims to answer these questions and reveal the stories behind the millions of images used to train, evaluate, and power the facial recognition surveillance algorithms used today. MegaPixels is authored by Adam Harvey, developed in collaboration with Jules LaPlace, and produced in partnership with Mozilla.

        -
        Adam Harvey
        Adam Harvey

        Adam Harvey is an American artist and researcher based in Berlin. His previous projects (CV Dazzle, Stealth Wear, and SkyLift) explore the potential for countersurveillance as artwork. He is the founder of VFRAME (visual forensics software for human rights groups), the recipient of 2 PrototypeFund awards, and is currently a researcher in residence at Karlsruhe HfG studying artificial intelligence and datasets.

        -
        Jules LaPlace
        Jules LaPlace

        Jules LaPlace is an American artist and technologist also based in Berlin. He was previously the CTO of a NYC digital agency and currently works at VFRAME, developing computer vision for human rights groups, and building creative software for artists.

        -

        Mozilla is a free software community founded in 1998 by members of Netscape. The Mozilla community uses, develops, spreads and supports Mozilla products, thereby promoting exclusively free software and open standards, with only minor exceptions. The community is supported institutionally by the not-for-profit Mozilla Foundation and its tax-paying subsidiary, the Mozilla Corporation.

        +
        Adam Harvey

        Adam Harvey is an American artist and researcher based in Berlin. His previous projects (CV Dazzle, Stealth Wear, and SkyLift) explore the potential for countersurveillance as artwork. He is the founder of VFRAME (visual forensics software for human rights groups), the recipient of 2 PrototypeFund awards, and is currently a researcher in residence at Karlsruhe HfG studying artificial intelligence and datasets.

        +
        Jules LaPlace

        Jules LaPlace is an American artist and technologist also based in Berlin. He was previously the CTO of a NYC digital agency and currently works at VFRAME, developing computer vision for human rights groups, and building creative software for artists.

        +

        Mozilla is a free software community founded in 1998 by members of Netscape. The Mozilla community uses, develops, spreads and supports Mozilla products, thereby promoting exclusively free software and open standards, with only minor exceptions. The community is supported institutionally by the not-for-profit Mozilla Foundation and its tax-paying subsidiary, the Mozilla Corporation.

        -- cgit v1.2.3-70-g09d2 From 406d857c61fb128a48281a52899ddf77b68201be Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Thu, 28 Feb 2019 18:32:39 +0100 Subject: threejs splash page on the index --- client/splash/index.js | 14 +-- megapixels/app/site/parser.py | 2 + site/assets/css/css.css | 15 +++- site/content/pages/datasets/index.md | 5 -- site/content/pages/index.md | 26 ++---- site/content/pages/info/index.md | 2 +- site/public/datasets/index.html | 2 +- site/public/index.html | 166 +++-------------------------------- site/public/info/index.html | 2 +- site/templates/home.html | 113 ++++++++---------------- 10 files changed, 74 insertions(+), 273 deletions(-) (limited to 'megapixels/app/site/parser.py') diff --git a/client/splash/index.js b/client/splash/index.js index e247b7f5..a21110f0 100644 --- a/client/splash/index.js +++ b/client/splash/index.js @@ -31,12 +31,14 @@ function build() { function bind() { document.querySelector('.slogan').addEventListener('click', modal.close) - toArray(document.querySelectorAll('.aboutLink')).forEach(el => { - el.addEventListener('click', modal.toggle) - }) - document.querySelector('.about .inner').addEventListener('click', e => e.stopPropagation()) - document.querySelector('.about').addEventListener('click', modal.close) - document.querySelector('.close').addEventListener('click', modal.close) + if (document.querySelector('.about')) { + toArray(document.querySelectorAll('.aboutLink')).forEach(el => { + el.addEventListener('click', modal.toggle) + }) + document.querySelector('.about .inner').addEventListener('click', e => e.stopPropagation()) + document.querySelector('.about').addEventListener('click', modal.close) + document.querySelector('.close').addEventListener('click', modal.close) + } } function animate() { diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index c17d3b8a..ad4256ad 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -198,6 +198,8 @@ def 
format_metadata(section): """ meta = [] for line in section.split('\n'): + if ': ' not in line: + continue key, value = line[2:].split(': ', 1) meta.append("
        {}
        {}
        ".format(key, value)) return "
        {}
        ".format(''.join(meta)) diff --git a/site/assets/css/css.css b/site/assets/css/css.css index 3bd09f23..732386bd 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -112,13 +112,19 @@ footer { justify-content: space-between; color: #888; font-size: 9pt; - padding: 20px 75px 20px; + padding: 20px 0 20px; font-family: "Roboto", sans-serif; } footer > div { display: flex; flex-direction: row; } +footer > div:nth-child(1) { + padding-left: 75px; +} +footer > div:nth-child(2) { + padding-right: 75px; +} footer a { display: inline-block; color: #888; @@ -237,6 +243,7 @@ p { align-items: flex-start; font-size: 10pt; margin-bottom: 20px; + font-family: 'Roboto', sans-serif; } .meta > div { margin-right: 30px; @@ -540,11 +547,11 @@ section.fullwidth .image { .desktop .dataset-list a:nth-child(3n+2):hover { background-color: rgba(255, 128, 0, 0.2); } .dataset-list a:nth-child(3n+3) { background-color: rgba(255, 255, 0, 0.1); } -.desktop .dataset-list .dataset:nth-child(3n+3):hover { background-color: rgba(255, 255, 0, 0.2); } +.desktop .dataset-list a:nth-child(3n+3):hover { background-color: rgba(255, 255, 0, 0.2); } .dataset-list span { - box-shadow: -3px -3px black, 3px -3px black, -3px 3px black, 3px 3px black; - background-color: black; + box-shadow: -3px -3px #181818, 3px -3px #181818, -3px 3px #181818, 3px 3px #181818; + background-color: #181818; box-decoration-break: clone; } diff --git a/site/content/pages/datasets/index.md b/site/content/pages/datasets/index.md index c408fba4..fa012758 100644 --- a/site/content/pages/datasets/index.md +++ b/site/content/pages/datasets/index.md @@ -13,8 +13,6 @@ sync: false # Facial Recognition Datasets -### Sidebar - + Found: 275 datasets + Created between: 1993-2018 + Smallest dataset: 20 images @@ -22,6 +20,3 @@ sync: false + Highest resolution faces: 450x500 (Unconstrained College Students) + Lowest resolution faces: 16x20 pixels (QMUL SurvFace) - -## End Sidebar - diff --git 
a/site/content/pages/index.md b/site/content/pages/index.md index d63cf9fa..1cf47aac 100644 --- a/site/content/pages/index.md +++ b/site/content/pages/index.md @@ -1,30 +1,14 @@ ------------ status: published -title: MegaPixels -desc: -slug: home +title: Megapixels +desc: The Darkside of Datasets +slug: analysis published: 2018-12-15 updated: 2018-12-15 authors: Adam Harvey sync: false - ------------- - -## Facial Recognition Datasets - -Regular Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. -### Summary - -+ Found: 275 datasets -+ Created between: 1993-2018 -+ Smallest dataset: 20 images -+ Largest dataset: 10,000,000 images - -+ Highest resolution faces: 450x500 (Unconstrained College Students) -+ Lowest resolution faces: 16x20 pixels (QMUL SurvFace) +------------ -``` -load_file https://megapixels.nyc3.digitaloceanspaces.com/v1/citations/datasets.csv -``` +## diff --git a/site/content/pages/info/index.md b/site/content/pages/info/index.md index 9cbb219e..090783d9 100644 --- a/site/content/pages/info/index.md +++ b/site/content/pages/info/index.md @@ -11,7 +11,7 @@ sync: false ------------ -## +## Face Analysis ``` face_analysis diff --git a/site/public/datasets/index.html b/site/public/datasets/index.html index 17c938ac..7398da17 100644 --- a/site/public/datasets/index.html +++ b/site/public/datasets/index.html @@ -29,7 +29,7 @@

        Facial Recognition Datasets

        -
        +
      Found
      275 datasets
      Created between
      1993-2018
      Smallest dataset
      20 images
      Largest dataset
      10,000,000 images
      Highest resolution faces
      450x500 (Unconstrained College Students)
      Lowest resolution faces
      16x20 pixels (QMUL SurvFace)

      diff --git a/site/public/index.html b/site/public/index.html index 8775f22d..d5a2e59f 100644 --- a/site/public/index.html +++ b/site/public/index.html @@ -3,15 +3,13 @@ MegaPixels - - + + - - - +

      @@ -20,166 +18,22 @@
      MegaPixels
      -
      - -
      -
      -
      -
      -
      -
      -
      - MegaPixels is an art project that explores the dark side of face recognition datasets and the future of computer vision. -
      - - - -
      - Made by Adam Harvey in collaboration with Jules Laplace, and in partnership with Mozilla.
      - Read more about MegaPixels -
      -
      -
      -
      - -
      -

      Face Recognition Datasets

      - - -

      - MegaPixels is an online art project that explores the history of face recognition from the perspective of datasets. MegaPixels aims to unravel the meanings behind the data and expose the darker corners of the biometric industry that have contributed to its growth. -

      -

      - Through a mix of case studies, visualizations, and interactive tools, Megapixels will use face recognition datasets to tell the history of modern biometrics. Many people have contributed to the development of face recognition technology, both wittingly and unwittingly. Not only scientists, but also celebrities and regular internet users have played a part. -

      -

      - Face recognition is a mess of contradictions. It works, yet it doesn't actually work. It's cheap and accessible, but also expensive and out of control. Face recognition research has achieved headline-grabbing superhuman accuracies over 99.9%, yet in practice it's also dangerously inaccurate. -

      -

      - During a trial installation at Südkreuz station in Berlin in 2018, 20% of the matches were wrong, an error rate so high that it should not have any connection to law enforcement or justice. And in London, the Metropolitan Police had been using face recognition software that mistakenly identified an alarming 98% of people as criminals, which perhaps is a crime itself. -

      -
      - -
      -

      Dataset Portraits

      -

      - We have prepared detailed case studies of some of the more noteworthy datasets, including tools to help you learn what is contained in these datasets, and even whether your own face has been used to train these algorithms. -

      - - -
      - - +
      +
      - - - - - - - - - - + \ No newline at end of file diff --git a/site/public/info/index.html b/site/public/info/index.html index 65510255..0b59e647 100644 --- a/site/public/info/index.html +++ b/site/public/info/index.html @@ -27,7 +27,7 @@
      -

      +

      Face Analysis

      Results are only stored for the duration of the analysis and are deleted when you leave this page.

      diff --git a/site/templates/home.html b/site/templates/home.html index 9756e21f..d5a2e59f 100644 --- a/site/templates/home.html +++ b/site/templates/home.html @@ -1,82 +1,39 @@ -{% extends 'layout.html' %} - -{% block content %} -
      -
      -
      -
      -
      -
      -
      - MegaPixels is an art project that explores the dark side of face recognition datasets and the future of computer vision. -
      - - - -
      - Made by Adam Harvey in collaboration with Jules Laplace, and in partnership with Mozilla.
      - Read more about MegaPixels -
      -
      + + + + MegaPixels + + + + + + + + + + +
      + + +
      MegaPixels
      +
      + +
      +
      +
      - -
      -

      Face Recognition Datasets

      -
      - -
      -

      Dataset Portraits

      -

      - We have prepared detailed case studies of some of the more noteworthy datasets, including tools to help you learn what is contained in these datasets, and even whether your own face has been used to train these algorithms. -

      - -
      - {% for dataset in datasets %} - -
      - {{ dataset.title }} -
      -
      - {% endfor %} +
      + MegaPixels ©2017-19 Adam R. Harvey /  + ahprojects.com
      -
      - -{% endblock %} - -{% block scripts %} - - - - - - - -{% endblock %} +
      + + + \ No newline at end of file -- cgit v1.2.3-70-g09d2