From 966e27c7418d6e188ea4b1f651a5e6c67495b765 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Mon, 3 Dec 2018 17:19:51 +0100 Subject: base css --- site/assets/css/css.css | 172 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 site/assets/css/css.css (limited to 'site/assets/css/css.css') diff --git a/site/assets/css/css.css b/site/assets/css/css.css new file mode 100644 index 00000000..d7db0e1f --- /dev/null +++ b/site/assets/css/css.css @@ -0,0 +1,172 @@ +* { box-sizing: border-box; } +html, body { + margin: 0; + padding: 0; + width: 100%; + min-height: 100%; + font-family: 'Roboto', sans-serif; + background: #191919; + color: #b8b8b8; +} + +/* header */ + +header { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 70px; + background: #1e1e1e; + display: flex; + flex-direction: row; + align-items: center; + justify-content: space-between; + box-shadow: 0 0 4px rgba(0,0,0,0.3); +} +header .slogan { + display: flex; + flex-direction: row; + align-items: center; + padding-left: 56px; + text-decoration: none; +} +header .logo { + background-image: url(../img/megapixels_logo_white.svg); + background-size: cover; + background-repeat: no-repeat; + margin-top: 7px; + margin-right: 14px; + width: 49px; + height: 30px; +} +header .site_name { + font-weight: bold; + color: #fff; +} +header .sub { + color: #666; + font-size: 10pt; + margin-left: 4px; + margin-top: 2px; + transition: color 0.1s cubic-bezier(0,1,1,1); +} +.desktop header .slogan:hover .site_name { + color: #fff; +} +.desktop header .slogan:hover .sub { + color: #666; +} +header .links { + display: flex; + flex-direction: row; + font-family: 'Roboto Mono', monospace; +} +header .links a { + display: block; + color: #777; + text-decoration: none; + text-transform: uppercase; + margin-right: 32px; + transition: color 0.1s cubic-bezier(0,1,1,1), border-color 0.1s cubic-bezier(0,1,1,1); + border-bottom: 1px solid rgba(255,255,255,0); +} +header 
.links a.active { + color: #bbb; +} +.desktop header .links a:hover { + color: #fff; + border-bottom: 1px solid rgba(255,255,255,255); +} +.desktop header .links a.active:hover { + color: #fff; + border-bottom: 1px solid rgba(255,255,255,255); +} + +/* footer */ + +footer { + width: 100%; + background: #000; + display: flex; + flex-direction: row; + justify-content: space-between; + color: #888; + font-size: 9pt; + padding: 20px 75px 20px; +} +footer > div { + display: flex; + flex-direction: row; +} +footer a { + display: inline-block; + color: #888; + transition: color 0.2s cubic-bezier(0,1,1,1); + margin-right: 5px; +} +footer a:hover { + color: #ddd; +} + +/* headings */ + +h1 { + color: #ddd; + font-weight: 300; + font-size: 24pt; + margin: 75px 0 10px; + padding: 0; +} +h3 { + font-family: 'Roboto Mono', monospace; + font-weight: 400; + font-size: 10pt; + text-transform: uppercase; + color: #666; + margin: 0 0 10px 0; + padding: 0; +} + +/* content */ + +.content { + padding-top: 70px; + padding-bottom: 100px; + min-height: calc(100vh - 55px); + line-height: 1.5; +} +section { + width: 640px; + margin: 0 auto; +} +.content .first_paragraph { + font-weight: 300; + font-size: 18pt; + color: #ccc; +} +p { + margin: 0 0 20px 0; +} + +.meta { + display: flex; + flex-direction: row; + justify-content: flex-start; + align-items: flex-start; + font-size: 10pt; + margin-bottom: 20px; +} +.meta > div { + margin-right: 30px; +} +.meta .gray { + font-size: 9pt; + padding-bottom: 4px; +} +.gray { + font-family: 'Roboto Mono', monospace; + font-weight: 400; + text-transform: uppercase; + color: #666; +} -- cgit v1.2.3-70-g09d2 From d69086a1b2d7d6e6def55f35e30d0623701de011 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Tue, 4 Dec 2018 21:12:59 +0100 Subject: embedding images --- builder/README.md | 21 ++++ builder/__init__.py | 0 builder/builder.py | 69 +++++++++++ builder/parser.py | 111 +++++++++++++++++ builder/paths.py | 6 + builder/s3.py | 55 +++++++++ 
scraper/builder.py | 69 ----------- site/assets/css/css.css | 55 ++++++--- site/content | 1 + site/content/about/index.txt | 12 -- site/content/blog/2018-12-01-intro/index.txt | 13 -- .../blog/2018-12-02-second-blog-post/index.txt | 54 --------- .../blog/2018-12-02-second-blog-post/vision.png | Bin 276521 -> 0 bytes .../blog/2018-12-02-second-blog-post/wideimage.jpg | Bin 71949 -> 0 bytes site/public/about/credits/index.html | 57 +++++++++ site/public/about/disclaimer/index.html | 56 +++++++++ site/public/about/index.html | 26 ++-- site/public/about/press/index.html | 55 +++++++++ site/public/about/privacy/index.html | 134 +++++++++++++++++++++ site/public/about/style/index.html | 86 +++++++++++++ site/public/about/terms/index.html | 68 +++++++++++ site/public/blog/2018-12-01-intro/index.html | 62 ---------- .../blog/2018-12-02-second-blog-post/index.html | 114 ------------------ site/public/datasets/lfw/index.html | 112 +++++++++++++++++ site/public/datasets/vgg_faces2/index.html | 69 +++++++++++ .../research/from_1_to_100_pixels/index.html | 101 ++++++++++++++++ site/templates/blog.html | 22 ---- site/templates/layout.html | 13 +- site/templates/research.html | 35 ++++++ 29 files changed, 1103 insertions(+), 373 deletions(-) create mode 100644 builder/README.md create mode 100644 builder/__init__.py create mode 100644 builder/builder.py create mode 100644 builder/parser.py create mode 100644 builder/paths.py create mode 100644 builder/s3.py delete mode 100644 scraper/builder.py create mode 120000 site/content delete mode 100644 site/content/about/index.txt delete mode 100644 site/content/blog/2018-12-01-intro/index.txt delete mode 100644 site/content/blog/2018-12-02-second-blog-post/index.txt delete mode 100644 site/content/blog/2018-12-02-second-blog-post/vision.png delete mode 100644 site/content/blog/2018-12-02-second-blog-post/wideimage.jpg create mode 100644 site/public/about/credits/index.html create mode 100644 site/public/about/disclaimer/index.html 
create mode 100644 site/public/about/press/index.html create mode 100644 site/public/about/privacy/index.html create mode 100644 site/public/about/style/index.html create mode 100644 site/public/about/terms/index.html delete mode 100644 site/public/blog/2018-12-01-intro/index.html delete mode 100644 site/public/blog/2018-12-02-second-blog-post/index.html create mode 100644 site/public/datasets/lfw/index.html create mode 100644 site/public/datasets/vgg_faces2/index.html create mode 100644 site/public/research/from_1_to_100_pixels/index.html delete mode 100644 site/templates/blog.html create mode 100644 site/templates/research.html (limited to 'site/assets/css/css.css') diff --git a/builder/README.md b/builder/README.md new file mode 100644 index 00000000..1a6d3a1e --- /dev/null +++ b/builder/README.md @@ -0,0 +1,21 @@ +Megapixels Static Site Generator +================================ + +The index, blog, and about other pages are built using this static site generator. + +## Metadata + +``` +status: published|draft|private +title: From 1 to 100 Pixels +desc: High resolution insights from low resolution imagery +slug: from-1-to-100-pixels +published: 2018-12-04 +updated: 2018-12-04 +authors: Adam Harvey, Berit Gilma, Matthew Stender +``` + +## S3 Assets + +Static assets: `v1/site/about/assets/picture.jpg` +Dataset assets: `v1/datasets/lfw/assets/picture.jpg` diff --git a/builder/__init__.py b/builder/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/builder/builder.py b/builder/builder.py new file mode 100644 index 00000000..44fbd1c6 --- /dev/null +++ b/builder/builder.py @@ -0,0 +1,69 @@ +#!/usr/bin/python + +from dotenv import load_dotenv +load_dotenv() + +import os +import glob +from jinja2 import Environment, FileSystemLoader, select_autoescape + +import s3 +import parser +from paths import * + +env = Environment( + loader=FileSystemLoader(template_path), + autoescape=select_autoescape([]) +) + +def build_page(fn, research_posts): + metadata, 
sections = parser.read_metadata(fn) + + if metadata is None: + print("{} has no metadata".format(fn)) + return + + print(metadata['url']) + + dirname = os.path.dirname(fn) + output_path = public_path + metadata['url'] + output_fn = os.path.join(output_path, "index.html") + + if 'research/' in fn: + template = env.get_template("research.html") + else: + template = env.get_template("page.html") + + if 'datasets' in fn: + s3_path = "{}/{}/{}{}".format(os.getenv('S3_ENDPOINT'), os.getenv('S3_BUCKET'), s3_datasets_path, metadata['path']) + if 'index.md' in fn: + s3.sync_directory(dirname, s3_datasets_path, metadata) + else: + s3_path = "{}/{}/{}{}".format(os.getenv('S3_ENDPOINT'), os.getenv('S3_BUCKET'), s3_site_path, metadata['path']) + if 'index.md' in fn: + s3.sync_directory(dirname, s3_site_path, metadata) + + print(s3_path) + + content = parser.parse_markdown(sections, s3_path) + + html = template.render( + metadata=metadata, + content=content, + research_posts=research_posts, + latest_research_post=research_posts[-1], + ) + + os.makedirs(output_path, exist_ok=True) + with open(output_fn, "w") as file: + file.write(html) + + print("______") + +def build_site(): + research_posts = parser.read_research_post_index() + for fn in glob.iglob(os.path.join(content_path, "**/*.md"), recursive=True): + build_page(fn, research_posts) + +if __name__ == '__main__': + build_site() diff --git a/builder/parser.py b/builder/parser.py new file mode 100644 index 00000000..ea273556 --- /dev/null +++ b/builder/parser.py @@ -0,0 +1,111 @@ +import os +import glob +import mistune +from paths import * + +renderer = mistune.Renderer(escape=False) +markdown = mistune.Markdown(renderer=renderer) + +def fix_images(lines, s3_path): + real_lines = [] + block = "\n\n".join(lines) + for line in block.split("\n"): + if "![" in line and "](http" not in line: + line = line.replace('](', '](' + s3_path) + real_lines.append(line) + return "\n".join(real_lines) + +def wide_section(line, s3_path): + 
lines = fix_images(lines, s3_path) + return "
" + markdown(lines) + "
" + +def normal_section(lines, s3_path): + if len(lines): + lines = fix_images(lines, s3_path) + return "
" + markdown(lines) + "
" + return "" + +def parse_markdown(sections, s3_path): + groups = [] + current_group = [] + for section in sections: + if section.startswith('# '): + continue + if '![wide]' in section: + groups.append(normal_section(current_group, s3_path)) + groups.append(wide_section([section], s3_path)) + current_group = [] + else: + current_group.append(section) + groups.append(normal_section(current_group, s3_path)) + content = "".join(groups) + return content + +def read_metadata(fn): + with open(fn, "r") as file: + data = file.read() + data = data.replace("\n ", "\n") + if "\n" in data: + data = data.replace("\r", "") + else: + data = data.replace("\r", "\n") + sections = data.split("\n\n") + return parse_metadata(fn, sections) + +default_metadata = { + 'status': 'published', + 'title': 'Untitled Page', + 'desc': '', + 'slug': '', + 'published': '2018-12-31', + 'updated': '2018-12-31', + 'authors': 'Adam Harvey', +} + +def parse_metadata_section(metadata, section): + for line in section.split("\n"): + if ': ' not in line: + continue + key, value = line.split(': ', 1) + metadata[key.lower()] = value + +def parse_metadata(fn, sections): + found_meta = False + metadata = {} + valid_sections = [] + for section in sections: + if not found_meta and ': ' in section: + found_meta = True + parse_metadata_section(metadata, section) + continue + if '-----' in section: + continue + if found_meta: + valid_sections.append(section) + + if 'title' not in metadata: + print('warning: {} has no title'.format(fn)) + for key in default_metadata: + if key not in metadata: + metadata[key] = default_metadata[key] + basename = os.path.basename(fn) + metadata['path'] = os.path.dirname(fn.replace(content_path, '')) + '/' + if basename == 'index.md': + metadata['url'] = metadata['path'] + else: + metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/' + + if metadata['status'] == 'published|draft|private': + metadata['status'] = 'published' + metadata['authors'] = '
'.join(metadata['authors'].split(',')) + return metadata, valid_sections + +def read_research_post_index(): + posts = [] + for fn in sorted(glob.glob(os.path.join(content_path, 'research/**/index.md'), recursive=True)): + metadata, valid_sections = read_metadata(fn) + if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft': + continue + posts.append(metadata) + return posts + diff --git a/builder/paths.py b/builder/paths.py new file mode 100644 index 00000000..356f2f3d --- /dev/null +++ b/builder/paths.py @@ -0,0 +1,6 @@ + +s3_site_path = "v1/site" +s3_datasets_path = "v1" # datasets is already in the filename +public_path = "../site/public" +content_path = "../site/content" +template_path = "../site/templates" diff --git a/builder/s3.py b/builder/s3.py new file mode 100644 index 00000000..7d4d52a0 --- /dev/null +++ b/builder/s3.py @@ -0,0 +1,55 @@ +import os +import glob +import boto3 +from paths import * + +session = boto3.session.Session() + +s3_client = session.client( + service_name='s3', + aws_access_key_id=os.getenv('S3_KEY'), + aws_secret_access_key=os.getenv('S3_SECRET'), + endpoint_url=os.getenv('S3_ENDPOINT'), + region_name=os.getenv('S3_REGION'), +) + +def sync_directory(base_fn, s3_path, metadata): + fns = {} + for fn in glob.glob(os.path.join(base_fn, 'assets/*')): + fns[os.path.basename(fn)] = True + + remote_path = s3_path + metadata['url'] + + directory = s3_client.list_objects(Bucket=os.getenv('S3_BUCKET'), Prefix=remote_path) + prefixes = [] + + if 'Contents' in directory: + for obj in directory['Contents']: + s3_fn = obj['Key'] + fn = os.path.basename(s3_fn) + local_fn = os.path.join(base_fn, 'assets', fn) + if fn in fns: + del fns[fn] + if obj['LastModified'].timestamp() < os.path.getmtime(os.path.join(local_fn)): + print("s3 update {}".format(s3_fn)) + client.upload_file( + local_fn, + os.getenv('S3_BUCKET'), + s3_fn, + ExtraArgs={ 'ACL': 'public-read' }) + else: + print("s3 delete {}".format(s3_fn)) + response 
= client.delete_object( + Bucket=os.getenv('S3_BUCKET'), + Key=s3_fn, + ) + + for fn in fns: + local_fn = os.path.join(base_fn, 'assets', fn) + s3_fn = os.path.join(remote_path, 'assets', fn) + print("s3 create {}".format(s3_fn)) + s3_client.upload_file( + local_fn, + os.getenv('S3_BUCKET'), + s3_fn, + ExtraArgs={ 'ACL': 'public-read' }) diff --git a/scraper/builder.py b/scraper/builder.py deleted file mode 100644 index c55b6dff..00000000 --- a/scraper/builder.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/python - -import os -import glob -import mistune -from jinja2 import Environment, FileSystemLoader, select_autoescape - -public_path = "../site/public" -content_path = "../site/content" -template_path = "../site/templates" - -env = Environment( - loader=FileSystemLoader(template_path), - autoescape=select_autoescape([]) -) - -renderer = mistune.Renderer(escape=False) -markdown = mistune.Markdown(renderer=renderer) - -def wide_section(line): - return "
" + markdown(line) + "
" - -def normal_section(lines): - if len(lines): - return "
" + markdown("\n\n".join(lines)) + "
" - return "" - -def build_file(fn): - print(fn) - output_path = os.path.dirname(fn).replace(content_path, public_path) - output_fn = os.path.join(output_path, "index.html") - with open(fn, "r") as file: - sections = file.read().split("\n\n") - metadata = {} - for line in sections[0].split("\n"): - print(line) - key, value = line.split(': ', 1) - metadata[key.lower()] = value - - groups = [] - current_group = [] - for section in sections[1:]: - if '![wide]' in section: - groups.append(normal_section(current_group)) - groups.append(wide_section(section)) - current_group = [] - else: - current_group.append(section) - groups.append(normal_section(current_group)) - content = "".join(groups) - - if 'blog/' in fn: - template = env.get_template("blog.html") - else: - template = env.get_template("page.html") - html = template.render(metadata=metadata, content=content) - - os.makedirs(output_path, exist_ok=True) - with open(output_fn, "w") as file: - file.write(html) - -def build_site(): - print("Building...") - for fn in glob.iglob(os.path.join(content_path, "**/index.txt"), recursive=True): - print(fn) - build_file(fn) - -if __name__ == '__main__': - build_site() diff --git a/site/assets/css/css.css b/site/assets/css/css.css index d7db0e1f..c9d9b029 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -45,11 +45,13 @@ header .site_name { color: #fff; } header .sub { - color: #666; - font-size: 10pt; margin-left: 4px; margin-top: 2px; - transition: color 0.1s cubic-bezier(0,1,1,1); + transition: color 0.1s cubic-bezier(0,0,1,1); +} +.sub { + color: #666; + font-size: 10pt; } .desktop header .slogan:hover .site_name { color: #fff; @@ -68,7 +70,7 @@ header .links a { text-decoration: none; text-transform: uppercase; margin-right: 32px; - transition: color 0.1s cubic-bezier(0,1,1,1), border-color 0.1s cubic-bezier(0,1,1,1); + transition: color 0.1s cubic-bezier(0,0,1,1), border-color 0.1s cubic-bezier(0,0,1,1); border-bottom: 1px solid rgba(255,255,255,0); } 
header .links a.active { @@ -102,7 +104,7 @@ footer > div { footer a { display: inline-block; color: #888; - transition: color 0.2s cubic-bezier(0,1,1,1); + transition: color 0.2s cubic-bezier(0,0,1,1); margin-right: 5px; } footer a:hover { @@ -119,13 +121,20 @@ h1 { padding: 0; } h3 { + margin: 0 0 10px 0; + padding: 0; + font-size: 11pt; + font-weight: 500; +} + +th, .gray, h3 { font-family: 'Roboto Mono', monospace; font-weight: 400; - font-size: 10pt; text-transform: uppercase; color: #666; - margin: 0 0 10px 0; - padding: 0; +} +th, .gray { + font-size: 9pt; } /* content */ @@ -148,6 +157,24 @@ section { p { margin: 0 0 20px 0; } +.content a { + color: #ddd; + transition: color 0.2s cubic-bezier(0,0,1,1); +} +.content a:hover { + color: #fff; +} +code { + display: block; + font-family: 'Roboto Mono', monospace; + font-size: 9pt; + max-height: 400px; + max-width: 640px; + padding: 2px 5px; + background: rgba(255,255,255,0.1); +} + +/* top of post metadata */ .meta { display: flex; @@ -164,9 +191,9 @@ p { font-size: 9pt; padding-bottom: 4px; } -.gray { - font-family: 'Roboto Mono', monospace; - font-weight: 400; - text-transform: uppercase; - color: #666; -} + +/* blogpost index */ + +.blogposts div { + margin-bottom: 5px; +} \ No newline at end of file diff --git a/site/content b/site/content new file mode 120000 index 00000000..80b3a367 --- /dev/null +++ b/site/content @@ -0,0 +1 @@ +/Users/user/Nextcloud/megapixels/website/pages/ \ No newline at end of file diff --git a/site/content/about/index.txt b/site/content/about/index.txt deleted file mode 100644 index 36719d7c..00000000 --- a/site/content/about/index.txt +++ /dev/null @@ -1,12 +0,0 @@ -Title: About - -# About - -### The darkside of datasets - -MegaPixels is a project about the darkside of datasets. It's an exploration of what happens when you post photos online, and how they are used. 
- -In an era of exuberant data collection and analysis, social media has become the superfood of Artificial Intelligence. But what about the creators and individuals behind the data? What about their stories? - -During the last 20 yers etc - diff --git a/site/content/blog/2018-12-01-intro/index.txt b/site/content/blog/2018-12-01-intro/index.txt deleted file mode 100644 index f56a99f3..00000000 --- a/site/content/blog/2018-12-01-intro/index.txt +++ /dev/null @@ -1,13 +0,0 @@ -Title: Welcome to the blog! -Author: Adam Harvey -Date: 2018-12-01 - -Lorem ipsum dolor sit amet is more than just dummy test. It is a way of experiencing the world, an exciting clue to the mystery of being. A key to pondering its weighty thesis - that no one desires pain, but might undergo something painful for greater benefit - has been a key to understanding the nature of humanity since Cicero spoke those words thousands of years ago. And the world keeps on spinning like a top, folks, and we all keep going round on this crazy adventure called life. - -Let me tell you a secret - privacy is contagious! Don't believe me? Get up and look away from the computer right now. Do it! Walk outside! Are you still reading this? No, seriously go outside, and await further instructions! - -Are they gone? Let's wait another minute until we can be sure the reader is gone and it's just me here, writing for my own benefit. Whew, deep breath - not! These words really do exist in a vacuum. I literally have my eyes closed right now. - -Dummy text? Generation 2.0 deserves better! We will not accept the flaccid filler text of yesteryear, no, we want it all, custom bespoke dummy text, hand-crafted with love and some good old fashioned ingenuity. Don't believe me? Get up right now from your chair and go outside. I'll wait. Ok. Are they gone? Good ok now that it's definitely just us, let me let you in on a little secret. Shh, promise not to tell? - -The secret can be found in the next blog post. 
diff --git a/site/content/blog/2018-12-02-second-blog-post/index.txt b/site/content/blog/2018-12-02-second-blog-post/index.txt deleted file mode 100644 index c17e9cd0..00000000 --- a/site/content/blog/2018-12-02-second-blog-post/index.txt +++ /dev/null @@ -1,54 +0,0 @@ -Title: Second post! -Author: Adam Harvey -Date: 2018-12-02 - -# H1 -## H2 -### H3 -#### H4 - -I declare a thumb war. Ha! - -Let's get one thing straight. [I'm an inline-style link](https://www.google.com). URLs and URLs in angle brackets will automatically get turned into links. http://www.example.com or and sometimes example.com. We have links, but can we break the chain? - -Face it. Time to face facts. Look. I'm not going to say this more than once, so listen up. Listen up. Get real. Get ready. This isn't going to be easy. This isn't going to be the easiest **bold text** _italic text_ ~~strikethrough~~ - -1. Potato -2. Potato - * Un-related sub-potato -3. Potato -4. Potato - -``` -print("blah") -print "i'm a python 2 style print statement" -i'm a syntax error -``` - -![hmf](vision.png "Computer vision at its finest, folks!") -![blah](vision.png "This image has been repeated twice but it shouldn't MATTER!") - -## Hell Yeah - -| Tables | Are | Cool | -| ------------- |:-------------:| -----:| -| col 3 is | right-aligned | $1600 | -| col 2 is | centered | $12 | -| zebra stripes | are neat | $1 | - -> Blockquotes are very handy in email to emulate reply text. -> This line is part of the same quote. - -Quote break. - -> This is a very long line that will still be quoted properly when it wraps. Oh boy let's keep writing to make sure this is long enough to actually wrap for everyone. Oh, you can _put_ **Markdown** into a blockquote. Now no reason to get **swervy** when your *orange* attitude is what's making the text _wrap_. - -
And did I mention contain HTML??!!!
- - - ---- - -That's it! - -![wide](wideimage.jpg "This image should be wide. I mean REALLY wide. Picture somebody who drives a pickup truck and who purposefully parks it to take up two or even three spaces. They're coming out of the Costco now with a whole lotta paper towels and mustard on a giant pallet. Power tools deluxe, fasteners, drywall, everything you need to build a shed.") diff --git a/site/content/blog/2018-12-02-second-blog-post/vision.png b/site/content/blog/2018-12-02-second-blog-post/vision.png deleted file mode 100644 index d266cb43..00000000 Binary files a/site/content/blog/2018-12-02-second-blog-post/vision.png and /dev/null differ diff --git a/site/content/blog/2018-12-02-second-blog-post/wideimage.jpg b/site/content/blog/2018-12-02-second-blog-post/wideimage.jpg deleted file mode 100644 index f337f337..00000000 Binary files a/site/content/blog/2018-12-02-second-blog-post/wideimage.jpg and /dev/null differ diff --git a/site/public/about/credits/index.html b/site/public/about/credits/index.html new file mode 100644 index 00000000..0b3f9db8 --- /dev/null +++ b/site/public/about/credits/index.html @@ -0,0 +1,57 @@ + + + + MegaPixels + + + + + + + + + +
+ + +
MegaPixels
+ The Darkside of Datasets +
+ +
+
+ +

Credits

+

alt text

+
    +
  • MegaPixels by Adam Harvey
  • +
  • Made with support from Mozilla
  • +
  • Site developed by Jules Laplace
  • +
  • Design and graphics: Adam Harvey
  • +
  • Research assistants: Berit Gilma
  • +
+
+ +
+ + + + \ No newline at end of file diff --git a/site/public/about/disclaimer/index.html b/site/public/about/disclaimer/index.html new file mode 100644 index 00000000..1c14a97c --- /dev/null +++ b/site/public/about/disclaimer/index.html @@ -0,0 +1,56 @@ + + + + MegaPixels + + + + + + + + + +
+ + +
MegaPixels
+ The Darkside of Datasets +
+ +
+
+ +

Disclaimer

+

Last updated: December 04, 2018

+

The information contained on MegaPixels.cc website (the "Service") is for academic and artistic purposes only.

+

MegaPixels.cc assumes no responsibility for errors or omissions in the contents on the Service.

+

In no event shall MegaPixels.cc be liable for any special, direct, indirect, consequential, or incidental damages or any damages whatsoever, whether in an action of contract, negligence or other tort, arising out of or in connection with the use of the Service or the contents of the Service. MegaPixels.cc reserves the right to make additions, deletions, or modification to the contents on the Service at any time without prior notice.

+

External links disclaimer

+

MegaPixels.cc website may contain links to external websites that are not provided or maintained by or in any way affiliated with MegaPixels.cc

+

Please note that the MegaPixels.cc does not guarantee the accuracy, relevance, timeliness, or completeness of any information on these external websites.

+
+ +
+ + + + \ No newline at end of file diff --git a/site/public/about/index.html b/site/public/about/index.html index 49fa726c..8441e317 100644 --- a/site/public/about/index.html +++ b/site/public/about/index.html @@ -3,6 +3,9 @@ MegaPixels + + + @@ -17,30 +20,35 @@
-

About

-

The darkside of datasets

-

MegaPixels is a project about the darkside of datasets. It's an exploration of what happens when you post photos online, and how they are used.

-

In an era of exuberant data collection and analysis, social media has become the superfood of Artificial Intelligence. But what about the creators and individuals behind the data? What about their stories?

-

During the last 20 yers etc

+

alt text

+
    +
  • MegaPixels by Adam Harvey
  • +
  • Made with support from Mozilla
  • +
  • Site developed by Jules Laplace
  • +
  • Design and graphics: Adam Harvey
  • +
  • Research assistants: Berit Gilma
  • +
diff --git a/site/public/about/press/index.html b/site/public/about/press/index.html new file mode 100644 index 00000000..76ba90e4 --- /dev/null +++ b/site/public/about/press/index.html @@ -0,0 +1,55 @@ + + + + MegaPixels + + + + + + + + + +
+ + +
MegaPixels
+ The Darkside of Datasets +
+ +
+
+ +

Press

+

alt text

+ +
+ +
+ + + + \ No newline at end of file diff --git a/site/public/about/privacy/index.html b/site/public/about/privacy/index.html new file mode 100644 index 00000000..21fd2255 --- /dev/null +++ b/site/public/about/privacy/index.html @@ -0,0 +1,134 @@ + + + + MegaPixels + + + + + + + + + +
+ + +
MegaPixels
+ The Darkside of Datasets +
+ +
+
+ +

Privacy Policy

+

A summary of our privacy policy is as follows:

+

The MegaPixels site does not use any analytics programs or collect any data besides the necessary IP address of your connection, which are deleted every 30 days and used only for security and to prevent misuse.

+

The image processing sections of the site do not collect any data whatsoever. All processing takes place in temporary memory (RAM) and then is displayed back to the user over a SSL secured HTTPS connection. It is the sole responsibility of the user whether they discard, by closing the page, or share their analyzed information and any potential consequences that may arise from doing so.

+
+

A more complete legal version is below:

+

This is a boilerplate Privacy policy from https://termsfeed.com/

+

Needs to be reviewed

+

Effective date: December 04, 2018

+

megapixels.cc ("us", "we", or "our") operates the WebsiteName website (hereinafter referred to as the "Service").

+

This page informs you of our policies regarding the collection, use, and disclosure of personal data when you use our Service and the choices you have associated with that data.

+

We use your data to provide and improve the Service. By using the Service, you agree to the collection and use of information in accordance with this policy. Unless otherwise defined in this Privacy Policy, the terms used in this Privacy Policy have the same meanings as in our Terms and Conditions, accessible from WebsiteName

+

Definitions

+

Service

+

Service is the MegaPixels website operated by megapixels.cc

+

Personal Data

+

Personal Data means data about a living individual who can be identified from those data (or from those and other information either in our possession or likely to come into our possession).

+

Usage Data

+

Usage Data is data collected automatically either generated by the use of the Service or from the Service infrastructure itself

+

Information Collection and Use

+

We collect several different types of information for various purposes to provide and improve our Service to you.

+

Types of Data Collected

+

Personal Data

+

While using our Service, we may ask you to provide us with certain personally identifiable information that can be used to contact or identify you ("Personal Data"). Personally identifiable information may include, but is not limited to:

+
    +
  • Cookies and Usage Data
  • +
+

Usage Data

+

We may also collect information how the Service is accessed and used ("Usage Data"). This Usage Data may include information such as your computer's Internet Protocol address (e.g. IP address), browser type, browser version, the pages of our Service that you visit, the time and date of your visit, the time spent on those pages, unique device identifiers and other diagnostic data.

+

Tracking & Cookies Data

+

We use cookies and similar tracking technologies to track the activity on our Service and we hold certain information. +Cookies are files with a small amount of data which may include an anonymous unique identifier. Cookies are sent to your browser from a website and stored on your device. Other tracking technologies are also used such as beacons, tags and scripts to collect and track information and to improve and analyse our Service.

+

You can instruct your browser to refuse all cookies or to indicate when a cookie is being sent. However, if you do not accept cookies, you may not be able to use some portions of our Service. +Examples of Cookies we use:

+
    +
  • Session Cookies. We use Session Cookies to operate our Service.
  • +
  • Preference Cookies. We use Preference Cookies to remember your preferences and various settings.
  • +
  • Security Cookies. We use Security Cookies for security purposes.
  • +
+

Use of Data

+

megapixels.cc uses the collected data for various purposes:

+
    +
  • To provide and maintain the Service
  • +
  • To notify you about changes to our Service
  • +
  • To allow you to participate in interactive features of our Service when you choose to do so
  • +
  • To provide customer care and support
  • +
  • To provide analysis or valuable information so that we can improve the Service
  • +
  • To monitor the usage of the Service
  • +
  • To detect, prevent and address technical issues
  • +
+

Transfer Of Data

+

Your information, including Personal Data, may be transferred to — and maintained on — computers located outside of your state, province, country or other governmental jurisdiction where the data protection laws may differ than those from your jurisdiction.

+

If you are located outside Germany and choose to provide information to us, please note that we transfer the data, including Personal Data, to Germany and process it there. +Your consent to this Privacy Policy followed by your submission of such information represents your agreement to that transfer. +megapixels.cc will take all steps reasonably necessary to ensure that your data is treated securely and in accordance with this Privacy Policy and no transfer of your Personal Data will take place to an organization or a country unless there are adequate controls in place including the security of your data and other personal information.

+

Disclosure Of Data

+

Legal Requirements

+

megapixels.cc may disclose your Personal Data in the good faith belief that such action is necessary to:

+

    +
      +
    • To comply with a legal obligation
    • +
    • To protect and defend the rights or property of megapixels.cc
    • +
    • To prevent or investigate possible wrongdoing in connection with the Service
    • +
    • To protect the personal safety of users of the Service or the public
    • +
    • To protect against legal liability
    • +
    +

    Security of Data

    +

    The security of your data is important to us but remember that no method of transmission over the Internet or method of electronic storage is 100% secure. While we strive to use commercially acceptable means to protect your Personal Data, we cannot guarantee its absolute security.

    +

    Service Providers

    +

    We may employ third party companies and individuals to facilitate our Service ("Service Providers"), to provide the Service on our behalf, to perform Service-related services or to assist us in analyzing how our Service is used.

    +

    These third parties have access to your Personal Data only to perform these tasks on our behalf and are obligated not to disclose or use it for any other purpose.

    +

    Links to Other Sites

    +

    Our Service may contain links to other sites that are not operated by us. If you click a third party link, you will be directed to that third party's site. We strongly advise you to review the Privacy Policy of every site you visit. +We have no control over and assume no responsibility for the content, privacy policies or practices of any third party sites or services.

    +

    Children's Privacy

    +

    Our Service does not address anyone under the age of 18 ("Children").

    +

    We do not knowingly collect personally identifiable information from anyone under the age of 18. If you are a parent or guardian and you are aware that your Child has provided us with Personal Data, please contact us. If we become aware that we have collected Personal Data from children without verification of parental consent, we take steps to remove that information from our servers.

    +

    Changes to This Privacy Policy

    +

    We may update our Privacy Policy from time to time. We will notify you of any changes by posting the new Privacy Policy on this page. +We will let you know via email and/or a prominent notice on our Service, prior to the change becoming effective and update the "effective date" at the top of this Privacy Policy. +You are advised to review this Privacy Policy periodically for any changes. Changes to this Privacy Policy are effective when they are posted on this page.

    +

    Contact Us

    +

    If you have any questions about this Privacy Policy, please contact us:

    + +
+ +
+ + + + \ No newline at end of file diff --git a/site/public/about/style/index.html b/site/public/about/style/index.html new file mode 100644 index 00000000..2e0c80d0 --- /dev/null +++ b/site/public/about/style/index.html @@ -0,0 +1,86 @@ + + + + MegaPixels + + + + + + + + + +
+ + +
MegaPixels
+ The Darkside of Datasets +
+ +
+
+ +

Alt text here

+

Header 1

+

Header 2

+

Header 3

+

Header 4

+
Header 5
+
Header 6
+

Bold text, italic text, bold italic text

+

At vero eos et et iusto qui blanditiis praesentium voluptatum deleniti atque corrupti1, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non-provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio2. Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat, facere possimus, omnis voluptas assumenda est, omnis dolor repellendus3.

+
    +
  • Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium
  • +
  • Totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo
  • +
  • Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut
  • +
  • Odit aut fugit, sed quia consequuntur magni dolores eos
  • +
  • Qui ratione voluptatem sequi nesciunt, neque porro quisquam
  • +
+

est, qui dolorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliquam quaerat voluptatem.

+
+

Inline code has back-ticks around it.

+
var s = "JavaScript syntax highlighting";
+alert(s);
+
+
s = "Python syntax highlighting"
+print(s)
+
+
No language indicated, so no syntax highlighting. 
+But let's throw in a <b>tag</b>.
+
+

Horizontal rule

+
+

Citations below here

+
+
+
  1. First source

  2. +
  3. Second source

  4. +
  5. Third source

  6. +
+
+
+ +
+ + + + \ No newline at end of file diff --git a/site/public/about/terms/index.html b/site/public/about/terms/index.html new file mode 100644 index 00000000..73155546 --- /dev/null +++ b/site/public/about/terms/index.html @@ -0,0 +1,68 @@ + + + + MegaPixels + + + + + + + + + +
+ + +
MegaPixels
+ The Darkside of Datasets +
+ +
+
+ +

Terms and Conditions ("Terms")

+

Last updated: December 04, 2018

+

Please read these Terms and Conditions ("Terms", "Terms and Conditions") carefully before using the MegaPixels website (the "Service") operated by megapixels.cc ("us", "we", or "our").

+

Your access to and use of the Service is conditioned on your acceptance of and compliance with these Terms.

+

By accessing or using the Service you agree to be bound by these Terms. If you disagree with any part of the terms then you may not access the Service.

+

Links To Other Web Sites

+

Our Service may contain links to third-party web sites or services that are not owned or controlled by megapixels.cc.

+

megapixels.cc has no control over, and assumes no responsibility for, the content, privacy policies, or practices of any third party web sites or services. You further acknowledge and agree that megapixels.cc shall not be responsible or liable, directly or indirectly, for any damage or loss caused or alleged to be caused by or in connection with use of or reliance on any such content, goods or services available on or through any such web sites or services.

+

We strongly advise you to read the terms and conditions and privacy policies of any third-party web sites or services that you visit.

+

Termination

+

We may terminate or suspend access to our Service immediately, without prior notice or liability, for any reason whatsoever, including without limitation if you breach the Terms.

+

All provisions of the Terms which by their nature should survive termination shall survive termination, including, without limitation, ownership provisions, warranty disclaimers, indemnity and limitations of liability.

+

Governing Law

+

These Terms shall be governed and construed in accordance with the laws of Berlin, Germany, without regard to its conflict of law provisions.

+

Our failure to enforce any right or provision of these Terms will not be considered a waiver of those rights. If any provision of these Terms is held to be invalid or unenforceable by a court, the remaining provisions of these Terms will remain in effect. These Terms constitute the entire agreement between us regarding our Service, and supersede and replace any prior agreements we might have between us regarding the Service.

+

Changes

+

We reserve the right, at our sole discretion, to modify or replace these Terms at any time. If a revision is material we will try to provide at least 30 days notice prior to any new terms taking effect. What constitutes a material change will be determined at our sole discretion.

+

By continuing to access or use our Service after those revisions become effective, you agree to be bound by the revised terms. If you do not agree to the new terms, please stop using the Service.

+

Contact Us

+

If you have any questions about these Terms, please contact us.

+
+ +
+ + + + \ No newline at end of file diff --git a/site/public/blog/2018-12-01-intro/index.html b/site/public/blog/2018-12-01-intro/index.html deleted file mode 100644 index c92ea2fd..00000000 --- a/site/public/blog/2018-12-01-intro/index.html +++ /dev/null @@ -1,62 +0,0 @@ - - - - MegaPixels - - - - - - -
- - -
MegaPixels
- The Darkside of Datasets -
- -
-
- -
-

Welcome to the blog!

-
-
-
Posted
-
2018-12-01
-
-
-
By
-
Adam Harvey
-
- -
-
-

Lorem ipsum dolor sit amet is more than just dummy test. It is a way of experiencing the world, an exciting clue to the mystery of being. A key to pondering its weighty thesis - that no one desires pain, but might undergo something painful for greater benefit - has been a key to understanding the nature of humanity since Cicero spoke those words thousands of years ago. And the world keeps on spinning like a top, folks, and we all keep going round on this crazy adventure called life.

-

Let me tell you a secret - privacy is contagious! Don't believe me? Get up and look away from the computer right now. Do it! Walk outside! Are you still reading this? No, seriously go outside, and await further instructions!

-

Are they gone? Let's wait another minute until we can be sure the reader is gone and it's just me here, writing for my own benefit. Whew, deep breath - not! These words really do exist in a vacuum. I literally have my eyes closed right now.

-

Dummy text? Generation 2.0 deserves better! We will not accept the flaccid filler text of yesteryear, no, we want it all, custom bespoke dummy text, hand-crafted with love and some good old fashioned ingenuity. Don't believe me? Get up right now from your chair and go outside. I'll wait. Ok. Are they gone? Good ok now that it's definitely just us, let me let you in on a little secret. Shh, promise not to tell?

-

The secret can be found in the next blog post.

-
- -
- - - - \ No newline at end of file diff --git a/site/public/blog/2018-12-02-second-blog-post/index.html b/site/public/blog/2018-12-02-second-blog-post/index.html deleted file mode 100644 index 5852ebe7..00000000 --- a/site/public/blog/2018-12-02-second-blog-post/index.html +++ /dev/null @@ -1,114 +0,0 @@ - - - - MegaPixels - - - - - - -
- - -
MegaPixels
- The Darkside of Datasets -
- -
-
- -
-

Second post!

-
-
-
Posted
-
2018-12-02
-
-
-
By
-
Adam Harvey
-
- -
-
-

H1

-

H2

-

H3

-

H4

-

I declare a thumb war. Ha!

-

Let's get one thing straight. I'm an inline-style link. URLs and URLs in angle brackets will automatically get turned into links. http://www.example.com or http://www.example.com and sometimes example.com. We have links, but can we break the chain?

-

Face it. Time to face facts. Look. I'm not going to say this more than once, so listen up. Listen up. Get real. Get ready. This isn't going to be easy. This isn't going to be the easiest bold text italic text strikethrough

-
    -
  1. Potato
  2. -
  3. Potato
      -
    • Un-related sub-potato
    • -
    -
  4. -
  5. Potato
  6. -
  7. Potato
  8. -
-
print("blah")
-print "i'm a python 2 style print statement"
-i<span class="color: red">'m a syntax error</span>
-
-

hmf -blah

-

Hell Yeah

- - - - - - - - - - - - - - - - - - - - - - - - -
TablesAreCool
col 3 isright-aligned$1600
col 2 iscentered$12
zebra stripesare neat$1
-

Blockquotes are very handy in email to emulate reply text. -This line is part of the same quote.

-
-

Quote break.

-

This is a very long line that will still be quoted properly when it wraps. Oh boy let's keep writing to make sure this is long enough to actually wrap for everyone. Oh, you can put Markdown into a blockquote. Now no reason to get swervy when your orange attitude is what's making the text wrap.

-
-
And did I mention contain HTML??!!!

-

That's it!

-

wide

-
- -
- - - - \ No newline at end of file diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html new file mode 100644 index 00000000..8455bc60 --- /dev/null +++ b/site/public/datasets/lfw/index.html @@ -0,0 +1,112 @@ + + + + MegaPixels + + + + + + + + + +
+ + +
MegaPixels
+ The Darkside of Datasets +
+ +
+
+ +
    +
  • Created 2007
  • +
  • Images 13,233
  • +
  • People 5,749
  • +
  • Created From Yahoo News images
  • +
  • Search available Searchable
  • +
+

Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to by researchers as “The Wild”.

+

Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

+

INTRO

+

It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).

+

Since then the LFW dataset has become one of the most widely used datasets for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times, reaching 45 different countries throughout the world.

+

The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context, now serve a new purpose: to train, evaluate, and improve facial recognition.

+

As the most widely used facial recognition dataset, it can be said that each individual in LFW has, in a small way, contributed to the current state of the art in facial recognition surveillance. John Cusack, Julianne Moore, Barry Bonds, Osama bin Laden, and even Moby are amongst these biometric pillars, exemplar faces providing the visual dimensions of a new computer vision future.

+

From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset

+

In addition to commercial use as an evaluation tool, all of the faces in the LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.

+

Usage

+
#!/usr/bin/python
+from matplotlib import plt
+from sklearn.datasets import fetch_lfw_people
+lfw_people = fetch_lfw_people()
+lfw_person = lfw_people[0]
+plt.imshow(lfw_person)
+
+

Commercial Use

+

The LFW dataset is used by numerous companies for benchmarking algorithms and in some cases training. According to the benchmarking results page 1 provided by the authors, over 2 dozen companies have contributed their benchmark results.

+

(Jules: this loads the assets/lfw_vendor_results.csv)

+

In benchmarking, companies use a dataset to evaluate their algorithms which are typically trained on other data. After training, researchers will use LFW as a benchmark to compare results with other algorithms.

+

For example, Baidu (est. net worth $13B) uses LFW to report results for their "Targeting Ultimate Accuracy: Face Recognition via Deep Embedding". According to the three Baidu researchers who produced the paper:

+

LFW has been the most popular evaluation benchmark for face recognition, and played a very important role in facilitating the face recognition society to improve algorithm. 2.

+
+

Citations

+ + + + + + + + + + + + + + + + + + + + + + +
TitleOrganizationCountryType
3D-aided face recognition from videosUniversity of LyonFranceedu
A Community Detection Approach to Cleaning Extremely Large Face DatabaseNational University of Defense Technology, ChinaChinaedu
+

Conclusion

+

The LFW face recognition training and evaluation dataset is a historically important face dataset as it was the first popular dataset to be created entirely from Internet images, paving the way for a global trend towards downloading anyone’s face from the Internet and adding it to a dataset. As will be evident with other datasets, LFW’s approach has now become the norm.

+

For all the 5,000 people in this dataset, their face is forever a part of facial recognition history. It would be impossible to remove anyone from the dataset because it is so ubiquitous. For the rest of their lives and forever after, these 5,000 people will continue to be used for training facial recognition surveillance.

+
+
+
  1. "LFW Results". Accessed Dec 3, 2018. http://vis-www.cs.umass.edu/lfw/results.html

  2. +
  3. "Chinese tourist town uses face recognition as an entry pass". New Scientist. November 17, 2016. https://www.newscientist.com/article/2113176-chinese-tourist-town-uses-face-recognition-as-an-entry-pass/

  4. +
+
+
+ +
+ + + + \ No newline at end of file diff --git a/site/public/datasets/vgg_faces2/index.html b/site/public/datasets/vgg_faces2/index.html new file mode 100644 index 00000000..19efbbbc --- /dev/null +++ b/site/public/datasets/vgg_faces2/index.html @@ -0,0 +1,69 @@ + + + + MegaPixels + + + + + + + + + +
+ + +
MegaPixels
+ The Darkside of Datasets +
+ +
+
+ +
    +
  • Created 2007
  • +
  • Images 13,233
  • +
  • People 5,749
  • +
  • Created From Yahoo News images
  • +
  • Search available Searchable
  • +
+

Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to by researchers as “The Wild”.

+

Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

+

INTRO

+

It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).

+

Since then the LFW dataset has become one of the most widely used datasets for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times, reaching 45 different countries throughout the world.

+

The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context, now serve a new purpose: to train, evaluate, and improve facial recognition.

+

As the most widely used facial recognition dataset, it can be said that each individual in LFW has, in a small way, contributed to the current state of the art in facial recognition surveillance. John Cusack, Julianne Moore, Barry Bonds, Osama bin Laden, and even Moby are amongst these biometric pillars, exemplar faces providing the visual dimensions of a new computer vision future.

+

Commercial Use

+

The dataset is used by numerous companies for benchmarking algorithms. According to the benchmarking results page 1 provided by the authors, there are over 2 dozen commercial uses of the LFW face dataset.

+
+
+
  1. "LFW Results". Accessed Dec 3, 2018. http://vis-www.cs.umass.edu/lfw/results.html

  2. +
+
+
+ +
+ + + + \ No newline at end of file diff --git a/site/public/research/from_1_to_100_pixels/index.html b/site/public/research/from_1_to_100_pixels/index.html new file mode 100644 index 00000000..751e885b --- /dev/null +++ b/site/public/research/from_1_to_100_pixels/index.html @@ -0,0 +1,101 @@ + + + + MegaPixels + + + + + + + + + +
+ + +
MegaPixels
+ The Darkside of Datasets +
+ +
+
+ +
+

From 1 to 100 Pixels

+
+
+
Posted
+
2018-12-04
+
+
+
By
+
Adam Harvey
Berit Gilma
Matthew Stender
+
+ +
+
+ +

High resolution insights from low resolution data

+

This post will be about the meaning of "face". How do people define it? How do biometrics researchers define it? How has it changed during the last decade?

+

What can you know from a very small amount of information?

+
    +
  • 1 pixel grayscale
  • +
  • 2x2 pixels grayscale, font example
  • +
  • 4x4 pixels
  • +
  • 8x8 yotta yotta
  • +
  • 5x7 face recognition
  • +
  • 12x16 activity recognition
  • +
  • 6/5 (up to 124/106) pixels in height/width, and the average is 24/20 for QMUL SurvFace
  • +
  • 20x16 tiny faces paper
  • +
  • 20x20 MNIST handwritten images http://yann.lecun.com/exdb/mnist/
  • +
  • 24x24 haarcascade detector idealized images
  • +
  • 32x32 CIFAR image dataset
  • +
  • 40x40 can do emotion detection, face recognition at scale, 3d modeling of the face. include datasets with faces at this resolution including pedestrian.
  • +
  • need more material from 60-100
  • +
  • 60x60 show how texture emerges and pupils, eye color, higher resolution of features and compare to lower resolution faces
  • +
  • 100x100 0.5% of one Instagram photo
  • +
+

Find specific cases of facial resolution being used in legal cases, forensic investigations, or military footage

+

Research

+
    +
  • NIST report on sres states several resolutions
  • +
  • "Results show that the tested face recognition systems yielded similar performance for query sets with eye-to-eye distance from 60 pixels to 30 pixels" 1
  • +
+
+
+
  1. NIST 906932. Performance Assessment of Face Recognition Using Super-Resolution. Shuowen Hu, Robert Maschal, S. Susan Young, Tsai Hong Hong, Jonathon P. Phillips

  2. +
+
+
+ +
+

MORE RESEARCH

+
+ +
+
+ +
+ + + + \ No newline at end of file diff --git a/site/templates/blog.html b/site/templates/blog.html deleted file mode 100644 index b23b2f29..00000000 --- a/site/templates/blog.html +++ /dev/null @@ -1,22 +0,0 @@ -{% extends 'layout.html' %} - -{% block content %} -
-

{{ metadata.title }}

-
-
-
Posted
-
{{ metadata.date }}
-
-
-
By
-
{{ metadata.author }}
-
- {% if metadata.datasets %}
-
Datasets
-
{{ metadata.datasets }}
-
{% endif %} -
-
- {{ content }} -{% endblock %} diff --git a/site/templates/layout.html b/site/templates/layout.html index 304e804f..5b5833be 100644 --- a/site/templates/layout.html +++ b/site/templates/layout.html @@ -3,6 +3,9 @@ MegaPixels + + + @@ -17,7 +20,7 @@ @@ -27,13 +30,15 @@ diff --git a/site/templates/research.html b/site/templates/research.html new file mode 100644 index 00000000..4263f204 --- /dev/null +++ b/site/templates/research.html @@ -0,0 +1,35 @@ +{% extends 'layout.html' %} + +{% block content %} +
+

{{ metadata.title }}

+
+
+
Posted
+
{{ metadata.published }}
+
+
+
By
+
{{ metadata.authors }}
+
+ {% if metadata.datasets %}
+
Datasets
+
{{ metadata.datasets }}
+
{% endif %} +
+
+ + {{ content }} + +
+

MORE RESEARCH

+
+ {% for blogpost in blogposts %} +
+ {{ blogpost.title }} + {{ blogpost.date }} +
+ {% endfor %} +
+
+{% endblock %} -- cgit v1.2.3-70-g09d2 From 2a1b884e841efe562e0c84885a404819433b3405 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 5 Dec 2018 16:19:50 +0100 Subject: styling images --- builder/builder.py | 4 +- builder/parser.py | 57 +++++++++--- builder/s3.py | 4 +- site/assets/css/css.css | 88 ++++++++++++++++-- site/public/about/credits/index.html | 5 +- site/public/about/disclaimer/index.html | 2 +- site/public/about/index.html | 5 +- site/public/about/press/index.html | 5 +- site/public/about/privacy/index.html | 2 +- site/public/about/style/index.html | 23 ++--- site/public/about/terms/index.html | 2 +- site/public/datasets/lfw/index.html | 56 +++++++++--- site/public/datasets/vgg_faces2/index.html | 5 +- site/public/index.html | 63 +++++++++++++ .../research/01_from_1_to_100_pixels/index.html | 101 +++++++++++++++++++++ site/templates/research.html | 2 +- 16 files changed, 357 insertions(+), 67 deletions(-) create mode 100644 site/public/index.html create mode 100644 site/public/research/01_from_1_to_100_pixels/index.html (limited to 'site/assets/css/css.css') diff --git a/builder/builder.py b/builder/builder.py index 44fbd1c6..deb9eb68 100644 --- a/builder/builder.py +++ b/builder/builder.py @@ -40,11 +40,9 @@ def build_page(fn, research_posts): s3.sync_directory(dirname, s3_datasets_path, metadata) else: s3_path = "{}/{}/{}{}".format(os.getenv('S3_ENDPOINT'), os.getenv('S3_BUCKET'), s3_site_path, metadata['path']) - if 'index.md' in fn: + if 'index.md' in fn and metadata['url'] != '/': s3.sync_directory(dirname, s3_site_path, metadata) - print(s3_path) - content = parser.parse_markdown(sections, s3_path) html = template.render( diff --git a/builder/parser.py b/builder/parser.py index ea273556..529d21fa 100644 --- a/builder/parser.py +++ b/builder/parser.py @@ -1,4 +1,5 @@ import os +import re import glob import mistune from paths import * @@ -10,19 +11,28 @@ def fix_images(lines, s3_path): real_lines = [] block = "\n\n".join(lines) for line in 
block.split("\n"): - if "![" in line and "](http" not in line: - line = line.replace('](', '](' + s3_path) + if "![" in line: + print(line) + line = line.replace('![', '') + alt_text, tail = line.split('](', 1) + url, tail = tail.split(')', 1) + if ':' in alt_text: + tail, alt_text = alt_text.split(':', 1) + img_tag = "{}".format(s3_path + url, alt_text.replace("'", "")) + if len(alt_text): + line = "
{}
{}
".format(img_tag, alt_text) + else: + line = "
{}
".format(img_tag, alt_text) real_lines.append(line) return "\n".join(real_lines) -def wide_section(line, s3_path): - lines = fix_images(lines, s3_path) - return "
" + markdown(lines) + "
" - -def normal_section(lines, s3_path): +def format_section(lines, s3_path, type=''): if len(lines): lines = fix_images(lines, s3_path) - return "
" + markdown(lines) + "
" + if type: + return "
{}
".format(type, markdown(lines)) + else: + return "
" + markdown(lines) + "
" return "" def parse_markdown(sections, s3_path): @@ -31,13 +41,17 @@ def parse_markdown(sections, s3_path): for section in sections: if section.startswith('# '): continue - if '![wide]' in section: - groups.append(normal_section(current_group, s3_path)) - groups.append(wide_section([section], s3_path)) + if '![wide:' in section: + groups.append(format_section(current_group, s3_path)) + groups.append(format_section([section], s3_path, type='wide')) + current_group = [] + elif '![' in section: + groups.append(format_section(current_group, s3_path)) + groups.append(format_section([section], s3_path, type='images')) current_group = [] else: current_group.append(section) - groups.append(normal_section(current_group, s3_path)) + groups.append(format_section(current_group, s3_path)) content = "".join(groups) return content @@ -88,16 +102,22 @@ def parse_metadata(fn, sections): for key in default_metadata: if key not in metadata: metadata[key] = default_metadata[key] + + basedir = os.path.dirname(fn.replace(content_path, '')) basename = os.path.basename(fn) - metadata['path'] = os.path.dirname(fn.replace(content_path, '')) + '/' - if basename == 'index.md': + if basedir == '/': + metadata['path'] = '/' + metadata['url'] = '/' + elif basename == 'index.md': + metadata['path'] = basedir + '/' metadata['url'] = metadata['path'] else: + metadata['path'] = basedir + '/' metadata['url'] = metadata['path'] + basename.replace('.md', '') + '/' if metadata['status'] == 'published|draft|private': metadata['status'] = 'published' - metadata['authors'] = '
'.join(metadata['authors'].split(',')) + metadata['author_html'] = '
'.join(metadata['authors'].split(',')) return metadata, valid_sections def read_research_post_index(): @@ -107,5 +127,12 @@ def read_research_post_index(): if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft': continue posts.append(metadata) + if not len(posts): + posts.append({ + 'title': 'Placeholder', + 'slug': 'placeholder', + 'date': 'Placeholder', + 'url': '/', + }) return posts diff --git a/builder/s3.py b/builder/s3.py index 7d4d52a0..f3dcce48 100644 --- a/builder/s3.py +++ b/builder/s3.py @@ -32,14 +32,14 @@ def sync_directory(base_fn, s3_path, metadata): del fns[fn] if obj['LastModified'].timestamp() < os.path.getmtime(os.path.join(local_fn)): print("s3 update {}".format(s3_fn)) - client.upload_file( + s3_client.upload_file( local_fn, os.getenv('S3_BUCKET'), s3_fn, ExtraArgs={ 'ACL': 'public-read' }) else: print("s3 delete {}".format(s3_fn)) - response = client.delete_object( + response = s3_client.delete_object( Bucket=os.getenv('S3_BUCKET'), Key=s3_fn, ) diff --git a/site/assets/css/css.css b/site/assets/css/css.css index c9d9b029..1024ffcd 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -120,14 +120,14 @@ h1 { margin: 75px 0 10px; padding: 0; } -h3 { - margin: 0 0 10px 0; +h2, h3 { + margin: 0 0 20px 0; padding: 0; font-size: 11pt; font-weight: 500; } -th, .gray, h3 { +th, .gray, h2, h3 { font-family: 'Roboto Mono', monospace; font-weight: 400; text-transform: uppercase; @@ -165,13 +165,89 @@ p { color: #fff; } code { - display: block; font-family: 'Roboto Mono', monospace; font-size: 9pt; + padding: 2px 4px; + background: rgba(255,255,255,0.1); +} +pre code { + display: block; max-height: 400px; max-width: 640px; - padding: 2px 5px; - background: rgba(255,255,255,0.1); +} +hr { + height: 1px; + background: #888; + border: 0; + width: 80px; +} +.footnotes hr { + display: none; +} +.footnotes ol:before { + content: 'Footnotes'; + margin: 0 0 10px -40px; + padding-bottom: 0; + display: block; + 
font-family: 'Roboto Mono', monospace; + font-weight: 400; + text-transform: uppercase; + color: #666; + font-size: 11pt; +} + +/* images */ + +section img { + max-width: 100%; + display: block; + margin: 0 auto; +} +section .image { + margin-bottom: 40px; +} +section.images { + display: flex; + flex-direction: row; + align-items: flex-start; + justify-content: center; +} +.image:only-child { + width: 100%; +} +.image:first-child { + margin-left: 0; +} +.image:nth-child(2), +.image:nth-child(3) { + margin-left: 40px; +} +.image:first-child:nth-last-child(2), +.image:first-child:nth-last-child(2) ~ .image { + width: 300px; +} +.image:first-child:nth-last-child(3), +.image:first-child:nth-last-child(3) ~ .image { + width: 186px; +} +section.wide { + width: 100%; +} +section.wide .image { + max-width: 100%; +} +.caption { + text-align: center; + font-size: 9pt; + color: #888; + max-width: 620px; + margin: 10px auto 0 auto; +} + +blockquote { + margin-left: 28px; + padding: 0 0 0 10px; + border-left: 2px solid #555; } /* top of post metadata */ diff --git a/site/public/about/credits/index.html b/site/public/about/credits/index.html index 0b3f9db8..9fec7e64 100644 --- a/site/public/about/credits/index.html +++ b/site/public/about/credits/index.html @@ -20,15 +20,14 @@

Credits

-

alt text

-
    +
alt text
alt text
  • MegaPixels by Adam Harvey
  • Made with support from Mozilla
  • Site developed by Jules Laplace
  • diff --git a/site/public/about/disclaimer/index.html b/site/public/about/disclaimer/index.html index 1c14a97c..553bf084 100644 --- a/site/public/about/disclaimer/index.html +++ b/site/public/about/disclaimer/index.html @@ -20,7 +20,7 @@ diff --git a/site/public/about/index.html b/site/public/about/index.html index 8441e317..363e8fc0 100644 --- a/site/public/about/index.html +++ b/site/public/about/index.html @@ -20,14 +20,13 @@
    -

    alt text

    -
      +
      alt text
      alt text
      • MegaPixels by Adam Harvey
      • Made with support from Mozilla
      • Site developed by Jules Laplace
      • diff --git a/site/public/about/press/index.html b/site/public/about/press/index.html index 76ba90e4..aa6e5e13 100644 --- a/site/public/about/press/index.html +++ b/site/public/about/press/index.html @@ -20,15 +20,14 @@

        Press

        -

        alt text

        -
          +
        alt text
        alt text
        • Aug 22, 2018: "Transgender YouTubers had their videos grabbed to train facial recognition software" by James Vincent https://www.theverge.com/2017/8/22/16180080/transgender-youtubers-ai-facial-recognition-dataset
        • Aug 22, 2018: "Transgender YouTubers had their videos grabbed to train facial recognition software" by James Vincent https://www.theverge.com/2017/8/22/16180080/transgender-youtubers-ai-facial-recognition-dataset
        • Aug 22, 2018: "Transgender YouTubers had their videos grabbed to train facial recognition software" by James Vincent https://www.theverge.com/2017/8/22/16180080/transgender-youtubers-ai-facial-recognition-dataset
        • diff --git a/site/public/about/privacy/index.html b/site/public/about/privacy/index.html index 21fd2255..d1ec1c77 100644 --- a/site/public/about/privacy/index.html +++ b/site/public/about/privacy/index.html @@ -20,7 +20,7 @@ diff --git a/site/public/about/style/index.html b/site/public/about/style/index.html index 2e0c80d0..24e6f5be 100644 --- a/site/public/about/style/index.html +++ b/site/public/about/style/index.html @@ -20,21 +20,19 @@
          -

          Alt text here

          -

          Header 1

          -

          Header 2

          +
          Alt text here
          Alt text here

          Header 2

          Header 3

          Header 4

          Header 5
          Header 6

          Bold text, italic text, bold italic text

          -

          At vero eos et et iusto qui blanditiis praesentium voluptatum deleniti atque corrupti1, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non-provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio2. Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat, facere possimus, omnis voluptas assumenda est, omnis dolor repellendus3.

          +

          At vero eos et et iusto qui blanditiis praesentium voluptatum deleniti atque corrupti[^1], quos dolores et quas molestias excepturi sint, obcaecati cupiditate non-provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio[^2]. Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat, facere possimus, omnis voluptas assumenda est, omnis dolor repellendus[^3].

          • Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium
          • Totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo
          • @@ -42,9 +40,15 @@
          • Odit aut fugit, sed quia consequuntur magni dolores eos
          • Qui ratione voluptatem sequi nesciunt, neque porro quisquam
          -

          est, qui dolorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliquam quaerat voluptatem.

          +

          single image test

          +
          This person is alone
          This person is alone

          double image test

          +
          This person is on the left
          This person is on the left
          +
          This person is on the right
          This person is on the right

          triple image test

          +
          Person 1
          Person 1
          +
          Person 2
          Person 2
          +
          Person 3. Let me tell you about Person 3.  This person has a very long description with text which wraps like crazy
          Person 3. Let me tell you about Person 3. This person has a very long description with text which wraps like crazy

          est, qui dolorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliquam quaerat voluptatem.

          -

          Inline code has back-ticks around it.

          +
          This image is extremely wide and the text beneath it will wrap but thats fine because it can also contain <a href="https://example.com/">hyperlinks</a>! Yes, you read that right—hyperlinks! Lorem ipsum dolor sit amet ad volotesque sic hoc ad nauseam
          This image is extremely wide and the text beneath it will wrap but that's fine because it can also contain hyperlinks! Yes, you read that right—hyperlinks! Lorem ipsum dolor sit amet ad volotesque sic hoc ad nauseam

          Inline code has back-ticks around it.

          var s = "JavaScript syntax highlighting";
           alert(s);
           
          @@ -59,10 +63,7 @@ But let's throw in a <b>tag</b>.

          Citations below here


          -
          1. First source

          2. -
          3. Second source

          4. -
          5. Third source

          6. -
          +
            diff --git a/site/public/about/terms/index.html b/site/public/about/terms/index.html index 73155546..4b9f4445 100644 --- a/site/public/about/terms/index.html +++ b/site/public/about/terms/index.html @@ -20,7 +20,7 @@ diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index 8455bc60..a130c24e 100644 --- a/site/public/datasets/lfw/index.html +++ b/site/public/datasets/lfw/index.html @@ -4,7 +4,7 @@ MegaPixels - + @@ -20,7 +20,7 @@ @@ -31,17 +31,15 @@
          1. Images 13,233
          2. People 5,749
          3. Created From Yahoo News images
          4. -
          5. Search available Searchable
          6. +
          7. Analyzed and searchable
          -

          Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to by researchers as “The Wild”.

          -

          Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

          -

          INTRO

          +

          Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to as “The Wild”.

          +
          Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.
          Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

          INTRO

          It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).

          Since then the LFW dataset has become one of the most widely used datasets used for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times reaching 45 different countries throughout the world.

          The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context now server a new purpose: to train, evaluate, and improve facial recognition.

          As the most widely used facial recognition dataset, it can be said that each individual in LFW has, in a small way, contributed to the current state of the art in facial recognition surveillance. John Cusack, Julianne Moore, Barry Bonds, Osama bin Laden, and even Moby are amongst these biometric pillars, exemplar faces provided the visual dimensions of a new computer vision future.

          -

          From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset

          -

          In addition to commercial use as an evaluation tool, alll of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.

          +
          From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset
          From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset

          In addition to commercial use as an evaluation tool, alll of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.

          Usage

          #!/usr/bin/python
           from matplotlib import plt
          @@ -51,11 +49,39 @@ lfw_person = lfw_people[0]
           plt.imshow(lfw_person)
           

          Commercial Use

          -

          The LFW dataset is used by numerous companies for benchmarking algorithms and in some cases training. According to the benchmarking results page 1 provided by the authors, over 2 dozen companies have contributed their benchmark results

          -

          (Jules: this load the assets/lfw_vendor_results.csv)

          -

          In benchmarking, companies use a dataset to evaluate their algorithms which are typically trained on other data. After training, researchers will use LFW as a benchmark to compare results with other algorithms.

          +

          The LFW dataset is used by numerous companies for benchmarking algorithms and in some cases training. According to the benchmarking results page [^lfw_results] provided by the authors, over 2 dozen companies have contributed their benchmark results

          +
          load file: lfw_commercial_use.csv
          +name_display,company_url,example_url,country,description
          +
          + + + + + + + + + + + + + + + + + + + + + + + + +
          CompanyCountryIndustries
          AratekChinaBiometric sensors for telecom, civil identification, finance, education, POS, and transportation
          AratekChinaBiometric sensors for telecom, civil identification, finance, education, POS, and transportation
          AratekChinaBiometric sensors for telecom, civil identification, finance, education, POS, and transportation
          +

          Add 2-4 screenshots of companies mentioning LFW here

          +
          ReadSense
          ReadSense

          In benchmarking, companies use a dataset to evaluate their algorithms which are typically trained on other data. After training, researchers will use LFW as a benchmark to compare results with other algorithms.

          For example, Baidu (est. net worth $13B) uses LFW to report results for their "Targeting Ultimate Accuracy: Face Recognition via Deep Embedding". According to the three Baidu researchers who produced the paper:

          -

          LFW has been the most popular evaluation benchmark for face recognition, and played a very important role in facilitating the face recognition society to improve algorithm. 2.

          +

          LFW has been the most popular evaluation benchmark for face recognition, and played a very important role in facilitating the face recognition society to improve algorithm. 1.

          Citations

          @@ -84,10 +110,12 @@ plt.imshow(lfw_person)

          Conclusion

          The LFW face recognition training and evaluation dataset is a historically important face dataset as it was the first popular dataset to be created entirely from Internet images, paving the way for a global trend towards downloading anyone’s face from the Internet and adding it to a dataset. As will be evident with other datasets, LFW’s approach has now become the norm.

          For all the 5,000 people in this datasets, their face is forever a part of facial recognition history. It would be impossible to remove anyone from the dataset because it is so ubiquitous. For their rest of the lives and forever after, these 5,000 people will continue to be used for training facial recognition surveillance.

          +

          Notes

          +

          According to BiometricUpdate.com2, LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."


          -
          1. "LFW Results". Accessed Dec 3, 2018. http://vis-www.cs.umass.edu/lfw/results.html

          2. -
          3. "Chinese tourist town uses face recognition as an entry pass". New Scientist. November 17, 2016. https://www.newscientist.com/article/2113176-chinese-tourist-town-uses-face-recognition-as-an-entry-pass/

          4. +
            1. "Chinese tourist town uses face recognition as an entry pass". New Scientist. November 17, 2016. https://www.newscientist.com/article/2113176-chinese-tourist-town-uses-face-recognition-as-an-entry-pass/

            2. +
            3. "PING AN Tech facial recognition receives high score in latest LFW test results". https://www.biometricupdate.com/201702/ping-an-tech-facial-recognition-receives-high-score-in-latest-lfw-test-results

          diff --git a/site/public/datasets/vgg_faces2/index.html b/site/public/datasets/vgg_faces2/index.html index 19efbbbc..ee353047 100644 --- a/site/public/datasets/vgg_faces2/index.html +++ b/site/public/datasets/vgg_faces2/index.html @@ -20,7 +20,7 @@ @@ -34,8 +34,7 @@
        • Search available Searchable
        • Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to by researchers as “The Wild”.

          -

          Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

          -

          INTRO

          +
          Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.
          Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

          INTRO

          It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).

          Since then the LFW dataset has become one of the most widely used datasets used for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times reaching 45 different countries throughout the world.

          The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context now server a new purpose: to train, evaluate, and improve facial recognition.

          diff --git a/site/public/index.html b/site/public/index.html new file mode 100644 index 00000000..ea3dc24c --- /dev/null +++ b/site/public/index.html @@ -0,0 +1,63 @@ + + + + MegaPixels + + + + + + + + + +
          + + +
          MegaPixels
          + The Darkside of Datasets +
          + +
          +
          + +

          MegaPixels is an art project that explores the dark side of face recognition training data and the future of computer vision

          +

          Made by Adam Harvey in partnership with Mozilla.
          +Read more about MegaPixels

          +

          [Explore Datasets] [Explore Algorithms]

          +

          Facial Recognition Datasets

          +

          Regular Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

          +

          Summary

          +
            +
          • 275 datasets found
          • +
          • Created between the years 1993-2018
          • +
          • Smallest dataset: 20 images
          • +
          • Largest dataset: 10,000,000 images
          • +
          • Highest resolution faces: 450x500 (Unconstrained College Students)
          • +
          • Lowest resolution faces: 16x20 pixels (QMUL SurvFace)
          • +
          +
          + +
          + + + + \ No newline at end of file diff --git a/site/public/research/01_from_1_to_100_pixels/index.html b/site/public/research/01_from_1_to_100_pixels/index.html new file mode 100644 index 00000000..90f142e9 --- /dev/null +++ b/site/public/research/01_from_1_to_100_pixels/index.html @@ -0,0 +1,101 @@ + + + + MegaPixels + + + + + + + + + +
          + + +
          MegaPixels
          + The Darkside of Datasets +
          + +
          +
          + +
          +

          From 1 to 100 Pixels

          +
          +
          +
          Posted
          +
          2018-12-04
          +
          +
          +
          By
          +
          Adam Harvey
          +
          + +
          +
          + +

          High resolution insights from low resolution data

          +

          This post will be about the meaning of "face". How do people define it? How to biometrics researchers define it? How has it changed during the last decade.

          +

          What can you know from a very small amount of information?

          +
            +
          • 1 pixel grayscale
          • +
          • 2x2 pixels grayscale, font example
          • +
          • 4x4 pixels
          • +
          • 8x8 yotta yotta
          • +
          • 5x7 face recognition
          • +
          • 12x16 activity recognition
          • +
          • 6/5 (up to 124/106) pixels in height/width, and the average is 24/20 for QMUL SurvFace
          • +
          • 20x16 tiny faces paper
          • +
          • 20x20 MNIST handwritten images http://yann.lecun.com/exdb/mnist/
          • +
          • 24x24 haarcascade detector idealized images
          • +
          • 32x32 CIFAR image dataset
          • +
          • 40x40 can do emotion detection, face recognition at scale, 3d modeling of the face. include datasets with faces at this resolution including pedestrian.
          • +
          • need more material from 60-100
          • +
          • 60x60 show how texture emerges and pupils, eye color, higher resolution of features and compare to lower resolution faces
          • +
          • 100x100 0.5% of one Instagram photo
          • +
          +

          Find specific cases of facial resolution being used in legal cases, forensic investigations, or military footage

          +

          Research

          +
            +
          • NIST report on sres states several resolutions
          • +
          • "Results show that the tested face recognition systems yielded similar performance for query sets with eye-to-eye distance from 60 pixels to 30 pixels" 1
          • +
          +
          +
          +
          1. NIST 906932. Performance Assessment of Face Recognition Using Super-Resolution. Shuowen Hu, Robert Maschal, S. Susan Young, Tsai Hong Hong, Jonathon P. Phillips

          2. +
          +
          +
          + +
          +

          MORE RESEARCH

          +
          + +
          +
          + +
          + + + + \ No newline at end of file diff --git a/site/templates/research.html b/site/templates/research.html index 4263f204..22e494c2 100644 --- a/site/templates/research.html +++ b/site/templates/research.html @@ -10,7 +10,7 @@
          By
          -
          {{ metadata.authors }}
          +
          {{ metadata.author_html }}
          {% if metadata.datasets %}
          Datasets
          -- cgit v1.2.3-70-g09d2 From 03ed12b471c1e50ae531c46fcbf5afd06ca5432b Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Wed, 5 Dec 2018 18:23:32 +0100 Subject: build --- builder/builder.py | 37 ++++-- builder/parser.py | 46 ++++++- builder/s3.py | 6 + site/assets/css/css.css | 65 +++++++--- site/assets/css/fonts.css | 18 ++- site/assets/js/app/site.js | 7 + site/public/about/credits/index.html | 8 +- site/public/about/disclaimer/index.html | 8 +- site/public/about/index.html | 11 +- site/public/about/press/index.html | 8 +- site/public/about/privacy/index.html | 9 +- site/public/about/style/index.html | 12 +- site/public/about/terms/index.html | 8 +- site/public/datasets/lfw/index.html | 20 +-- site/public/datasets/lfw/what/index.html | 141 +++++++++++++++++++++ site/public/datasets/vgg_faces2/index.html | 20 +-- site/public/index.html | 12 +- .../research/01_from_1_to_100_pixels/index.html | 15 +-- site/public/research/index.html | 50 ++++++++ site/templates/layout.html | 8 +- site/templates/research.html | 12 -- 21 files changed, 391 insertions(+), 130 deletions(-) create mode 100644 site/public/datasets/lfw/what/index.html create mode 100644 site/public/research/index.html (limited to 'site/assets/css/css.css') diff --git a/builder/builder.py b/builder/builder.py index deb9eb68..0e404b88 100644 --- a/builder/builder.py +++ b/builder/builder.py @@ -29,21 +29,25 @@ def build_page(fn, research_posts): output_path = public_path + metadata['url'] output_fn = os.path.join(output_path, "index.html") + is_research = False + if 'research/' in fn: + is_research = True template = env.get_template("research.html") else: template = env.get_template("page.html") - if 'datasets' in fn: - s3_path = "{}/{}/{}{}".format(os.getenv('S3_ENDPOINT'), os.getenv('S3_BUCKET'), s3_datasets_path, metadata['path']) - if 'index.md' in fn: - s3.sync_directory(dirname, s3_datasets_path, metadata) + if 'datasets/' in fn: + s3_dir = s3_datasets_path else: - s3_path = 
"{}/{}/{}{}".format(os.getenv('S3_ENDPOINT'), os.getenv('S3_BUCKET'), s3_site_path, metadata['path']) - if 'index.md' in fn and metadata['url'] != '/': - s3.sync_directory(dirname, s3_site_path, metadata) + s3_dir = s3_site_path + + s3_path = s3.make_s3_path(s3_dir, metadata['path']) + + if 'index.md' in fn: + s3.sync_directory(dirname, s3_dir, metadata) - content = parser.parse_markdown(sections, s3_path) + content = parser.parse_markdown(sections, s3_path, skip_h1=is_research) html = template.render( metadata=metadata, @@ -58,10 +62,27 @@ def build_page(fn, research_posts): print("______") +def build_research_index(research_posts): + metadata, sections = parser.read_metadata('../site/content/research/index.md') + template = env.get_template("page.html") + s3_path = s3.make_s3_path(s3_site_path, metadata['path']) + content = parser.parse_markdown(sections, s3_path, skip_h1=False) + content += parser.parse_research_index(research_posts) + html = template.render( + metadata=metadata, + content=content, + research_posts=research_posts, + latest_research_post=research_posts[-1], + ) + output_fn = public_path + '/research/index.html' + with open(output_fn, "w") as file: + file.write(html) + def build_site(): research_posts = parser.read_research_post_index() for fn in glob.iglob(os.path.join(content_path, "**/*.md"), recursive=True): build_page(fn, research_posts) + build_research_index(research_posts) if __name__ == '__main__': build_site() diff --git a/builder/parser.py b/builder/parser.py index 529d21fa..da3044a0 100644 --- a/builder/parser.py +++ b/builder/parser.py @@ -2,6 +2,8 @@ import os import re import glob import mistune + +import s3 from paths import * renderer = mistune.Renderer(escape=False) @@ -12,7 +14,6 @@ def fix_images(lines, s3_path): block = "\n\n".join(lines) for line in block.split("\n"): if "![" in line: - print(line) line = line.replace('![', '') alt_text, tail = line.split('](', 1) url, tail = tail.split(')', 1) @@ -35,13 +36,26 @@ def 
format_section(lines, s3_path, type=''): return "
          " + markdown(lines) + "
          " return "" -def parse_markdown(sections, s3_path): +def format_metadata(section): + meta = [] + for line in section.split('\n'): + key, value = line[2:].split(': ', 1) + meta.append("
          {}
          {}
          ".format(key, value)) + return "
          {}
          ".format(''.join(meta)) + +def parse_markdown(sections, s3_path, skip_h1=False): groups = [] current_group = [] + seen_metadata = False for section in sections: - if section.startswith('# '): + if skip_h1 and section.startswith('# '): continue - if '![wide:' in section: + elif section.startswith('+ ') and not seen_metadata: + groups.append(format_section(current_group, s3_path)) + groups.append(format_metadata(section)) + current_group = [] + seen_metadata = True + elif '![wide:' in section: groups.append(format_section(current_group, s3_path)) groups.append(format_section([section], s3_path, type='wide')) current_group = [] @@ -55,6 +69,23 @@ def parse_markdown(sections, s3_path): content = "".join(groups) return content +def parse_research_index(research_posts): + content = "
          " + for post in research_posts: + s3_path = s3.make_s3_path(s3_site_path, post['path']) + if 'image' in post: + post_image = s3_path + post['image'] + else: + post_image = 'data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==' + row = "
          Research post

          {}

          {}

          ".format( + post['path'], + post_image, + post['title'], + post['tagline']) + content += row + content += '
          ' + return content + def read_metadata(fn): with open(fn, "r") as file: data = file.read() @@ -74,6 +105,8 @@ default_metadata = { 'published': '2018-12-31', 'updated': '2018-12-31', 'authors': 'Adam Harvey', + 'sync': 'true', + 'tagline': '', } def parse_metadata_section(metadata, section): @@ -117,12 +150,15 @@ def parse_metadata(fn, sections): if metadata['status'] == 'published|draft|private': metadata['status'] = 'published' + + metadata['sync'] = metadata['sync'] != 'false' + metadata['author_html'] = '
          '.join(metadata['authors'].split(',')) return metadata, valid_sections def read_research_post_index(): posts = [] - for fn in sorted(glob.glob(os.path.join(content_path, 'research/**/index.md'), recursive=True)): + for fn in sorted(glob.glob('../site/content/research/*/index.md')): metadata, valid_sections = read_metadata(fn) if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft': continue diff --git a/builder/s3.py b/builder/s3.py index f3dcce48..41ecdf61 100644 --- a/builder/s3.py +++ b/builder/s3.py @@ -18,6 +18,9 @@ def sync_directory(base_fn, s3_path, metadata): for fn in glob.glob(os.path.join(base_fn, 'assets/*')): fns[os.path.basename(fn)] = True + if not metadata['sync']: + return + remote_path = s3_path + metadata['url'] directory = s3_client.list_objects(Bucket=os.getenv('S3_BUCKET'), Prefix=remote_path) @@ -53,3 +56,6 @@ def sync_directory(base_fn, s3_path, metadata): os.getenv('S3_BUCKET'), s3_fn, ExtraArgs={ 'ACL': 'public-read' }) + +def make_s3_path(s3_dir, metadata_path): + return "{}/{}/{}{}".format(os.getenv('S3_ENDPOINT'), os.getenv('S3_BUCKET'), s3_dir, metadata_path) diff --git a/site/assets/css/css.css b/site/assets/css/css.css index 1024ffcd..843809a8 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -164,16 +164,46 @@ p { .content a:hover { color: #fff; } + +/* top of post metadata */ + +.meta { + display: flex; + flex-direction: row; + justify-content: flex-start; + align-items: flex-start; + font-size: 10pt; + margin-bottom: 20px; +} +.meta > div { + margin-right: 30px; +} +.meta .gray { + font-size: 9pt; + padding-bottom: 4px; +} + +/* misc formatting */ + code { font-family: 'Roboto Mono', monospace; font-size: 9pt; padding: 2px 4px; background: rgba(255,255,255,0.1); } +pre { + margin: 0 0 40px 0; + border: 1px solid #666; + border-radius: 2px; +} pre code { display: block; max-height: 400px; max-width: 640px; + padding: 4px 10px; +} +table { + margin-bottom: 40px; } hr { 
height: 1px; @@ -181,6 +211,14 @@ hr { border: 0; width: 80px; } +blockquote { + margin-left: 28px; + padding: 0 0 0 10px; + border-left: 2px solid #555; +} + +/* footnotes */ + .footnotes hr { display: none; } @@ -243,29 +281,14 @@ section.wide .image { max-width: 620px; margin: 10px auto 0 auto; } - -blockquote { - margin-left: 28px; - padding: 0 0 0 10px; - border-left: 2px solid #555; -} - -/* top of post metadata */ - -.meta { - display: flex; - flex-direction: row; - justify-content: flex-start; - align-items: flex-start; - font-size: 10pt; - margin-bottom: 20px; +.research_index { + margin-top: 40px; } -.meta > div { - margin-right: 30px; +.research_index a { + text-decoration: none; } -.meta .gray { - font-size: 9pt; - padding-bottom: 4px; +.research_index h1 { + margin-top: 20px; } /* blogpost index */ diff --git a/site/assets/css/fonts.css b/site/assets/css/fonts.css index 2195c70b..8db01fbd 100644 --- a/site/assets/css/fonts.css +++ b/site/assets/css/fonts.css @@ -2,34 +2,40 @@ font-family: 'Roboto'; font-style: normal; font-weight: 300; - src: url("../fonts/Roboto_300.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_300.woff") format("woff"), url("../fonts/Roboto_300.woff2") format("woff2"), url("../fonts/Roboto_300.svg#Roboto") format("svg"), url("../fonts/Roboto_300.ttf") format("truetype"); } + src: url("../fonts/Roboto_300.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_300.woff") format("woff"), url("../fonts/Roboto_300.woff2") format("woff2"), url("../fonts/Roboto_300.svg#Roboto") format("svg"), url("../fonts/Roboto_300.ttf") format("truetype"); +} @font-face { font-family: 'Roboto'; font-style: normal; font-weight: 400; - src: url("../fonts/Roboto_400.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_400.woff") format("woff"), url("../fonts/Roboto_400.woff2") format("woff2"), url("../fonts/Roboto_400.svg#Roboto") format("svg"), url("../fonts/Roboto_400.ttf") format("truetype"); } + src: 
url("../fonts/Roboto_400.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_400.woff") format("woff"), url("../fonts/Roboto_400.woff2") format("woff2"), url("../fonts/Roboto_400.svg#Roboto") format("svg"), url("../fonts/Roboto_400.ttf") format("truetype"); +} @font-face { font-family: 'Roboto'; font-style: normal; font-weight: 500; - src: url("../fonts/Roboto_500.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_500.woff") format("woff"), url("../fonts/Roboto_500.woff2") format("woff2"), url("../fonts/Roboto_500.svg#Roboto") format("svg"), url("../fonts/Roboto_500.ttf") format("truetype"); } + src: url("../fonts/Roboto_500.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_500.woff") format("woff"), url("../fonts/Roboto_500.woff2") format("woff2"), url("../fonts/Roboto_500.svg#Roboto") format("svg"), url("../fonts/Roboto_500.ttf") format("truetype"); +} @font-face { font-family: 'Roboto Mono'; font-style: normal; font-weight: 300; - src: url("../fonts/Roboto_Mono_300.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_300.woff") format("woff"), url("../fonts/Roboto_Mono_300.woff2") format("woff2"), url("../fonts/Roboto_Mono_300.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_300.ttf") format("truetype"); } + src: url("../fonts/Roboto_Mono_300.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_300.woff") format("woff"), url("../fonts/Roboto_Mono_300.woff2") format("woff2"), url("../fonts/Roboto_Mono_300.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_300.ttf") format("truetype"); +} @font-face { font-family: 'Roboto Mono'; font-style: normal; font-weight: 400; - src: url("../fonts/Roboto_Mono_400.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_400.woff") format("woff"), url("../fonts/Roboto_Mono_400.woff2") format("woff2"), url("../fonts/Roboto_Mono_400.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_400.ttf") format("truetype"); } + src: 
url("../fonts/Roboto_Mono_400.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_400.woff") format("woff"), url("../fonts/Roboto_Mono_400.woff2") format("woff2"), url("../fonts/Roboto_Mono_400.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_400.ttf") format("truetype"); +} @font-face { font-family: 'Roboto Mono'; font-style: normal; font-weight: 500; - src: local("Roboto-Mono Medium"), local("RobotoMono-Medium"), url("../fonts/Roboto_Mono_500.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_500.woff") format("woff"), url("../fonts/Roboto_Mono_500.woff2") format("woff2"), url("../fonts/Roboto_Mono_500.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_500.ttf") format("truetype"); } + src: local("Roboto-Mono Medium"), local("RobotoMono-Medium"), url("../fonts/Roboto_Mono_500.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_500.woff") format("woff"), url("../fonts/Roboto_Mono_500.woff2") format("woff2"), url("../fonts/Roboto_Mono_500.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_500.ttf") format("truetype"); +} diff --git a/site/assets/js/app/site.js b/site/assets/js/app/site.js index 04c0c495..12bee3ec 100644 --- a/site/assets/js/app/site.js +++ b/site/assets/js/app/site.js @@ -7,6 +7,8 @@ const isDesktop = !isMobile const htmlClassList = document.body.parentNode.classList htmlClassList.add(isDesktop ? 
'desktop' : 'mobile') +function toArray(A) { return Array.prototype.slice.apply(A) } + var site = (function(){ var site = {} site.init = function(){ @@ -17,6 +19,11 @@ var site = (function(){ if (paras.length) { paras[0].classList.add('first_paragraph') } + toArray(document.querySelectorAll('header .links a')).forEach(tag => { + if (window.location.href.match(tag.href)) { + tag.classList.add('active') + } + }) } site.init() })() \ No newline at end of file diff --git a/site/public/about/credits/index.html b/site/public/about/credits/index.html index 9fec7e64..f1a28b0e 100644 --- a/site/public/about/credits/index.html +++ b/site/public/about/credits/index.html @@ -18,10 +18,10 @@ The Darkside of Datasets
          diff --git a/site/public/about/disclaimer/index.html b/site/public/about/disclaimer/index.html index 553bf084..5df5d656 100644 --- a/site/public/about/disclaimer/index.html +++ b/site/public/about/disclaimer/index.html @@ -18,10 +18,10 @@ The Darkside of Datasets
          diff --git a/site/public/about/index.html b/site/public/about/index.html index 363e8fc0..f1a28b0e 100644 --- a/site/public/about/index.html +++ b/site/public/about/index.html @@ -18,15 +18,16 @@ The Darkside of Datasets
          -
          alt text
          alt text
            +

            Credits

            +
            alt text
            alt text
            • MegaPixels by Adam Harvey
            • Made with support from Mozilla
            • Site developed by Jules Laplace
            • diff --git a/site/public/about/press/index.html b/site/public/about/press/index.html index aa6e5e13..e5763036 100644 --- a/site/public/about/press/index.html +++ b/site/public/about/press/index.html @@ -18,10 +18,10 @@ The Darkside of Datasets
              diff --git a/site/public/about/privacy/index.html b/site/public/about/privacy/index.html index d1ec1c77..7ad9564f 100644 --- a/site/public/about/privacy/index.html +++ b/site/public/about/privacy/index.html @@ -18,10 +18,10 @@ The Darkside of Datasets
              @@ -84,7 +84,6 @@ megapixels.cc will take all steps reasonably necessary to ensure that your data

              Disclosure Of Data

              Legal Requirements

              megapixels.cc may disclose your Personal Data in the good faith belief that such action is necessary to:

              -

                • To comply with a legal obligation
                • To protect and defend the rights or property of megapixels.cc
                • diff --git a/site/public/about/style/index.html b/site/public/about/style/index.html index 24e6f5be..eea861ac 100644 --- a/site/public/about/style/index.html +++ b/site/public/about/style/index.html @@ -18,15 +18,17 @@ The Darkside of Datasets
                  -
                  Alt text here
                  Alt text here

                  Header 2

                  +

                  Style Examples

                  +
                  Alt text here
                  Alt text here

                  Header 1

                  +

                  Header 2

                  Header 3

                  Header 4

                  Header 5
                  diff --git a/site/public/about/terms/index.html b/site/public/about/terms/index.html index 4b9f4445..db8b9e57 100644 --- a/site/public/about/terms/index.html +++ b/site/public/about/terms/index.html @@ -18,10 +18,10 @@ The Darkside of Datasets
                  diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index a130c24e..76549d25 100644 --- a/site/public/datasets/lfw/index.html +++ b/site/public/datasets/lfw/index.html @@ -18,28 +18,22 @@ The Darkside of Datasets
                  -
                    -
                  • Created 2007
                  • -
                  • Images 13,233
                  • -
                  • People 5,749
                  • -
                  • Created From Yahoo News images
                  • -
                  • Analyzed and searchable
                  • -
                  -

                  Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to as “The Wild”.

                  +

                  Labeled Faces in The Wild

                  +
                  Created
                  2007
                  Images
                  13,233
                  People
                  5,749
                  Created From
                  Yahoo News images
                  Search available
                  Searchable

                  Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to as “The Wild”.

                  Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.
                  Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

                  INTRO

                  It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).

                  Since then the LFW dataset has become one of the most widely used datasets used for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times reaching 45 different countries throughout the world.

                  The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context now server a new purpose: to train, evaluate, and improve facial recognition.

                  As the most widely used facial recognition dataset, it can be said that each individual in LFW has, in a small way, contributed to the current state of the art in facial recognition surveillance. John Cusack, Julianne Moore, Barry Bonds, Osama bin Laden, and even Moby are amongst these biometric pillars, exemplar faces provided the visual dimensions of a new computer vision future.

                  -
                  From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset
                  From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset

                  In addition to commercial use as an evaluation tool, alll of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.

                  +
                  From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset
                  From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset

                  In addition to commercial use as an evaluation tool, alll of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.

                  Usage

                  #!/usr/bin/python
                   from matplotlib import plt
                  diff --git a/site/public/datasets/lfw/what/index.html b/site/public/datasets/lfw/what/index.html
                  new file mode 100644
                  index 00000000..52993a79
                  --- /dev/null
                  +++ b/site/public/datasets/lfw/what/index.html
                  @@ -0,0 +1,141 @@
                  +
                  +
                  +
                  +  MegaPixels
                  +  
                  +  
                  +  
                  +  
                  +  
                  +  
                  +  
                  +
                  +
                  +  
                  + + +
                  MegaPixels
                  + The Darkside of Datasets +
                  + +
                  +
                  + +

                  Labeled Faces in The Wild

                  +
                    +
                  • Created 2007 (auto)
                  • +
                  • Images 13,233 (auto)
                  • +
                  • People 5,749 (auto)
                  • +
                  • Created From Yahoo News images (auto)
                  • +
                  • Analyzed and searchable (auto)
                  • +
                  +

                  Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first facial recognition dataset [^lfw_names_faces] of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people that appeared on Yahoo News between 2002 - 2004.

                  +
                  Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.
                  Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

                  INTRO

                  +

                  It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).

                  +

                  Since then the LFW dataset has become one of the most widely used datasets used for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times reaching 45 different countries throughout the world.

                  +

                  The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context now server a new purpose: to train, evaluate, and improve facial recognition.

                  +

                  As the most widely used facial recognition dataset, it can be said that each individual in LFW has, in a small way, contributed to the current state of the art in facial recognition surveillance. John Cusack, Julianne Moore, Barry Bonds, Osama bin Laden, and even Moby are amongst these biometric pillars, exemplar faces provided the visual dimensions of a new computer vision future.

                  +
                  From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset
                  From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset

                  In addition to commercial use as an evaluation tool, all of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.

                  +

                  Usage

                  +
                  #!/usr/bin/python
                  +from matplotlib import plt
                  +from sklearn.datasets import fetch_lfw_people
                  +lfw_people = fetch_lfw_people()
                  +lfw_person = lfw_people[0]
                  +plt.imshow(lfw_person)
                  +
                  +

                  Commercial Use

                  +

                  The LFW dataset is used by numerous companies for benchmarking algorithms and in some cases training. According to the benchmarking results page [^lfw_results] provided by the authors, over 2 dozen companies have contributed their benchmark results

                  +
                  load file: lfw_commercial_use.csv
                  +name_display,company_url,example_url,country,description
                  +
                  +
          + + + + + + + + + + + + + + + + + + + + + + + +
          CompanyCountryIndustries
          AratekChinaBiometric sensors for telecom, civil identification, finance, education, POS, and transportation
          AratekChinaBiometric sensors for telecom, civil identification, finance, education, POS, and transportation
          AratekChinaBiometric sensors for telecom, civil identification, finance, education, POS, and transportation
          +

          Add 2-4 screenshots of companies mentioning LFW here

          +
          ReadSense
          ReadSense

          In benchmarking, companies use a dataset to evaluate their algorithms which are typically trained on other data. After training, researchers will use LFW as a benchmark to compare results with other algorithms.

          +

          For example, Baidu (est. net worth $13B) uses LFW to report results for their "Targeting Ultimate Accuracy: Face Recognition via Deep Embedding". According to the three Baidu researchers who produced the paper:

          +

          LFW has been the most popular evaluation benchmark for face recognition, and played a very important role in facilitating the face recognition society to improve algorithm. 1.

          +
          +

          Citations

          + + + + + + + + + + + + + + + + + + + + + + +
          TitleOrganizationCountryType
          3D-aided face recognition from videosUniversity of LyonFranceedu
          A Community Detection Approach to Cleaning Extremely Large Face DatabaseNational University of Defense Technology, ChinaChinaedu
          +

          Conclusion

          +

          The LFW face recognition training and evaluation dataset is a historically important face dataset as it was the first popular dataset to be created entirely from Internet images, paving the way for a global trend towards downloading anyone’s face from the Internet and adding it to a dataset. As will be evident with other datasets, LFW’s approach has now become the norm.

          +

          For all the 5,000 people in this datasets, their face is forever a part of facial recognition history. It would be impossible to remove anyone from the dataset because it is so ubiquitous. For their rest of the lives and forever after, these 5,000 people will continue to be used for training facial recognition surveillance.

          +

          Notes

          +

          According to BiometricUpdate.com2, LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

          +
          +
          +
          1. "Chinese tourist town uses face recognition as an entry pass". New Scientist. November 17, 2016. https://www.newscientist.com/article/2113176-chinese-tourist-town-uses-face-recognition-as-an-entry-pass/

          2. +
          3. "PING AN Tech facial recognition receives high score in latest LFW test results". https://www.biometricupdate.com/201702/ping-an-tech-facial-recognition-receives-high-score-in-latest-lfw-test-results

          4. +
          +
          +
          + +
          + + + + \ No newline at end of file diff --git a/site/public/datasets/vgg_faces2/index.html b/site/public/datasets/vgg_faces2/index.html index ee353047..95b5f7d7 100644 --- a/site/public/datasets/vgg_faces2/index.html +++ b/site/public/datasets/vgg_faces2/index.html @@ -18,23 +18,17 @@ The Darkside of Datasets
          -
            -
          • Created 2007
          • -
          • Images 13,233
          • -
          • People 5,749
          • -
          • Created From Yahoo News images
          • -
          • Search available Searchable
          • -
          -

          Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to by researchers as “The Wild”.

          -
          Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.
          Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

          INTRO

          +

          Labeled Faces in The Wild

          +
          Created
          2007
          Images
          13,233
          People
          5,749
          Created From
          Yahoo News images
          Search available
          [Searchable](#)

          Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to by researchers as “The Wild”.

          +

          INTRO

          It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).

          Since then the LFW dataset has become one of the most widely used datasets used for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times reaching 45 different countries throughout the world.

          The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context now server a new purpose: to train, evaluate, and improve facial recognition.

          diff --git a/site/public/index.html b/site/public/index.html index ea3dc24c..3ce22936 100644 --- a/site/public/index.html +++ b/site/public/index.html @@ -18,23 +18,23 @@ The Darkside of Datasets

          MegaPixels is an art project that explores the dark side of face recognition training data and the future of computer vision

          Made by Adam Harvey in partnership with Mozilla.
          -Read more about MegaPixels

          +Read more [about MegaPixels]

          [Explore Datasets] [Explore Algorithms]

          Facial Recognition Datasets

          Regular Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

          Summary

            -
          • 275 datasets found
          • +
          • 275 datsets found
          • Created between the years 1993-2018
          • Smallest dataset: 20 images
          • Largest dataset: 10,000,000 images
          • diff --git a/site/public/research/01_from_1_to_100_pixels/index.html b/site/public/research/01_from_1_to_100_pixels/index.html index 90f142e9..55e02c6c 100644 --- a/site/public/research/01_from_1_to_100_pixels/index.html +++ b/site/public/research/01_from_1_to_100_pixels/index.html @@ -18,10 +18,10 @@ The Darkside of Datasets
            @@ -74,13 +74,6 @@
          -
          -

          MORE RESEARCH

          -
          - -
          -
          -
          {{ content }} - -
          -

          MORE RESEARCH

          -
          - {% for blogpost in blogposts %} -
          - {{ blogpost.title }} - {{ blogpost.date }} -
          - {% endfor %} -
          -
          {% endblock %} -- cgit v1.2.3-70-g09d2 From 2d950c3fa3b8107f941a80f88127ab45e371d128 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Thu, 6 Dec 2018 19:39:29 +0100 Subject: homepage css --- builder/README.md | 3 + builder/builder.py | 10 +- builder/parser.py | 4 +- site/assets/css/css.css | 93 ++++++++- site/assets/js/app/face.js | 213 +++++++++++++++++++++ site/assets/js/app/site.js | 3 +- site/public/about/credits/index.html | 1 + site/public/about/disclaimer/index.html | 1 + site/public/about/index.html | 1 + site/public/about/press/index.html | 1 + site/public/about/privacy/index.html | 1 + site/public/about/style/index.html | 3 +- site/public/about/terms/index.html | 1 + site/public/datasets/lfw/index.html | 160 +++++++++++++--- site/public/datasets/lfw/what/index.html | 1 + site/public/datasets/vgg_faces2/index.html | 1 + site/public/index.html | 42 ++-- .../research/01_from_1_to_100_pixels/index.html | 1 + site/public/research/index.html | 1 + site/templates/home.html | 32 ++++ site/templates/layout.html | 1 + 21 files changed, 524 insertions(+), 50 deletions(-) create mode 100644 site/assets/js/app/face.js create mode 100644 site/templates/home.html (limited to 'site/assets/css/css.css') diff --git a/builder/README.md b/builder/README.md index 1a6d3a1e..57c024cb 100644 --- a/builder/README.md +++ b/builder/README.md @@ -19,3 +19,6 @@ authors: Adam Harvey, Berit Gilma, Matthew Stender Static assets: `v1/site/about/assets/picture.jpg` Dataset assets: `v1/datasets/lfw/assets/picture.jpg` + +## Markup + diff --git a/builder/builder.py b/builder/builder.py index 0e404b88..620fc710 100644 --- a/builder/builder.py +++ b/builder/builder.py @@ -29,10 +29,12 @@ def build_page(fn, research_posts): output_path = public_path + metadata['url'] output_fn = os.path.join(output_path, "index.html") - is_research = False + skip_h1 = False - if 'research/' in fn: - is_research = True + if metadata['url'] == '/': + template = env.get_template("home.html") + elif 
'research/' in fn: + skip_h1 = True template = env.get_template("research.html") else: template = env.get_template("page.html") @@ -47,7 +49,7 @@ def build_page(fn, research_posts): if 'index.md' in fn: s3.sync_directory(dirname, s3_dir, metadata) - content = parser.parse_markdown(sections, s3_path, skip_h1=is_research) + content = parser.parse_markdown(sections, s3_path, skip_h1=skip_h1) html = template.render( metadata=metadata, diff --git a/builder/parser.py b/builder/parser.py index da3044a0..dd3643bf 100644 --- a/builder/parser.py +++ b/builder/parser.py @@ -46,15 +46,13 @@ def format_metadata(section): def parse_markdown(sections, s3_path, skip_h1=False): groups = [] current_group = [] - seen_metadata = False for section in sections: if skip_h1 and section.startswith('# '): continue - elif section.startswith('+ ') and not seen_metadata: + elif section.startswith('+ '): groups.append(format_section(current_group, s3_path)) groups.append(format_metadata(section)) current_group = [] - seen_metadata = True elif '![wide:' in section: groups.append(format_section(current_group, s3_path)) groups.append(format_section([section], s3_path, type='wide')) diff --git a/site/assets/css/css.css b/site/assets/css/css.css index 843809a8..9ac35699 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -5,9 +5,11 @@ html, body { width: 100%; min-height: 100%; font-family: 'Roboto', sans-serif; - background: #191919; color: #b8b8b8; } +html { + background: #191919; +} /* header */ @@ -119,12 +121,14 @@ h1 { font-size: 24pt; margin: 75px 0 10px; padding: 0; + transition: color 0.2s cubic-bezier(0,0,1,1); } h2, h3 { margin: 0 0 20px 0; padding: 0; font-size: 11pt; font-weight: 500; + transition: color 0.2s cubic-bezier(0,0,1,1); } th, .gray, h2, h3 { @@ -281,6 +285,9 @@ section.wide .image { max-width: 620px; margin: 10px auto 0 auto; } + +/* blog index */ + .research_index { margin-top: 40px; } @@ -289,10 +296,88 @@ section.wide .image { } .research_index h1 { 
margin-top: 20px; + text-decoration: underline; +} +.desktop .research_index section:hover h1 { + color: #fff; +} +.research_index section:hover h2 { + color: #ddd; } -/* blogpost index */ +/* home page */ -.blogposts div { - margin-bottom: 5px; +.hero { + position: relative; + width: 100%; + max-width: 1200px; + height: 50vw; + max-height: 70vh; + display: flex; + align-items: center; + margin: 0 auto; +} +#face_container { + pointer-events: none; + position: absolute; + width: 50vw; + height: 50vw; + max-height: 70vh; + top: 0; + right: 0; + z-index: -1; + text-align: center; +} +.currentFace { + position: absolute; + bottom: 50px; + width: 100%; + left: 0; + text-align: center; +} +.intro { + max-width: 640px; + padding: 75px 0 75px 10px; + z-index: 1; +} +.intro .headline { + font-family: 'Roboto Mono', monospace; + font-size: 16pt; +} +.intro .buttons { + margin: 40px 0; +} +.intro button { + font-family: 'Roboto', sans-serif; + padding: 8px 12px; + border-radius: 6px; + border: 1px solid transparent; + cursor: pointer; + font-size: 11pt; + margin-right: 10px; + transition: color 0.1s cubic-bezier(0,0,1,1), background-color 0.1s cubic-bezier(0,0,1,1); +} +.intro button.normal { + background: #191919; + border-color: #444; + color: #ddd; +} +.intro button.important { + background: #444; + border-color: #444; + color: #ddd; +} +.desktop .intro button:hover { + background: #666; + border-color: #666; + color: #fff; +} +.intro .under { + color: #888; +} +.intro .under a { + color: #bbb; +} +.desktop .intro .under a:hover { + color: #fff; } \ No newline at end of file diff --git a/site/assets/js/app/face.js b/site/assets/js/app/face.js new file mode 100644 index 00000000..e8bcd313 --- /dev/null +++ b/site/assets/js/app/face.js @@ -0,0 +1,213 @@ +var face = (function(){ + var container = document.querySelector("#face_container") + var camera, controls, scene, renderer + var mouse = new THREE.Vector2(0.5, 0.5) + var mouseTarget = new THREE.Vector2(0.5, 0.5) + var 
POINT_SCALE = 1.8 + var FACE_POINT_COUNT = 68 + var SWAP_TIME = 500 + var cubes = [], meshes = [] + var currentFace = document.querySelector('.currentFace') + var faceBuffer = (function () { + var a = new Array(FACE_POINT_COUNT) + for (let i = 0; i < FACE_POINT_COUNT; i++) { + a[i] = new THREE.Vector3() + } + return a + })() + var last_t = 0, start_t = 0 + var colors = [ + 0xff3333, + 0xff8833, + 0xffff33, + 0x338833, + 0x3388ff, + 0x3333ff, + 0x8833ff, + 0xff3388, + 0xffffff, + ] + var swapping = false, swap_count = 0, swapFrom, swapTo, face_names, faces + init() + + function init() { + fetch("/assets/data/3dlm_0_10.json") + .then(req => req.json()) + .then(data => { + face_names = Object.keys(data) + faces = face_names.map(name => recenter(data[name])) + setup() + build(faces[0]) + updateFace(faces[0]) + setCurrentFace(face_names[0]) + swapTo = faces[0] + animate() + }) + } + function setup() { + var w = window.innerWidth / 2 + var h = Math.min(window.innerWidth / 2, window.innerHeight * 0.7) + camera = new THREE.PerspectiveCamera(70, w/h, 1, 10000) + camera.position.x = 0 + camera.position.y = 0 + camera.position.z = 250 + + scene = new THREE.Scene() + scene.background = new THREE.Color(0x191919) + + renderer = new THREE.WebGLRenderer({ antialias: true }) + renderer.setPixelRatio(window.devicePixelRatio) + renderer.setSize(w, h) + container.appendChild(renderer.domElement) + document.body.addEventListener('mousemove', onMouseMove) + // renderer.domElement.addEventListener('mousedown', swap) + setInterval(swap, 5000) + } + function build(points) { + var matrix = new THREE.Matrix4() + var quaternion = new THREE.Quaternion() + + for (var i = 0; i < FACE_POINT_COUNT; i++) { + var p = points[i] + var geometry = new THREE.BoxBufferGeometry() + var position = new THREE.Vector3(p[0], p[1], p[2]) + var rotation = new THREE.Euler() + var scale = new THREE.Vector3() + var color = new THREE.Color() + scale.x = scale.y = scale.z = POINT_SCALE + 
quaternion.setFromEuler(rotation, false) + matrix.compose(position, quaternion, scale) + geometry.applyMatrix(matrix) + material = new THREE.MeshBasicMaterial({ color: color.setHex(0xffffff) }) + cube = new THREE.Mesh(geometry, material) + scene.add(cube) + cubes.push(cube) + } + + meshes = getLineGeometry(points).map((geometry, i) => { + var color = new THREE.Color() + var material = new MeshLineMaterial({ + color: color.setHex(colors[i % colors.length]), + }) + var line = new MeshLine() + line.setGeometry(geometry, _ => 1.5) + var mesh = new THREE.Mesh(line.geometry, material) + mesh.geometry.dynamic = true + scene.add(mesh) + return [line, mesh] + }) + } + function lerpPoints(n, A, B, C) { + for (let i = 0, len = A.length; i < len; i++) { + lerpPoint(n, A[i], B[i], C[i]) + } + } + function lerpPoint(n, A, B, C) { + C.x = lerp(n, A.x, B.x) + C.y = lerp(n, A.y, B.y) + C.z = lerp(n, A.z, B.z) + } + function lerp(n, a, b) { + return (b-a) * n + a + } + function swap(){ + if (swapping) return + start_t = last_t + swapping = true + swap_count = (swap_count + 1) % faces.length + swapFrom = swapTo + swapTo = faces[swap_count] + setCurrentFace(face_names[swap_count]) + } + function setCurrentFace(name) { + name = name.replace('.png', '').split('_').filter(s => !s.match(/\d+/)).join(' ') + currentFace.innerHTML = name + } + function update_swap(t){ + var n = (t - start_t) / SWAP_TIME + if (n > 1) { + swapping = false + n = 1 + } + lerpPoints(n, swapFrom, swapTo, faceBuffer) + updateFace(faceBuffer) + } + function updateFace(points) { + updateCubeGeometry(points) + updateLineGeometry(points) + } + function updateCubeGeometry(points) { + cubes.forEach((cube, i) => { + const p = points[i] + cube.position.set(p.x, p.y, p.z) + }) + } + function updateLineGeometry(points) { + getLineGeometry(points).map((geometry, i) => { + var [line, mesh] = meshes[i] + line.setGeometry(geometry, _ => 1.5) + mesh.geometry.vertices = line.geometry.vertices + mesh.geometry.verticesNeedUpdate = 
true + }) + } + function getLineGeometry(points) { + return [ + points.slice(0, 17), + points.slice(17, 22), + points.slice(22, 27), + points.slice(27, 31), + points.slice(31, 36), + points.slice(36, 42), + points.slice(42, 48), + points.slice(48) + ].map((a, i) => { + var geometry = new THREE.Geometry() + a.forEach(p => geometry.vertices.push(p)) + if (i > 4) { + geometry.vertices.push(a[0]) + } + return geometry + }) + } + function getBounds(obj) { + return obj.reduce((a, p) => { + return [ + Math.min(a[0], p[0]), + Math.max(a[1], p[0]), + Math.min(a[2], p[1]), + Math.max(a[3], p[1]), + Math.min(a[4], p[2]), + Math.max(a[5], p[2]), + ] + }, [Infinity, -Infinity, Infinity, -Infinity, Infinity, -Infinity]) + } + function recenter(obj) { + const bounds = getBounds(obj) + const x_width = (bounds[1] - bounds[0]) / 2 + const y_width = (bounds[3] - bounds[2]) / -3 + const z_width = (bounds[5] - bounds[4]) / 2 + return obj.map(p => { + p[0] = p[0] - bounds[0] - x_width + p[1] = -p[1] + bounds[1] + y_width + p[2] = p[2] - bounds[2] + z_width + return new THREE.Vector3(p[0], p[1], p[2]) + }) + } + // + function onMouseMove(e) { + mouse.x = e.clientX / window.innerWidth + mouse.y = e.clientY / window.innerHeight + } + function animate(t) { + requestAnimationFrame(animate) + if (swapping) update_swap(t) + renderer.render(scene, camera) + scene.rotation.y += 0.01 * Math.PI + mouseTarget.x += (mouse.x - mouseTarget.x) * 0.1 + mouseTarget.y += (mouse.y - mouseTarget.y) * 0.1 + scene.rotation.x = (mouseTarget.y - 0.5) * Math.PI / 2 + // scene.rotation.y = (mouseTarget.x - 0.5) * Math.PI + scene.rotation.y += 0.01 + last_t = t + } +})() diff --git a/site/assets/js/app/site.js b/site/assets/js/app/site.js index 12bee3ec..eb6886c2 100644 --- a/site/assets/js/app/site.js +++ b/site/assets/js/app/site.js @@ -7,7 +7,8 @@ const isDesktop = !isMobile const htmlClassList = document.body.parentNode.classList htmlClassList.add(isDesktop ? 
'desktop' : 'mobile') -function toArray(A) { return Array.prototype.slice.apply(A) } +function toArray(a) { return Array.prototype.slice.apply(a) } +function choice(a) { return a[Math.floor(Math.random()*a.length)]} var site = (function(){ var site = {} diff --git a/site/public/about/credits/index.html b/site/public/about/credits/index.html index f1a28b0e..65bc7ac4 100644 --- a/site/public/about/credits/index.html +++ b/site/public/about/credits/index.html @@ -52,5 +52,6 @@
          + \ No newline at end of file diff --git a/site/public/about/disclaimer/index.html b/site/public/about/disclaimer/index.html index 5df5d656..b0215bde 100644 --- a/site/public/about/disclaimer/index.html +++ b/site/public/about/disclaimer/index.html @@ -52,5 +52,6 @@
      + \ No newline at end of file diff --git a/site/public/about/index.html b/site/public/about/index.html index f1a28b0e..65bc7ac4 100644 --- a/site/public/about/index.html +++ b/site/public/about/index.html @@ -52,5 +52,6 @@
    + \ No newline at end of file diff --git a/site/public/about/press/index.html b/site/public/about/press/index.html index e5763036..09c89165 100644 --- a/site/public/about/press/index.html +++ b/site/public/about/press/index.html @@ -50,5 +50,6 @@ + \ No newline at end of file diff --git a/site/public/about/privacy/index.html b/site/public/about/privacy/index.html index 7ad9564f..5675f072 100644 --- a/site/public/about/privacy/index.html +++ b/site/public/about/privacy/index.html @@ -129,5 +129,6 @@ You are advised to review this Privacy Policy periodically for any changes. Chan + \ No newline at end of file diff --git a/site/public/about/style/index.html b/site/public/about/style/index.html index eea861ac..f2c0d4b8 100644 --- a/site/public/about/style/index.html +++ b/site/public/about/style/index.html @@ -27,7 +27,7 @@

    Style Examples

    -
    Alt text here
    Alt text here

    Header 1

    +
    Alt text here
    Alt text here
    Date
    17-Jan-2019
    Numbers
    17
    Identities
    12,139
    But also
    This is a test of the stylesheet

    Header 1

    Header 2

    Header 3

    Header 4

    @@ -85,5 +85,6 @@ But let's throw in a <b>tag</b>.
    + \ No newline at end of file diff --git a/site/public/about/terms/index.html b/site/public/about/terms/index.html index db8b9e57..078c339f 100644 --- a/site/public/about/terms/index.html +++ b/site/public/about/terms/index.html @@ -64,5 +64,6 @@ + \ No newline at end of file diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index 76549d25..39052b44 100644 --- a/site/public/datasets/lfw/index.html +++ b/site/public/datasets/lfw/index.html @@ -27,23 +27,22 @@

    Labeled Faces in The Wild

    -
    Created
    2007
    Images
    13,233
    People
    5,749
    Created From
    Yahoo News images
    Search available
    Searchable

    Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to as “The Wild”.

    -
    Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.
    Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

    INTRO

    +
    Created
    2007
    Images
    13,233
    People
    5,749
    Created From
    Yahoo News images
    Search available
    Searchable

    Labeled Faces in The Wild (LFW) is amongst the most widely used facial recognition training datasets in the world and is the first of its kind to be created entirely from images that were posted online. The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. Use the tools below to check if you were included in this dataset or scroll down to read the analysis.

    +

    {INSERT IMAGE SEARCH MODULE}

    +

    {INSERT TEXT SEARCH MODULE}

    +
    Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.
    Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.

    INTRO

    It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).

    Since then the LFW dataset has become one of the most widely used datasets used for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times reaching 45 different countries throughout the world.

    The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context now server a new purpose: to train, evaluate, and improve facial recognition.

    As the most widely used facial recognition dataset, it can be said that each individual in LFW has, in a small way, contributed to the current state of the art in facial recognition surveillance. John Cusack, Julianne Moore, Barry Bonds, Osama bin Laden, and even Moby are amongst these biometric pillars, exemplar faces provided the visual dimensions of a new computer vision future.

    -
    From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset
    From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset

    In addition to commercial use as an evaluation tool, alll of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.

    -

    Usage

    -
    #!/usr/bin/python
    -from matplotlib import plt
    -from sklearn.datasets import fetch_lfw_people
    -lfw_people = fetch_lfw_people()
    -lfw_person = lfw_people[0]
    -plt.imshow(lfw_person)
    -
    +
    The entire LFW dataset cropped to facial regions
    The entire LFW dataset cropped to facial regions

    In addition to commercial use as an evaluation tool, alll of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.

    +

    Facts

    +

    The person with the most images is: +The person with the least images is:

    Commercial Use

    -

    The LFW dataset is used by numerous companies for benchmarking algorithms and in some cases training. According to the benchmarking results page [^lfw_results] provided by the authors, over 2 dozen companies have contributed their benchmark results

    +

    The LFW dataset is used by numerous companies for benchmarking algorithms and in some cases training. According to the benchmarking results page [^lfw_results] provided by the authors, over 2 dozen companies have contributed their benchmark results.

    +

    According to BiometricUpdate.com [^lfw_pingan], LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

    +

    According to researchers at the Baidu Research – Institute of Deep Learning "LFW has been the most popular evaluation benchmark for face recognition, and played a very important role in facilitating the face recognition society to improve algorithm. [^lfw_baidu]."

    load file: lfw_commercial_use.csv
     name_display,company_url,example_url,country,description
     
    @@ -73,11 +72,24 @@ name_display,company_url,example_url,country,description

    Add 2-4 screenshots of companies mentioning LFW here

    -
    ReadSense
    ReadSense

    In benchmarking, companies use a dataset to evaluate their algorithms which are typically trained on other data. After training, researchers will use LFW as a benchmark to compare results with other algorithms.

    +
     "PING AN Tech facial recognition receives high score in latest LFW test results"
    "PING AN Tech facial recognition receives high score in latest LFW test results"
    +
     "Face Recognition Performance in LFW benchmark"
    "Face Recognition Performance in LFW benchmark"
    +
     "The 1st place in face verification challenge, LFW"
    "The 1st place in face verification challenge, LFW"

    In benchmarking, companies use a dataset to evaluate their algorithms which are typically trained on other data. After training, researchers will use LFW as a benchmark to compare results with other algorithms.

    For example, Baidu (est. net worth $13B) uses LFW to report results for their "Targeting Ultimate Accuracy: Face Recognition via Deep Embedding". According to the three Baidu researchers who produced the paper:

    -

    LFW has been the most popular evaluation benchmark for face recognition, and played a very important role in facilitating the face recognition society to improve algorithm. 1.

    -

    Citations

    +

    Overall, LFW has at least 456 citations from 123 countries. Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos.

    +

    Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos.

    +
    Distribution of citations per year per country for the top 5 countries with citations for the LFW Dataset
    Distribution of citations per year per country for the top 5 countries with citations for the LFW Dataset
    Geographic distributions of citations for the LFW Dataset
    Geographic distributions of citations for the LFW Dataset

    Conclusion

    +

    The LFW face recognition training and evaluation dataset is a historically important face dataset as it was the first popular dataset to be created entirely from Internet images, paving the way for a global trend towards downloading anyone’s face from the Internet and adding it to a dataset. As will be evident with other datasets, LFW’s approach has now become the norm.

    +

    For all the 5,000 people in this datasets, their face is forever a part of facial recognition history. It would be impossible to remove anyone from the dataset because it is so ubiquitous. For their rest of the lives and forever after, these 5,000 people will continue to be used for training facial recognition surveillance.

    +

    Right to Removal

    +

    If you are affected by disclosure of your identity in this dataset please do contact the authors, many state that they are willing to remove images upon request. The authors of the LFW can be reached from the emails posted in their paper:

    +

    You can use the following message to request removal from the dataset:

    +

    Dear [researcher name],

    +

    I am writing to you about the "LFW Dataset". Recently I have discovered that your dataset includes my identity and no longer wish to be included in your dataset

    +

    MegaPixels is an educational art project developed for academic purposes. In no way does this project aim to villify the researchers who produced the datasets. The aim of this project is to encourage discourse around ethics and consent in artificial intelligence by providing information about these datasets that is otherwise difficult to obtain or inaccessible to other researchers.

    +

    Supplementary Data

    +

    Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos.

    @@ -99,18 +111,119 @@ name_display,company_url,example_url,country,description + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    TitleChina edu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    3D-aided face recognition from videosUniversity of LyonFranceedu
    -

    Conclusion

    -

    The LFW face recognition training and evaluation dataset is a historically important face dataset as it was the first popular dataset to be created entirely from Internet images, paving the way for a global trend towards downloading anyone’s face from the Internet and adding it to a dataset. As will be evident with other datasets, LFW’s approach has now become the norm.

    -

    For all the 5,000 people in this datasets, their face is forever a part of facial recognition history. It would be impossible to remove anyone from the dataset because it is so ubiquitous. For their rest of the lives and forever after, these 5,000 people will continue to be used for training facial recognition surveillance.

    -

    Notes

    -

    According to BiometricUpdate.com2, LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

    +

    Code

    +
    #!/usr/bin/python
    +
    +import numpy as np
    +from sklearn.datasets import fetch_lfw_people
    +import imageio
    +import imutils
    +
    +# download LFW dataset (first run takes a while)
    +lfw_people = fetch_lfw_people(min_faces_per_person=1, resize=1, color=True, funneled=False)
    +
    +# introspect dataset
    +n_samples, h, w, c = lfw_people.images.shape
    +print('{:,} images at {}x{}'.format(n_samples, w, h))
    +cols, rows = (176, 76)
    +n_ims = cols * rows
    +
    +# build montages
    +im_scale = 0.5
    +ims = lfw_people.images[:n_ims
    +montages = imutils.build_montages(ims, (int(w*im_scale, int(h*im_scale)), (cols, rows))
    +montage = montages[0]
    +
    +# save full montage image
    +imageio.imwrite('lfw_montage_full.png', montage)
    +
    +# make a smaller version
    +montage_960 = imutils.resize(montage, width=960)
    +imageio.imwrite('lfw_montage_960.jpg', montage_960)
    +

    -
    1. "Chinese tourist town uses face recognition as an entry pass". New Scientist. November 17, 2016. https://www.newscientist.com/article/2113176-chinese-tourist-town-uses-face-recognition-as-an-entry-pass/

    2. -
    3. "PING AN Tech facial recognition receives high score in latest LFW test results". https://www.biometricupdate.com/201702/ping-an-tech-facial-recognition-receives-high-score-in-latest-lfw-test-results

    4. -
    +
      @@ -130,5 +243,6 @@ name_display,company_url,example_url,country,description
      + \ No newline at end of file diff --git a/site/public/datasets/lfw/what/index.html b/site/public/datasets/lfw/what/index.html index 52993a79..ceafb35a 100644 --- a/site/public/datasets/lfw/what/index.html +++ b/site/public/datasets/lfw/what/index.html @@ -137,5 +137,6 @@ name_display,company_url,example_url,country,description + \ No newline at end of file diff --git a/site/public/datasets/vgg_faces2/index.html b/site/public/datasets/vgg_faces2/index.html index 95b5f7d7..3f778f71 100644 --- a/site/public/datasets/vgg_faces2/index.html +++ b/site/public/datasets/vgg_faces2/index.html @@ -58,5 +58,6 @@ + \ No newline at end of file diff --git a/site/public/index.html b/site/public/index.html index 3ce22936..51006b59 100644 --- a/site/public/index.html +++ b/site/public/index.html @@ -26,22 +26,31 @@
      -

      MegaPixels is an art project that explores the dark side of face recognition training data and the future of computer vision

      -

      Made by Adam Harvey in partnership with Mozilla.
      -Read more [about MegaPixels]

      -

      [Explore Datasets] [Explore Algorithms]

      -

      Facial Recognition Datasets

      +
      +
      +
      +
      +
      +
      + MegaPixels is an art project that explores the dark side of face recognition and the future of computer vision. +
      + + + +
      + Made by Adam Harvey in partnership with Mozilla.
      + Read more about MegaPixels +
      +
      +
      + +

      Facial Recognition Datasets

      Regular Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

      Summary

      -
        -
      • 275 datsets found
      • -
      • Created between the years 1993-2018
      • -
      • Smallest dataset: 20 images
      • -
      • Largest dataset: 10,000,000 images
      • -
      • Highest resolution faces: 450x500 (Unconstrained College Students)
      • -
      • Lowest resolution faces: 16x20 pixels (QMUL SurvFace)
      • -
      -
      +
      Found
      275 datasets
      Created between
      1993-2018
      Smallest dataset
      20 images
      Largest dataset
      10,000,000 images
      Highest resolution faces
      450x500 (Unconstrained College Students)
      Lowest resolution faces
      16x20 pixels (QMUL SurvFace)
      +
      @@ -59,5 +68,10 @@ Read more [about MegaPixels]

      + + + + + \ No newline at end of file diff --git a/site/public/research/01_from_1_to_100_pixels/index.html b/site/public/research/01_from_1_to_100_pixels/index.html index 55e02c6c..b4c85d00 100644 --- a/site/public/research/01_from_1_to_100_pixels/index.html +++ b/site/public/research/01_from_1_to_100_pixels/index.html @@ -90,5 +90,6 @@ + \ No newline at end of file diff --git a/site/public/research/index.html b/site/public/research/index.html index 1f61dadf..cf9546e1 100644 --- a/site/public/research/index.html +++ b/site/public/research/index.html @@ -46,5 +46,6 @@ + \ No newline at end of file diff --git a/site/templates/home.html b/site/templates/home.html new file mode 100644 index 00000000..436c1ddf --- /dev/null +++ b/site/templates/home.html @@ -0,0 +1,32 @@ +{% extends 'layout.html' %} + +{% block content %} +
      +
      +
      +
      +
      +
      + MegaPixels is an art project that explores the dark side of face recognition and the future of computer vision. +
      + + + +
      + Made by Adam Harvey in partnership with Mozilla.
      + Read more about MegaPixels +
      +
      +
      + + {{ content }} + +{% endblock %} + +{% block scripts %} + + + +{% endblock %} diff --git a/site/templates/layout.html b/site/templates/layout.html index 7558163e..605f9788 100644 --- a/site/templates/layout.html +++ b/site/templates/layout.html @@ -42,5 +42,6 @@ +{% block scripts %}{% endblock %} \ No newline at end of file -- cgit v1.2.3-70-g09d2