diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2018-12-05 18:23:32 +0100 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2018-12-05 18:23:32 +0100 |
| commit | 03ed12b471c1e50ae531c46fcbf5afd06ca5432b (patch) | |
| tree | 67f48d66b2e9def2e514359b150e5f1384d9a3d2 | |
| parent | 2a1b884e841efe562e0c84885a404819433b3405 (diff) | |
build
| -rw-r--r-- | builder/builder.py | 37 | ||||
| -rw-r--r-- | builder/parser.py | 46 | ||||
| -rw-r--r-- | builder/s3.py | 6 | ||||
| -rw-r--r-- | site/assets/css/css.css | 65 | ||||
| -rw-r--r-- | site/assets/css/fonts.css | 18 | ||||
| -rw-r--r-- | site/assets/js/app/site.js | 7 | ||||
| -rw-r--r-- | site/public/about/credits/index.html | 8 | ||||
| -rw-r--r-- | site/public/about/disclaimer/index.html | 8 | ||||
| -rw-r--r-- | site/public/about/index.html | 11 | ||||
| -rw-r--r-- | site/public/about/press/index.html | 8 | ||||
| -rw-r--r-- | site/public/about/privacy/index.html | 9 | ||||
| -rw-r--r-- | site/public/about/style/index.html | 12 | ||||
| -rw-r--r-- | site/public/about/terms/index.html | 8 | ||||
| -rw-r--r-- | site/public/datasets/lfw/index.html | 20 | ||||
| -rw-r--r-- | site/public/datasets/lfw/what/index.html | 141 | ||||
| -rw-r--r-- | site/public/datasets/vgg_faces2/index.html | 20 | ||||
| -rw-r--r-- | site/public/index.html | 12 | ||||
| -rw-r--r-- | site/public/research/01_from_1_to_100_pixels/index.html | 15 | ||||
| -rw-r--r-- | site/public/research/index.html | 50 | ||||
| -rw-r--r-- | site/templates/layout.html | 8 | ||||
| -rw-r--r-- | site/templates/research.html | 12 |
21 files changed, 391 insertions, 130 deletions
diff --git a/builder/builder.py b/builder/builder.py index deb9eb68..0e404b88 100644 --- a/builder/builder.py +++ b/builder/builder.py @@ -29,21 +29,25 @@ def build_page(fn, research_posts): output_path = public_path + metadata['url'] output_fn = os.path.join(output_path, "index.html") + is_research = False + if 'research/' in fn: + is_research = True template = env.get_template("research.html") else: template = env.get_template("page.html") - if 'datasets' in fn: - s3_path = "{}/{}/{}{}".format(os.getenv('S3_ENDPOINT'), os.getenv('S3_BUCKET'), s3_datasets_path, metadata['path']) - if 'index.md' in fn: - s3.sync_directory(dirname, s3_datasets_path, metadata) + if 'datasets/' in fn: + s3_dir = s3_datasets_path else: - s3_path = "{}/{}/{}{}".format(os.getenv('S3_ENDPOINT'), os.getenv('S3_BUCKET'), s3_site_path, metadata['path']) - if 'index.md' in fn and metadata['url'] != '/': - s3.sync_directory(dirname, s3_site_path, metadata) + s3_dir = s3_site_path + + s3_path = s3.make_s3_path(s3_dir, metadata['path']) + + if 'index.md' in fn: + s3.sync_directory(dirname, s3_dir, metadata) - content = parser.parse_markdown(sections, s3_path) + content = parser.parse_markdown(sections, s3_path, skip_h1=is_research) html = template.render( metadata=metadata, @@ -58,10 +62,27 @@ def build_page(fn, research_posts): print("______") +def build_research_index(research_posts): + metadata, sections = parser.read_metadata('../site/content/research/index.md') + template = env.get_template("page.html") + s3_path = s3.make_s3_path(s3_site_path, metadata['path']) + content = parser.parse_markdown(sections, s3_path, skip_h1=False) + content += parser.parse_research_index(research_posts) + html = template.render( + metadata=metadata, + content=content, + research_posts=research_posts, + latest_research_post=research_posts[-1], + ) + output_fn = public_path + '/research/index.html' + with open(output_fn, "w") as file: + file.write(html) + def build_site(): research_posts = 
parser.read_research_post_index() for fn in glob.iglob(os.path.join(content_path, "**/*.md"), recursive=True): build_page(fn, research_posts) + build_research_index(research_posts) if __name__ == '__main__': build_site() diff --git a/builder/parser.py b/builder/parser.py index 529d21fa..da3044a0 100644 --- a/builder/parser.py +++ b/builder/parser.py @@ -2,6 +2,8 @@ import os import re import glob import mistune + +import s3 from paths import * renderer = mistune.Renderer(escape=False) @@ -12,7 +14,6 @@ def fix_images(lines, s3_path): block = "\n\n".join(lines) for line in block.split("\n"): if " url, tail = tail.split(')', 1) @@ -35,13 +36,26 @@ def format_section(lines, s3_path, type=''): return "<section>" + markdown(lines) + "</section>" return "" -def parse_markdown(sections, s3_path): +def format_metadata(section): + meta = [] + for line in section.split('\n'): + key, value = line[2:].split(': ', 1) + meta.append("<div><div class='gray'>{}</div><div>{}</div></div>".format(key, value)) + return "<section><div class='meta'>{}</div></section>".format(''.join(meta)) + +def parse_markdown(sections, s3_path, skip_h1=False): groups = [] current_group = [] + seen_metadata = False for section in sections: - if section.startswith('# '): + if skip_h1 and section.startswith('# '): continue - if '![wide:' in section: + elif section.startswith('+ ') and not seen_metadata: + groups.append(format_section(current_group, s3_path)) + groups.append(format_metadata(section)) + current_group = [] + seen_metadata = True + elif '![wide:' in section: groups.append(format_section(current_group, s3_path)) groups.append(format_section([section], s3_path, type='wide')) current_group = [] @@ -55,6 +69,23 @@ def parse_markdown(sections, s3_path): content = "".join(groups) return content +def parse_research_index(research_posts): + content = "<div class='research_index'>" + for post in research_posts: + s3_path = s3.make_s3_path(s3_site_path, post['path']) + if 'image' in post: + post_image 
= s3_path + post['image'] + else: + post_image = '' + row = "<a href='{}'><section class='wide'><img src='{}' alt='Research post' /><section><h1>{}</h1><h2>{}</h2></section></section></a>".format( + post['path'], + post_image, + post['title'], + post['tagline']) + content += row + content += '</div>' + return content + def read_metadata(fn): with open(fn, "r") as file: data = file.read() @@ -74,6 +105,8 @@ default_metadata = { 'published': '2018-12-31', 'updated': '2018-12-31', 'authors': 'Adam Harvey', + 'sync': 'true', + 'tagline': '', } def parse_metadata_section(metadata, section): @@ -117,12 +150,15 @@ def parse_metadata(fn, sections): if metadata['status'] == 'published|draft|private': metadata['status'] = 'published' + + metadata['sync'] = metadata['sync'] != 'false' + metadata['author_html'] = '<br>'.join(metadata['authors'].split(',')) return metadata, valid_sections def read_research_post_index(): posts = [] - for fn in sorted(glob.glob(os.path.join(content_path, 'research/**/index.md'), recursive=True)): + for fn in sorted(glob.glob('../site/content/research/*/index.md')): metadata, valid_sections = read_metadata(fn) if metadata is None or metadata['status'] == 'private' or metadata['status'] == 'draft': continue diff --git a/builder/s3.py b/builder/s3.py index f3dcce48..41ecdf61 100644 --- a/builder/s3.py +++ b/builder/s3.py @@ -18,6 +18,9 @@ def sync_directory(base_fn, s3_path, metadata): for fn in glob.glob(os.path.join(base_fn, 'assets/*')): fns[os.path.basename(fn)] = True + if not metadata['sync']: + return + remote_path = s3_path + metadata['url'] directory = s3_client.list_objects(Bucket=os.getenv('S3_BUCKET'), Prefix=remote_path) @@ -53,3 +56,6 @@ def sync_directory(base_fn, s3_path, metadata): os.getenv('S3_BUCKET'), s3_fn, ExtraArgs={ 'ACL': 'public-read' }) + +def make_s3_path(s3_dir, metadata_path): + return "{}/{}/{}{}".format(os.getenv('S3_ENDPOINT'), os.getenv('S3_BUCKET'), s3_dir, metadata_path) diff --git a/site/assets/css/css.css 
b/site/assets/css/css.css index 1024ffcd..843809a8 100644 --- a/site/assets/css/css.css +++ b/site/assets/css/css.css @@ -164,16 +164,46 @@ p { .content a:hover { color: #fff; } + +/* top of post metadata */ + +.meta { + display: flex; + flex-direction: row; + justify-content: flex-start; + align-items: flex-start; + font-size: 10pt; + margin-bottom: 20px; +} +.meta > div { + margin-right: 30px; +} +.meta .gray { + font-size: 9pt; + padding-bottom: 4px; +} + +/* misc formatting */ + code { font-family: 'Roboto Mono', monospace; font-size: 9pt; padding: 2px 4px; background: rgba(255,255,255,0.1); } +pre { + margin: 0 0 40px 0; + border: 1px solid #666; + border-radius: 2px; +} pre code { display: block; max-height: 400px; max-width: 640px; + padding: 4px 10px; +} +table { + margin-bottom: 40px; } hr { height: 1px; @@ -181,6 +211,14 @@ hr { border: 0; width: 80px; } +blockquote { + margin-left: 28px; + padding: 0 0 0 10px; + border-left: 2px solid #555; +} + +/* footnotes */ + .footnotes hr { display: none; } @@ -243,29 +281,14 @@ section.wide .image { max-width: 620px; margin: 10px auto 0 auto; } - -blockquote { - margin-left: 28px; - padding: 0 0 0 10px; - border-left: 2px solid #555; -} - -/* top of post metadata */ - -.meta { - display: flex; - flex-direction: row; - justify-content: flex-start; - align-items: flex-start; - font-size: 10pt; - margin-bottom: 20px; +.research_index { + margin-top: 40px; } -.meta > div { - margin-right: 30px; +.research_index a { + text-decoration: none; } -.meta .gray { - font-size: 9pt; - padding-bottom: 4px; +.research_index h1 { + margin-top: 20px; } /* blogpost index */ diff --git a/site/assets/css/fonts.css b/site/assets/css/fonts.css index 2195c70b..8db01fbd 100644 --- a/site/assets/css/fonts.css +++ b/site/assets/css/fonts.css @@ -2,34 +2,40 @@ font-family: 'Roboto'; font-style: normal; font-weight: 300; - src: url("../fonts/Roboto_300.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_300.woff") format("woff"), 
url("../fonts/Roboto_300.woff2") format("woff2"), url("../fonts/Roboto_300.svg#Roboto") format("svg"), url("../fonts/Roboto_300.ttf") format("truetype"); } + src: url("../fonts/Roboto_300.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_300.woff") format("woff"), url("../fonts/Roboto_300.woff2") format("woff2"), url("../fonts/Roboto_300.svg#Roboto") format("svg"), url("../fonts/Roboto_300.ttf") format("truetype"); +} @font-face { font-family: 'Roboto'; font-style: normal; font-weight: 400; - src: url("../fonts/Roboto_400.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_400.woff") format("woff"), url("../fonts/Roboto_400.woff2") format("woff2"), url("../fonts/Roboto_400.svg#Roboto") format("svg"), url("../fonts/Roboto_400.ttf") format("truetype"); } + src: url("../fonts/Roboto_400.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_400.woff") format("woff"), url("../fonts/Roboto_400.woff2") format("woff2"), url("../fonts/Roboto_400.svg#Roboto") format("svg"), url("../fonts/Roboto_400.ttf") format("truetype"); +} @font-face { font-family: 'Roboto'; font-style: normal; font-weight: 500; - src: url("../fonts/Roboto_500.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_500.woff") format("woff"), url("../fonts/Roboto_500.woff2") format("woff2"), url("../fonts/Roboto_500.svg#Roboto") format("svg"), url("../fonts/Roboto_500.ttf") format("truetype"); } + src: url("../fonts/Roboto_500.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_500.woff") format("woff"), url("../fonts/Roboto_500.woff2") format("woff2"), url("../fonts/Roboto_500.svg#Roboto") format("svg"), url("../fonts/Roboto_500.ttf") format("truetype"); +} @font-face { font-family: 'Roboto Mono'; font-style: normal; font-weight: 300; - src: url("../fonts/Roboto_Mono_300.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_300.woff") format("woff"), url("../fonts/Roboto_Mono_300.woff2") format("woff2"), 
url("../fonts/Roboto_Mono_300.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_300.ttf") format("truetype"); } + src: url("../fonts/Roboto_Mono_300.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_300.woff") format("woff"), url("../fonts/Roboto_Mono_300.woff2") format("woff2"), url("../fonts/Roboto_Mono_300.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_300.ttf") format("truetype"); +} @font-face { font-family: 'Roboto Mono'; font-style: normal; font-weight: 400; - src: url("../fonts/Roboto_Mono_400.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_400.woff") format("woff"), url("../fonts/Roboto_Mono_400.woff2") format("woff2"), url("../fonts/Roboto_Mono_400.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_400.ttf") format("truetype"); } + src: url("../fonts/Roboto_Mono_400.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_400.woff") format("woff"), url("../fonts/Roboto_Mono_400.woff2") format("woff2"), url("../fonts/Roboto_Mono_400.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_400.ttf") format("truetype"); +} @font-face { font-family: 'Roboto Mono'; font-style: normal; font-weight: 500; - src: local("Roboto-Mono Medium"), local("RobotoMono-Medium"), url("../fonts/Roboto_Mono_500.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_500.woff") format("woff"), url("../fonts/Roboto_Mono_500.woff2") format("woff2"), url("../fonts/Roboto_Mono_500.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_500.ttf") format("truetype"); } + src: local("Roboto-Mono Medium"), local("RobotoMono-Medium"), url("../fonts/Roboto_Mono_500.eot?#iefix") format("embedded-opentype"), url("../fonts/Roboto_Mono_500.woff") format("woff"), url("../fonts/Roboto_Mono_500.woff2") format("woff2"), url("../fonts/Roboto_Mono_500.svg#RobotoMono") format("svg"), url("../fonts/Roboto_Mono_500.ttf") format("truetype"); +} diff --git a/site/assets/js/app/site.js b/site/assets/js/app/site.js 
index 04c0c495..12bee3ec 100644 --- a/site/assets/js/app/site.js +++ b/site/assets/js/app/site.js @@ -7,6 +7,8 @@ const isDesktop = !isMobile const htmlClassList = document.body.parentNode.classList htmlClassList.add(isDesktop ? 'desktop' : 'mobile') +function toArray(A) { return Array.prototype.slice.apply(A) } + var site = (function(){ var site = {} site.init = function(){ @@ -17,6 +19,11 @@ var site = (function(){ if (paras.length) { paras[0].classList.add('first_paragraph') } + toArray(document.querySelectorAll('header .links a')).forEach(tag => { + if (window.location.href.match(tag.href)) { + tag.classList.add('active') + } + }) } site.init() })()
\ No newline at end of file diff --git a/site/public/about/credits/index.html b/site/public/about/credits/index.html index 9fec7e64..f1a28b0e 100644 --- a/site/public/about/credits/index.html +++ b/site/public/about/credits/index.html @@ -18,10 +18,10 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> diff --git a/site/public/about/disclaimer/index.html b/site/public/about/disclaimer/index.html index 553bf084..5df5d656 100644 --- a/site/public/about/disclaimer/index.html +++ b/site/public/about/disclaimer/index.html @@ -18,10 +18,10 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> diff --git a/site/public/about/index.html b/site/public/about/index.html index 363e8fc0..f1a28b0e 100644 --- a/site/public/about/index.html +++ b/site/public/about/index.html @@ -18,15 +18,16 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> - <section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/site/about/assets/test.jpg' alt='alt text'><div 
class='caption'>alt text</div></div></section><section><ul> + <section><h1>Credits</h1> +</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/site/about/assets/test.jpg' alt='alt text'><div class='caption'>alt text</div></div></section><section><ul> <li>MegaPixels by Adam Harvey</li> <li>Made with support from Mozilla</li> <li>Site developed by Jules Laplace</li> diff --git a/site/public/about/press/index.html b/site/public/about/press/index.html index aa6e5e13..e5763036 100644 --- a/site/public/about/press/index.html +++ b/site/public/about/press/index.html @@ -18,10 +18,10 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> diff --git a/site/public/about/privacy/index.html b/site/public/about/privacy/index.html index d1ec1c77..7ad9564f 100644 --- a/site/public/about/privacy/index.html +++ b/site/public/about/privacy/index.html @@ -18,10 +18,10 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> @@ -84,7 +84,6 @@ megapixels.cc will take all steps reasonably necessary to ensure that your data <h2>Disclosure Of Data</h2> <h3>Legal Requirements</h3> <p>megapixels.cc may disclose your Personal Data in the good faith belief that such action is necessary to:</p> -<p><ul></p> <ul> <li>To comply with a legal obligation</li> <li>To protect and defend the rights or 
property of megapixels.cc</li> diff --git a/site/public/about/style/index.html b/site/public/about/style/index.html index 24e6f5be..eea861ac 100644 --- a/site/public/about/style/index.html +++ b/site/public/about/style/index.html @@ -18,15 +18,17 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> - <section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/site/about/assets/test.jpg' alt='Alt text here'><div class='caption'>Alt text here</div></div></section><section><h2>Header 2</h2> + <section><h1>Style Examples</h1> +</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/site/about/assets/test.jpg' alt='Alt text here'><div class='caption'>Alt text here</div></div></section><section><h1>Header 1</h1> +<h2>Header 2</h2> <h3>Header 3</h3> <h4>Header 4</h4> <h5>Header 5</h5> diff --git a/site/public/about/terms/index.html b/site/public/about/terms/index.html index 4b9f4445..db8b9e57 100644 --- a/site/public/about/terms/index.html +++ b/site/public/about/terms/index.html @@ -18,10 +18,10 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index a130c24e..76549d25 100644 --- a/site/public/datasets/lfw/index.html +++ 
b/site/public/datasets/lfw/index.html @@ -18,28 +18,22 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> - <section><ul> -<li>Created 2007</li> -<li>Images 13,233</li> -<li>People 5,749</li> -<li>Created From Yahoo News images</li> -<li>Analyzed and searchable</li> -</ul> -<p>Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to as “The Wild”.</p> + <section><h1>Labeled Faces in The Wild</h1> +</section><section><div class='meta'><div><div class='gray'>Created</div><div>2007</div></div><div><div class='gray'>Images</div><div>13,233</div></div><div><div class='gray'>People</div><div>5,749</div></div><div><div class='gray'>Created From</div><div>Yahoo News images</div></div><div><div class='gray'>Search available</div><div>Searchable</div></div></div></section><section><p>Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to as “The Wild”.</p> </section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/lfw/assets/lfw_sample.jpg' alt='Eight out of 5,749 people in the Labeled Faces in the Wild dataset. 
The face recognition training dataset is created entirely from photos downloaded from the Internet.'><div class='caption'>Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.</div></div></section><section><h2>INTRO</h2> <p>It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).</p> <p>Since then the LFW dataset has become one of the most widely used datasets used for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times reaching 45 different countries throughout the world.</p> <p>The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context now server a new purpose: to train, evaluate, and improve facial recognition.</p> <p>As the most widely used facial recognition dataset, it can be said that each individual in LFW has, in a small way, contributed to the current state of the art in facial recognition surveillance. John Cusack, Julianne Moore, Barry Bonds, Osama bin Laden, and even Moby are amongst these biometric pillars, exemplar faces provided the visual dimensions of a new computer vision future.</p> -</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/lfw/assets/lfw_a_to_c.jpg' alt='From Aaron Eckhart to Zydrunas Ilgauskas. 
A small sampling of the LFW dataset'><div class='caption'>From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset</div></div></section><section><p>In addition to commercial use as an evaluation tool, alll of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.</p> +</section><section class='wide'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/lfw/assets/lfw_a_to_c.jpg' alt='From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset'><div class='caption'>From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset</div></div></section><section><p>In addition to commercial use as an evaluation tool, alll of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.</p> <h2>Usage</h2> <pre><code class="lang-python">#!/usr/bin/python from matplotlib import plt diff --git a/site/public/datasets/lfw/what/index.html b/site/public/datasets/lfw/what/index.html new file mode 100644 index 00000000..52993a79 --- /dev/null +++ b/site/public/datasets/lfw/what/index.html @@ -0,0 +1,141 @@ +<!doctype html> +<html> +<head> + <title>MegaPixels</title> + <meta charset="utf-8" /> + <meta name="author" content="Adam Harvey" /> + <meta name="description" content="LFW: Labeled Faces in The Wild" /> + <meta name="referrer" content="no-referrer" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" /> + <link rel='stylesheet' href='/assets/css/fonts.css' /> + <link rel='stylesheet' href='/assets/css/css.css' /> +</head> +<body> + <header> + <a class='slogan' href="/"> + <div class='logo'></div> + <div class='site_name'>MegaPixels</div> + <span class='sub'>The Darkside of Datasets</span> + </a> + <div class='links'> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a 
href="/about/">About</a> + </div> + </header> + <div class="content"> + + <section><h1>Labeled Faces in The Wild</h1> +<ul> +<li>Created 2007 (auto)</li> +<li>Images 13,233 (auto)</li> +<li>People 5,749 (auto)</li> +<li>Created From Yahoo News images (auto)</li> +<li>Analyzed and searchable (auto)</li> +</ul> +<p><em>Labeled Faces in The Wild</em> is amongst the most widely used facial recognition training datasets in the world and is the first facial recognition dataset [^lfw_names_faces] of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people that appeared on Yahoo News between 2002 - 2004.</p> +</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/lfw/assets/lfw_grid_preview.jpg' alt='Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.'><div class='caption'>Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.</div></div></section><section><h2>INTRO</h2> +<p>It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).</p> +<p>Since then the LFW dataset has become one of the most widely used datasets used for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times reaching 45 different countries throughout the world.</p> +<p>The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. 
It’s a sampling of current affairs and breaking news that has come to pass. The images, detached from their original context now server a new purpose: to train, evaluate, and improve facial recognition.</p> +<p>As the most widely used facial recognition dataset, it can be said that each individual in LFW has, in a small way, contributed to the current state of the art in facial recognition surveillance. John Cusack, Julianne Moore, Barry Bonds, Osama bin Laden, and even Moby are amongst these biometric pillars, exemplar faces provided the visual dimensions of a new computer vision future.</p> +</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/lfw/assets/lfw_a_to_c.jpg' alt='From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset'><div class='caption'>From Aaron Eckhart to Zydrunas Ilgauskas. A small sampling of the LFW dataset</div></div></section><section><p>In addition to commercial use as an evaluation tool, all of the faces in LFW dataset are prepackaged into a popular machine learning code framework called scikit-learn.</p> +<h2>Usage</h2> +<pre><code class="lang-python">#!/usr/bin/python +from matplotlib import plt +from sklearn.datasets import fetch_lfw_people +lfw_people = fetch_lfw_people() +lfw_person = lfw_people[0] +plt.imshow(lfw_person) +</code></pre> +<h2>Commercial Use</h2> +<p>The LFW dataset is used by numerous companies for benchmarking algorithms and in some cases training. 
According to the benchmarking results page [^lfw_results] provided by the authors, over 2 dozen companies have contributed their benchmark results</p> +<pre><code>load file: lfw_commercial_use.csv +name_display,company_url,example_url,country,description +</code></pre> +<table> +<thead><tr> +<th style="text-align:left">Company</th> +<th style="text-align:left">Country</th> +<th style="text-align:left">Industries</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:left"><a href="http://www.aratek.co">Aratek</a></td> +<td style="text-align:left">China</td> +<td style="text-align:left">Biometric sensors for telecom, civil identification, finance, education, POS, and transportation</td> +</tr> +<tr> +<td style="text-align:left"><a href="http://www.aratek.co">Aratek</a></td> +<td style="text-align:left">China</td> +<td style="text-align:left">Biometric sensors for telecom, civil identification, finance, education, POS, and transportation</td> +</tr> +<tr> +<td style="text-align:left"><a href="http://www.aratek.co">Aratek</a></td> +<td style="text-align:left">China</td> +<td style="text-align:left">Biometric sensors for telecom, civil identification, finance, education, POS, and transportation</td> +</tr> +</tbody> +</table> +<p>Add 2-4 screenshots of companies mentioning LFW here</p> +</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/lfw/assets/lfw_screenshot_01.png' alt='ReadSense'><div class='caption'>ReadSense</div></div></section><section><p>In benchmarking, companies use a dataset to evaluate their algorithms which are typically trained on other data. After training, researchers will use LFW as a benchmark to compare results with other algorithms.</p> +<p>For example, Baidu (est. net worth $13B) uses LFW to report results for their "Targeting Ultimate Accuracy: Face Recognition via Deep Embedding". 
According to the three Baidu researchers who produced the paper:</p> +<blockquote><p>LFW has been the most popular evaluation benchmark for face recognition, and played a very important role in facilitating the face recognition society to improve algorithm. <sup class="footnote-ref" id="fnref-baidu_lfw"><a href="#fn-baidu_lfw">1</a></sup>.</p> +</blockquote> +<h2>Citations</h2> +<table> +<thead><tr> +<th style="text-align:left">Title</th> +<th style="text-align:left">Organization</th> +<th style="text-align:left">Country</th> +<th style="text-align:left">Type</th> +</tr> +</thead> +<tbody> +<tr> +<td style="text-align:left">3D-aided face recognition from videos</td> +<td style="text-align:left">University of Lyon</td> +<td style="text-align:left">France</td> +<td style="text-align:left">edu</td> +</tr> +<tr> +<td style="text-align:left">A Community Detection Approach to Cleaning Extremely Large Face Database</td> +<td style="text-align:left">National University of Defense Technology, China</td> +<td style="text-align:left">China</td> +<td style="text-align:left">edu</td> +</tr> +</tbody> +</table> +<h2>Conclusion</h2> +<p>The LFW face recognition training and evaluation dataset is a historically important face dataset as it was the first popular dataset to be created entirely from Internet images, paving the way for a global trend towards downloading anyone’s face from the Internet and adding it to a dataset. As will be evident with other datasets, LFW’s approach has now become the norm.</p> +<p>For all the 5,000 people in this datasets, their face is forever a part of facial recognition history. It would be impossible to remove anyone from the dataset because it is so ubiquitous. 
For the rest of their lives and forever after, these 5,000 people will continue to be used for training facial recognition surveillance.</p> +<h2>Notes</h2> +<p>According to BiometricUpdate.com<sup class="footnote-ref" id="fnref-biometric_update_lfw"><a href="#fn-biometric_update_lfw">2</a></sup>, LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."</p> +<div class="footnotes"> +<hr> +<ol><li id="fn-baidu_lfw"><p>"Chinese tourist town uses face recognition as an entry pass". New Scientist. November 17, 2016. <a href="https://www.newscientist.com/article/2113176-chinese-tourist-town-uses-face-recognition-as-an-entry-pass/">https://www.newscientist.com/article/2113176-chinese-tourist-town-uses-face-recognition-as-an-entry-pass/</a><a href="#fnref-baidu_lfw" class="footnote">↩</a></p></li> +<li id="fn-biometric_update_lfw"><p>"PING AN Tech facial recognition receives high score in latest LFW test results". <a href="https://www.biometricupdate.com/201702/ping-an-tech-facial-recognition-receives-high-score-in-latest-lfw-test-results">https://www.biometricupdate.com/201702/ping-an-tech-facial-recognition-receives-high-score-in-latest-lfw-test-results</a><a href="#fnref-biometric_update_lfw" class="footnote">↩</a></p></li> +</ol> +</div> +</section> + + </div> + <footer> + <div> + <a href="/">MegaPixels.cc</a> + <a href="/about/disclaimer/">Disclaimer</a> + <a href="/about/terms/">Terms of Use</a> + <a href="/about/privacy/">Privacy</a> + <a href="/about/">About</a> + <a href="/about/team/">Team</a> + </div> + <div> + MegaPixels ©2017-19 Adam R. Harvey / + <a href="https://ahprojects.com">ahprojects.com</a> + </div> + </footer> +</body> +<script src="/assets/js/app/site.js"></script> +</html>
\ No newline at end of file diff --git a/site/public/datasets/vgg_faces2/index.html b/site/public/datasets/vgg_faces2/index.html index ee353047..95b5f7d7 100644 --- a/site/public/datasets/vgg_faces2/index.html +++ b/site/public/datasets/vgg_faces2/index.html @@ -18,23 +18,17 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> - <section><ul> -<li>Created 2007</li> -<li>Images 13,233</li> -<li>People 5,749</li> -<li>Created From Yahoo News images</li> -<li>Search available <a href="#">Searchable</a></li> -</ul> -<p>Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to by researchers as “The Wild”.</p> -</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/vgg_faces2/assets/identity_grid_01.jpg' alt='Eight out of 5,749 people in the Labeled Faces in the Wild dataset. The face recognition training dataset is created entirely from photos downloaded from the Internet.'><div class='caption'>Eight out of 5,749 people in the Labeled Faces in the Wild dataset. 
The face recognition training dataset is created entirely from photos downloaded from the Internet.</div></div></section><section><h2>INTRO</h2> + <section><h1>Labeled Faces in The Wild</h1> +</section><section><div class='meta'><div><div class='gray'>Created</div><div>2007</div></div><div><div class='gray'>Images</div><div>13,233</div></div><div><div class='gray'>People</div><div>5,749</div></div><div><div class='gray'>Created From</div><div>Yahoo News images</div></div><div><div class='gray'>Search available</div><div>[Searchable](#)</div></div></div></section><section><p>Labeled Faces in The Wild is amongst the most widely used facial recognition training datasets in the world and is the first dataset of its kind to be created entirely from Internet photos. It includes 13,233 images of 5,749 people downloaded from the Internet, otherwise referred to by researchers as “The Wild”.</p> +<h2>INTRO</h2> <p>It began in 2002. Researchers at University of Massachusetts Amherst were developing algorithms for facial recognition and they needed more data. Between 2002-2004 they scraped Yahoo News for images of public figures. Two years later they cleaned up the dataset and repackaged it as Labeled Faces in the Wild (LFW).</p> <p>Since then the LFW dataset has become one of the most widely used datasets used for evaluating face recognition algorithms. The associated research paper “Labeled Faces in the Wild: A Database for Studying Face Recognition in Unconstrained Environments” has been cited 996 times reaching 45 different countries throughout the world.</p> <p>The faces come from news stories and are mostly celebrities from the entertainment industry, politicians, and villains. It’s a sampling of current affairs and breaking news that has come to pass. 
The images, detached from their original context, now serve a new purpose: to train, evaluate, and improve facial recognition.</p> diff --git a/site/public/index.html b/site/public/index.html index ea3dc24c..3ce22936 100644 --- a/site/public/index.html +++ b/site/public/index.html @@ -18,23 +18,23 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> <section><p>MegaPixels is an art project that explores the dark side of face recognition training data and the future of computer vision</p> <p>Made by Adam Harvey in partnership with Mozilla.<br> -Read more <a href="/about">about MegaPixels</a></p> +Read more [about MegaPixels]</p> <p>[Explore Datasets] [Explore Algorithms]</p> <h2>Facial Recognition Datasets</h2> <p>Regular Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.</p> <h3>Summary</h3> <ul> -<li>275 datasets found</li> +<li>275 datsets found</li> <li>Created between the years 1993-2018</li> <li>Smallest dataset: 20 images</li> <li>Largest dataset: 10,000,000 images</li> diff --git a/site/public/research/01_from_1_to_100_pixels/index.html b/site/public/research/01_from_1_to_100_pixels/index.html index 90f142e9..55e02c6c 100644 --- a/site/public/research/01_from_1_to_100_pixels/index.html +++ b/site/public/research/01_from_1_to_100_pixels/index.html @@ -18,10 +18,10 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="/">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> @@ -74,13 +74,6 @@ </div> </section> - <section> - <h3>MORE RESEARCH</h3> - <div class='blogposts'> - - </div> - </section> - </div> <footer> <div> diff --git a/site/public/research/index.html b/site/public/research/index.html new file mode 100644 index 00000000..1f61dadf --- /dev/null +++ b/site/public/research/index.html @@ -0,0 +1,50 @@ +<!doctype html> +<html> +<head> + <title>MegaPixels</title> + <meta charset="utf-8" /> + <meta name="author" content="Adam Harvey" /> + <meta name="description" content="Research blog" /> + <meta name="referrer" content="no-referrer" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" /> + <link rel='stylesheet' href='/assets/css/fonts.css' /> + <link rel='stylesheet' href='/assets/css/css.css' /> +</head> +<body> + <header> + <a class='slogan' href="/"> + <div class='logo'></div> + <div class='site_name'>MegaPixels</div> + <span class='sub'>The Darkside of Datasets</span> + </a> + <div class='links'> + <a 
href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> + </div> + </header> + <div class="content"> + + <section><h1>Research Blog</h1> +<h2>The darkside of datasets and the future of computer vision</h2> +</section><div class='research_index'><a href='/research/01_from_1_to_100_pixels/'><section class='wide'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/site/research/01_from_1_to_100_pixels/assets/intro.jpg' alt='Research post' /><section><h1>From 1 to 100 Pixels</h1><h2>Photographs are for romantics. For the rest of us, it's all about data. And a photo contains a massive amount of information about who you are.</h2></section></section></a></div> + + </div> + <footer> + <div> + <a href="/">MegaPixels.cc</a> + <a href="/about/disclaimer/">Disclaimer</a> + <a href="/about/terms/">Terms of Use</a> + <a href="/about/privacy/">Privacy</a> + <a href="/about/">About</a> + <a href="/about/team/">Team</a> + </div> + <div> + MegaPixels ©2017-19 Adam R. Harvey / + <a href="https://ahprojects.com">ahprojects.com</a> + </div> + </footer> +</body> +<script src="/assets/js/app/site.js"></script> +</html>
\ No newline at end of file diff --git a/site/templates/layout.html b/site/templates/layout.html index 5b5833be..7558163e 100644 --- a/site/templates/layout.html +++ b/site/templates/layout.html @@ -18,10 +18,10 @@ <span class='sub'>The Darkside of Datasets</span> </a> <div class='links'> - <a href="/search">Face Search</a> - <a href="/datasets">Datasets</a> - <a href="{{ latest_research_post.url }}">Research</a> - <a href="/about">About</a> + <a href="/search/">Face Search</a> + <a href="/datasets/">Datasets</a> + <a href="/research/">Research</a> + <a href="/about/">About</a> </div> </header> <div class="content"> diff --git a/site/templates/research.html b/site/templates/research.html index 22e494c2..0bb9fa30 100644 --- a/site/templates/research.html +++ b/site/templates/research.html @@ -20,16 +20,4 @@ </section> {{ content }} - - <section> - <h3>MORE RESEARCH</h3> - <div class='blogposts'> - {% for blogpost in blogposts %} - <div> - <a href="{{ blogpost.url }}">{{ blogpost.title }}</a> - <span class='sub'>{{ blogpost.date }}</span> - </div> - {% endfor %} - </div> - </section> {% endblock %} |
