From b1e7dc570fe25749a2e1b02c9e859df6588b4660 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Sat, 15 Dec 2018 22:04:41 +0100 Subject: move builder --- megapixels/commands/site/build.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 megapixels/commands/site/build.py (limited to 'megapixels/commands/site/build.py') diff --git a/megapixels/commands/site/build.py b/megapixels/commands/site/build.py new file mode 100644 index 00000000..fc4fb302 --- /dev/null +++ b/megapixels/commands/site/build.py @@ -0,0 +1,15 @@ +""" +Build the static site +""" + +import click + +from app.builder.builder import build_site + +@click.command() +@click.pass_context +def cli(ctx): + """Build the static site + """ + print('Building the site...') + build_site() -- cgit v1.2.3-70-g09d2 From 898e6cdf8df0993f853b748d4e8a9c269fad0294 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Sat, 15 Dec 2018 22:14:17 +0100 Subject: inject applet payload --- megapixels/app/settings/app_cfg.py | 2 +- megapixels/app/site/builder.py | 4 +-- megapixels/app/site/parser.py | 15 ++++++--- megapixels/commands/site/build.py | 2 +- site/public/about/style/index.html | 11 +------ site/public/datasets/lfw/index.html | 51 ++++++++++++------------------- site/public/datasets/vgg_face2/index.html | 9 ++---- 7 files changed, 36 insertions(+), 58 deletions(-) (limited to 'megapixels/commands/site/build.py') diff --git a/megapixels/app/settings/app_cfg.py b/megapixels/app/settings/app_cfg.py index a18e5875..d25936e6 100644 --- a/megapixels/app/settings/app_cfg.py +++ b/megapixels/app/settings/app_cfg.py @@ -75,10 +75,10 @@ FP_FONT = join(DIR_ASSETS, 'font') # ----------------------------------------------------------------------------- DIR_COMMANDS_CV = 'commands/cv' DIR_COMMANDS_ADMIN = 'commands/admin' -DIR_COMMANDS_SITE = 'commands/site' DIR_COMMANDS_DATASETS = 'commands/datasets' DIR_COMMANDS_FAISS = 'commands/faiss' DIR_COMMANDS_MISC = 'commands/misc' +DIR_COMMANDS_SITE = 
'commands/site' # ----------------------------------------------------------------------------- # Filesystem settings diff --git a/megapixels/app/site/builder.py b/megapixels/app/site/builder.py index df609f60..42e25768 100644 --- a/megapixels/app/site/builder.py +++ b/megapixels/app/site/builder.py @@ -6,8 +6,8 @@ from jinja2 import Environment, FileSystemLoader, select_autoescape import app.settings.app_cfg as cfg -import app.builder.s3 as s3 -import app.builder.parser as parser +import app.site.s3 as s3 +import app.site.parser as parser env = Environment( loader=FileSystemLoader(cfg.DIR_SITE_TEMPLATES), diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index add3f386..d78cc402 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -1,10 +1,11 @@ import os import re import glob +import simplejson as json import mistune import app.settings.app_cfg as cfg -import app.builder.s3 as s3 +import app.site.s3 as s3 renderer = mistune.Renderer(escape=False) markdown = mistune.Markdown(renderer=renderer) @@ -45,14 +46,18 @@ def format_metadata(section): def format_applet(section): payload = section.replace('```', '').strip().split('\n') + applet = {} if ': ' in payload[0]: command, opt = payload[0].split(': ') else: command = payload[0] opt = None - if command == 'load_file': - return "
{}
" - + applet['command'] = command + if opt: + applet['opt'] = opt + if command == 'load file': + applet['fields'] = payload[1] + return "
".format(json.dumps(applet)) def parse_markdown(sections, s3_path, skip_h1=False): groups = [] @@ -60,7 +65,7 @@ def parse_markdown(sections, s3_path, skip_h1=False): for section in sections: if skip_h1 and section.startswith('# '): continue - elif section.startsWith('```'): + elif section.startswith('```'): groups.append(format_section(current_group, s3_path)) groups.append(format_applet(section)) current_group = [] diff --git a/megapixels/commands/site/build.py b/megapixels/commands/site/build.py index fc4fb302..0a76a9ac 100644 --- a/megapixels/commands/site/build.py +++ b/megapixels/commands/site/build.py @@ -4,7 +4,7 @@ Build the static site import click -from app.builder.builder import build_site +from app.site.builder import build_site @click.command() @click.pass_context diff --git a/site/public/about/style/index.html b/site/public/about/style/index.html index f2c0d4b8..39a44380 100644 --- a/site/public/about/style/index.html +++ b/site/public/about/style/index.html @@ -51,16 +51,7 @@
Person 3. Let me tell you about Person 3.  This person has a very long description with text which wraps like crazy
Person 3. Let me tell you about Person 3. This person has a very long description with text which wraps like crazy

est, qui dolorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliquam quaerat voluptatem.

This image is extremely wide and the text beneath it will wrap but that's fine because it can also contain <a href="https://example.com/">hyperlinks</a>! Yes, you read that right—hyperlinks! Lorem ipsum dolor sit amet ad volotesque sic hoc ad nauseam
This image is extremely wide and the text beneath it will wrap but that's fine because it can also contain hyperlinks! Yes, you read that right—hyperlinks! Lorem ipsum dolor sit amet ad volotesque sic hoc ad nauseam

Inline code has back-ticks around it.

-
var s = "JavaScript syntax highlighting";
-alert(s);
-
-
s = "Python syntax highlighting"
-print(s)
-
-
No language indicated, so no syntax highlighting. 
-But let's throw in a <b>tag</b>.
-
-

Horizontal rule

+

Horizontal rule


Citations below here

diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index e080229f..3c83acd3 100644 --- a/site/public/datasets/lfw/index.html +++ b/site/public/datasets/lfw/index.html @@ -28,12 +28,7 @@

Labeled Faces in the Wild

Created
2007
Images
13,233
People
5,749
Created From
Yahoo News images
Search available
Searchable

Labeled Faces in The Wild (LFW) is amongst the most widely used facial recognition training datasets in the world and is the first of its kind to be created entirely from images posted online. The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. Use the tools below to check if you were included in this dataset or scroll down to read the analysis.

-

{INSERT IMAGE SEARCH MODULE}

-

{INSERT TEXT SEARCH MODULE}

-
load file: lfw_names_gender_kg_min.csv
-Name, Images, Gender, Description
-
-
Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.
Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.

Intro

+
Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.
Eighteen of the 5,749 people in the Labeled Faces in the Wild Dataset. The most widely used face dataset for benchmarking commercial face recognition algorithms.

Intro

Three paragraphs describing the LFW dataset in a format that can be easily replicated for the other datasets. Nothing too custom. An analysis of the initial research papers with context relative to all the other dataset papers.

 all 5,749 people in the LFW Dataset sorted from most to least images collected.
all 5,749 people in the LFW Dataset sorted from most to least images collected.

LFW by the Numbers

    @@ -224,36 +219,28 @@ name_display,company_url,example_url,country,description

    Code

    -
    #!/usr/bin/python
    -
    -import numpy as np
    +

import numpy as np from sklearn.datasets import fetch_lfw_people import imageio -import imutils - -# download LFW dataset (first run takes a while) -lfw_people = fetch_lfw_people(min_faces_per_person=1, resize=1, color=True, funneled=False) - -# introspect dataset -n_samples, h, w, c = lfw_people.images.shape -print('{:,} images at {}x{}'.format(n_samples, w, h)) +import imutils

+

download LFW dataset (first run takes a while)

+

lfw_people = fetch_lfw_people(min_faces_per_person=1, resize=1, color=True, funneled=False)

+

introspect dataset

+

n_samples, h, w, c = lfw_people.images.shape +print('{:,} images at {}x{}'.format(n_samples, w, h)) cols, rows = (176, 76) -n_ims = cols * rows - -# build montages -im_scale = 0.5 +n_ims = cols * rows

+

build montages

+

im_scale = 0.5 ims = lfw_people.images[:n_ims -montages = imutils.build_montages(ims, (int(w*im_scale, int(h*im_scale)), (cols, rows)) -montage = montages[0] - -# save full montage image -imageio.imwrite('lfw_montage_full.png', montage) - -# make a smaller version -montage_960 = imutils.resize(montage, width=960) -imageio.imwrite('lfw_montage_960.jpg', montage_960) - -

Disclaimer

+montages = imutils.build_montages(ims, (int(wim_scale, int(him_scale)), (cols, rows)) +montage = montages[0]

+

save full montage image

+

imageio.imwrite('lfw_montage_full.png', montage)

+

make a smaller version

+

montage_960 = imutils.resize(montage, width=960) +imageio.imwrite('lfw_montage_960.jpg', montage_960)

+

Disclaimer

MegaPixels is an educational art project designed to encourage discourse about facial recognition datasets. Any ethical or legal issues should be directed to the researchers' parent organizations. Except where necessary for contact or clarity, the names of researchers have been substituted by their parent organization. In no way does this project aim to vilify researchers who produced the datasets.

Read more about MegaPixels Code of Conduct

diff --git a/site/public/datasets/vgg_face2/index.html b/site/public/datasets/vgg_face2/index.html index 24a1059b..817fc9a0 100644 --- a/site/public/datasets/vgg_face2/index.html +++ b/site/public/datasets/vgg_face2/index.html @@ -28,12 +28,7 @@

VGG Faces2

Created
2018
Images
3.3M
People
9,000
Created From
Scraping search engines
Search available
[Searchable](#)

VGG Face2 is the updated version of the VGG Face dataset and now includes over 3.3M face images from over 9K people. The identities were selected by taking the top 500K identities in Google's Knowledge Graph of celebrities and then selecting only the names that yielded enough training images. The dataset was created in the UK but funded by the Office of the Director of National Intelligence in the United States.

-

{INSERT IMAGE SEARCH MODULE}

-

{INSERT TEXT SEARCH MODULE}

-
load file: lfw_names_gender_kg_min.csv
-Name, Images, Gender, Description
-
-

VGG Face2 by the Numbers

+

VGG Face2 by the Numbers

  • 1,331 actresses, 139 presidents
  • 3 husbands and 16 wives
  • @@ -47,7 +42,7 @@ Name, Images, Gender, Description
  • The original VGGF2 name list has been updated with the results returned from Google Knowledge
  • Names with a similarity score greater than 0.75 were automatically updated. Scores were computed using import difflib; seq = difflib.SequenceMatcher(a=a.lower(), b=b.lower()); score = seq.ratio()
  • The 97 names with a score of 0.75 or lower were manually reviewed and include name changes validated using Wikipedia.org results for names such as "Bruce Jenner" to "Caitlyn Jenner", spousal last-name changes, and discretionary changes to improve search results such as combining nicknames with full name when appropriate, for example changing "Aleksandar Petrović" to "Aleksandar 'Aco' Petrović" and minor changes such as "Mohammad Ali" to "Muhammad Ali"
  • -
  • The 'Description` text was automatically added when the Knowledge Graph score was greater than 250
  • +
  • The 'Description' text was automatically added when the Knowledge Graph score was greater than 250

TODO

    -- cgit v1.2.3-70-g09d2 From 6ce4a4589f8e21b038f1ff636da60e9881651a4a Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Sun, 16 Dec 2018 12:28:45 +0100 Subject: build -i to build a single page --- megapixels/app/site/builder.py | 10 ++++++++++ megapixels/app/site/parser.py | 5 +++-- megapixels/app/site/s3.py | 9 ++++++--- megapixels/commands/site/build.py | 14 ++++++++++---- site/public/about/credits/index.html | 2 +- site/public/about/index.html | 2 +- site/public/research/00_introduction/index.html | 12 ++++++------ site/public/research/index.html | 2 +- 8 files changed, 38 insertions(+), 18 deletions(-) (limited to 'megapixels/commands/site/build.py') diff --git a/megapixels/app/site/builder.py b/megapixels/app/site/builder.py index 895f265b..ff1a0c83 100644 --- a/megapixels/app/site/builder.py +++ b/megapixels/app/site/builder.py @@ -18,6 +18,8 @@ def build_page(fn, research_posts): """ build a single page from markdown into the appropriate template - writes it to site/public/ + - syncs any assets with s3 + - handles certain index pages... """ metadata, sections = parser.read_metadata(fn) @@ -91,3 +93,11 @@ def build_site(): for fn in glob.iglob(os.path.join(cfg.DIR_SITE_CONTENT, "**/*.md"), recursive=True): build_page(fn, research_posts) build_research_index(research_posts) + +def build_file(fn): + """ + build just one page from a filename! =^) + """ + research_posts = parser.read_research_post_index() + fn = os.path.join(cfg.DIR_SITE_CONTENT, fn) + build_page(fn, research_posts) diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index 6c6ad688..ca6ac77b 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -55,8 +55,8 @@ def format_metadata(section): return "
    {}
    ".format(''.join(meta)) def format_applet(section, s3_path): - print(section) - payload = section.strip('```').strip().split('\n') + # print(section) + payload = section.strip('```').strip().strip('```').strip().split('\n') applet = {} print(payload) if ': ' in payload[0]: @@ -213,6 +213,7 @@ def parse_metadata(fn, sections): metadata['sync'] = metadata['sync'] != 'false' metadata['author_html'] = '
    '.join(metadata['authors'].split(',')) + return metadata, valid_sections def read_research_post_index(): diff --git a/megapixels/app/site/s3.py b/megapixels/app/site/s3.py index 5464d464..18133078 100644 --- a/megapixels/app/site/s3.py +++ b/megapixels/app/site/s3.py @@ -6,13 +6,14 @@ def sync_directory(base_fn, s3_path, metadata): """ Synchronize a local assets folder with S3 """ + if not metadata['sync']: + return + fns = {} for fn in glob.glob(os.path.join(base_fn, 'assets/*')): + # print(fn) fns[os.path.basename(fn)] = True - if not metadata['sync']: - return - remote_path = s3_path + metadata['url'] session = boto3.session.Session() @@ -31,6 +32,7 @@ def sync_directory(base_fn, s3_path, metadata): if 'Contents' in directory: for obj in directory['Contents']: s3_fn = obj['Key'] + # print(s3_fn) fn = os.path.basename(s3_fn) local_fn = os.path.join(base_fn, 'assets', fn) if fn in fns: @@ -52,6 +54,7 @@ def sync_directory(base_fn, s3_path, metadata): for fn in fns: local_fn = os.path.join(base_fn, 'assets', fn) s3_fn = os.path.join(remote_path, 'assets', fn) + print(s3_fn) print("s3 create {}".format(s3_fn)) s3_client.upload_file( local_fn, diff --git a/megapixels/commands/site/build.py b/megapixels/commands/site/build.py index 0a76a9ac..2d344899 100644 --- a/megapixels/commands/site/build.py +++ b/megapixels/commands/site/build.py @@ -4,12 +4,18 @@ Build the static site import click -from app.site.builder import build_site +from app.site.builder import build_site, build_file @click.command() +@click.option('-i', '--input', 'input_file', required=False, + help='File to generate') @click.pass_context -def cli(ctx): +def cli(ctx, input_file): """Build the static site """ - print('Building the site...') - build_site() + if input_file: + print('Building {}'.format(input_file)) + build_file(input_file) + else: + print('Building the site...') + build_site() diff --git a/site/public/about/credits/index.html b/site/public/about/credits/index.html index 
67e9dcb8..b1b394bd 100644 --- a/site/public/about/credits/index.html +++ b/site/public/about/credits/index.html @@ -28,7 +28,7 @@

    Credits

    -
    alt text
    alt text
      +
      • MegaPixels by Adam Harvey
      • Made with support from Mozilla
      • Site developed by Jules Laplace
      • diff --git a/site/public/about/index.html b/site/public/about/index.html index 67e9dcb8..b1b394bd 100644 --- a/site/public/about/index.html +++ b/site/public/about/index.html @@ -28,7 +28,7 @@

        Credits

        -
        alt text
        alt text
          +
          • MegaPixels by Adam Harvey
          • Made with support from Mozilla
          • Site developed by Jules Laplace
          • diff --git a/site/public/research/00_introduction/index.html b/site/public/research/00_introduction/index.html index 290208f8..4349bcaf 100644 --- a/site/public/research/00_introduction/index.html +++ b/site/public/research/00_introduction/index.html @@ -3,8 +3,8 @@ MegaPixels - - + + @@ -28,21 +28,21 @@
            -

            Untitled Page

            +

            00: Introduction

            Posted
            -
            2018-12-31
            +
            2018-12-15
            By
            -
            Adam Harvey
            +
            Megapixels
            -

            It was the early 2000s. Face recognition was new and no one seemed sure exactly how well it was going to perform in practice. In theory, face recognition was poised to be a game changer, a force multiplier, a strategic military advantage, a way to make cities safer and to secure borders. This was the future John Ashcroft demanded with the Total Information Awareness act of 2003 and that spooks had dreamed of for decades. It was a future that academics at Carnegie Mellon University and Colorado State University would help build. It was also a future that celebrities would play a significant role in building. And to the surprise of ordinary Internet users like myself and perhaps you, it was a future that millions of Internet users would unwittingly play a role in creating.

            +
            Posted
            Dec. 15
            Author
            Adam Harvey

            It was the early 2000s. Face recognition was new and no one seemed sure exactly how well it was going to perform in practice. In theory, face recognition was poised to be a game changer, a force multiplier, a strategic military advantage, a way to make cities safer and to secure borders. This was the future John Ashcroft demanded with the Total Information Awareness act of 2003 and that spooks had dreamed of for decades. It was a future that academics at Carnegie Mellon University and Colorado State University would help build. It was also a future that celebrities would play a significant role in building. And to the surprise of ordinary Internet users like myself and perhaps you, it was a future that millions of Internet users would unwittingly play a role in creating.

            Now the future has arrived and it doesn't make sense. Facial recognition works yet it doesn't actually work. Facial recognition is cheap and accessible but also expensive and out of control. Facial recognition research has achieved headline grabbing superhuman accuracies over 99.9% yet facial recognition is also dangerously inaccurate. During a trial installation at Sudkreuz station in Berlin in 2018, 20% of the matches were wrong, a number so low that it should not have any connection to law enforcement or justice. And in London, the Metropolitan police had been using facial recognition software that mistakenly identified an alarming 98% of people as criminals 1, which perhaps is a crime itself.

            MegaPixels is an online art project that explores the history of facial recognition from the perspective of datasets. To paraphrase the artist Trevor Paglen, whoever controls the dataset controls the meaning. MegaPixels aims to unravel the meanings behind the data and expose the darker corners of the biometric industry that have contributed to its growth. MegaPixels does not start with a conclusion, a moralistic slant, or a

            Whether or not to build facial recognition is a question that can no longer be asked. As an outspoken critic of face recognition I've developed, and hopefully furthered, my understanding during the last 10 years I've spent working with computer vision. Though I initially disagreed, I've come to see the technocratic perspective as a non-negotiable reality. As Oren (nytimes article) wrote in a NYT Op-Ed "the horse is out of the barn" and the only thing we can do collectively or individually is to steer towards the least bad outcome. Computational communication has entered a new era and it's both exciting and frightening to explore the potentials and opportunities. In 1997 getting access to 1 teraFLOPS of computational power would have cost you $55 million and required a strategic partnership with the Department of Defense. At the time of writing, anyone can rent 1 teraFLOPS on a cloud GPU marketplace for less than $1/day. 2.

            diff --git a/site/public/research/index.html b/site/public/research/index.html index 17b4a2a0..1acbe844 100644 --- a/site/public/research/index.html +++ b/site/public/research/index.html @@ -29,7 +29,7 @@

            Research Blog

            The darkside of datasets and the future of computer vision

            -
            +