From 9d6d12f0b16d10219c62f25ce036b9377417de70 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Tue, 16 Apr 2019 18:55:02 +0200 Subject: build --- megapixels/app/site/parser.py | 3 +- .../datasets/50_people_one_question/index.html | 56 +++++++++- site/public/datasets/afad/index.html | 56 +++++++++- site/public/datasets/brainwash/index.html | 86 +++++++++++++-- site/public/datasets/caltech_10k/index.html | 58 +++++++++- site/public/datasets/celeba/index.html | 58 +++++++++- site/public/datasets/cofw/index.html | 117 +++++++++++++++++++-- site/public/datasets/duke_mtmc/index.html | 99 ++++++++++++++--- site/public/datasets/feret/index.html | 58 +++++++++- site/public/datasets/hrt_transgender/index.html | 6 +- site/public/datasets/lfpw/index.html | 56 +++++++++- site/public/datasets/lfw/index.html | 77 ++++++++++++-- site/public/datasets/market_1501/index.html | 56 +++++++++- site/public/datasets/msceleb/index.html | 68 +++++++++++- site/public/datasets/oxford_town_centre/index.html | 89 ++++++++++++++-- site/public/datasets/pipa/index.html | 56 +++++++++- site/public/datasets/pubfig/index.html | 56 +++++++++- site/public/datasets/uccs/index.html | 103 +++++++++++++++--- site/public/datasets/vgg_face2/index.html | 58 +++++++++- site/public/datasets/viper/index.html | 56 +++++++++- .../public/datasets/youtube_celebrities/index.html | 56 +++++++++- 21 files changed, 1212 insertions(+), 116 deletions(-) diff --git a/megapixels/app/site/parser.py b/megapixels/app/site/parser.py index aa2ddcda..1489d056 100644 --- a/megapixels/app/site/parser.py +++ b/megapixels/app/site/parser.py @@ -60,8 +60,7 @@ def parse_markdown(metadata, sections, s3_path, skip_h1=False): current_group.append(section) in_stats = True if 'end sidebar' in section.lower(): - groups.append(format_section(current_group, s3_path, 'right-sidebar', tag='div')) - current_group = [] + current_group = [format_section(current_group, s3_path, 'right-sidebar', tag='div')] in_stats = False elif in_stats and not 
section.strip().startswith('## ') and 'end sidebar' not in section.lower(): current_group.append(section) diff --git a/site/public/datasets/50_people_one_question/index.html b/site/public/datasets/50_people_one_question/index.html index 577d4d8c..dc7919f7 100644 --- a/site/public/datasets/50_people_one_question/index.html +++ b/site/public/datasets/50_people_one_question/index.html @@ -28,7 +28,7 @@
People One Question is a dataset of people from an online video series on YouTube and Vimeo used for building facial recognition algorithms
People One Question dataset includes ...

50 People 1 Question

-
+

[ page under development ]

+
+

Who used 50 People One Question Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how 50 People One Question Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing 50 People One Question was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+ +
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
diff --git a/site/public/datasets/afad/index.html b/site/public/datasets/afad/index.html index 6ef13948..f2b0a5ba 100644 --- a/site/public/datasets/afad/index.html +++ b/site/public/datasets/afad/index.html @@ -42,9 +42,59 @@
Website
github.io
-

[ page under development ]

-

{% include 'dashboard.html' %}

-

(ignore) research notes

+

[ page under development ]

+
+

Who used Asian Face Age Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how Asian Face Age Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing The Asian Face Age Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+ +
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

(ignore) research notes

The Asian Face Age Dataset (AFAD) is a new dataset proposed for evaluating the performance of age estimation, which contains more than 160K facial images and the corresponding age and gender labels. This dataset is oriented to age estimation on Asian faces, so all the facial images are for Asian faces. It is noted that the AFAD is the biggest dataset for age estimation to date. It is well suited to evaluate how deep learning methods can be adopted for age estimation. Motivation

For age estimation, there are several public datasets for evaluating the performance of a specific algorithm, such as FG-NET [1] (1002 face images), MORPH I (1690 face images), and MORPH II[2] (55,608 face images). Among them, the MORPH II is the biggest public dataset to date. On the other hand, as we know it is necessary to collect a large scale dataset to train a deep Convolutional Neural Network. Therefore, the MORPH II dataset is extensively used to evaluate how deep learning methods can be adopted for age estimation [3][4].

diff --git a/site/public/datasets/brainwash/index.html b/site/public/datasets/brainwash/index.html index 2a6044d0..95f0d77d 100644 --- a/site/public/datasets/brainwash/index.html +++ b/site/public/datasets/brainwash/index.html @@ -28,7 +28,7 @@
Brainwash is a dataset of webcam images taken from the Brainwash Cafe in San Francisco in 2014
The Brainwash dataset includes 11,918 images of "everyday life of a busy downtown cafe" and is used for training head detection surveillance algorithms

Brainwash Dataset

-
+

Brainwash is a head detection dataset created from San Francisco's Brainwash Cafe livecam footage. It includes 11,918 images of "everyday life of a busy downtown cafe" 1 captured at 100 second intervals throughout the entire day. Brainwash dataset was captured during 3 days in 2014: October 27, November 13, and November 24. According to the authors' research paper introducing the dataset, the images were acquired with the help of Angelcam.com. 2

Brainwash is not a widely used dataset but since its publication by Stanford University in 2015, it has notably appeared in several research papers from the National University of Defense Technology in Changsha, China. In 2016 and in 2017 researchers there conducted studies on detecting people's heads in crowded scenes for the purpose of surveillance. 3 4

If you happen to have been at Brainwash cafe in San Francisco at any time on October 26, November 13, or November 24 in 2014 you are most likely included in the Brainwash dataset and have unwittingly contributed to surveillance research.

-

{% include 'dashboard.html' %}

-

{% include 'supplementary_header.html' %}

-
 A visualization of 81,973 head annotations from the Brainwash dataset training partition. © megapixels.cc
A visualization of 81,973 head annotations from the Brainwash dataset training partition. © megapixels.cc
 A sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The dataset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)
A sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The dataset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)
 49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)
49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)

TODO

+
+

Who used Brainwash Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how Brainwash Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Brainwash Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+
+ +
+
+
+
+ +

Supplementary Information

+ +
 A visualization of 81,973 head annotations from the Brainwash dataset training partition. © megapixels.cc
A visualization of 81,973 head annotations from the Brainwash dataset training partition. © megapixels.cc
 A sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The dataset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)
A sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The dataset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)
 49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)
49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)

TODO

  • change supp images to 2x2 grid with bboxes
  • add bounding boxes to the header image
  • @@ -62,7 +120,23 @@
  • add ethics link to Stanford
  • add optout info
-

{% include 'cite_our_work.html' %}

+
+ +

Cite Our Work

+

+ + If you use our data, research, or graphics please cite our work: + +

+@online{megapixels,
+  author = {Harvey, Adam. LaPlace, Jules.},
+  title = {MegaPixels: Origins, Ethics, and Privacy Implications of Publicly Available Face Recognition Image Datasets},
+  year = 2019,
+  url = {https://megapixels.cc/},
+  urldate = {2019-04-20}
+}
+ +

References

  • a

    "readme.txt" https://exhibits.stanford.edu/data/catalog/sx925dc9385.

  • a

    Stewart, Russel. Andriluka, Mykhaylo. "End-to-end people detection in crowded scenes". 2016.

  • a

    Li, Y. and Dou, Y. and Liu, X. and Li, T. Localized Region Context and Object Feature Fusion for People Head Detection. ICIP16 Proceedings. 2016. Pages 594-598.

    diff --git a/site/public/datasets/caltech_10k/index.html b/site/public/datasets/caltech_10k/index.html index 4cbb7ce6..04d63ee3 100644 --- a/site/public/datasets/caltech_10k/index.html +++ b/site/public/datasets/caltech_10k/index.html @@ -27,7 +27,7 @@

    Caltech 10K Faces Dataset

    -
    +

    [ page under development ]

    +
+

Who used Brainwash Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how Brainwash Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Brainwash Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

(ignore) research notes

The dataset contains images of people collected from the web by typing common given names into Google Image Search. The coordinates of the eyes, the nose and the center of the mouth for each frontal face are provided in a ground truth file. This information can be used to align and crop the human faces or as a ground truth for a face detection algorithm. The dataset has 10,524 human faces of various resolutions and in different settings, e.g. portrait images, groups of people, etc. Profile faces or very low resolution faces are not labeled.

diff --git a/site/public/datasets/celeba/index.html b/site/public/datasets/celeba/index.html index 9d75b428..c72f3798 100644 --- a/site/public/datasets/celeba/index.html +++ b/site/public/datasets/celeba/index.html @@ -28,7 +28,7 @@
CelebA is a dataset of people...
CelebA includes...

CelebA Dataset

-
+

[ PAGE UNDER DEVELOPMENT ]

+
+

Who used CelebA Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how CelebA Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Large-scale CelebFaces Attributes Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

Research

  • "An Unsupervised Approach to Solving Inverse Problems using Generative Adversarial Networks" mentions use by sponsored by an agency of the United States government. Neither the United States government nor Lawrence Livermore National Security, LLC, nor any of their"
  • 7dab6fbf42f82f0f5730fc902f72c3fb628ef2f0
  • diff --git a/site/public/datasets/cofw/index.html b/site/public/datasets/cofw/index.html index 084cf7c2..eef8cf5e 100644 --- a/site/public/datasets/cofw/index.html +++ b/site/public/datasets/cofw/index.html @@ -27,7 +27,7 @@

    Caltech Occluded Faces in the Wild

    -
    +

    [ PAGE UNDER DEVELOPMENT ]

    +
+

Who used COFW Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how COFW Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Caltech Occluded Faces in the Wild was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

(ignore) research notes

+
Years
1993-1996
Images
14,126
Identities
1,199
Origin
Web Searches
Funded by
ODNI, IARPA, Microsoft

COFW "is designed to benchmark face landmark algorithms in realistic conditions, which include heavy occlusions and large shape variations" [Robust face landmark estimation under occlusion].

We asked four people with different levels of computer vision knowledge to each collect 250 faces representative of typical real-world images, with the clear goal of challenging computer vision methods. The result is 1,007 images of faces obtained from a variety of sources.

@@ -54,11 +104,58 @@ To increase the number of training images, and since COFW has the exact same la

This research is supported by NSF Grant 0954083 and by the Office of the Director of National Intelligence (ODNI), Intelligence Advanced Research Projects Activity (IARPA), via IARPA R&D Contract No. 2014-14071600012.

https://www.cs.cmu.edu/~peiyunh/topdown/

-

{% include 'map.html' %}

-

{% include 'supplementary_header.html' %}

-

{% include 'citations.html' %}

-

{% include 'chart.html' %}

-

TODO

+
+ +

Biometric Trade Routes

+ +

+ To help understand how COFW Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Caltech Occluded Faces in the Wild was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the location markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org and then dataset usage verified and geolocated.
+
+ +
+
+
+
+ +

Supplementary Information

+ +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+
+

Who used COFW Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+

TODO

- replace graphic

diff --git a/site/public/datasets/duke_mtmc/index.html b/site/public/datasets/duke_mtmc/index.html index 3cd19708..5cb6fb0c 100644 --- a/site/public/datasets/duke_mtmc/index.html +++ b/site/public/datasets/duke_mtmc/index.html @@ -28,7 +28,7 @@
Duke MTMC is a dataset of surveillance camera footage of students on Duke University campus
Duke MTMC contains over 2 million video frames and 2,700 unique identities collected from 8 HD cameras at Duke University campus in March 2014

Duke MTMC

-
+

[ page under development ]

Duke MTMC (Multi-Target, Multi-Camera Tracking) is a dataset of video recorded on Duke University campus for research and development of networked camera surveillance systems. MTMC tracking algorithms are used for citywide dragnet surveillance systems such as those used throughout China by SenseTime 1 and the oppressive monitoring of 2.5 million Uyghurs in Xinjiang by SenseNets 2. In fact researchers from both SenseTime 4 5 and SenseNets 3 used the Duke MTMC dataset for their research.

In this investigation into the Duke MTMC dataset, we found that researchers at Duke University in Durham, North Carolina captured over 2,000 students, faculty members, and passersby into one of the most prolific public surveillance research datasets that's used around the world by commercial and defense surveillance organizations.

Since its publication in 2016, the Duke MTMC dataset has been used in over 100 studies at organizations around the world including SenseTime 4 5, SenseNets 3, IARPA and IBM 9, Chinese National University of Defense 7 8, US Department of Homeland Security 10, Tencent, Microsoft, Microsoft Asia, Fraunhofer, Senstar Corp., Alibaba, Naver Labs, Google and Hewlett-Packard Labs to name only a few.

The creation and publication of the Duke MTMC dataset in 2014 (published in 2016) was originally funded by the U.S. Army Research Laboratory and the National Science Foundation 6. Though our analysis of the geographic locations of the publicly available research shows over twice as many citations by researchers from China (44% China, 20% United States). In 2018 alone, there were 70 research project citations from China.

-
 A collection of 1,600 out of the 2,700 students and passersby captured into the Duke MTMC surveillance research and development dataset on . These students were also included in the Duke MTMC Re-ID dataset extension used for person re-identification. Open Data Commons Attribution License.
A collection of 1,600 out of the 2,700 students and passersby captured into the Duke MTMC surveillance research and development dataset on . These students were also included in the Duke MTMC Re-ID dataset extension used for person re-identification. Open Data Commons Attribution License.

The 8 cameras deployed on Duke's campus were specifically setup to capture students "during periods between lectures, when pedestrian traffic is heavy". 6. Camera 5 was positioned to capture students as entering and exiting the university's main chapel. Each camera's location and approximate field of view. The heat map visualization shows the locations where pedestrians were most frequently annotated in each video from the Duke MTMC dataset.

-
 Duke MTMC camera locations on Duke University campus. Open Data Commons Attribution License.
Duke MTMC camera locations on Duke University campus. Open Data Commons Attribution License.
 Duke MTMC camera views for 8 cameras deployed on campus © megapixels.cc
Duke MTMC camera views for 8 cameras deployed on campus © megapixels.cc
 Duke MTMC pedestrian detection saliency maps for 8 cameras deployed on campus © megapixels.cc
Duke MTMC pedestrian detection saliency maps for 8 cameras deployed on campus © megapixels.cc

{% include 'dashboard.html' %}

-

{% include 'supplementary_header.html' %}

-

Funding

+
 A collection of 1,600 out of the 2,700 students and passersby captured into the Duke MTMC surveillance research and development dataset on . These students were also included in the Duke MTMC Re-ID dataset extension used for person re-identification. Open Data Commons Attribution License.
A collection of 1,600 out of the 2,700 students and passersby captured into the Duke MTMC surveillance research and development dataset on . These students were also included in the Duke MTMC Re-ID dataset extension used for person re-identification. Open Data Commons Attribution License.

The 8 cameras deployed on Duke's campus were specifically setup to capture students "during periods between lectures, when pedestrian traffic is heavy". 6. Camera 5 was positioned to capture students as entering and exiting the university's main chapel. Each camera's location and approximate field of view. The heat map visualization shows the locations where pedestrians were most frequently annotated in each video from the Duke MTMC dataset.

+
 Duke MTMC camera locations on Duke University campus. Open Data Commons Attribution License.
Duke MTMC camera locations on Duke University campus. Open Data Commons Attribution License.
 Duke MTMC camera views for 8 cameras deployed on campus © megapixels.cc
Duke MTMC camera views for 8 cameras deployed on campus © megapixels.cc
 Duke MTMC pedestrian detection saliency maps for 8 cameras deployed on campus © megapixels.cc
Duke MTMC pedestrian detection saliency maps for 8 cameras deployed on campus © megapixels.cc
+

Who used Duke MTMC Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how Duke MTMC Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Duke Multi-Target, Multi-Camera Tracking Project was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+
+ +
+
+
+
+ +

Supplementary Information

+ +

Funding

Original funding for the Duke MTMC dataset was provided by the Army Research Office under Grant No. W911NF-10-1-0387 and by the National Science Foundation under Grants IIS-10-17017 and IIS-14-20894.

Video Timestamps

The video timestamps contain the likely, but not yet confirmed, date and times of capture. Because the video timestamps align with the start and stop time sync data provided by the researchers, it at least aligns the relative time. The rainy weather on that day also contributes towards the likelihood of March 14, 2014.

-

=== columns 2

- +
@@ -95,8 +152,7 @@ under Grants IIS-10-17017 and IIS-14-20894.

Camera Date
-

===========

- +
@@ -131,15 +187,30 @@ under Grants IIS-10-17017 and IIS-14-20894.

Camera Date
-

=== end columns

-

Opting Out

+

Opting Out

If you attended Duke University and were captured by any of the 8 surveillance cameras positioned on campus in 2014, there is unfortunately no way to be removed. The dataset files have been distributed throughout the world and it would not be possible to contact all the owners for removal. Nor do the authors provide any options for students to opt out, nor did they even inform students they would be used as test subjects for surveillance research and development in a project funded, in part, by the United States Army Research Office.

Notes

  • The Duke MTMC dataset paper mentions 2,700 identities, but their ground truth file only lists annotations for 1,812
-

{% include 'cite_our_work.html' %}

-

If you use any data from the Duke MTMC please follow their license and cite their work as:

+
+ +

Cite Our Work

+

+ + If you use our data, research, or graphics please cite our work: + +

+@online{megapixels,
+  author = {Harvey, Adam. LaPlace, Jules.},
+  title = {MegaPixels: Origins, Ethics, and Privacy Implications of Publicly Available Face Recognition Image Datasets},
+  year = 2019,
+  url = {https://megapixels.cc/},
+  urldate = {2019-04-20}
+}
+ +

+

If you use any data from the Duke MTMC please follow their license and cite their work as:

 @inproceedings{ristani2016MTMC,
  title =        {Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking},
diff --git a/site/public/datasets/feret/index.html b/site/public/datasets/feret/index.html
index 8af139ab..387826b0 100644
--- a/site/public/datasets/feret/index.html
+++ b/site/public/datasets/feret/index.html
@@ -42,9 +42,59 @@
   
Website
-

[ page under development ]

-

{% include 'dashboard.html' %}

-

(ignore) RESEARCH below this line

+

[ page under development ]

+
+

Who used LFW?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how LFW has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Labeled Faces in the Wild was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

(ignore) RESEARCH below this line

  • Years: 1993-1996
  • Images: 14,126
  • @@ -63,7 +113,7 @@
    • "A release form is necessary because of the privacy laws in the United States."
    -

    Funding

    +

    Funding

    The FERET program is sponsored by the U.S. Department of Defense’s Counterdrug Technology Development Program Office. The U.S. Army Research Laboratory (ARL) is the technical agent for the FERET program. ARL designed, administered, and scored the FERET tests. George Mason University collected, processed, and maintained the FERET database. Inquiries regarding the FERET database or test should be directed to P. Jonathon Phillips.

diff --git a/site/public/datasets/hrt_transgender/index.html b/site/public/datasets/hrt_transgender/index.html index 15cf4807..6b9ae7be 100644 --- a/site/public/datasets/hrt_transgender/index.html +++ b/site/public/datasets/hrt_transgender/index.html @@ -28,7 +28,7 @@
TBD
TBD

HRT Transgender Dataset

-
+

[ page under development ]

+

{% include 'dashboard.html' %}

diff --git a/site/public/datasets/lfpw/index.html b/site/public/datasets/lfpw/index.html index 7f16cd01..45de2599 100644 --- a/site/public/datasets/lfpw/index.html +++ b/site/public/datasets/lfpw/index.html @@ -27,7 +27,7 @@

Labeled Face Parts in The Wild

-
+
+

Who used LFPW?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how LFPW has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Labeled Face Parts in the Wild was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

RESEARCH below this line

Release 1 of LFPW consists of 1,432 faces from images downloaded from the web using simple text queries on sites such as google.com, flickr.com, and yahoo.com. Each image was labeled by three MTurk workers, and 29 fiducial points, shown below, are included in dataset. LFPW was originally described in the following publication:

Due to copyright issues, we cannot distribute image files in any format to anyone. Instead, we have made available a list of image URLs where you can download the images yourself. We realize that this makes it impossible to exactly compare numbers, as image links will slowly disappear over time, but we have no other option. This seems to be the way other large web-based databases seem to be evolving.

diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html index 54b10611..7997629f 100644 --- a/site/public/datasets/lfw/index.html +++ b/site/public/datasets/lfw/index.html @@ -28,7 +28,7 @@
Labeled Faces in The Wild (LFW) is the first facial recognition dataset created entirely from online photos
It includes 13,456 images of 4,432 people's images copied from the Internet during 2002-2004 and is the most frequently used dataset in the world for benchmarking face recognition algorithms.

Labeled Faces in the Wild

-
+

[ PAGE UNDER DEVELOPMENT ]

Labeled Faces in The Wild (LFW) is "a database of face photographs designed for studying the problem of unconstrained face recognition" 1. It is used to evaluate and improve the performance of facial recognition algorithms in academic, commercial, and government research. According to BiometricUpdate.com 3, LFW is "the most widely used evaluation set in the field of facial recognition, LFW attracts a few dozen teams from around the globe including Google, Facebook, Microsoft Research Asia, Baidu, Tencent, SenseTime, Face++ and Chinese University of Hong Kong."

The LFW dataset includes 13,233 images of 5,749 people that were collected between 2002-2004. LFW is a subset of Names of Faces and is part of the first facial recognition training dataset created entirely from images appearing on the Internet. The people appearing in LFW are...

The Names and Faces dataset was the first face recognition dataset created entirely from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

The Names and Faces dataset was the first face recognition dataset created entirely from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

-
All 5,379 people in the Labeled Faces in The Wild Dataset. Showing one face per person
All 5,379 people in the Labeled Faces in The Wild Dataset. Showing one face per person

The Names and Faces dataset was the first face recognition dataset created entirely from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

+
All 5,379 people in the Labeled Faces in The Wild Dataset. Showing one face per person
All 5,379 people in the Labeled Faces in The Wild Dataset. Showing one face per person

The Names and Faces dataset was the first face recognition dataset created entirely from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

The Names and Faces dataset was the first face recognition dataset created entirely from online photos. However, Names and Faces and LFW are not the first face recognition dataset created entirely "in the wild". That title belongs to the UCD dataset. Images obtained "in the wild" means using an image without explicit consent or awareness from the subject or photographer.

-

{% include 'dashboard.html' %}

-

{% include 'supplementary_header.html' %}

-

Commercial Use

+
+

Who used LFW?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how LFW has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Labeled Faces in the Wild was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+
+ +
+
+
+
+ +

Supplementary Information

+ +

Commercial Use

Add a paragraph about how usage extends far beyond academia into research centers for largest companies in the world. And even funnels into CIA funded research in the US and defense industry usage in China.

-
load_file assets/lfw_commercial_use.csv
-name_display, company_url, example_url, country, description
-
-

Research

+

Research

  • "In our experiments, we used 10000 images and associated captions from the Faces in the wilddata set [3]."
  • "This work was supported in part by the Center for Intelligent Information Retrieval, the Central Intelligence Agency, the National Security Agency and National Science Foundation under CAREER award IIS-0546666 and grant IIS-0326249."
  • @@ -77,7 +132,7 @@ name_display, company_url, example_url, country, description
  • The dataset includes 2 images of George Tenet, the former Director of Central Intelligence (DCI) for the Central Intelligence Agency whose facial biometrics were eventually used to help train facial recognition software in China and Russia
  • ./15/155205b8e288fd49bf203135871d66de879c8c04/paper.txt shows usage by DSTO Australia, supported parimal@iisc.ac.in
-
Created
2002 – 2004
Images
13,233
Identities
5,749
Origin
Yahoo! News Images
Used by
Facebook, Google, Microsoft, Baidu, Tencent, SenseTime, Face++, CIA, NSA, IARPA
Website
    +
Created
2002 – 2004
Images
13,233
Identities
5,749
Origin
Yahoo! News Images
Used by
Facebook, Google, Microsoft, Baidu, Tencent, SenseTime, Face++, CIA, NSA, IARPA
Website
  • There are about 3 men for every 1 woman in the LFW dataset 1
  • The person with the most images is George W. Bush with 530
  • There are about 3 George W. Bush's for every 1 Tony Blair
  • diff --git a/site/public/datasets/market_1501/index.html b/site/public/datasets/market_1501/index.html index a76a8859..7c545335 100644 --- a/site/public/datasets/market_1501/index.html +++ b/site/public/datasets/market_1501/index.html @@ -43,9 +43,59 @@
    Website
    -

    [ PAGE UNDER DEVELOPMENT]

    -

    {% include 'dashboard.html' %}

    -

    (ignore) research Notes

    +

    [ PAGE UNDER DEVELOPMENT]

    +
+

Who used Market 1501?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how Market 1501 has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Market 1501 Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

(ignore) research Notes

  • "MARS is an extension of the Market-1501 dataset. During collection, we placed six near synchronized cameras in the campus of Tsinghua university. There were Five 1,080×1920 HD cameras and one 640×480 SD camera. MARS consists of 1,261 different pedestrians whom are captured by at least 2 cameras. Given a query tracklet, MARS aims to retrieve tracklets that contain the same ID." - main paper
  • bbox "0065C1T0002F0016.jpg", "0065" is the ID of the pedestrian. "C1" denotes the first diff --git a/site/public/datasets/msceleb/index.html b/site/public/datasets/msceleb/index.html index 60b08b50..8b070118 100644 --- a/site/public/datasets/msceleb/index.html +++ b/site/public/datasets/msceleb/index.html @@ -28,7 +28,7 @@
    MS Celeb is a dataset of web images used for training and evaluating face recognition algorithms
    The MS Celeb dataset includes over 10,000,000 images and 93,000 identities of semi-public figures collected using the Bing search engine

    Microsoft Celeb Dataset (MS Celeb)

    -
    +

    [ PAGE UNDER DEVELOPMENT ]

    https://www.hrw.org/news/2019/01/15/letter-microsoft-face-surveillance-technology

    https://www.scmp.com/tech/science-research/article/3005733/what-you-need-know-about-sensenets-facial-recognition-firm

    -

    {% include 'dashboard.html' %}

    -

    {% include 'supplementary_header.html' %}

    -

    Additional Information

    +
    +

    Who used Microsoft Celeb?

    + +

    + This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

    + +
    + +
    + +
    +
    + +
    +
    +
    + +
    + +

    Biometric Trade Routes

    + +

    + To help understand how Microsoft Celeb has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Microsoft Celebrity Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

    + +
    + +
    +
    +
    + +
    +
      +
    • Academic
    • +
    • Commercial
    • +
    • Military / Government
    • +
    +
    Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
    +
    + + +
    + +

    Dataset Citations

    +

    + The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

    + +
    +
    + +
    +
    +
    +
    + +

    Supplementary Information

    + +

    Additional Information

    diff --git a/site/public/datasets/oxford_town_centre/index.html b/site/public/datasets/oxford_town_centre/index.html index d6f7378f..b48efe3e 100644 --- a/site/public/datasets/oxford_town_centre/index.html +++ b/site/public/datasets/oxford_town_centre/index.html @@ -28,7 +28,7 @@
    Oxford Town Centre is a dataset of surveillance camera footage from Cornmarket St Oxford, England
    The Oxford Town Centre dataset includes approximately 2,200 identities and is used for research and development of face recognition systems

    Oxford Town Centre

    -
    +

    The Oxford Town Centre dataset is a CCTV video of pedestrians in a busy downtown area in Oxford used for research and development of activity and face recognition systems. 1 The CCTV video was obtained from a public surveillance camera at the corner of Cornmarket and Market St. in Oxford, England and includes approximately 2,200 people. Since its publication in 2009 2 the Oxford Town Centre dataset has been used in over 80 verified research projects including commercial research by Amazon, Disney, OSRAM, and Huawei; and academic research in China, Israel, Russia, Singapore, the US, and Germany among dozens more.

    The Oxford Town Centre dataset is unique in that it uses footage from a public surveillance camera that would otherwise be designated for public safety. The video shows that the pedestrians act normally and unrehearsed indicating they neither knew of nor consented to participation in the research project.

    -

    {% include 'dashboard.html' %}

    -

    {% include 'supplementary_header.html' %}

    -

    Location

    +
    +

    Who used TownCentre?

    + +

    + This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

    + +
    + +
    + +
    +
    + +
    +
    +
    + +
    + +

    Biometric Trade Routes

    + +

    + To help understand how TownCentre has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Oxford Town Centre was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

    + +
    + +
    +
    +
    + +
    +
      +
    • Academic
    • +
    • Commercial
    • +
    • Military / Government
    • +
    +
    Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
    +
    + + +
    + +

    Dataset Citations

    +

    + The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

    + +
    +
    + +
    +
    +
    +
    + +

    Supplementary Information

    + +

    Location

    The street location of the camera used for the Oxford Town Centre dataset was confirmed by matching the road, benches, and store signs source. At that location, two public CCTV cameras exist mounted on the side of the Northgate House building at 13-20 Cornmarket St. Because of the lower camera's mounting pole directionality, a view from a private camera in the building across the street can be ruled out because it would have to show more of the silhouette of the lower camera's mounting pole. Two options remain: either the public CCTV camera mounted to the side of the building was used or the researchers mounted their own camera to the side of the building in the same location. Because the researchers used many other existing public CCTV cameras for their research projects it is likely that they would also be able to access this camera.

    To discredit the theory that this public CCTV is only seen pointing the other way in Google Street View images, at least one public photo shows the upper CCTV camera pointing in the same direction as the Oxford Town Centre dataset proving the camera can and has been rotated before.

    As for the capture date, the text on the storefront display shows a sale happening from December 2nd – 7th indicating the capture date was between or just before those dates. The capture year is either 2008 or 2007 since prior to 2007 the Carphone Warehouse (photo, history) did not exist at this location. Since the sweaters in the GAP window display are more similar to those in a GAP website snapshot from November 2007, our guess is that the footage was obtained during late November or early December 2007. The lack of street vendors and slight waste residue near the bench suggests that it was probably a weekday after rubbish removal.

    -
     Footage from this public CCTV camera was used to create the Oxford Town Centre dataset. Image sources: Google Street View (<a href="https://www.google.com/maps/@51.7528162,-1.2581152,3a,50.3y,310.59h,87.23t/data=!3m7!1e1!3m5!1s3FsGN-PqYC-VhQGjWgmBdQ!2e0!5s20120601T000000!7i13312!8i6656">map</a>)
    Footage from this public CCTV camera was used to create the Oxford Town Centre dataset. Image sources: Google Street View (map)

    ==== columns

    -
     Heat map body visualization of the pedestrians detected in the Oxford Town Centre dataset © megapixels.cc
    Heat map body visualization of the pedestrians detected in the Oxford Town Centre dataset © megapixels.cc

    ====

    -
     Heat map face visualization of the pedestrians detected in the Oxford Town Centre dataset © megapixels.cc
    Heat map face visualization of the pedestrians detected in the Oxford Town Centre dataset © megapixels.cc

    === end columns

    -

    {% include 'cite_our_work.html' %}

    +
     Footage from this public CCTV camera was used to create the Oxford Town Centre dataset. Image sources: Google Street View (<a href="https://www.google.com/maps/@51.7528162,-1.2581152,3a,50.3y,310.59h,87.23t/data=!3m7!1e1!3m5!1s3FsGN-PqYC-VhQGjWgmBdQ!2e0!5s20120601T000000!7i13312!8i6656">map</a>)
    Footage from this public CCTV camera was used to create the Oxford Town Centre dataset. Image sources: Google Street View (map)
     Heat map body visualization of the pedestrians detected in the Oxford Town Centre dataset © megapixels.cc
    Heat map body visualization of the pedestrians detected in the Oxford Town Centre dataset © megapixels.cc
     Heat map face visualization of the pedestrians detected in the Oxford Town Centre dataset © megapixels.cc
    Heat map face visualization of the pedestrians detected in the Oxford Town Centre dataset © megapixels.cc
    + +

    Cite Our Work

    +

    + + If you use our data, research, or graphics please cite our work: + +

    +@online{megapixels,
    +  author = {Harvey, Adam. LaPlace, Jules.},
    +  title = {MegaPixels: Origins, Ethics, and Privacy Implications of Publicly Available Face Recognition Image Datasets},
    +  year = 2019,
    +  url = {https://megapixels.cc/},
    +  urldate = {2019-04-20}
    +}
    + +

    References

    • a

      Benfold, Ben and Reid, Ian. "Stable Multi-Target Tracking in Real-Time Surveillance Video". CVPR 2011. Pages 3457-3464.

    • a

      "Guiding Visual Surveillance by Tracking Human Attention". 2009.

    diff --git a/site/public/datasets/pipa/index.html b/site/public/datasets/pipa/index.html index 28da8d4b..6c920b46 100644 --- a/site/public/datasets/pipa/index.html +++ b/site/public/datasets/pipa/index.html @@ -28,7 +28,7 @@
    People in Photo Albums (PIPA) is a dataset...
    [ add subdescrition ]

    People in Photo Albums

    -
    +

    [ PAGE UNDER DEVELOPMENT ]

    +
    +

    Who used PIPA Dataset?

    + +

    + This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

    + +
    + +
    + +
    +
    + +
    +
    +
    + +
    + +

    Biometric Trade Routes

    + +

    + To help understand how PIPA Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing People in Photo Albums Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

    + +
    + +
    +
    +
    + +
    +
      +
    • Academic
    • +
    • Commercial
    • +
    • Military / Government
    • +
    +
    Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
    +
    + + +
    + +

    Dataset Citations

    +

    + The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

    + +
    diff --git a/site/public/datasets/pubfig/index.html b/site/public/datasets/pubfig/index.html index 1a6ffebf..e81e12bc 100644 --- a/site/public/datasets/pubfig/index.html +++ b/site/public/datasets/pubfig/index.html @@ -28,7 +28,7 @@
    PubFig is a dataset...
    [ add subdescrition ]

    PubFig

    -
    +

    [ PAGE UNDER DEVELOPMENT ]

    +
    +

    Who used PubFig?

    + +

    + This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

    + +
    + +
    + +
    +
    + +
    +
    +
    + +
    + +

    Biometric Trade Routes

    + +

    + To help understand how PubFig has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Public Figures Face Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

    + +
    + +
    +
    +
    + +
    +
      +
    • Academic
    • +
    • Commercial
    • +
    • Military / Government
    • +
    +
    Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
    +
    + + +
    + +

    Dataset Citations

    +

    + The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

    + +
    diff --git a/site/public/datasets/uccs/index.html b/site/public/datasets/uccs/index.html index 4c106922..32f7cdb2 100644 --- a/site/public/datasets/uccs/index.html +++ b/site/public/datasets/uccs/index.html @@ -28,7 +28,7 @@
    UnConstrained College Students is a dataset of long-range surveillance photos of students on University of Colorado in Colorado Springs campus
    The UnConstrained College Students dataset includes 16,149 images of 1,732 students, faculty, and pedestrians and is used for developing face recognition and face detection algorithms

    UnConstrained College Students

    -
    +

    UnConstrained College Students (UCCS) is a dataset of long-range surveillance photos captured at University of Colorado Colorado Springs developed primarily for research and development of "face detection and recognition research towards surveillance applications" 1. According to the authors of two papers associated with the dataset, over 1,700 students and pedestrians were "photographed using a long-range high-resolution surveillance camera without their knowledge". 3 In this investigation, we examine the contents of the dataset, funding sources, photo EXIF data, and information from publicly available research project citations.

    The UCCS dataset includes over 1,700 unique identities, most of which are students walking to and from class. As of 2018, it was the "largest surveillance [face recognition] benchmark in the public domain." 4 The photos were taken during the spring semesters of 2012 – 2013 on the West Lawn of the University of Colorado Colorado Springs campus. The photographs were timed to capture students during breaks between their scheduled classes in the morning and afternoon during Monday through Thursday. "For example, a student taking Monday-Wednesday classes at 12:30 PM will show up in the camera on almost every Monday and Wednesday." 2.

    -
     Example images from the UnConstrained College Students Dataset.
    Example images from the UnConstrained College Students Dataset.

    The long-range surveillance images in the UnContsrained College Students dataset were captured using a Canon 7D 18 megapixel digital camera fitted with a Sigma 800mm F5.6 EX APO DG HSM telephoto lens and pointed out an office window across the university's West Lawn. The students were photographed from a distance of approximately 150 meters through an office window. "The camera [was] programmed to start capturing images at specific time intervals between classes to maximize the number of faces being captured." 2 +

     Example images from the UnConstrained College Students Dataset.
    Example images from the UnConstrained College Students Dataset.

    The long-range surveillance images in the UnConstrained College Students dataset were captured using a Canon 7D 18 megapixel digital camera fitted with a Sigma 800mm F5.6 EX APO DG HSM telephoto lens and pointed out an office window across the university's West Lawn. The students were photographed from a distance of approximately 150 meters through an office window. "The camera [was] programmed to start capturing images at specific time intervals between classes to maximize the number of faces being captured." 2 Their setup made it impossible for students to know they were being photographed, providing the researchers with realistic surveillance images to help build face detection and recognition systems for real world applications in defense, intelligence, and commercial applications.

    -
     The location at University of Colorado Colorado Springs where students were surreptitiously photographed with a long-range surveillance camera for use in a defense and intelligence agency funded research project on face recognition. Image: Google Maps
    The location at University of Colorado Colorado Springs where students were surreptitiously photographed with a long-range surveillance camera for use in a defense and intelligence agency funded research project on face recognition. Image: Google Maps

    In the two papers associated with the release of the UCCS dataset (Unconstrained Face Detection and Open-Set Face Recognition Challenge and Large Scale Unconstrained Open Set Face Database), the researchers disclosed their funding sources as ODNI (United States Office of Director of National Intelligence), IARPA (Intelligence Advance Research Projects Activity), ONR MURI (Office of Naval Research and The Department of Defense Multidisciplinary University Research Initiative), Army SBIR (Small Business Innovation Research), SOCOM SBIR (Special Operations Command and Small Business Innovation Research), and the National Science Foundation. Further, UCCS's VAST site explicity states they are part of the IARPA Janus, a face recognition project developed to serve the needs of national intelligence interests.

    +
     The location at University of Colorado Colorado Springs where students were surreptitiously photographed with a long-range surveillance camera for use in a defense and intelligence agency funded research project on face recognition. Image: Google Maps
    The location at University of Colorado Colorado Springs where students were surreptitiously photographed with a long-range surveillance camera for use in a defense and intelligence agency funded research project on face recognition. Image: Google Maps

    In the two papers associated with the release of the UCCS dataset (Unconstrained Face Detection and Open-Set Face Recognition Challenge and Large Scale Unconstrained Open Set Face Database), the researchers disclosed their funding sources as ODNI (United States Office of Director of National Intelligence), IARPA (Intelligence Advanced Research Projects Activity), ONR MURI (Office of Naval Research and The Department of Defense Multidisciplinary University Research Initiative), Army SBIR (Small Business Innovation Research), SOCOM SBIR (Special Operations Command and Small Business Innovation Research), and the National Science Foundation. Further, UCCS's VAST site explicitly states they are part of IARPA Janus, a face recognition project developed to serve the needs of national intelligence interests.

    The EXIF data embedded in the images shows that the photo capture times follow a similar pattern, but also highlights that the vast majority of photos (over 7,000) were taken on Tuesdays around noon during students' lunch break. The lack of any photos taken on Friday shows that the researchers were only interested in capturing images of students.

    -
     UCCS photos captured per weekday © megapixels.cc
    UCCS photos captured per weekday © megapixels.cc
     UCCS photos captured per weekday © megapixels.cc
    UCCS photos captured per weekday © megapixels.cc

    The two research papers associated with the release of the UCCS dataset (Unconstrained Face Detection and Open-Set Face Recognition Challenge and Large Scale Unconstrained Open Set Face Database), acknowledge that the primary funding sources for their work were United States defense and intelligence agencies. Specifically, development of the UnConstrained College Students dataset was funded by the Intelligence Advanced Research Projects Activity (IARPA), Office of Director of National Intelligence (ODNI), Office of Naval Research and The Department of Defense Multidisciplinary University Research Initiative (ONR MURI), Small Business Innovation Research (SBIR), Special Operations Command and Small Business Innovation Research (SOCOM SBIR), and the National Science Foundation. Further, UCCS's VAST site explicitly states they are part of IARPA Janus, a face recognition project developed to serve the needs of national intelligence interests, clearly establishing that the funding sources and immediate benefactors of this dataset are United States defense and intelligence agencies.

    +
     UCCS photos captured per weekday © megapixels.cc
    UCCS photos captured per weekday © megapixels.cc
     UCCS photos captured per weekday © megapixels.cc
    UCCS photos captured per weekday © megapixels.cc

    The two research papers associated with the release of the UCCS dataset (Unconstrained Face Detection and Open-Set Face Recognition Challenge and Large Scale Unconstrained Open Set Face Database), acknowledge that the primary funding sources for their work were United States defense and intelligence agencies. Specifically, development of the UnConstrained College Students dataset was funded by the Intelligence Advanced Research Projects Activity (IARPA), Office of Director of National Intelligence (ODNI), Office of Naval Research and The Department of Defense Multidisciplinary University Research Initiative (ONR MURI), Small Business Innovation Research (SBIR), Special Operations Command and Small Business Innovation Research (SOCOM SBIR), and the National Science Foundation. Further, UCCS's VAST site explicitly states they are part of IARPA Janus, a face recognition project developed to serve the needs of national intelligence interests, clearly establishing that the funding sources and immediate benefactors of this dataset are United States defense and intelligence agencies.

    Although the images were first captured in 2012 – 2013 the dataset was not publicly released until 2016. Then in 2017 the UCCS face dataset formed the basis for a defense and intelligence agency funded face recognition challenge project at the International Joint Biometrics Conference in Denver, CO. And in 2018 the dataset was again used for the 2nd Unconstrained Face Detection and Open Set Recognition Challenge at the European Computer Vision Conference (ECCV) in Munich, Germany.

    As of April 15, 2019, the UCCS dataset is no longer available for public download. But during the three years it was publicly available (2016-2019) the UCCS dataset appeared in at least 6 publicly available research papers including verified usage from Beihang University who is known to provide research and development for China's military.

    -

    {% include 'dashboard.html' %}

    -

    {% include 'supplementary_header.html' %}

    -

    To show the types of face images used in the UCCS student dataset while protecting their individual privacy, a generative adversarial network was used to interpolate between identities in the dataset. The image below shows a generative adversarial network trained on the UCCS face bounding box areas from 16,000 images and over 90,000 face regions.

    -
     GAN generated approximations of students in the UCCS dataset. © megapixels.cc 2018
    GAN generated approximations of students in the UCCS dataset. © megapixels.cc 2018

    === columns 2

    -

    UCCS photos taken in 2012

    +
    +

    Who used UCCS?

    + +

    + This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

    + +
    + +
    + +
    +
    + +
    +
    +
    + +
    + +

    Biometric Trade Routes

    + +

    + To help understand how UCCS has been used around the world by commercial, military, and academic organizations; existing publicly available research citing UnConstrained College Students Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

    + +
    + +
    +
    +
    + +
    +
      +
    • Academic
    • +
    • Commercial
    • +
    • Military / Government
    • +
    +
    Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
    +
    + + +
    + +

    Dataset Citations

    +

    + The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

    + +
    +
    + +
    +
    +
    +
    + +

    Supplementary Information

    + +

    To show the types of face images used in the UCCS student dataset while protecting their individual privacy, a generative adversarial network was used to interpolate between identities in the dataset. The image below shows a generative adversarial network trained on the UCCS face bounding box areas from 16,000 images and over 90,000 face regions.

    +
     GAN generated approximations of students in the UCCS dataset. © megapixels.cc 2018
    GAN generated approximations of students in the UCCS dataset. © megapixels.cc 2018

    UCCS photos taken in 2012

    @@ -120,8 +177,7 @@ Their setup made it impossible for students to know they were being photographed
    Date
    -

    ===========

    -

    UCCS photos taken in 2013

    +

    UCCS photos taken in 2013

    @@ -155,10 +211,9 @@ Their setup made it impossible for students to know they were being photographed
    Date
    -

    === end columns

    -

    Location

    +

    Location

    The location of the camera and subjects can be confirmed using several visual cues in the dataset images: the unique pattern of the sidewalk that is only used on the UCCS Pedestrian Spine near the West Lawn, the two UCCS sign poles with matching graphics still visible in Google Street View, the no parking sign and directionality of its arrow, the back of street sign next to it, the slight bend in the sidewalk, the presence of cars passing in the background of the image, and the far wall of the parking garage all match images in the dataset. The original papers also provide another clue: a picture of the camera inside the office that was used to create the dataset. The window view in this image provides another match for the brick pattern on the north facade of the Kraemer Family Library and the green metal fence along the sidewalk. View the location on Google Maps

    -
     3D view showing the angle of view of the surveillance camera used for UCCS dataset. Image: Google Maps
    3D view showing the angle of view of the surveillance camera used for UCCS dataset. Image: Google Maps

    Funding

    +
     3D view showing the angle of view of the surveillance camera used for UCCS dataset. Image: Google Maps
    3D view showing the angle of view of the surveillance camera used for UCCS dataset. Image: Google Maps

    Funding

    The UnConstrained College Students dataset is associated with two main research papers: "Large Scale Unconstrained Open Set Face Database" and "Unconstrained Face Detection and Open-Set Face Recognition Challenge". Collectively, these papers and the creation of the dataset have received funding from the following organizations:

    • ONR (Office of Naval Research) MURI (The Department of Defense Multidisciplinary University Research Initiative) grant N00014-08-1-0638
    • @@ -179,7 +234,23 @@ Their setup made it impossible for students to know they were being photographed -

      {% include 'cite_our_work.html' %}

      +
    + +

    Cite Our Work

    +

    + + If you use our data, research, or graphics please cite our work: + +

    +@online{megapixels,
    +  author = {Harvey, Adam. LaPlace, Jules.},
    +  title = {MegaPixels: Origins, Ethics, and Privacy Implications of Publicly Available Face Recognition Image Datasets},
    +  year = 2019,
    +  url = {https://megapixels.cc/},
    +  urldate = {2019-04-20}
    +}
    + +

    References

    • a

      "2nd Unconstrained Face Detection and Open Set Recognition Challenge." https://vast.uccs.edu/Opensetface/. Accessed April 15, 2019.

    • ab

      Sapkota, Archana and Boult, Terrance. "Large Scale Unconstrained Open Set Face Database." 2013.

    • a

      Günther, M. et al. "Unconstrained Face Detection and Open-Set Face Recognition Challenge," 2018. Arxiv 1708.02337v3.

      diff --git a/site/public/datasets/vgg_face2/index.html b/site/public/datasets/vgg_face2/index.html index e23a3afd..a9d318f1 100644 --- a/site/public/datasets/vgg_face2/index.html +++ b/site/public/datasets/vgg_face2/index.html @@ -48,9 +48,59 @@
      Website
      -

      [ page under development ]

      -

      {% include 'dashboard.html' %}

      -

      (ignore) research notes

      +

      [ page under development ]

      +
    +

    Who used Brainwash Dataset?

    + +

    + This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

    + +
    + +
    + +
    +
    + +
    +
    +
    + +
    + +

    Biometric Trade Routes

    + +

    + To help understand how Brainwash Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Brainwash Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

    + +
    + +
    +
    +
    + +
    +
      +
    • Academic
    • +
    • Commercial
    • +
    • Military / Government
    • +
    +
    Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
    +
    + + +
    + +

    Dataset Citations

    +

    + The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

    + +
    +

    (ignore) research notes

    • The VGG Face 2 dataset includes approximately 1,331 actresses, 139 presidents, 16 wives, 3 husbands, 2 snooker player, and 1 guru
    • The original VGGF2 name list has been updated with the results returned from Google Knowledge
    • @@ -58,7 +108,7 @@
    • The 97 names with a score of 0.75 or lower were manually reviewed and includes name changes validating using Wikipedia.org results for names such as "Bruce Jenner" to "Caitlyn Jenner", spousal last-name changes, and discretionary changes to improve search results such as combining nicknames with full name when appropriate, for example changing "Aleksandar Petrović" to "Aleksandar 'Aco' Petrović" and minor changes such as "Mohammad Ali" to "Muhammad Ali"
    • The 'Description' text was automatically added when the Knowledge Graph score was greater than 250
    -

    TODO

    +

    TODO

    • create name list, and populate with Knowledge graph information like LFW
    • make list of interesting number stats, by the numbers
    • diff --git a/site/public/datasets/viper/index.html b/site/public/datasets/viper/index.html index 6f646bb8..bc4ddd3d 100644 --- a/site/public/datasets/viper/index.html +++ b/site/public/datasets/viper/index.html @@ -28,7 +28,7 @@
      VIPeR is a person re-identification dataset of images captured at UC Santa Cruz in 2007
      VIPeR contains 1,264 images and 632 persons on the UC Santa Cruz campus and is used to train person re-identification algorithms for surveillance

      VIPeR Dataset

      -
      +

      [ page under development ]

      VIPeR (Viewpoint Invariant Pedestrian Recognition) is a dataset of pedestrian images captured at University of California Santa Cruz in 2007. According to the researchers 2 "cameras were placed in different locations in an academic setting and subjects were notified of the presence of cameras, but were not coached or instructed in any way."

      VIPeR is amongst the most widely used publicly available person re-identification datasets. In 2017 the VIPeR dataset was combined into a larger person re-identification dataset created by the Chinese University of Hong Kong called PETA (PEdesTrian Attribute).

      -

      {% include 'dashboard.html' %}

      +
      +

      Who used VIPeR?

      + +

      + This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

      + +
      + +
      + +
      +
      + +
      +
      +
      + +
      + +

      Biometric Trade Routes

      + +

      + To help understand how VIPeR has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Viewpoint Invariant Pedestrian Recognition was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

      + +
      + +
      +
      +
      + +
      +
        +
      • Academic
      • +
      • Commercial
      • +
      • Military / Government
      • +
      +
      Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
      +
      + + +
      + +

      Dataset Citations

      +

      + The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

      + +
      diff --git a/site/public/datasets/youtube_celebrities/index.html b/site/public/datasets/youtube_celebrities/index.html index c491e6af..69b3a02e 100644 --- a/site/public/datasets/youtube_celebrities/index.html +++ b/site/public/datasets/youtube_celebrities/index.html @@ -27,9 +27,59 @@

      YouTube Celebrities

      -

      [ page under development ]

      -

      {% include 'dashboard.html' %}

      -

      Notes...

      +

      [ page under development ]

      +
      +

      Who used YouTube Celebrities?

      + +

      + This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

      + +
      + +
      + +
      +
      + +
      +
      +
      + +
      + +

      Biometric Trade Routes

      + +

      + To help understand how YouTube Celebrities has been used around the world by commercial, military, and academic organizations; existing publicly available research citing YouTube Celebrities was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

      + +
      + +
      +
      +
      + +
      +
        +
      • Academic
      • +
      • Commercial
      • +
      • Military / Government
      • +
      +
      Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
      +
      + + +
      + +

      Dataset Citations

      +

      + The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

      + +
      +

      Notes...

      • Selected dataset sequences: (a) MBGC, (b) CMU MoBo, (c) First Honda/UCSD, and (d) YouTube Celebrities.
      • -- cgit v1.2.3-70-g09d2