From 828ab34ca5e01e03e055ef9e091a88cd516a6061 Mon Sep 17 00:00:00 2001 From: adamhrv Date: Mon, 15 Apr 2019 14:08:35 +0200 Subject: fix up duke --- site/public/about/assets/LICENSE/index.html | 58 ++++ site/public/about/attribution/index.html | 78 ++++++ site/public/about/index.html | 107 +++++++ site/public/about/legal/index.html | 108 +++++++ site/public/about/press/index.html | 59 ++++ .../datasets/50_people_one_question/index.html | 114 ++++++++ site/public/datasets/afad/index.html | 127 +++++++++ site/public/datasets/brainwash/index.html | 163 +++++++++++ site/public/datasets/caltech_10k/index.html | 124 +++++++++ site/public/datasets/celeba/index.html | 126 +++++++++ site/public/datasets/cofw/index.html | 179 ++++++++++++ site/public/datasets/duke_mtmc/index.html | 139 +++++++-- site/public/datasets/feret/index.html | 87 ++++++ site/public/datasets/hrt_transgender/index.html | 2 +- site/public/datasets/lfpw/index.html | 116 ++++++++ site/public/datasets/lfw/index.html | 2 +- site/public/datasets/market_1501/index.html | 132 +++++++++ site/public/datasets/msceleb/index.html | 4 +- site/public/datasets/oxford_town_centre/index.html | 29 +- site/public/datasets/pipa/index.html | 120 ++++++++ site/public/datasets/pubfig/index.html | 117 ++++++++ site/public/datasets/uccs/index.html | 274 ++++++++++++++++++ site/public/datasets/vgg_face2/index.html | 142 ++++++++++ site/public/datasets/viper/index.html | 122 ++++++++ .../public/datasets/youtube_celebrities/index.html | 113 ++++++++ site/public/index.html | 39 +++ site/public/info/index.html | 50 ++++ site/public/research/00_introduction/index.html | 101 +++++++ .../research/01_from_1_to_100_pixels/index.html | 139 +++++++++ .../research/02_what_computers_can_see/index.html | 310 +++++++++++++++++++++ site/public/research/index.html | 49 ++++ site/public/test/chart/index.html | 50 ++++ site/public/test/citations/index.html | 50 ++++ site/public/test/csv/index.html | 50 ++++ site/public/test/datasets/index.html | 50 
++++ site/public/test/face_search/index.html | 50 ++++ site/public/test/gallery/index.html | 68 +++++ site/public/test/index.html | 61 ++++ site/public/test/map/index.html | 50 ++++ site/public/test/name_search/index.html | 50 ++++ site/public/test/pie_chart/index.html | 50 ++++ 41 files changed, 3828 insertions(+), 31 deletions(-) create mode 100644 site/public/about/assets/LICENSE/index.html create mode 100644 site/public/about/attribution/index.html create mode 100644 site/public/about/index.html create mode 100644 site/public/about/legal/index.html create mode 100644 site/public/about/press/index.html create mode 100644 site/public/datasets/50_people_one_question/index.html create mode 100644 site/public/datasets/afad/index.html create mode 100644 site/public/datasets/brainwash/index.html create mode 100644 site/public/datasets/caltech_10k/index.html create mode 100644 site/public/datasets/celeba/index.html create mode 100644 site/public/datasets/cofw/index.html create mode 100644 site/public/datasets/feret/index.html create mode 100644 site/public/datasets/lfpw/index.html create mode 100644 site/public/datasets/market_1501/index.html create mode 100644 site/public/datasets/pipa/index.html create mode 100644 site/public/datasets/pubfig/index.html create mode 100644 site/public/datasets/uccs/index.html create mode 100644 site/public/datasets/vgg_face2/index.html create mode 100644 site/public/datasets/viper/index.html create mode 100644 site/public/datasets/youtube_celebrities/index.html create mode 100644 site/public/index.html create mode 100644 site/public/info/index.html create mode 100644 site/public/research/00_introduction/index.html create mode 100644 site/public/research/01_from_1_to_100_pixels/index.html create mode 100644 site/public/research/02_what_computers_can_see/index.html create mode 100644 site/public/research/index.html create mode 100644 site/public/test/chart/index.html create mode 100644 site/public/test/citations/index.html create mode 
100644 site/public/test/csv/index.html create mode 100644 site/public/test/datasets/index.html create mode 100644 site/public/test/face_search/index.html create mode 100644 site/public/test/gallery/index.html create mode 100644 site/public/test/index.html create mode 100644 site/public/test/map/index.html create mode 100644 site/public/test/name_search/index.html create mode 100644 site/public/test/pie_chart/index.html (limited to 'site/public') diff --git a/site/public/about/assets/LICENSE/index.html b/site/public/about/assets/LICENSE/index.html new file mode 100644 index 00000000..0d3a7878 --- /dev/null +++ b/site/public/about/assets/LICENSE/index.html @@ -0,0 +1,58 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+ +
+ +
+
+ +

and include this license and attribution protocol within any derivative work.

+

If you publish data derived from MegaPixels, the original dataset creators should first be notified.

+

The MegaPixels dataset is made available under the Open Data Commons Attribution License (https://opendatacommons.org/licenses/by/1.0/) and for academic use only.

+

READABLE SUMMARY OF Open Data Commons Attribution License

+

You are free:

+

To Share: To copy, distribute and use the dataset + To Create: To produce works from the dataset + To Adapt: To modify, transform and build upon the database

+

As long as you:

+

Attribute: You must attribute any public use of the database, or works produced from the database, in the manner specified in the license. For any use or redistribution of the database, or works produced from it, you must make clear to others the license of the database and keep intact any notices on the original database.

+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/about/attribution/index.html b/site/public/about/attribution/index.html new file mode 100644 index 00000000..0a1b8e0f --- /dev/null +++ b/site/public/about/attribution/index.html @@ -0,0 +1,78 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+ +
+ +
+
+ +

Legal

+
+ +

ATTRIBUTION PROTOCOL

+

If you use the MegaPixels data or any data derived from it, please cite the original work as follows:

+
+@online{megapixels,
+ author = {Harvey, Adam and LaPlace, Jules},
+ title = {MegaPixels: Origins, Ethics, and Privacy Implications of Publicly Available Face Recognition Image Datasets},
+ year = 2019,
+ url = {https://megapixels.cc/},
+ urldate = {2019-04-20}
+}
+

and include this license and attribution protocol within any derivative work.

+

If you publish data derived from MegaPixels, the original dataset creators should first be notified.

+

The MegaPixels dataset is made available under the Open Data Commons Attribution License (https://opendatacommons.org/licenses/by/1.0/) and for academic use only.

+

READABLE SUMMARY OF Open Data Commons Attribution License

+

You are free:

+

To Share: To copy, distribute and use the dataset +To Create: To produce works from the dataset +To Adapt: To modify, transform and build upon the database

+
+

As long as you:

+

Attribute: You must attribute any public use of the database, or works produced from the database, in the manner specified in the license. For any use or redistribution of the database, or works produced from it, you must make clear to others the license of the database and keep intact any notices on the original database.

+
+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/about/index.html b/site/public/about/index.html new file mode 100644 index 00000000..4a4ab3c6 --- /dev/null +++ b/site/public/about/index.html @@ -0,0 +1,107 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+ +
+ +
+
+ +

About MegaPixels

+
+ +

MegaPixels is an independent art and research project by Adam Harvey and Jules LaPlace that investigates the ethics, origins, and individual privacy implications of face recognition image datasets and their role in the expansion of biometric surveillance technologies.

+

The MegaPixels site is made possible with support from Mozilla

+
+
+

Adam Harvey

+

is a Berlin-based American artist and researcher. His previous projects (CV Dazzle, Stealth Wear, and SkyLift) explore the potential for counter-surveillance as artwork. He is the founder of VFRAME (visual forensics software for human rights groups) and is currently a researcher in residence at Karlsruhe HfG.

+

ahprojects.com

+

+
+
+

Jules LaPlace

+

is an American technologist and artist also based in Berlin. He was previously the CTO of a digital agency in NYC and now also works at VFRAME, developing computer vision and data analysis software for human rights groups. Jules also builds experimental software for artists and musicians. +

+

asdf.us

+
+

The MegaPixels website is based on an earlier installation from 2017 and ongoing research and lectures (TedX, CPDP) about facial recognition datasets. Over the last several years this project has evolved into a large-scale interrogation of hundreds of publicly-available face and person analysis datasets.

+

MegaPixels aims to provide a critical perspective on machine learning image datasets, one that might otherwise escape academia and the industry funded artificial intelligence think tanks that are often supported by the same technology companies who have created many of the datasets presented on this site.

+

MegaPixels is an independent project, designed as a public resource for educators, students, journalists, and researchers. Each dataset presented on this site undergoes a thorough review of its images, intent, and funding sources. Though the goals are similar to publishing a public academic paper, MegaPixels is a website-first research project that aligns closely with the goals of pre-print academic publications. As such, we welcome feedback and ways to improve this site and the clarity of the research.

+

Because this project surfaces many funding issues with datasets (from datasets funded by the C.I.A. to the National University of Defense Technology in China), it is important that we are transparent about our own funding. The original MegaPixels installation in 2017 was built as a commission for and with support from Tactical Technology Collective and Mozilla. The bulk of the research and web-development during 2018 - 2018 was supported by a grant from Mozilla. Continued development in 2019 is partially supported by a 1-year Researcher-in-Residence grant from Karlsruhe HfG, lecture and workshop fees, and from commissions and sales from the Privacy Gift Shop.

+

Please get in touch if you are interested in supporting this project.

+
Team
+
    +
  • Adam Harvey: Concept, research and analysis, design, computer vision
  • +
  • Jules LaPlace: Information and systems architecture, data management, web applications +You are free:
  • +
+
Contributing Researchers
+
    +
  • Berit Gilma
  • +
  • Beth (aka Ms. Celeb)
  • +
  • Mathana Stender
  • +
+
Code and Libraries
+
    +
  • Semantic Scholar for citation aggregation
  • +
  • Leaflet.js for maps
  • +
  • C3.js for charts
  • +
  • ThreeJS for 3D visualizations
  • +
  • PDFMiner.Six and Pandas for research paper data analysis
  • +
+

Please direct questions, comments, or feedback to mastodon.social/@adamhrv

+
Attribution
+

If you use MegaPixels or any data derived from it for your work, please cite our original work as follows:

+
+@online{megapixels,
+ author = {Harvey, Adam and LaPlace, Jules},
+ title = {MegaPixels: Origins, Ethics, and Privacy Implications of Publicly Available Face Recognition Image Datasets},
+ year = 2019,
+ url = {https://megapixels.cc/},
+ urldate = {2019-04-20}
+}
+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/about/legal/index.html b/site/public/about/legal/index.html new file mode 100644 index 00000000..9eb5dd5a --- /dev/null +++ b/site/public/about/legal/index.html @@ -0,0 +1,108 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+ +
+ +
+
+ +

Legal

+
+ +

MegaPixels.cc Terms and Privacy

+

MegaPixels is an independent and academic art and research project about the origins and ethics of publicly available face analysis image datasets. By accessing MegaPixels (the Service or Services) you agree to the terms and conditions set forth below.

+

Privacy

+

The MegaPixels site has been designed to minimize the number of network requests to 3rd party services and therefore prioritize the privacy of the viewer. This site does not use any local or external analytics programs to monitor site viewers. In fact, the only data collected are the necessary server logs used only for preventing misuse, which are deleted at short-term intervals.

+

3rd Party Services

+

In order to provide certain features of the site, some 3rd party services are needed. Currently, the MegaPixels.cc site uses two 3rd party services: (1) Leaflet.js for the interactive map and (2) Digital Ocean Spaces as a content delivery network. Both services encrypt your requests to their server using HTTPS and neither service requires storing any cookies or authentication. However, both services will store files in your web browser's local cache (local storage) to improve loading performance. None of these local storage files are used for analytics, tracking, or any similar purpose.

+

Links To Other Web Sites

+

The MegaPixels.cc site contains many links to 3rd party websites, especially in the list of citations that are provided for each dataset. This website has no control over, and assumes no responsibility for, the content, privacy policies, or practices of any third party web sites or services. You acknowledge and agree that megapixels.cc shall not be responsible or liable, directly or indirectly, for any damage or loss caused or alleged to be caused by or in connection with use of or reliance on any such content, goods or services available on or through any such web sites or services.

+

We advise you to read the terms and conditions and privacy policies of any third-party web sites or services that you visit.

+

Information We Collect

+

When you access the Service, we record your visit to the site in a server log file for the purposes of maintaining site security and preventing misuse. This includes your IP address and the header information sent by your web browser which includes the User Agent, referrer, and the requested page on our site.

+

Information We Share

+

We do not share or make public any information about individual site visitors, except where required by law, and then only to the extent that server logs are retained, which is for a limited duration.

+

Information We Provide

+

We provide information for educational, journalistic, and research purposes. The published information on MegaPixels is made available under the Open Data Commons Attribution License (https://opendatacommons.org/licenses/by/1.0/) and for academic use only.

+

You are free:

+

To Share: To copy, distribute and use the dataset +To Create: To produce works from the dataset +To Adapt: To modify, transform and build upon the database

+
+

As long as you:

+

Attribute: You must attribute any public use of the database, or works produced from the database, in the manner specified in the license. For any use or redistribution of the database, or works produced from it, you must make clear to others the license of the database and keep intact any notices on the original database.

+
+

If you use the MegaPixels data or any data derived from it, please cite the original work as follows:

+
+@online{megapixels,
+ author = {Harvey, Adam and LaPlace, Jules},
+ title = {MegaPixels: Origins, Ethics, and Privacy Implications of Publicly Available Face Recognition Image Datasets},
+ year = 2019,
+ url = {https://megapixels.cc/},
+ urldate = {2019-04-20}
+}
+

While every intention is made to publish only verifiable information, at times information may be edited, removed, or appended for clarity or correction. In no event will the operators of this site be liable for your use or misuse of the information provided.

+

We may terminate or suspend access to our Service immediately without prior notice or liability, for any reason whatsoever, including without limitation if you breach the Terms.

+

All provisions of the Terms which by their nature should survive termination shall survive termination, including, without limitation, ownership provisions, warranty disclaimers, indemnity and limitations of liability.

+

Prohibited Uses

+

You may not access or use, or attempt to access or use, the Services to take any action that could harm us or a third party. You may not use the Services in violation of applicable laws or in violation of our or any third party’s intellectual property or other proprietary or legal rights. You further agree that you shall not attempt (or encourage or support anyone else's attempt) to circumvent, reverse engineer, decrypt, or otherwise alter or interfere with the Services, or any content thereof, or make any unauthorized use thereof.

+

Without prior written consent, you shall not:

+

(i) access any part of the Services, Content, data or information you do not have permission or authorization to access;

+

(ii) use robots, spiders, scripts, service, software or any manual or automatic device, tool, or process designed to data mine or scrape the Content, data or information from the Services, or otherwise access or collect the Content, data or information from the Services using automated means;

+

(iii) use services, software or any manual or automatic device, tool, or process designed to circumvent any restriction, condition, or technological measure that controls access to the Services in any way, including overriding any security feature or bypassing or circumventing any access controls or use limits of the Services;

+

(iv) cache or archive the Content (except for a public search engine’s use of spiders for creating search indices) without prior written consent;

+

(v) take action that imposes an unreasonable or disproportionately large load on our network or infrastructure; and

+

(vi) do anything that could disable, damage or change the functioning or appearance of the Services, including the presentation of advertising.

+

Engaging in a prohibited use of the Services may result in civil, criminal, and/or administrative penalties, fines, or sanctions against the user and those assisting the user.

+

Governing Law

+

These Terms shall be governed and construed in accordance with the laws of Berlin, Germany, without regard to its conflict of law provisions.

+

Our failure to enforce any right or provision of these Terms will not be considered a waiver of those rights. If any provision of these Terms is held to be invalid or unenforceable by a court, the remaining provisions of these Terms will remain in effect. These Terms constitute the entire agreement between us regarding our Service, and supersede and replace any prior agreements we might have between us regarding the Service.

+

Indemnity

+

You hereby indemnify, defend and hold harmless MegaPixels (and its creators) and all officers, directors, owners, agents, information providers, affiliates, licensors and licensees (collectively, the "Indemnified Parties") from and against any and all liability and costs, including, without limitation, reasonable attorneys' fees, incurred by the Indemnified Parties in connection with any claim arising out of any breach by you or any user of your account of these Terms of Service or the foregoing representations, warranties and covenants. You shall cooperate as fully as reasonably required in the defense of any such claim. We reserve the right, at our own expense, to assume the exclusive defense and control of any matter subject to indemnification by you.

+

Changes

+

We reserve the right, at our sole discretion, to modify or replace these Terms at any time. By continuing to use or access our Service after revisions become effective, you agree to be bound by the revised terms. If you do not agree to revised terms, please do not use the Service.

+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/about/press/index.html b/site/public/about/press/index.html new file mode 100644 index 00000000..7b0a3e87 --- /dev/null +++ b/site/public/about/press/index.html @@ -0,0 +1,59 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+ +
+ +
+
+ +

Press

+
+ +
+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/datasets/50_people_one_question/index.html b/site/public/datasets/50_people_one_question/index.html new file mode 100644 index 00000000..dfd8cbff --- /dev/null +++ b/site/public/datasets/50_people_one_question/index.html @@ -0,0 +1,114 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+
50 People One Question Dataset
+
+ +
+
+ +
People One Question is a dataset of people from an online video series on YouTube and Vimeo used for building facial recognition algorithms
People One Question dataset includes ... +

50 People 1 Question

+

[ page under development ]

+
+

Who used 50 People One Question Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how 50 People One Question Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing 50 People One Question was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/datasets/afad/index.html b/site/public/datasets/afad/index.html new file mode 100644 index 00000000..df14e7cd --- /dev/null +++ b/site/public/datasets/afad/index.html @@ -0,0 +1,127 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+
Asian Face Age Dataset
+
+ +
+
+ +

Asian Face Age Dataset

+

[ page under development ]

+
+

Who used Asian Face Age Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how Asian Face Age Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing The Asian Face Age Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

(ignore) research notes

+

The Asian Face Age Dataset (AFAD) is a new dataset proposed for evaluating the performance of age estimation, which contains more than 160K facial images and the corresponding age and gender labels. This dataset is oriented to age estimation on Asian faces, so all the facial images are for Asian faces. It is noted that the AFAD is the biggest dataset for age estimation to date. It is well suited to evaluate how deep learning methods can be adopted for age estimation. +Motivation

+

For age estimation, there are several public datasets for evaluating the performance of a specific algorithm, such as FG-NET [1] (1002 face images), MORPH I (1690 face images), and MORPH II[2] (55,608 face images). Among them, the MORPH II is the biggest public dataset to date. On the other hand, as we know it is necessary to collect a large scale dataset to train a deep Convolutional Neural Network. Therefore, the MORPH II dataset is extensively used to evaluate how deep learning methods can be adopted for age estimation [3][4].

+

However, the ethnic distribution of the MORPH II dataset is very unbalanced, i.e., fewer than 1% of its faces are Asian. In order to evaluate the previous methods for age estimation on Asian Faces, the Asian Face Age Dataset (AFAD) was proposed.

+

There are 164,432 well-labeled photos in the AFAD dataset. It consists of 63,680 photos for female as well as 100,752 photos for male, and the ages range from 15 to 40. The distribution of photo counts for distinct ages is illustrated in the figure above. Some samples are shown in the Figure on the top. Its download link is provided in the "Download" section.

+

In addition, we also provide a subset of the AFAD dataset, called AFAD-Lite, which only contains PLACEHOLDER well-labeled photos. It consists of PLACEHOLDER photos for female as well as PLACEHOLDER photos for male, and the ages range from 15 to 40. The distribution of photo counts for distinct ages is illustrated in Fig. PLACEHOLDER. Its download link is also provided in the "Download" section.

+

The AFAD dataset is built by collecting selfie photos on a particular social network -- RenRen Social Network (RSN) [5]. The RSN is widely used by Asian students including middle school, high school, undergraduate, and graduate students. Even after leaving school, some people still access their RSN account to connect with their old classmates. So, the age of RSN users spans a wide range, from 15 to more than 40 years old.

+

Please note that this dataset is made available for academic research purposes only.

+
+

https://afad-dataset.github.io/

+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/datasets/brainwash/index.html b/site/public/datasets/brainwash/index.html new file mode 100644 index 00000000..03331a2d --- /dev/null +++ b/site/public/datasets/brainwash/index.html @@ -0,0 +1,163 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+
Brainwash Dataset
+
+ +
+
+ +
Brainwash is a dataset of webcam images taken from the Brainwash Cafe in San Francisco in 2014
The Brainwash dataset includes 11,918 images of "everyday life of a busy downtown cafe" and is used for training head detection surveillance algorithms +

Brainwash Dataset

+

Brainwash is a head detection dataset created from San Francisco's Brainwash Cafe livecam footage. It includes 11,918 images of "everyday life of a busy downtown cafe" 1 captured at 100 second intervals throughout the entire day. The Brainwash dataset was captured during 3 days in 2014: October 27, November 13, and November 24. According to the authors' research paper introducing the dataset, the images were acquired with the help of Angelcam.com. 2

+

Brainwash is not a widely used dataset but since its publication by Stanford University in 2015, it has notably appeared in several research papers from the National University of Defense Technology in Changsha, China. In 2016 and in 2017 researchers there conducted studies on detecting people's heads in crowded scenes for the purpose of surveillance. 3 4

+

If you happen to have been at Brainwash cafe in San Francisco at any time on October 27, November 13, or November 24 in 2014 you are most likely included in the Brainwash dataset and have unwittingly contributed to surveillance research.

+
+

Who used Brainwash Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how Brainwash Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Brainwash Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+
+ +
+
+
+
+ +

Supplementary Information

+ +
 A visualization of 81,973 head annotations from the Brainwash dataset training partition. © megapixels.cc
A visualization of 81,973 head annotations from the Brainwash dataset training partition. © megapixels.cc
 A sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The dataset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)
A sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The dataset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)
 49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)
49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)

TODO

+
    +
  • change supp images to 2x2 grid with bboxes
  • +
  • add bounding boxes to the header image
  • +
  • remake montage with randomized images, with bboxes
  • +
  • add ethics link to Stanford
  • +
  • add optout info
  • +
+
+ +

Cite Our Work

+

+ + If you use our data, research, or graphics please cite our work: + +

+@online{megapixels,
+  author = {Harvey, Adam and LaPlace, Jules},
+  title = {MegaPixels: Origins, Ethics, and Privacy Implications of Publicly Available Face Recognition Image Datasets},
+  year = 2019,
+  url = {https://megapixels.cc/},
+  urldate = {2019-04-20}
+}
+ +

+

References

  • a

    "readme.txt" https://exhibits.stanford.edu/data/catalog/sx925dc9385.

    +
  • a

    Stewart, Russell. Andriluka, Mykhaylo. "End-to-end people detection in crowded scenes". 2016.

    +
  • a

    Li, Y. and Dou, Y. and Liu, X. and Li, T. Localized Region Context and Object Feature Fusion for People Head Detection. ICIP16 Proceedings. 2016. Pages 594-598.

    +
  • a

    Zhao. X, Wang Y, Dou, Y. A Replacement Algorithm of Non-Maximum Suppression Base on Graph Clustering.

    +
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/datasets/caltech_10k/index.html b/site/public/datasets/caltech_10k/index.html new file mode 100644 index 00000000..00b5e7fd --- /dev/null +++ b/site/public/datasets/caltech_10k/index.html @@ -0,0 +1,124 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+
Caltech 10K Faces Dataset
+
+ +
+
+ +

Caltech 10K Faces Dataset

+

[ page under development ]

+
+

Who used Caltech 10K Faces Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how Caltech 10K Faces Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Caltech 10K Faces Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

(ignore) research notes

+

The dataset contains images of people collected from the web by typing common given names into Google Image Search. The coordinates of the eyes, the nose and the center of the mouth for each frontal face are provided in a ground truth file. This information can be used to align and crop the human faces or as a ground truth for a face detection algorithm. The dataset has 10,524 human faces of various resolutions and in different settings, e.g. portrait images, groups of people, etc. Profile faces or very low resolution faces are not labeled.

+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/datasets/celeba/index.html b/site/public/datasets/celeba/index.html new file mode 100644 index 00000000..c4caef20 --- /dev/null +++ b/site/public/datasets/celeba/index.html @@ -0,0 +1,126 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+
CelebA Dataset
+
+ +
+
+ +
CelebA is a dataset of people...
CelebA includes... +

CelebA Dataset

+

[ PAGE UNDER DEVELOPMENT ]

+
+

Who used CelebA Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how CelebA Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Large-scale CelebFaces Attributes Dataset was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

Research

+
    +
  • "An Unsupervised Approach to Solving Inverse Problems using Generative Adversarial Networks" mentions use by sponsored by an agency of the United States government. Neither the United States government nor Lawrence Livermore National Security, LLC, nor any of their"
  • +
  • 7dab6fbf42f82f0f5730fc902f72c3fb628ef2f0
  • +
  • principal responsibility is ensuring the safety, security and reliability of the nation's nuclear weapons NNSA ( National Nuclear Security Administration )
  • +
+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/datasets/cofw/index.html b/site/public/datasets/cofw/index.html new file mode 100644 index 00000000..4851e256 --- /dev/null +++ b/site/public/datasets/cofw/index.html @@ -0,0 +1,179 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+
COFW Dataset
+
+ +
+
+ +

Caltech Occluded Faces in the Wild

+

[ PAGE UNDER DEVELOPMENT ]

+
+

Who used COFW Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+
+ +
+
+
+ +
+ +

Biometric Trade Routes

+ +

+ To help understand how COFW Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Caltech Occluded Faces in the Wild was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org then dataset usage verified and geolocated.
+
+ + +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+

(ignore) research notes

+
Years
1993-1996
Images
14,126
Identities
1,199
Origin
Web Searches
Funded by
ODNI, IARPA, Microsoft

COFW "is designed to benchmark face landmark algorithms in realistic conditions, which include heavy occlusions and large shape variations" [Robust face landmark estimation under occlusion].

+

We asked four people with different levels of computer vision knowledge to each collect 250 faces representative of typical real-world images, with the clear goal of challenging computer vision methods. +The result is 1,007 images of faces obtained from a variety of sources.

+
+

Robust face landmark estimation under occlusion

+

Our face dataset is designed to present faces in real-world conditions. Faces show large variations in shape and occlusions due to differences in pose, expression, use of accessories such as sunglasses and hats and interactions with objects (e.g. food, hands, microphones, etc.). All images were hand annotated in our lab using the same 29 landmarks as in LFPW. We annotated both the landmark positions as well as their occluded/unoccluded state. The faces are occluded to different degrees, with large variations in the type of occlusions encountered. COFW has an average occlusion of over 23%. +To increase the number of training images, and since COFW has the exact same landmarks as LFPW, for training we use the original non-augmented 845 LFPW faces + 500 COFW faces (1345 total), and for testing the remaining 507 COFW faces. To make sure all images had occlusion labels, we annotated occlusion on the available 845 LFPW training images, finding an average of only 2% occlusion.

+
+

http://www.vision.caltech.edu/xpburgos/ICCV13/

+

This research is supported by NSF Grant 0954083 and by the Office of the Director of National Intelligence (ODNI), Intelligence Advanced Research Projects Activity (IARPA), via IARPA R&D Contract No. 2014-14071600012.

+
+

https://www.cs.cmu.edu/~peiyunh/topdown/

+
+ +

Biometric Trade Routes

+ +

+ To help understand how COFW Dataset has been used around the world by commercial, military, and academic organizations; existing publicly available research citing Caltech Occluded Faces in the Wild was collected, verified, and geocoded to show the biometric trade routes of people appearing in the images. Click on the location markers to reveal research projects at that location. +

+ +
+ +
+
+
+ +
+
    +
  • Academic
  • +
  • Commercial
  • +
  • Military / Government
  • +
+
Citation data is collected using SemanticScholar.org and then dataset usage verified and geolocated.
+
+ +
+
+
+
+ +

Supplementary Information

+ +
+ +

Dataset Citations

+

+ The dataset citations used in the visualizations were collected from Semantic Scholar, a website which aggregates and indexes research papers. Each citation was geocoded using names of institutions found in the PDF front matter, or as listed on other resources. These papers have been manually verified to show that researchers downloaded and used the dataset to train or test machine learning algorithms. +

+ +
+
+

Who used COFW Dataset?

+ +

+ This bar chart presents a ranking of the top countries where dataset citations originated. Mouse over individual columns to see yearly totals. These charts show at most the top 10 countries. +

+ +
+ +
+ +
+

TODO

+

- replace graphic

+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/datasets/duke_mtmc/index.html b/site/public/datasets/duke_mtmc/index.html index 9bec47ed..ba32484a 100644 --- a/site/public/datasets/duke_mtmc/index.html +++ b/site/public/datasets/duke_mtmc/index.html @@ -27,7 +27,7 @@
Duke MTMC is a dataset of surveillance camera footage of students on Duke University campus
Duke MTMC contains over 2 million video frames and 2,700 unique identities collected from 8 HD cameras at Duke University campus in March 2014 -

Duke MTMC

-

The Duke Multi-Target, Multi-Camera Tracking Dataset (MTMC) is a dataset of video recorded on Duke University campus for research and development of networked camera surveillance systems. MTMC tracking is used for citywide dragnet surveillance systems such as those used throughout China by SenseTime 1 and the oppressive monitoring of 2.5 million Uyghurs in Xinjiang by SenseNets 2. In fact researchers from both SenseTime 4 5 and SenseNets 3 used the Duke MTMC dataset for their research.

-

The Duke MTMC dataset is unique because it is the largest publicly available MTMC and person re-identification dataset and has the longest duration of annotated video. In total, the Duke MTMC dataset provides over 14 hours of 1080p video from 8 synchronized surveillance cameras. 6 It is among the most widely used person re-identification datasets in the world. The approximately 2,700 unique people in the Duke MTMC videos, most of whom are students, are used for research and development of surveillance technologies by commercial, academic, and even defense organizations.

-
 A collection of 1,600 out of the 2,700 students and passersby captured into the Duke MTMC surveillance research dataset. These students were also included in the Duke MTMC Re-ID dataset extension used for person re-identification. © megapixels.cc
A collection of 1,600 out of the 2,700 students and passersby captured into the Duke MTMC surveillance research dataset. These students were also included in the Duke MTMC Re-ID dataset extension used for person re-identification. © megapixels.cc

The creation and publication of the Duke MTMC dataset in 2016 was originally funded by the U.S. Army Research Laboratory and the National Science Foundation 6. Since 2016 use of the Duke MTMC dataset images have been publicly acknowledged in research funded by or on behalf of the Chinese National University of Defense 7 8, IARPA and IBM 9, and U.S. Department of Homeland Security 10.

-

The 8 cameras deployed on Duke's campus were specifically setup to capture students "during periods between lectures, when pedestrian traffic is heavy". 6 Camera 7 and 2 capture large groups of prospective students and children. Camera 5 was positioned to capture students as they enter and exit Duke University's main chapel. Each camera's location is documented below.

-
 Duke MTMC camera locations on Duke University campus © megapixels.cc
Duke MTMC camera locations on Duke University campus © megapixels.cc
 Duke MTMC camera views for 8 cameras deployed on campus © megapixels.cc
Duke MTMC camera views for 8 cameras deployed on campus © megapixels.cc
 Duke MTMC pedestrian detection saliency maps for 8 cameras deployed on campus © megapixels.cc
Duke MTMC pedestrian detection saliency maps for 8 cameras deployed on campus © megapixels.cc
+

Duke MTMC (Multi-Target, Multi-Camera Tracking) is a dataset of video recorded on Duke University campus for research and development of networked camera surveillance systems. MTMC tracking algorithms are used for citywide dragnet surveillance systems such as those used throughout China by SenseTime 1 and the oppressive monitoring of 2.5 million Uyghurs in Xinjiang by SenseNets 2. In fact researchers from both SenseTime 4 5 and SenseNets 3 used the Duke MTMC dataset for their research.

+

In this investigation into the Duke MTMC dataset, we found that researchers at Duke University in Durham, North Carolina captured over 2,000 students, faculty members, and passersby into one of the most prolific public surveillance research datasets that's used around the world by commercial and defense surveillance organizations.

+

Since its publication in 2016, the Duke MTMC dataset has been used in over 100 studies at organizations around the world including SenseTime 4 5, SenseNets 3, IARPA and IBM 9, Chinese National University of Defense 7 8, US Department of Homeland Security 10, Tencent, Microsoft, Microsoft Asia, Fraunhofer, Senstar Corp., Alibaba, Naver Labs, Google and Hewlett-Packard Labs to name only a few.

+

The creation of the Duke MTMC dataset in 2014 (published in 2016) was originally funded by the U.S. Army Research Laboratory and the National Science Foundation 6. However, our analysis of the geographic locations of the publicly available research shows over twice as many citations by researchers from China as from the United States (44% China, 20% United States). In 2018 alone, there were 70 research project citations from China.

+
 A collection of 1,600 out of the 2,700 students and passersby captured into the Duke MTMC surveillance research and development dataset. These students were also included in the Duke MTMC Re-ID dataset extension used for person re-identification. Open Data Commons Attribution License.
A collection of 1,600 out of the 2,700 students and passersby captured into the Duke MTMC surveillance research and development dataset. These students were also included in the Duke MTMC Re-ID dataset extension used for person re-identification. Open Data Commons Attribution License.

The 8 cameras deployed on Duke's campus were specifically set up to capture students "during periods between lectures, when pedestrian traffic is heavy". 6 Camera 5 was positioned to capture students entering and exiting the university's main chapel. Each camera's location and approximate field of view are shown below. The heat map visualization shows the locations where pedestrians were most frequently annotated in each video from the Duke MTMC dataset.

+
 Duke MTMC camera locations on Duke University campus. Open Data Commons Attribution License.
Duke MTMC camera locations on Duke University campus. Open Data Commons Attribution License.
 Duke MTMC camera views for 8 cameras deployed on campus © megapixels.cc
Duke MTMC camera views for 8 cameras deployed on campus © megapixels.cc
 Duke MTMC pedestrian detection saliency maps for 8 cameras deployed on campus © megapixels.cc
Duke MTMC pedestrian detection saliency maps for 8 cameras deployed on campus © megapixels.cc

Who used Duke MTMC Dataset?

@@ -110,18 +111,122 @@

Supplementary Information

-

Notes

-

The Duke MTMC dataset paper mentions 2,700 identities, but their ground truth file only lists annotations for 1,812

-

References

Funding

+

Original funding for the Duke MTMC dataset was provided by the Army Research Office under Grant No. W911NF-10-1-0387 and by the National Science Foundation +under Grants IIS-10-17017 and IIS-14-20894.

+

Video Timestamps

+

The video timestamps contain the likely, but not yet confirmed, date and times of capture. Because the video timestamps align with the start and stop time sync data provided by the researchers, the relative times are at least consistent. The rainy weather on that day also contributes towards the likelihood of March 14, 2014.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CameraDateStartEnd
Camera 1March 14, 20144:14PM5:43PM
Camera 2March 14, 20144:13PM4:43PM
Camera 3March 14, 20144:20PM5:48PM
Camera 4March 14, 20144:21PM5:54PM
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CameraDateStartEnd
Camera 5March 14, 20144:12PM5:43PM
Camera 6March 14, 20144:18PM5:43PM
Camera 7March 14, 20144:16PM5:40PM
Camera 8March 14, 20144:25PM5:42PM
+

Opting Out

+

If you attended Duke University and were captured by any of the 8 surveillance cameras positioned on campus in 2014, there is unfortunately no way to be removed. The dataset files have been distributed throughout the world and it would not be possible to contact all the owners for removal. The authors do not provide any option for students to opt out, nor did they even inform students that they would be used as test subjects for surveillance research and development in a project funded, in part, by the United States Army Research Office.

+

Notes

+
    +
  • The Duke MTMC dataset paper mentions 2,700 identities, but their ground truth file only lists annotations for 1,812
  • +
+
+ +

Cite Our Work

+

+ + If you use our data, research, or graphics please cite our work: + +

+@online{megapixels,
+  author = {Harvey, Adam and LaPlace, Jules},
+  title = {MegaPixels: Origins, Ethics, and Privacy Implications of Publicly Available Face Recognition Image Datasets},
+  year = 2019,
+  url = {https://megapixels.cc/},
+  urldate = {2019-04-20}
+}
+ +

+

If you use any data from the Duke MTMC please follow their license and cite their work as:

+
+@inproceedings{ristani2016MTMC,
+ title =        {Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking},
+ author =       {Ristani, Ergys and Solera, Francesco and Zou, Roger and Cucchiara, Rita and Tomasi, Carlo},
+ booktitle =    {European Conference on Computer Vision workshop on Benchmarking Multi-Target Tracking},
+ year =         {2016}
+}
+

References

diff --git a/site/public/datasets/feret/index.html b/site/public/datasets/feret/index.html new file mode 100644 index 00000000..089cd351 --- /dev/null +++ b/site/public/datasets/feret/index.html @@ -0,0 +1,87 @@ + + + + MegaPixels + + + + + + + + + + + +
+ + +
MegaPixels
+
FERET
+
+ +
+
+ +

Funding

+

The FERET program is sponsored by the U.S. Department of Defense’s Counterdrug Technology Development Program Office. The U.S. Army Research Laboratory (ARL) is the technical agent for the FERET program. ARL designed, administered, and scored the FERET tests. George Mason University collected, processed, and maintained the FERET database. Inquiries regarding the FERET database or test should be directed to P. Jonathon Phillips.

+
+ +
+ + + + + \ No newline at end of file diff --git a/site/public/datasets/hrt_transgender/index.html b/site/public/datasets/hrt_transgender/index.html index 486b9122..231a5271 100644 --- a/site/public/datasets/hrt_transgender/index.html +++ b/site/public/datasets/hrt_transgender/index.html @@ -27,7 +27,7 @@
TBD
TBD -