summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--client/chart/chart.container.js26
-rw-r--r--client/index.js4
-rw-r--r--megapixels/app/site/loader.py2
-rw-r--r--site/assets/css/css.css3
-rwxr-xr-xsite/assets/css/tabulator.css2
-rw-r--r--site/content/pages/datasets/50_people_one_question/assets/background.gifbin41564 -> 0 bytes
-rw-r--r--site/content/pages/datasets/50_people_one_question/index.md34
-rw-r--r--site/content/pages/datasets/brainwash/index.md3
-rw-r--r--site/content/pages/datasets/celeba/index.md42
-rw-r--r--site/content/pages/datasets/cofw/index.md8
-rw-r--r--site/content/pages/datasets/kitti/index.md42
-rw-r--r--site/content/pages/datasets/lfw/index.md7
-rw-r--r--site/content/pages/datasets/mars/index.md34
-rw-r--r--site/includes/chart.html11
-rw-r--r--site/includes/citations.html8
-rw-r--r--site/includes/map.html11
-rw-r--r--site/public/datasets/brainwash/index.html2
-rw-r--r--site/public/datasets/cofw/index.html2
-rw-r--r--site/public/datasets/lfw/index.html2
-rw-r--r--todo.md56
21 files changed, 253 insertions, 47 deletions
diff --git a/.gitignore b/.gitignore
index cdf46edc..74a8a054 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
#site/public/datasets/
#site/public/*.html
notebooks/_local_*
+site/datasets/final/*.csv
# Project specific
webpack-stats.dev.json
diff --git a/client/chart/chart.container.js b/client/chart/chart.container.js
index ea86f998..aa80100e 100644
--- a/client/chart/chart.container.js
+++ b/client/chart/chart.container.js
@@ -107,17 +107,17 @@ class ChartContainer extends Component {
)
)
- const maxCitationsInAYear = citationCountsByYear
- .slice(1)
- .reduce((a,b) => (
- Math.max(
- a,
- b.slice(1).reduce((a2, b2) => (
- a2 + b2
- ),
- 0)
- )
- ), 0)
+ let maxCitationsInAYear = 0
+ let currentSum = 0
+ // for each year...
+ for (let j = 1; j < citationCountsByYear[0].length; j++) {
+ // for each country
+ currentSum = 0
+ for (let i = 1; i < citationCountsByYear.length; i++) {
+ currentSum += citationCountsByYear[i][j]
+ }
+ maxCitationsInAYear = Math.max(currentSum, maxCitationsInAYear)
+ }
let ticks = []
for (let i = 0; i < maxCitationsInAYear; i += 50) {
@@ -149,7 +149,7 @@ class ChartContainer extends Component {
tick: {
values: ticks,
},
- default: [ 0, maxCitationsInAYear ],
+ default: [ 0, maxCitationsInAYear * 286 / 261 ],
show: true,
}
}}
@@ -165,7 +165,7 @@ class ChartContainer extends Component {
let countriesByYear = Object.keys(countriesByYearLookup).map(country => [country, countriesByYearLookup[country]]).sort((a,b) => b[1] - a[1])
let topCountriesForThisYear = countriesByYear.slice(0, topCountryCount)
let bottomTotal = countriesByYear.slice(topCountryCount).reduce((a,b) => (a + b[1]), 0)
- console.log(topCountriesForThisYear)
+ // console.log(topCountriesForThisYear)
topCountriesForThisYear.push([otherCountriesLabel, bottomTotal])
const tableRows = topCountriesForThisYear.filter(pair => !!pair[1]).map(([country, total]) => {
let colorIndex = topCountries.indexOf(country)
diff --git a/client/index.js b/client/index.js
index 87214925..dd60c0ed 100644
--- a/client/index.js
+++ b/client/index.js
@@ -89,8 +89,8 @@ function runApplets() {
}
console.log('dataset from path:', dataset)
} else {
- console.log('couldnt determine citations dataset')
- return null
+ console.log('not on a dataset page')
+ return [el, payload]
}
}
payload.dataset = dataset
diff --git a/megapixels/app/site/loader.py b/megapixels/app/site/loader.py
index 80ac4710..8fd7a2f8 100644
--- a/megapixels/app/site/loader.py
+++ b/megapixels/app/site/loader.py
@@ -83,6 +83,8 @@ def parse_metadata(fn, sections):
dataset_path = os.path.join(cfg.DIR_SITE_FINAL_CITATIONS, metadata['slug'] + '.json')
if os.path.exists(dataset_path):
metadata['meta'] = load_json(dataset_path)
+ if not metadata['meta']:
+ print("Bad metadata? {}".format(dataset_path))
return metadata, valid_sections
diff --git a/site/assets/css/css.css b/site/assets/css/css.css
index 675c6571..1633b0d7 100644
--- a/site/assets/css/css.css
+++ b/site/assets/css/css.css
@@ -250,6 +250,9 @@ p {
font-size: 16px;
font-weight: 300;
}
+p.subp{
+ font-size: 14px;
+}
.content a {
color: #fff;
text-decoration: none;
diff --git a/site/assets/css/tabulator.css b/site/assets/css/tabulator.css
index 63abf050..41c7ffa4 100755
--- a/site/assets/css/tabulator.css
+++ b/site/assets/css/tabulator.css
@@ -478,7 +478,7 @@
}
.tabulator-row .tabulator-responsive-collapse table {
- font-size: 14px;
+ font-size: 13px;
}
.tabulator-row .tabulator-responsive-collapse table tr td {
diff --git a/site/content/pages/datasets/50_people_one_question/assets/background.gif b/site/content/pages/datasets/50_people_one_question/assets/background.gif
deleted file mode 100644
index a0539bbb..00000000
--- a/site/content/pages/datasets/50_people_one_question/assets/background.gif
+++ /dev/null
Binary files differ
diff --git a/site/content/pages/datasets/50_people_one_question/index.md b/site/content/pages/datasets/50_people_one_question/index.md
new file mode 100644
index 00000000..e7dec0aa
--- /dev/null
+++ b/site/content/pages/datasets/50_people_one_question/index.md
@@ -0,0 +1,34 @@
+------------
+
+status: published
+title: 50 People One Question
+desc: <span style="color:#ffaa00">People One Question</span> is a dataset of people from an online video series on YouTube and Vimeo used for building facial recognition algorithms
+subdesc: People One Question dataset includes ...
+slug: 50_people_one_question
+cssclass: dataset
+image: assets/background.jpg
+published: 2019-2-23
+updated: 2019-2-23
+authors: Adam Harvey
+
+------------
+
+### sidebar
+
++ Collected: TBD
++ Published: TBD
++ Images: TBD
++ Faces: TBD
+
+
+## 50 People 1 Question
+
+ At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non-provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio.
+
+ Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat, facere possimus, omnis voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet, ut et voluptates repudiandae sint et molestiae non-recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat
+
+{% include 'map.html' %}
+
+{% include 'supplementary_header.html' %}
+
+{% include 'citations.html' %} \ No newline at end of file
diff --git a/site/content/pages/datasets/brainwash/index.md b/site/content/pages/datasets/brainwash/index.md
index 83c30be8..5fe0da4c 100644
--- a/site/content/pages/datasets/brainwash/index.md
+++ b/site/content/pages/datasets/brainwash/index.md
@@ -8,6 +8,7 @@ slug: brainwash
cssclass: dataset
color: #ffaa00
image: assets/background.jpg
+year: 2015
published: 2019-2-23
updated: 2019-2-23
authors: Adam Harvey
@@ -44,11 +45,11 @@ Since it's publication by Stanford in 2015, the Brainwash dataset has appeared i
{% include 'map.html' %}
-
{% include 'supplementary_header.html' %}
{% include 'citations.html' %}
+{% include 'chart.html' %}
### Additional Information
diff --git a/site/content/pages/datasets/celeba/index.md b/site/content/pages/datasets/celeba/index.md
new file mode 100644
index 00000000..19b0291d
--- /dev/null
+++ b/site/content/pages/datasets/celeba/index.md
@@ -0,0 +1,42 @@
+------------
+
+status: published
+title: CelebA
+desc: <span style="color:#ffaa00">CelebA</span> is a dataset of people...
+subdesc: CelebA includes...
+slug: celeba
+cssclass: dataset
+image: assets/background.jpg
+published: 2019-2-23
+updated: 2019-2-23
+authors: Adam Harvey
+
+------------
+
+### sidebar
+
++ Collected: TBD
++ Published: TBD
++ Images: TBD
++ Faces: TBD
+
+
+## CelebA
+
+At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non-provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio.
+
+Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat, facere possimus, omnis voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet, ut et voluptates repudiandae sint et molestiae non-recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat
+
+{% include 'map.html' %}
+
+{% include 'supplementary_header.html' %}
+
+{% include 'citations.html' %}
+
+-----
+
+### Research
+
+- "An Unsupervised Approach to Solving Inverse Problems using Generative Adversarial Networks" mentions use by sponsored by an agency of the United States government. Neither the United States government nor Lawrence Livermore National Security, LLC, nor any of their"
+- 7dab6fbf42f82f0f5730fc902f72c3fb628ef2f0
+- principal responsibility is ensuring the safety, security and reliability of the nation's nuclear weapons NNSA ( National Nuclear Security Administration ) \ No newline at end of file
diff --git a/site/content/pages/datasets/cofw/index.md b/site/content/pages/datasets/cofw/index.md
index 7a668cec..3b1cdb2b 100644
--- a/site/content/pages/datasets/cofw/index.md
+++ b/site/content/pages/datasets/cofw/index.md
@@ -44,6 +44,14 @@ Robust face landmark estimation under occlusion
<https://www.cs.cmu.edu/~peiyunh/topdown/>
+{% include 'map.html' %}
+
+{% include 'supplementary_header.html' %}
+
+{% include 'citations.html' %}
+
+{% include 'chart.html' %}
+
TODO
- replace graphic
diff --git a/site/content/pages/datasets/kitti/index.md b/site/content/pages/datasets/kitti/index.md
new file mode 100644
index 00000000..25d0da69
--- /dev/null
+++ b/site/content/pages/datasets/kitti/index.md
@@ -0,0 +1,42 @@
+------------
+
+status: published
+title: KITTI
+desc: <span style="color:#ffaa00">Kitti</span> TBD
+subdesc: TBD
+slug: kitti
+cssclass: dataset
+image: assets/background.jpg
+year: 2015
+published: 2019-2-23
+updated: 2019-2-23
+authors: Adam Harvey
+
+------------
+
+### sidebar
+
++ Collected: TBD
+
+## Kitti
+
+add text
+
+{% include 'map.html' %}
+
+
+{% include 'supplementary_header.html' %}
+
+{% include 'citations.html' %}
+
+
+### Additional Information
+
+- The dataset author spoke about his research at the CVPR conference in 2016 <https://www.youtube.com/watch?v=Nl2fBKxwusQ>
+
+
+### Footnotes
+
+[^readme]: "readme.txt" https://exhibits.stanford.edu/data/catalog/sx925dc9385.
+[^localized_region_context]: Li, Y. and Dou, Y. and Liu, X. and Li, T. Localized Region Context and Object Feature Fusion for People Head Detection. ICIP16 Proceedings. 2016. Pages 594-598.
+[^replacement_algorithm]: Zhao. X, Wang Y, Dou, Y. A Replacement Algorithm of Non-Maximum Suppression Base on Graph Clustering. \ No newline at end of file
diff --git a/site/content/pages/datasets/lfw/index.md b/site/content/pages/datasets/lfw/index.md
index 7ccbfb0b..833c6963 100644
--- a/site/content/pages/datasets/lfw/index.md
+++ b/site/content/pages/datasets/lfw/index.md
@@ -6,7 +6,11 @@ desc: Labeled Faces in The Wild (LFW) is a database of face photographs designed
subdesc: It includes 13,456 images of 4,432 people's images copied from the Internet during 2002-2004.
image: assets/background.jpg
slug: lfw
+<<<<<<< HEAD
+year: 2007
+=======
color: #ff0000
+>>>>>>> e6c50e5550275b8e9d2245201c77c6f9fef7a11a
published: 2019-2-23
updated: 2019-2-23
authors: Adam Harvey
@@ -52,6 +56,8 @@ The *Names and Faces* dataset was the first face recognition dataset created ent
{% include 'citations.html' %}
+{% include 'chart.html' %}
+
### Commercial Use
Add a paragraph about how usage extends far beyond academia into research centers for largest companies in the world. And even funnels into CIA funded research in the US and defense industry usage in China.
@@ -89,6 +95,7 @@ Ignore text below these lines
- All images in the LFW dataset were copied from Yahoo News between 2002 - 2004
- In 2014, two of the four original authors of the LFW dataset received funding from IARPA and ODNI for their followup paper [Labeled Faces in the Wild: Updates and New Reporting Procedures](https://www.semanticscholar.org/paper/Labeled-Faces-in-the-Wild-%3A-Updates-and-New-Huang-Learned-Miller/2d3482dcff69c7417c7b933f22de606a0e8e42d4) via IARPA contract number 2014-14071600010
- The dataset includes 2 images of [George Tenet](http://vis-www.cs.umass.edu/lfw/person/George_Tenet.html), the former Director of Central Intelligence (DCI) for the Central Intelligence Agency whose facial biometrics were eventually used to help train facial recognition software in China and Russia
+- ./15/155205b8e288fd49bf203135871d66de879c8c04/paper.txt shows usage by DSTO Australia, supported parimal@iisc.ac.in
### Footnotes
diff --git a/site/content/pages/datasets/mars/index.md b/site/content/pages/datasets/mars/index.md
new file mode 100644
index 00000000..19f9ced4
--- /dev/null
+++ b/site/content/pages/datasets/mars/index.md
@@ -0,0 +1,34 @@
+------------
+
+status: published
+title: MARS
+desc: <span style="color:#ffaa00">MARS</span> is a dataset of people...
+subdesc: MARS includes...
+slug: mars
+cssclass: dataset
+image: assets/background.jpg
+published: 2019-2-23
+updated: 2019-2-23
+authors: Adam Harvey
+
+------------
+
+### sidebar
+
++ Collected: TBD
++ Published: TBD
++ Images: TBD
++ Faces: TBD
+
+
+## MARS
+
+At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non-provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem rerum facilis est et expedita distinctio.
+
+Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat, facere possimus, omnis voluptas assumenda est, omnis dolor repellendus. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet, ut et voluptates repudiandae sint et molestiae non-recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat
+
+{% include 'map.html' %}
+
+{% include 'supplementary_header.html' %}
+
+{% include 'citations.html' %} \ No newline at end of file
diff --git a/site/includes/chart.html b/site/includes/chart.html
new file mode 100644
index 00000000..63108df1
--- /dev/null
+++ b/site/includes/chart.html
@@ -0,0 +1,11 @@
+<section>
+ <p>
+ This bar chart presents a ranking of the top countries where citations originated. Mouse over individual columns
+ to see yearly totals. Colors are only assigned to the top 10 overall countries.
+ </p>
+
+ </section>
+
+<section class="applet_container">
+ <div class="applet" data-payload="{&quot;command&quot;: &quot;chart&quot;}"></div>
+</section>
diff --git a/site/includes/citations.html b/site/includes/citations.html
index ed54b9b1..a37cc43a 100644
--- a/site/includes/citations.html
+++ b/site/includes/citations.html
@@ -1,6 +1,12 @@
<section class="applet_container">
<h3>Citations</h3>
- <p>Add graph showing distribution by country. Add information about how the citations were generated. Add button/link to download CSV</p>
+ <p>
+ Citations were collected from <a href="https://www.semanticscholar.org">Semantic Scholar</a>, a website which aggregates
+ and indexes research papers. Metadata was extracted from these papers, including extracting names of institutions automatically from PDFs, and then the addresses were geocoded. Data is not yet manually verified, and reflects any time the paper was cited. Some papers may only mention the dataset in passing, while others use it as part of their research methodology.
+ </p>
+ <p>
+ Add button/link to download CSV
+ </p>
<div class="applet" data-payload="{&quot;command&quot;: &quot;citations&quot;}"></div>
</section> \ No newline at end of file
diff --git a/site/includes/map.html b/site/includes/map.html
index 9b99a732..6d4b43b4 100644
--- a/site/includes/map.html
+++ b/site/includes/map.html
@@ -1,6 +1,6 @@
<section>
- <h3>Information Supply Chain</h3>
+ <h3>Biometric Trade Routes (beta)</h3>
<!--
<div class="map-sidebar right-sidebar">
<h3>Legend</h3>
@@ -12,7 +12,8 @@
</div>
-->
<p>
- To understand how and where this dataset has been used, organizations using the dataset are plotted below. The data is generated by collecting all citations for all the original research papers associated with the dataset. The PDFs are then converted to text and the organization names are extracted and geocoded. Because of the automated approach to extracting data, <span style="color:#ff8a72">not all organizations have been confirmed as using the dataset</span>. This visualization is provided to help locate and confirm usage and will be updated as data noise is reduced.
+ To understand how this dataset has been used around the world and how it has
+ affected global research on computer vision, surveillance, defense, and consumer technology, the locations of each organization that used or referenced the dataset are plotted below.
</p>
</section>
@@ -27,3 +28,9 @@
<div class="map-legend-item gov">Government</div>
Data is compiled from <a href="https://www.semanticscholar.org">Semantic Scholar</a> and not yet manually verified.
</div>
+
+<section>
+ <p class='subp'>
+ The data is generated by collecting all citations for all original research papers associated with the dataset. The PDFs are then converted to text and the organization names are extracted and geocoded. Because of the automated approach to extracting data, actual use of the dataset cannot yet be confirmed. This visualization is provided to help locate and confirm usage and will be updated as data noise is reduced.
+ </p>
+</section>
diff --git a/site/public/datasets/brainwash/index.html b/site/public/datasets/brainwash/index.html
index 33a10dde..b52cbca3 100644
--- a/site/public/datasets/brainwash/index.html
+++ b/site/public/datasets/brainwash/index.html
@@ -32,7 +32,7 @@
<p><em>Brainwash</em> is a face detection dataset created from the Brainwash Cafe's livecam footage including 11,918 images of "everyday life of a busy downtown cafe<a class="footnote_shim" name="[^readme]_1"> </a><a href="#[^readme]" class="footnote" title="Footnote 1">1</a>". The images are used to develop face detection algorithms for the "challenging task of detecting people in crowded scenes" and tracking them.</p>
<p>Before closing in 2017, Brainwash Cafe was a "cafe and laundromat" located in San Francisco's SoMA district. The cafe published a publicy available livestream from the cafe with a view of the cash register, performance stage, and seating area.</p>
<p>Since it's publication by Stanford in 2015, the Brainwash dataset has appeared in several notable research papers. In September 2016 four researchers from the National University of Defense Technology in Changsha, China used the Brainwash dataset for a research study on "people head detection in crowded scenes", concluding that their algorithm "achieves superior head detection performance on the crowded scenes dataset<a class="footnote_shim" name="[^localized_region_context]_1"> </a><a href="#[^localized_region_context]" class="footnote" title="Footnote 2">2</a>". And again in 2017 three researchers at the National University of Defense Technology used Brainwash for a study on object detection noting "the data set used in our experiment is shown in Table 1, which includes one scene of the brainwash dataset<a class="footnote_shim" name="[^replacement_algorithm]_1"> </a><a href="#[^replacement_algorithm]" class="footnote" title="Footnote 3">3</a>".</p>
-</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/brainwash/assets/00425000_960.jpg' alt=' An sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The datset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)'><div class='caption'> An sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The datset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)</div></div></section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/brainwash/assets/brainwash_montage.jpg' alt=' 49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)'><div class='caption'> 49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)</div></div></section><section> <h3>Information Supply Chain</h3><!-- <div class="map-sidebar right-sidebar"> <h3>Legend</h3> <ul> <li><span style="color: #f2f293">&#9632;</span> Industry</li> <li><span style="color: #f30000">&#9632;</span> Academic</li> <li><span style="color: #3264f6">&#9632;</span> Government</li> </ul> </div> --> <p> To understand how and where this dataset has been used, organizations using the dataset are plotted below. The data is generated by collecting all citations for all the original research papers associated with the dataset. The PDFs are then converted to text and the organization names are extracted and geocoded. Because of the automated approach to extracting data, <span style="color:#ff8a72">not all organizations have been confirmed as using the dataset</span>. This visualization is provided to help locate and confirm usage and will be updated as data noise is reduced. 
</p> </section><section class="applet_container"> <div class="applet" data-payload="{&quot;command&quot;: &quot;map&quot;}"></div></section><div class="caption"> <div class="map-legend-item edu">Academic</div> <div class="map-legend-item com">Industry</div> <div class="map-legend-item gov">Government</div> Data is compiled from <a href="https://www.semanticscholar.org">Semantic Scholar</a> and not yet manually verified.</div><section> <div class="hr-wave-holder"> <div class="hr-wave-line hr-wave-line1"></div> <div class="hr-wave-line hr-wave-line2"></div> </div> <h2>Supplementary Information</h2></section><section class="applet_container"> <h3>Citations</h3> <p>Add graph showing distribution by country. Add information about how the citations were generated. Add button/link to download CSV</p> <div class="applet" data-payload="{&quot;command&quot;: &quot;citations&quot;}"></div></section><section><h3>Additional Information</h3>
+</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/brainwash/assets/00425000_960.jpg' alt=' An sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The datset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)'><div class='caption'> An sample image from the Brainwash dataset used for training face and head detection algorithms for surveillance. The datset contains about 12,000 images. License: Open Data Commons Public Domain Dedication (PDDL)</div></div></section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/brainwash/assets/brainwash_montage.jpg' alt=' 49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)'><div class='caption'> 49 of the 11,918 images included in the Brainwash dataset. License: Open Data Commons Public Domain Dedication (PDDL)</div></div></section><section> <h3>Information Supply Chain</h3><!-- <div class="map-sidebar right-sidebar"> <h3>Legend</h3> <ul> <li><span style="color: #f2f293">&#9632;</span> Industry</li> <li><span style="color: #f30000">&#9632;</span> Academic</li> <li><span style="color: #3264f6">&#9632;</span> Government</li> </ul> </div> --> <p> To understand how and where this dataset has been used, organizations using the dataset are plotted below. The data is generated by collecting all citations for all the original research papers associated with the dataset. The PDFs are then converted to text and the organization names are extracted and geocoded. Because of the automated approach to extracting data, <span style="color:#ff8a72">not all organizations have been confirmed as using the dataset</span>. This visualization is provided to help locate and confirm usage and will be updated as data noise is reduced. 
</p> </section><section class="applet_container"> <div class="applet" data-payload="{&quot;command&quot;: &quot;map&quot;}"></div></section><div class="caption"> <div class="map-legend-item edu">Academic</div> <div class="map-legend-item com">Industry</div> <div class="map-legend-item gov">Government</div> Data is compiled from <a href="https://www.semanticscholar.org">Semantic Scholar</a> and not yet manually verified.</div><section> <div class="hr-wave-holder"> <div class="hr-wave-line hr-wave-line1"></div> <div class="hr-wave-line hr-wave-line2"></div> </div> <h2>Supplementary Information</h2></section><section class="applet_container"> <h3>Citations</h3> <p> Citations were collected from <a href="https://www.semanticscholar.org">Semantic Scholar</a>, a website which aggregates and indexes research papers. Metadata was extracted from these papers, including extracting names of institutions automatically from PDFs, and then the addresses were geocoded. Data is not yet manually verified, and reflects anytime the paper was cited. Some papers may only mention the dataset in passing, while others use it as part of their research methodology. </p> <p> Add button/link to download CSV </p> <div class="applet" data-payload="{&quot;command&quot;: &quot;citations&quot;}"></div></section><section> <p> This bar chart presents a ranking of the top countries where citations originated. Mouse over individual columns to see yearly totals. Colors are only assigned to the top 10 overall countries. </p> </section><section class="applet_container"> <div class="applet" data-payload="{&quot;command&quot;: &quot;chart&quot;}"></div></section><section><h3>Additional Information</h3>
<ul>
<li>The dataset author spoke about his research at the CVPR conference in 2016 <a href="https://www.youtube.com/watch?v=Nl2fBKxwusQ">https://www.youtube.com/watch?v=Nl2fBKxwusQ</a></li>
</ul>
diff --git a/site/public/datasets/cofw/index.html b/site/public/datasets/cofw/index.html
index 82842955..b4addd20 100644
--- a/site/public/datasets/cofw/index.html
+++ b/site/public/datasets/cofw/index.html
@@ -41,7 +41,7 @@ To increase the number of training images, and since COFW has the exact same la
<blockquote><p>This research is supported by NSF Grant 0954083 and by the Office of the Director of National Intelligence (ODNI), Intelligence Advanced Research Projects Activity (IARPA), via IARPA R&amp;D Contract No. 2014-14071600012.</p>
</blockquote>
<p><a href="https://www.cs.cmu.edu/~peiyunh/topdown/">https://www.cs.cmu.edu/~peiyunh/topdown/</a></p>
-<p>TODO</p>
+</section><section> <h3>Information Supply Chain</h3><!-- <div class="map-sidebar right-sidebar"> <h3>Legend</h3> <ul> <li><span style="color: #f2f293">&#9632;</span> Industry</li> <li><span style="color: #f30000">&#9632;</span> Academic</li> <li><span style="color: #3264f6">&#9632;</span> Government</li> </ul> </div> --> <p> To understand how and where this dataset has been used, organizations using the dataset are plotted below. The data is generated by collecting all citations for all the original research papers associated with the dataset. The PDFs are then converted to text and the organization names are extracted and geocoded. Because of the automated approach to extracting data, <span style="color:#ff8a72">not all organizations have been confirmed as using the dataset</span>. This visualization is provided to help locate and confirm usage and will be updated as data noise is reduced. </p> </section><section class="applet_container"> <div class="applet" data-payload="{&quot;command&quot;: &quot;map&quot;}"></div></section><div class="caption"> <div class="map-legend-item edu">Academic</div> <div class="map-legend-item com">Industry</div> <div class="map-legend-item gov">Government</div> Data is compiled from <a href="https://www.semanticscholar.org">Semantic Scholar</a> and not yet manually verified.</div><section> <div class="hr-wave-holder"> <div class="hr-wave-line hr-wave-line1"></div> <div class="hr-wave-line hr-wave-line2"></div> </div> <h2>Supplementary Information</h2></section><section class="applet_container"> <h3>Citations</h3> <p> Citations were collected from <a href="https://www.semanticscholar.org">Semantic Scholar</a>, a website which aggregates and indexes research papers. Metadata was extracted from these papers, including extracting names of institutions automatically from PDFs, and then the addresses were geocoded. Data is not yet manually verified, and reflects anytime the paper was cited. 
Some papers may only mention the dataset in passing, while others use it as part of their research methodology. </p> <p> Add button/link to download CSV </p> <div class="applet" data-payload="{&quot;command&quot;: &quot;citations&quot;}"></div></section><section> <p> This bar chart presents a ranking of the top countries where citations originated. Mouse over individual columns to see yearly totals. Colors are only assigned to the top 10 overall countries. </p> </section><section class="applet_container"> <div class="applet" data-payload="{&quot;command&quot;: &quot;chart&quot;}"></div></section><section><p>TODO</p>
<h2>- replace graphic</h2>
</section>
diff --git a/site/public/datasets/lfw/index.html b/site/public/datasets/lfw/index.html
index f224e345..adad8aea 100644
--- a/site/public/datasets/lfw/index.html
+++ b/site/public/datasets/lfw/index.html
@@ -44,7 +44,7 @@
<p>The <em>Names and Faces</em> dataset was the first face recognition dataset created entirely from online photos. However, <em>Names and Faces</em> and <em>LFW</em> are not the first face recognition datasets created entirely "in the wild". That title belongs to the <a href="/datasets/ucd_faces/">UCD dataset</a>. Obtaining images "in the wild" means using an image without explicit consent or awareness from the subject or photographer.</p>
</section><section class='images'><div class='image'><img src='https://nyc3.digitaloceanspaces.com/megapixels/v1/datasets/lfw/assets/lfw_montage_all_crop.jpg' alt='All 5,379 people in the Labeled Faces in The Wild Dataset. Showing one face per person'><div class='caption'>All 5,379 people in the Labeled Faces in The Wild Dataset. Showing one face per person</div></div></section><section><p>The <em>Names and Faces</em> dataset was the first face recognition dataset created entirely from online photos. However, <em>Names and Faces</em> and <em>LFW</em> are not the first face recognition datasets created entirely "in the wild". That title belongs to the <a href="/datasets/ucd_faces/">UCD dataset</a>. Obtaining images "in the wild" means using an image without explicit consent or awareness from the subject or photographer.</p>
<p>The <em>Names and Faces</em> dataset was the first face recognition dataset created entirely from online photos. However, <em>Names and Faces</em> and <em>LFW</em> are not the first face recognition datasets created entirely "in the wild". That title belongs to the <a href="/datasets/ucd_faces/">UCD dataset</a>. Obtaining images "in the wild" means using an image without explicit consent or awareness from the subject or photographer.</p>
-</section><section> <h3>Information Supply Chain</h3><!-- <div class="map-sidebar right-sidebar"> <h3>Legend</h3> <ul> <li><span style="color: #f2f293">&#9632;</span> Industry</li> <li><span style="color: #f30000">&#9632;</span> Academic</li> <li><span style="color: #3264f6">&#9632;</span> Government</li> </ul> </div> --> <p> To understand how and where this dataset has been used, organizations using the dataset are plotted below. The data is generated by collecting all citations for all the original research papers associated with the dataset. The PDFs are then converted to text and the organization names are extracted and geocoded. Because of the automated approach to extracting data, <span style="color:#ff8a72">not all organizations have been confirmed as using the dataset</span>. This visualization is provided to help locate and confirm usage and will be updated as data noise is reduced. </p> </section><section class="applet_container"> <div class="applet" data-payload="{&quot;command&quot;: &quot;map&quot;}"></div></section><div class="caption"> <div class="map-legend-item edu">Academic</div> <div class="map-legend-item com">Industry</div> <div class="map-legend-item gov">Government</div> Data is compiled from <a href="https://www.semanticscholar.org">Semantic Scholar</a> and not yet manually verified.</div><section> <div class="hr-wave-holder"> <div class="hr-wave-line hr-wave-line1"></div> <div class="hr-wave-line hr-wave-line2"></div> </div> <h2>Supplementary Information</h2></section><section class="applet_container"> <h3>Citations</h3> <p>Add graph showing distribution by country. Add information about how the citations were generated. Add button/link to download CSV</p> <div class="applet" data-payload="{&quot;command&quot;: &quot;citations&quot;}"></div></section><section><h3>Commercial Use</h3>
+</section><section> <h3>Information Supply Chain</h3><!-- <div class="map-sidebar right-sidebar"> <h3>Legend</h3> <ul> <li><span style="color: #f2f293">&#9632;</span> Industry</li> <li><span style="color: #f30000">&#9632;</span> Academic</li> <li><span style="color: #3264f6">&#9632;</span> Government</li> </ul> </div> --> <p> To understand how and where this dataset has been used, organizations using the dataset are plotted below. The data is generated by collecting all citations for all the original research papers associated with the dataset. The PDFs are then converted to text and the organization names are extracted and geocoded. Because of the automated approach to extracting data, <span style="color:#ff8a72">not all organizations have been confirmed as using the dataset</span>. This visualization is provided to help locate and confirm usage and will be updated as data noise is reduced. </p> </section><section class="applet_container"> <div class="applet" data-payload="{&quot;command&quot;: &quot;map&quot;}"></div></section><div class="caption"> <div class="map-legend-item edu">Academic</div> <div class="map-legend-item com">Industry</div> <div class="map-legend-item gov">Government</div> Data is compiled from <a href="https://www.semanticscholar.org">Semantic Scholar</a> and not yet manually verified.</div><section> <div class="hr-wave-holder"> <div class="hr-wave-line hr-wave-line1"></div> <div class="hr-wave-line hr-wave-line2"></div> </div> <h2>Supplementary Information</h2></section><section class="applet_container"> <h3>Citations</h3> <p> Citations were collected from <a href="https://www.semanticscholar.org">Semantic Scholar</a>, a website which aggregates and indexes research papers. Metadata was extracted from these papers, including extracting names of institutions automatically from PDFs, and then the addresses were geocoded. Data is not yet manually verified, and reflects anytime the paper was cited. 
Some papers may only mention the dataset in passing, while others use it as part of their research methodology. </p> <p> Add button/link to download CSV </p> <div class="applet" data-payload="{&quot;command&quot;: &quot;citations&quot;}"></div></section><section> <p> This bar chart presents a ranking of the top countries where citations originated. Mouse over individual columns to see yearly totals. Colors are only assigned to the top 10 overall countries. </p> </section><section class="applet_container"> <div class="applet" data-payload="{&quot;command&quot;: &quot;chart&quot;}"></div></section><section><h3>Commercial Use</h3>
<p>Add a paragraph about how usage extends far beyond academia into research centers for largest companies in the world. And even funnels into CIA funded research in the US and defense industry usage in China.</p>
</section><section class='applet_container'><div class='applet' data-payload='{"command": "load_file assets/lfw_commercial_use.csv", "fields": ["name_display, company_url, example_url, country, description"]}'></div></section><section><p>Research, text, and graphics ©Adam Harvey / megapixels.cc</p>
</section><section><ul class="footnotes"><li><a name="[^lfw_www]" class="footnote_shim"></a><span class="backlinks"><a href="#[^lfw_www]_1">a</a><a href="#[^lfw_www]_2">b</a></span><p><a href="http://vis-www.cs.umass.edu/lfw/results.html">http://vis-www.cs.umass.edu/lfw/results.html</a></p>
diff --git a/todo.md b/todo.md
index 300d7879..20eac835 100644
--- a/todo.md
+++ b/todo.md
@@ -9,44 +9,47 @@
## Datasets Index
-- looking good
- AH: add more datasets
+- AH: finalize intro text
## Datasets
-- overall looking really good
-- JL: possible to fade in background header graphic?
-- AH: add more synthetic faces
-- modify layout into section:
- - Overview
- - Citations
- - Supplementary Information
-- modify layout for synthetic images (1 large image/video left column, 4 small thumbnails right column)
-- [x] JL: add URL text and href to citations
-- JL: add download (button) and search option for CSV? or link to github
- AH: Try creating another google doc to manually review each citation and send to FT to maybe help with review
+- AH: finalize text for map include, beta disclaimer
- JL/AH: add graph showing distribution by country
- JL/AH: add graph/number display showing distribution by sector (edu, gov, com)
+- JL: possible to fade in background header graphic?
+- JL: add download (button) and search option for CSV? or link to github
+- JL: make sortable by name? year (if added to metadata)? maybe group by year (like VCAT)
+- JL: remove pointer rollover on tabulators
+- JL: change PDF url to only show domain (e.g. https://arxiv.org/12345/ --> arxiv.org)
+- JL: check footnotes (it shows an 'a' next to the numbers on bottom. is this right?)
+- NB: skipping synthetic faces for now
## About
-- layout still feels awkward
+- AH: layout still feels awkward, fixing
- AH: update text and images
- AH/JL: update layout, css tweaks
+- JL: add underline/active state to the subnavigation
- awaiting Mozilla response for their text
## Research
- AH: create example page
-- CSS tweaks for Research index page
-- CSS tweaks for Research post page
+- AH: CSS tweaks for Research index page
+- AH: CSS tweaks for Research post page
+## Design
+
+- AH: logo vector
## Datasets for FT Launch:
+April 4th launch
+
- AFAD
- [x] 50 People One Question
-- Adience
- AFLW
- AFW
- Brainwash
@@ -54,31 +57,36 @@
- CAVIAR
- CelebA
- COFW
-- CUHK
- DukeMTC
-- Face Scrub
- Face Tracer
- Helen
- HRT Transgender
- iLIDS IVD
- IJB-C
-- Kin CTTS
-- LAOFIW
- LFPW
- [x] LFW
- [x] MARS
- MegaFace
- MS Celeb
- MSRA
-- Pipa
- PubFig
- PubFig83
-- TinyFaces
- [x] UCCS
-- UCF Selfie
- UMD Faces
-- USED
- VGG Face 1?
- VGG Face 2
- YouTube Celebrities
-- [x] YouTube Makeup \ No newline at end of file
+- [x] YouTube Makeup
+
+
+- Face Scrub
+- Adience
+- CUHK
+- Kin CTTS
+- LAOFIW
+- UCF Selfie
+- USED
+- TinyFaces
+- Pipa
+- Shakecam
+- \ No newline at end of file