summaryrefslogtreecommitdiff
path: root/site/datasets/final/yfcc_100m.json
diff options
context:
space:
mode:
Diffstat (limited to 'site/datasets/final/yfcc_100m.json')
-rw-r--r--site/datasets/final/yfcc_100m.json2
1 files changed, 1 insertions, 1 deletions
diff --git a/site/datasets/final/yfcc_100m.json b/site/datasets/final/yfcc_100m.json
index 9bf84625..032090f3 100644
--- a/site/datasets/final/yfcc_100m.json
+++ b/site/datasets/final/yfcc_100m.json
@@ -1 +1 @@
-{"id": "a6e695ddd07aad719001c0fc1129328452385949", "paper": {"paper_id": "a6e695ddd07aad719001c0fc1129328452385949", "key": "yfcc_100m", "title": "The New Data and New Challenges in Multimedia Research", "year": "2015", "pdf": null, "address": "", "name": "YFCC100M"}, "address": "", "additional_papers": [], "citations": [{"id": "7d0ff6d0621b3846e8543bc162fd0215d8adfaf0", "title": "Efficient Large-Scale Similarity Search Using Matrix Factorization", "addresses": [{"address": "McGill University", "lat": "45.50397610", "lng": "-73.57496870", "type": "edu"}], "year": 2016, "pdf": "http://openaccess.thecvf.com/content_cvpr_2016/papers/Iscen_Efficient_Large-Scale_Similarity_CVPR_2016_paper.pdf"}, {"id": "8c192cd39f90eb8ff2969f8916ef8967607c5298", "title": "See, Hear, and Read: Deep Aligned Representations", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": 2017, "pdf": "http://pdfs.semanticscholar.org/9677/d2f6a994f598c1d631038d49401c5f707ee0.pdf"}, {"id": "b7c8452ac9791563d9a739bd079b05e518b20aea", "title": "Web Video in Numbers - An Analysis of Web-Video Metadata", "addresses": [{"address": "University of Basel", "lat": "47.56126510", "lng": "7.57529610", "type": "edu"}], "year": 2017, "pdf": "http://pdfs.semanticscholar.org/b7c8/452ac9791563d9a739bd079b05e518b20aea.pdf"}, {"id": "7060f6062ba1cbe9502eeaaf13779aa1664224bb", "title": "A Glimpse Far into the Future: Understanding Long-term Crowd Worker Quality", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2017, "pdf": "http://cs.stanford.edu/groups/vision/pdf/hata2017cscw.pdf"}, {"id": "258dda85eadcd2081d1e0131826aceac7f1e2415", "title": "Supervision Beyond Manual Annotations for Learning Visual Representations", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/e62d/40940a2711c7adca2857110272fb34d70576.pdf"}, {"id": "2577211aeaaa1f2245ddc379564813bee3d46c06", "title": "Seeing through the Human Reporting Bias: Visual Classifiers from Noisy Human-Centric Labels", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}, {"address": "Microsoft", "lat": "47.64233180", "lng": "-122.13693020", "type": "company"}], "year": 2016, "pdf": "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Misra_Seeing_Through_the_CVPR_2016_paper.pdf"}, {"id": "405526dfc79de98f5bf3c97bf4aa9a287700f15d", "title": "MegaFace: A Million Faces for Recognition at Scale", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/8a6c/57fcd99a77982ec754e0b97fd67519ccb60c.pdf"}, {"id": "18fe63c013983bea53be7d559ef36a1f385ca6ea", "title": "Supervision Beyond Human Annotations for Learning Visual Representations", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/18fe/63c013983bea53be7d559ef36a1f385ca6ea.pdf"}, {"id": "629b1bdf4d96bb41f7d3fce5c7d5617515303b71", "title": "Diving Deeper into IM2GPS", "addresses": [{"address": "Georgia Institute of Technology", "lat": "33.77603300", "lng": "-84.39884086", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/629b/1bdf4d96bb41f7d3fce5c7d5617515303b71.pdf"}, {"id": "96e0cfcd81cdeb8282e29ef9ec9962b125f379b0", "title": "The MegaFace Benchmark: 1 Million Faces for Recognition at Scale", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": 2016, "pdf": "http://doi.ieeecomputersociety.org/10.1109/CVPR.2016.527"}, {"id": "d0ac9913a3b1784f94446db2f1fb4cf3afda151f", "title": "Exploiting Multi-modal Curriculum in Noisy Web Data for Large-scale Concept Learning", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/d0ac/9913a3b1784f94446db2f1fb4cf3afda151f.pdf"}, {"id": "18078e72bddefffc24a6e882790aca8531773bed", "title": "Sublinear scaling of country attractiveness observed from Flickr dataset", "addresses": [{"address": "New York University", "lat": "40.72925325", "lng": "-73.99625394", "type": "edu"}], "year": 2015, "pdf": "https://arxiv.org/pdf/1601.02306v1.pdf"}, {"id": "9677d2f6a994f598c1d631038d49401c5f707ee0", "title": "See, Hear, and Read: Deep Aligned Representations", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": "2017", "pdf": "https://arxiv.org/pdf/1706.00932.pdf"}, {"id": "1b6f3139b1e59b90ab1aaf978359229b75985b49", "title": "Learning with a Wasserstein Loss", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/847e/39b52a63a55fb94fff7ade1f90a7c67e508b.pdf"}, {"id": "ea985e35b36f05156f82ac2025ad3fe8037be0cd", "title": "CERTH/CEA LIST at MediaEval Placing Task 2015", "addresses": [{"address": "Information Technologies Institute", "lat": "33.59345390", "lng": "130.35578370", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/ea98/5e35b36f05156f82ac2025ad3fe8037be0cd.pdf"}, {"id": "518f3cb2c9f2481cdce7741c5a821c26378b75e9", "title": "The Unreasonable Effectiveness of Noisy Data for Fine-Grained Recognition", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/518f/3cb2c9f2481cdce7741c5a821c26378b75e9.pdf"}, {"id": "982ede05154c1afdcf6fc623ba45186a34f4b9f2", "title": "The Many Shades of Negativity", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}, {"address": "University of Technology Sydney", "lat": "-33.88096510", "lng": "151.20107299", "type": "edu"}, {"address": "University of Trento", "lat": "46.06588360", "lng": "11.11598940", "type": "edu"}], "year": 2017, "pdf": "https://doi.org/10.1109/TMM.2017.2659221"}, {"id": "5996001b797ab2a0f55d5355cb168f25bfe56bbd", "title": "Content-Based Video Search over 1 Million Videos with 1 Core in 1 Second", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2015, "pdf": "http://doi.acm.org/10.1145/2671188.2749398"}, {"id": "65c978a97f54cf255f01c6846d6c51b37c61f836", "title": "A Glimpse Far into the Future: Understanding Long-term Crowd Worker Accuracy", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/65c9/78a97f54cf255f01c6846d6c51b37c61f836.pdf"}, {"id": "301486e8dad7a41a1a99fd6fba28ce153fe1e56e", "title": "Are Elephants Bigger than Butterflies? Reasoning about Sizes of Objects", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/3014/86e8dad7a41a1a99fd6fba28ce153fe1e56e.pdf"}, {"id": "01a903739564f575b81c87f7a9e2cb7b609f7ada", "title": "Image retrieval using scene graphs", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2015, "pdf": "http://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Johnson_Image_Retrieval_Using_2015_CVPR_paper.pdf"}, {"id": "c5e37630d0672e4d44f7dee83ac2c1528be41c2e", "title": "Multi-task Deep Neural Network for Joint Face Recognition and Facial Attribute Prediction", "addresses": [{"address": "Fudan University", "lat": "31.30104395", "lng": "121.50045497", "type": "edu"}], "year": "2017", "pdf": "http://dl.acm.org/citation.cfm?id=3078973"}, {"id": "05818eddd8a35fed7f3041d591ef966f8e79bd9a", "title": "Web scale photo hash clustering on a single machine", "addresses": [{"address": "Facebook", "lat": "37.39367170", "lng": "-122.08072620", "type": "company"}], "year": 2015, "pdf": "http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/1A_003_ext.pdf"}, {"id": "22954dd92a795d7f381465d1b353bcc41901430d", "title": "Learning Visual Storylines with Skipping Recurrent Neural Networks", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/3b04/f759e9b3c21defe2227374a008bec67751e3.pdf"}, {"id": "9bbc952adb3e3c6091d45d800e806d3373a52bac", "title": "Learning Visual Classifiers using Human-centric Annotations", "addresses": [{"address": "Microsoft", "lat": "47.64233180", "lng": "-122.13693020", "type": "company"}], "year": "2015", "pdf": "https://pdfs.semanticscholar.org/9bbc/952adb3e3c6091d45d800e806d3373a52bac.pdf"}, {"id": "2c761495cf3dd320e229586f80f868be12360d4e", "title": "Revisiting Unreasonable Effectiveness of Data in Deep Learning Era", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2017, "pdf": "http://arxiv.org/abs/1707.02968"}, {"id": "a9be20954e9177d8b2bc39747acdea4f5496f394", "title": "Event-Specific Image Importance", "addresses": [{"address": "University of California, San Diego", "lat": "32.87935255", "lng": "-117.23110049", "type": "edu"}], "year": 2016, "pdf": "http://acsweb.ucsd.edu/~yuw176/report/cvpr_2016.pdf"}, {"id": "256f09fe3163564958381d7f3727b5c27c19144c", "title": "Image2Emoji: Zero-shot Emoji Prediction for Visual Media", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": 2015, "pdf": "http://doi.acm.org/10.1145/2733373.2806335"}, {"id": "891433740bf6d318782c468638722aebf8bef2f5", "title": "Multi-Frame Video Super-Resolution Using Convolutional Neural Networks", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/8914/33740bf6d318782c468638722aebf8bef2f5.pdf"}, {"id": "85304f24f5a1800e66de20ad05e20c8c032b7d03", "title": "Understanding and Discovering Deliberate Self-harm Content in Social Media", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": 2017, "pdf": "http://pdfs.semanticscholar.org/8530/4f24f5a1800e66de20ad05e20c8c032b7d03.pdf"}, {"id": "35ec869dd0637c933d35ab823202c13b9b5d9aad", "title": "Effective Community Search for Large Attributed Graphs", "addresses": [{"address": "University of Hong Kong", "lat": "22.20814690", "lng": "114.25964115", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/4498/06bcb0987db60a0f8647380f9c335078fb46.pdf"}, {"id": "7c5dde400571fd357d1093e1829a8bd7917d8fcd", "title": "Retrospective Higher-Order Markov Processes for User Trails", "addresses": [{"address": "Purdue University", "lat": "40.43197220", "lng": "-86.92389368", "type": "edu"}], "year": "2017", "pdf": "https://arxiv.org/pdf/1704.05982.pdf"}, {"id": "9ded64e83d3ba51513ea00de27c0c770a02b0cf4", "title": "Image Classification using Transfer Learning from Siamese Networks based on Text Metadata Similarity", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/9ded/64e83d3ba51513ea00de27c0c770a02b0cf4.pdf"}, {"id": "7d621ec871a03a01f5aa65253e9ae6c8aadaf798", "title": "Converting Static Image Datasets to Spiking Neuromorphic Datasets Using Saccades", "addresses": [{"address": "National University of Singapore", "lat": "1.29620180", "lng": "103.77689944", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/fa2a/0fd5c5d5d3f14bf3875d531372ba6957748d.pdf"}, {"id": "33737f966cca541d5dbfb72906da2794c692b65b", "title": "Spotting Audio-Visual Inconsistencies (SAVI) in Manipulated Video", "addresses": [{"address": "SRI International", "lat": "37.45857960", "lng": "-122.17560525", "type": "edu"}, {"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": 2017, "pdf": "http://doi.ieeecomputersociety.org/10.1109/CVPRW.2017.238"}, {"id": "988aa2583c63ada43ca260dd8b5a4a543725a483", "title": "Choosing the Right Home Location Definition Method for the Given Dataset", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/988a/a2583c63ada43ca260dd8b5a4a543725a483.pdf"}, {"id": "ac9516a589901f1421e8ce905dd8bc5b689317ca", "title": "A Practical Framework for Executing Complex Queries over Encrypted Multimedia Data", "addresses": [{"address": "University of Texas at Dallas", "lat": "32.98207990", "lng": "-96.75662780", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/ac95/16a589901f1421e8ce905dd8bc5b689317ca.pdf"}, {"id": "d3008b4122e50a28f6cc1fa98ac6af28b42271ea", "title": "Searching Persuasively: Joint Event Detection and Evidence Recounting with Limited Supervision", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}, {"address": "University of Technology Sydney", "lat": "-33.88096510", "lng": "151.20107299", "type": "edu"}], "year": "2015", "pdf": "http://dl.acm.org/citation.cfm?id=2806218"}, {"id": "17e7a53456539dac2c9cf8631174c6388f64e24b", "title": "Learning to Detect Multiple Photographic Defects", "addresses": [{"address": "University of Virginia", "lat": "38.03536820", "lng": "-78.50353220", "type": "edu"}], "year": "2018", "pdf": "https://arxiv.org/pdf/1612.01635.pdf"}, {"id": "5d1ffb7ba3c53ecc5a90d40380ae235043c16344", "title": "On Label-Aware Community Search", "addresses": [{"address": "University of Hong Kong", "lat": "22.20814690", "lng": "114.25964115", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/5d1f/fb7ba3c53ecc5a90d40380ae235043c16344.pdf"}, {"id": "81f63e7344cc242416e37d791f7eb83ec2c07681", "title": "Multimodal Co-Training for Selecting Good Examples from Webly Labeled Video", "addresses": [{"address": "University of Tokyo", "lat": "35.90204480", "lng": "139.93622009", "type": "edu"}], "year": "2018", "pdf": "https://arxiv.org/pdf/1804.06057.pdf"}, {"id": "3ad6bd5c34b0866019b54f5976d644326069cb3d", "title": "Towards Next Generation Touring: Personalized Group Tours", "addresses": [{"address": "RMIT University", "lat": "-37.80874650", "lng": "144.96388750", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/3ad6/bd5c34b0866019b54f5976d644326069cb3d.pdf"}, {"id": "02b852e698dfe85df39c24e7dd39dedf484893dd", "title": "Collaborative Learning for Weakly Supervised Object Detection", "addresses": [{"address": "Shanghai Jiao Tong University", "lat": "31.20081505", "lng": "121.42840681", "type": "edu"}], "year": 2018, "pdf": "http://pdfs.semanticscholar.org/02b8/52e698dfe85df39c24e7dd39dedf484893dd.pdf"}, {"id": "187480101af3fb195993da1e2c17d917df24eb23", "title": "Unsupervised Visual Representation Learning by Context Prediction", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}, {"address": "University of California, Berkeley", "lat": "37.86871260", "lng": "-122.25586815", "type": "edu"}], "year": 2015, "pdf": "http://arxiv.org/pdf/1505.05192v2.pdf"}, {"id": "beeadf57a976f23f4fd6fa8a330eac6c81d3e3cd", "title": "ESGM : Event Enrichment and Summarization by Graph Model", "addresses": [{"address": "Hefei University of Technology", "lat": "31.84691800", "lng": "117.29053367", "type": "edu"}, {"address": "EURECOM", "lat": "43.61438600", "lng": "7.07112500", "type": "edu"}, {"address": "East China Normal University", "lat": "31.22849230", "lng": "121.40211389", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/beea/df57a976f23f4fd6fa8a330eac6c81d3e3cd.pdf"}, {"id": "171042ba12818238e3c0994ff08d71f8c28d4134", "title": "Learning to Describe E-Commerce Images from Noisy Online Data", "addresses": [{"address": "Tohoku University", "lat": "38.25309450", "lng": "140.87365930", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/1710/42ba12818238e3c0994ff08d71f8c28d4134.pdf"}, {"id": "8a8861ad6caedc3993e31d46e7de6c251a8cda22", "title": "StreetStyle: Exploring world-wide clothing styles from millions of photos", "addresses": [{"address": "Cornell University", "lat": "42.45055070", "lng": "-76.47835130", "type": "edu"}], "year": "2017", "pdf": "https://arxiv.org/pdf/1706.01869.pdf"}, {"id": "19d1855e021561d6da9d0200bb18e47f51cddda6", "title": "Visual Storytelling", "addresses": [{"address": "Microsoft", "lat": "47.64233180", "lng": "-122.13693020", "type": "company"}, {"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": "2016", "pdf": "https://arxiv.org/pdf/1604.03968.pdf"}, {"id": "0ae80aa149764e91544bbe45b80bb50434e7bda9", "title": "Ambient Sound Provides Supervision for Visual Learning", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/714c/21c575d2c02a51f2dd5250164f1269be44ca.pdf"}, {"id": "30193451e552286645baa00db7dcd05780d9e1da", "title": "On Available Corpora for Empirical Methods in Vision & Language", "addresses": [{"address": "Microsoft", "lat": "47.64233180", "lng": "-122.13693020", "type": "company"}], "year": "2015", "pdf": "https://pdfs.semanticscholar.org/3019/3451e552286645baa00db7dcd05780d9e1da.pdf"}, {"id": "16815ef660ef9e4091a81044d430591348df72ee", "title": "Combining Texture and Shape Cues for Object Recognition with Minimal Supervision", "addresses": [{"address": "Boston University", "lat": "42.35042530", "lng": "-71.10056114", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/1681/5ef660ef9e4091a81044d430591348df72ee.pdf"}, {"id": "2a2fd2538e19652721bc664f92056fbd08c604fd", "title": "Surveillance Video Analysis with External Knowledge and Internal Constraints", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/5042/096e3a80b14a6686014f338e0643f5270e65.pdf"}, {"id": "4576b59a44f75120f6a2d17a4e9c52e894297661", "title": "Learning Geo-Temporal Image Features", "addresses": [{"address": "University of Kentucky", "lat": "38.03337420", "lng": "-84.50177580", "type": "edu"}], "year": "2018", "pdf": "https://pdfs.semanticscholar.org/4576/b59a44f75120f6a2d17a4e9c52e894297661.pdf"}, {"id": "4cfd770ccecae1c0b4248bc800d7fd35c817bbbd", "title": "A Discriminative Feature Learning Approach for Deep Face Recognition", "addresses": [{"address": "Shenzhen Institutes of Advanced Technology", "lat": "22.59805605", "lng": "113.98533784", "type": "edu"}, {"address": "Chinese University of Hong Kong", "lat": "22.42031295", "lng": "114.20788644", "type": "edu"}], "year": "2016", "pdf": "https://pdfs.semanticscholar.org/8774/e206564df3bf9050f8c2be6b434cc2469c5b.pdf"}, {"id": "7f05df12dff3defee495507abd4870a0a30c3590", "title": "Placing Images with Refined Language Models and Similarity Search with PCA-reduced VGG Features", "addresses": [{"address": "Information Technologies Institute", "lat": "33.59345390", "lng": "130.35578370", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/7f05/df12dff3defee495507abd4870a0a30c3590.pdf"}, {"id": "b7b421be7c1dcbb8d41edb11180ba6ec87511976", "title": "A Deep Face Identification Network Enhanced by Facial Attributes Prediction", "addresses": [{"address": "West Virginia University", "lat": "39.65404635", "lng": "-79.96475355", "type": "edu"}], "year": "2018", "pdf": "https://arxiv.org/pdf/1805.00324.pdf"}, {"id": "7fda1edac608bc67e55ac3d7c9dc5a542d8f8aee", "title": "Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding", "addresses": [{"address": "Max Planck Institute for Informatics", "lat": "49.25795660", "lng": "7.04577417", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/b742/8da870a9872ecdaa6feaaab43c0bcd136dd2.pdf"}]} \ No newline at end of file
+{"id": "a6e695ddd07aad719001c0fc1129328452385949", "paper": {"paper_id": "a6e695ddd07aad719001c0fc1129328452385949", "key": "yfcc_100m", "title": "The New Data and New Challenges in Multimedia Research", "year": "2015", "pdf": null, "address": "", "name": "YFCC100M"}, "address": null, "additional_papers": [], "citations": [{"id": "7d0ff6d0621b3846e8543bc162fd0215d8adfaf0", "title": "Efficient Large-Scale Similarity Search Using Matrix Factorization", "addresses": [{"address": "McGill University", "lat": "45.50397610", "lng": "-73.57496870", "type": "edu"}], "year": 2016, "pdf": "http://openaccess.thecvf.com/content_cvpr_2016/papers/Iscen_Efficient_Large-Scale_Similarity_CVPR_2016_paper.pdf"}, {"id": "8c192cd39f90eb8ff2969f8916ef8967607c5298", "title": "See, Hear, and Read: Deep Aligned Representations", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": 2017, "pdf": "http://pdfs.semanticscholar.org/9677/d2f6a994f598c1d631038d49401c5f707ee0.pdf"}, {"id": "b7c8452ac9791563d9a739bd079b05e518b20aea", "title": "Web Video in Numbers - An Analysis of Web-Video Metadata", "addresses": [{"address": "University of Basel", "lat": "47.56126510", "lng": "7.57529610", "type": "edu"}], "year": 2017, "pdf": "http://pdfs.semanticscholar.org/b7c8/452ac9791563d9a739bd079b05e518b20aea.pdf"}, {"id": "7060f6062ba1cbe9502eeaaf13779aa1664224bb", "title": "A Glimpse Far into the Future: Understanding Long-term Crowd Worker Quality", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2017, "pdf": "http://cs.stanford.edu/groups/vision/pdf/hata2017cscw.pdf"}, {"id": "258dda85eadcd2081d1e0131826aceac7f1e2415", "title": "Supervision Beyond Manual Annotations for Learning Visual Representations", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/e62d/40940a2711c7adca2857110272fb34d70576.pdf"}, {"id": "2577211aeaaa1f2245ddc379564813bee3d46c06", "title": "Seeing through the Human Reporting Bias: Visual Classifiers from Noisy Human-Centric Labels", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}, {"address": "Microsoft", "lat": "47.64233180", "lng": "-122.13693020", "type": "company"}], "year": 2016, "pdf": "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Misra_Seeing_Through_the_CVPR_2016_paper.pdf"}, {"id": "405526dfc79de98f5bf3c97bf4aa9a287700f15d", "title": "MegaFace: A Million Faces for Recognition at Scale", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/8a6c/57fcd99a77982ec754e0b97fd67519ccb60c.pdf"}, {"id": "18fe63c013983bea53be7d559ef36a1f385ca6ea", "title": "Supervision Beyond Human Annotations for Learning Visual Representations", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/18fe/63c013983bea53be7d559ef36a1f385ca6ea.pdf"}, {"id": "629b1bdf4d96bb41f7d3fce5c7d5617515303b71", "title": "Diving Deeper into IM2GPS", "addresses": [{"address": "Georgia Institute of Technology", "lat": "33.77603300", "lng": "-84.39884086", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/629b/1bdf4d96bb41f7d3fce5c7d5617515303b71.pdf"}, {"id": "96e0cfcd81cdeb8282e29ef9ec9962b125f379b0", "title": "The MegaFace Benchmark: 1 Million Faces for Recognition at Scale", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": 2016, "pdf": "http://doi.ieeecomputersociety.org/10.1109/CVPR.2016.527"}, {"id": "d0ac9913a3b1784f94446db2f1fb4cf3afda151f", "title": "Exploiting Multi-modal Curriculum in Noisy Web Data for Large-scale Concept Learning", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/d0ac/9913a3b1784f94446db2f1fb4cf3afda151f.pdf"}, {"id": "18078e72bddefffc24a6e882790aca8531773bed", "title": "Sublinear scaling of country attractiveness observed from Flickr dataset", "addresses": [{"address": "New York University", "lat": "40.72925325", "lng": "-73.99625394", "type": "edu"}], "year": 2015, "pdf": "https://arxiv.org/pdf/1601.02306v1.pdf"}, {"id": "9677d2f6a994f598c1d631038d49401c5f707ee0", "title": "See, Hear, and Read: Deep Aligned Representations", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": "2017", "pdf": "https://arxiv.org/pdf/1706.00932.pdf"}, {"id": "1b6f3139b1e59b90ab1aaf978359229b75985b49", "title": "Learning with a Wasserstein Loss", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/847e/39b52a63a55fb94fff7ade1f90a7c67e508b.pdf"}, {"id": "ea985e35b36f05156f82ac2025ad3fe8037be0cd", "title": "CERTH/CEA LIST at MediaEval Placing Task 2015", "addresses": [{"address": "Information Technologies Institute", "lat": "33.59345390", "lng": "130.35578370", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/ea98/5e35b36f05156f82ac2025ad3fe8037be0cd.pdf"}, {"id": "518f3cb2c9f2481cdce7741c5a821c26378b75e9", "title": "The Unreasonable Effectiveness of Noisy Data for Fine-Grained Recognition", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/518f/3cb2c9f2481cdce7741c5a821c26378b75e9.pdf"}, {"id": "982ede05154c1afdcf6fc623ba45186a34f4b9f2", "title": "The Many Shades of Negativity", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}, {"address": "University of Technology Sydney", "lat": "-33.88096510", "lng": "151.20107299", "type": "edu"}, {"address": "University of Trento", "lat": "46.06588360", "lng": "11.11598940", "type": "edu"}], "year": 2017, "pdf": "https://doi.org/10.1109/TMM.2017.2659221"}, {"id": "5996001b797ab2a0f55d5355cb168f25bfe56bbd", "title": "Content-Based Video Search over 1 Million Videos with 1 Core in 1 Second", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2015, "pdf": "http://doi.acm.org/10.1145/2671188.2749398"}, {"id": "65c978a97f54cf255f01c6846d6c51b37c61f836", "title": "A Glimpse Far into the Future: Understanding Long-term Crowd Worker Accuracy", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/65c9/78a97f54cf255f01c6846d6c51b37c61f836.pdf"}, {"id": "301486e8dad7a41a1a99fd6fba28ce153fe1e56e", "title": "Are Elephants Bigger than Butterflies? Reasoning about Sizes of Objects", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/3014/86e8dad7a41a1a99fd6fba28ce153fe1e56e.pdf"}, {"id": "01a903739564f575b81c87f7a9e2cb7b609f7ada", "title": "Image retrieval using scene graphs", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2015, "pdf": "http://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Johnson_Image_Retrieval_Using_2015_CVPR_paper.pdf"}, {"id": "c5e37630d0672e4d44f7dee83ac2c1528be41c2e", "title": "Multi-task Deep Neural Network for Joint Face Recognition and Facial Attribute Prediction", "addresses": [{"address": "Fudan University", "lat": "31.30104395", "lng": "121.50045497", "type": "edu"}], "year": "2017", "pdf": "http://dl.acm.org/citation.cfm?id=3078973"}, {"id": "05818eddd8a35fed7f3041d591ef966f8e79bd9a", "title": "Web scale photo hash clustering on a single machine", "addresses": [{"address": "Facebook", "lat": "37.39367170", "lng": "-122.08072620", "type": "company"}], "year": 2015, "pdf": "http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/1A_003_ext.pdf"}, {"id": "22954dd92a795d7f381465d1b353bcc41901430d", "title": "Learning Visual Storylines with Skipping Recurrent Neural Networks", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/3b04/f759e9b3c21defe2227374a008bec67751e3.pdf"}, {"id": "9bbc952adb3e3c6091d45d800e806d3373a52bac", "title": "Learning Visual Classifiers using Human-centric Annotations", "addresses": [{"address": "Microsoft", "lat": "47.64233180", "lng": "-122.13693020", "type": "company"}], "year": "2015", "pdf": "https://pdfs.semanticscholar.org/9bbc/952adb3e3c6091d45d800e806d3373a52bac.pdf"}, {"id": "2c761495cf3dd320e229586f80f868be12360d4e", "title": "Revisiting Unreasonable Effectiveness of Data in Deep Learning Era", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2017, "pdf": "http://arxiv.org/abs/1707.02968"}, {"id": "a9be20954e9177d8b2bc39747acdea4f5496f394", "title": "Event-Specific Image Importance", "addresses": [{"address": "University of California, San Diego", "lat": "32.87935255", "lng": "-117.23110049", "type": "edu"}], "year": 2016, "pdf": "http://acsweb.ucsd.edu/~yuw176/report/cvpr_2016.pdf"}, {"id": "256f09fe3163564958381d7f3727b5c27c19144c", "title": "Image2Emoji: Zero-shot Emoji Prediction for Visual Media", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": 2015, "pdf": "http://doi.acm.org/10.1145/2733373.2806335"}, {"id": "891433740bf6d318782c468638722aebf8bef2f5", "title": "Multi-Frame Video Super-Resolution Using Convolutional Neural Networks", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/8914/33740bf6d318782c468638722aebf8bef2f5.pdf"}, {"id": "85304f24f5a1800e66de20ad05e20c8c032b7d03", "title": "Understanding and Discovering Deliberate Self-harm Content in Social Media", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": 2017, "pdf": "http://pdfs.semanticscholar.org/8530/4f24f5a1800e66de20ad05e20c8c032b7d03.pdf"}, {"id": "35ec869dd0637c933d35ab823202c13b9b5d9aad", "title": "Effective Community Search for Large Attributed Graphs", "addresses": [{"address": "University of Hong Kong", "lat": "22.20814690", "lng": "114.25964115", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/4498/06bcb0987db60a0f8647380f9c335078fb46.pdf"}, {"id": "7c5dde400571fd357d1093e1829a8bd7917d8fcd", "title": "Retrospective Higher-Order Markov Processes for User Trails", "addresses": [{"address": "Purdue University", "lat": "40.43197220", "lng": "-86.92389368", "type": "edu"}], "year": "2017", "pdf": "https://arxiv.org/pdf/1704.05982.pdf"}, {"id": "9ded64e83d3ba51513ea00de27c0c770a02b0cf4", "title": "Image Classification using Transfer Learning from Siamese Networks based on Text Metadata Similarity", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/9ded/64e83d3ba51513ea00de27c0c770a02b0cf4.pdf"}, {"id": "7d621ec871a03a01f5aa65253e9ae6c8aadaf798", "title": "Converting Static Image Datasets to Spiking Neuromorphic Datasets Using Saccades", "addresses": [{"address": "National University of Singapore", "lat": "1.29620180", "lng": "103.77689944", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/fa2a/0fd5c5d5d3f14bf3875d531372ba6957748d.pdf"}, {"id": "33737f966cca541d5dbfb72906da2794c692b65b", "title": "Spotting Audio-Visual Inconsistencies (SAVI) in Manipulated Video", "addresses": [{"address": "SRI International", "lat": "37.45857960", "lng": "-122.17560525", "type": "edu"}, {"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": 2017, "pdf": "http://doi.ieeecomputersociety.org/10.1109/CVPRW.2017.238"}, {"id": "988aa2583c63ada43ca260dd8b5a4a543725a483", "title": "Choosing the Right Home Location Definition Method for the Given Dataset", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/988a/a2583c63ada43ca260dd8b5a4a543725a483.pdf"}, {"id": "ac9516a589901f1421e8ce905dd8bc5b689317ca", "title": "A Practical Framework for Executing Complex Queries over Encrypted Multimedia Data", "addresses": [{"address": "University of Texas at Dallas", "lat": "32.98207990", "lng": "-96.75662780", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/ac95/16a589901f1421e8ce905dd8bc5b689317ca.pdf"}, {"id": "d3008b4122e50a28f6cc1fa98ac6af28b42271ea", "title": "Searching Persuasively: Joint Event Detection and Evidence Recounting with Limited Supervision", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}, {"address": "University of Technology Sydney", "lat": "-33.88096510", "lng": "151.20107299", "type": "edu"}], "year": "2015", "pdf": "http://dl.acm.org/citation.cfm?id=2806218"}, {"id": "17e7a53456539dac2c9cf8631174c6388f64e24b", "title": "Learning to Detect Multiple Photographic Defects", "addresses": [{"address": "University of Virginia", "lat": "38.03536820", "lng": "-78.50353220", "type": "edu"}], "year": "2018", "pdf": "https://arxiv.org/pdf/1612.01635.pdf"}, {"id": "5d1ffb7ba3c53ecc5a90d40380ae235043c16344", "title": "On Label-Aware Community Search", "addresses": [{"address": "University of Hong Kong", "lat": "22.20814690", "lng": "114.25964115", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/5d1f/fb7ba3c53ecc5a90d40380ae235043c16344.pdf"}, {"id": "81f63e7344cc242416e37d791f7eb83ec2c07681", "title": "Multimodal Co-Training for Selecting Good Examples from Webly Labeled Video", "addresses": [{"address": "University of Tokyo", "lat": "35.90204480", "lng": "139.93622009", "type": "edu"}], "year": "2018", "pdf": "https://arxiv.org/pdf/1804.06057.pdf"}, {"id": "3ad6bd5c34b0866019b54f5976d644326069cb3d", "title": "Towards Next Generation Touring: Personalized Group Tours", "addresses": [{"address": "RMIT University", "lat": "-37.80874650", "lng": "144.96388750", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/3ad6/bd5c34b0866019b54f5976d644326069cb3d.pdf"}, {"id": "02b852e698dfe85df39c24e7dd39dedf484893dd", "title": "Collaborative Learning for Weakly Supervised Object Detection", "addresses": [{"address": "Shanghai Jiao Tong University", "lat": "31.20081505", "lng": "121.42840681", "type": "edu"}], "year": 2018, "pdf": "http://pdfs.semanticscholar.org/02b8/52e698dfe85df39c24e7dd39dedf484893dd.pdf"}, {"id": "187480101af3fb195993da1e2c17d917df24eb23", "title": "Unsupervised Visual Representation Learning by Context Prediction", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}, {"address": "University of California, Berkeley", "lat": "37.86871260", "lng": "-122.25586815", "type": "edu"}], "year": 2015, "pdf": "http://arxiv.org/pdf/1505.05192v2.pdf"}, {"id": "beeadf57a976f23f4fd6fa8a330eac6c81d3e3cd", "title": "ESGM : Event Enrichment and Summarization by Graph Model", "addresses": [{"address": "Hefei University of Technology", "lat": "31.84691800", "lng": "117.29053367", "type": "edu"}, {"address": "EURECOM", "lat": "43.61438600", "lng": "7.07112500", "type": "edu"}, {"address": "East China Normal University", "lat": "31.22849230", "lng": "121.40211389", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/beea/df57a976f23f4fd6fa8a330eac6c81d3e3cd.pdf"}, {"id": "171042ba12818238e3c0994ff08d71f8c28d4134", "title": "Learning to Describe E-Commerce Images from Noisy Online Data", "addresses": [{"address": "Tohoku University", "lat": "38.25309450", "lng": "140.87365930", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/1710/42ba12818238e3c0994ff08d71f8c28d4134.pdf"}, {"id": "8a8861ad6caedc3993e31d46e7de6c251a8cda22", "title": "StreetStyle: Exploring world-wide clothing styles from millions of photos", "addresses": [{"address": "Cornell University", "lat": "42.45055070", "lng": "-76.47835130", "type": "edu"}], "year": "2017", "pdf": "https://arxiv.org/pdf/1706.01869.pdf"}, {"id": "19d1855e021561d6da9d0200bb18e47f51cddda6", "title": "Visual Storytelling", "addresses": [{"address": "Microsoft", "lat": "47.64233180", "lng": "-122.13693020", "type": "company"}, {"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": "2016", "pdf": "https://arxiv.org/pdf/1604.03968.pdf"}, {"id": "0ae80aa149764e91544bbe45b80bb50434e7bda9", "title": "Ambient Sound Provides Supervision for Visual Learning", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/714c/21c575d2c02a51f2dd5250164f1269be44ca.pdf"}, {"id": "30193451e552286645baa00db7dcd05780d9e1da", "title": "On Available Corpora for Empirical Methods in Vision & Language", "addresses": [{"address": "Microsoft", "lat": "47.64233180", "lng": "-122.13693020", "type": "company"}], "year": "2015", "pdf": "https://pdfs.semanticscholar.org/3019/3451e552286645baa00db7dcd05780d9e1da.pdf"}, {"id": "16815ef660ef9e4091a81044d430591348df72ee", "title": "Combining Texture and Shape Cues for Object Recognition with Minimal Supervision", "addresses": [{"address": "Boston University", "lat": "42.35042530", "lng": "-71.10056114", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/1681/5ef660ef9e4091a81044d430591348df72ee.pdf"}, {"id": "2a2fd2538e19652721bc664f92056fbd08c604fd", "title": "Surveillance Video Analysis with External Knowledge and Internal Constraints", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/5042/096e3a80b14a6686014f338e0643f5270e65.pdf"}, {"id": "4576b59a44f75120f6a2d17a4e9c52e894297661", "title": "Learning Geo-Temporal Image Features", "addresses": [{"address": "University of Kentucky", "lat": "38.03337420", "lng": "-84.50177580", "type": "edu"}], "year": "2018", "pdf": "https://pdfs.semanticscholar.org/4576/b59a44f75120f6a2d17a4e9c52e894297661.pdf"}, {"id": "4cfd770ccecae1c0b4248bc800d7fd35c817bbbd", "title": "A Discriminative Feature Learning Approach for Deep Face Recognition", "addresses": [{"address": "Shenzhen Institutes of Advanced Technology", "lat": "22.59805605", "lng": "113.98533784", "type": "edu"}, {"address": "Chinese University of Hong Kong", "lat": "22.42031295", "lng": "114.20788644", "type": "edu"}], "year": "2016", "pdf": "https://pdfs.semanticscholar.org/8774/e206564df3bf9050f8c2be6b434cc2469c5b.pdf"}, {"id": "7f05df12dff3defee495507abd4870a0a30c3590", "title": "Placing Images with Refined Language Models and Similarity Search with PCA-reduced VGG Features", "addresses": [{"address": "Information Technologies Institute", "lat": "33.59345390", "lng": "130.35578370", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/7f05/df12dff3defee495507abd4870a0a30c3590.pdf"}, {"id": "b7b421be7c1dcbb8d41edb11180ba6ec87511976", "title": "A Deep Face Identification Network Enhanced by Facial Attributes Prediction", "addresses": [{"address": "West Virginia University", "lat": "39.65404635", "lng": "-79.96475355", "type": "edu"}], "year": "2018", "pdf": "https://arxiv.org/pdf/1805.00324.pdf"}, {"id": "7fda1edac608bc67e55ac3d7c9dc5a542d8f8aee", "title": "Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding", "addresses": [{"address": "Max Planck Institute for Informatics", "lat": "49.25795660", "lng": "7.04577417", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/b742/8da870a9872ecdaa6feaaab43c0bcd136dd2.pdf"}]} \ No newline at end of file