site/datasets/final/yfcc_100m.json


1

{"id": "010f0f4929e6a6644fb01f0e43820f91d0fad292", "dataset": {"key": "yfcc_100m", "name_short": "YFCC100M", "using": "N", "ft_share": "1", "subset_of": "", "superset_of": "", "name_full": "Yahoo Flickr Creative Commons 100 Million Dataset", "url": "http://yfcc100m.appspot.com/", "added_on": "", "faces": "", "pdf_paper": "Y", "comments": "", "": "", "relevance": ""}, "statistics": {"key": "yfcc_100m", "name": "YFCC100M", "berit": "Y", "charlie": "", "adam": "", "priority": "", "wild": "Y", "indoor": "N", "outdoor": "N", "cyberspace": "Y", "names": "", "downloaded": "", "year_start": "", "year_end": "", "year_published": "2014", "ongoing": "", "images": "99,171,688 ", "videos": "787,479 ", "faces_unique": "", "total_faces": "", "img_per_person": "", "num_cameras": "", "faces_persons": "", "female": "", "male": "", "landmarks": "", "width": "", "height": "", "color": "Y", "gray": "", "derivative_of": "", "tags": "", "source": "yahoo, flickr", "purpose_short": "large scale dataset for multimedia research", "size_gb": "", "agreement": "", "agree_requied": "", "agreement_signed": "", "comment": "metadata includes tags, locations, camera, licence", "comment 2": "", "comment 3": "", "": ""}, "paper": {"paper_id": "010f0f4929e6a6644fb01f0e43820f91d0fad292", "key": "yfcc_100m", "title": "YFCC100M: the new data in multimedia research", "year": "2016", "pdf": ["https://arxiv.org/pdf/1503.01817.pdf"], "address": "", "name": "YFCC100M", "doi": []}, "address": null, "additional_papers": [], "citations": [{"id": "1cb5547c3f5ff42746bf9c4e083795aed3c8c609", "title": "Capped Lp-Norm Graph Embedding for Photo Clustering", "addresses": [{"address": "Northwestern Polytechnical University", "lat": "34.24691520", "lng": "108.91061982", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "890291bc2367e802fca871d67880bdc4a05374cf", "title": "Modeling Characteristics of Location from User Photos", "addresses": [{"address": "Facebook", "lat": "37.39367170", "lng": "-122.08072620", "type": "company"}, {"address": "University of Minnesota", "lat": "44.97308605", "lng": "-93.23708813", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "db63bf0ea02ad561d6b97c6cf000c3954e428787", "title": "Polyphonic Sound Event Detection with Weak Labeling", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/06fb/e175f8d34c6d8524e1a9357859bce230ae6a.pdf"]}, {"id": "e346aef68ae6df61b83fdeaa71ded9ba0282325e", "title": "Partially-Supervised Image Captioning", "addresses": [{"address": "Australian National University", "lat": "-35.27769990", "lng": "149.11852700", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.06004.pdf"]}, {"id": "7d0ff6d0621b3846e8543bc162fd0215d8adfaf0", "title": "Efficient Large-Scale Similarity Search Using Matrix Factorization", "addresses": [{"address": "McGill University", "lat": "45.50397610", "lng": "-73.57496870", "type": "edu"}], "year": "2016", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2016/papers/Iscen_Efficient_Large-Scale_Similarity_CVPR_2016_paper.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Iscen_Efficient_Large-Scale_Similarity_CVPR_2016_paper.pdf"]}, {"id": "8a2e3453d5f88ce6ce73cc7731800cd512f95e64", "title": "Learning to Find Good Correspondences", "addresses": [{"address": "Graz University of Technology", "lat": "47.05821000", "lng": "15.46019568", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.05971.pdf"]}, {"id": "e4ca0b623d81a6cfa4ad852e9ef931980e44dccc", "title": "Identifying concavity laws of attractiveness character for composite regions: Application through geo-tagged media check-ins", "addresses": [{"address": "Amity University", "lat": "26.85095965", "lng": "81.04950965", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "0f6090015ce5b19ff5b7f58ae849668bf04fa3cd", "title": "Location Extraction from Social Media: Geoparsing, Location Disambiguation, and Geotagging", "addresses": [{"address": "Information Technologies Institute", "lat": "33.59345390", "lng": "130.35578370", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "55077b3293fe5f699d09df0675c2318109b4b90d", "title": "Pooling Objects for Recognizing Scenes without Examples", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "37a5f93048cbfb74d122455531b4bc4f6e15d48f", "title": "Statistic Experience Based Adaptive One-Shot Detector (EAO) for Camera Sensing System", "addresses": [{"address": "Beijing University of Posts and Telecommunications", "lat": "39.96014880", "lng": "116.35193921", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/37a5/f93048cbfb74d122455531b4bc4f6e15d48f.pdf"]}, {"id": "f257e3ac714cd8fcd3b22d7d27ac6fab2db34097", "title": "Multimedia Big Data Analytics: A Survey", "addresses": [{"address": "University of Miami", "lat": "25.71733390", "lng": "-80.27866887", "type": "edu"}, {"address": "Florida International University", "lat": "25.75533775", "lng": "-80.37628897", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "b1398234454ee3c9bc5a20f6d2d00232cb79622c", "title": "Combining Low-Density Separators with CNNs", "addresses": [{"address": "Robotics Institute", "lat": "13.65450525", "lng": "100.49423171", "type": "edu"}], "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/b139/8234454ee3c9bc5a20f6d2d00232cb79622c.pdf"]}, {"id": "36091ff6b5d5a53d9641f5c3388b8c31b9ad4b49", "title": "Temporal Modular Networks for Retrieving Complex Compositional Activities in Videos", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/f1f4/5a961cd429d3257d98a9a7f803a2783f11a8.pdf"]}, {"id": "92fb2cb7f9a54360ea4442f902472aded5e88c74", "title": "Blackthorn: Large-Scale Interactive Multimodal Learning", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": "2018", "pdf": ["https://pure.itu.dk/portal/files/82406569/tmm_2017_blackthorn.pdf"]}, {"id": "2e3c8c8a413a317295bd44d86d089ed70a0b0c29", "title": "Introducing Concept And Syntax Transition Networks for Image Captioning", "addresses": [{"address": "University of Kaiserslautern", "lat": "49.42537740", "lng": "7.75554960", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "666300af8ffb8c903223f32f1fcc5c4674e2430b", "title": "Changing Fashion Cultures", "addresses": [{"address": "Tokyo Denki University", "lat": "35.65729570", "lng": "139.54255868", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.07920.pdf"]}, {"id": "3dfb822e16328e0f98a47209d7ecd242e4211f82", "title": "Cross-Age LFW: A Database for Studying Cross-Age Face Recognition in Unconstrained Environments", "addresses": [{"address": "Beijing University of Posts and Telecommunications", "lat": "39.96014880", "lng": "116.35193921", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.08197.pdf"]}, {"id": "866aec1ac5e1242132b4dd7b2c9a2b70ca8a4583", "title": "DCAR: A Discriminative and Compact Audio Representation for Audio Processing", "addresses": [{"address": "Beijing Jiaotong University", "lat": "39.94976005", "lng": "116.33629046", "type": "edu"}], "year": "2017", "pdf": ["https://www.stat.berkeley.edu/~mmahoney/pubs/DCAR_07927482.pdf"]}, {"id": "5ed63317cdef429f77499d9de0e58402ed1f687e", "title": "From Photo Streams to Evolving Situations", "addresses": [{"address": "Northwestern Polytechnical University", "lat": "34.24691520", "lng": "108.91061982", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.05878.pdf"]}, {"id": "0e0a927a7fbcd2061b3b27d3f1ca2fe5d95071c7", "title": "On Allocating Cache Resources to Content Providers", "addresses": [{"address": "Northwestern Polytechnical University", "lat": "34.24691520", "lng": "108.91061982", "type": "edu"}, {"address": "University of Massachusetts", "lat": "42.38897850", "lng": "-72.52869870", "type": "edu"}, {"address": "University of Minnesota", "lat": "44.97308605", "lng": "-93.23708813", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "fbdfed192ea0e6508d931b3d00335d99e976c0d7", "title": "From Selective Deep Convolutional Features to Compact Binary Representations for Image Retrieval", "addresses": [{"address": "University of Adelaide", "lat": "-34.91892260", "lng": "138.60423668", "type": "edu"}, {"address": "Singapore University of Technology and Design", "lat": "1.34021600", "lng": "103.96508900", "type": "edu"}, {"address": "Queensland University of Technology", "lat": "-27.47715625", "lng": "153.02841004", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.02899.pdf"]}, {"id": "65429789a95b3026457de76d46b5ec94158ce10e", "title": "PPR-FCN: Weakly Supervised Visual Relation Detection via Parallel Pairwise R-FCN", "addresses": [{"address": "Columbia University", "lat": "40.84198360", "lng": "-73.94368971", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.01956.pdf"]}, {"id": "edfc222d66267709b66440c17e66fb08c69f2ba6", "title": "Global multi-layer network of human mobility", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}, {"address": "New York University", "lat": "40.72925325", "lng": "-73.99625394", "type": "edu"}, {"address": "University of Salzburg", "lat": "47.79475945", "lng": "13.05417525", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1601.05532.pdf"]}, {"id": "1e54025a6b399bfc210a52a8c3314e8f570c2204", "title": "DenseCap: Fully Convolutional Localization Networks for Dense Captioning", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": "2016", "pdf": ["https://arxiv.org/pdf/1511.07571.pdf"]}, {"id": "ceac30061d8f7985987448f4712c49eeb98efad2", "title": "MemexQA: Visual Memex Question Answering", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.01336.pdf"]}, {"id": "0a100cb8a1accb69905374ef35c13f36c073873e", "title": "Multimodal Analysis of User-Generated Multimedia Content", "addresses": [{"address": "National University of Singapore", "lat": "1.29620180", "lng": "103.77689944", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "28b725d03845312c619e79932f3edbb47356e252", "title": "Multimodal-based Multimedia Analysis, Retrieval, and Services in Support of Social Media Applications", "addresses": [{"address": "National University of Singapore", "lat": "1.29620180", "lng": "103.77689944", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "1957bf2457f4dbac1bc226a16a751ad8f00705d5", "title": "Robust bundle adjustment for large-scale structure from motion", "addresses": [{"address": "Hefei University of Technology", "lat": "31.84691800", "lng": "117.29053367", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "142b46642dd46566f7be8c6263dfc6bf13a8b0dd", "title": "S2L: Single-Streamline For Complex Video Event Detection", "addresses": [{"address": "Chinese Academy of Sciences", "lat": "40.00447950", "lng": "116.37023800", "type": "edu"}, {"address": "Communication University of China", "lat": "39.91199955", "lng": "116.55189141", "type": "edu"}, {"address": "University of Chinese Academy of Sciences", "lat": "39.90828040", "lng": "116.24585270", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "7559688658fbfcfea6829ce48ff6583d1c3439f9", "title": "Capacity-aware Sequential Recommendations", "addresses": [{"address": "Delft University of Technology", "lat": "51.99882735", "lng": "4.37396037", "type": "edu"}], "year": "2018", "pdf": ["http://ifaamas.org/Proceedings/aamas2018/pdfs/p416.pdf", "https://pure.tudelft.nl/portal/files/44976871/De_Nijs_et_al._2018_Capacity_aware_Sequential_Recommendations.pdf"]}, {"id": "db989600b1857cea9abd14dba9c10808030c7d33", "title": "Delving Deep into Personal Photo and Video Search", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}, {"address": "Michigan State University", "lat": "42.71856800", "lng": "-84.47791571", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "cbd0f4006df1b2661f2c3a711d95727d61756afe", "title": "Multimodal Classification of Moderated Online Pro-Eating Disorder Content", "addresses": [{"address": "Georgia Institute of Technology", "lat": "33.77603300", "lng": "-84.39884086", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "df9a08016fa553a169d893ce2d3fca375bab4781", "title": "Partially-Supervised Image Captioning", "addresses": [{"address": "Australian National University", "lat": "-35.27769990", "lng": "149.11852700", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/df9a/08016fa553a169d893ce2d3fca375bab4781.pdf"]}, {"id": "2ccb12140ffc642010b448e7b6f77b3f57cea497", "title": "Adaptive City Characteristics: How Location Familiarity Changes What Is Regionally Descriptive", "addresses": [{"address": "University of Minnesota", "lat": "44.97308605", "lng": "-93.23708813", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "247a14b584d8d83a9191149011de2908a8da605b", "title": "State of the Geotags: Motivations and Recent Changes", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/247a/14b584d8d83a9191149011de2908a8da605b.pdf"]}, {"id": "75b32783a098b3173d44f27f7b8329b6d48f369e", "title": "Scaling of foreign attractiveness for countries and states", "addresses": [{"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}, {"address": "New York University", "lat": "40.72925325", "lng": "-73.99625394", "type": "edu"}], "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.08132.pdf"]}, {"id": "58d96262dff8f28e59e66491ffb7f36f28af07a3", "title": "Quartet-net Learning for Visual Instance Retrieval", "addresses": [{"address": "University of Queensland", "lat": "-27.49741805", "lng": "153.01316956", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "6f97f9959015cb0e93fba000ae468d22b4c5078a", "title": "A Discriminative and Compact Audio Representation for Event Detection", "addresses": [{"address": "Beijing Jiaotong University", "lat": "39.94976005", "lng": "116.33629046", "type": "edu"}, {"address": "Delft University of Technology", "lat": "51.99882735", "lng": "4.37396037", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "612da08d1dd16ac0a3b61151c6cf40b1e95b50c3", "title": "Hybrid Indexes to Expedite Spatial-Visual Search", "addresses": [{"address": "University of Southern California", "lat": "34.02241490", "lng": "-118.28634407", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.05200.pdf"]}, {"id": "0e2e4b2deee91884d6da21c06e609b1f61270f70", "title": "How Data Mining and Machine Learning Evolved from Relational Data Base to Data Science", "addresses": [{"address": "University of Pisa", "lat": "43.72012990", "lng": "10.40789760", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/0e2e/4b2deee91884d6da21c06e609b1f61270f70.pdf"]}, {"id": "788da403d220e2cc08dca9cffbe1f84b3c68469a", "title": "Causally Regularized Learning with Agnostic Data Selection Bias.", "addresses": [{"address": "Tsinghua University", "lat": "40.00229045", "lng": "116.32098908", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1708.06656.pdf"]}, {"id": "d3b3c82d73fac683aa23533d26240ec92d14352b", "title": "Fine-grained Video Attractiveness Prediction Using Multimodal Deep Learning on a Large Real-world Dataset", "addresses": [{"address": "Wuhan University of Technology", "lat": "30.60903415", "lng": "114.35142840", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.01373.pdf"]}, {"id": "4ae7fb256d3251019f570776dfe34e8d234c6fdf", "title": "Exploiting social media information toward a context-aware recommendation system", "addresses": [{"address": "Ionian University", "lat": "38.28994820", "lng": "21.78864690", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "a5628b377efcd69043b158388203502e41266852", "title": "Hybrid Indexes for Spatial-Visual Search", "addresses": [{"address": "University of Southern California", "lat": "34.02241490", "lng": "-118.28634407", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "1fefb2f8dd1efcdb57d5c2966d81f9ab22c1c58d", "title": "vExplorer: A Search Method to Find Relevant YouTube Videos for Health Researchers", "addresses": [{"address": "IBM Research, North Carolina", "lat": "35.90422720", "lng": "-78.85565763", "type": "company"}], "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/1fef/b2f8dd1efcdb57d5c2966d81f9ab22c1c58d.pdf"]}, {"id": "a50d2245d46ce0595ddbf25ae9acb8513aa70067", "title": "Text-to-Clip Video Retrieval with Early Fusion and Re-Captioning", "addresses": [{"address": "Boston University", "lat": "42.35042530", "lng": "-71.10056114", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/acd2/1d21ca466efb16b43ac9d647e99220fd6ddd.pdf"]}, {"id": "4d62d302f7c2ee8a9bf53ddeda881d686bcc9fe4", "title": "Snap Decisions?: How Users, Content, and Aesthetics Interact to Shape Photo Sharing Behaviors", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "d50e47c59655d0274d60386c6627e2571242a121", "title": "Who are happier? Spatio-temporal Analysis of Worldwide Human Emotion Based on Geo-Crowdsourcing Faces", "addresses": [{"address": "Wuhan University of Technology", "lat": "30.60903415", "lng": "114.35142840", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "0f885fd46064d271d4404cf9bb3d758e1a6f8d55", "title": "Exploring the Limits of Weakly Supervised Pretraining", "addresses": [{"address": "Facebook", "lat": "37.39367170", "lng": "-122.08072620", "type": "company"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.00932.pdf"]}, {"id": "c92d6fdcb16eb547b3794a2d60cf04328fdd2344", "title": "Event Detection and Retrieval on Social Media", "addresses": [{"address": "Aristotle University of Thessaloniki", "lat": "40.62984145", "lng": "22.95889350", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.03675.pdf"]}, {"id": "ea7d9a72e5f804567da3e9d937bd7d240c546d06", "title": "Personalized trip recommendation for tourists based on user interests, points of interest visit durations and visit recency", "addresses": [{"address": "RMIT University", "lat": "-37.80874650", "lng": "144.96388750", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "06b94b47b70c9bfcddbe277a61a9a802acd20096", "title": "Future-Supervised Retrieval of Unseen Queries for Live Video", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "788b16995a689a918b7994977350e7db13ef96a4", "title": "Efficient Conversion of Deep Features to Compact Binary Codes Using Fourier Decomposition for Multimedia Big Data", "addresses": [{"address": "Sejong University", "lat": "37.55025960", "lng": "127.07313900", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "830a532ca99fb9d8527dcfbac2d4003b4dd98eb7", "title": "Scheduling Large-scale Distributed Training via Reinforcement Learning", "addresses": [{"address": "Chinese University of Hong Kong", "lat": "22.42031295", "lng": "114.20788644", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "4d1aaf1027231979bb0cad6dc3e31e01925970cc", "title": "The Helmholtz Method: Using Perceptual Compression to Reduce Machine Learning Complexity", "addresses": [{"address": "University of California, Berkeley", "lat": "37.86871260", "lng": "-122.25586815", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.10569.pdf"]}, {"id": "57582fd607c8aad7245b497a6a68bc3a1901ef08", "title": "Neural Personalized Ranking for Image Recommendation", "addresses": [{"address": "Texas A&M University", "lat": "30.61083650", "lng": "-96.35212800", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "821382c28f4a2f047adcbe7e7b55e947de8f6e97", "title": "A Comparative Study of Global and Deep Features for the Analysis of User-Generated Natural Disaster Related Images", "addresses": [{"address": "University of Trento", "lat": "46.06588360", "lng": "11.11598940", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "0a55824aa80e5a06af5bf8ba089b19db245471f1", "title": "Quantifying Tourist Behavior Patterns by Travel Motifs and Geo-Tagged Photos from Flickr", "addresses": [{"address": "Wuhan University of Technology", "lat": "30.60903415", "lng": "114.35142840", "type": "edu"}, {"address": "Peking University", "lat": "39.99223790", "lng": "116.30393816", "type": "edu"}, {"address": "Hong Kong Polytechnic University", "lat": "22.30457200", "lng": "114.17976285", "type": "edu"}], "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/0a55/824aa80e5a06af5bf8ba089b19db245471f1.pdf"]}, {"id": "90eb833df9614da495712f4c1fbb65f8e7d9b356", "title": "Improved Scoring Models for Semantic Image Retrieval Using Scene Graphs", "addresses": [{"address": "Portland State University", "lat": "45.51181205", "lng": "-122.68492999", "type": "edu"}], "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/c12d/09f36feaa03a533d87eb3ceef5bc76989f05.pdf"]}, {"id": "9fb32f4f332b6acb8eafa418b45650e54796f0a7", "title": "A Data-Centric Approach for Image Scene Localization", "addresses": [{"address": "University of Southern California", "lat": "34.02241490", "lng": "-118.28634407", "type": "edu"}], "year": "2018", "pdf": ["https://infolab.usc.edu/DocsDemos/IEEE_BigData_ImageSceneLocalization.pdf"]}, {"id": "37380f8247e7ddd77f0105914d97eef849b9dc47", "title": "CUImage: A Neverending Learning Platform on a Convolutional Knowledge Graph of Billion Web Images", "addresses": [{"address": "Chinese University of Hong Kong", "lat": "22.42031295", "lng": "114.20788644", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "e132b98d91d0f07a95e3d3165ebb10c8d1d7fe49", "title": "Interactive Multimodal Learning on 100 Million Images", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "c1de9803bd0072f017a97c0503308a7b9b4f211b", "title": "Analysis of Spatial, Temporal, and Content Characteristics of Videos in the YFCC100M Dataset", "addresses": [{"address": "Yonsei University", "lat": "37.56004060", "lng": "126.93692480", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "5ad42766d7ba40ceccd8047f6810b9cf6ac79748", "title": "Automatic Data Augmentation from Massive Web Images for Deep Visual Recognition", "addresses": [{"address": "Harbin Institute of Technology", "lat": "45.74139210", "lng": "126.62552755", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "86973c8c9adef3b6a36c31c2682f2179e3013ae1", "title": "Active Learning from Noisy Tagged Images", "addresses": [{"address": "University of Adelaide", "lat": "-34.91892260", "lng": "138.60423668", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/8697/3c8c9adef3b6a36c31c2682f2179e3013ae1.pdf"]}, {"id": "968ab65077c4be1c1071120052b2e4b4f3d3c59a", "title": "\"Seeing is believing: the quest for multimodal knowledge\" by Gerard de Melo and Niket Tandon, with Martin Vesely as coordinator", "addresses": [{"address": "Tsinghua University", "lat": "40.00229045", "lng": "116.32098908", "type": "edu"}, {"address": "Max Planck Institute for Informatics", "lat": "49.25795660", "lng": "7.04577417", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "983f695c0ae44632182184f81579872f0353c6bc", "title": "V3C - A Research Video Collection", "addresses": [{"address": "University of Basel", "lat": "47.56126510", "lng": "7.57529610", "type": "edu"}, {"address": "National Institute of Standards and Technology", "lat": "39.12549380", "lng": "-77.22293475", "type": "edu"}], "year": "2019", "pdf": ["https://arxiv.org/pdf/1810.04401.pdf"]}, {"id": "de09cb5eeff5e7a752567c009f7621dfa6ebb2da", "title": "Generating Affective Captions using Concept And Syntax Transition Networks", "addresses": [{"address": "University of Kaiserslautern", "lat": "49.42537740", "lng": "7.75554960", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "4a29bc00d1bc944fffe510713e3e8f2206c33fd1", "title": "Self-supervised Segmentation by Grouping Optical-Flow", "addresses": [{"address": "University of Oxford", "lat": "51.75345380", "lng": "-1.25400997", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/4a29/bc00d1bc944fffe510713e3e8f2206c33fd1.pdf"]}, {"id": "2874e3b4bde2203129773f1a7baf78f0710c490e", "title": "Selective Deep Convolutional Features for Image Retrieval", "addresses": [{"address": "Singapore University of Technology and Design", "lat": "1.34021600", "lng": "103.96508900", "type": "edu"}, {"address": "University of Adelaide", "lat": "-34.91892260", "lng": "138.60423668", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.00809.pdf"]}, {"id": "52516cf440eaf57966bfef2a6cc800163bd85793", "title": "Rethinking Summarization and Storytelling for Modern Social Multimedia", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}, {"address": "National University of Singapore", "lat": "1.29620180", "lng": "103.77689944", "type": "edu"}, {"address": "EURECOM", "lat": "43.61438600", "lng": "7.07112500", "type": "edu"}, {"address": "Aalto University", "lat": "60.18558755", "lng": "24.82427330", "type": "edu"}, {"address": "Dublin City University", "lat": "53.38522185", "lng": "-6.25740874", "type": "edu"}, {"address": "Australian National University", "lat": "-35.27769990", "lng": "149.11852700", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/5251/6cf440eaf57966bfef2a6cc800163bd85793.pdf"]}, {"id": "d32691ba18767cd7b41fc1dda81c6d6b3e07ac4b", "title": "Beyond Concept Detection: The Potential of User Intent for Image Retrieval", "addresses": [{"address": "Delft University of Technology", "lat": "51.99882735", "lng": "4.37396037", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "5674ace2c666f6af53a2a58279ade6ebd271e8c7", "title": "Exploiting Visual-based Intent Classification for Diverse Social Image Retrieval", "addresses": [{"address": "Delft University of Technology", "lat": "51.99882735", "lng": "4.37396037", "type": "edu"}], "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/5e11/24345969a536fd5fa78db05b6149ea262a69.pdf"]}, {"id": "76908c01966d1656a13de779955aded5fc1fed32", "title": "Unsupervised Deep Video Hashing via Balanced Code for Large-Scale Video Retrieval", "addresses": [{"address": "Inception Institute of Artificial Intelligence, Abu Dhabi, UAE", "lat": "24.45388400", "lng": "54.37734380", "type": "edu"}, {"address": "Lancaster University", "lat": "54.00975365", "lng": "-2.78757491", "type": "edu"}, {"address": "Tsinghua University", "lat": "40.00229045", "lng": "116.32098908", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "fc8fb68a7e3b79c37108588671c0e1abf374f501", "title": "Semantic Pooling for Complex Event Analysis in Untrimmed Videos", "addresses": [{"address": "University of Technology Sydney", "lat": "-33.88096510", "lng": "151.20107299", "type": "edu"}, {"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": "2017", "pdf": ["https://cs.uwaterloo.ca/~y328yu/mypapers/pami17.pdf"]}, {"id": "632d9b67c474aab18dc960d8a251b2030ad59bc1", "title": "Binarized Mode Seeking for Scalable Visual Pattern Discovery", "addresses": [{"address": "Chinese Academy of Sciences", "lat": "40.00447950", "lng": "116.37023800", "type": "edu"}, {"address": "University of Chinese Academy of Sciences", "lat": "39.90828040", "lng": "116.24585270", "type": "edu"}], "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhang_Binarized_Mode_Seeking_CVPR_2017_paper.pdf"]}, {"id": "a019131087eaca6d33f43d9b38d9bd177f89855c", "title": "User-Generated Content in Social Media", "addresses": [{"address": "National University of Singapore", "lat": "1.29620180", "lng": "103.77689944", "type": "edu"}, {"address": "University of Tampere", "lat": "61.49412325", "lng": "23.77920678", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/a019/131087eaca6d33f43d9b38d9bd177f89855c.pdf"]}, {"id": "c3a79349476d81cc6b7b304fd4c5cd7b5f65efb9", "title": "PQk-means: Billion-scale Clustering for Product-quantized Codes", "addresses": [{"address": "National Institute of Informatics, Japan", "lat": "35.69248530", "lng": "139.75825330", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1709.03708.pdf"]}, {"id": "bc6830fd4ac9307ddd323c5f92627fd42ac02ff5", "title": "Video Description Generation using Audio and Visual Cues", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "36631dcbb9452ea3d35b19b2de6ef709022531a6", "title": "TRECVID 2016 : Evaluating Video Search , Video Event Detection , Localization , and Hyperlinking", "addresses": [{"address": "National Institute of Standards and Technology", "lat": "39.12549380", "lng": "-77.22293475", "type": "edu"}, {"address": "Dublin City University", "lat": "53.38522185", "lng": "-6.25740874", "type": "edu"}, {"address": "University of Twente", "lat": "52.23801390", "lng": "6.85667610", "type": "edu"}, {"address": "EURECOM", "lat": "43.61438600", "lng": "7.07112500", "type": "edu"}], "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/0109/93ae9742f7f4c40763a25ded237723de60b5.pdf"]}, {"id": "aa246dd3999021ec733a56c3d4b7b6772ba007f8", "title": "Beautiful...but at What Cost?: An Examination of Externalities in Geographic Vehicle Routing", "addresses": [{"address": "Northwestern University", "lat": "42.05511640", "lng": "-87.67581113", "type": "edu"}, {"address": "University of Minnesota", "lat": "44.97308605", "lng": "-93.23708813", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "841855205818d3a6d6f85ec17a22515f4f062882", "title": "Low Resolution Face Recognition in the Wild", "addresses": [{"address": "University of Notre Dame", "lat": "41.70456775", "lng": "-86.23822026", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.11529.pdf"]}, {"id": "4ab023f9bac5c85b829be95c905b6d5dc51820c2", "title": "Discovering physics and design trends from visual temporal structures", "addresses": [{"address": "Brown University", "lat": "41.82686820", "lng": "-71.40123146", "type": "edu"}, {"address": "MIT", "lat": "42.35839610", "lng": "-71.09567788", "type": "edu"}], "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/4ab0/23f9bac5c85b829be95c905b6d5dc51820c2.pdf"]}, {"id": "b3e50a64709a62628105546e392cf796f95ea0fb", "title": "Clustering via Boundary Erosion", "addresses": [{"address": "Xiamen University", "lat": "24.43994190", "lng": "118.09301781", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.04312.pdf"]}, {"id": "7a0e9de471b25252afbf8cec566d4962538c5b78", "title": "Right inflight?: a dataset for exploring the automatic prediction of movies suitable for a watching situation", "addresses": [{"address": "University of Oslo", "lat": "59.93891665", "lng": "10.72170765", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "ba7890a5e9e51bf6181cc3c03144796cb3e5e254", "title": "Query : \" A man in a red sweatshirt performing breakdance \"", "addresses": [{"address": "Max Planck Institute for Informatics", "lat": "49.25795660", "lng": "7.04577417", "type": "edu"}, {"address": "University of California, Berkeley", "lat": "37.86871260", "lng": "-122.25586815", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/ba78/90a5e9e51bf6181cc3c03144796cb3e5e254.pdf"]}, {"id": "6ecb8f65ba1ede4877d75979b1540503e568303c", "title": "OmniArt: A Large-scale Artistic Benchmark", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "cd7b872365cc5b5114e0ccb833e307e874a5c3aa", "title": "Video Object Segmentation with Language Referring Expressions", "addresses": [{"address": "Max Planck Institute for Informatics", "lat": "49.25795660", "lng": "7.04577417", "type": "edu"}, {"address": "University of California, Berkeley", "lat": "37.86871260", "lng": "-122.25586815", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.08006.pdf"]}, {"id": "b44999bb2e23cf8ca0a413a2d006cc9800794650", "title": "More Than An Answer: Neural Pivot Network for Visual Qestion Answering", "addresses": [{"address": "Xiamen University", "lat": "24.43994190", "lng": "118.09301781", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "9d6993f60539d30ee325138b3465aa020fa3bcb4", "title": "Analysis and Optimization of fastText Linear Text Classifier", "addresses": [{"address": "IBM Thomas J. Watson Research Center", "lat": "41.21002475", "lng": "-73.80407056", "type": "company"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.05531.pdf"]}, {"id": "1415c27bf7ddf4137d45f446cae6edfc9740919e", "title": "Few-Shot Hash Learning for Image Retrieval", "addresses": [{"address": "Carnegie Mellon University", "lat": "37.41021930", "lng": "-122.05965487", "type": "edu"}], "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017_workshops/papers/w18/Wang_Few-Shot_Hash_Learning_ICCV_2017_paper.pdf", "https://ri.cmu.edu/wp-content/uploads/2017/12/45.pdf"]}, {"id": "8a5be2b370c5a1df06e1063b306b2874706c24dc", "title": "The Konstanz natural video database (KoNViD-1k)", "addresses": [{"address": "University of Surrey", "lat": "51.24303255", "lng": "-0.59001382", "type": "edu"}], "year": "2017", "pdf": ["http://epubs.surrey.ac.uk/814067/1/konstanz-natural-video.pdf", "https://kops.uni-konstanz.de/bitstream/handle/123456789/39103/Hosu_2-e7mh9z8d8u09.pdf?isAllowed=y&sequence=1", "https://www.uni-konstanz.de/mmsp/pubsys/publishedFiles/HoHaJe17.pdf"]}, {"id": "353a058f49a8c60b41e9cd28962f6060cf4f6646", "title": "The research of multimedia cloud computing platform data dynamic task scheduling optimization method in multi core environment", "addresses": [{"address": "Jilin University", "lat": "22.05356500", "lng": "113.39913285", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "aa6e8a2a9d3ed59d2ae72add84176e7b7f4b2912", "title": "ORCHARD: Visual object recognition accelerator based on approximate in-memory processing", "addresses": [{"address": "University of California, San Diego", "lat": "32.87935255", "lng": "-117.23110049", "type": "edu"}], "year": "2017", "pdf": ["http://moimani.weebly.com/uploads/2/3/8/6/23860882/iccad17_orchard.pdf", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final8.pdf", "http://seelab.ucsd.edu/papers/ykim_2017_orchard.pdf"]}, {"id": "3349fab3a5e2b7c5194289bdc839f87f556ea3ef", "title": "Social Image Tags as a Source of Word Embeddings: A Task-oriented Evaluation", "addresses": [{"address": "Waseda University", "lat": "33.88987280", "lng": "130.70856205", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/3349/fab3a5e2b7c5194289bdc839f87f556ea3ef.pdf"]}, {"id": "a9e19e8ab24071a085d1273b9f9d49aa0e4ba48c", "title": "VizWiz Grand Challenge: Answering Visual Questions from Blind People", "addresses": [{"address": "University of Texas at Austin", "lat": "30.28415100", "lng": "-97.73195598", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.08218.pdf"]}, {"id": "bf7f554ec8f5f44e9c942237effd282a16efecf6", "title": "SALIC: Social Active Learning for Image Classification", "addresses": [{"address": "University of Surrey", "lat": "51.24303255", "lng": "-0.59001382", "type": "edu"}, {"address": "Information Technologies Institute", "lat": "33.59345390", "lng": "130.35578370", "type": "edu"}], "year": "2016", "pdf": ["http://epubs.surrey.ac.uk/812487/1/double.pdf"]}, {"id": "2ce4e06a9fe107ff29a34ed4a8771222cbaacc9c", "title": "Learning Points and Routes to Recommend Trajectories", "addresses": [{"address": "Australian National University", "lat": "-35.27769990", "lng": "149.11852700", "type": "edu"}], "year": "2016", "pdf": ["https://arxiv.org/pdf/1608.07051.pdf"]}, {"id": "cb8a1b8d87a3fef15635eb4a32173f9c6f966055", "title": "A Survey on Deep Learning: Algorithms, Techniques, and Applications", "addresses": [{"address": "Florida International University", "lat": "25.75533775", "lng": "-80.37628897", "type": "edu"}, {"address": "University of Miami", "lat": "25.71733390", "lng": "-80.27866887", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "719969807953d7ea8bda0397b1aadbaa6e205718", "title": "Automatic Dataset Augmentation", "addresses": [{"address": "Harbin Institute of Technology", "lat": "45.74139210", "lng": "126.62552755", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.08201.pdf"]}, {"id": "b83fa4fe617db200defe21a07f81aa8060587217", "title": "Attention and Localization based on a Deep Convolutional Recurrent Model for Weakly Supervised Audio Tagging", "addresses": [{"address": "University of Surrey", "lat": "51.24303255", "lng": "-0.59001382", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.06052.pdf"]}, {"id": "f765c628c5483d7ad974a8605fdd3c7b4010ded2", "title": "Integrating Online and Offline Three-Dimensional Deep Learning for Automated Polyp Detection in Colonoscopy Videos", "addresses": [{"address": "Hong Kong Polytechnic University", "lat": "22.30457200", "lng": "114.17976285", "type": "edu"}, {"address": "Chinese University of Hong Kong", "lat": "22.42031295", "lng": "114.20788644", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "774ae9c6b2a83c6891b5aeeb169cfd462d45f715", "title": "Learning from Small Sample Sets by Combining Unsupervised Meta-Training with CNNs", "addresses": [{"address": "Robotics Institute", "lat": "13.65450525", "lng": "100.49423171", "type": "edu"}], "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/774a/e9c6b2a83c6891b5aeeb169cfd462d45f715.pdf"]}, {"id": "4f90acbfe479bad10b1c733dc7e6a174fd4b2870", "title": "Training and Evaluating Multimodal Word Embeddings with Large-scale Web Annotated Images", "addresses": [{"address": "University of California, Los Angeles", "lat": "34.06877880", "lng": "-118.44500940", "type": "edu"}], "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.08321.pdf"]}, {"id": "e0ffda149d1d81fff507953377e08cf4f718d8ae", "title": "Image Captioning with both Object and Scene Information", "addresses": [{"address": "Chinese Academy of Sciences", "lat": "40.00447950", "lng": "116.37023800", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "2bf37dd0ebaac003e67d11cc869e5e93b13dbd00", "title": "Opportunities and Challenges of Industry-Academic Collaborations in Multimedia Research", "addresses": [{"address": "Columbia University", "lat": "40.84198360", "lng": "-73.94368971", "type": "edu"}, {"address": "Facebook", "lat": "37.39367170", "lng": "-122.08072620", "type": "company"}], "year": "2015", "pdf": []}, {"id": "6f71862aa00d61fc8fd7f205de35ee8af458ec0c", "title": "Semi-supervised Learning of Instance-level Recognition from Video", "addresses": [{"address": "Imperial College London", "lat": "51.49887085", "lng": "-0.17560797", "type": "edu"}], "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/6f71/862aa00d61fc8fd7f205de35ee8af458ec0c.pdf"]}, {"id": "697f0e24f24b016cef9474db485fe61a667f07b8", "title": "VISER: Visual Self-Regularization", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.02568.pdf"]}, {"id": "7fd358a0eba775fe9d87f165cb2074fa8fa762e0", "title": "Spatial-Aware Object Embeddings for Zero-Shot Localization and Classification of Actions", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.09145.pdf"]}, {"id": "2e57eef707fa9c89d2a7ac68f55c26dea850c5ea", "title": "Mixture of Convolutional Neural Networks for Image Classification", "addresses": [{"address": "Hebrew University of Jerusalem", "lat": "31.79185550", "lng": "35.24472300", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/2e57/eef707fa9c89d2a7ac68f55c26dea850c5ea.pdf"]}, {"id": "0c8c7d58b9bf2acb16ce6b73311db9b1bdff8d11", "title": "Windsurf: the best way to SURF", "addresses": [{"address": "University of Bologna", "lat": "44.49623180", "lng": "11.35415700", "type": "edu"}], "year": "2017", "pdf": []}, {"id": "ed2f4e5ecbc4b08ee0784e97760a7f9e5ea9efae", "title": "Deep-Structured Event Modeling for User-Generated Photos", "addresses": [{"address": "Chinese Academy of Sciences", "lat": "40.00447950", "lng": "116.37023800", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "799b9904f37a515a9614f6a92ff673c4c6e97a7f", "title": "Survey on Visual-Based Localization", "addresses": [{"address": "Princeton University", "lat": "40.34829285", "lng": "-74.66308325", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/799b/9904f37a515a9614f6a92ff673c4c6e97a7f.pdf"]}, {"id": "2ef0adfaf84def97e88ae77f887f4497ddc9ccbb", "title": "Structured Recommendation", "addresses": [{"address": "Australian National University", "lat": "-35.27769990", "lng": "149.11852700", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1706.09067.pdf"]}, {"id": "aef293fa9617a8924389b95f313540c1329cb2b1", "title": "LocXplore: a system for profiling urban regions", "addresses": [{"address": "Freie Universit\u00e4t Berlin", "lat": "52.45252640", "lng": "13.28967870", "type": "edu"}], "year": "2018", "pdf": []}, {"id": "35d181da0b939bdf3bdf579969e5fe69e277e03e", "title": "Learning Features by Watching Objects Move", "addresses": [{"address": "University of California, Berkeley", "lat": "37.86871260", "lng": "-122.25586815", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.06370.pdf"]}, {"id": "f54ce6b62e9889832d891f2aada677171d4219fe", "title": "Concept-Level Multimodal Ranking of Flickr Photo Tags via Recall Based Weighting", "addresses": [{"address": "National Institute of Informatics, Japan", "lat": "35.69248530", "lng": "139.75825330", "type": "edu"}, {"address": "National University of Singapore", "lat": "1.29620180", "lng": "103.77689944", "type": "edu"}, {"address": "University of Electro-Communications", "lat": "35.65729570", "lng": "139.54255868", "type": "edu"}], "year": "2016", "pdf": []}, {"id": "10e78afd7ea5f70e192b08be8fe1a978ad89394b", "title": "Video Stream Retrieval of Unseen Queries using Semantic Memory", "addresses": [{"address": "University of Amsterdam", "lat": "52.35536550", "lng": "4.95016440", "type": "edu"}], "year": "2016", "pdf": ["https://arxiv.org/pdf/1612.06753.pdf"]}, {"id": "0bb078880758c023007950f1d7eea99e666e4f06", "title": "Discovering Connotations as Labels for Weakly Supervised Image-Sentence Data", "addresses": [{"address": "Karlsruhe Institute of Technology", "lat": "49.10184375", "lng": "8.43312560", "type": "edu"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/0bb0/78880758c023007950f1d7eea99e666e4f06.pdf"]}, {"id": "8a3d969732951e4d22c608d465b091b9272f03bb", "title": "Multimedia COMMONS - Community-Organized Multimodal Mining: Opportunities for Novel Solutions (MMCommons Workshop 2015)", "addresses": [{"address": "City University of Hong Kong", "lat": "22.34000115", "lng": "114.16970291", "type": "edu"}], "year": "2015", "pdf": []}, {"id": "d3dae5c4f47a0457ebe2297d7e70432521c82cc6", "title": "The Benchmarking Initiative for Multimedia Evaluation: MediaEval 2016", "addresses": [{"address": "University of Geneva", "lat": "42.57054745", "lng": "-88.55578627", "type": "edu"}, {"address": "Dublin City University", "lat": "53.38522185", "lng": "-6.25740874", "type": "edu"}], "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/d3da/e5c4f47a0457ebe2297d7e70432521c82cc6.pdf"]}, {"id": "20a1350815c4588a2380414bc78a7e215a2e3955", "title": "Cross Pixel Optical Flow Similarity for Self-Supervised Learning", "addresses": [{"address": "University of Oxford", "lat": "51.75345380", "lng": "-1.25400997", "type": "edu"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.05636.pdf"]}, {"id": "f2cbdd5f24c2d6a4f33734636cc220f0825042f0", "title": "Dual-Glance Model for Deciphering Social Relationships", "addresses": [{"address": "National University of Singapore", "lat": "1.29620180", "lng": "103.77689944", "type": "edu"}, {"address": "University of Minnesota", "lat": "44.97308605", "lng": "-93.23708813", "type": "edu"}], "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.00634.pdf"]}]}