summaryrefslogtreecommitdiff
path: root/site/datasets/citations/coco_qa.json
diff options
context:
space:
mode:
Diffstat (limited to 'site/datasets/citations/coco_qa.json')
-rw-r--r--site/datasets/citations/coco_qa.json2
1 files changed, 1 insertions, 1 deletions
diff --git a/site/datasets/citations/coco_qa.json b/site/datasets/citations/coco_qa.json
index c210dea0..2b2c538f 100644
--- a/site/datasets/citations/coco_qa.json
+++ b/site/datasets/citations/coco_qa.json
@@ -1 +1 @@
-{"id": "35b0331dfcd2897abd5749b49ff5e2b8ba0f7a62", "paper": {"paperId": "35b0331dfcd2897abd5749b49ff5e2b8ba0f7a62", "key": "coco_qa", "title": "Exploring Models and Data for Image Question Answering", "journal": "", "address": "", "address_type": "", "lat": "", "lng": "", "pdf_link": "http://pdfs.semanticscholar.org/aa79/9c29c0d44ece1864467af520fe70540c069b.pdf", "report_link": "papers/35b0331dfcd2897abd5749b49ff5e2b8ba0f7a62.html", "citation_count": 191, "citations_geocoded": 83, "citations_unknown": 108, "citations_empty": 12, "citations_pdf": 163, "citations_doi": 27, "name": "COCO QA"}, "address": null, "citations": [["Automatic Understanding of Image and Video Advertisements", "", "University of Pittsburgh", "University of Pittsburgh", "University of Pittsburgh, Sutherland Drive, West Oakland, PGH, Allegheny County, Pennsylvania, 15240, USA", "40.44415295", "-79.96243993", "edu", "", 2017], ["Tell-and-Answer: Towards Explainable Visual Question Answering using Attributes and Captions", "", "University of Rochester", "University of Rochester", "Memorial Art Gallery, 500, University Avenue, East End, Rochester, Monroe County, New York, 14607, USA", "43.15769690", "-77.58829158", "edu", "", "2018"], ["Counting Everyday Objects in Everyday Scenes", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["A Focused Dynamic Attention Model for Visual Question Answering", "", "National University of Singapore", "National University of Singapore", "NUS, Former 1936 British Outpost, Nepal Hill, Clementi, Southwest, 117542, Singapore", "1.29620180", "103.77689944", "edu", "", 2016], ["Sherlock: Modeling Structured Knowledge in Images", "", "Rutgers University", "Rutgers University", "Rutgers Cook Campus - North, Biel Road, New Brunswick, Middlesex County, New Jersey, 08901, USA", "40.47913175", "-74.43168868", "edu", "", 2015], ["Aligned Image-Word Representations Improve Inductive Transfer Across Vision-Language Tasks", "", "University of Illinois, Urbana-Champaign", "University of Illinois, Urbana-Champaign", "B-3, South Mathews Avenue, Urbana, Champaign County, Illinois, 61801, USA", "40.11116745", "-88.22587665", "edu", "", "2017"], ["On Human Motion Prediction Using Recurrent Neural Networks", "", "University of British Columbia", "University of British Columbia", "University of British Columbia, Eagles Drive, Hawthorn Place, University Endowment Lands, Metro Vancouver, British Columbia, V6T, Canada", "49.25839375", "-123.24658161", "edu", "", "2017"], ["Question Answering under Instructor Guidance with Memory Networks", "", "Tsinghua University", "Tsinghua University", "\u6e05\u534e\u5927\u5b66, 30, \u53cc\u6e05\u8def, \u4e94\u9053\u53e3, \u540e\u516b\u5bb6, \u6d77\u6dc0\u533a, 100084, \u4e2d\u56fd", "40.00229045", "116.32098908", "edu", "", ""], ["TVQA: Localized, Compositional Video Question Answering", "", "University of North Carolina at Chapel Hill", "University of North Carolina at Chapel Hill", "University of North Carolina at Chapel Hill, East Cameron Avenue, Chapel Hill, Orange County, North Carolina, 27514, USA", "35.91139710", "-79.05045290", "edu", "", "2018"], ["ABC-CNN: An Attention Based Convolutional Neural Network for Visual Question Answering", "", "University of Southern California", "University of Southern California", "University of Southern California, Watt Way, Saint James Park, LA, Los Angeles County, California, 90089, USA", "34.02241490", "-118.28634407", "edu", "", 2015], ["Don't Just Assume; Look and Answer: Overcoming Priors for Visual Question Answering", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["Making the V in VQA Matter: Elevating the Role of Image Understanding in Visual Question Answering", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["Sketch Recognition with Deep Visual-Sequential Fusion Model", "", "Fudan University", "Fudan University", "\u590d\u65e6\u5927\u5b66, 220, \u90af\u90f8\u8def, \u4e94\u89d2\u573a\u8857\u9053, \u6768\u6d66\u533a, \u4e0a\u6d77\u5e02, 200433, \u4e2d\u56fd", "31.30104395", "121.50045497", "edu", "", 2017], ["Best of Both Worlds: Transferring Knowledge from Discriminative Learning to a Generative Visual Dialog Model", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["Neural Module Networks", "", "University of California, Berkeley", "University of California, Berkeley", "Berkeley Art Museum and Pacific Film Archive, Bancroft Way, Southside, Berkeley, Alameda County, California, 94720-1076, USA", "37.86871260", "-122.25586815", "edu", "", 2016], ["Deep Attention Neural Tensor Network for Visual Question Answering", "", "Microsoft Research Asia", "Microsoft Live Labs Research, China", "China, Beijing Shi, Haidian Qu, Zhichun Rd, 49\u53f7\u5e0c\u683c\u739b709\u53f7 \u90ae\u653f\u7f16\u7801: 100080", "39.97721700", "116.33763200", "company", "", "2018"], ["SCA-CNN: Spatial and Channel-Wise Attention in Convolutional Networks for Image Captioning", "", "National University of Singapore", "National University of Singapore", "NUS, Former 1936 British Outpost, Nepal Hill, Clementi, Southwest, 117542, Singapore", "1.29620180", "103.77689944", "edu", "", 2017], ["CoDraw: Visual Dialog for Collaborative Drawing", "", "Seoul National University", "Seoul National University", "\uc11c\uc6b8\ub300\ud559\uad50, \uc11c\ud638\ub3d9\ub85c, \uc11c\ub454\ub3d9, \uad8c\uc120\uad6c, \uc218\uc6d0\uc2dc, \uacbd\uae30, 16614, \ub300\ud55c\ubbfc\uad6d", "37.26728000", "126.98411510", "edu", "", 2017], ["Diverse and Coherent Paragraph Generation from Images", "", "University of Illinois, Urbana-Champaign", "University of Illinois, Urbana-Champaign", "B-3, South Mathews Avenue, Urbana, Champaign County, Illinois, 61801, USA", "40.11116745", "-88.22587665", "edu", "", "2018"], ["An Analysis of Visual Question Answering Algorithms", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", "2017"], ["Simple Baseline for Visual Question Answering", "", "MIT", "Massachusetts Institute", "MIT, Amherst Street, Cambridgeport, Cambridge, Middlesex County, Massachusetts, 02238, USA", "42.35839610", "-71.09567788", "edu", "", 2015], ["Examining Cooperation in Visual Dialog Models", "", "University of Amsterdam", "University of Amsterdam", "Institute for Logic, Language and Computation (ILLC), 107, Science Park, Oost-Watergraafsmeer, Amsterdam, Oost, Amsterdam, Noord-Holland, Nederland, 1098XG, Nederland", "52.35536550", "4.95016440", "edu", "", 2017], ["Task-driven Visual Saliency and Attention-based Visual Question Answering", "", "Zhejiang University", "Zhejiang University", "\u6d59\u6c5f\u5927\u5b66\u4e4b\u6c5f\u6821\u533a, \u4e4b\u6c5f\u8def, \u8f6c\u5858\u8857\u9053, \u897f\u6e56\u533a (Xihu), \u676d\u5dde\u5e02 Hangzhou, \u6d59\u6c5f\u7701, 310008, \u4e2d\u56fd", "30.19331415", "120.11930822", "edu", "", 2017], ["TGIF-QA: Toward Spatio-Temporal Reasoning in Visual Question Answering", "", "Seoul National University", "Seoul National University", "\uc11c\uc6b8\ub300\ud559\uad50, \uc11c\ud638\ub3d9\ub85c, \uc11c\ub454\ub3d9, \uad8c\uc120\uad6c, \uc218\uc6d0\uc2dc, \uacbd\uae30, 16614, \ub300\ud55c\ubbfc\uad6d", "37.26728000", "126.98411510", "edu", "", "2017"], ["Explicit Reasoning over End-to-End Neural Architectures for Visual Question Answering", "", "Arizona State University", "Arizona State University", "Arizona State University Polytechnic campus, East Texas Avenue, Mesa, Maricopa County, Arizona, 85212, USA", "33.30715065", "-111.67653157", "edu", "", "2018"], ["MovieQA: Understanding Stories in Movies through Question-Answering", "", "Karlsruhe Institute of Technology", "Karlsruhe Institute of Technology", "KIT, Leopoldshafener Allee, Linkenheim, Linkenheim-Hochstetten, Landkreis Karlsruhe, Regierungsbezirk Karlsruhe, Baden-W\u00fcrttemberg, 76351, Deutschland", "49.10184375", "8.43312560", "edu", "", 2016], ["Exploration on Grounded Word Embedding: Matching Words and Images with Image-Enhanced Skip-Gram Model", "", "Peking University", "Peking University", "\u5317\u4eac\u5927\u5b66, 5\u53f7, \u9890\u548c\u56ed\u8def, \u7a3b\u9999\u56ed\u5357\u793e\u533a, \u6d77\u6dc0\u533a, \u5317\u4eac\u5e02, 100871, \u4e2d\u56fd", "39.99223790", "116.30393816", "edu", "", "2018"], ["TallyQA: Answering Complex Counting Questions", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", "2018"], ["FOIL it! Find One mismatch between Image and Language caption", "", "University of Trento", "University of Trento", "University of Trento, Via Giuseppe Verdi, Piedicastello, Trento, Territorio Val d'Adige, TN, TAA, 38122, Italia", "46.06588360", "11.11598940", "edu", "", "2017"], ["Learning to Disambiguate by Asking Discriminative Questions", "", "Robotics Institute", "Robotics Institute", "Institute for Field Robotics, \u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e2d\u0e38\u0e17\u0e34\u0e28, \u0e01\u0e23\u0e38\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23, \u0e40\u0e02\u0e15\u0e23\u0e32\u0e29\u0e0e\u0e23\u0e4c\u0e1a\u0e39\u0e23\u0e13\u0e30, \u0e01\u0e23\u0e38\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23, 10140, \u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22", "13.65450525", "100.49423171", "edu", "", "2017"], ["Proposal Incorporating Structural Bias into Neural Networks", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2017"], ["Learning Social Image Embedding with Deep Multimodal Attention Networks", "", "Beihang University", "Beihang University", "\u5317\u4eac\u822a\u7a7a\u822a\u5929\u5927\u5b66, 37, \u5b66\u9662\u8def, \u4e94\u9053\u53e3, \u540e\u516b\u5bb6, \u6d77\u6dc0\u533a, 100083, \u4e2d\u56fd", "39.98083330", "116.34101249", "edu", "", "2017"], ["Video Fill In the Blank Using LR/RL LSTMs with Spatial-Temporal Attentions", "", "University of Central Florida", "University of Central Florida", "University of Central Florida, Libra Drive, University Park, Orange County, Florida, 32816, USA", "28.59899755", "-81.19712501", "edu", "", "2017"], ["R-VQA: Learning Visual Relation Facts with Semantic Attention for Visual Question Answering", "", "Tsinghua University", "Tsinghua University", "\u6e05\u534e\u5927\u5b66, 30, \u53cc\u6e05\u8def, \u4e94\u9053\u53e3, \u540e\u516b\u5bb6, \u6d77\u6dc0\u533a, 100084, \u4e2d\u56fd", "40.00229045", "116.32098908", "edu", "", "2018"], ["Answer-Type Prediction for Visual Question Answering", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", 2016], ["Presentation Attack Detection for Cadaver Iris", "", "University of Notre Dame", "University of Notre Dame", "University of Notre Dame du Lac, Holy Cross Drive, Notre Dame, Maple Lane, Saint Joseph County, Indiana, 46556, USA", "41.70456775", "-86.23822026", "edu", "", "2018"], ["Knowledge Acquisition for Visual Question Answering via Iterative Querying", "", "University of Southern California", "University of Southern California", "University of Southern California, Watt Way, Saint James Park, LA, Los Angeles County, California, 90089, USA", "34.02241490", "-118.28634407", "edu", "", 2017], ["VizWiz Grand Challenge: Answering Visual Questions from Blind People", "", "University of Texas at Austin", "University of Texas at Austin", "University of Texas at Austin, 1, East 23rd Street, The Drag, Austin, Travis County, Texas, 78712, USA", "30.28415100", "-97.73195598", "edu", "", "2018"], ["A Reinforcement Learning Framework for Natural Question Generation using Bi-discriminators", "", "Fudan University", "Fudan University", "\u590d\u65e6\u5927\u5b66, 220, \u90af\u90f8\u8def, \u4e94\u89d2\u573a\u8857\u9053, \u6768\u6d66\u533a, \u4e0a\u6d77\u5e02, 200433, \u4e2d\u56fd", "31.30104395", "121.50045497", "edu", "", "2018"], ["Leveraging Video Descriptions to Learn Video Question Answering", "", "Stanford University", "Stanford University", "Stanford University, Memorial Way, Stanford, Santa Clara County, California, 94305-6015, USA", "37.43131385", "-122.16936535", "edu", "", 2017], ["Co-Attending Free-Form Regions and Detections With Multi-Modal Multiplicative Feature Embedding for Visual Question Answering", "", "East China Normal University", "East China Normal University", "\u534e\u4e1c\u5e08\u8303\u5927\u5b66, 3663, \u4e2d\u5c71\u5317\u8def, \u66f9\u5bb6\u6e21, \u666e\u9640\u533a, \u666e\u9640\u533a (Putuo), \u4e0a\u6d77\u5e02, 200062, \u4e2d\u56fd", "31.22849230", "121.40211389", "edu", "", "2018"], ["Visual Question Answering with Question Representation Update (QRU)", "", "Chinese University of Hong Kong", "The Chinese University of Hong Kong", "\u4e2d\u5927 CUHK, NA\u68af New Asia Stairs, \u99ac\u6599\u6c34 Ma Liu Shui, \u4e5d\u809a\u6751 Kau To Village, \u6c99\u7530\u5340 Sha Tin District, \u65b0\u754c New Territories, HK, DD193 1191, \u4e2d\u56fd", "22.42031295", "114.20788644", "edu", "", 2016], ["Stacked Attention Networks for Image Question Answering", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", 2016], ["Zero-Shot Transfer VQA Dataset", "", "Baidu Research, USA", "Baidu Research, USA", "1195 Bordeaux Dr, Sunnyvale, CA 94089, USA", "37.40922650", "-122.02366150", "company", "", "2018"], ["VQS: Linking Segmentations to Questions and Answers for Supervised Attention in VQA and Question-Focused Semantic Segmentation", "", "University of Central Florida", "University of Central Florida", "University of Central Florida, Libra Drive, University Park, Orange County, Florida, 32816, USA", "28.59899755", "-81.19712501", "edu", "", "2017"], ["Joint Image Captioning and Question Answering", "", "University of Texas at Austin", "University of Texas at Austin", "University of Texas at Austin, 1, East 23rd Street, The Drag, Austin, Travis County, Texas, 78712, USA", "30.28415100", "-97.73195598", "edu", "", "2018"], ["Video Fill in the Blank with Merging LSTMs", "", "University of Central Florida", "University of Central Florida", "University of Central Florida, Libra Drive, University Park, Orange County, Florida, 32816, USA", "28.59899755", "-81.19712501", "edu", "", 2016], ["Where to Look: Focus Regions for Visual Question Answering", "", "University of Illinois, Urbana-Champaign", "University of Illinois, Urbana-Champaign", "B-3, South Mathews Avenue, Urbana, Champaign County, Illinois, 61801, USA", "40.11116745", "-88.22587665", "edu", "", 2016], ["Visual7W: Grounded Question Answering in Images", "", "Stanford University", "Stanford University", "Stanford University, Memorial Way, Stanford, Santa Clara County, California, 94305-6015, USA", "37.43131385", "-122.16936535", "edu", "", 2016], ["DVQA: Understanding Data Visualizations via Question Answering", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", "2018"], ["Data Augmentation for Visual Question Answering", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", "2017"], ["Subhashini VenugopalanProposal", "", "University of Texas at Austin", "University of Texas at Austin", "University of Texas at Austin, 1, East 23rd Street, The Drag, Austin, Travis County, Texas, 78712, USA", "30.28415100", "-97.73195598", "edu", "", 2015], ["Generative Adversarial Text to Image Synthesis", "", "Max Planck Institute for Informatics", "Max Planck Institute for Informatics", "MPII, E1 4, Campus, Universit\u00e4t, Sankt Johann, Bezirk Mitte, Saarbr\u00fccken, Regionalverband Saarbr\u00fccken, Saarland, 66123, Deutschland", "49.25795660", "7.04577417", "edu", "", 2016], ["Creativity: Generating Diverse Questions Using Variational Autoencoders", "", "Northwestern University", "Northwestern University", "Northwestern University, Northwestern Place, Downtown, Evanston, Cook County, Illinois, 60208, USA", "42.05511640", "-87.67581113", "edu", "", "2017"], ["Pay Attention to Those Sets! Learning Quantification from Images", "", "University of Barcelona", "University of Barcelona", "Universitat de Barcelona, Carrer de la Diputaci\u00f3, l'Antiga Esquerra de l'Eixample, Eixample, Barcelona, BCN, CAT, 08013, Espa\u00f1a", "41.38689130", "2.16352385", "edu", "", 2017], ["Textually Enriched Neural Module Networks for Visual Question Answering", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2018"], ["Being Negative but Constructively: Lessons Learnt from Creating Better Visual Question Answering Datasets", "", "University of Southern California", "University of Southern California", "University of Southern California, Watt Way, Saint James Park, LA, Los Angeles County, California, 90089, USA", "34.02241490", "-118.28634407", "edu", "", 2017], ["Visual Dialog", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["Multimodal Differential Network for Visual Question Generation", "", "Indian Institute of Technology Delhi", "IIIT-Delhi, India", "IIIT-Delhi, Mathura Road, Friends Colony, South East Delhi, Delhi, 110020, India", "28.54632595", "77.27325504", "edu", "", "2018"], ["Motion-Appearance Co-Memory Networks for Video Question Answering", "", "University of Southern California", "University of Southern California", "University of Southern California, Watt Way, Saint James Park, LA, Los Angeles County, California, 90089, USA", "34.02241490", "-118.28634407", "edu", "", "2018"], ["Dual Attention Network for Visual Question Answering", "", "Boston University", "Boston University", "BU, Bay State Road, Fenway, Boston, Suffolk County, Massachusetts, 02215, USA", "42.35042530", "-71.10056114", "edu", "", 2017], ["Solving Visual Madlibs with Multiple Cues", "", "University of Illinois, Urbana-Champaign", "University of Illinois, Urbana-Champaign", "B-3, South Mathews Avenue, Urbana, Champaign County, Illinois, 61801, USA", "40.11116745", "-88.22587665", "edu", "", "2016"], ["Automatic Generation of Grounded Visual Questions", "", "Tianjin University", "Tianjin University", "\u6cf0\u5c71\u822a\u7a7a\u6e2f/\u5929\u6d25\u5927\u53a6, \u67a3\u884c\u8def, \u67a3\u884c \u9ad8\u738b\u5bfa, \u957f\u57ce\u8def, \u5927\u6cb3, \u5cb1\u5cb3\u533a (Daiyue), \u6cf0\u5b89\u5e02, \u5c71\u4e1c\u7701, 271000, \u4e2d\u56fd", "36.20304395", "117.05842113", "edu", "", "2017"], ["Visual Storytelling", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2016"], ["Learning Cooperative Visual Dialog Agents with Deep Reinforcement Learning", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", 2017], ["PinterNet: A thematic label curation tool for large image datasets", "", "Northwestern University", "Northwestern University", "Northwestern University, Northwestern Place, Downtown, Evanston, Cook County, Illinois, 60208, USA", "42.05511640", "-87.67581113", "edu", "", 2016], ["Visual Question Answering Dataset for Bilingual Image Understanding: A Study of Cross-Lingual Transfer Using Attention Maps", "", "Tokyo Institute of Technology", "Tokyo Institute of Technology", "\u6771\u4eac\u5de5\u696d\u5927\u5b66, \u539a\u6728\u8857\u9053, \u7dd1\u533a, \u753a\u7530\u5e02, \u795e\u5948\u5ddd\u770c, \u95a2\u6771\u5730\u65b9, 226-0026, \u65e5\u672c", "35.51675380", "139.48342251", "edu", "", "2018"], ["Greedy Inference Algorithms for Structured and Neural Models", "", "Virginia Polytechnic Institute and State University", "Virginia Polytechnic Institute and State University", "Virginia Polytechnic Institute and State University, Duck Pond Drive, Blacksburg, Montgomery County, Virginia, 24061-9517, USA", "37.21872455", "-80.42542519", "edu", "", 2018], ["JUST at VQA-Med: A VGG-Seq2Seq Model", "", "Jordan University of Science and Technology", "Jordan University of Science and Technology", "Jordan University of Science and Technology, \u0634\u0627\u0631\u0639 \u0627\u0644\u0623\u0631\u062f\u0646, \u0625\u0631\u0628\u062f\u200e, \u0625\u0631\u0628\u062f, \u0627\u0644\u0623\u0631\u062f\u0646", "32.49566485", "35.99160717", "edu", "", "2018"], ["Two-Stage Synthesis Networks for Transfer Learning in Machine Comprehension", "", "Microsoft", "Microsoft Corporation, Redmond, WA, USA", "One Microsoft Way, Redmond, WA 98052, USA", "47.64233180", "-122.13693020", "company", "", "2017"], ["From VQA to Multimodal CQA: Adapting Visual QA Models for Community QA Tasks", "", "Waseda University", "Waseda University", "\u65e9\u7a32\u7530\u5927\u5b66 \u5317\u4e5d\u5dde\u30ad\u30e3\u30f3\u30d1\u30b9, 2-2, \u6709\u6bdb\u5f15\u91ce\u7dda, \u516b\u5e61\u897f\u533a, \u5317\u4e5d\u5dde\u5e02, \u798f\u5ca1\u770c, \u4e5d\u5dde\u5730\u65b9, 808-0135, \u65e5\u672c", "33.88987280", "130.70856205", "edu", "", "2018"], ["A Dataset and Exploration of Models for Understanding Video Data through Fill-in-the-Blank Question-Answering", "", "Polytechnique Montreal", "Polytechnique Montr\u00b4eal", "2900 Boulevard Edouard-Montpetit, Montr\u00e9al, QC H3T 1J4, Canada", "45.50438400", "-73.61288290", "edu", "Polytechnique Montreal, Montreal, Quebec, Canada", "2017"], ["CLEVR: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning", "", "Stanford University", "Stanford University", "Stanford University, Memorial Way, Stanford, Santa Clara County, California, 94305-6015, USA", "37.43131385", "-122.16936535", "edu", "", 2017], ["Jointly Discovering Visual Objects and Spoken Words from Raw Sensory Input", "", "MIT", "Massachusetts Institute", "MIT, Amherst Street, Cambridgeport, Cambridge, Middlesex County, Massachusetts, 02238, USA", "42.35839610", "-71.09567788", "edu", "", "2018"], ["Anchors: High-Precision Model-Agnostic Explanations", "", "University of Washington", "University of Washington", "University of Washington, Rainier Vista, Montlake, University District, Seattle, King County, Washington, 98195, USA", "47.65432380", "-122.30800894", "edu", "", 2017], ["Adversarial Geometry-Aware Human Motion Prediction", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2018"], ["End-to-End Instance Segmentation and Counting with Recurrent Attention", "", "University of Toronto", "University of Toronto", "University of Toronto, St. George Street, Bloor Street Culture Corridor, Old Toronto, Toronto, Ontario, M5S 1A5, Canada", "43.66333345", "-79.39769975", "edu", "", "2016"], ["Neural Self Talk: Image Understanding via Continuous Questioning and Answering", "", "University of Maryland", "University of Maryland", "The Grand Garage, 5, North Paca Street, Seton Hill, Baltimore, Maryland, 21201, USA", "39.28996850", "-76.62196103", "edu", "", 2015], ["Categorizing Concepts with Basic Level for Vision-to-Language", "", "Tongji University", "Tongji University", "\u540c\u6d4e\u5927\u5b66, 1239, \u56db\u5e73\u8def, \u6c5f\u6e7e, \u8679\u53e3\u533a, \u4e0a\u6d77\u5e02, 200092, \u4e2d\u56fd", "31.28473925", "121.49694909", "edu", "", ""], ["Visual Text Correction", "", "University of Central Florida", "University of Central Florida", "University of Central Florida, Libra Drive, University Park, Orange County, Florida, 32816, USA", "28.59899755", "-81.19712501", "edu", "", "2018"], ["Comparatives, Quantifiers, Proportions: A Multi-Task Model for the Learning of Quantities from Vision", "", "University of Trento", "University of Trento", "University of Trento, Via Giuseppe Verdi, Piedicastello, Trento, Territorio Val d'Adige, TN, TAA, 38122, Italia", "46.06588360", "11.11598940", "edu", "", "2018"], ["Computer Vision and Natural Language Processing: Recent Approaches in Multimedia and Robotics", "", "University of Maryland", "University of Maryland", "The Grand Garage, 5, North Paca Street, Seton Hill, Baltimore, Maryland, 21201, USA", "39.28996850", "-76.62196103", "edu", "", 2016], ["The Color of the Cat is Gray: 1 Million Full-Sentences Visual Question Answering (FSVQA)", "", "University of Tokyo", "University of Tokyo", "\u6771\u4eac\u5927\u5b66 \u67cf\u30ad\u30e3\u30f3\u30d1\u30b9, \u5b66\u878d\u5408\u306e\u9053, \u67cf\u5e02, \u5343\u8449\u770c, \u95a2\u6771\u5730\u65b9, 277-8583, \u65e5\u672c", "35.90204480", "139.93622009", "edu", "", 2016]]} \ No newline at end of file
+{"id": "35b0331dfcd2897abd5749b49ff5e2b8ba0f7a62", "paper": {"paperId": "35b0331dfcd2897abd5749b49ff5e2b8ba0f7a62", "key": "coco_qa", "title": "Exploring Models and Data for Image Question Answering", "journal": "Unknown", "address": "", "address_type": "", "lat": "", "lng": "", "pdf_link": "https://arxiv.org/pdf/1505.02074.pdf", "report_link": "papers/35b0331dfcd2897abd5749b49ff5e2b8ba0f7a62.html", "citation_count": 191, "citations_geocoded": 106, "citations_unknown": 85, "citations_empty": 12, "citations_pdf": 163, "citations_doi": 27, "name": "COCO QA"}, "address": null, "citations": [["Automatic Understanding of Image and Video Advertisements", "", "University of Pittsburgh", "University of Pittsburgh", "University of Pittsburgh, Sutherland Drive, West Oakland, PGH, Allegheny County, Pennsylvania, 15240, USA", "40.44415295", "-79.96243993", "edu", "", "2017"], ["Tell-and-Answer: Towards Explainable Visual Question Answering using Attributes and Captions", "", "University of Rochester", "University of Rochester", "Memorial Art Gallery, 500, University Avenue, East End, Rochester, Monroe County, New York, 14607, USA", "43.15769690", "-77.58829158", "edu", "", "2018"], ["Counting Everyday Objects in Everyday Scenes", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["A Focused Dynamic Attention Model for Visual Question Answering", "", "National University of Singapore", "National University of Singapore", "NUS, Former 1936 British Outpost, Nepal Hill, Clementi, Southwest, 117542, Singapore", "1.29620180", "103.77689944", "edu", "", "2016"], ["Sherlock: Scalable Fact Learning in Images", "", "Rutgers University", "Rutgers University", "Rutgers Cook Campus - North, Biel Road, New Brunswick, Middlesex County, New Jersey, 08901, USA", "40.47913175", "-74.43168868", "edu", "", "2017"], ["Aligned Image-Word Representations Improve Inductive Transfer Across Vision-Language Tasks", "", "University of Illinois, Urbana-Champaign", "University of Illinois, Urbana-Champaign", "B-3, South Mathews Avenue, Urbana, Champaign County, Illinois, 61801, USA", "40.11116745", "-88.22587665", "edu", "", "2017"], ["On Human Motion Prediction Using Recurrent Neural Networks", "", "University of British Columbia", "University of British Columbia", "University of British Columbia, Eagles Drive, Hawthorn Place, University Endowment Lands, Metro Vancouver, British Columbia, V6T, Canada", "49.25839375", "-123.24658161", "edu", "", "2017"], ["Question Answering under Instructor Guidance with Memory Networks", "", "Tsinghua University", "Tsinghua University", "\u6e05\u534e\u5927\u5b66, 30, \u53cc\u6e05\u8def, \u4e94\u9053\u53e3, \u540e\u516b\u5bb6, \u6d77\u6dc0\u533a, 100084, \u4e2d\u56fd", "40.00229045", "116.32098908", "edu", "", ""], ["TVQA: Localized, Compositional Video Question Answering", "", "University of North Carolina at Chapel Hill", "University of North Carolina at Chapel Hill", "University of North Carolina at Chapel Hill, East Cameron Avenue, Chapel Hill, Orange County, North Carolina, 27514, USA", "35.91139710", "-79.05045290", "edu", "", "2018"], ["ABC-CNN: An Attention Based Convolutional Neural Network for Visual Question Answering", "", "University of Southern California", "University of Southern California", "University of Southern California, Watt Way, Saint James Park, LA, Los Angeles County, California, 90089, USA", "34.02241490", "-118.28634407", "edu", "", "2015"], ["Don't Just Assume; Look and Answer: Overcoming Priors for Visual Question Answering", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["Making the V in VQA Matter: Elevating the Role of Image Understanding in Visual Question Answering", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["Sketch Recognition with Deep Visual-Sequential Fusion Model", "", "Fudan University", "Fudan University", "\u590d\u65e6\u5927\u5b66, 220, \u90af\u90f8\u8def, \u4e94\u89d2\u573a\u8857\u9053, \u6768\u6d66\u533a, \u4e0a\u6d77\u5e02, 200433, \u4e2d\u56fd", "31.30104395", "121.50045497", "edu", "", "2017"], ["Best of Both Worlds: Transferring Knowledge from Discriminative Learning to a Generative Visual Dialog Model", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["Visual Question Answering using Explicit Visual Attention", "Aristotle University of Thessaloniki, Dept. of Informatics, Thessaloniki, 54124,Greece", "Aristotle University of Thessaloniki", "Aristotle University of Thessaloniki", "\u0391\u03c1\u03b9\u03c3\u03c4\u03bf\u03c4\u03ad\u03bb\u03b5\u03b9\u03bf \u03a0\u03b1\u03bd\u03b5\u03c0\u03b9\u03c3\u03c4\u03ae\u03bc\u03b9\u03bf \u0398\u03b5\u03c3\u03c3\u03b1\u03bb\u03bf\u03bd\u03af\u03ba\u03b7\u03c2, \u0395\u03b3\u03bd\u03b1\u03c4\u03af\u03b1, \u03a3\u03b1\u03c1\u03ac\u03bd\u03c4\u03b1 \u0395\u03ba\u03ba\u03bb\u03b7\u03c3\u03af\u03b5\u03c2, \u0395\u03c5\u03b1\u03b3\u03b3\u03b5\u03bb\u03af\u03c3\u03c4\u03c1\u03b9\u03b1, \u0398\u03b5\u03c3\u03c3\u03b1\u03bb\u03bf\u03bd\u03af\u03ba\u03b7, \u0394\u03ae\u03bc\u03bf\u03c2 \u0398\u03b5\u03c3\u03c3\u03b1\u03bb\u03bf\u03bd\u03af\u03ba\u03b7\u03c2, \u03a0\u03b5\u03c1\u03b9\u03c6\u03b5\u03c1\u03b5\u03b9\u03b1\u03ba\u03ae \u0395\u03bd\u03cc\u03c4\u03b7\u03c4\u03b1 \u0398\u03b5\u03c3\u03c3\u03b1\u03bb\u03bf\u03bd\u03af\u03ba\u03b7\u03c2, \u03a0\u03b5\u03c1\u03b9\u03c6\u03ad\u03c1\u03b5\u03b9\u03b1 \u039a\u03b5\u03bd\u03c4\u03c1\u03b9\u03ba\u03ae\u03c2 \u039c\u03b1\u03ba\u03b5\u03b4\u03bf\u03bd\u03af\u03b1\u03c2, \u039c\u03b1\u03ba\u03b5\u03b4\u03bf\u03bd\u03af\u03b1 - \u0398\u03c1\u03ac\u03ba\u03b7, 54124, \u0395\u03bb\u03bb\u03ac\u03b4\u03b1", "40.62984145", "22.95889350", "edu", "", "2018"], ["Combining Graph-based Dependency Features with Convolutional Neural Network for Answer Triggering", "", "India", "India", "India", "20.59368400", "78.96288000", "edu", "", "2018"], ["Simple and effective visual question answering in a single modality", "Zhejiang University, College of Computer Science, Hangzhou, P. R. China", "Zhejiang University", "Zhejiang University", "\u6d59\u6c5f\u5927\u5b66\u4e4b\u6c5f\u6821\u533a, \u4e4b\u6c5f\u8def, \u8f6c\u5858\u8857\u9053, \u897f\u6e56\u533a (Xihu), \u676d\u5dde\u5e02 Hangzhou, \u6d59\u6c5f\u7701, 310008, \u4e2d\u56fd", "30.19331415", "120.11930822", "edu", "", "2016"], ["Neural Module Networks", "", "University of California, Berkeley", "University of California, Berkeley", "Berkeley Art Museum and Pacific Film Archive, Bancroft Way, Southside, Berkeley, Alameda County, California, 94720-1076, USA", "37.86871260", "-122.25586815", "edu", "", "2016"], ["Object-Difference Attention: A Simple Relational Attention for Visual Question Answering", "Beijing University of Posts and Telecommunications, Beijing, China", "Beijing University of Posts and Telecommunications", "Beijing University of Posts and Telecommunications", "\u5317\u4eac\u90ae\u7535\u5927\u5b66, \u897f\u571f\u57ce\u8def, \u6d77\u6dc0\u533a, \u5317\u4eac\u5e02, 100082, \u4e2d\u56fd", "39.96014880", "116.35193921", "edu", "", "2018"], ["Deep Attention Neural Tensor Network for Visual Question Answering", "", "Microsoft Research Asia", "Microsoft Live Labs Research, China", "China, Beijing Shi, Haidian Qu, Zhichun Rd, 49\u53f7\u5e0c\u683c\u739b709\u53f7 \u90ae\u653f\u7f16\u7801: 100080", "39.97721700", "116.33763200", "company", "", "2018"], ["Guest Editorial: Image and Language Understanding", "Google, Seattle, USA", "Boston University", "Boston University", "BU, Bay State Road, Fenway, Boston, Suffolk County, Massachusetts, 02215, USA", "42.35042530", "-71.10056114", "edu", "", "2017"], ["SCA-CNN: Spatial and Channel-Wise Attention in Convolutional Networks for Image Captioning", "", "National University of Singapore", "National University of Singapore", "NUS, Former 1936 British Outpost, Nepal Hill, Clementi, Southwest, 117542, Singapore", "1.29620180", "103.77689944", "edu", "", "2017"], ["CoDraw: Visual Dialog for Collaborative Drawing", "", "Seoul National University", "Seoul National University", "\uc11c\uc6b8\ub300\ud559\uad50, \uc11c\ud638\ub3d9\ub85c, \uc11c\ub454\ub3d9, \uad8c\uc120\uad6c, \uc218\uc6d0\uc2dc, \uacbd\uae30, 16614, \ub300\ud55c\ubbfc\uad6d", "37.26728000", "126.98411510", "edu", "", "2017"], ["Diverse and Coherent Paragraph Generation from Images", "", "University of Illinois, Urbana-Champaign", "University of Illinois, Urbana-Champaign", "B-3, South Mathews Avenue, Urbana, Champaign County, Illinois, 61801, USA", "40.11116745", "-88.22587665", "edu", "", "2018"], ["Bayesian Compression for Natural Language Processing", "", "Russia", "Russia", "Russia", "61.52401000", "105.31875600", "edu", "", "2018"], ["An Analysis of Visual Question Answering Algorithms", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", "2017"], ["Simple Baseline for Visual Question Answering", "", "MIT", "Massachusetts Institute", "MIT, Amherst Street, Cambridgeport, Cambridge, Middlesex County, Massachusetts, 02238, USA", "42.35839610", "-71.09567788", "edu", "", "2015"], ["Examining Cooperation in Visual Dialog Models", "", "University of Amsterdam", "University of Amsterdam", "Institute for Logic, Language and Computation (ILLC), 107, Science Park, Oost-Watergraafsmeer, Amsterdam, Oost, Amsterdam, Noord-Holland, Nederland, 1098XG, Nederland", "52.35536550", "4.95016440", "edu", "", "2017"], ["Task-driven Visual Saliency and Attention-based Visual Question Answering", "", "Zhejiang University", "Zhejiang University", "\u6d59\u6c5f\u5927\u5b66\u4e4b\u6c5f\u6821\u533a, \u4e4b\u6c5f\u8def, \u8f6c\u5858\u8857\u9053, \u897f\u6e56\u533a (Xihu), \u676d\u5dde\u5e02 Hangzhou, \u6d59\u6c5f\u7701, 310008, \u4e2d\u56fd", "30.19331415", "120.11930822", "edu", "", "2017"], ["TGIF-QA: Toward Spatio-Temporal Reasoning in Visual Question Answering", "", "Seoul National University", "Seoul National University", "\uc11c\uc6b8\ub300\ud559\uad50, \uc11c\ud638\ub3d9\ub85c, \uc11c\ub454\ub3d9, \uad8c\uc120\uad6c, \uc218\uc6d0\uc2dc, \uacbd\uae30, 16614, \ub300\ud55c\ubbfc\uad6d", "37.26728000", "126.98411510", "edu", "", "2017"], ["Explicit Reasoning over End-to-End Neural Architectures for Visual Question Answering", "", "Arizona State University", "Arizona State University", "Arizona State University Polytechnic campus, East Texas Avenue, Mesa, Maricopa County, Arizona, 85212, USA", "33.30715065", "-111.67653157", "edu", "", "2018"], ["MovieQA: Understanding Stories in Movies through Question-Answering", "", "Karlsruhe Institute of Technology", "Karlsruhe Institute of Technology", "KIT, Leopoldshafener Allee, Linkenheim, Linkenheim-Hochstetten, Landkreis Karlsruhe, Regierungsbezirk Karlsruhe, Baden-W\u00fcrttemberg, 76351, Deutschland", "49.10184375", "8.43312560", "edu", "", "2016"], ["I Lead, You Help but Only with Enough Details: Understanding User Experience of Co-Creation with Artificial Intelligence", "Seoul National University, Suwon-si, Gyeonggi-do, Rebublic of Korea", "Seoul National University", "Seoul National University", "\uc11c\uc6b8\ub300\ud559\uad50, \uc11c\ud638\ub3d9\ub85c, \uc11c\ub454\ub3d9, \uad8c\uc120\uad6c, \uc218\uc6d0\uc2dc, \uacbd\uae30, 16614, \ub300\ud55c\ubbfc\uad6d", "37.26728000", "126.98411510", "edu", "", "2018"], ["Exploration on Grounded Word Embedding: Matching Words and Images with Image-Enhanced Skip-Gram Model", "", "Peking University", "Peking University", "\u5317\u4eac\u5927\u5b66, 5\u53f7, \u9890\u548c\u56ed\u8def, \u7a3b\u9999\u56ed\u5357\u793e\u533a, \u6d77\u6dc0\u533a, \u5317\u4eac\u5e02, 100871, \u4e2d\u56fd", "39.99223790", "116.30393816", "edu", "", "2018"], ["TallyQA: Answering Complex Counting Questions", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", "2018"], ["Uncovering the Temporal Context for Video Question Answering", "SCS, Carnegie Mellon University, Pittsburgh, USA", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2017"], ["FOIL it! Find One mismatch between Image and Language caption", "", "University of Trento", "University of Trento", "University of Trento, Via Giuseppe Verdi, Piedicastello, Trento, Territorio Val d'Adige, TN, TAA, 38122, Italia", "46.06588360", "11.11598940", "edu", "", "2017"], ["Learning to Disambiguate by Asking Discriminative Questions", "", "Robotics Institute", "Robotics Institute", "Institute for Field Robotics, \u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e2d\u0e38\u0e17\u0e34\u0e28, \u0e01\u0e23\u0e38\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23, \u0e40\u0e02\u0e15\u0e23\u0e32\u0e29\u0e0e\u0e23\u0e4c\u0e1a\u0e39\u0e23\u0e13\u0e30, \u0e01\u0e23\u0e38\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23, 10140, \u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22", "13.65450525", "100.49423171", "edu", "", "2017"], ["Proposal Incorporating Structural Bias into Neural Networks", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2017"], ["Learning Social Image Embedding with Deep Multimodal Attention Networks", "", "China", "China", "China", "35.86166000", "104.19539700", "edu", "", "2017"], ["Video Fill In the Blank Using LR/RL LSTMs with Spatial-Temporal Attentions", "", "University of Central Florida", "University of Central Florida", "University of Central Florida, Libra Drive, University Park, Orange County, Florida, 32816, USA", "28.59899755", "-81.19712501", "edu", "", "2017"], ["More Than An Answer: Neural Pivot Network for Visual Qestion Answering", "Xiamen University, Xiamen, China", "Xiamen University", "Xiamen University", "\u53a6\u95e8\u5927\u5b66, \u601d\u660e\u5357\u8def Siming South Road, \u601d\u660e\u533a, \u601d\u660e\u533a (Siming), \u53a6\u95e8\u5e02 / Xiamen, \u798f\u5efa\u7701, 361005, \u4e2d\u56fd", "24.43994190", "118.09301781", "edu", "", "2017"], ["R-VQA: Learning Visual Relation Facts with Semantic Attention for Visual Question Answering", "", "Tsinghua University", "Tsinghua University", "\u6e05\u534e\u5927\u5b66, 30, \u53cc\u6e05\u8def, \u4e94\u9053\u53e3, \u540e\u516b\u5bb6, \u6d77\u6dc0\u533a, 100084, \u4e2d\u56fd", "40.00229045", "116.32098908", "edu", "", "2018"], ["Answer-Type Prediction for Visual Question Answering", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", "2016"], ["Presentation Attack Detection for Cadaver Iris", "", "University of Notre Dame", "University of Notre Dame", "University of Notre Dame du Lac, Holy Cross Drive, Notre Dame, Maple Lane, Saint Joseph County, Indiana, 46556, USA", "41.70456775", "-86.23822026", "edu", "", "2018"], ["Knowledge Acquisition for Visual Question Answering via Iterative Querying", "", "University of Southern California", "University of Southern California", "University of Southern California, Watt Way, Saint James Park, LA, Los Angeles County, California, 90089, USA", "34.02241490", "-118.28634407", "edu", "", "2017"], ["Game of Sketches: Deep Recurrent Models of Pictionary-Style Word Guessing", "", "Member", "Member", "1322 N Inglewood Ave, Coffeyville, KS 67337, USA", "37.05826350", "-95.67914910", "edu", "", "2018"], ["VizWiz Grand Challenge: Answering Visual Questions from Blind People", "", "University of Texas at Austin", "University of Texas at Austin", "University of Texas at Austin, 1, East 23rd Street, The Drag, Austin, Travis County, Texas, 78712, USA", "30.28415100", "-97.73195598", "edu", "", "2018"], ["A Reinforcement Learning Framework for Natural Question Generation using Bi-discriminators", "", "Fudan University", "Fudan University", "\u590d\u65e6\u5927\u5b66, 220, \u90af\u90f8\u8def, \u4e94\u89d2\u573a\u8857\u9053, \u6768\u6d66\u533a, \u4e0a\u6d77\u5e02, 200433, \u4e2d\u56fd", "31.30104395", "121.50045497", "edu", "", "2018"], ["Leveraging Video Descriptions to Learn Video Question Answering", "", "Stanford University", "Stanford University", "Stanford University, Memorial Way, Stanford, Santa Clara County, California, 94305-6015, USA", "37.43131385", "-122.16936535", "edu", "", "2017"], ["Co-Attending Free-Form Regions and Detections With Multi-Modal Multiplicative Feature Embedding for Visual Question Answering", "", "East China Normal University", "East China Normal University", "\u534e\u4e1c\u5e08\u8303\u5927\u5b66, 3663, \u4e2d\u5c71\u5317\u8def, \u66f9\u5bb6\u6e21, \u666e\u9640\u533a, \u666e\u9640\u533a (Putuo), \u4e0a\u6d77\u5e02, 200062, \u4e2d\u56fd", "31.22849230", "121.40211389", "edu", "", "2018"], ["Visual Question Answering with Question Representation Update (QRU)", "", "Chinese University of Hong Kong", "The Chinese University of Hong Kong", "\u4e2d\u5927 CUHK, NA\u68af New Asia Stairs, \u99ac\u6599\u6c34 Ma Liu Shui, \u4e5d\u809a\u6751 Kau To Village, \u6c99\u7530\u5340 Sha Tin District, \u65b0\u754c New Territories, HK, DD193 1191, \u4e2d\u56fd", "22.42031295", "114.20788644", "edu", "", "2016"], ["Neural Networks and Deep Learning", "IBM T. J. Watson Research Center, International Business Machines, Yorktown Heights, USA", "IBM Thomas J. Watson Research Center", "IBM Thomas J. Watson Research Center", "IBM Yorktown research lab, Adams Road, Millwood, Town of New Castle, Westchester County, New York, 10562, USA", "41.21002475", "-73.80407056", "company", "", "2018"], ["Stacked Attention Networks for Image Question Answering", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2016"], ["Zero-Shot Transfer VQA Dataset", "", "Baidu Research, USA", "Baidu Research, USA", "1195 Bordeaux Dr, Sunnyvale, CA 94089, USA", "37.40922650", "-122.02366150", "company", "", "2018"], ["VQS: Linking Segmentations to Questions and Answers for Supervised Attention in VQA and Question-Focused Semantic Segmentation", "", "University of Central Florida", "University of Central Florida", "University of Central Florida, Libra Drive, University Park, Orange County, Florida, 32816, USA", "28.59899755", "-81.19712501", "edu", "", "2017"], ["Joint Image Captioning and Question Answering", "", "University of Texas at Austin", "University of Texas at Austin", "University of Texas at Austin, 1, East 23rd Street, The Drag, Austin, Travis County, Texas, 78712, USA", "30.28415100", "-97.73195598", "edu", "", "2018"], ["Video Fill in the Blank with Merging LSTMs", "", "University of Central Florida", "University of Central Florida", "University of Central Florida, Libra Drive, University Park, Orange County, Florida, 32816, USA", "28.59899755", "-81.19712501", "edu", "", "2016"], ["Where to Look: Focus Regions for Visual Question Answering", "", "University of Illinois, Urbana-Champaign", "University of Illinois, Urbana-Champaign", "B-3, South Mathews Avenue, Urbana, Champaign County, Illinois, 61801, USA", "40.11116745", "-88.22587665", "edu", "", "2016"], ["Combining Multiple Cues for Visual Madlibs Question Answering", "University of North Carolina at Chapel Hill, Chapel Hill, USA", "University of North Carolina at Chapel Hill", "University of North Carolina at Chapel Hill", "University of North Carolina at Chapel Hill, East Cameron Avenue, Chapel Hill, Orange County, North Carolina, 27514, USA", "35.91139710", "-79.05045290", "edu", "", "2018"], ["Visual7W: Grounded Question Answering in Images", "", "Stanford University", "Stanford University", "Stanford University, Memorial Way, Stanford, Santa Clara County, California, 94305-6015, USA", "37.43131385", "-122.16936535", "edu", "", "2016"], ["Mention Recommendation for Multimodal Microblog with Cross-attention Memory Network", "Shandong University, Jinan, China", "Shandong University", "Shandong University", "\u5c71\u4e1c\u5927\u5b66, \u6cf0\u5b89\u8857, \u9ccc\u5c71\u536b\u8857\u9053, \u5373\u58a8\u533a, \u9752\u5c9b\u5e02, \u5c71\u4e1c\u7701, 266200, \u4e2d\u56fd", "36.36934730", "120.67381800", "edu", "", "2018"], ["DVQA: Understanding Data Visualizations via Question Answering", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", "2018"], ["Data Augmentation for Visual Question Answering", "", "Rochester Institute of Technology", "Rochester Institute of Technology", "Rochester Institute of Technology (RIT), 1, Lomb Memorial Drive, Bailey, Henrietta Town, Monroe County, New York, 14623, USA", "43.08250655", "-77.67121663", "edu", "", "2017"], ["Natural Language Video Description using Deep Recurrent Neural Networks", "", "University of Texas at Austin", "University of Texas at Austin", "University of Texas at Austin, 1, East 23rd Street, The Drag, Austin, Travis County, Texas, 78712, USA", "30.28415100", "-97.73195598", "edu", "", "2015"], ["Generative Adversarial Text to Image Synthesis", "", "Max Planck Institute for Informatics", "Max Planck Institute for Informatics", "MPII, E1 4, Campus, Universit\u00e4t, Sankt Johann, Bezirk Mitte, Saarbr\u00fccken, Regionalverband Saarbr\u00fccken, Saarland, 66123, Deutschland", "49.25795660", "7.04577417", "edu", "", "2016"], ["Creativity: Generating Diverse Questions Using Variational Autoencoders", "", "Northwestern University", "Northwestern University", "Northwestern University, Northwestern Place, Downtown, Evanston, Cook County, Illinois, 60208, USA", "42.05511640", "-87.67581113", "edu", "", "2017"], ["Pay Attention to Those Sets! Learning Quantification from Images", "", "University of Barcelona", "University of Barcelona", "Universitat de Barcelona, Carrer de la Diputaci\u00f3, l'Antiga Esquerra de l'Eixample, Eixample, Barcelona, BCN, CAT, 08013, Espa\u00f1a", "41.38689130", "2.16352385", "edu", "", "2017"], ["Deep Multimodal Learning: A Survey on Recent Advances and Trends", "University of Guelph, Guelph, Ontario, Canada", "University of Guelph", "University of Guelph, Guelph, Ontario, Canada", "University of Guelph, Guelph, ON N1G 1Y4, Canada", "43.52937320", "-80.22525020", "edu", "", "2017"], ["Textually Enriched Neural Module Networks for Visual Question Answering", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2018"], ["Being Negative but Constructively: Lessons Learnt from Creating Better Visual Question Answering Datasets", "", "University of Southern California", "University of Southern California", "University of Southern California, Watt Way, Saint James Park, LA, Los Angeles County, California, 90089, USA", "34.02241490", "-118.28634407", "edu", "", "2018"], ["Visual Dialog", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["Explore Multi-Step Reasoning in Video Question Answering", "Tianjin University, Tianjin, China", "Tianjin University", "Tianjin University", "\u6cf0\u5c71\u822a\u7a7a\u6e2f/\u5929\u6d25\u5927\u53a6, \u67a3\u884c\u8def, \u67a3\u884c \u9ad8\u738b\u5bfa, \u957f\u57ce\u8def, \u5927\u6cb3, \u5cb1\u5cb3\u533a (Daiyue), \u6cf0\u5b89\u5e02, \u5c71\u4e1c\u7701, 271000, \u4e2d\u56fd", "36.20304395", "117.05842113", "edu", "", "2018"], ["Multimodal Differential Network for Visual Question Generation", "", "Indian Institute of Technology Delhi", "IIIT-Delhi, India", "IIIT-Delhi, Mathura Road, Friends Colony, South East Delhi, Delhi, 110020, India", "28.54632595", "77.27325504", "edu", "", "2018"], ["Motion-Appearance Co-Memory Networks for Video Question Answering", "", "University of Southern California", "University of Southern California", "University of Southern California, Watt Way, Saint James Park, LA, Los Angeles County, California, 90089, USA", "34.02241490", "-118.28634407", "edu", "", "2018"], ["Dual Attention Network for Visual Question Answering", "", "Boston University", "Boston University", "BU, Bay State Road, Fenway, Boston, Suffolk County, Massachusetts, 02215, USA", "42.35042530", "-71.10056114", "edu", "", "2017"], ["Fast Parameter Adaptation for Few-shot Image Captioning and Visual Question Answering", "Zhejiang University, Zhejiang, China", "Zhejiang University", "Zhejiang University", "\u6d59\u6c5f\u5927\u5b66\u4e4b\u6c5f\u6821\u533a, \u4e4b\u6c5f\u8def, \u8f6c\u5858\u8857\u9053, \u897f\u6e56\u533a (Xihu), \u676d\u5dde\u5e02 Hangzhou, \u6d59\u6c5f\u7701, 310008, \u4e2d\u56fd", "30.19331415", "120.11930822", "edu", "", "2018"], ["Solving Visual Madlibs with Multiple Cues", "", "University of Illinois, Urbana-Champaign", "University of Illinois, Urbana-Champaign", "B-3, South Mathews Avenue, Urbana, Champaign County, Illinois, 61801, USA", "40.11116745", "-88.22587665", "edu", "", "2016"], ["Bayesian Sparsification of Gated Recurrent Neural Networks", "", "Russia", "Russia", "Russia", "61.52401000", "105.31875600", "edu", "", "2018"], ["Automatic Generation of Grounded Visual Questions", "", "Tianjin University", "Tianjin University", "\u6cf0\u5c71\u822a\u7a7a\u6e2f/\u5929\u6d25\u5927\u53a6, \u67a3\u884c\u8def, \u67a3\u884c \u9ad8\u738b\u5bfa, \u957f\u57ce\u8def, \u5927\u6cb3, \u5cb1\u5cb3\u533a (Daiyue), \u6cf0\u5b89\u5e02, \u5c71\u4e1c\u7701, 271000, \u4e2d\u56fd", "36.20304395", "117.05842113", "edu", "", "2017"], ["Visual Storytelling", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2016"], ["Learning Cooperative Visual Dialog Agents with Deep Reinforcement Learning", "", "Georgia Institute of Technology", "Georgia Institute of Technology", "Georgia Tech, Atlantic Drive Northwest, Bellwood, Rockdale, Atlanta, Fulton County, Georgia, 30318, USA", "33.77603300", "-84.39884086", "edu", "", "2017"], ["Unifying the Video and Question Attentions for Open-Ended Video Question Answering", "State Key Laboratory of CAD&CG, Zhejiang University, Hangzhou, China", "Zhejiang University", "Zhejiang University", "\u6d59\u6c5f\u5927\u5b66\u4e4b\u6c5f\u6821\u533a, \u4e4b\u6c5f\u8def, \u8f6c\u5858\u8857\u9053, \u897f\u6e56\u533a (Xihu), \u676d\u5dde\u5e02 Hangzhou, \u6d59\u6c5f\u7701, 310008, \u4e2d\u56fd", "30.19331415", "120.11930822", "edu", "", "2017"], ["PinterNet: A thematic label curation tool for large image datasets", "", "Northwestern University", "Northwestern University", "Northwestern University, Northwestern Place, Downtown, Evanston, Cook County, Illinois, 60208, USA", "42.05511640", "-87.67581113", "edu", "", "2016"], ["Visual Question Answering Dataset for Bilingual Image Understanding: A Study of Cross-Lingual Transfer Using Attention Maps", "", "Tokyo Institute of Technology", "Tokyo Institute of Technology", "\u6771\u4eac\u5de5\u696d\u5927\u5b66, \u539a\u6728\u8857\u9053, \u7dd1\u533a, \u753a\u7530\u5e02, \u795e\u5948\u5ddd\u770c, \u95a2\u6771\u5730\u65b9, 226-0026, \u65e5\u672c", "35.51675380", "139.48342251", "edu", "", "2018"], ["Greedy Inference Algorithms for Structured and Neural Models", "", "Virginia Tech", "Virginia Polytechnic Institute and State University", "Virginia Polytechnic Institute and State University, Duck Pond Drive, Blacksburg, Montgomery County, Virginia, 24061-9517, USA", "37.21872455", "-80.42542519", "edu", "", "2018"], ["JUST at VQA-Med: A VGG-Seq2Seq Model", "", "Jordan University of Science and Technology", "Jordan University of Science and Technology", "Jordan University of Science and Technology, \u0634\u0627\u0631\u0639 \u0627\u0644\u0623\u0631\u062f\u0646, \u0625\u0631\u0628\u062f\u200e, \u0625\u0631\u0628\u062f, \u0627\u0644\u0623\u0631\u062f\u0646", "32.49566485", "35.99160717", "edu", "", "2018"], ["Two-Stage Synthesis Networks for Transfer Learning in Machine Comprehension", "", "Microsoft", "Microsoft", "7021 S Memorial Dr, Tulsa, OK 74133, USA", "36.06303740", "-95.88099660", "edu", "", "2017"], ["From VQA to Multimodal CQA: Adapting Visual QA Models for Community QA Tasks", "", "Waseda University", "Waseda University", "\u65e9\u7a32\u7530\u5927\u5b66 \u5317\u4e5d\u5dde\u30ad\u30e3\u30f3\u30d1\u30b9, 2-2, \u6709\u6bdb\u5f15\u91ce\u7dda, \u516b\u5e61\u897f\u533a, \u5317\u4e5d\u5dde\u5e02, \u798f\u5ca1\u770c, \u4e5d\u5dde\u5730\u65b9, 808-0135, \u65e5\u672c", "33.88987280", "130.70856205", "edu", "", "2018"], ["A Dataset and Exploration of Models for Understanding Video Data through Fill-in-the-Blank Question-Answering", "", "Polytechnique Montreal", "Polytechnique Montr\u00b4eal", "2900 Boulevard Edouard-Montpetit, Montr\u00e9al, QC H3T 1J4, Canada", "45.50438400", "-73.61288290", "edu", "Polytechnique Montreal, Montreal, Quebec, Canada", "2017"], ["CLEVR: A Diagnostic Dataset for Compositional Language and Elementary Visual Reasoning", "", "Stanford University", "Stanford University", "Stanford University, Memorial Way, Stanford, Santa Clara County, California, 94305-6015, USA", "37.43131385", "-122.16936535", "edu", "", "2017"], ["Jointly Discovering Visual Objects and Spoken Words from Raw Sensory Input", "", "MIT", "Massachusetts Institute", "MIT, Amherst Street, Cambridgeport, Cambridge, Middlesex County, Massachusetts, 02238, USA", "42.35839610", "-71.09567788", "edu", "", "2018"], ["Structured Triplet Learning with POS-Tag Guided Attention for Visual Question Answering", "", "Microsoft", "Microsoft", "7021 S Memorial Dr, Tulsa, OK 74133, USA", "36.06303740", "-95.88099660", "edu", "", "2018"], ["Anchors: High-Precision Model-Agnostic Explanations", "", "University of Washington", "University of Washington", "University of Washington, Rainier Vista, Montlake, University District, Seattle, King County, Washington, 98195, USA", "47.65432380", "-122.30800894", "edu", "", "2018"], ["A Sequential Neural Encoder With Latent Structured Description for Modeling Sentences", "", "Member", "Member", "1322 N Inglewood Ave, Coffeyville, KS 67337, USA", "37.05826350", "-95.67914910", "edu", "", "2018"], ["Adversarial Geometry-Aware Human Motion Prediction", "", "Carnegie Mellon University", "Carnegie Mellon University", "Carnegie Mellon University Silicon Valley, South Akron Road, ARC, Santa Clara County, California, 94035-0016, USA", "37.41021930", "-122.05965487", "edu", "", "2018"], ["End-to-End Instance Segmentation and Counting with Recurrent Attention", "", "University of Toronto", "University of Toronto", "University of Toronto, St. George Street, Bloor Street Culture Corridor, Old Toronto, Toronto, Ontario, M5S 1A5, Canada", "43.66333345", "-79.39769975", "edu", "", "2016"], ["Neural Self Talk: Image Understanding via Continuous Questioning and Answering", "", "University of Maryland", "University of Maryland", "The Grand Garage, 5, North Paca Street, Seton Hill, Baltimore, Maryland, 21201, USA", "39.28996850", "-76.62196103", "edu", "", "2015"], ["Adversarial Learning of Answer-Related Representation for Visual Question Answering", "Beihang University, Beijing, China", "Beihang University", "Beihang University", "\u5317\u4eac\u822a\u7a7a\u822a\u5929\u5927\u5b66, 37, \u5b66\u9662\u8def, \u4e94\u9053\u53e3, \u540e\u516b\u5bb6, \u6d77\u6dc0\u533a, 100083, \u4e2d\u56fd", "39.98083330", "116.34101249", "edu", "", "2018"], ["Categorizing Concepts with Basic Level for Vision-to-Language", "", "Tongji University", "Tongji University", "\u540c\u6d4e\u5927\u5b66, 1239, \u56db\u5e73\u8def, \u6c5f\u6e7e, \u8679\u53e3\u533a, \u4e0a\u6d77\u5e02, 200092, \u4e2d\u56fd", "31.28473925", "121.49694909", "edu", "", ""], ["Visual Text Correction", "", "University of Central Florida", "University of Central Florida", "University of Central Florida, Libra Drive, University Park, Orange County, Florida, 32816, USA", "28.59899755", "-81.19712501", "edu", "", "2018"], ["Comparatives, Quantifiers, Proportions: A Multi-Task Model for the Learning of Quantities from Vision", "", "University of Trento", "University of Trento", "University of Trento, Via Giuseppe Verdi, Piedicastello, Trento, Territorio Val d'Adige, TN, TAA, 38122, Italia", "46.06588360", "11.11598940", "edu", "", "2018"], ["Computer Vision and Natural Language Processing: Recent Approaches in Multimedia and Robotics", "", "University of Maryland", "University of Maryland", "The Grand Garage, 5, North Paca Street, Seton Hill, Baltimore, Maryland, 21201, USA", "39.28996850", "-76.62196103", "edu", "", "2016"], ["The Color of the Cat is Gray: 1 Million Full-Sentences Visual Question Answering (FSVQA).", "", "University of Tokyo", "University of Tokyo", "\u6771\u4eac\u5927\u5b66 \u67cf\u30ad\u30e3\u30f3\u30d1\u30b9, \u5b66\u878d\u5408\u306e\u9053, \u67cf\u5e02, \u5343\u8449\u770c, \u95a2\u6771\u5730\u65b9, 277-8583, \u65e5\u672c", "35.90204480", "139.93622009", "edu", "", "2016"], ["Multi-Networks Joint Learning for Large-Scale Cross-Modal Retrieval", "University of Texas at San Antonio, San Antonio, TX, USA", "University of Texas at San Antonio", "University of Texas at San Antonio", "UTSA, Paseo Principal, San Antonio, Bexar County, Texas, 78249-1620, USA", "29.58333105", "-98.61944505", "edu", "", "2017"], ["Small Sample Learning in Big Data Era", "", "China", "China", "China", "35.86166000", "104.19539700", "edu", "", "2018"]]} \ No newline at end of file