summaryrefslogtreecommitdiff
path: root/site/datasets/unknown/coco.json
diff options
context:
space:
mode:
authorjules@lens <julescarbon@gmail.com>2019-03-28 17:31:11 +0100
committerjules@lens <julescarbon@gmail.com>2019-03-28 17:31:11 +0100
commit5309b381e64f59b8f57014ad41e55d7f87ca0628 (patch)
tree8747e32ccb265c4996aa84b80032019553e0b16f /site/datasets/unknown/coco.json
parent70bdd4f19b2b03b3e2d5423b2ea9b23be6d067d7 (diff)
whew
Diffstat (limited to 'site/datasets/unknown/coco.json')
-rw-r--r--site/datasets/unknown/coco.json2
1 files changed, 1 insertions, 1 deletions
diff --git a/site/datasets/unknown/coco.json b/site/datasets/unknown/coco.json
index cfa7df7f..c7822fbc 100644
--- a/site/datasets/unknown/coco.json
+++ b/site/datasets/unknown/coco.json
@@ -1 +1 @@
-{"id": "5e0f8c355a37a5a89351c02f174e7a5ddcb98683", "citations": [{"id": "08f6b52317b34b60aa65f38b83e3d72deffa0473", "title": "Sheffield MultiMT: Using Object Posterior Predictions for Multimodal Machine Translation", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/bae0/9864ea2c05bccf275cf824580ce212111e42.pdf"]}, {"id": "ce9799830a24412f4bd9ad30a9d6e2a50215f8f8", "title": "Beef Cattle Instance Segmentation Using Fully Convolutional Neural Network", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.01972.pdf"]}, {"id": "369c4a308ec9e56746f7cc1b164208b917e31a22", "title": "Scene Classification in Indoor Environments for Robots using Context Based Word Embeddings", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/369c/4a308ec9e56746f7cc1b164208b917e31a22.pdf"]}, {"id": "35ebe95db7ab148e25904604d3b06a9412f6b4a4", "title": "Illustrative Language Understanding: Large-Scale Visual Grounding with Image Search", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/80d9/993e6abd22a58a4fedd63754a15085778367.pdf"]}, {"id": "d4901683e2c2552fc2d62d4eb3b1f5d5fa60a5ff", "title": "ScaleNet: Scale Invariant Network for Semantic Segmentation in Urban Driving Scenes", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/d490/1683e2c2552fc2d62d4eb3b1f5d5fa60a5ff.pdf"]}, {"id": "254f7ef73629c18ff9ba13af59b2d78df3fda59d", "title": "Deep Object-Centric Representations for Generalizable Robot Learning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1708.04225.pdf"]}, {"id": "0f08d62e882026ac83ebf26c0bd288c553873814", "title": "Multispecies Fruit Flower Detection Using a Refined Semantic Segmentation Network", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.10080.pdf"]}, {"id": "89fff8387432878db240a044a98ff9c9200f3197", "title": "Learning Globally Optimized Object Detector via Policy Gradient", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Rao_Learning_Globally_Optimized_CVPR_2018_paper.pdf"]}, {"id": "b955969e1077ca328018c9e4dcf27b87ed9f5076", "title": "Knowing When to Look: Adaptive Attention via a Visual Sentinel for Image Captioning", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.01887.pdf"]}, {"id": "f56edb6f2bf4f5bc9d54284289212b8d4a437c1b", "title": "Detection and Localization of Texture-less Objects with Deep Neural Networks", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/f56e/db6f2bf4f5bc9d54284289212b8d4a437c1b.pdf"]}, {"id": "a345fc597b15c26f3f2823ccd5aac0d4c976279e", "title": "Adversarially Parameterized Optimization for 3D Human Pose Estimation", "year": "2017", "pdf": []}, {"id": "3d1e82b69663758a1db87fbebed6525d23090146", "title": "ScribbleSup: Scribble-Supervised Convolutional Networks for Semantic Segmentation", "year": "2016", "pdf": []}, {"id": "7c2a6b27dd756fe851954e0c040bc87bbd39f098", "title": "Using Psychophysical Methods to Understand Mechanisms of Face Identification in a Deep Neural Network", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018_workshops/papers/w39/Xu_Using_Psychophysical_Methods_CVPR_2018_paper.pdf"]}, {"id": "9595a267de2b0ecf7e4e2962a606c8854551e203", "title": "On the Relation between Color Image Denoising and Classification", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.01372.pdf"]}, {"id": "94a76e349e43f09c863a9c77e47722c5ade3740e", "title": "WELDON: Weakly Supervised Learning of Deep Convolutional Neural Networks", "year": "2016", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2016/papers/Durand_WELDON_Weakly_Supervised_CVPR_2016_paper.pdf", "http://openaccess.thecvf.com/content_cvpr_2016/supplemental/Durand_WELDON_Weakly_Supervised_2016_CVPR_supplemental.pdf", "http://webia.lip6.fr/~thomen/papers/Durand_CVPR_2016.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Durand_WELDON_Weakly_Supervised_CVPR_2016_paper.pdf"]}, {"id": "f5f3faa71ed2b61fa3a99bde25d6e84bfbefbb0b", "title": "Automatic Fish Classification System Using Deep Learning", "year": "2017", "pdf": []}, {"id": "3832a6d6b1f78cdadee6968d51c1c7c2922ab3cd", "title": "ISIA at the ImageCLEF 2017 Image Caption Task", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/3832/a6d6b1f78cdadee6968d51c1c7c2922ab3cd.pdf"]}, {"id": "6abdd7c4dea2d5d7ff82cd5e6e503be1589861df", "title": "Detection and Recognition of U.S. Warning Signs on Curves", "year": "2017", "pdf": []}, {"id": "a49b309ba14ad00a3bcb7b99a45d5bcf9bbc0ab1", "title": "Guided Perturbations: Self-Corrective Behavior in Convolutional Neural Networks", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017/papers/Sankaranarayanan_Guided_Perturbations_Self-Corrective_ICCV_2017_paper.pdf", "https://arxiv.org/pdf/1703.07928v1.pdf"]}, {"id": "104dd4963f7f0ef03fe09d505d31966666f9281d", "title": "Salient Object Subitizing", "year": "2015", "pdf": ["https://arxiv.org/pdf/1607.07525.pdf"]}, {"id": "041755d1c14077ce18d8553aa40a415283edc825", "title": "W2F: A Weakly-Supervised to Fully-Supervised Framework for Object Detection", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Zhang_W2F_A_Weakly-Supervised_CVPR_2018_paper.pdf", "https://ivul.kaust.edu.sa/Documents/Publications/2018/W2F%20A%20Weakly-Supervised%20to%20Fully-Supervised%20Framework.pdf"]}, {"id": "ce8c8e9fdbdd84adc096018bb0edb49b6913b946", "title": "Learning Discriminative Features for Speaker Identification and Verification", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/ce8c/8e9fdbdd84adc096018bb0edb49b6913b946.pdf"]}, {"id": "110556d073a4d930877edc597a92995f0ff9d294", "title": "Application of Faster R-CNN model on Human Running Pattern Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.05147.pdf"]}, {"id": "549c719c4429812dff4d02753d2db11dd490b2ae", "title": "YouTube-BoundingBoxes: A Large High-Precision Human-Annotated Data Set for Object Detection in Video", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.00824.pdf"]}, {"id": "1295cbaf3b03de2eb8c79530289f5939d7819e5c", "title": "DeepFix: A Fully Convolutional Neural Network for Predicting Human Eye Fixations", "year": "2017", "pdf": ["https://arxiv.org/pdf/1510.02927.pdf"]}, {"id": "c32b09f20badd9ce04309d7c5ebea88336a3345a", "title": "Token-level and sequence-level loss smoothing for RNN language models", "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.05062.pdf"]}, {"id": "4cfd15e9d3c01028bcda22e68791a95aa54c2a7c", "title": "DeepLesion: Automated Deep Mining, Categorization and Detection of Significant Radiology Image Findings using Large-Scale Clinical Lesion Annotations", "year": "2017", "pdf": ["https://arxiv.org/pdf/1710.01766.pdf"]}, {"id": "25b9ef5c78dbf17c71e6fd94054dd55d66c39264", "title": "Multimedia Semantic Integrity Assessment Using Joint Embedding Of Images And Text", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.01606.pdf"]}, {"id": "0b2c543e0c47454c4512569175094e6cb6ae02a9", "title": "The VizWiz Grand Challenge : A Large Visual Question Answering Dataset from Blind People Anonymous CVPR submission", "year": "", "pdf": ["https://pdfs.semanticscholar.org/0b2c/543e0c47454c4512569175094e6cb6ae02a9.pdf"]}, {"id": "efb01e07ee994fec890ad83b89c9b652349da50e", "title": "What am I searching for", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.11926.pdf"]}, {"id": "1c9333bcf523388d75f852e0689b0e7f5a04faa4", "title": "Person Part Segmentation based on Weak Supervision", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/1c93/33bcf523388d75f852e0689b0e7f5a04faa4.pdf"]}, {"id": "325093f2c5b33d7507c10aa422e96aa5b10a33f1", "title": "In-place Activated BatchNorm for Memory-Optimized Training of DNNs", "year": "2018", "pdf": []}, {"id": "c399c0089fb134d1476fadf5f0426e0e8b70eebd", "title": "The Lov\u00e1sz Hinge: A Novel Convex Surrogate for Submodular Losses.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1512.07797.pdf"]}, {"id": "c98e5a582e9f1b11bf5244ed16c78a7cbccf36b9", "title": "Towards Semantic SLAM: Points, Planes and Objects", "year": "2018", "pdf": []}, {"id": "b0d52bb1c9cff9416fe766e9cba94ceeab12d51f", "title": "Object detection using deep CNNs trained on synthetic images", "year": "2017", "pdf": ["https://arxiv.org/pdf/1706.06782.pdf"]}, {"id": "1d21ec277d2a72e506a9665f19ec5478dbedc20f", "title": "A Semi-Automatic Annotation Technology for Traffic Scene Image Labeling Based on Deep Learning Preprocessing", "year": "2017", "pdf": []}, {"id": "18f9a6045ba01cb079c4fa49a630d71bbd27cd92", "title": "A dataset of clinically generated visual questions and answers about radiology images", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/18f9/a6045ba01cb079c4fa49a630d71bbd27cd92.pdf"]}, {"id": "619701bb7d73171d6311c9e14b8ac5919692d03e", "title": "Semantic Edge Detection with Diverse Deep Supervision", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.02864.pdf"]}, {"id": "68f6b329044b01cce1660e953875ece2c3778c68", "title": "Intelligent Surveillance as an Edge Network Service: from Harr-Cascade, SVM to a Lightweight CNN", "year": "2018", "pdf": []}, {"id": "d983ce0a7d249bb85d14a66a89225a7136cc783b", "title": "A Single-shot Object Detector with Feature Aggragation and Enhancement", "year": "2019", "pdf": ["https://arxiv.org/pdf/1902.02923.pdf"]}, {"id": "22aa426aeffb77339646cc03da8e94de22396efc", "title": "S HAKES HAKE REGULARIZATION OF 3-BRANCH RESIDUAL NETWORKS", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/22aa/426aeffb77339646cc03da8e94de22396efc.pdf"]}, {"id": "19f73120fa314666c841ea88cb2b627aa67251b4", "title": "Evaluating Merging Strategies for Sampling-based Uncertainty Techniques in Object Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.06006.pdf"]}, {"id": "d31d4bb58f5dd67016e77352ac7600e2ba71e38f", "title": "Deep Learning Object Detection Methods for Ecological Camera Trap Data", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.10842.pdf"]}, {"id": "3fea412361b2d14cb3c6723968b421c1c8cb38e8", "title": "Shake-Shake regularization", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.07485.pdf"]}, {"id": "96fdc0131dc80ffa6d7b9c526e07f080414c54ec", "title": "1 Paying More A ention to Saliency : Image Captioning with Saliency and Context A ention", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/96fd/c0131dc80ffa6d7b9c526e07f080414c54ec.pdf"]}, {"id": "332339c32d41cc8176d360082b4d9faa90dadffa", "title": "UberNet: Training a Universal Convolutional Neural Network for Low-, Mid-, and High-Level Vision Using Diverse Datasets and Limited Memory", "year": "2017", "pdf": ["https://arxiv.org/pdf/1609.02132.pdf"]}, {"id": "76b61ccb488ad7861e9e36054f96195877f4c9f6", "title": "Automatic image annotation: the quirks and what works", "year": "2018", "pdf": []}, {"id": "193a69489230de1013dff9af1232e5379cc5282f", "title": "Intelligent Multimodal Framework for Human Assistive Robotics Based on Computer Vision Algorithms", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/193a/69489230de1013dff9af1232e5379cc5282f.pdf"]}, {"id": "05e9e85b5137016c93d042170e82f77bb551a108", "title": "A Benchmark Dataset and Evaluation Methodology for Video Object Segmentation", "year": "2016", "pdf": ["http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Perazzi_A_Benchmark_Dataset_CVPR_2016_paper.pdf", "https://graphics.ethz.ch/~perazzif/davis/files/davis.pdf", "https://graphics.ethz.ch/~perazzif/davis/files/davis_poster_cvpr_2016.pdf", "https://graphics.ethz.ch/~perazzif/davis/files/davis_supplementary.pdf"]}, {"id": "8899094797e82c5c185a0893896320ef77f60e64", "title": "Non-local Neural Networks", "year": "2018", "pdf": []}, {"id": "12660f0defc6580e566c0fa2ac909971d6c6883b", "title": "The SYNTHIA Dataset: A Large Collection of Synthetic Images for Semantic Segmentation of Urban Scenes", "year": "2016", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2016/papers/Ros_The_SYNTHIA_Dataset_CVPR_2016_paper.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Ros_The_SYNTHIA_Dataset_CVPR_2016_paper.pdf"]}, {"id": "bf881e53510b230879aa0d3b02576043b8f881e7", "title": "Automated Image Captioning Using Nearest-Neighbors Approach Driven by Top-Object Detections", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/bf88/1e53510b230879aa0d3b02576043b8f881e7.pdf"]}, {"id": "10beef48f3a9b2c4a6c863edefe2cac8bf09b50c", "title": "Fine-Level Semantic Labeling of Large-Scale 3D Model by Active Learning", "year": "2018", "pdf": []}, {"id": "761de31c575bf30162b6e0d92a1800eb406e96b5", "title": "A Flexible Convolutional Solver with Application to Photorealistic Style Transfer", "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.05285.pdf"]}, {"id": "96d1bf51b3aa118696ce18dc45924e5ca8b9d885", "title": "Insulator Detection in Aerial Images for Transmission Line Inspection Using Single Shot Multibox Detector", "year": "2019", "pdf": []}, {"id": "10c4b2489d7e1ee43a1d19724d3c1e9c33ca3f29", "title": "A Question-Answering framework for plots using Deep learning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.04655.pdf"]}, {"id": "dbc359e95d999bf119353f51edcb53b2beb5b3fe", "title": "Object Detection in Real-Time Systems: Going Beyond Precision", "year": "2018", "pdf": ["https://www.iiitd.edu.in/~chetan/papers/obj-det-wacv18.pdf"]}, {"id": "de1505819e145b5c22a6e09002510413019f7228", "title": "DeepFood: Deep Learning-Based Food Image Recognition for Computer-Aided Dietary Assessment", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.05675.pdf"]}, {"id": "637648198f9e91654ce27eaaa40512f2dc870fc1", "title": "Survey of Visual Question Answering: Datasets and Techniques", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.03865.pdf"]}, {"id": "0b888196dda951287dddb60bd44798aab16d6fca", "title": "Learning Common Sense through Visual Abstraction", "year": "2015", "pdf": ["http://openaccess.thecvf.com/content_iccv_2015/papers/Vedantam_Learning_Common_Sense_ICCV_2015_paper.pdf", "http://oui.csail.mit.edu/camera_readys/21.pdf", "http://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Vedantam_Learning_Common_Sense_ICCV_2015_paper.pdf", "https://filebox.ece.vt.edu/~parikh/Publications/ICCV2015_commonsense.pdf", "https://www.cc.gatech.edu/~parikh/Publications/ICCV2015_commonsense.pdf"]}, {"id": "bb127015474fdc51d4cd6b4dda7176a8c778ea49", "title": "Examining the Impact of Blur on Recognition by Convolutional Networks.", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.05760.pdf"]}, {"id": "85cace98e07e190494a5d88eba5e5b11538dc3b1", "title": "Hierarchical Semantic Mapping Using Convolutional Neural Networks for Intelligent Service Robotics", "year": "2018", "pdf": []}, {"id": "2dd853b617c176810e3dda008f7cacea6473f0ae", "title": "Image captioning using deep neural architectures", "year": "2017", "pdf": ["https://arxiv.org/pdf/1801.05568.pdf"]}, {"id": "25894be540936562953f37fbbcff69e5ac17a494", "title": "Semantic Image Retrieval via Active Grounding of Visual Situations", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.00088.pdf"]}, {"id": "28949c94c8457e570ba65c8382c993600f8404e7", "title": "Efficient Video Object Segmentation via Network Modulation", "year": "2018", "pdf": []}, {"id": "ba60b642a558858325f50d38a345b6bb85114ce1", "title": "Imbalanced Deep Learning by Minority Class Incremental Rectification", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.10851.pdf"]}, {"id": "a4aa145b519a9515e1162ca017d80d460002ba5a", "title": "HandyNet: A One-stop Solution to Detect, Segment, Localize & Analyze Driver Hands", "year": "2018", "pdf": []}, {"id": "442b6114ae8316c95f59acabe6de26f2b569cc02", "title": "Edit me: A Corpus and a Framework for Understanding Natural Language Image Editing", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/1205/8e0de3dadc1469035ddb3c82e1b7ba722960.pdf"]}, {"id": "729a9d35bc291cc7117b924219bef89a864ce62c", "title": "Recognizing Material Properties from Images.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1801.03127.pdf"]}, {"id": "41a96329d93e7b06ebd6b3a761cc0d50375c5f15", "title": "Visual Learning Beyond Direct Supervision", "year": "2018", "pdf": ["https://www2.eecs.berkeley.edu/Pubs/TechRpts/2018/EECS-2018-128.pdf"]}, {"id": "144ba4e9e64d4f9a5bb436c80c3c02b40e4092e0", "title": "Learning Video Features for Multi-label Classification", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/144b/a4e9e64d4f9a5bb436c80c3c02b40e4092e0.pdf"]}, {"id": "9c0ff1f1e2c83f089cb6b42a91869428fe9a92b2", "title": "Commonsense LocatedNear Relation Extraction", "year": "2017", "pdf": []}, {"id": "e5918229f44f0215d73a0b9d5eb13eb56764a2e4", "title": "Counting Vehicles with Cameras", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/e591/8229f44f0215d73a0b9d5eb13eb56764a2e4.pdf"]}, {"id": "1b793cc5dceb98c95e816aebc2252205bfd71569", "title": "ADNet: A Deep Network for Detecting Adverts", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.04115.pdf"]}, {"id": "f7cf4e0b1e371efca045e71996956be19aef0577", "title": "C L ] 3 D ec 2 01 7 Adversarial Ranking for Language Generation", "year": "2017", "pdf": []}, {"id": "0199150ccad6479eac9d693a7cc0406935d877a8", "title": "Towards Real-Time Accurate Object Detection in Both Images and Videos Based on Dual Refinement.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.08638.pdf"]}, {"id": "4c8dfc4720186799b2baf53c282a32ba3f312bcc", "title": "A Mask Regional Convolutional Neural Network Model for Segmenting Real Time Traffic Images", "year": "2018", "pdf": []}, {"id": "3921afded8bc8471d784df86f64432fb14b8ef58", "title": "Egocentric Gesture Recognition for Head-Mounted AR devices", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.05380.pdf"]}, {"id": "b29be8ae91eff7ccf001d48adf518bae86129be3", "title": "OmniDetector: With Neural Networks to Bounding Boxes", "year": "2018", "pdf": []}, {"id": "2fcd5cff2b4743ea640c4af68bf4143f4a2cccb1", "title": "Are You Talking to a Machine? Dataset and Methods for Multilingual Image Question Answering", "year": "2015", "pdf": ["https://arxiv.org/pdf/1505.05612.pdf"]}, {"id": "6a1b76f1ef876061ec479ab9bc13fcd517eb4188", "title": "Large Kernel Matters \u2014 Improve Semantic Segmentation by Global Convolutional Network", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.02719.pdf"]}, {"id": "3bcca85ad84806be6d38d3882f7a6aac0ad90253", "title": "Video captioning with recurrent networks based on frame- and video-level features and visual content classification", "year": "2015", "pdf": ["https://arxiv.org/pdf/1512.02949.pdf"]}, {"id": "39978ba7c83333475d6825d0ff897692933895fc", "title": "Conditional Random Fields as Recurrent Neural Networks", "year": "2015", "pdf": []}, {"id": "6ee2173c06c84cae6aae2912a4439ec956ecc3bb", "title": "Zero-shot Sim-to-Real Transfer with Modular Priors", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.07480.pdf"]}, {"id": "fd0769033b18038b9baf9e762f16973ee27be626", "title": "MobileNetV2: Inverted Residuals and Linear Bottlenecks", "year": "2018", "pdf": []}, {"id": "b408b939c0f3be9cce0f84871a78a71d1684cd77", "title": "Identifying spatial relations in images using convolutional neural networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1706.04215.pdf"]}, {"id": "8aa9d9ccbdf37fd1d9fb4f3adb778b3c2c9baa45", "title": "POL-LWIR Vehicle Detection: Convolutional Neural Networks Meet Polarised Infrared Sensors", "year": "2018", "pdf": []}, {"id": "ed7de328e2191463d42d745d8ac8f16024f95f25", "title": "Beyond Planar Symmetry: Modeling Human Perception of Reflection and Rotation Symmetries in the Wild", "year": "2017", "pdf": []}, {"id": "37b207d2c4a82a57f80e96353f79ecd71320a854", "title": "Person Search with Natural Language Description", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.05729.pdf"]}, {"id": "11da2d589485685f792a8ac79d4c2e589e5f77bd", "title": "Show and tell: A neural image caption generator", "year": "2015", "pdf": ["https://arxiv.org/pdf/1411.4555.pdf"]}, {"id": "54f688ea5de7b9156db28b2821e5f5ed0148605a", "title": "Contextual Memory Trees", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.06473.pdf"]}, {"id": "19aa506d04d3f7241fc71b595d28b5f1bb99edad", "title": "Compact Generalized Non-local Network", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.13125.pdf"]}, {"id": "4a88237199595feaa3f0e3289cbdd201a3ce28ff", "title": "Multi-Domain Pose Network for Multi-Person Pose Estimation and Tracking", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.08338.pdf"]}, {"id": "3979e8ddcf95fedf7a220b7d39a72fa120d436f8", "title": "Deep Learning Applied to Image and Text Matching", "year": "2015", "pdf": ["https://arxiv.org/pdf/1601.03478.pdf"]}, {"id": "df9a102288582d8edadeddcb8d55068a06cc471c", "title": "Are You Smarter Than a Sixth Grader? Textbook Question Answering for Multimodal Machine Comprehension", "year": "2017", "pdf": ["http://ai2-website.s3.amazonaws.com/publications/CVPR17_TQA.pdf", "http://openaccess.thecvf.com/content_cvpr_2017/papers/Kembhavi_Are_You_Smarter_CVPR_2017_paper.pdf"]}, {"id": "9bddd98289ecc7a8dc5517122d21d5c6f5a9a01a", "title": "DS*: Tighter Lifting-Free Convex Relaxations for Quadratic Matching Problems", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.10733.pdf"]}, {"id": "b4c48aa7a93f38d2eb60209120a1a8daa61c4545", "title": "Diversity in Object Proposals", "year": "2016", "pdf": ["https://arxiv.org/pdf/1603.04308.pdf"]}, {"id": "26e425781e4090abfae65b5d68eac72282dd2e31", "title": "Image Captioning with Deep Bidirectional LSTMs", "year": "2016", "pdf": ["https://arxiv.org/pdf/1604.00790.pdf"]}, {"id": "d951ff5f378b2a5f878423029123ad6b3491b444", "title": "Foveal Vision for Instance Segmentation of Road Images", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/c33e/a181e9cd13ad3f11f459d5d1f7b7bf114033.pdf"]}, {"id": "9645e8b4829c04879a642d8dd6b3cdf5cf264afb", "title": "Finding Beans in Burgers: Deep Semantic-Visual Embedding with Localization", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.01720.pdf"]}, {"id": "1e2d9ea6fe9c50a5c26a629b94446250e1be4e7d", "title": "The Freiburg Groceries Dataset", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.05799.pdf"]}, {"id": "40c6a2b1cb312f11f8225a733545fdabd436e347", "title": "Deep Co-Training for Semi-Supervised Image Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.05984.pdf"]}, {"id": "1f0c7b93636f879bd5ef3dd915a02dcd813a053d", "title": "Interpreting Deep Visual Representations via Network Dissection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.05611.pdf"]}, {"id": "402056b2d10b27c1b17dbb5eb82b95fd1cfd8fa5", "title": "Deriving Privacy and Security Considerations for CORE: An Indoor IoT Adaptive Context Environment", "year": "2018", "pdf": []}, {"id": "5b3dc81a490b1d9e69d7be20c4e8e1de886b5ca3", "title": "Improving Object Localization with Fitness NMS and Bounded IoU Loss", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.00164.pdf"]}, {"id": "b7c2798e136feb85847c8a9aa693d75bc3f9b08c", "title": "Classifying a specific image region using convolutional nets with an ROI mask as input", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.00291.pdf"]}, {"id": "372026896229c870f11bcecc1c81c80dad9c6761", "title": "Draw and Tell: Multimodal Descriptions Outperform Verbal- or Sketch-Only Descriptions in an Image Retrieval Task", "year": "2017", "pdf": []}, {"id": "8d384e8c45a429f5c5f6628e8ba0d73c60a51a89", "title": "Temporal Dynamic Graph LSTM for Action-Driven Video Object Detection", "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.00666.pdf"]}, {"id": "6f77ff9990973a6cdad6b5b6022323bff9d03965", "title": "Action Recognition in Still Images Using Word Embeddings from Natural Language Descriptions", "year": "2017", "pdf": ["http://cobweb.cs.uga.edu/~csc/papers/wacv17_1.pdf"]}, {"id": "146f6f6ed688c905fb6e346ad02332efd5464616", "title": "Show, Attend and Tell: Neural Image Caption Generation with Visual Attention", "year": "2015", "pdf": ["https://arxiv.org/pdf/1502.03044.pdf"]}, {"id": "e20daf69526c5da9cffb252d043fdc765f37a89e", "title": "Relating images and 3D models with convolutional neural networks. (Mise en relation d'images et de mod\u00e8les 3D avec des r\u00e9seaux de neurones convolutifs)", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/e20d/af69526c5da9cffb252d043fdc765f37a89e.pdf"]}, {"id": "fb732b15fd5235893228ad3249cf04e1809034c9", "title": "CBVMR: Content-Based Video-Music Retrieval Using Soft Intra-Modal Structure Constraint", "year": "2018", "pdf": ["https://arxiv.org/pdf/1704.06761.pdf"]}, {"id": "45e7ddd5248977ba8ec61be111db912a4387d62f", "title": "Adversarial Learning of Structure-Aware Fully Convolutional Networks for Landmark Localization", "year": "2017", "pdf": ["https://arxiv.org/pdf/1711.00253.pdf"]}, {"id": "eaf8c104ab14600ecc5e9cce739b55280eef7ad4", "title": "Abstractive Compression of Captions with Attentive Recurrent Neural Networks", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/eaf8/c104ab14600ecc5e9cce739b55280eef7ad4.pdf"]}, {"id": "dc4682cd15ccb7ebcbc8d2ba21b613e34a9af8ca", "title": "Bonnet: An Open-Source Training and Deployment Framework for Semantic Segmentation in Robotics using CNNs", "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.08960.pdf"]}, {"id": "478a1ed7dc1890ca9476dcc1befe7f21c9bf9149", "title": "Learning to Learn from Noisy Labeled Data", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.05214.pdf"]}, {"id": "e4b9c14951cea6259dd9d522586ba2c5bb1fbcce", "title": "Social Image Captioning: Exploring Visual Attention and User Attention", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/e4b9/c14951cea6259dd9d522586ba2c5bb1fbcce.pdf"]}, {"id": "1e17202d6de18d5e1965edce5fee79744b717d0b", "title": "MIML-FCN+: Multi-Instance Multi-Label Learning via Fully Convolutional Networks with Privileged Information", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.08681.pdf"]}, {"id": "542289d1acfebb9d79ea7a10c8e1516924e09973", "title": "Video Highlight Prediction Using Audience Chat Reactions", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.08559.pdf"]}, {"id": "cf98c333c8d7d5870c1ce5538bb0c3de3de16657", "title": "Panoptic Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1801.00868.pdf"]}, {"id": "2badc4c87a7751dd5ae1797bc4091d10d1acf442", "title": "Multimodal Retrieval with Asymmetrically Weighted Regularized Canonical Correla- Tion Analysis", "year": "", "pdf": ["https://pdfs.semanticscholar.org/2bad/c4c87a7751dd5ae1797bc4091d10d1acf442.pdf"]}, {"id": "9b45e9a40313096abf530df3b98a1dfa1553f17b", "title": "Comprehension-Guided Referring Expressions", "year": "2017", "pdf": []}, {"id": "66837b29270f3e03df64941a081d70c687c7955c", "title": "ActionXPose: A Novel 2D Multi-view Pose-based Algorithm for Real-time Human Action Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.12126.pdf"]}, {"id": "7d035e6c6b5ef1267dff23845009284677c16a4d", "title": "AlphaGAN: Generative adversarial networks for natural image matting", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.10088.pdf"]}, {"id": "655f587a59c835a7b6b5017016ea1c2123f266e6", "title": "Separating Self-Expression and Visual Content in Hashtag Supervision", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.09825.pdf"]}, {"id": "3ca194773fe583661b988fbdf33f7680764438b3", "title": "Exploring Nearest Neighbor Approaches for Image Captioning", "year": "2015", "pdf": ["https://arxiv.org/pdf/1505.04467.pdf"]}, {"id": "0013fae7390cbd34aade7959b4476512d8ab9aa3", "title": "Show, Control and Tell: A Framework for Generating Controllable and Grounded Captions", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.10652.pdf"]}, {"id": "f43463770d361e55fb6f6eb801a6e8530ab668cd", "title": "Corpus Construction and Semantic Analysis of Indonesian Image Description", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/f434/63770d361e55fb6f6eb801a6e8530ab668cd.pdf"]}, {"id": "30f33cffe390dfcc03d8df28593aebba6afafd59", "title": "Video Predictive Object Detector", "year": "2018", "pdf": []}, {"id": "46d8531797d8913b05f9124def1ef81ac30423c1", "title": "MetaStyle: Three-Way Trade-Off Among Speed, Flexibility, and Quality in Neural Style Transfer", "year": "2019", "pdf": ["https://arxiv.org/pdf/1812.05233.pdf"]}, {"id": "f5748711db00d82469ff60e05f62319f1eac90c5", "title": "Comparing Apples and Oranges: Off-Road Pedestrian Detection on the NREC Agricultural Person-Detection Dataset", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.07169.pdf"]}, {"id": "cfc22c35ad191cf9d70f4a3655840748b0e1322c", "title": "Real-Time Dense Mapping for Self-driving Vehicles using Fisheye Cameras", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.06132.pdf"]}, {"id": "6bfae88bea2301f2abeb6d1ed62c8b9a99b251c0", "title": "CNRS TELECOM ParisTech at ImageCLEF 2015 Scalable Concept Image Annotation Task: Concept Detection with Blind Localization Proposals", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/6bfa/e88bea2301f2abeb6d1ed62c8b9a99b251c0.pdf"]}, {"id": "478261574ddc6cf297611000735aa9808f8f0030", "title": "ScanNet: Richly-Annotated 3D Reconstructions of Indoor Scenes", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.04405.pdf"]}, {"id": "12d9a4233a16ea08fbb1de853b6a97b4f4952c23", "title": "Tractable Structured Prediction using the Permutohedral Lattice", "year": "2017", "pdf": ["https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/130429/eth-50807-01.pdf?isAllowed=y&sequence=1", "https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/130429/eth-50807-02.pdf?isAllowed=y&sequence=2"]}, {"id": "e654320739770029ec5cb22174772c935478b237", "title": "Paraphrase Thought: Sentence Embedding Module Imitating Human Language Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.05505.pdf"]}, {"id": "b80f43b42b5320578d4c1e214fe1a8b6b45352ae", "title": "MSR-VTT: A Large Video Description Dataset for Bridging Video and Language", "year": "2016", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2016/papers/Xu_MSR-VTT_A_Large_CVPR_2016_paper.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Xu_MSR-VTT_A_Large_CVPR_2016_paper.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/06/cvpr16.msr-vtt.tmei_-1.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/10/cvpr16.supplementary.pdf"]}, {"id": "81cb69e401b3b51e49ec378dba4bc0c8e33448e1", "title": "Applying Domain Randomization to Synthetic Data for Object Category Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.09834.pdf"]}, {"id": "030646f4fc694ffea5d4f77203cbbc5d02aae797", "title": "Cognitive Deep Machine Can Train Itself", "year": "2016", "pdf": ["https://arxiv.org/pdf/1612.00745.pdf"]}, {"id": "7b1dd2708e1d7bf0fdcda437de1970a9a6facc0d", "title": "Deep Recurrent Convolutional Neural Network: Improving Performance For Speech Recognition", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.07174.pdf"]}, {"id": "18087bfafbe471f346c8b739522e64fa11e48b13", "title": "Visual Madlibs: Fill in the Blank Description Generation and Question Answering", "year": "2015", "pdf": ["http://acberg.com/papers/madlibs.pdf", "http://openaccess.thecvf.com/content_iccv_2015/papers/Yu_Visual_Madlibs_Fill_ICCV_2015_paper.pdf", "http://tamaraberg.com/papers/madlibs.pdf", "http://www.cs.unc.edu/~licheng/papers/iccv15_madlibs.pdf", "http://www.cs.unc.edu/~licheng/papers/iccv15_madlibs_supp.pdf", "http://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Yu_Visual_Madlibs_Fill_ICCV_2015_paper.pdf"]}, {"id": "ad8642e186c5c81d06934d4e6fc249b7cbca40e8", "title": "Learning Transferable Architectures for Scalable Image Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1707.07012.pdf"]}, {"id": "04427d8371cb9e66e2cdcd2035756203398a8bf1", "title": "Learning Answer Embeddings for Visual Question Answering", "year": "2018", "pdf": []}, {"id": "189b02caa2bb9ffb303154e35f60797ec8576f84", "title": "CRRN: Multi-scale Guided Concurrent Reflection Removal Network", "year": "2018", "pdf": []}, {"id": "3b8ad1f2335fc755e5cd75ee5922b8a0d432018a", "title": "A Fast and Compact Saliency Score Regression Network Based on Fully Convolutional Network", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.00615.pdf"]}, {"id": "357df3ee0f0c30d5c8abc5a1bdf70122322d6fbd", "title": "O BJECT DETECTORS EMERGE IN D EEP S CENE CNN S", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/08c7/a85c443935468450a162ddeccd2b945660e3.pdf"]}, {"id": "2a5667702b0f1ff77dde8fb3e2e10d4e05e8de9d", "title": "Scene Parsing through ADE20K Dataset", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhou_Scene_Parsing_Through_CVPR_2017_paper.pdf", "http://people.csail.mit.edu/bzhou/publication/scene-parse-camera-ready.pdf"]}, {"id": "10bb4ef7a6719ea132e00f0ab5680919a4131d99", "title": "BAM: Bottleneck Attention Module", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.06514.pdf"]}, {"id": "a6e695ddd07aad719001c0fc1129328452385949", "title": "The New Data and New Challenges in Multimedia Research", "year": "2015", "pdf": []}, {"id": "f795b4ff67d3ecce0b988ecfc0589cc7e54f5dfa", "title": "Grounded Human-Object Interaction Hotspots from Video", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.04558.pdf"]}, {"id": "b9146b314812231d09587e3a9f622dda65d3cc40", "title": "A survey on social image understanding", "year": "2017", "pdf": []}, {"id": "30f78071ac2bc965ffbf452a7b315d6dfddae30e", "title": "Lingusitic Analysis of Multi-Modal Recurrent Neural Networks", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/30f7/8071ac2bc965ffbf452a7b315d6dfddae30e.pdf"]}, {"id": "819d1dcea397e6e671acf74adccdef5750550873", "title": "Representations for Visually Guided Actions", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/819d/1dcea397e6e671acf74adccdef5750550873.pdf"]}, {"id": "10554295addeae86571a26de6c2ad7e274963953", "title": "Re-ranking Object Proposals for Object Detection in Automatic Driving", "year": "2016", "pdf": ["https://arxiv.org/pdf/1605.05904.pdf"]}, {"id": "af95ba2c08cf3317291dad554488dd951cd6ff80", "title": "Decoupled Classification Refinement: Hard False Positive Suppression for Object Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.04002.pdf"]}, {"id": "c4cca330ec7289fac16d0793da98f705f6513219", "title": "Knowledge Projection for Effective Design of Thinner and Faster Deep Neural Networks", "year": "2017", "pdf": []}, {"id": "6a75ef6b36489cb59c61f21f3cd09c50ad5b2995", "title": "MVTec D2S: Densely Segmented Supermarket Dataset", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.08292.pdf"]}, {"id": "22a8b7ca2e6449cad31ae4f71bb3587751087e77", "title": "DeepBox: Learning Objectness with Convolutional Networks", "year": "2015", "pdf": []}, {"id": "0dcc768631d9ede8a3679e980b37204b782781b2", "title": "Stating the Obvious: Extracting Visual Common Sense Knowledge", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/55b9/251541eb09a6be4a317d4db1bc8d5dba07f7.pdf"]}, {"id": "3f5b20c35f55417823f0201862d85af1f31e9348", "title": "Salience Biased Loss for Object Detection in Aerial Images", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.08103.pdf"]}, {"id": "c1e714a9ec329629798a88ebff8657c349fec739", "title": "WILDCAT: Weakly Supervised Learning of Deep ConvNets for Image Classification, Pointwise Localization and Segmentation", "year": "2017", "pdf": ["http://cedric.cnam.fr/~thomen/papers/Durand_CVPR_2017.pdf", "http://openaccess.thecvf.com/content_cvpr_2017/papers/Durand_WILDCAT_Weakly_Supervised_CVPR_2017_paper.pdf", "http://openaccess.thecvf.com/content_cvpr_2017/supplemental/Durand_WILDCAT_Weakly_Supervised_2017_CVPR_supplemental.pdf", "http://webia.lip6.fr/~durandt/pdfs/2017_CVPR/Durand_WILDCAT_CVPR_2017.pdf"]}, {"id": "a4e1a95c32e9477aa1b09073a1fd3ca129aacadc", "title": "Driver Identification System Using Convolutional Neural Network with Background Removal-based Infrared Data Augmentation", "year": "2018", "pdf": []}, {"id": "01959ef569f74c286956024866c1d107099199f7", "title": "VQA: Visual Question Answering", "year": "2015", "pdf": []}, {"id": "87e1ae939f3632f26a9878fe32674e338cb18094", "title": "C4Synth: Cross-Caption Cycle-Consistent Text-to-Image Synthesis", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.10238.pdf"]}, {"id": "5c8ad080ccb3f5e3c999c2948029f0bd005d5635", "title": "Engaging Image Captioning", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/5c8a/d080ccb3f5e3c999c2948029f0bd005d5635.pdf"]}, {"id": "284be8be0c6bedc36dfe43229bc84345ab0aedc2", "title": "Faster Training of Mask R-CNN by Focusing on Instance Boundaries", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.07069.pdf"]}, {"id": "b6b9d29d25de42d78f09217c9cc457247d90fc70", "title": "Semantic Part Detection via Matching: Learning to Generalize to Novel Viewpoints from Limited Training Data", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.11823.pdf"]}, {"id": "2198a3d3342442d6ed6608f2e2b0687f644b67d6", "title": "Dynamic High Resolution Deformable Articulated Tracking", "year": "2017", "pdf": ["https://arxiv.org/pdf/1711.07999.pdf"]}, {"id": "16beefb2f8d0d61e536bf20766e485499906c2e0", "title": "Image Surveillance Assistant", "year": "2016", "pdf": ["http://cs.umd.edu/~maynord/Image_Surveillance_Assistant.pdf", "https://www.nrl.navy.mil/itd/aic/sites/www.nrl.navy.mil.itd.aic/files/pdfs/(Maynord+%20WACV-16%20WS)%20Intelligent%20Surveillance%20Assistant.pdf"]}, {"id": "340e55a44793226a51ad06612f340f2c520e3575", "title": "G2DeNet: Global Gaussian Distribution Embedding Network and Its Application to Visual Recognition", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2017/papers/Wang_G2DeNet_Global_Gaussian_CVPR_2017_paper.pdf"]}, {"id": "833ada09759039b7c620b8930a50a0521d70b2c7", "title": "Attend in Groups: A Weakly-Supervised Deep Learning Framework for Learning from Web Data", "year": "2017", "pdf": ["https://arxiv.org/pdf/1611.09960.pdf"]}, {"id": "fbb9cdd699baf86e9d616b259ada02449c2322ca", "title": "Active Testing: An Efficient and Robust Framework for Estimating Accuracy", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.00493.pdf"]}, {"id": "f1d8c377093ecf64afd7f17383738e81666fe5ae", "title": "Remote Detection of Idling Cars Using Infrared Imaging and Deep Networks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.10805.pdf"]}, {"id": "858a2e0ca7ba129fda1a92959abe8770eb303bbe", "title": "Input Features Output Features Input Features Output Features Input Features Output Features Selected and", "year": "2017", "pdf": []}, {"id": "f5ce3c9eb539d2f4a85880db65ba3890a0fd8c6c", "title": "Zero-shot object prediction and context modeling using semantic scene knowledge", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/f5ce/3c9eb539d2f4a85880db65ba3890a0fd8c6c.pdf"]}, {"id": "9a7784eea6bfa62bf2834ee0b87a3cdda46006f2", "title": "Digital Comics Image Indexing Based on Deep Learning", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/9a77/84eea6bfa62bf2834ee0b87a3cdda46006f2.pdf"]}, {"id": "9634348d3bc7b86d0b644f6c14ab0c4294341905", "title": "Investigating Redundancy in Emoji Use: Study on a Twitter Based Corpus", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/9634/348d3bc7b86d0b644f6c14ab0c4294341905.pdf"]}, {"id": "068171535ac18a4b7b65be0748d483ce4c71a9a4", "title": "Event Specific Multimodal Pattern Mining with Image-Caption Pairs", "year": "2015", "pdf": ["https://arxiv.org/pdf/1601.00022.pdf"]}, {"id": "346a877564351e4014441a1dc174b0369a759ba5", "title": "CLEAR: Cumulative LEARning for One-Shot One-Class Image Recognition", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Kozerawski_CLEAR_Cumulative_LEARning_CVPR_2018_paper.pdf", "http://www-inst.cs.berkeley.edu/~ee290t/fa18/readings/Kozerawski_CLEAR_Cumulative_LEARning_CVPR_2018_paper.pdf"]}, {"id": "2b45c35aa63180ef83e7f8e6f55405db9c688b0e", "title": "Unsupervised Domain Adaptation with Similarity Learning", "year": "2018", "pdf": []}, {"id": "a8420e7fa53b81b8069ced8d9c743c141e2fc432", "title": "Real-Time Multiple Object Tracking - A Study on the Importance of Speed", "year": "2017", "pdf": ["https://arxiv.org/pdf/1709.03572.pdf"]}, {"id": "ea743597a5f48babef1982259566d76a9bf66bf2", "title": "Context Contrasted Feature and Gated Multi-scale Aggregation for Scene Segmentation", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/CameraReady/1276.pdf", "http://openaccess.thecvf.com/content_cvpr_2018/papers/Ding_Context_Contrasted_Feature_CVPR_2018_paper.pdf"]}, {"id": "2770af2c74f9b82f2167cf5773ccc94d482ac8d4", "title": "Gossip training for deep learning", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.09726.pdf"]}, {"id": "31b05f65405534a696a847dd19c621b7b8588263", "title": "UMDFaces: An annotated face dataset for training deep networks", "year": "2017", "pdf": []}, {"id": "ff5869f420d33511c22e60bbcff072c3cde3ebc4", "title": "Regional Interactive Image Segmentation Networks", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017/papers/Liew_Regional_Interactive_Image_ICCV_2017_paper.pdf"]}, {"id": "3aa66f2829ef440842c71a52cdaff30398a90ccb", "title": "Pointly-Supervised Action Localization", "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.11333.pdf"]}, {"id": "15a148957469bc8b91bd7cc31aa1f0c6584a1571", "title": "DeepGlobe 2018: A Challenge to Parse the Earth through Satellite Images", "year": "2018", "pdf": []}, {"id": "ccba451687f83c463deeefb69c1f4a781de2d638", "title": "Full-Network Embedding in a Multimodal Embedding Pipeline", "year": "2017", "pdf": []}, {"id": "0219ced7a3afcd8e7584b24809bac30304f9288e", "title": "L ] 3 1 M ay 2 01 7 Adversarial Ranking for Language Generation", "year": "2017", "pdf": []}, {"id": "59f4df3087c22e0e13574bb7d20e9d41e74cb9a5", "title": "Visual Re-ranking with Natural Language Understanding for Text Spotting", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.12738.pdf"]}, {"id": "3fb61e0d133d0971c853b8435fea99150e15cdbb", "title": "Predicting Foreground Object Ambiguity and Efficiently Crowdsourcing the Segmentation(s)", "year": "2018", "pdf": ["https://arxiv.org/pdf/1705.00366.pdf"]}, {"id": "cb8f1f77a8b19d99dfe0c7b50dae3978cf646aa9", "title": "Adaptive attention fusion network for visual question answering", "year": "2017", "pdf": []}, {"id": "4f6d8d4d460517297b419242a7e9ab3f9a307a72", "title": "Feature Extraction for Image Selection Using Machine Learning", "year": "2017", "pdf": []}, {"id": "0c36c988acc9ec239953ff1b3931799af388ef70", "title": "Face Detection Using Improved Faster RCNN", "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.02142.pdf"]}, {"id": "1baa9fdfb27f561be67c52ef55b0e3587f306cff", "title": "Image Captioning with Attention", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/1baa/9fdfb27f561be67c52ef55b0e3587f306cff.pdf"]}, {"id": "8b35c00edfa4edfd7a99d816e671023d2c000d55", "title": "AttnGAN: Fine-Grained Text to Image Generation with Attentional Generative Adversarial Networks", "year": "2018", "pdf": []}, {"id": "a0e03c5b647438299c79c71458e6b1776082a37b", "title": "Areas of Attention for Image Captioning", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.01033.pdf"]}, {"id": "9976b88d15f89b6c82b16564735d489a7524821d", "title": "Learning Visual N-Grams from Web Data", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.09161.pdf"]}, {"id": "0b938c4a993e7e2fda81426924cb91c108a7830c", "title": "Which Emoji Talks Best for My Picture?", "year": "2018", "pdf": []}, {"id": "4a9831e5fec549edee454709048a51997ef60fb7", "title": "Did the Model Understand the Question?", "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.05492.pdf"]}, {"id": "4041cf003f797007fe53ac89fd57072f97257abd", "title": "End-to-end policy learning for active visual categorization.", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/4041/cf003f797007fe53ac89fd57072f97257abd.pdf"]}, {"id": "f741cba061655581f6fbb628613d0669c4bdecd5", "title": "Deep Cosine Metric Learning for Person Re-identification", "year": "2018", "pdf": []}, {"id": "c91e94e981084f5d9c3c1479fa90b8b091826d1d", "title": "Variational Bayesian Multiple Instance Learning with Gaussian Processes", "year": "2017", "pdf": []}, {"id": "03c820f35afdc38dd05e4c663d2877e2602bcde0", "title": "Semantically Guided Visual Question Answering", "year": "2018", "pdf": []}, {"id": "23a0cfaceb35413bd6caad437e37d566fcfdcc64", "title": "MindCamera: Interactive Sketch-Based Image Retrieval and Synthesis", "year": "2018", "pdf": []}, {"id": "9d5a5517650d5f9a7d9818bcc1eb59ba65d316e1", "title": "Conditional Generative Adversarial Network for Structured Domain Adaptation", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Hong_Conditional_Generative_Adversarial_CVPR_2018_paper.pdf", "https://weixianghong.github.io/publications/papers/CVPR_18.pdf"]}, {"id": "f87aa7d7b746e24e3b08fd3375331068ecd13311", "title": "Mask-SLAM: Robust Feature-Based Monocular SLAM by Masking Using Semantic Segmentation", "year": "2018", "pdf": []}, {"id": "bce963c494ccce1c66757e69993f578c37564f6d", "title": "Can Adversarial Networks Hallucinate Occluded People With a Plausible Aspect?", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.08097.pdf"]}, {"id": "3bb0ab7ebe1ffb83aa9775ffe2602407c9d6329d", "title": "Active Decision Boundary Annotation with Deep Generative Models", "year": "2017", "pdf": []}, {"id": "93adca9ce6f4a0fab9ea027c90b4df828cfa10d7", "title": "Learning Actionable Representations from Visual Observations", "year": "2018", "pdf": []}, {"id": "54b5aab87dbe38803935789c4d730bd203d198a1", "title": "3D Human Pose Estimation in RGBD Images for Robotic Task Learning", "year": "2018", "pdf": []}, {"id": "6962505b78d0cec04b992a059cc58870c361c971", "title": "Tattoo Image Search at Scale: Joint Detection and Compact Representation Learning", "year": "2019", "pdf": ["https://arxiv.org/pdf/1811.00218.pdf"]}, {"id": "747e9b36c5a1b0b8a9572da0ab416ddd1e1d2d33", "title": "Augmentation for Visual Question Answering", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/747e/9b36c5a1b0b8a9572da0ab416ddd1e1d2d33.pdf"]}, {"id": "09a4f1b7afd8d5f6854b23f0aa9e80b9a0fa6d20", "title": "Seeing Beyond Appearance - Mapping Real Images into Geometrical Domains for Unsupervised CAD-based Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.04158.pdf"]}, {"id": "85473d7bd0b73488f22d8443583165fdbd3d221d", "title": "Product Quantized Translation for Fast Nearest Neighbor Search", "year": "2018", "pdf": []}, {"id": "86d9f836c5fc542e987601a102c2b6cdbca1d64f", "title": "Large Scale Semi-Supervised Object Detection Using Visual and Semantic Knowledge Transfer", "year": "2016", "pdf": []}, {"id": "c719a941d3ab00e45d1d74e7533ed59cc0740ae2", "title": "Does Optical Character Recognition and Caption Generation Improve Emotion Detection in Microblog Posts?", "year": "2017", "pdf": ["http://www.romanklinger.de/publications/klinger2017-nldb.pdf", "http://www.romanklinger.de/talks/nldb.pdf"]}, {"id": "4f618cbf19917ce5b8703adbc14e15b0bf0d35cc", "title": "Multi-View Dynamic Facial Action Unit Detection", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.07863.pdf"]}, {"id": "1f2b28dc48c8f2c0349dce728d7b6a0681f58aea", "title": "A Dataset for Lane Instance Segmentation in Urban Environments", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.01347.pdf"]}, {"id": "7d6132a884d2b154059c461e107c7a8c41603ef7", "title": "Exploring Multi-Branch and High-Level Semantic Networks for Improving Pedestrian Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.00872.pdf"]}, {"id": "8e06f628d0566e31366cf046b4a83ac4724ea7c3", "title": "Categorizing Concepts with Basic Level for Vision-to-Language", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Wang_Categorizing_Concepts_With_CVPR_2018_paper.pdf"]}, {"id": "72048081cfe678f700c52e1c7a56713a8e0865b6", "title": "Unsupervised Adversarial Visual Level Domain Adaptation for Learning Video Object Detectors from Images", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.02074.pdf"]}, {"id": "974aa04b051bd2493bf4aaa5cd410dcb8f3b264d", "title": "Red Hen Lab: Dataset and Tools for Multimodal Human Communication Research", "year": "2017", "pdf": []}, {"id": "13ad6164dba75845f0f397e9314ad596e74eb946", "title": "Learning to Estimate 3D Human Pose and Shape from a Single Color Image", "year": "2018", "pdf": []}, {"id": "91e89cedd4093bfe176532530ddb960f2767aca5", "title": "Cross-Domain Self-Supervised Multi-task Feature Learning Using Synthetic Imagery", "year": "2018", "pdf": []}, {"id": "f28e2bb46e49799589787e466c3ca966a0897bf7", "title": "Textually Customized Video Summaries", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.01528.pdf"]}, {"id": "bcb35a8eb4cea46755c7430618e4ecfa4647b360", "title": "Deep Joint Semantic-Embedding Hashing", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/bcb3/5a8eb4cea46755c7430618e4ecfa4647b360.pdf"]}, {"id": "4b59e1fbcd68d616c9e5d02a62d332b0b2c268c5", "title": "Training Recurrent Neural Network through Moment Matching for NLP Applications", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/4b59/e1fbcd68d616c9e5d02a62d332b0b2c268c5.pdf"]}, {"id": "b8ccc5341a1b0214e9d155b019962023f344c2ee", "title": "Incremental Learning of Object Detectors without Catastrophic Forgetting", "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.06977.pdf"]}, {"id": "45dd2a3cd7c27f2e9509b023d702408f5ac11c9d", "title": "Stacked Cross Attention for Image-Text Matching", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.08024.pdf"]}, {"id": "02a4e2569b8033eff87099ad402f251d02213cfe", "title": "Learning-Based Cloth Material Recovery from Video", "year": "2017", "pdf": []}, {"id": "ff23167f18723faf41265cd36ee2cf1de45a3048", "title": "Straight to Shapes: Real-Time Detection of Encoded Shapes", "year": "2017", "pdf": []}, {"id": "47b4c33621bc0e636d6c268ad6acb319e12abbee", "title": "StackGAN++: Realistic Image Synthesis with Stacked Generative Adversarial Networks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1710.10916.pdf"]}, {"id": "26d27a6de0360ed690248a9dd08e4ec0818472b0", "title": "DehazeGAN: When Image Dehazing Meets Differential Programming", "year": "2018", "pdf": ["http://www.pengxi.me/wp-content/uploads/Papers/2018-IJCAI-DehazeGAN.pdf"]}, {"id": "2284ba28bd3b1afaf06afb8c2a94638e350b3ecb", "title": "Boosting Object Proposals: From Pascal to COCO", "year": "2015", "pdf": ["http://openaccess.thecvf.com/content_iccv_2015/papers/Pont-Tuset_Boosting_Object_Proposals_ICCV_2015_paper.pdf", "http://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Pont-Tuset_Boosting_Object_Proposals_ICCV_2015_paper.pdf", "http://www.vision.ee.ethz.ch/en/publications/papers/proceedings/eth_biwi_01226.pdf"]}, {"id": "21d7130230162af2a4cc1b9375bfe9b37dbbd499", "title": "Origami: A 803-GOp/s/W Convolutional Network Accelerator", "year": "2017", "pdf": ["https://arxiv.org/pdf/1512.04295.pdf"]}, {"id": "efbac99adf8628aae7f070e5b4388a295956f9d2", "title": "CondenseNet: An Efficient DenseNet Using Learned Group Convolutions", "year": "2018", "pdf": []}, {"id": "015d25f66514ce0a966300944201d45968a104ba", "title": "SIMPLE IMAGE DESCRIPTION GENERATOR VIA A LINEAR PHRASE-BASED MODEL R\u00e9mi Lebret", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/015d/25f66514ce0a966300944201d45968a104ba.pdf"]}, {"id": "47f8a217de3df8d70c3ac7fac73696a9104b1294", "title": "Weakly-supervised image captioning based on rich contextual information", "year": "2017", "pdf": []}, {"id": "1fefc1d288a87fe218ba25024c4b2b6ef345738e", "title": "Self-ensembling for domain adaptation", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/1fef/c1d288a87fe218ba25024c4b2b6ef345738e.pdf"]}, {"id": "688cb9fd33769b152806c04ef6fc276629a9f300", "title": "LocNet: Improving Localization Accuracy for Object Detection", "year": "2016", "pdf": ["https://arxiv.org/pdf/1511.07763.pdf"]}, {"id": "c96f012f4915398259e7e223810c57898b5e1a76", "title": "Fast LIDAR-based Road Detection Using Convolutional Neural Networks", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/c96f/012f4915398259e7e223810c57898b5e1a76.pdf"]}, {"id": "d4a7259340ece685b9dacb390eea10c6684a05b3", "title": "Object Detection based on Region Decomposition and Assembly", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.08225.pdf"]}, {"id": "c562e95b7906066be4210d00c4f6187475e6e13a", "title": "Deep Lesion Graphs in the Wild: Relationship Learning and Organization of Significant Radiology Image Findings in a Diverse Large-Scale Lesion Database", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.10535.pdf"]}, {"id": "a24a0126f76ba1423ac3548ef95aa24ac4e670dd", "title": "How Would You Say It ? Eliciting Lexically Diverse Data for Supervised Semantic Parsing", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/3870/0128bc3ce57a04c46458c3898eaaa1166508.pdf"]}, {"id": "99975f84728140154a35b0763753b35bc44e1d5a", "title": "Video Description: A Survey of Methods, Datasets and Evaluation Metrics", "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.00186.pdf"]}, {"id": "a6a6cfae45e8633c01793debf43592b7d515f65d", "title": "From ImageNet to Mining: Adapting Visual Object Detection with Minimal Supervision", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/a6a6/cfae45e8633c01793debf43592b7d515f65d.pdf"]}, {"id": "29b1a44d1e1ffa05c2bf7f4be931c5045f427718", "title": "ON GENERIC OBJECT RECOGNITION TECHNIQUES : CHALLENGES AND OPPORTUNITIES Prof", "year": "", "pdf": ["https://pdfs.semanticscholar.org/29b1/a44d1e1ffa05c2bf7f4be931c5045f427718.pdf"]}, {"id": "4634bf44a0c994e2bed89686225f8cef601a0224", "title": "NLM at ImageCLEF 2018 Visual Question Answering in the Medical Domain", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/4634/bf44a0c994e2bed89686225f8cef601a0224.pdf"]}, {"id": "acf13c52c86a3b38642ba0c6cbcd1b771778965c", "title": "NAACL HLT 2018 Generalization in the Age of Deep Learning Proceedings of the Workshop", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/e255/eb6d8e46275387f71986a26d81fd746e0147.pdf"]}, {"id": "fd716faea00fb6ec433a62c79aced17d51de8d23", "title": "Cascaded Contextual Region-based Convolutional Neural Network for Event Detection from Time Series Signals: A Seismic Application", "year": "2017", "pdf": []}, {"id": "64bff565c83c6ce3fac783d8d67191af99803701", "title": "Learning to Co-Generate Object Proposals with a Deep Structured Network", "year": "2016", "pdf": ["http://infoscience.epfl.ch/record/217984/files/HayderHeSalzmannCVPR16.pdf", "http://openaccess.thecvf.com/content_cvpr_2016/papers/Hayder_Learning_to_Co-Generate_CVPR_2016_paper.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Hayder_Learning_to_Co-Generate_CVPR_2016_paper.pdf"]}, {"id": "1779b6a17ee68afafb6801477b165f19901689b2", "title": "Object Contour Detection with a Fully Convolutional Encoder-Decoder Network", "year": "2016", "pdf": []}, {"id": "5ac18d505ed6d10e8692cbb7d33f6852e6782692", "title": "The Open Images Dataset V4: Unified image classification, object detection, and visual relationship detection at scale", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.00982.pdf"]}, {"id": "f2b95f135b95c3df4f6ebe6015098a2e1667711d", "title": "Weakly Supervised Object Localization Using Things and Stuff Transfer", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.08000.pdf"]}, {"id": "e2c122bea06dfa067712cdb58ce474144f93af07", "title": "Phrase-based Image Captioning with Hierarchical LSTM Model", "year": "2017", "pdf": ["https://arxiv.org/pdf/1711.05557.pdf"]}, {"id": "bb1dc1e9e9c20d99b55f37b9e635457af86a065f", "title": "Neural Ctrl-F: Segmentation-Free Query-by-String Word Spotting in Handwritten Manuscript Collections", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.07645.pdf"]}, {"id": "0e0900b88c33b671be5dd2ded9885b6526d6b429", "title": "From captions to visual concepts and back", "year": "2015", "pdf": ["https://arxiv.org/pdf/1411.4952.pdf"]}, {"id": "3e08a3912ebe494242f6bcd772929cc65307129c", "title": "Few-Shot Image Recognition by Predicting Parameters from Activations", "year": "2018", "pdf": []}, {"id": "47a2b1ba7e4275174a017a290f2e5de7de0acb00", "title": "Semantically Coherent Co-Segmentation and Reconstruction of Dynamic Scenes", "year": "2017", "pdf": ["http://epubs.surrey.ac.uk/813877/1/cvprFinalOpenAccess.pdf", "http://openaccess.thecvf.com/content_cvpr_2017/papers/Mustafa_Semantically_Coherent_Co-Segmentation_CVPR_2017_paper.pdf", "https://arminmustafa.github.io/docs/cvpr17/ArminCVPR2017.pdf", "https://arminmustafa.github.io/docs/cvpr17/ppt.pdf"]}, {"id": "7902309d3c5ab2e1e3a1f08503dc39108e1639dc", "title": "Look into Person: Joint Body Parsing & Pose Estimation Network and A New Benchmark", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.01984.pdf"]}, {"id": "6c0c368fca391b4456e64d2943d0bcbe6d8e1ecc", "title": "A Pipeline for Creative Visual Storytelling", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.08077.pdf"]}, {"id": "dc7a4d5ba20ca07d29c360b26e1e72afae9a77be", "title": "The ApolloScape Open Dataset for Autonomous Driving and its Application", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.06184.pdf"]}, {"id": "9120d59f2ca86954b45d254cae1409cb0806d9c7", "title": "DenseFuse: A Fusion Approach to Infrared and Visible Images", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.08361.pdf"]}, {"id": "428017f7a6df4d667275c7ac9b3feba39b70e4ae", "title": "CNN-RNN: A Unified Framework for Multi-label Image Classification", "year": "2016", "pdf": ["https://arxiv.org/pdf/1604.04573.pdf"]}, {"id": "aa74e63360c341f47a921e0043c5d58d55807fe4", "title": "Multi-Residual Networks: Improving the Speed and Accuracy of Residual Networks", "year": "2016", "pdf": ["https://arxiv.org/pdf/1609.05672.pdf"]}, {"id": "ecbaa92c289f4f5ff9a57b19a2725036a92311f5", "title": "Focused Evaluation for Image Description with Binary Forced-Choice Tasks", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/ecba/a92c289f4f5ff9a57b19a2725036a92311f5.pdf"]}, {"id": "b3eb61c3542e0c6bafb4c1acd05cffc0970faa85", "title": "Region-Based Image Retrieval Revisited", "year": "2017", "pdf": ["https://arxiv.org/pdf/1709.09106.pdf"]}, {"id": "904b322a61d9be9c0b1023946320f9245533085e", "title": "Multi-Residual Networks", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/95cd/6df1b921859103f2eea702e8d3622a862a08.pdf"]}, {"id": "62a5f2a142fe636632f6467176ab18bb86e6398e", "title": "Pixelated Semantic Colorization", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.10889.pdf"]}, {"id": "a0e286f3c6a72c857ffd03bd8ab9a9f9b98c4432", "title": "AI Learns to Recognize Bengali Handwritten Digits: Bengali.AI Computer Vision Challenge 2018", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.04452.pdf"]}, {"id": "333be4858994e6d9364341aeb520f7800a0f6a07", "title": "Unsupervised Pixel-Level Domain Adaptation with Generative Adversarial Networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.05424.pdf"]}, {"id": "23ef40af4fda9c7db8fd7596d34446303c378793", "title": "Towards a Generalized Approach for Deep Neural Network Based Event Processing for the Internet of Multimedia Things", "year": "2018", "pdf": []}, {"id": "ee53c9480132fc0d09b1192226cb2c460462fd6d", "title": "Channel Pruning for Accelerating Very Deep Neural Networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.06168.pdf"]}, {"id": "b0760764dc573b519f76d5a79531d49af333c67a", "title": "Neural Style Transfer: A Review", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.04058.pdf"]}, {"id": "5fa587381a9e4308163b3a5395985f0375c3cf7d", "title": "Deep Extreme Cut: From Extreme Points to Object Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.09081.pdf"]}, {"id": "2f005b31b41face8a8b157e2ce7f97ece5b61391", "title": "L 1 Graph Based Sparse Model for Label Denoising", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/2f00/5b31b41face8a8b157e2ce7f97ece5b61391.pdf"]}, {"id": "ede3af38e30ca332af0c1ce3bd5144070f7fb7f3", "title": "SGN: Sequential Grouping Networks for Instance Segmentation", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017/papers/Liu_SGN_Sequential_Grouping_ICCV_2017_paper.pdf", "http://www.cs.toronto.edu/~fidler/papers/sgn_iccv17.pdf"]}, {"id": "73f1a3d88defbd5481214e27da7df501d0e31fb1", "title": "Tencent ML-Images: A Large-Scale Multi-Label Image Database for Visual Representation Learning", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.01703.pdf"]}, {"id": "2eef20a11324686099ee6f9b1a7613444b0d2112", "title": "Dual-Path Convolutional Image-Text Embedding with Instance Loss", "year": "2017", "pdf": ["https://arxiv.org/pdf/1711.05535.pdf"]}, {"id": "6a9c3011b5092daa1d0cacda23f20ca4ae74b902", "title": "Fast and Accurate Person Re-Identification with RMNet.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.02465.pdf"]}, {"id": "f9255703f0a89c9ca2e9256595a0526829ff4402", "title": "On the Importance of Visual Context for Data Augmentation in Scene Understanding", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.02492.pdf"]}, {"id": "e8e43abbc8bee64a53af64ceca90bfb687f7bb9d", "title": "Fast Object Class Labelling via Speech", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.09461.pdf"]}, {"id": "3f775e3be9e1a00ebf4fd281e524932e88cec0ae", "title": "Deep Contextual Recurrent Residual Networks for Scene Labeling", "year": "2018", "pdf": ["https://arxiv.org/pdf/1704.03594.pdf"]}, {"id": "c5b9a96fcb07f538be3181922e5f1a24a7936783", "title": "Autonomous drone cinematographer: Using artistic principles to create smooth, safe, occlusion-free trajectories for aerial filming", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.09563.pdf"]}, {"id": "d5673c53b3643372dd8d35136769ecd73a6dede3", "title": "A Deep Learning Framework for Smart Street Cleaning", "year": "2017", "pdf": []}, {"id": "456983805a8781d6429bed1ed66dc9f3902767af", "title": "Seeing with Humans : Gaze-Assisted Neural Image", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/4569/83805a8781d6429bed1ed66dc9f3902767af.pdf"]}, {"id": "a759570e6ef674cd93068020c2e6bd036961f7c6", "title": "SPEECH-COCO: 600k Visually Grounded Spoken Captions Aligned to MSCOCO Data Set", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.08435.pdf"]}, {"id": "e7b92fc6e2f1a13a3076a48a78e03badacb0465b", "title": "Deep Cauchy Hashing for Hamming Space Retrieval", "year": "2018", "pdf": ["http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-cauchy-hashing-cvpr18.pdf", "http://openaccess.thecvf.com/content_cvpr_2018/papers/Cao_Deep_Cauchy_Hashing_CVPR_2018_paper.pdf"]}, {"id": "7892606127c94d0be5c9bea5b6cb539f7fe3bf39", "title": "Combining Multiple Cues for Visual Madlibs Question Answering", "year": "2018", "pdf": []}, {"id": "e42b2981f4e8de54213d624d1ef12bad4fe02f0a", "title": "Through-Wall Human Pose Estimation Using Radio Signals", "year": "2018", "pdf": []}, {"id": "c896502edcdec38466e7d66f38fb53a57c8e05db", "title": "Image Companding and Inverse Halftoning using Deep Convolutional Neural Networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.00116.pdf"]}, {"id": "30a68bea6a43c239d899d7f02bb8ef9f3c5a8f47", "title": "Cross-Media Similarity Evaluation for Web Image Retrieval in the Wild", "year": "2018", "pdf": ["https://arxiv.org/pdf/1709.01305.pdf"]}, {"id": "1daaeae28270b06962eb6fcf812a368892b5dff4", "title": "Modeling Visual Context Is Key to Augmenting Object Detection Datasets", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.07428.pdf"]}, {"id": "fdce9cbe5c726201575b3c8a8c1af0752f1af53f", "title": "MAttNet: Modular Attention Network for Referring Expression Comprehension", "year": "2018", "pdf": []}, {"id": "91bb3680cee8cd37b80e07644f66f9cccf1b1aff", "title": "PASCAL Boundaries: A Semantic Boundary Dataset with a Deep Semantic Boundary Detector", "year": "2017", "pdf": ["http://www.cs.jhu.edu/~alanlab/Pubs17/premachandran2017pascalboundaries.pdf", "https://www.computer.org/csdl/proceedings/wacv/2017/4822/00/07926599.pdf"]}, {"id": "09b2e7af73689dbdba1547e19111a6ee06767906", "title": "PoseTrack: A Benchmark for Human Pose Estimation and Tracking", "year": "2018", "pdf": []}, {"id": "38e3c26829e38c6b56f7c541e0c4445820fab0fe", "title": "BOLD5000: A public fMRI dataset of 5000 images", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.01281.pdf"]}, {"id": "57bfbd06e94c5def35c1dde901bb4f1c839dbcdf", "title": "Deep patch learning for weakly supervised object classification and discovery", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.02429.pdf"]}, {"id": "5d165ff5b0b389e32809c17838a2afc218a91d62", "title": "Object Detectors Emerge in Deep Scene CNNs", "year": "2014", "pdf": ["https://arxiv.org/pdf/1412.6856.pdf"]}, {"id": "165ef2b5f86b9b2c68b652391db5ece8c5a0bc7e", "title": "Efficient Piecewise Training of Deep Structured Models for Semantic Segmentation", "year": "2016", "pdf": []}, {"id": "944faf7f14f1bead911aeec30cc80c861442b610", "title": "Action Tubelet Detector for Spatio-Temporal Action Localization", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.01861.pdf"]}, {"id": "729a30040132909cda0eab2c6c4ba60d6d1941b5", "title": "Image-based Plant Species Identification with Deep Convolutional Neural Networks", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/729a/30040132909cda0eab2c6c4ba60d6d1941b5.pdf"]}, {"id": "28af8e1a3cb3a158f8a642c8493fcfb207743d0a", "title": "Better Image Segmentation by Exploiting Dense Semantic Predictions", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.01481.pdf"]}, {"id": "139ab79a0aae5d4346f907b7b29288d09ef373fa", "title": "A Baseline for Multi-Label Image Classification Using Ensemble Deep CNN", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.08412.pdf"]}, {"id": "24aee34d1fb3cf5b1cddc2c6ef9259506be4e9c0", "title": "Visual Context Learning with Big Data Analytics", "year": "2016", "pdf": []}, {"id": "9b18cc5c938062161a4b6b0c71ee7a6c550a15f7", "title": "A Scalable Optimization Mechanism for Pairwise based Discrete Hashing.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.10810.pdf"]}, {"id": "ad9b3dc6c0e54070cec79df86458ed38566da1ff", "title": "Automated Image Captioning for Rapid Prototyping and Resource Constrained Environments", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.01393.pdf"]}, {"id": "2a74b00daa80e64cde5256bc014d1b011926b97b", "title": "Shake-Shake regularization", "year": "2017", "pdf": []}, {"id": "40c1aec7e0830bf9dd8a689d671024567311ae72", "title": "Interact as You Intend: Intention-Driven Human-Object Interaction Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.09796.pdf"]}, {"id": "3b08ef7aa0cf9528da42b2b594b66e4a6f7fdb7f", "title": "Active Learning for Delineation of Curvilinear Structures", "year": "2016", "pdf": ["https://arxiv.org/pdf/1512.00747.pdf"]}, {"id": "4d1e28368e1121872bcd4ce75bc7ba5e43bd42d0", "title": "Attend to You: Personalized Image Captioning with Context Sequence Memory Networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.06485.pdf"]}, {"id": "b6f682648418422e992e3ef78a6965773550d36b", "title": "CBMM Memo No . 061 February 8 , 2017 Full interpretation of minimal images", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/b6f6/82648418422e992e3ef78a6965773550d36b.pdf"]}, {"id": "2595319371a06449e3f8cef251303aecbb9e731e", "title": "A Review on Object Recognition for Blind People Based on Deep Learning", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/2595/319371a06449e3f8cef251303aecbb9e731e.pdf"]}, {"id": "e7df750805d6c025d0a1a74a05a83bcf8cc63dd7", "title": "Assessment of crowdsourcing and gamification loss in user-assisted object segmentation", "year": "2015", "pdf": ["http://upcommons.upc.edu/bitstream/handle/2117/81293/asknseek.pdf;jsessionid=469B72384B7C56D4A331D68A53942294?sequence=1"]}, {"id": "abd4152773ebb97b90163b9a6bbdf2075e825481", "title": "Procedural Text Generation from an Execution Video", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/abd4/152773ebb97b90163b9a6bbdf2075e825481.pdf"]}, {"id": "8b5b8db6a2a2880c14894140ea70ceb5f96c3b9b", "title": "Learning a Text-Video Embedding from Incomplete and Heterogeneous Data", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.02516.pdf"]}, {"id": "72f8df596eb9bb3a8c8206329083c42e70fcd9fd", "title": "Will People Like Your Image?", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.05203.pdf"]}, {"id": "9684a54d9ff94855a40a2dc3eeb1ff16b44795fe", "title": "Human, Object and Scene Centric Image Retrieval Engine to Enhance Image Management", "year": "2017", "pdf": []}, {"id": "1eeca84f33079c6d7a95daf8994370b2d7a93443", "title": "Fingertip Detection and Tracking for Recognition of Air-Writing in Videos", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.03016.pdf"]}, {"id": "5a0209515ab62e008efeca31f80fa0a97031cd9d", "title": "Dataset fingerprints: Exploring image collections through data mining", "year": "2015", "pdf": ["http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/3B_046.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/3B_046_ext.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/ext/3B_046_ext.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Rematas_Dataset_Fingerprints_Exploring_2015_CVPR_paper.pdf", "https://homes.cs.washington.edu/~krematas/Publications/rematasCVPR2015.pdf"]}, {"id": "b3b85d0a697c9b0309965151dcc396eb70adf46e", "title": "Shallowing Deep Networks: Layer-wise Pruning based on Feature Representations.", "year": "2018", "pdf": []}, {"id": "5925a25dfe107c49c636eccb8f9fd1aeef7b438c", "title": "Temporal Shift Module for Efficient Video Understanding", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.08383.pdf"]}, {"id": "821ba3eba1e36a29cc482f5378f4a0d0f6893159", "title": "Unsupervised Domain Adaptation for Learning Eye Gaze from a Million Synthetic Images: An Adversarial Approach", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.07926.pdf"]}, {"id": "1ca06a9c5e80bc0d4011f0d6f6ccc1a5ee746844", "title": "Automatic Image Cropping: A Computational Complexity Study", "year": "2016", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2016/papers/Chen_Automatic_Image_Cropping_CVPR_2016_paper.pdf", "http://pages.cs.wisc.edu/~liangz/12_Chen_Automatic_Image_Cropping_CVPR_2016_paper.pdf"]}, {"id": "64a336f952ec67f85fe00092847d50dd29e4cddc", "title": "Fast Screening Algorithm for Template Matching FAST SCREENING ALGORITHM FOR TEMPLATE MATCHING", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/64a3/36f952ec67f85fe00092847d50dd29e4cddc.pdf"]}, {"id": "d42142285c46207a16bd4294e437d504e419a9b7", "title": "Varying image description tasks : spoken versus written descriptions", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/d421/42285c46207a16bd4294e437d504e419a9b7.pdf"]}, {"id": "cd8398e82e0c0cc4276a1694fd333214ede337ea", "title": "Decoupled Spatial Neural Attention for Weakly Supervised Semantic Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.02563.pdf"]}, {"id": "8e416d760feb5f23bc1a6dab98eb1f6e75ab8907", "title": "Image-Grounded Conversations: Multimodal Context for Natural Question and Response Generation", "year": "2017", "pdf": ["https://arxiv.org/pdf/1701.08251.pdf"]}, {"id": "f7186eb3d717694d4ab1730a8d0f662e90c44d6f", "title": "A Computational Study on Word Meanings and Their Distributed Representations via Polymodal Embedding", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/f718/6eb3d717694d4ab1730a8d0f662e90c44d6f.pdf"]}, {"id": "35c5b84fa47e5b6cee201f831507f62482ccebcc", "title": "CUNI System for the WMT 17 Multimodal Traslation Task", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/35c5/b84fa47e5b6cee201f831507f62482ccebcc.pdf"]}, {"id": "b63041d05b78a66724fbcb2803508999bf885d6b", "title": "Deep Sets", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.06114.pdf"]}, {"id": "d318f3ca49f7f2159b9fc0face08eb284d5442dc", "title": "Scene Text Detection via Holistic, Multi-Channel Prediction", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.09002.pdf"]}, {"id": "bb4650130c460f413e97b0328624a485bf094967", "title": "Dynamic Lexicon Generation for Natural Scene Images", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/bb46/50130c460f413e97b0328624a485bf094967.pdf"]}, {"id": "43cb50f669a0d492256d11c6cc4128ba0ce79a3e", "title": "Per-Pixel Feedback for improving Semantic Segmentation", "year": "2017", "pdf": ["https://arxiv.org/pdf/1712.02861.pdf"]}, {"id": "f81f5da2a1e4eb80b465b8dffca4c9e583a8a8a6", "title": "Rapid Object Detection Systems , Utilising Deep Learning and Unmanned Aerial Systems ( Uas ) for Civil Engineering Applications", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/f81f/5da2a1e4eb80b465b8dffca4c9e583a8a8a6.pdf"]}, {"id": "e7928bd33d09fd00a588617736b102063ca9d070", "title": "A Non-Technical Survey on Deep Convolutional Neural Network Architectures", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.02129.pdf"]}, {"id": "45e2aa7706fcedcbb2d93304a9824fe762b8b3b0", "title": "DAC-SDC Low Power Object Detection Challenge for UAV Applications", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.00110.pdf"]}, {"id": "8f45907fba8fce5e9d958cf66cb68b406858fbb6", "title": "Project AutoVision: Localization and 3D Scene Perception for an Autonomous Vehicle with a Multi-Camera System", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.05477.pdf"]}, {"id": "f672bf42dbefb6b40921c00a05f60284934e9948", "title": "LDS-Inspired Residual Networks", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/f672/bf42dbefb6b40921c00a05f60284934e9948.pdf"]}, {"id": "c0343f9cc5f16166bda83815812c4c71ab3258e3", "title": "Hierarchical LSTMs with Adaptive Attention for Visual Captioning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.11004.pdf"]}, {"id": "e075c5ef9d7d4ef77c92008ffaa7ac12e60383c1", "title": "Fusing Saliency Maps with Region Proposals for Unsupervised Object Localization", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.03905.pdf"]}, {"id": "fb6bcf5763fe7ec7366f5991400090fa503613a7", "title": "Reduce Cognitive Burden on Drivers through Contextualising Environments", "year": "2018", "pdf": []}, {"id": "a133374b9630bbe6eb2b7de8c3204aa57e75c582", "title": "A Deep Network Solution for Attention and Aesthetics Aware Photo Cropping.", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/a133/374b9630bbe6eb2b7de8c3204aa57e75c582.pdf"]}, {"id": "ccbd7e417158e7ae0f9f61c3b6d1e5a3317cce34", "title": "Object Proposals in Computer Vision", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/7fb4/19f28652e4d1b1ddab1a19bdc5be1c7bbb62.pdf"]}, {"id": "aa09ade36424fd83f067f234baffde294800e705", "title": "Is a Picture Worth Ten Thousand Words in a Review Dataset?", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.07496.pdf"]}, {"id": "30ccfd2b4b6d5b30581356ccefcf96fd77c1766a", "title": "Overview of the ImageCLEF 2014 Scalable Concept Image Annotation Task", "year": "2014", "pdf": ["https://pdfs.semanticscholar.org/a346/060e3b71a0553eaa4bc28fbc0114b1798fec.pdf"]}, {"id": "a25c32ea0292af5eacd6e9e0ed53b9ff7e65433f", "title": "Learning to Learn from Web Data Through Deep Semantic Embeddings", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.06368.pdf"]}, {"id": "247b14570940601f5c7a2da1db532ecf1c302288", "title": "Dual Attention Networks for Multimodal Reasoning and Matching", "year": "2017", "pdf": ["https://arxiv.org/pdf/1611.00471.pdf"]}, {"id": "bb021f58f8822d12f5747d583a46005ade4a0b10", "title": "Breaking Microsoft \u2019 s CAPTCHA", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/bb02/1f58f8822d12f5747d583a46005ade4a0b10.pdf"]}, {"id": "a7066c13ba21817abcf8ff955740493adf95b02c", "title": "Points2Pix: 3D Point-Cloud to Image Translation using conditional Generative Adversarial Networks", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.09280.pdf"]}, {"id": "c06447df3e50ec451240205cefa0708caee8ab8c", "title": "Picture it in your mind: generating high level visual representations from textual descriptions", "year": "2017", "pdf": ["https://arxiv.org/pdf/1606.07287.pdf"]}, {"id": "4ecaa651722a98c2847377f3ae1c70294b4791ce", "title": "Few-Example Object Detection with Model Communication.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1706.08249.pdf"]}, {"id": "1eb4ea011a3122dc7ef3447e10c1dad5b69b0642", "title": "Contextual Visual Recognition from Images and Videos", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/1eb4/ea011a3122dc7ef3447e10c1dad5b69b0642.pdf"]}, {"id": "0b4d3e59a0107f0dad22e74054bab1cf1ad9c32e", "title": "Visual Genome: Connecting Language and Vision Using Crowdsourced Dense Image Annotations", "year": "2016", "pdf": ["https://arxiv.org/pdf/1602.07332.pdf"]}, {"id": "28b72ad9229f38ec61f950e1d794d6af070d1800", "title": "Zero-shot Object Prediction using Semantic Scene Knowledge", "year": "2017", "pdf": ["https://arxiv.org/pdf/1604.07952.pdf"]}, {"id": "877c5a3ad915c2bbb5595252d08163f34ce58957", "title": "LCR-Net: Localization-Classification-Regression for Human Pose", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2017/papers/Rogez_LCR-Net_Localization-Classification-Regression_for_CVPR_2017_paper.pdf"]}, {"id": "e8e662e45e39249756d2b0090782434a5cf1f4aa", "title": "VegFru: A Domain-Specific Dataset for Fine-Grained Visual Categorization", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017/papers/Hou_VegFru_A_Domain-Specific_ICCV_2017_paper.pdf", "http://openaccess.thecvf.com/content_ICCV_2017/supplemental/Hou_VegFru_A_Domain-Specific_ICCV_2017_supplemental.pdf"]}, {"id": "1db6a35c42e86d5cd64ad2a4803fd683647fab96", "title": "Tiny Transform Net for Mobile Image Stylization", "year": "2017", "pdf": []}, {"id": "8efb48fdb8616da5133efb8659cd96c53529f76a", "title": "Active Vision Dataset Benchmark", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018_workshops/papers/w40/Ammirato_Active_Vision_Dataset_CVPR_2018_paper.pdf"]}, {"id": "71afbb37a81a44299872bee9ae888f4129962e18", "title": "RECURRENT NEURAL FEEDBACK MODEL FOR AUTOMATED IMAGE ANNOTATION", "year": "2017", "pdf": []}, {"id": "9d8747468f0fed8e335656d7fe9737e4dc21c798", "title": "RetinaMask: Learning to predict masks improves state-of-the-art single-shot detection for free", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.03353.pdf"]}, {"id": "511fad07b943f088e487ea09ffc6c89114bb3c3e", "title": "Towards an Integrated Method of Detection and Description for Face Authentication System", "year": "2018", "pdf": []}, {"id": "9f545b9006970f7626b7b121c5c3c66204f1ba40", "title": "Improving Pairwise Ranking for Multi-label Image Classification", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.03135.pdf"]}, {"id": "2ba540ca70c7dee81e13768792aa7571952987f6", "title": "Drivable Road Detection Based on Dilated FPN with Feature Aggregation", "year": "2017", "pdf": []}, {"id": "2e8b08c8df95d2ef8c0d03820094608e9cf456ab", "title": "License Plate Detection and Recognition in Unconstrained Scenarios", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/2e8b/08c8df95d2ef8c0d03820094608e9cf456ab.pdf"]}, {"id": "8331fb280f083767fe85ba476862e519e0275233", "title": "OMNIA Faster R-CNN: Detection in the wild through dataset merging and soft distillation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.02611.pdf"]}, {"id": "54e0b4f63e6ed966d76b6ef83488249fd09d3a43", "title": "Plug & Play Generative Networks: Conditional Iterative Generation of Images in Latent Space", "year": "2017", "pdf": []}, {"id": "f11f609facfb650ed8e659236a04bc0a664cb665", "title": "An Analysis of Scale Invariance in Object Detection - SNIP", "year": "2018", "pdf": []}, {"id": "a8773da25b133d258132d646024bf5ffd5694302", "title": "A Bottom-Up Approach Based on Semantics for the Interpretation of the Main Camera Stream in Soccer Games", "year": "2018", "pdf": []}, {"id": "666e08b6921a28fed75f35dd70d322f0edc06e41", "title": "Rain Removal in Traffic Surveillance: Does it Matter?", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.12574.pdf"]}, {"id": "1ac6a33f04f6c5a8084c15c85295f987cc8e3d72", "title": "FVQA: Fact-Based Visual Question Answering", "year": "2018", "pdf": ["https://arxiv.org/pdf/1606.05433.pdf"]}, {"id": "3ab5d67310aa3592b68e9ef55df4603507d0d486", "title": "Learning Spatial Transforms for Refining Object Segment Proposals", "year": "2017", "pdf": ["https://www.computer.org/csdl/proceedings/wacv/2017/4822/00/07926595.pdf"]}, {"id": "757b27a3ceb2293b8284fc24a7084a0c3fc2ae21", "title": "Data Distillation: Towards Omni-Supervised Learning", "year": "2018", "pdf": []}, {"id": "0d4dbd59e42e615ccf6cd4f71203be97afac48fb", "title": "End-to-End Joint Semantic Segmentation of Actors and Actions in Video", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/0d4d/bd59e42e615ccf6cd4f71203be97afac48fb.pdf"]}, {"id": "81d327ec41c67728b15438bca86d10b72de1d88f", "title": "Visual Affordance and Function Understanding: A Survey", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.06775.pdf"]}, {"id": "df5094b2e8cf7e3bde3943ca7a56eb879b8e34ab", "title": "A Concatenated Residual Convolutional Network for Image Deblurring", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/df50/94b2e8cf7e3bde3943ca7a56eb879b8e34ab.pdf"]}, {"id": "b8298cf0056af5afa3185181ddd5f6bb03181696", "title": "Training for Diversity in Image Paragraph Captioning", "year": "2018", "pdf": []}, {"id": "7a9fe5781220cca6ca600833015f200a9c03d50e", "title": "Teaching Machines to Describe Images via Natural Language Feedback", "year": "2017", "pdf": ["https://arxiv.org/pdf/1706.00130.pdf"]}, {"id": "08f46d6a91e513edd57a0ef15d5367b5d0545c1b", "title": "How do targets, nontargets, and scene context influence real-world object detection?", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/08f4/6d6a91e513edd57a0ef15d5367b5d0545c1b.pdf"]}, {"id": "0612745dbd292fc0a548a16d39cd73e127faedde", "title": "Flickr30k Entities: Collecting Region-to-Phrase Correspondences for Richer Image-to-Sentence Models", "year": "2015", "pdf": ["https://arxiv.org/pdf/1505.04870.pdf"]}, {"id": "4b4a174f46ce03caf1ffa4addd074aaa70539f35", "title": "BlazeIt: Fast Exploratory Video Queries using Neural Networks.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.01046.pdf"]}, {"id": "aa6f094f17d78380f927555a348ad514a505cc3b", "title": "SlowFast Networks for Video Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.03982.pdf"]}, {"id": "bfce448a3409d87ba281de53ed696b09119f2ba9", "title": "Object segmentation in depth maps with one user click and a synthetically trained fully convolutional network", "year": "2018", "pdf": ["https://arxiv.org/pdf/1801.01281.pdf"]}, {"id": "885d589101ab3c09bda20ee9578f2c6d2f6cddfa", "title": "Learning to Guide Decoding for Image Captioning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.00887.pdf"]}, {"id": "1553084dcbf2235428e7dbf57b57e567c5ea4d1f", "title": "AISHELL-2: Transforming Mandarin ASR Research Into Industrial Scale", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.10583.pdf"]}, {"id": "d7d2a1d42f0e3182d538cf8fb4d55f3e9d7ce779", "title": "Setting an attention region for convolutional neural networks using region selective features, for recognition of materials within glass vessels", "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.08711.pdf"]}, {"id": "658c802890c7133e2ade778b5d88b68bcd0dca9c", "title": "Learning to Segment via Cut-and-Paste", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.06414.pdf"]}, {"id": "e56c99e8a94d3e585166fcd66f2ab6da60932f09", "title": "Semantic Speech Retrieval With a Visually Grounded Model of Untranscribed Speech", "year": "2018", "pdf": ["https://arxiv.org/pdf/1710.01949.pdf"]}, {"id": "135c71101af5d030f8cf470c454e7b655d699920", "title": "Stacked Latent Attention for Multimodal Reasoning", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Fan_Stacked_Latent_Attention_CVPR_2018_paper.pdf"]}, {"id": "9d17e897e8344d1cf42a322359b48d1ff50b4aef", "title": "Learning to Fuse Things and Stuff", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.01192.pdf"]}, {"id": "185e7191dc5eca046d90205527da597b6ba9ae3c", "title": "Fast Self-Attentive Multimodal Retrieval", "year": "2018", "pdf": []}, {"id": "6fdc0bc13f2517061eaa1364dcf853f36e1ea5ae", "title": "DAISEE: Dataset for Affective States in E-Learning Environments", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/6fdc/0bc13f2517061eaa1364dcf853f36e1ea5ae.pdf"]}, {"id": "947c973846f2c5f8f42225c1108810bcdb4a7015", "title": "Grounded language understanding for manipulation instructions using GAN-based classification", "year": "2017", "pdf": ["https://arxiv.org/pdf/1801.05096.pdf"]}, {"id": "d082f64b8a0a07d105207eb822be58ffb61b353a", "title": "EMOTIC: Emotions in Context Dataset", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2017_workshops/w41/papers/Lapedriza_EMOTIC_Emotions_in_CVPR_2017_paper.pdf"]}, {"id": "1e55e9c647832c969e449da28a391205a9704c60", "title": "Actor and Action Video Segmentation from a Sentence", "year": "2018", "pdf": []}, {"id": "b705ca751a947e3b761e2305b41891051525d9df", "title": "Exploring Context with Deep Structured Models for Semantic Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1603.03183.pdf"]}, {"id": "8bce31108f598986558e9afb1061eb988ea4f3be", "title": "Automated Image Annotation based on YOLOv3", "year": "2018", "pdf": []}, {"id": "7c7af300c4780ad01e7db4d60fbf89771672585b", "title": "Detection and Segmentation of Manufacturing Defects with Convolutional Neural Networks and Transfer Learning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.02518.pdf"]}, {"id": "f4c45108cb41051010d8a5175b8da23eb246c967", "title": "Improving Image Captioning by Leveraging Knowledge Graphs", "year": "2019", "pdf": []}, {"id": "2c28c95066b1df918f956f3cc072e29fd452dcad", "title": "Generalized Multi-View Embedding for Visual Recognition and Cross-Modal Retrieval", "year": "2018", "pdf": ["https://arxiv.org/pdf/1605.09696.pdf"]}, {"id": "356ac139eb92753d710c1cd6bbb84403b8cd711b", "title": "A virtual reality platform for dynamic human-scene interaction", "year": "2016", "pdf": []}, {"id": "d3a545bb8008a717ef95aa981295f38e6cee0d9d", "title": "Art painting detection and identification based on deep learning and image local features", "year": "2018", "pdf": []}, {"id": "3b1860cacfd59893bb0f7e131e982e9bbeb21e87", "title": "Defect Detection from UAV Images Based on Region-Based CNNs", "year": "2018", "pdf": []}, {"id": "bd4b5fcdf801b13d51083f3f7809366751c81c09", "title": "c-RNN: A Fine-Grained Language Model for Image Captioning", "year": "2018", "pdf": []}, {"id": "3f10b9d98a276fb9e21e5742ce88bc7f48629715", "title": "Imparare a Quantificare Guardando (Learning to Quantify by Watching)", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/3f10/b9d98a276fb9e21e5742ce88bc7f48629715.pdf"]}, {"id": "bb60196acdd4b0870cd0dd7f5a7c712aa042b1d1", "title": "A Benchmark for Endoluminal Scene Segmentation of Colonoscopy Images", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.00799.pdf"]}, {"id": "afc4cc092f990644ff7a11dc7ab60519920cbc9d", "title": "Learning Rich Features for Image Manipulation Detection", "year": "2018", "pdf": []}, {"id": "09222c50d8ffcc74bbb7462400bd021772850bba", "title": "Incorporating Network Built-in Priors in Weakly-Supervised Semantic Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1706.02189.pdf"]}, {"id": "7d39d69b23424446f0400ef603b2e3e22d0309d6", "title": "YOLO9000: Better, Faster, Stronger", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.08242.pdf"]}, {"id": "bc6a01ea112d45bdded0bb2d34a4782e4f6f16be", "title": "Real-world Multi-object, Multi-grasp Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.00520.pdf"]}, {"id": "24e680c5e7f59b72984ebc344dfba0f9573fb38c", "title": "On the difficulty of a distributional semantics of spoken language", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.08869.pdf"]}, {"id": "2d15a7546c16d5821ffa8f769eb7ec18e435e64d", "title": "Recognition in Terra Incognita", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.04975.pdf"]}, {"id": "84a3478cf884b0cbba480c7c36dab565e778094b", "title": "Learning to detect chest radiographs containing lung nodules using visual attention networks", "year": "2019", "pdf": ["https://arxiv.org/pdf/1712.00996.pdf"]}, {"id": "bbe3d39adcb41ad2824204c0b0d299d77c2d8363", "title": "SketchyGAN: Towards Diverse and Realistic Sketch to Image Synthesis", "year": "2018", "pdf": []}, {"id": "d7443ed614548498a390aa1b83c74069aa923074", "title": "Dataless Black-Box Model Comparison", "year": "2018", "pdf": []}, {"id": "225c318f3143b9b9d6c0f7a87672bd1d9d2901f4", "title": "2 D-Driven 3 D Object Detection in RGB-D Images", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/225c/318f3143b9b9d6c0f7a87672bd1d9d2901f4.pdf"]}, {"id": "b20dc4906fc021aaf8353df98f8828d99f951c3c", "title": "Good View Hunting: Learning Photo Composition from Dense View Pairs", "year": "2018", "pdf": []}, {"id": "f42099bcd6cd13146d96d1ed15353a4ebc1f4d9a", "title": "Single-Shot Refinement Neural Network for Object Detection", "year": "2018", "pdf": []}, {"id": "d05825a394f11a391c8815f6b0d394cdb4cfaa95", "title": "I2T2I: Learning text to image synthesis with textual data augmentation", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.06676.pdf"]}, {"id": "475e16577be1bfc0dd1f74f67bb651abd6d63524", "title": "DAiSEE: Towards User Engagement Recognition in the Wild", "year": "2016", "pdf": ["https://arxiv.org/pdf/1609.01885.pdf"]}, {"id": "7985ac55e170273dd0ffa6bd756e588bab301d57", "title": "Mind's eye: A recurrent visual representation for image caption generation", "year": "2015", "pdf": ["http://www.cs.cmu.edu/~xinleic/docs/rvr/RVR-1.pdf", "http://www.cs.cmu.edu/~xinleic/papers/cvpr15_rnn.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/2A_022.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/2A_022_ext.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/ext/2A_022_ext.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Chen_Minds_Eye_A_2015_CVPR_paper.pdf"]}, {"id": "5979acf247d8af73b41da0eab6d8a7eda1036c9a", "title": "A Fully-Convolutional Framework for Semantic Segmentation", "year": "2017", "pdf": []}, {"id": "05357b8c05b5bc020e871fc330a88910c3177e4d", "title": "Multiple Instance Detection Network with Online Instance Classifier Refinement", "year": "2017", "pdf": []}, {"id": "2ce76250731cb19ccc5ffff43e4c6abec8f5af79", "title": "Learning to Segment Affordances", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017_workshops/papers/w14/Luddecke_Learning_to_Segment_ICCV_2017_paper.pdf"]}, {"id": "3468740e4a9fc72a269f4f0ca8470ccd60925f92", "title": "Robustness Analysis of Visual QA Models by Basic Questions", "year": "2017", "pdf": ["https://arxiv.org/pdf/1709.04625.pdf"]}, {"id": "9c11b1552748c697bbfd33c157f7b7875686eb10", "title": "Application of deep learning in object detection", "year": "2017", "pdf": []}, {"id": "bf2793fc09176f8bf23b3a2b3c6b32185e8a8329", "title": "Multi-level Fusion Based 3D Object Detection from Monocular Images", "year": "2018", "pdf": []}, {"id": "9a33c33190ce0c1bfffe2896ce162b3be05bc9e8", "title": "Using Explanations to Improve Ensembling of Visual Question Answering Systems", "year": "2017", "pdf": []}, {"id": "052373d7fd12145c41377f9a05513596d32e409c", "title": "Deep Compositional Captioning: Describing Novel Object Categories without Paired Training Data", "year": "2016", "pdf": []}, {"id": "3796fe0e5dd10b01fc9fb486d09e61b28eedf915", "title": "Instance Segmentation of Fibers from Low Resolution CT Scans via 3D Deep Embedding Learning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1901.01034.pdf"]}, {"id": "21ff1d20dd7b3e6b1ea02036c0176d200ec5626d", "title": "Loss Max-Pooling for Semantic Image Segmentation", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.02966.pdf"]}, {"id": "2e78e804ddfe86f112719d4a237c882f930a9d3b", "title": "Development of Real-time ADAS Object Detector for Deployment on CPU", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.05894.pdf"]}, {"id": "82451d0ae2e0154b5f9cd096af199c35319e911a", "title": "Image spam filtering using convolutional neural networks", "year": "2018", "pdf": []}, {"id": "5fe7add7bb041eb52c9983fbdd792bfad1af9992", "title": "ELASTIC: Improving CNNs with Instance Specific Scaling Policies", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.05262.pdf"]}, {"id": "c2021d9652bf3ec3921bf9c13a06e1ea51588d54", "title": "Multimodal Image Captioning for Marketing Analysis", "year": "2018", "pdf": []}, {"id": "c88c233c50f49b4b85a8702d5ee384d96fca8c23", "title": "Object-Proposal Evaluation Protocol is \u2018Gameable\u2019", "year": "2016", "pdf": []}, {"id": "bd37ff771acd72ebdf4024043cb62fcacdd3a82b", "title": "Cycle-Consistent Deep Generative Hashing for Cross-Modal Retrieval", "year": "2019", "pdf": ["https://arxiv.org/pdf/1804.11013.pdf"]}, {"id": "a9b219bd6ad71394ad21929fb5c50387698f340d", "title": "Geometry-Aware Traffic Flow Analysis by Detection and Tracking", "year": "2018", "pdf": []}, {"id": "e8b2a98f87b7b2593b4a046464c1ec63bfd13b51", "title": "CMS-RCNN: Contextual Multi-Scale Region-based CNN for Unconstrained Face Detection", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.05413.pdf"]}, {"id": "79a05184a6689800fee40bbb95554ffeffafeee2", "title": "Deep Image Harmonization", "year": "2017", "pdf": []}, {"id": "4d86a4c74201c0d5df9a4b259a70f2221a5cb0cb", "title": "Quad-Networks: Unsupervised Learning to Rank for Interest Point Detection", "year": "2017", "pdf": []}, {"id": "8067d5d3fc80abb010b239bbe012b50bae8e6611", "title": "On the Robustness of Semantic Segmentation Models to Adversarial Attacks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.09856.pdf"]}, {"id": "87fe38a79ee12531bb618d4006a476b3a5256f3e", "title": "Relative Saliency and Ranking: Models, Metrics, Data, and Benchmarks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.02426.pdf"]}, {"id": "04a5b47caca62d9351219726e92c08e60f74d652", "title": "A Novel Co-design Peta-scale Heterogeneous Cluster for Deep Learning Training.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.02326.pdf"]}, {"id": "fc09e358a130e88dead33bf6bb2cbc0e94c88291", "title": "Single-Shot Object Detection with Enriched Semantics", "year": "2018", "pdf": []}, {"id": "ca673e6c11000459e3ac27dc4ab5f29177d04fea", "title": "A detection method for low-pixel ratio object", "year": "2018", "pdf": []}, {"id": "e4a5ff03ac258f1bcc9c214c30497610b3d5faa2", "title": "DropBlock: A regularization method for convolutional networks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.12890.pdf"]}, {"id": "58d16e23e1192be4acaf6a29c1f5995817146554", "title": "Bringing back simplicity and lightliness into neural image captioning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.06245.pdf"]}, {"id": "d7cb7ca97a665f23165b3c5420e848563cdf1f15", "title": "Pooling Pyramid Network for Object Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.03284.pdf"]}, {"id": "38b2e523828a1f23ad5ad4306a0f9fedca167c90", "title": "Satellite Imagery Multiscale Rapid Detection with Windowed Networks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.09978.pdf"]}, {"id": "1b7a7d291235e4b6e5f97722124070feb26f3cc1", "title": "Learning Two-Branch Neural Networks for Image-Text Matching Tasks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1704.03470.pdf"]}, {"id": "cb4fc4d49783f2049c48a062169f04eb744443ec", "title": "Paying More Attention to Saliency: Image Captioning with Saliency and Context Attention", "year": "2018", "pdf": ["https://arxiv.org/pdf/1706.08474.pdf"]}, {"id": "c27c2fe9642fb82a3dfc314ce6003fe7a88eb1ec", "title": "Interpretable R-CNN", "year": "2017", "pdf": []}]} \ No newline at end of file
+{"id": "5e0f8c355a37a5a89351c02f174e7a5ddcb98683", "citations": [{"id": "08f6b52317b34b60aa65f38b83e3d72deffa0473", "title": "Sheffield MultiMT: Using Object Posterior Predictions for Multimodal Machine Translation", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/bae0/9864ea2c05bccf275cf824580ce212111e42.pdf"], "doi": []}, {"id": "ce9799830a24412f4bd9ad30a9d6e2a50215f8f8", "title": "Beef Cattle Instance Segmentation Using Fully Convolutional Neural Network", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.01972.pdf"], "doi": []}, {"id": "369c4a308ec9e56746f7cc1b164208b917e31a22", "title": "Scene Classification in Indoor Environments for Robots using Context Based Word Embeddings", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/369c/4a308ec9e56746f7cc1b164208b917e31a22.pdf"], "doi": []}, {"id": "35ebe95db7ab148e25904604d3b06a9412f6b4a4", "title": "Illustrative Language Understanding: Large-Scale Visual Grounding with Image Search", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/80d9/993e6abd22a58a4fedd63754a15085778367.pdf"], "doi": []}, {"id": "d4901683e2c2552fc2d62d4eb3b1f5d5fa60a5ff", "title": "ScaleNet: Scale Invariant Network for Semantic Segmentation in Urban Driving Scenes", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/d490/1683e2c2552fc2d62d4eb3b1f5d5fa60a5ff.pdf"], "doi": []}, {"id": "254f7ef73629c18ff9ba13af59b2d78df3fda59d", "title": "Deep Object-Centric Representations for Generalizable Robot Learning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1708.04225.pdf"], "doi": []}, {"id": "0f08d62e882026ac83ebf26c0bd288c553873814", "title": "Multispecies Fruit Flower Detection Using a Refined Semantic Segmentation Network", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.10080.pdf"], "doi": []}, {"id": "89fff8387432878db240a044a98ff9c9200f3197", "title": "Learning Globally Optimized Object Detector via Policy Gradient", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Rao_Learning_Globally_Optimized_CVPR_2018_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578746", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2018.00648", "http://doi.org/10.1109/CVPR.2018.00648"]}, {"id": "b955969e1077ca328018c9e4dcf27b87ed9f5076", "title": "Knowing When to Look: Adaptive Attention via a Visual Sentinel for Image Captioning", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.01887.pdf"], "doi": []}, {"id": "f56edb6f2bf4f5bc9d54284289212b8d4a437c1b", "title": "Detection and Localization of Texture-less Objects with Deep Neural Networks", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/f56e/db6f2bf4f5bc9d54284289212b8d4a437c1b.pdf"], "doi": []}, {"id": "a345fc597b15c26f3f2823ccd5aac0d4c976279e", "title": "Adversarially Parameterized Optimization for 3D Human Pose Estimation", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8374567", "http://doi.ieeecomputersociety.org/10.1109/3DV.2017.00026", "http://doi.org/10.1109/3DV.2017.00026"]}, {"id": "3d1e82b69663758a1db87fbebed6525d23090146", "title": "ScribbleSup: Scribble-Supervised Convolutional Networks for Semantic Segmentation", "year": "2016", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780713"]}, {"id": "7c2a6b27dd756fe851954e0c040bc87bbd39f098", "title": "Using Psychophysical Methods to Understand Mechanisms of Face Identification in a Deep Neural Network", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018_workshops/papers/w39/Xu_Using_Psychophysical_Methods_CVPR_2018_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8575433", "http://doi.ieeecomputersociety.org/10.1109/CVPRW.2018.00266", "http://doi.org/10.1109/CVPRW.2018.00266"]}, {"id": "9595a267de2b0ecf7e4e2962a606c8854551e203", "title": "On the Relation between Color Image Denoising and Classification", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.01372.pdf"], "doi": []}, {"id": "94a76e349e43f09c863a9c77e47722c5ade3740e", "title": "WELDON: Weakly Supervised Learning of Deep Convolutional Neural Networks", "year": "2016", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2016/papers/Durand_WELDON_Weakly_Supervised_CVPR_2016_paper.pdf", "http://openaccess.thecvf.com/content_cvpr_2016/supplemental/Durand_WELDON_Weakly_Supervised_2016_CVPR_supplemental.pdf", "http://webia.lip6.fr/~thomen/papers/Durand_CVPR_2016.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Durand_WELDON_Weakly_Supervised_CVPR_2016_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780882", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2016.513", "http://doi.org/10.1109/CVPR.2016.513", "https://hal.archives-ouvertes.fr/hal-01343785/document"]}, {"id": "f5f3faa71ed2b61fa3a99bde25d6e84bfbefbb0b", "title": "Automatic Fish Classification System Using Deep Learning", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8371919", "http://doi.ieeecomputersociety.org/10.1109/ICTAI.2017.00016", "http://doi.org/10.1109/ICTAI.2017.00016"]}, {"id": "3832a6d6b1f78cdadee6968d51c1c7c2922ab3cd", "title": "ISIA at the ImageCLEF 2017 Image Caption Task", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/3832/a6d6b1f78cdadee6968d51c1c7c2922ab3cd.pdf"], "doi": []}, {"id": "6abdd7c4dea2d5d7ff82cd5e6e503be1589861df", "title": "Detection and Recognition of U.S. Warning Signs on Curves", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8372008", "http://doi.ieeecomputersociety.org/10.1109/ICTAI.2017.00105", "http://doi.org/10.1109/ICTAI.2017.00105"]}, {"id": "a49b309ba14ad00a3bcb7b99a45d5bcf9bbc0ab1", "title": "Guided Perturbations: Self-Corrective Behavior in Convolutional Neural Networks", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017/papers/Sankaranarayanan_Guided_Perturbations_Self-Corrective_ICCV_2017_paper.pdf", "https://arxiv.org/pdf/1703.07928v1.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8237647", "http://doi.ieeecomputersociety.org/10.1109/ICCV.2017.385", "http://doi.org/10.1109/ICCV.2017.385"]}, {"id": "104dd4963f7f0ef03fe09d505d31966666f9281d", "title": "Salient Object Subitizing", "year": "2015", "pdf": ["https://arxiv.org/pdf/1607.07525.pdf"], "doi": []}, {"id": "041755d1c14077ce18d8553aa40a415283edc825", "title": "W2F: A Weakly-Supervised to Fully-Supervised Framework for Object Detection", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Zhang_W2F_A_Weakly-Supervised_CVPR_2018_paper.pdf", "https://ivul.kaust.edu.sa/Documents/Publications/2018/W2F%20A%20Weakly-Supervised%20to%20Fully-Supervised%20Framework.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578201", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2018.00103", "http://doi.org/10.1109/CVPR.2018.00103"]}, {"id": "ce8c8e9fdbdd84adc096018bb0edb49b6913b946", "title": "Learning Discriminative Features for Speaker Identification and Verification", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/ce8c/8e9fdbdd84adc096018bb0edb49b6913b946.pdf"], "doi": []}, {"id": "110556d073a4d930877edc597a92995f0ff9d294", "title": "Application of Faster R-CNN model on Human Running Pattern Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.05147.pdf"], "doi": []}, {"id": "549c719c4429812dff4d02753d2db11dd490b2ae", "title": "YouTube-BoundingBoxes: A Large High-Precision Human-Annotated Data Set for Object Detection in Video", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.00824.pdf"], "doi": []}, {"id": "1295cbaf3b03de2eb8c79530289f5939d7819e5c", "title": "DeepFix: A Fully Convolutional Neural Network for Predicting Human Eye Fixations", "year": "2017", "pdf": ["https://arxiv.org/pdf/1510.02927.pdf"], "doi": []}, {"id": "c32b09f20badd9ce04309d7c5ebea88336a3345a", "title": "Token-level and sequence-level loss smoothing for RNN language models", "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.05062.pdf"], "doi": []}, {"id": "4cfd15e9d3c01028bcda22e68791a95aa54c2a7c", "title": "DeepLesion: Automated Deep Mining, Categorization and Detection of Significant Radiology Image Findings using Large-Scale Clinical Lesion Annotations", "year": "2017", "pdf": ["https://arxiv.org/pdf/1710.01766.pdf"], "doi": []}, {"id": "25b9ef5c78dbf17c71e6fd94054dd55d66c39264", "title": "Multimedia Semantic Integrity Assessment Using Joint Embedding Of Images And Text", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.01606.pdf"], "doi": []}, {"id": "0b2c543e0c47454c4512569175094e6cb6ae02a9", "title": "The VizWiz Grand Challenge : A Large Visual Question Answering Dataset from Blind People Anonymous CVPR submission", "year": "", "pdf": ["https://pdfs.semanticscholar.org/0b2c/543e0c47454c4512569175094e6cb6ae02a9.pdf"], "doi": []}, {"id": "efb01e07ee994fec890ad83b89c9b652349da50e", "title": "What am I searching for", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.11926.pdf"], "doi": []}, {"id": "1c9333bcf523388d75f852e0689b0e7f5a04faa4", "title": "Person Part Segmentation based on Weak Supervision", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/1c93/33bcf523388d75f852e0689b0e7f5a04faa4.pdf"], "doi": []}, {"id": "325093f2c5b33d7507c10aa422e96aa5b10a33f1", "title": "In-place Activated BatchNorm for Memory-Optimized Training of DNNs", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578689"]}, {"id": "c399c0089fb134d1476fadf5f0426e0e8b70eebd", "title": "The Lov\u00e1sz Hinge: A Novel Convex Surrogate for Submodular Losses.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1512.07797.pdf"], "doi": []}, {"id": "c98e5a582e9f1b11bf5244ed16c78a7cbccf36b9", "title": "Towards Semantic SLAM: Points, Planes and Objects", "year": "2018", "pdf": [], "doi": []}, {"id": "b0d52bb1c9cff9416fe766e9cba94ceeab12d51f", "title": "Object detection using deep CNNs trained on synthetic images", "year": "2017", "pdf": ["https://arxiv.org/pdf/1706.06782.pdf"], "doi": []}, {"id": "1d21ec277d2a72e506a9665f19ec5478dbedc20f", "title": "A Semi-Automatic Annotation Technology for Traffic Scene Image Labeling Based on Deep Learning Preprocessing", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8005815", "http://doi.ieeecomputersociety.org/10.1109/CSE-EUC.2017.63", "http://doi.org/10.1109/CSE-EUC.2017.63"]}, {"id": "18f9a6045ba01cb079c4fa49a630d71bbd27cd92", "title": "A dataset of clinically generated visual questions and answers about radiology images", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/18f9/a6045ba01cb079c4fa49a630d71bbd27cd92.pdf"], "doi": []}, {"id": "619701bb7d73171d6311c9e14b8ac5919692d03e", "title": "Semantic Edge Detection with Diverse Deep Supervision", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.02864.pdf"], "doi": []}, {"id": "68f6b329044b01cce1660e953875ece2c3778c68", "title": "Intelligent Surveillance as an Edge Network Service: from Harr-Cascade, SVM to a Lightweight CNN", "year": "2018", "pdf": [], "doi": []}, {"id": "d983ce0a7d249bb85d14a66a89225a7136cc783b", "title": "A Single-shot Object Detector with Feature Aggragation and Enhancement", "year": "2019", "pdf": ["https://arxiv.org/pdf/1902.02923.pdf"], "doi": []}, {"id": "22aa426aeffb77339646cc03da8e94de22396efc", "title": "S HAKES HAKE REGULARIZATION OF 3-BRANCH RESIDUAL NETWORKS", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/22aa/426aeffb77339646cc03da8e94de22396efc.pdf"], "doi": []}, {"id": "19f73120fa314666c841ea88cb2b627aa67251b4", "title": "Evaluating Merging Strategies for Sampling-based Uncertainty Techniques in Object Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.06006.pdf"], "doi": []}, {"id": "d31d4bb58f5dd67016e77352ac7600e2ba71e38f", "title": "Deep Learning Object Detection Methods for Ecological Camera Trap Data", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.10842.pdf"], "doi": []}, {"id": "3fea412361b2d14cb3c6723968b421c1c8cb38e8", "title": "Shake-Shake regularization", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.07485.pdf"], "doi": []}, {"id": "96fdc0131dc80ffa6d7b9c526e07f080414c54ec", "title": "1 Paying More A ention to Saliency : Image Captioning with Saliency and Context A ention", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/96fd/c0131dc80ffa6d7b9c526e07f080414c54ec.pdf"], "doi": []}, {"id": "332339c32d41cc8176d360082b4d9faa90dadffa", "title": "UberNet: Training a Universal Convolutional Neural Network for Low-, Mid-, and High-Level Vision Using Diverse Datasets and Limited Memory", "year": "2017", "pdf": ["https://arxiv.org/pdf/1609.02132.pdf"], "doi": []}, {"id": "76b61ccb488ad7861e9e36054f96195877f4c9f6", "title": "Automatic image annotation: the quirks and what works", "year": "2018", "pdf": [], "doi": ["https://doi.org/10.1007/s11042-018-6247-3"]}, {"id": "193a69489230de1013dff9af1232e5379cc5282f", "title": "Intelligent Multimodal Framework for Human Assistive Robotics Based on Computer Vision Algorithms", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/193a/69489230de1013dff9af1232e5379cc5282f.pdf"], "doi": []}, {"id": "05e9e85b5137016c93d042170e82f77bb551a108", "title": "A Benchmark Dataset and Evaluation Methodology for Video Object Segmentation", "year": "2016", "pdf": ["http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Perazzi_A_Benchmark_Dataset_CVPR_2016_paper.pdf", "https://graphics.ethz.ch/~perazzif/davis/files/davis.pdf", "https://graphics.ethz.ch/~perazzif/davis/files/davis_poster_cvpr_2016.pdf", "https://graphics.ethz.ch/~perazzif/davis/files/davis_supplementary.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780454", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2016.85", "http://doi.org/10.1109/CVPR.2016.85"]}, {"id": "8899094797e82c5c185a0893896320ef77f60e64", "title": "Non-local Neural Networks", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578911"]}, {"id": "12660f0defc6580e566c0fa2ac909971d6c6883b", "title": "The SYNTHIA Dataset: A Large Collection of Synthetic Images for Semantic Segmentation of Urban Scenes", "year": "2016", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2016/papers/Ros_The_SYNTHIA_Dataset_CVPR_2016_paper.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Ros_The_SYNTHIA_Dataset_CVPR_2016_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780721", "http://doi.org/10.1109/CVPR.2016.352", "https://www.wikidata.org/entity/Q57840870"]}, {"id": "bf881e53510b230879aa0d3b02576043b8f881e7", "title": "Automated Image Captioning Using Nearest-Neighbors Approach Driven by Top-Object Detections", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/bf88/1e53510b230879aa0d3b02576043b8f881e7.pdf"], "doi": []}, {"id": "10beef48f3a9b2c4a6c863edefe2cac8bf09b50c", "title": "Fine-Level Semantic Labeling of Large-Scale 3D Model by Active Learning", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8491004", "http://doi.ieeecomputersociety.org/10.1109/3DV.2018.00066", "http://doi.org/10.1109/3DV.2018.00066"]}, {"id": "761de31c575bf30162b6e0d92a1800eb406e96b5", "title": "A Flexible Convolutional Solver with Application to Photorealistic Style Transfer", "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.05285.pdf"], "doi": []}, {"id": "96d1bf51b3aa118696ce18dc45924e5ca8b9d885", "title": "Insulator Detection in Aerial Images for Transmission Line Inspection Using Single Shot Multibox Detector", "year": "2019", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8603728"]}, {"id": "10c4b2489d7e1ee43a1d19724d3c1e9c33ca3f29", "title": "A Question-Answering framework for plots using Deep learning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.04655.pdf"], "doi": []}, {"id": "dbc359e95d999bf119353f51edcb53b2beb5b3fe", "title": "Object Detection in Real-Time Systems: Going Beyond Precision", "year": "2018", "pdf": ["https://www.iiitd.edu.in/~chetan/papers/obj-det-wacv18.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8354221", "http://doi.ieeecomputersociety.org/10.1109/WACV.2018.00117", "http://doi.org/10.1109/WACV.2018.00117"]}, {"id": "de1505819e145b5c22a6e09002510413019f7228", "title": "DeepFood: Deep Learning-Based Food Image Recognition for Computer-Aided Dietary Assessment", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.05675.pdf"], "doi": []}, {"id": "637648198f9e91654ce27eaaa40512f2dc870fc1", "title": "Survey of Visual Question Answering: Datasets and Techniques", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.03865.pdf"], "doi": []}, {"id": "0b888196dda951287dddb60bd44798aab16d6fca", "title": "Learning Common Sense through Visual Abstraction", "year": "2015", "pdf": ["http://openaccess.thecvf.com/content_iccv_2015/papers/Vedantam_Learning_Common_Sense_ICCV_2015_paper.pdf", "http://oui.csail.mit.edu/camera_readys/21.pdf", "http://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Vedantam_Learning_Common_Sense_ICCV_2015_paper.pdf", "https://filebox.ece.vt.edu/~parikh/Publications/ICCV2015_commonsense.pdf", "https://www.cc.gatech.edu/~parikh/Publications/ICCV2015_commonsense.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7410649", "http://doi.ieeecomputersociety.org/10.1109/ICCV.2015.292", "http://doi.org/10.1109/ICCV.2015.292"]}, {"id": "bb127015474fdc51d4cd6b4dda7176a8c778ea49", "title": "Examining the Impact of Blur on Recognition by Convolutional Networks.", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.05760.pdf"], "doi": []}, {"id": "85cace98e07e190494a5d88eba5e5b11538dc3b1", "title": "Hierarchical Semantic Mapping Using Convolutional Neural Networks for Intelligent Service Robotics", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8490234", "http://doi.org/10.1109/ACCESS.2018.2873597"]}, {"id": "2dd853b617c176810e3dda008f7cacea6473f0ae", "title": "Image captioning using deep neural architectures", "year": "2017", "pdf": ["https://arxiv.org/pdf/1801.05568.pdf"], "doi": []}, {"id": "25894be540936562953f37fbbcff69e5ac17a494", "title": "Semantic Image Retrieval via Active Grounding of Visual Situations", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.00088.pdf"], "doi": []}, {"id": "28949c94c8457e570ba65c8382c993600f8404e7", "title": "Efficient Video Object Segmentation via Network Modulation", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578778"]}, {"id": "ba60b642a558858325f50d38a345b6bb85114ce1", "title": "Imbalanced Deep Learning by Minority Class Incremental Rectification", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.10851.pdf"], "doi": []}, {"id": "a4aa145b519a9515e1162ca017d80d460002ba5a", "title": "HandyNet: A One-stop Solution to Detect, Segment, Localize & Analyze Driver Hands", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8575312"]}, {"id": "442b6114ae8316c95f59acabe6de26f2b569cc02", "title": "Edit me: A Corpus and a Framework for Understanding Natural Language Image Editing", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/1205/8e0de3dadc1469035ddb3c82e1b7ba722960.pdf"], "doi": []}, {"id": "729a9d35bc291cc7117b924219bef89a864ce62c", "title": "Recognizing Material Properties from Images.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1801.03127.pdf"], "doi": []}, {"id": "41a96329d93e7b06ebd6b3a761cc0d50375c5f15", "title": "Visual Learning Beyond Direct Supervision", "year": "2018", "pdf": ["https://www2.eecs.berkeley.edu/Pubs/TechRpts/2018/EECS-2018-128.pdf"], "doi": ["https://www.base-search.net/Record/2a66cc65fb84214dedf1e3019655a0c160d394ee4e1457c1e1301e57cce6ca94"]}, {"id": "144ba4e9e64d4f9a5bb436c80c3c02b40e4092e0", "title": "Learning Video Features for Multi-label Classification", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/144b/a4e9e64d4f9a5bb436c80c3c02b40e4092e0.pdf"], "doi": []}, {"id": "9c0ff1f1e2c83f089cb6b42a91869428fe9a92b2", "title": "Commonsense LocatedNear Relation Extraction", "year": "2017", "pdf": [], "doi": []}, {"id": "e5918229f44f0215d73a0b9d5eb13eb56764a2e4", "title": "Counting Vehicles with Cameras", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/e591/8229f44f0215d73a0b9d5eb13eb56764a2e4.pdf"], "doi": []}, {"id": "1b793cc5dceb98c95e816aebc2252205bfd71569", "title": "ADNet: A Deep Network for Detecting Adverts", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.04115.pdf"], "doi": []}, {"id": "f7cf4e0b1e371efca045e71996956be19aef0577", "title": "C L ] 3 D ec 2 01 7 Adversarial Ranking for Language Generation", "year": "2017", "pdf": [], "doi": []}, {"id": "0199150ccad6479eac9d693a7cc0406935d877a8", "title": "Towards Real-Time Accurate Object Detection in Both Images and Videos Based on Dual Refinement.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.08638.pdf"], "doi": []}, {"id": "4c8dfc4720186799b2baf53c282a32ba3f312bcc", "title": "A Mask Regional Convolutional Neural Network Model for Segmenting Real Time Traffic Images", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8493883"]}, {"id": "3921afded8bc8471d784df86f64432fb14b8ef58", "title": "Egocentric Gesture Recognition for Head-Mounted AR devices", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.05380.pdf"], "doi": []}, {"id": "b29be8ae91eff7ccf001d48adf518bae86129be3", "title": "OmniDetector: With Neural Networks to Bounding Boxes", "year": "2018", "pdf": [], "doi": []}, {"id": "2fcd5cff2b4743ea640c4af68bf4143f4a2cccb1", "title": "Are You Talking to a Machine? Dataset and Methods for Multilingual Image Question Answering", "year": "2015", "pdf": ["https://arxiv.org/pdf/1505.05612.pdf"], "doi": []}, {"id": "6a1b76f1ef876061ec479ab9bc13fcd517eb4188", "title": "Large Kernel Matters \u2014 Improve Semantic Segmentation by Global Convolutional Network", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.02719.pdf"], "doi": []}, {"id": "3bcca85ad84806be6d38d3882f7a6aac0ad90253", "title": "Video captioning with recurrent networks based on frame- and video-level features and visual content classification", "year": "2015", "pdf": ["https://arxiv.org/pdf/1512.02949.pdf"], "doi": []}, {"id": "39978ba7c83333475d6825d0ff897692933895fc", "title": "Conditional Random Fields as Recurrent Neural Networks", "year": "2015", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7410536"]}, {"id": "6ee2173c06c84cae6aae2912a4439ec956ecc3bb", "title": "Zero-shot Sim-to-Real Transfer with Modular Priors", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.07480.pdf"], "doi": []}, {"id": "fd0769033b18038b9baf9e762f16973ee27be626", "title": "MobileNetV2: Inverted Residuals and Linear Bottlenecks", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578572"]}, {"id": "b408b939c0f3be9cce0f84871a78a71d1684cd77", "title": "Identifying spatial relations in images using convolutional neural networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1706.04215.pdf"], "doi": []}, {"id": "8aa9d9ccbdf37fd1d9fb4f3adb778b3c2c9baa45", "title": "POL-LWIR Vehicle Detection: Convolutional Neural Networks Meet Polarised Infrared Sensors", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8575327"]}, {"id": "ed7de328e2191463d42d745d8ac8f16024f95f25", "title": "Beyond Planar Symmetry: Modeling Human Perception of Reflection and Rotation Symmetries in the Wild", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8237354"]}, {"id": "37b207d2c4a82a57f80e96353f79ecd71320a854", "title": "Person Search with Natural Language Description", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.05729.pdf"], "doi": []}, {"id": "11da2d589485685f792a8ac79d4c2e589e5f77bd", "title": "Show and tell: A neural image caption generator", "year": "2015", "pdf": ["https://arxiv.org/pdf/1411.4555.pdf"], "doi": []}, {"id": "54f688ea5de7b9156db28b2821e5f5ed0148605a", "title": "Contextual Memory Trees", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.06473.pdf"], "doi": []}, {"id": "19aa506d04d3f7241fc71b595d28b5f1bb99edad", "title": "Compact Generalized Non-local Network", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.13125.pdf"], "doi": []}, {"id": "4a88237199595feaa3f0e3289cbdd201a3ce28ff", "title": "Multi-Domain Pose Network for Multi-Person Pose Estimation and Tracking", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.08338.pdf"], "doi": []}, {"id": "3979e8ddcf95fedf7a220b7d39a72fa120d436f8", "title": "Deep Learning Applied to Image and Text Matching", "year": "2015", "pdf": ["https://arxiv.org/pdf/1601.03478.pdf"], "doi": []}, {"id": "df9a102288582d8edadeddcb8d55068a06cc471c", "title": "Are You Smarter Than a Sixth Grader? Textbook Question Answering for Multimodal Machine Comprehension", "year": "2017", "pdf": ["http://ai2-website.s3.amazonaws.com/publications/CVPR17_TQA.pdf", "http://openaccess.thecvf.com/content_cvpr_2017/papers/Kembhavi_Are_You_Smarter_CVPR_2017_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8100054", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2017.571", "http://doi.org/10.1109/CVPR.2017.571"]}, {"id": "9bddd98289ecc7a8dc5517122d21d5c6f5a9a01a", "title": "DS*: Tighter Lifting-Free Convex Relaxations for Quadratic Matching Problems", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.10733.pdf"], "doi": []}, {"id": "b4c48aa7a93f38d2eb60209120a1a8daa61c4545", "title": "Diversity in Object Proposals", "year": "2016", "pdf": ["https://arxiv.org/pdf/1603.04308.pdf"], "doi": []}, {"id": "26e425781e4090abfae65b5d68eac72282dd2e31", "title": "Image Captioning with Deep Bidirectional LSTMs", "year": "2016", "pdf": ["https://arxiv.org/pdf/1604.00790.pdf"], "doi": []}, {"id": "d951ff5f378b2a5f878423029123ad6b3491b444", "title": "Foveal Vision for Instance Segmentation of Road Images", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/c33e/a181e9cd13ad3f11f459d5d1f7b7bf114033.pdf"], "doi": []}, {"id": "9645e8b4829c04879a642d8dd6b3cdf5cf264afb", "title": "Finding Beans in Burgers: Deep Semantic-Visual Embedding with Localization", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.01720.pdf"], "doi": []}, {"id": "1e2d9ea6fe9c50a5c26a629b94446250e1be4e7d", "title": "The Freiburg Groceries Dataset", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.05799.pdf"], "doi": []}, {"id": "40c6a2b1cb312f11f8225a733545fdabd436e347", "title": "Deep Co-Training for Semi-Supervised Image Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.05984.pdf"], "doi": []}, {"id": "1f0c7b93636f879bd5ef3dd915a02dcd813a053d", "title": "Interpreting Deep Visual Representations via Network Dissection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.05611.pdf"], "doi": []}, {"id": "402056b2d10b27c1b17dbb5eb82b95fd1cfd8fa5", "title": "Deriving Privacy and Security Considerations for CORE: An Indoor IoT Adaptive Context Environment", "year": "2018", "pdf": [], "doi": ["http://dl.acm.org/citation.cfm?id=3267363"]}, {"id": "5b3dc81a490b1d9e69d7be20c4e8e1de886b5ca3", "title": "Improving Object Localization with Fitness NMS and Bounded IoU Loss", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.00164.pdf"], "doi": []}, {"id": "b7c2798e136feb85847c8a9aa693d75bc3f9b08c", "title": "Classifying a specific image region using convolutional nets with an ROI mask as input", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.00291.pdf"], "doi": []}, {"id": "372026896229c870f11bcecc1c81c80dad9c6761", "title": "Draw and Tell: Multimodal Descriptions Outperform Verbal- or Sketch-Only Descriptions in an Image Retrieval Task", "year": "2017", "pdf": [], "doi": ["https://aclanthology.info/papers/I17-2061/i17-2061", "http://aclweb.org/anthology/I17-2061", "https://pub.uni-bielefeld.de/download/2913598/2914382"]}, {"id": "8d384e8c45a429f5c5f6628e8ba0d73c60a51a89", "title": "Temporal Dynamic Graph LSTM for Action-Driven Video Object Detection", "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.00666.pdf"], "doi": []}, {"id": "6f77ff9990973a6cdad6b5b6022323bff9d03965", "title": "Action Recognition in Still Images Using Word Embeddings from Natural Language Descriptions", "year": "2017", "pdf": ["http://cobweb.cs.uga.edu/~csc/papers/wacv17_1.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7912210", "http://doi.org/10.1109/WACVW.2017.17"]}, {"id": "146f6f6ed688c905fb6e346ad02332efd5464616", "title": "Show, Attend and Tell: Neural Image Caption Generation with Visual Attention", "year": "2015", "pdf": ["https://arxiv.org/pdf/1502.03044.pdf"], "doi": []}, {"id": "e20daf69526c5da9cffb252d043fdc765f37a89e", "title": "Relating images and 3D models with convolutional neural networks. (Mise en relation d'images et de mod\u00e8les 3D avec des r\u00e9seaux de neurones convolutifs)", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/e20d/af69526c5da9cffb252d043fdc765f37a89e.pdf"], "doi": []}, {"id": "fb732b15fd5235893228ad3249cf04e1809034c9", "title": "CBVMR: Content-Based Video-Music Retrieval Using Soft Intra-Modal Structure Constraint", "year": "2018", "pdf": ["https://arxiv.org/pdf/1704.06761.pdf"], "doi": []}, {"id": "45e7ddd5248977ba8ec61be111db912a4387d62f", "title": "Adversarial Learning of Structure-Aware Fully Convolutional Networks for Landmark Localization", "year": "2017", "pdf": ["https://arxiv.org/pdf/1711.00253.pdf"], "doi": []}, {"id": "eaf8c104ab14600ecc5e9cce739b55280eef7ad4", "title": "Abstractive Compression of Captions with Attentive Recurrent Neural Networks", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/eaf8/c104ab14600ecc5e9cce739b55280eef7ad4.pdf"], "doi": []}, {"id": "dc4682cd15ccb7ebcbc8d2ba21b613e34a9af8ca", "title": "Bonnet: An Open-Source Training and Deployment Framework for Semantic Segmentation in Robotics using CNNs", "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.08960.pdf"], "doi": []}, {"id": "478a1ed7dc1890ca9476dcc1befe7f21c9bf9149", "title": "Learning to Learn from Noisy Labeled Data", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.05214.pdf"], "doi": []}, {"id": "e4b9c14951cea6259dd9d522586ba2c5bb1fbcce", "title": "Social Image Captioning: Exploring Visual Attention and User Attention", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/e4b9/c14951cea6259dd9d522586ba2c5bb1fbcce.pdf"], "doi": []}, {"id": "1e17202d6de18d5e1965edce5fee79744b717d0b", "title": "MIML-FCN+: Multi-Instance Multi-Label Learning via Fully Convolutional Networks with Privileged Information", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.08681.pdf"], "doi": []}, {"id": "542289d1acfebb9d79ea7a10c8e1516924e09973", "title": "Video Highlight Prediction Using Audience Chat Reactions", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.08559.pdf"], "doi": []}, {"id": "cf98c333c8d7d5870c1ce5538bb0c3de3de16657", "title": "Panoptic Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1801.00868.pdf"], "doi": []}, {"id": "2badc4c87a7751dd5ae1797bc4091d10d1acf442", "title": "Multimodal Retrieval with Asymmetrically Weighted Regularized Canonical Correla- Tion Analysis", "year": "", "pdf": ["https://pdfs.semanticscholar.org/2bad/c4c87a7751dd5ae1797bc4091d10d1acf442.pdf"], "doi": []}, {"id": "9b45e9a40313096abf530df3b98a1dfa1553f17b", "title": "Comprehension-Guided Referring Expressions", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8099816"]}, {"id": "66837b29270f3e03df64941a081d70c687c7955c", "title": "ActionXPose: A Novel 2D Multi-view Pose-based Algorithm for Real-time Human Action Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.12126.pdf"], "doi": []}, {"id": "7d035e6c6b5ef1267dff23845009284677c16a4d", "title": "AlphaGAN: Generative adversarial networks for natural image matting", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.10088.pdf"], "doi": []}, {"id": "655f587a59c835a7b6b5017016ea1c2123f266e6", "title": "Separating Self-Expression and Visual Content in Hashtag Supervision", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.09825.pdf"], "doi": []}, {"id": "3ca194773fe583661b988fbdf33f7680764438b3", "title": "Exploring Nearest Neighbor Approaches for Image Captioning", "year": "2015", "pdf": ["https://arxiv.org/pdf/1505.04467.pdf"], "doi": []}, {"id": "0013fae7390cbd34aade7959b4476512d8ab9aa3", "title": "Show, Control and Tell: A Framework for Generating Controllable and Grounded Captions", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.10652.pdf"], "doi": []}, {"id": "f43463770d361e55fb6f6eb801a6e8530ab668cd", "title": "Corpus Construction and Semantic Analysis of Indonesian Image Description", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/f434/63770d361e55fb6f6eb801a6e8530ab668cd.pdf"], "doi": []}, {"id": "30f33cffe390dfcc03d8df28593aebba6afafd59", "title": "Video Predictive Object Detector", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8615054"]}, {"id": "46d8531797d8913b05f9124def1ef81ac30423c1", "title": "MetaStyle: Three-Way Trade-Off Among Speed, Flexibility, and Quality in Neural Style Transfer", "year": "2019", "pdf": ["https://arxiv.org/pdf/1812.05233.pdf"], "doi": []}, {"id": "f5748711db00d82469ff60e05f62319f1eac90c5", "title": "Comparing Apples and Oranges: Off-Road Pedestrian Detection on the NREC Agricultural Person-Detection Dataset", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.07169.pdf"], "doi": []}, {"id": "cfc22c35ad191cf9d70f4a3655840748b0e1322c", "title": "Real-Time Dense Mapping for Self-driving Vehicles using Fisheye Cameras", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.06132.pdf"], "doi": []}, {"id": "6bfae88bea2301f2abeb6d1ed62c8b9a99b251c0", "title": "CNRS TELECOM ParisTech at ImageCLEF 2015 Scalable Concept Image Annotation Task: Concept Detection with Blind Localization Proposals", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/6bfa/e88bea2301f2abeb6d1ed62c8b9a99b251c0.pdf"], "doi": []}, {"id": "478261574ddc6cf297611000735aa9808f8f0030", "title": "ScanNet: Richly-Annotated 3D Reconstructions of Indoor Scenes", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.04405.pdf"], "doi": []}, {"id": "12d9a4233a16ea08fbb1de853b6a97b4f4952c23", "title": "Tractable Structured Prediction using the Permutohedral Lattice", "year": "2017", "pdf": ["https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/130429/eth-50807-01.pdf?isAllowed=y&sequence=1", "https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/130429/eth-50807-02.pdf?isAllowed=y&sequence=2"], "doi": ["https://doi.org/10.3929/ethz-a-010882665"]}, {"id": "e654320739770029ec5cb22174772c935478b237", "title": "Paraphrase Thought: Sentence Embedding Module Imitating Human Language Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.05505.pdf"], "doi": []}, {"id": "b80f43b42b5320578d4c1e214fe1a8b6b45352ae", "title": "MSR-VTT: A Large Video Description Dataset for Bridging Video and Language", "year": "2016", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2016/papers/Xu_MSR-VTT_A_Large_CVPR_2016_paper.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Xu_MSR-VTT_A_Large_CVPR_2016_paper.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/06/cvpr16.msr-vtt.tmei_-1.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/10/cvpr16.supplementary.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780940", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2016.571", "http://doi.org/10.1109/CVPR.2016.571"]}, {"id": "81cb69e401b3b51e49ec378dba4bc0c8e33448e1", "title": "Applying Domain Randomization to Synthetic Data for Object Category Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.09834.pdf"], "doi": []}, {"id": "030646f4fc694ffea5d4f77203cbbc5d02aae797", "title": "Cognitive Deep Machine Can Train Itself", "year": "2016", "pdf": ["https://arxiv.org/pdf/1612.00745.pdf"], "doi": []}, {"id": "7b1dd2708e1d7bf0fdcda437de1970a9a6facc0d", "title": "Deep Recurrent Convolutional Neural Network: Improving Performance For Speech Recognition", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.07174.pdf"], "doi": []}, {"id": "18087bfafbe471f346c8b739522e64fa11e48b13", "title": "Visual Madlibs: Fill in the Blank Description Generation and Question Answering", "year": "2015", "pdf": ["http://acberg.com/papers/madlibs.pdf", "http://openaccess.thecvf.com/content_iccv_2015/papers/Yu_Visual_Madlibs_Fill_ICCV_2015_paper.pdf", "http://tamaraberg.com/papers/madlibs.pdf", "http://www.cs.unc.edu/~licheng/papers/iccv15_madlibs.pdf", "http://www.cs.unc.edu/~licheng/papers/iccv15_madlibs_supp.pdf", "http://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Yu_Visual_Madlibs_Fill_ICCV_2015_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7410640", "http://doi.ieeecomputersociety.org/10.1109/ICCV.2015.283", "http://doi.org/10.1109/ICCV.2015.283"]}, {"id": "ad8642e186c5c81d06934d4e6fc249b7cbca40e8", "title": "Learning Transferable Architectures for Scalable Image Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1707.07012.pdf"], "doi": []}, {"id": "04427d8371cb9e66e2cdcd2035756203398a8bf1", "title": "Learning Answer Embeddings for Visual Question Answering", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578667"]}, {"id": "189b02caa2bb9ffb303154e35f60797ec8576f84", "title": "CRRN: Multi-scale Guided Concurrent Reflection Removal Network", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578600"]}, {"id": "3b8ad1f2335fc755e5cd75ee5922b8a0d432018a", "title": "A Fast and Compact Saliency Score Regression Network Based on Fully Convolutional Network", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.00615.pdf"], "doi": []}, {"id": "357df3ee0f0c30d5c8abc5a1bdf70122322d6fbd", "title": "O BJECT DETECTORS EMERGE IN D EEP S CENE CNN S", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/08c7/a85c443935468450a162ddeccd2b945660e3.pdf"], "doi": []}, {"id": "2a5667702b0f1ff77dde8fb3e2e10d4e05e8de9d", "title": "Scene Parsing through ADE20K Dataset", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2017/papers/Zhou_Scene_Parsing_Through_CVPR_2017_paper.pdf", "http://people.csail.mit.edu/bzhou/publication/scene-parse-camera-ready.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8100027", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2017.544", "http://doi.org/10.1109/CVPR.2017.544"]}, {"id": "10bb4ef7a6719ea132e00f0ab5680919a4131d99", "title": "BAM: Bottleneck Attention Module", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.06514.pdf"], "doi": []}, {"id": "a6e695ddd07aad719001c0fc1129328452385949", "title": "The New Data and New Challenges in Multimedia Research", "year": "2015", "pdf": [], "doi": []}, {"id": "f795b4ff67d3ecce0b988ecfc0589cc7e54f5dfa", "title": "Grounded Human-Object Interaction Hotspots from Video", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.04558.pdf"], "doi": []}, {"id": "b9146b314812231d09587e3a9f622dda65d3cc40", "title": "A survey on social image understanding", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8256394", "http://doi.org/10.1109/BESC.2017.8256394"]}, {"id": "30f78071ac2bc965ffbf452a7b315d6dfddae30e", "title": "Lingusitic Analysis of Multi-Modal Recurrent Neural Networks", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/30f7/8071ac2bc965ffbf452a7b315d6dfddae30e.pdf"], "doi": []}, {"id": "819d1dcea397e6e671acf74adccdef5750550873", "title": "Representations for Visually Guided Actions", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/819d/1dcea397e6e671acf74adccdef5750550873.pdf"], "doi": []}, {"id": "10554295addeae86571a26de6c2ad7e274963953", "title": "Re-ranking Object Proposals for Object Detection in Automatic Driving", "year": "2016", "pdf": ["https://arxiv.org/pdf/1605.05904.pdf"], "doi": []}, {"id": "af95ba2c08cf3317291dad554488dd951cd6ff80", "title": "Decoupled Classification Refinement: Hard False Positive Suppression for Object Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.04002.pdf"], "doi": []}, {"id": "c4cca330ec7289fac16d0793da98f705f6513219", "title": "Knowledge Projection for Effective Design of Thinner and Faster Deep Neural Networks", "year": "2017", "pdf": [], "doi": []}, {"id": "6a75ef6b36489cb59c61f21f3cd09c50ad5b2995", "title": "MVTec D2S: Densely Segmented Supermarket Dataset", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.08292.pdf"], "doi": []}, {"id": "22a8b7ca2e6449cad31ae4f71bb3587751087e77", "title": "DeepBox: Learning Objectness with Convolutional Networks", "year": "2015", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7410642"]}, {"id": "0dcc768631d9ede8a3679e980b37204b782781b2", "title": "Stating the Obvious: Extracting Visual Common Sense Knowledge", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/55b9/251541eb09a6be4a317d4db1bc8d5dba07f7.pdf"], "doi": []}, {"id": "3f5b20c35f55417823f0201862d85af1f31e9348", "title": "Salience Biased Loss for Object Detection in Aerial Images", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.08103.pdf"], "doi": []}, {"id": "c1e714a9ec329629798a88ebff8657c349fec739", "title": "WILDCAT: Weakly Supervised Learning of Deep ConvNets for Image Classification, Pointwise Localization and Segmentation", "year": "2017", "pdf": ["http://cedric.cnam.fr/~thomen/papers/Durand_CVPR_2017.pdf", "http://openaccess.thecvf.com/content_cvpr_2017/papers/Durand_WILDCAT_Weakly_Supervised_CVPR_2017_paper.pdf", "http://openaccess.thecvf.com/content_cvpr_2017/supplemental/Durand_WILDCAT_Weakly_Supervised_2017_CVPR_supplemental.pdf", "http://webia.lip6.fr/~durandt/pdfs/2017_CVPR/Durand_WILDCAT_CVPR_2017.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8100114", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2017.631", "http://doi.org/10.1109/CVPR.2017.631"]}, {"id": "a4e1a95c32e9477aa1b09073a1fd3ca129aacadc", "title": "Driver Identification System Using Convolutional Neural Network with Background Removal-based Infrared Data Augmentation", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8500364", "http://doi.org/10.1109/IVS.2018.8500364"]}, {"id": "01959ef569f74c286956024866c1d107099199f7", "title": "VQA: Visual Question Answering", "year": "2015", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7410636"]}, {"id": "87e1ae939f3632f26a9878fe32674e338cb18094", "title": "C4Synth: Cross-Caption Cycle-Consistent Text-to-Image Synthesis", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.10238.pdf"], "doi": []}, {"id": "5c8ad080ccb3f5e3c999c2948029f0bd005d5635", "title": "Engaging Image Captioning", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/5c8a/d080ccb3f5e3c999c2948029f0bd005d5635.pdf"], "doi": []}, {"id": "284be8be0c6bedc36dfe43229bc84345ab0aedc2", "title": "Faster Training of Mask R-CNN by Focusing on Instance Boundaries", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.07069.pdf"], "doi": []}, {"id": "b6b9d29d25de42d78f09217c9cc457247d90fc70", "title": "Semantic Part Detection via Matching: Learning to Generalize to Novel Viewpoints from Limited Training Data", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.11823.pdf"], "doi": []}, {"id": "2198a3d3342442d6ed6608f2e2b0687f644b67d6", "title": "Dynamic High Resolution Deformable Articulated Tracking", "year": "2017", "pdf": ["https://arxiv.org/pdf/1711.07999.pdf"], "doi": []}, {"id": "16beefb2f8d0d61e536bf20766e485499906c2e0", "title": "Image Surveillance Assistant", "year": "2016", "pdf": ["http://cs.umd.edu/~maynord/Image_Surveillance_Assistant.pdf", "https://www.nrl.navy.mil/itd/aic/sites/www.nrl.navy.mil.itd.aic/files/pdfs/(Maynord+%20WACV-16%20WS)%20Intelligent%20Surveillance%20Assistant.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7470119", "http://doi.ieeecomputersociety.org/10.1109/WACVW.2016.7470119", "http://doi.org/10.1109/WACVW.2016.7470119"]}, {"id": "340e55a44793226a51ad06612f340f2c520e3575", "title": "G2DeNet: Global Gaussian Distribution Embedding Network and Its Application to Visual Recognition", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2017/papers/Wang_G2DeNet_Global_Gaussian_CVPR_2017_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8100172", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2017.689", "http://doi.org/10.1109/CVPR.2017.689"]}, {"id": "833ada09759039b7c620b8930a50a0521d70b2c7", "title": "Attend in Groups: A Weakly-Supervised Deep Learning Framework for Learning from Web Data", "year": "2017", "pdf": ["https://arxiv.org/pdf/1611.09960.pdf"], "doi": []}, {"id": "fbb9cdd699baf86e9d616b259ada02449c2322ca", "title": "Active Testing: An Efficient and Robust Framework for Estimating Accuracy", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.00493.pdf"], "doi": []}, {"id": "f1d8c377093ecf64afd7f17383738e81666fe5ae", "title": "Remote Detection of Idling Cars Using Infrared Imaging and Deep Networks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.10805.pdf"], "doi": []}, {"id": "858a2e0ca7ba129fda1a92959abe8770eb303bbe", "title": "Input Features Output Features Input Features Output Features Input Features Output Features Selected and", "year": "2017", "pdf": [], "doi": []}, {"id": "f5ce3c9eb539d2f4a85880db65ba3890a0fd8c6c", "title": "Zero-shot object prediction and context modeling using semantic scene knowledge", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/f5ce/3c9eb539d2f4a85880db65ba3890a0fd8c6c.pdf"], "doi": []}, {"id": "9a7784eea6bfa62bf2834ee0b87a3cdda46006f2", "title": "Digital Comics Image Indexing Based on Deep Learning", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/9a77/84eea6bfa62bf2834ee0b87a3cdda46006f2.pdf"], "doi": []}, {"id": "9634348d3bc7b86d0b644f6c14ab0c4294341905", "title": "Investigating Redundancy in Emoji Use: Study on a Twitter Based Corpus", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/9634/348d3bc7b86d0b644f6c14ab0c4294341905.pdf"], "doi": []}, {"id": "068171535ac18a4b7b65be0748d483ce4c71a9a4", "title": "Event Specific Multimodal Pattern Mining with Image-Caption Pairs", "year": "2015", "pdf": ["https://arxiv.org/pdf/1601.00022.pdf"], "doi": []}, {"id": "346a877564351e4014441a1dc174b0369a759ba5", "title": "CLEAR: Cumulative LEARning for One-Shot One-Class Image Recognition", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Kozerawski_CLEAR_Cumulative_LEARning_CVPR_2018_paper.pdf", "http://www-inst.cs.berkeley.edu/~ee290t/fa18/readings/Kozerawski_CLEAR_Cumulative_LEARning_CVPR_2018_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578461", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2018.00363", "http://doi.org/10.1109/CVPR.2018.00363"]}, {"id": "2b45c35aa63180ef83e7f8e6f55405db9c688b0e", "title": "Unsupervised Domain Adaptation with Similarity Learning", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578933"]}, {"id": "a8420e7fa53b81b8069ced8d9c743c141e2fc432", "title": "Real-Time Multiple Object Tracking - A Study on the Importance of Speed", "year": "2017", "pdf": ["https://arxiv.org/pdf/1709.03572.pdf"], "doi": []}, {"id": "ea743597a5f48babef1982259566d76a9bf66bf2", "title": "Context Contrasted Feature and Gated Multi-scale Aggregation for Scene Segmentation", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/CameraReady/1276.pdf", "http://openaccess.thecvf.com/content_cvpr_2018/papers/Ding_Context_Contrasted_Feature_CVPR_2018_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578352", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2018.00254", "http://doi.org/10.1109/CVPR.2018.00254"]}, {"id": "2770af2c74f9b82f2167cf5773ccc94d482ac8d4", "title": "Gossip training for deep learning", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.09726.pdf"], "doi": []}, {"id": "31b05f65405534a696a847dd19c621b7b8588263", "title": "UMDFaces: An annotated face dataset for training deep networks", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8272731"]}, {"id": "ff5869f420d33511c22e60bbcff072c3cde3ebc4", "title": "Regional Interactive Image Segmentation Networks", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017/papers/Liew_Regional_Interactive_Image_ICCV_2017_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8237559", "http://doi.ieeecomputersociety.org/10.1109/ICCV.2017.297", "http://doi.org/10.1109/ICCV.2017.297"]}, {"id": "3aa66f2829ef440842c71a52cdaff30398a90ccb", "title": "Pointly-Supervised Action Localization", "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.11333.pdf"], "doi": []}, {"id": "15a148957469bc8b91bd7cc31aa1f0c6584a1571", "title": "DeepGlobe 2018: A Challenge to Parse the Earth through Satellite Images", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8575485"]}, {"id": "ccba451687f83c463deeefb69c1f4a781de2d638", "title": "Full-Network Embedding in a Multimodal Embedding Pipeline", "year": "2017", "pdf": [], "doi": []}, {"id": "0219ced7a3afcd8e7584b24809bac30304f9288e", "title": "L ] 3 1 M ay 2 01 7 Adversarial Ranking for Language Generation", "year": "2017", "pdf": [], "doi": []}, {"id": "59f4df3087c22e0e13574bb7d20e9d41e74cb9a5", "title": "Visual Re-ranking with Natural Language Understanding for Text Spotting", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.12738.pdf"], "doi": []}, {"id": "3fb61e0d133d0971c853b8435fea99150e15cdbb", "title": "Predicting Foreground Object Ambiguity and Efficiently Crowdsourcing the Segmentation(s)", "year": "2018", "pdf": ["https://arxiv.org/pdf/1705.00366.pdf"], "doi": []}, {"id": "cb8f1f77a8b19d99dfe0c7b50dae3978cf646aa9", "title": "Adaptive attention fusion network for visual question answering", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8019540", "http://doi.ieeecomputersociety.org/10.1109/ICME.2017.8019540", "http://doi.org/10.1109/ICME.2017.8019540"]}, {"id": "4f6d8d4d460517297b419242a7e9ab3f9a307a72", "title": "Feature Extraction for Image Selection Using Machine Learning", "year": "2017", "pdf": [], "doi": []}, {"id": "0c36c988acc9ec239953ff1b3931799af388ef70", "title": "Face Detection Using Improved Faster RCNN", "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.02142.pdf"], "doi": []}, {"id": "1baa9fdfb27f561be67c52ef55b0e3587f306cff", "title": "Image Captioning with Attention", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/1baa/9fdfb27f561be67c52ef55b0e3587f306cff.pdf"], "doi": []}, {"id": "8b35c00edfa4edfd7a99d816e671023d2c000d55", "title": "AttnGAN: Fine-Grained Text to Image Generation with Attentional Generative Adversarial Networks", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578241"]}, {"id": "a0e03c5b647438299c79c71458e6b1776082a37b", "title": "Areas of Attention for Image Captioning", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.01033.pdf"], "doi": []}, {"id": "9976b88d15f89b6c82b16564735d489a7524821d", "title": "Learning Visual N-Grams from Web Data", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.09161.pdf"], "doi": []}, {"id": "0b938c4a993e7e2fda81426924cb91c108a7830c", "title": "Which Emoji Talks Best for My Picture?", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8609639"]}, {"id": "4a9831e5fec549edee454709048a51997ef60fb7", "title": "Did the Model Understand the Question?", "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.05492.pdf"], "doi": []}, {"id": "4041cf003f797007fe53ac89fd57072f97257abd", "title": "End-to-end policy learning for active visual categorization.", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/4041/cf003f797007fe53ac89fd57072f97257abd.pdf"], "doi": []}, {"id": "f741cba061655581f6fbb628613d0669c4bdecd5", "title": "Deep Cosine Metric Learning for Person Re-identification", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8354191"]}, {"id": "c91e94e981084f5d9c3c1479fa90b8b091826d1d", "title": "Variational Bayesian Multiple Instance Learning with Gaussian Processes", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8099576"]}, {"id": "03c820f35afdc38dd05e4c663d2877e2602bcde0", "title": "Semantically Guided Visual Question Answering", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8354309", "http://doi.ieeecomputersociety.org/10.1109/WACV.2018.00205", "http://doi.org/10.1109/WACV.2018.00205"]}, {"id": "23a0cfaceb35413bd6caad437e37d566fcfdcc64", "title": "MindCamera: Interactive Sketch-Based Image Retrieval and Synthesis", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8267031"]}, {"id": "9d5a5517650d5f9a7d9818bcc1eb59ba65d316e1", "title": "Conditional Generative Adversarial Network for Structured Domain Adaptation", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Hong_Conditional_Generative_Adversarial_CVPR_2018_paper.pdf", "https://weixianghong.github.io/publications/papers/CVPR_18.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578243", "http://doi.org/10.1109/cvpr.2018.00145"]}, {"id": "f87aa7d7b746e24e3b08fd3375331068ecd13311", "title": "Mask-SLAM: Robust Feature-Based Monocular SLAM by Masking Using Semantic Segmentation", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8575524"]}, {"id": "bce963c494ccce1c66757e69993f578c37564f6d", "title": "Can Adversarial Networks Hallucinate Occluded People With a Plausible Aspect?", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.08097.pdf"], "doi": []}, {"id": "3bb0ab7ebe1ffb83aa9775ffe2602407c9d6329d", "title": "Active Decision Boundary Annotation with Deep Generative Models", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8237827"]}, {"id": "93adca9ce6f4a0fab9ea027c90b4df828cfa10d7", "title": "Learning Actionable Representations from Visual Observations", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8593951"]}, {"id": "54b5aab87dbe38803935789c4d730bd203d198a1", "title": "3D Human Pose Estimation in RGBD Images for Robotic Task Learning", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8462833"]}, {"id": "6962505b78d0cec04b992a059cc58870c361c971", "title": "Tattoo Image Search at Scale: Joint Detection and Compact Representation Learning", "year": "2019", "pdf": ["https://arxiv.org/pdf/1811.00218.pdf"], "doi": []}, {"id": "747e9b36c5a1b0b8a9572da0ab416ddd1e1d2d33", "title": "Augmentation for Visual Question Answering", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/747e/9b36c5a1b0b8a9572da0ab416ddd1e1d2d33.pdf"], "doi": []}, {"id": "09a4f1b7afd8d5f6854b23f0aa9e80b9a0fa6d20", "title": "Seeing Beyond Appearance - Mapping Real Images into Geometrical Domains for Unsupervised CAD-based Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.04158.pdf"], "doi": []}, {"id": "85473d7bd0b73488f22d8443583165fdbd3d221d", "title": "Product Quantized Translation for Fast Nearest Neighbor Search", "year": "2018", "pdf": [], "doi": ["https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/view/16953", "https://aaai.org/ocs/index.php/AAAI/AAAI18/paper/viewFile/16953/16702"]}, {"id": "86d9f836c5fc542e987601a102c2b6cdbca1d64f", "title": "Large Scale Semi-Supervised Object Detection Using Visual and Semantic Knowledge Transfer", "year": "2016", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780602"]}, {"id": "c719a941d3ab00e45d1d74e7533ed59cc0740ae2", "title": "Does Optical Character Recognition and Caption Generation Improve Emotion Detection in Microblog Posts?", "year": "2017", "pdf": ["http://www.romanklinger.de/publications/klinger2017-nldb.pdf", "http://www.romanklinger.de/talks/nldb.pdf"], "doi": ["https://doi.org/10.1007/978-3-319-59569-6_39"]}, {"id": "4f618cbf19917ce5b8703adbc14e15b0bf0d35cc", "title": "Multi-View Dynamic Facial Action Unit Detection", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.07863.pdf"], "doi": []}, {"id": "1f2b28dc48c8f2c0349dce728d7b6a0681f58aea", "title": "A Dataset for Lane Instance Segmentation in Urban Environments", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.01347.pdf"], "doi": []}, {"id": "7d6132a884d2b154059c461e107c7a8c41603ef7", "title": "Exploring Multi-Branch and High-Level Semantic Networks for Improving Pedestrian Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.00872.pdf"], "doi": []}, {"id": "8e06f628d0566e31366cf046b4a83ac4724ea7c3", "title": "Categorizing Concepts with Basic Level for Vision-to-Language", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Wang_Categorizing_Concepts_With_CVPR_2018_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578619", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2018.00521", "http://doi.org/10.1109/CVPR.2018.00521"]}, {"id": "72048081cfe678f700c52e1c7a56713a8e0865b6", "title": "Unsupervised Adversarial Visual Level Domain Adaptation for Learning Video Object Detectors from Images", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.02074.pdf"], "doi": []}, {"id": "974aa04b051bd2493bf4aaa5cd410dcb8f3b264d", "title": "Red Hen Lab: Dataset and Tools for Multimodal Human Communication Research", "year": "2017", "pdf": [], "doi": ["https://doi.org/10.1007/s13218-017-0505-9"]}, {"id": "13ad6164dba75845f0f397e9314ad596e74eb946", "title": "Learning to Estimate 3D Human Pose and Shape from a Single Color Image", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578153"]}, {"id": "91e89cedd4093bfe176532530ddb960f2767aca5", "title": "Cross-Domain Self-Supervised Multi-task Feature Learning Using Synthetic Imagery", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578184"]}, {"id": "f28e2bb46e49799589787e466c3ca966a0897bf7", "title": "Textually Customized Video Summaries", "year": "2017", "pdf": ["https://arxiv.org/pdf/1702.01528.pdf"], "doi": []}, {"id": "bcb35a8eb4cea46755c7430618e4ecfa4647b360", "title": "Deep Joint Semantic-Embedding Hashing", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/bcb3/5a8eb4cea46755c7430618e4ecfa4647b360.pdf"], "doi": []}, {"id": "4b59e1fbcd68d616c9e5d02a62d332b0b2c268c5", "title": "Training Recurrent Neural Network through Moment Matching for NLP Applications", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/4b59/e1fbcd68d616c9e5d02a62d332b0b2c268c5.pdf"], "doi": []}, {"id": "b8ccc5341a1b0214e9d155b019962023f344c2ee", "title": "Incremental Learning of Object Detectors without Catastrophic Forgetting", "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.06977.pdf"], "doi": []}, {"id": "45dd2a3cd7c27f2e9509b023d702408f5ac11c9d", "title": "Stacked Cross Attention for Image-Text Matching", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.08024.pdf"], "doi": []}, {"id": "02a4e2569b8033eff87099ad402f251d02213cfe", "title": "Learning-Based Cloth Material Recovery from Video", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8237732"]}, {"id": "ff23167f18723faf41265cd36ee2cf1de45a3048", "title": "Straight to Shapes: Real-Time Detection of Encoded Shapes", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8099931"]}, {"id": "47b4c33621bc0e636d6c268ad6acb319e12abbee", "title": "StackGAN++: Realistic Image Synthesis with Stacked Generative Adversarial Networks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1710.10916.pdf"], "doi": []}, {"id": "26d27a6de0360ed690248a9dd08e4ec0818472b0", "title": "DehazeGAN: When Image Dehazing Meets Differential Programming", "year": "2018", "pdf": ["http://www.pengxi.me/wp-content/uploads/Papers/2018-IJCAI-DehazeGAN.pdf"], "doi": ["https://doi.org/10.24963/ijcai.2018%2F172", "https://doi.org/10.24963/ijcai.2018/172"]}, {"id": "2284ba28bd3b1afaf06afb8c2a94638e350b3ecb", "title": "Boosting Object Proposals: From Pascal to COCO", "year": "2015", "pdf": ["http://openaccess.thecvf.com/content_iccv_2015/papers/Pont-Tuset_Boosting_Object_Proposals_ICCV_2015_paper.pdf", "http://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Pont-Tuset_Boosting_Object_Proposals_ICCV_2015_paper.pdf", "http://www.vision.ee.ethz.ch/en/publications/papers/proceedings/eth_biwi_01226.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7410538", "http://doi.ieeecomputersociety.org/10.1109/ICCV.2015.181", "http://doi.org/10.1109/ICCV.2015.181"]}, {"id": "21d7130230162af2a4cc1b9375bfe9b37dbbd499", "title": "Origami: A 803-GOp/s/W Convolutional Network Accelerator", "year": "2017", "pdf": ["https://arxiv.org/pdf/1512.04295.pdf"], "doi": []}, {"id": "efbac99adf8628aae7f070e5b4388a295956f9d2", "title": "CondenseNet: An Efficient DenseNet Using Learned Group Convolutions", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578389"]}, {"id": "015d25f66514ce0a966300944201d45968a104ba", "title": "SIMPLE IMAGE DESCRIPTION GENERATOR VIA A LINEAR PHRASE-BASED MODEL R\u00e9mi Lebret", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/015d/25f66514ce0a966300944201d45968a104ba.pdf"], "doi": []}, {"id": "47f8a217de3df8d70c3ac7fac73696a9104b1294", "title": "Weakly-supervised image captioning based on rich contextual information", "year": "2017", "pdf": [], "doi": ["https://doi.org/10.1007/s11042-017-5236-2"]}, {"id": "1fefc1d288a87fe218ba25024c4b2b6ef345738e", "title": "Self-ensembling for domain adaptation", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/1fef/c1d288a87fe218ba25024c4b2b6ef345738e.pdf"], "doi": []}, {"id": "688cb9fd33769b152806c04ef6fc276629a9f300", "title": "LocNet: Improving Localization Accuracy for Object Detection", "year": "2016", "pdf": ["https://arxiv.org/pdf/1511.07763.pdf"], "doi": []}, {"id": "c96f012f4915398259e7e223810c57898b5e1a76", "title": "Fast LIDAR-based Road Detection Using Convolutional Neural Networks", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/c96f/012f4915398259e7e223810c57898b5e1a76.pdf"], "doi": []}, {"id": "d4a7259340ece685b9dacb390eea10c6684a05b3", "title": "Object Detection based on Region Decomposition and Assembly", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.08225.pdf"], "doi": []}, {"id": "c562e95b7906066be4210d00c4f6187475e6e13a", "title": "Deep Lesion Graphs in the Wild: Relationship Learning and Organization of Significant Radiology Image Findings in a Diverse Large-Scale Lesion Database", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.10535.pdf"], "doi": []}, {"id": "a24a0126f76ba1423ac3548ef95aa24ac4e670dd", "title": "How Would You Say It ? Eliciting Lexically Diverse Data for Supervised Semantic Parsing", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/3870/0128bc3ce57a04c46458c3898eaaa1166508.pdf"], "doi": []}, {"id": "99975f84728140154a35b0763753b35bc44e1d5a", "title": "Video Description: A Survey of Methods, Datasets and Evaluation Metrics", "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.00186.pdf"], "doi": []}, {"id": "a6a6cfae45e8633c01793debf43592b7d515f65d", "title": "From ImageNet to Mining: Adapting Visual Object Detection with Minimal Supervision", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/a6a6/cfae45e8633c01793debf43592b7d515f65d.pdf"], "doi": []}, {"id": "29b1a44d1e1ffa05c2bf7f4be931c5045f427718", "title": "ON GENERIC OBJECT RECOGNITION TECHNIQUES : CHALLENGES AND OPPORTUNITIES Prof", "year": "", "pdf": ["https://pdfs.semanticscholar.org/29b1/a44d1e1ffa05c2bf7f4be931c5045f427718.pdf"], "doi": []}, {"id": "4634bf44a0c994e2bed89686225f8cef601a0224", "title": "NLM at ImageCLEF 2018 Visual Question Answering in the Medical Domain", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/4634/bf44a0c994e2bed89686225f8cef601a0224.pdf"], "doi": []}, {"id": "acf13c52c86a3b38642ba0c6cbcd1b771778965c", "title": "NAACL HLT 2018 Generalization in the Age of Deep Learning Proceedings of the Workshop", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/e255/eb6d8e46275387f71986a26d81fd746e0147.pdf"], "doi": []}, {"id": "fd716faea00fb6ec433a62c79aced17d51de8d23", "title": "Cascaded Contextual Region-based Convolutional Neural Network for Event Detection from Time Series Signals: A Seismic Application", "year": "2017", "pdf": [], "doi": []}, {"id": "64bff565c83c6ce3fac783d8d67191af99803701", "title": "Learning to Co-Generate Object Proposals with a Deep Structured Network", "year": "2016", "pdf": ["http://infoscience.epfl.ch/record/217984/files/HayderHeSalzmannCVPR16.pdf", "http://openaccess.thecvf.com/content_cvpr_2016/papers/Hayder_Learning_to_Co-Generate_CVPR_2016_paper.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Hayder_Learning_to_Co-Generate_CVPR_2016_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780650", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2016.281", "http://doi.org/10.1109/CVPR.2016.281"]}, {"id": "1779b6a17ee68afafb6801477b165f19901689b2", "title": "Object Contour Detection with a Fully Convolutional Encoder-Decoder Network", "year": "2016", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780397"]}, {"id": "5ac18d505ed6d10e8692cbb7d33f6852e6782692", "title": "The Open Images Dataset V4: Unified image classification, object detection, and visual relationship detection at scale", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.00982.pdf"], "doi": []}, {"id": "f2b95f135b95c3df4f6ebe6015098a2e1667711d", "title": "Weakly Supervised Object Localization Using Things and Stuff Transfer", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.08000.pdf"], "doi": []}, {"id": "e2c122bea06dfa067712cdb58ce474144f93af07", "title": "Phrase-based Image Captioning with Hierarchical LSTM Model", "year": "2017", "pdf": ["https://arxiv.org/pdf/1711.05557.pdf"], "doi": []}, {"id": "bb1dc1e9e9c20d99b55f37b9e635457af86a065f", "title": "Neural Ctrl-F: Segmentation-Free Query-by-String Word Spotting in Handwritten Manuscript Collections", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.07645.pdf"], "doi": []}, {"id": "0e0900b88c33b671be5dd2ded9885b6526d6b429", "title": "From captions to visual concepts and back", "year": "2015", "pdf": ["https://arxiv.org/pdf/1411.4952.pdf"], "doi": []}, {"id": "3e08a3912ebe494242f6bcd772929cc65307129c", "title": "Few-Shot Image Recognition by Predicting Parameters from Activations", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578853"]}, {"id": "47a2b1ba7e4275174a017a290f2e5de7de0acb00", "title": "Semantically Coherent Co-Segmentation and Reconstruction of Dynamic Scenes", "year": "2017", "pdf": ["http://epubs.surrey.ac.uk/813877/1/cvprFinalOpenAccess.pdf", "http://openaccess.thecvf.com/content_cvpr_2017/papers/Mustafa_Semantically_Coherent_Co-Segmentation_CVPR_2017_paper.pdf", "https://arminmustafa.github.io/docs/cvpr17/ArminCVPR2017.pdf", "https://arminmustafa.github.io/docs/cvpr17/ppt.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8100075", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2017.592", "http://doi.org/10.1109/CVPR.2017.592"]}, {"id": "7902309d3c5ab2e1e3a1f08503dc39108e1639dc", "title": "Look into Person: Joint Body Parsing & Pose Estimation Network and A New Benchmark", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.01984.pdf"], "doi": []}, {"id": "6c0c368fca391b4456e64d2943d0bcbe6d8e1ecc", "title": "A Pipeline for Creative Visual Storytelling", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.08077.pdf"], "doi": []}, {"id": "dc7a4d5ba20ca07d29c360b26e1e72afae9a77be", "title": "The ApolloScape Open Dataset for Autonomous Driving and its Application", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.06184.pdf"], "doi": []}, {"id": "9120d59f2ca86954b45d254cae1409cb0806d9c7", "title": "DenseFuse: A Fusion Approach to Infrared and Visible Images", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.08361.pdf"], "doi": []}, {"id": "428017f7a6df4d667275c7ac9b3feba39b70e4ae", "title": "CNN-RNN: A Unified Framework for Multi-label Image Classification", "year": "2016", "pdf": ["https://arxiv.org/pdf/1604.04573.pdf"], "doi": []}, {"id": "aa74e63360c341f47a921e0043c5d58d55807fe4", "title": "Multi-Residual Networks: Improving the Speed and Accuracy of Residual Networks", "year": "2016", "pdf": ["https://arxiv.org/pdf/1609.05672.pdf"], "doi": []}, {"id": "ecbaa92c289f4f5ff9a57b19a2725036a92311f5", "title": "Focused Evaluation for Image Description with Binary Forced-Choice Tasks", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/ecba/a92c289f4f5ff9a57b19a2725036a92311f5.pdf"], "doi": []}, {"id": "b3eb61c3542e0c6bafb4c1acd05cffc0970faa85", "title": "Region-Based Image Retrieval Revisited", "year": "2017", "pdf": ["https://arxiv.org/pdf/1709.09106.pdf"], "doi": []}, {"id": "904b322a61d9be9c0b1023946320f9245533085e", "title": "Multi-Residual Networks", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/95cd/6df1b921859103f2eea702e8d3622a862a08.pdf"], "doi": []}, {"id": "62a5f2a142fe636632f6467176ab18bb86e6398e", "title": "Pixelated Semantic Colorization", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.10889.pdf"], "doi": []}, {"id": "a0e286f3c6a72c857ffd03bd8ab9a9f9b98c4432", "title": "AI Learns to Recognize Bengali Handwritten Digits: Bengali.AI Computer Vision Challenge 2018", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.04452.pdf"], "doi": []}, {"id": "333be4858994e6d9364341aeb520f7800a0f6a07", "title": "Unsupervised Pixel-Level Domain Adaptation with Generative Adversarial Networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.05424.pdf"], "doi": []}, {"id": "23ef40af4fda9c7db8fd7596d34446303c378793", "title": "Towards a Generalized Approach for Deep Neural Network Based Event Processing for the Internet of Multimedia Things", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8331844", "http://doi.org/10.1109/ACCESS.2018.2823590"]}, {"id": "ee53c9480132fc0d09b1192226cb2c460462fd6d", "title": "Channel Pruning for Accelerating Very Deep Neural Networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.06168.pdf"], "doi": []}, {"id": "b0760764dc573b519f76d5a79531d49af333c67a", "title": "Neural Style Transfer: A Review", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.04058.pdf"], "doi": []}, {"id": "5fa587381a9e4308163b3a5395985f0375c3cf7d", "title": "Deep Extreme Cut: From Extreme Points to Object Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.09081.pdf"], "doi": []}, {"id": "2f005b31b41face8a8b157e2ce7f97ece5b61391", "title": "L 1 Graph Based Sparse Model for Label Denoising", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/2f00/5b31b41face8a8b157e2ce7f97ece5b61391.pdf"], "doi": []}, {"id": "ede3af38e30ca332af0c1ce3bd5144070f7fb7f3", "title": "SGN: Sequential Grouping Networks for Instance Segmentation", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017/papers/Liu_SGN_Sequential_Grouping_ICCV_2017_paper.pdf", "http://www.cs.toronto.edu/~fidler/papers/sgn_iccv17.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8237640", "http://doi.ieeecomputersociety.org/10.1109/ICCV.2017.378", "http://doi.org/10.1109/ICCV.2017.378"]}, {"id": "73f1a3d88defbd5481214e27da7df501d0e31fb1", "title": "Tencent ML-Images: A Large-Scale Multi-Label Image Database for Visual Representation Learning", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.01703.pdf"], "doi": []}, {"id": "2eef20a11324686099ee6f9b1a7613444b0d2112", "title": "Dual-Path Convolutional Image-Text Embedding with Instance Loss", "year": "2017", "pdf": ["https://arxiv.org/pdf/1711.05535.pdf"], "doi": []}, {"id": "6a9c3011b5092daa1d0cacda23f20ca4ae74b902", "title": "Fast and Accurate Person Re-Identification with RMNet.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.02465.pdf"], "doi": []}, {"id": "f9255703f0a89c9ca2e9256595a0526829ff4402", "title": "On the Importance of Visual Context for Data Augmentation in Scene Understanding", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.02492.pdf"], "doi": []}, {"id": "e8e43abbc8bee64a53af64ceca90bfb687f7bb9d", "title": "Fast Object Class Labelling via Speech", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.09461.pdf"], "doi": []}, {"id": "3f775e3be9e1a00ebf4fd281e524932e88cec0ae", "title": "Deep Contextual Recurrent Residual Networks for Scene Labeling", "year": "2018", "pdf": ["https://arxiv.org/pdf/1704.03594.pdf"], "doi": []}, {"id": "c5b9a96fcb07f538be3181922e5f1a24a7936783", "title": "Autonomous drone cinematographer: Using artistic principles to create smooth, safe, occlusion-free trajectories for aerial filming", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.09563.pdf"], "doi": []}, {"id": "d5673c53b3643372dd8d35136769ecd73a6dede3", "title": "A Deep Learning Framework for Smart Street Cleaning", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7944927", "http://doi.ieeecomputersociety.org/10.1109/BigDataService.2017.49", "http://doi.org/10.1109/BigDataService.2017.49"]}, {"id": "456983805a8781d6429bed1ed66dc9f3902767af", "title": "Seeing with Humans : Gaze-Assisted Neural Image", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/4569/83805a8781d6429bed1ed66dc9f3902767af.pdf"], "doi": []}, {"id": "a759570e6ef674cd93068020c2e6bd036961f7c6", "title": "SPEECH-COCO: 600k Visually Grounded Spoken Captions Aligned to MSCOCO Data Set", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.08435.pdf"], "doi": []}, {"id": "e7b92fc6e2f1a13a3076a48a78e03badacb0465b", "title": "Deep Cauchy Hashing for Hamming Space Retrieval", "year": "2018", "pdf": ["http://ise.thss.tsinghua.edu.cn/~mlong/doc/deep-cauchy-hashing-cvpr18.pdf", "http://openaccess.thecvf.com/content_cvpr_2018/papers/Cao_Deep_Cauchy_Hashing_CVPR_2018_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578232", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2018.00134", "http://doi.org/10.1109/CVPR.2018.00134"]}, {"id": "7892606127c94d0be5c9bea5b6cb539f7fe3bf39", "title": "Combining Multiple Cues for Visual Madlibs Question Answering", "year": "2018", "pdf": [], "doi": ["https://doi.org/10.1007/s11263-018-1096-0"]}, {"id": "e42b2981f4e8de54213d624d1ef12bad4fe02f0a", "title": "Through-Wall Human Pose Estimation Using Radio Signals", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578866"]}, {"id": "c896502edcdec38466e7d66f38fb53a57c8e05db", "title": "Image Companding and Inverse Halftoning using Deep Convolutional Neural Networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1707.00116.pdf"], "doi": []}, {"id": "30a68bea6a43c239d899d7f02bb8ef9f3c5a8f47", "title": "Cross-Media Similarity Evaluation for Web Image Retrieval in the Wild", "year": "2018", "pdf": ["https://arxiv.org/pdf/1709.01305.pdf"], "doi": []}, {"id": "1daaeae28270b06962eb6fcf812a368892b5dff4", "title": "Modeling Visual Context Is Key to Augmenting Object Detection Datasets", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.07428.pdf"], "doi": []}, {"id": "fdce9cbe5c726201575b3c8a8c1af0752f1af53f", "title": "MAttNet: Modular Attention Network for Referring Expression Comprehension", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578240"]}, {"id": "91bb3680cee8cd37b80e07644f66f9cccf1b1aff", "title": "PASCAL Boundaries: A Semantic Boundary Dataset with a Deep Semantic Boundary Detector", "year": "2017", "pdf": ["http://www.cs.jhu.edu/~alanlab/Pubs17/premachandran2017pascalboundaries.pdf", "https://www.computer.org/csdl/proceedings/wacv/2017/4822/00/07926599.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7926599", "http://doi.org/10.1109/WACV.2017.16"]}, {"id": "09b2e7af73689dbdba1547e19111a6ee06767906", "title": "PoseTrack: A Benchmark for Human Pose Estimation and Tracking", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578640"]}, {"id": "38e3c26829e38c6b56f7c541e0c4445820fab0fe", "title": "BOLD5000: A public fMRI dataset of 5000 images", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.01281.pdf"], "doi": []}, {"id": "57bfbd06e94c5def35c1dde901bb4f1c839dbcdf", "title": "Deep patch learning for weakly supervised object classification and discovery", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.02429.pdf"], "doi": []}, {"id": "5d165ff5b0b389e32809c17838a2afc218a91d62", "title": "Object Detectors Emerge in Deep Scene CNNs", "year": "2014", "pdf": ["https://arxiv.org/pdf/1412.6856.pdf"], "doi": []}, {"id": "165ef2b5f86b9b2c68b652391db5ece8c5a0bc7e", "title": "Efficient Piecewise Training of Deep Structured Models for Semantic Segmentation", "year": "2016", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780717"]}, {"id": "944faf7f14f1bead911aeec30cc80c861442b610", "title": "Action Tubelet Detector for Spatio-Temporal Action Localization", "year": "2017", "pdf": ["https://arxiv.org/pdf/1705.01861.pdf"], "doi": []}, {"id": "729a30040132909cda0eab2c6c4ba60d6d1941b5", "title": "Image-based Plant Species Identification with Deep Convolutional Neural Networks", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/729a/30040132909cda0eab2c6c4ba60d6d1941b5.pdf"], "doi": []}, {"id": "28af8e1a3cb3a158f8a642c8493fcfb207743d0a", "title": "Better Image Segmentation by Exploiting Dense Semantic Predictions", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.01481.pdf"], "doi": []}, {"id": "139ab79a0aae5d4346f907b7b29288d09ef373fa", "title": "A Baseline for Multi-Label Image Classification Using Ensemble Deep CNN", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.08412.pdf"], "doi": []}, {"id": "24aee34d1fb3cf5b1cddc2c6ef9259506be4e9c0", "title": "Visual Context Learning with Big Data Analytics", "year": "2016", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7836722", "http://doi.org/10.1109/ICDMW.2016.0091"]}, {"id": "9b18cc5c938062161a4b6b0c71ee7a6c550a15f7", "title": "A Scalable Optimization Mechanism for Pairwise based Discrete Hashing.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.10810.pdf"], "doi": []}, {"id": "ad9b3dc6c0e54070cec79df86458ed38566da1ff", "title": "Automated Image Captioning for Rapid Prototyping and Resource Constrained Environments", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.01393.pdf"], "doi": []}, {"id": "2a74b00daa80e64cde5256bc014d1b011926b97b", "title": "Shake-Shake regularization", "year": "2017", "pdf": [], "doi": []}, {"id": "40c1aec7e0830bf9dd8a689d671024567311ae72", "title": "Interact as You Intend: Intention-Driven Human-Object Interaction Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.09796.pdf"], "doi": []}, {"id": "3b08ef7aa0cf9528da42b2b594b66e4a6f7fdb7f", "title": "Active Learning for Delineation of Curvilinear Structures", "year": "2016", "pdf": ["https://arxiv.org/pdf/1512.00747.pdf"], "doi": []}, {"id": "4d1e28368e1121872bcd4ce75bc7ba5e43bd42d0", "title": "Attend to You: Personalized Image Captioning with Context Sequence Memory Networks", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.06485.pdf"], "doi": []}, {"id": "b6f682648418422e992e3ef78a6965773550d36b", "title": "CBMM Memo No . 061 February 8 , 2017 Full interpretation of minimal images", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/b6f6/82648418422e992e3ef78a6965773550d36b.pdf"], "doi": []}, {"id": "2595319371a06449e3f8cef251303aecbb9e731e", "title": "A Review on Object Recognition for Blind People Based on Deep Learning", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/2595/319371a06449e3f8cef251303aecbb9e731e.pdf"], "doi": []}, {"id": "e7df750805d6c025d0a1a74a05a83bcf8cc63dd7", "title": "Assessment of crowdsourcing and gamification loss in user-assisted object segmentation", "year": "2015", "pdf": ["http://upcommons.upc.edu/bitstream/handle/2117/81293/asknseek.pdf;jsessionid=469B72384B7C56D4A331D68A53942294?sequence=1"], "doi": ["https://doi.org/10.1007/s11042-015-2897-6"]}, {"id": "abd4152773ebb97b90163b9a6bbdf2075e825481", "title": "Procedural Text Generation from an Execution Video", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/abd4/152773ebb97b90163b9a6bbdf2075e825481.pdf"], "doi": []}, {"id": "8b5b8db6a2a2880c14894140ea70ceb5f96c3b9b", "title": "Learning a Text-Video Embedding from Incomplete and Heterogeneous Data", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.02516.pdf"], "doi": []}, {"id": "72f8df596eb9bb3a8c8206329083c42e70fcd9fd", "title": "Will People Like Your Image?", "year": "2016", "pdf": ["https://arxiv.org/pdf/1611.05203.pdf"], "doi": []}, {"id": "9684a54d9ff94855a40a2dc3eeb1ff16b44795fe", "title": "Human, Object and Scene Centric Image Retrieval Engine to Enhance Image Management", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8261019"]}, {"id": "1eeca84f33079c6d7a95daf8994370b2d7a93443", "title": "Fingertip Detection and Tracking for Recognition of Air-Writing in Videos", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.03016.pdf"], "doi": []}, {"id": "5a0209515ab62e008efeca31f80fa0a97031cd9d", "title": "Dataset fingerprints: Exploring image collections through data mining", "year": "2015", "pdf": ["http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/3B_046.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/3B_046_ext.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/ext/3B_046_ext.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Rematas_Dataset_Fingerprints_Exploring_2015_CVPR_paper.pdf", "https://homes.cs.washington.edu/~krematas/Publications/rematasCVPR2015.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7299120", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2015.7299120", "http://doi.org/10.1109/CVPR.2015.7299120"]}, {"id": "b3b85d0a697c9b0309965151dcc396eb70adf46e", "title": "Shallowing Deep Networks: Layer-wise Pruning based on Feature Representations.", "year": "2018", "pdf": [], "doi": ["https://www.ncbi.nlm.nih.gov/pubmed/30296213", "http://doi.org/10.1109/TPAMI.2018.2874634"]}, {"id": "5925a25dfe107c49c636eccb8f9fd1aeef7b438c", "title": "Temporal Shift Module for Efficient Video Understanding", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.08383.pdf"], "doi": []}, {"id": "821ba3eba1e36a29cc482f5378f4a0d0f6893159", "title": "Unsupervised Domain Adaptation for Learning Eye Gaze from a Million Synthetic Images: An Adversarial Approach", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.07926.pdf"], "doi": []}, {"id": "1ca06a9c5e80bc0d4011f0d6f6ccc1a5ee746844", "title": "Automatic Image Cropping: A Computational Complexity Study", "year": "2016", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2016/papers/Chen_Automatic_Image_Cropping_CVPR_2016_paper.pdf", "http://pages.cs.wisc.edu/~liangz/12_Chen_Automatic_Image_Cropping_CVPR_2016_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780430", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2016.61", "http://doi.org/10.1109/CVPR.2016.61"]}, {"id": "64a336f952ec67f85fe00092847d50dd29e4cddc", "title": "Fast Screening Algorithm for Template Matching FAST SCREENING ALGORITHM FOR TEMPLATE MATCHING", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/64a3/36f952ec67f85fe00092847d50dd29e4cddc.pdf"], "doi": []}, {"id": "d42142285c46207a16bd4294e437d504e419a9b7", "title": "Varying image description tasks : spoken versus written descriptions", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/d421/42285c46207a16bd4294e437d504e419a9b7.pdf"], "doi": []}, {"id": "cd8398e82e0c0cc4276a1694fd333214ede337ea", "title": "Decoupled Spatial Neural Attention for Weakly Supervised Semantic Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.02563.pdf"], "doi": []}, {"id": "8e416d760feb5f23bc1a6dab98eb1f6e75ab8907", "title": "Image-Grounded Conversations: Multimodal Context for Natural Question and Response Generation", "year": "2017", "pdf": ["https://arxiv.org/pdf/1701.08251.pdf"], "doi": []}, {"id": "f7186eb3d717694d4ab1730a8d0f662e90c44d6f", "title": "A Computational Study on Word Meanings and Their Distributed Representations via Polymodal Embedding", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/f718/6eb3d717694d4ab1730a8d0f662e90c44d6f.pdf"], "doi": []}, {"id": "35c5b84fa47e5b6cee201f831507f62482ccebcc", "title": "CUNI System for the WMT 17 Multimodal Traslation Task", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/35c5/b84fa47e5b6cee201f831507f62482ccebcc.pdf"], "doi": []}, {"id": "b63041d05b78a66724fbcb2803508999bf885d6b", "title": "Deep Sets", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.06114.pdf"], "doi": []}, {"id": "d318f3ca49f7f2159b9fc0face08eb284d5442dc", "title": "Scene Text Detection via Holistic, Multi-Channel Prediction", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.09002.pdf"], "doi": []}, {"id": "bb4650130c460f413e97b0328624a485bf094967", "title": "Dynamic Lexicon Generation for Natural Scene Images", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/bb46/50130c460f413e97b0328624a485bf094967.pdf"], "doi": []}, {"id": "43cb50f669a0d492256d11c6cc4128ba0ce79a3e", "title": "Per-Pixel Feedback for improving Semantic Segmentation", "year": "2017", "pdf": ["https://arxiv.org/pdf/1712.02861.pdf"], "doi": []}, {"id": "f81f5da2a1e4eb80b465b8dffca4c9e583a8a8a6", "title": "Rapid Object Detection Systems , Utilising Deep Learning and Unmanned Aerial Systems ( Uas ) for Civil Engineering Applications", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/f81f/5da2a1e4eb80b465b8dffca4c9e583a8a8a6.pdf"], "doi": []}, {"id": "e7928bd33d09fd00a588617736b102063ca9d070", "title": "A Non-Technical Survey on Deep Convolutional Neural Network Architectures", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.02129.pdf"], "doi": []}, {"id": "45e2aa7706fcedcbb2d93304a9824fe762b8b3b0", "title": "DAC-SDC Low Power Object Detection Challenge for UAV Applications", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.00110.pdf"], "doi": []}, {"id": "8f45907fba8fce5e9d958cf66cb68b406858fbb6", "title": "Project AutoVision: Localization and 3D Scene Perception for an Autonomous Vehicle with a Multi-Camera System", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.05477.pdf"], "doi": []}, {"id": "f672bf42dbefb6b40921c00a05f60284934e9948", "title": "LDS-Inspired Residual Networks", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/f672/bf42dbefb6b40921c00a05f60284934e9948.pdf"], "doi": []}, {"id": "c0343f9cc5f16166bda83815812c4c71ab3258e3", "title": "Hierarchical LSTMs with Adaptive Attention for Visual Captioning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.11004.pdf"], "doi": []}, {"id": "e075c5ef9d7d4ef77c92008ffaa7ac12e60383c1", "title": "Fusing Saliency Maps with Region Proposals for Unsupervised Object Localization", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.03905.pdf"], "doi": []}, {"id": "fb6bcf5763fe7ec7366f5991400090fa503613a7", "title": "Reduce Cognitive Burden on Drivers through Contextualising Environments", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8417781", "http://doi.org/10.1109/VTCSpring.2018.8417781"]}, {"id": "a133374b9630bbe6eb2b7de8c3204aa57e75c582", "title": "A Deep Network Solution for Attention and Aesthetics Aware Photo Cropping.", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/a133/374b9630bbe6eb2b7de8c3204aa57e75c582.pdf"], "doi": []}, {"id": "ccbd7e417158e7ae0f9f61c3b6d1e5a3317cce34", "title": "Object Proposals in Computer Vision", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/7fb4/19f28652e4d1b1ddab1a19bdc5be1c7bbb62.pdf"], "doi": []}, {"id": "aa09ade36424fd83f067f234baffde294800e705", "title": "Is a Picture Worth Ten Thousand Words in a Review Dataset?", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.07496.pdf"], "doi": []}, {"id": "30ccfd2b4b6d5b30581356ccefcf96fd77c1766a", "title": "Overview of the ImageCLEF 2014 Scalable Concept Image Annotation Task", "year": "2014", "pdf": ["https://pdfs.semanticscholar.org/a346/060e3b71a0553eaa4bc28fbc0114b1798fec.pdf"], "doi": []}, {"id": "a25c32ea0292af5eacd6e9e0ed53b9ff7e65433f", "title": "Learning to Learn from Web Data Through Deep Semantic Embeddings", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.06368.pdf"], "doi": []}, {"id": "247b14570940601f5c7a2da1db532ecf1c302288", "title": "Dual Attention Networks for Multimodal Reasoning and Matching", "year": "2017", "pdf": ["https://arxiv.org/pdf/1611.00471.pdf"], "doi": []}, {"id": "bb021f58f8822d12f5747d583a46005ade4a0b10", "title": "Breaking Microsoft \u2019 s CAPTCHA", "year": "2015", "pdf": ["https://pdfs.semanticscholar.org/bb02/1f58f8822d12f5747d583a46005ade4a0b10.pdf"], "doi": []}, {"id": "a7066c13ba21817abcf8ff955740493adf95b02c", "title": "Points2Pix: 3D Point-Cloud to Image Translation using conditional Generative Adversarial Networks", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.09280.pdf"], "doi": []}, {"id": "c06447df3e50ec451240205cefa0708caee8ab8c", "title": "Picture it in your mind: generating high level visual representations from textual descriptions", "year": "2017", "pdf": ["https://arxiv.org/pdf/1606.07287.pdf"], "doi": []}, {"id": "4ecaa651722a98c2847377f3ae1c70294b4791ce", "title": "Few-Example Object Detection with Model Communication.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1706.08249.pdf"], "doi": []}, {"id": "1eb4ea011a3122dc7ef3447e10c1dad5b69b0642", "title": "Contextual Visual Recognition from Images and Videos", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/1eb4/ea011a3122dc7ef3447e10c1dad5b69b0642.pdf"], "doi": []}, {"id": "0b4d3e59a0107f0dad22e74054bab1cf1ad9c32e", "title": "Visual Genome: Connecting Language and Vision Using Crowdsourced Dense Image Annotations", "year": "2016", "pdf": ["https://arxiv.org/pdf/1602.07332.pdf"], "doi": []}, {"id": "28b72ad9229f38ec61f950e1d794d6af070d1800", "title": "Zero-shot Object Prediction using Semantic Scene Knowledge", "year": "2017", "pdf": ["https://arxiv.org/pdf/1604.07952.pdf"], "doi": []}, {"id": "877c5a3ad915c2bbb5595252d08163f34ce58957", "title": "LCR-Net: Localization-Classification-Regression for Human Pose", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2017/papers/Rogez_LCR-Net_Localization-Classification-Regression_for_CVPR_2017_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8099617", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2017.134", "http://doi.org/10.1109/CVPR.2017.134"]}, {"id": "e8e662e45e39249756d2b0090782434a5cf1f4aa", "title": "VegFru: A Domain-Specific Dataset for Fine-Grained Visual Categorization", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017/papers/Hou_VegFru_A_Domain-Specific_ICCV_2017_paper.pdf", "http://openaccess.thecvf.com/content_ICCV_2017/supplemental/Hou_VegFru_A_Domain-Specific_ICCV_2017_supplemental.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8237328", "http://doi.ieeecomputersociety.org/10.1109/ICCV.2017.66", "http://doi.org/10.1109/ICCV.2017.66"]}, {"id": "1db6a35c42e86d5cd64ad2a4803fd683647fab96", "title": "Tiny Transform Net for Mobile Image Stylization", "year": "2017", "pdf": [], "doi": ["http://dl.acm.org/citation.cfm?id=3079034"]}, {"id": "8efb48fdb8616da5133efb8659cd96c53529f76a", "title": "Active Vision Dataset Benchmark", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018_workshops/papers/w40/Ammirato_Active_Vision_Dataset_CVPR_2018_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8575447", "http://doi.ieeecomputersociety.org/10.1109/CVPRW.2018.00277", "http://doi.org/10.1109/CVPRW.2018.00277"]}, {"id": "71afbb37a81a44299872bee9ae888f4129962e18", "title": "RECURRENT NEURAL FEEDBACK MODEL FOR AUTOMATED IMAGE ANNOTATION", "year": "2017", "pdf": [], "doi": []}, {"id": "9d8747468f0fed8e335656d7fe9737e4dc21c798", "title": "RetinaMask: Learning to predict masks improves state-of-the-art single-shot detection for free", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.03353.pdf"], "doi": []}, {"id": "511fad07b943f088e487ea09ffc6c89114bb3c3e", "title": "Towards an Integrated Method of Detection and Description for Face Authentication System", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8430774", "http://doi.org/10.1109/HSI.2018.8430774"]}, {"id": "9f545b9006970f7626b7b121c5c3c66204f1ba40", "title": "Improving Pairwise Ranking for Multi-label Image Classification", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.03135.pdf"], "doi": []}, {"id": "2ba540ca70c7dee81e13768792aa7571952987f6", "title": "Drivable Road Detection Based on Dilated FPN with Feature Aggregation", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8372075", "http://doi.ieeecomputersociety.org/10.1109/ICTAI.2017.00172", "http://doi.org/10.1109/ICTAI.2017.00172"]}, {"id": "2e8b08c8df95d2ef8c0d03820094608e9cf456ab", "title": "License Plate Detection and Recognition in Unconstrained Scenarios", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/2e8b/08c8df95d2ef8c0d03820094608e9cf456ab.pdf"], "doi": []}, {"id": "8331fb280f083767fe85ba476862e519e0275233", "title": "OMNIA Faster R-CNN: Detection in the wild through dataset merging and soft distillation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.02611.pdf"], "doi": []}, {"id": "54e0b4f63e6ed966d76b6ef83488249fd09d3a43", "title": "Plug & Play Generative Networks: Conditional Iterative Generation of Images in Latent Space", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8099857"]}, {"id": "f11f609facfb650ed8e659236a04bc0a664cb665", "title": "An Analysis of Scale Invariance in Object Detection - SNIP", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578475"]}, {"id": "a8773da25b133d258132d646024bf5ffd5694302", "title": "A Bottom-Up Approach Based on Semantics for the Interpretation of the Main Camera Stream in Soccer Games", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8575392"]}, {"id": "666e08b6921a28fed75f35dd70d322f0edc06e41", "title": "Rain Removal in Traffic Surveillance: Does it Matter?", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.12574.pdf"], "doi": []}, {"id": "1ac6a33f04f6c5a8084c15c85295f987cc8e3d72", "title": "FVQA: Fact-Based Visual Question Answering", "year": "2018", "pdf": ["https://arxiv.org/pdf/1606.05433.pdf"], "doi": []}, {"id": "3ab5d67310aa3592b68e9ef55df4603507d0d486", "title": "Learning Spatial Transforms for Refining Object Segment Proposals", "year": "2017", "pdf": ["https://www.computer.org/csdl/proceedings/wacv/2017/4822/00/07926595.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7926595", "http://doi.org/10.1109/WACV.2017.12"]}, {"id": "757b27a3ceb2293b8284fc24a7084a0c3fc2ae21", "title": "Data Distillation: Towards Omni-Supervised Learning", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578531"]}, {"id": "0d4dbd59e42e615ccf6cd4f71203be97afac48fb", "title": "End-to-End Joint Semantic Segmentation of Actors and Actions in Video", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/0d4d/bd59e42e615ccf6cd4f71203be97afac48fb.pdf"], "doi": []}, {"id": "81d327ec41c67728b15438bca86d10b72de1d88f", "title": "Visual Affordance and Function Understanding: A Survey", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.06775.pdf"], "doi": []}, {"id": "df5094b2e8cf7e3bde3943ca7a56eb879b8e34ab", "title": "A Concatenated Residual Convolutional Network for Image Deblurring", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/df50/94b2e8cf7e3bde3943ca7a56eb879b8e34ab.pdf"], "doi": []}, {"id": "b8298cf0056af5afa3185181ddd5f6bb03181696", "title": "Training for Diversity in Image Paragraph Captioning", "year": "2018", "pdf": [], "doi": ["https://aclanthology.info/papers/D18-1084/d18-1084", "http://aclweb.org/anthology/D18-1084"]}, {"id": "7a9fe5781220cca6ca600833015f200a9c03d50e", "title": "Teaching Machines to Describe Images via Natural Language Feedback", "year": "2017", "pdf": ["https://arxiv.org/pdf/1706.00130.pdf"], "doi": []}, {"id": "08f46d6a91e513edd57a0ef15d5367b5d0545c1b", "title": "How do targets, nontargets, and scene context influence real-world object detection?", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/08f4/6d6a91e513edd57a0ef15d5367b5d0545c1b.pdf"], "doi": []}, {"id": "0612745dbd292fc0a548a16d39cd73e127faedde", "title": "Flickr30k Entities: Collecting Region-to-Phrase Correspondences for Richer Image-to-Sentence Models", "year": "2015", "pdf": ["https://arxiv.org/pdf/1505.04870.pdf"], "doi": []}, {"id": "4b4a174f46ce03caf1ffa4addd074aaa70539f35", "title": "BlazeIt: Fast Exploratory Video Queries using Neural Networks.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.01046.pdf"], "doi": []}, {"id": "aa6f094f17d78380f927555a348ad514a505cc3b", "title": "SlowFast Networks for Video Recognition", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.03982.pdf"], "doi": []}, {"id": "bfce448a3409d87ba281de53ed696b09119f2ba9", "title": "Object segmentation in depth maps with one user click and a synthetically trained fully convolutional network", "year": "2018", "pdf": ["https://arxiv.org/pdf/1801.01281.pdf"], "doi": []}, {"id": "885d589101ab3c09bda20ee9578f2c6d2f6cddfa", "title": "Learning to Guide Decoding for Image Captioning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.00887.pdf"], "doi": []}, {"id": "1553084dcbf2235428e7dbf57b57e567c5ea4d1f", "title": "AISHELL-2: Transforming Mandarin ASR Research Into Industrial Scale", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.10583.pdf"], "doi": []}, {"id": "d7d2a1d42f0e3182d538cf8fb4d55f3e9d7ce779", "title": "Setting an attention region for convolutional neural networks using region selective features, for recognition of materials within glass vessels", "year": "2017", "pdf": ["https://arxiv.org/pdf/1708.08711.pdf"], "doi": []}, {"id": "658c802890c7133e2ade778b5d88b68bcd0dca9c", "title": "Learning to Segment via Cut-and-Paste", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.06414.pdf"], "doi": []}, {"id": "e56c99e8a94d3e585166fcd66f2ab6da60932f09", "title": "Semantic Speech Retrieval With a Visually Grounded Model of Untranscribed Speech", "year": "2018", "pdf": ["https://arxiv.org/pdf/1710.01949.pdf"], "doi": []}, {"id": "135c71101af5d030f8cf470c454e7b655d699920", "title": "Stacked Latent Attention for Multimodal Reasoning", "year": "2018", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2018/papers/Fan_Stacked_Latent_Attention_CVPR_2018_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578216", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2018.00118", "http://doi.org/10.1109/CVPR.2018.00118"]}, {"id": "9d17e897e8344d1cf42a322359b48d1ff50b4aef", "title": "Learning to Fuse Things and Stuff", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.01192.pdf"], "doi": []}, {"id": "185e7191dc5eca046d90205527da597b6ba9ae3c", "title": "Fast Self-Attentive Multimodal Retrieval", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8354311", "http://doi.ieeecomputersociety.org/10.1109/WACV.2018.00207", "http://doi.org/10.1109/WACV.2018.00207"]}, {"id": "6fdc0bc13f2517061eaa1364dcf853f36e1ea5ae", "title": "DAISEE: Dataset for Affective States in E-Learning Environments", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/6fdc/0bc13f2517061eaa1364dcf853f36e1ea5ae.pdf"], "doi": []}, {"id": "947c973846f2c5f8f42225c1108810bcdb4a7015", "title": "Grounded language understanding for manipulation instructions using GAN-based classification", "year": "2017", "pdf": ["https://arxiv.org/pdf/1801.05096.pdf"], "doi": []}, {"id": "d082f64b8a0a07d105207eb822be58ffb61b353a", "title": "EMOTIC: Emotions in Context Dataset", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_cvpr_2017_workshops/w41/papers/Lapedriza_EMOTIC_Emotions_in_CVPR_2017_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8015019", "http://doi.ieeecomputersociety.org/10.1109/CVPRW.2017.285", "http://doi.org/10.1109/CVPRW.2017.285"]}, {"id": "1e55e9c647832c969e449da28a391205a9704c60", "title": "Actor and Action Video Segmentation from a Sentence", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578722"]}, {"id": "b705ca751a947e3b761e2305b41891051525d9df", "title": "Exploring Context with Deep Structured Models for Semantic Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1603.03183.pdf"], "doi": []}, {"id": "8bce31108f598986558e9afb1061eb988ea4f3be", "title": "Automated Image Annotation based on YOLOv3", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8592167"]}, {"id": "7c7af300c4780ad01e7db4d60fbf89771672585b", "title": "Detection and Segmentation of Manufacturing Defects with Convolutional Neural Networks and Transfer Learning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1808.02518.pdf"], "doi": []}, {"id": "f4c45108cb41051010d8a5175b8da23eb246c967", "title": "Improving Image Captioning by Leveraging Knowledge Graphs", "year": "2019", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8658870"]}, {"id": "2c28c95066b1df918f956f3cc072e29fd452dcad", "title": "Generalized Multi-View Embedding for Visual Recognition and Cross-Modal Retrieval", "year": "2018", "pdf": ["https://arxiv.org/pdf/1605.09696.pdf"], "doi": []}, {"id": "356ac139eb92753d710c1cd6bbb84403b8cd711b", "title": "A virtual reality platform for dynamic human-scene interaction", "year": "2016", "pdf": [], "doi": ["http://dl.acm.org/citation.cfm?id=2992144"]}, {"id": "d3a545bb8008a717ef95aa981295f38e6cee0d9d", "title": "Art painting detection and identification based on deep learning and image local features", "year": "2018", "pdf": [], "doi": ["https://doi.org/10.1007/s11042-018-6387-5"]}, {"id": "3b1860cacfd59893bb0f7e131e982e9bbeb21e87", "title": "Defect Detection from UAV Images Based on Region-Based CNNs", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8637492"]}, {"id": "bd4b5fcdf801b13d51083f3f7809366751c81c09", "title": "c-RNN: A Fine-Grained Language Model for Image Captioning", "year": "2018", "pdf": [], "doi": ["https://doi.org/10.1007/s11063-018-9836-2"]}, {"id": "3f10b9d98a276fb9e21e5742ce88bc7f48629715", "title": "Imparare a Quantificare Guardando (Learning to Quantify by Watching)", "year": "2016", "pdf": ["https://pdfs.semanticscholar.org/3f10/b9d98a276fb9e21e5742ce88bc7f48629715.pdf"], "doi": []}, {"id": "bb60196acdd4b0870cd0dd7f5a7c712aa042b1d1", "title": "A Benchmark for Endoluminal Scene Segmentation of Colonoscopy Images", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.00799.pdf"], "doi": []}, {"id": "afc4cc092f990644ff7a11dc7ab60519920cbc9d", "title": "Learning Rich Features for Image Manipulation Detection", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578214"]}, {"id": "09222c50d8ffcc74bbb7462400bd021772850bba", "title": "Incorporating Network Built-in Priors in Weakly-Supervised Semantic Segmentation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1706.02189.pdf"], "doi": []}, {"id": "7d39d69b23424446f0400ef603b2e3e22d0309d6", "title": "YOLO9000: Better, Faster, Stronger", "year": "2017", "pdf": ["https://arxiv.org/pdf/1612.08242.pdf"], "doi": []}, {"id": "bc6a01ea112d45bdded0bb2d34a4782e4f6f16be", "title": "Real-world Multi-object, Multi-grasp Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.00520.pdf"], "doi": []}, {"id": "24e680c5e7f59b72984ebc344dfba0f9573fb38c", "title": "On the difficulty of a distributional semantics of spoken language", "year": "2018", "pdf": ["https://arxiv.org/pdf/1803.08869.pdf"], "doi": []}, {"id": "2d15a7546c16d5821ffa8f769eb7ec18e435e64d", "title": "Recognition in Terra Incognita", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.04975.pdf"], "doi": []}, {"id": "84a3478cf884b0cbba480c7c36dab565e778094b", "title": "Learning to detect chest radiographs containing lung nodules using visual attention networks", "year": "2019", "pdf": ["https://arxiv.org/pdf/1712.00996.pdf"], "doi": []}, {"id": "bbe3d39adcb41ad2824204c0b0d299d77c2d8363", "title": "SketchyGAN: Towards Diverse and Realistic Sketch to Image Synthesis", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8579079"]}, {"id": "d7443ed614548498a390aa1b83c74069aa923074", "title": "Dataless Black-Box Model Comparison", "year": "2018", "pdf": [], "doi": ["https://doi.org/10.1134/S1054661818040272"]}, {"id": "225c318f3143b9b9d6c0f7a87672bd1d9d2901f4", "title": "2 D-Driven 3 D Object Detection in RGB-D Images", "year": "2017", "pdf": ["https://pdfs.semanticscholar.org/225c/318f3143b9b9d6c0f7a87672bd1d9d2901f4.pdf"], "doi": []}, {"id": "b20dc4906fc021aaf8353df98f8828d99f951c3c", "title": "Good View Hunting: Learning Photo Composition from Dense View Pairs", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578668"]}, {"id": "f42099bcd6cd13146d96d1ed15353a4ebc1f4d9a", "title": "Single-Shot Refinement Neural Network for Object Detection", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578540"]}, {"id": "d05825a394f11a391c8815f6b0d394cdb4cfaa95", "title": "I2T2I: Learning text to image synthesis with textual data augmentation", "year": "2017", "pdf": ["https://arxiv.org/pdf/1703.06676.pdf"], "doi": []}, {"id": "475e16577be1bfc0dd1f74f67bb651abd6d63524", "title": "DAiSEE: Towards User Engagement Recognition in the Wild", "year": "2016", "pdf": ["https://arxiv.org/pdf/1609.01885.pdf"], "doi": []}, {"id": "7985ac55e170273dd0ffa6bd756e588bab301d57", "title": "Mind's eye: A recurrent visual representation for image caption generation", "year": "2015", "pdf": ["http://www.cs.cmu.edu/~xinleic/docs/rvr/RVR-1.pdf", "http://www.cs.cmu.edu/~xinleic/papers/cvpr15_rnn.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/2A_022.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/app/2A_022_ext.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/ext/2A_022_ext.pdf", "http://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Chen_Minds_Eye_A_2015_CVPR_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298856", "http://doi.ieeecomputersociety.org/10.1109/CVPR.2015.7298856", "http://doi.org/10.1109/CVPR.2015.7298856"]}, {"id": "5979acf247d8af73b41da0eab6d8a7eda1036c9a", "title": "A Fully-Convolutional Framework for Semantic Segmentation", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8227388", "http://doi.org/10.1109/DICTA.2017.8227388"]}, {"id": "05357b8c05b5bc020e871fc330a88910c3177e4d", "title": "Multiple Instance Detection Network with Online Instance Classifier Refinement", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8099809"]}, {"id": "2ce76250731cb19ccc5ffff43e4c6abec8f5af79", "title": "Learning to Segment Affordances", "year": "2017", "pdf": ["http://openaccess.thecvf.com/content_ICCV_2017_workshops/papers/w14/Luddecke_Learning_to_Segment_ICCV_2017_paper.pdf"], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8265305", "http://doi.ieeecomputersociety.org/10.1109/ICCVW.2017.96", "http://doi.org/10.1109/ICCVW.2017.96"]}, {"id": "3468740e4a9fc72a269f4f0ca8470ccd60925f92", "title": "Robustness Analysis of Visual QA Models by Basic Questions", "year": "2017", "pdf": ["https://arxiv.org/pdf/1709.04625.pdf"], "doi": []}, {"id": "9c11b1552748c697bbfd33c157f7b7875686eb10", "title": "Application of deep learning in object detection", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7960069"]}, {"id": "bf2793fc09176f8bf23b3a2b3c6b32185e8a8329", "title": "Multi-level Fusion Based 3D Object Detection from Monocular Images", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578347"]}, {"id": "9a33c33190ce0c1bfffe2896ce162b3be05bc9e8", "title": "Using Explanations to Improve Ensembling of Visual Question Answering Systems", "year": "2017", "pdf": [], "doi": []}, {"id": "052373d7fd12145c41377f9a05513596d32e409c", "title": "Deep Compositional Captioning: Describing Novel Object Categories without Paired Training Data", "year": "2016", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780377"]}, {"id": "3796fe0e5dd10b01fc9fb486d09e61b28eedf915", "title": "Instance Segmentation of Fibers from Low Resolution CT Scans via 3D Deep Embedding Learning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1901.01034.pdf"], "doi": []}, {"id": "21ff1d20dd7b3e6b1ea02036c0176d200ec5626d", "title": "Loss Max-Pooling for Semantic Image Segmentation", "year": "2017", "pdf": ["https://arxiv.org/pdf/1704.02966.pdf"], "doi": []}, {"id": "2e78e804ddfe86f112719d4a237c882f930a9d3b", "title": "Development of Real-time ADAS Object Detector for Deployment on CPU", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.05894.pdf"], "doi": []}, {"id": "82451d0ae2e0154b5f9cd096af199c35319e911a", "title": "Image spam filtering using convolutional neural networks", "year": "2018", "pdf": [], "doi": ["http://doi.org/10.1007/s00779-018-1168-8"]}, {"id": "5fe7add7bb041eb52c9983fbdd792bfad1af9992", "title": "ELASTIC: Improving CNNs with Instance Specific Scaling Policies", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.05262.pdf"], "doi": []}, {"id": "c2021d9652bf3ec3921bf9c13a06e1ea51588d54", "title": "Multimodal Image Captioning for Marketing Analysis", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8396991"]}, {"id": "c88c233c50f49b4b85a8702d5ee384d96fca8c23", "title": "Object-Proposal Evaluation Protocol is \u2018Gameable\u2019", "year": "2016", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7780466"]}, {"id": "bd37ff771acd72ebdf4024043cb62fcacdd3a82b", "title": "Cycle-Consistent Deep Generative Hashing for Cross-Modal Retrieval", "year": "2019", "pdf": ["https://arxiv.org/pdf/1804.11013.pdf"], "doi": []}, {"id": "a9b219bd6ad71394ad21929fb5c50387698f340d", "title": "Geometry-Aware Traffic Flow Analysis by Detection and Tracking", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8575394"]}, {"id": "e8b2a98f87b7b2593b4a046464c1ec63bfd13b51", "title": "CMS-RCNN: Contextual Multi-Scale Region-based CNN for Unconstrained Face Detection", "year": "2016", "pdf": ["https://arxiv.org/pdf/1606.05413.pdf"], "doi": []}, {"id": "79a05184a6689800fee40bbb95554ffeffafeee2", "title": "Deep Image Harmonization", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8099782"]}, {"id": "4d86a4c74201c0d5df9a4b259a70f2221a5cb0cb", "title": "Quad-Networks: Unsupervised Learning to Rank for Interest Point Detection", "year": "2017", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8099901"]}, {"id": "8067d5d3fc80abb010b239bbe012b50bae8e6611", "title": "On the Robustness of Semantic Segmentation Models to Adversarial Attacks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1711.09856.pdf"], "doi": []}, {"id": "87fe38a79ee12531bb618d4006a476b3a5256f3e", "title": "Relative Saliency and Ranking: Models, Metrics, Data, and Benchmarks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.02426.pdf"], "doi": []}, {"id": "04a5b47caca62d9351219726e92c08e60f74d652", "title": "A Novel Co-design Peta-scale Heterogeneous Cluster for Deep Learning Training.", "year": "2018", "pdf": ["https://arxiv.org/pdf/1802.02326.pdf"], "doi": []}, {"id": "fc09e358a130e88dead33bf6bb2cbc0e94c88291", "title": "Single-Shot Object Detection with Enriched Semantics", "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8578707"]}, {"id": "ca673e6c11000459e3ac27dc4ab5f29177d04fea", "title": "A detection method for low-pixel ratio object", "year": "2018", "pdf": [], "doi": ["https://doi.org/10.1007/s11042-018-6653-6"]}, {"id": "e4a5ff03ac258f1bcc9c214c30497610b3d5faa2", "title": "DropBlock: A regularization method for convolutional networks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.12890.pdf"], "doi": []}, {"id": "58d16e23e1192be4acaf6a29c1f5995817146554", "title": "Bringing back simplicity and lightliness into neural image captioning", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.06245.pdf"], "doi": []}, {"id": "d7cb7ca97a665f23165b3c5420e848563cdf1f15", "title": "Pooling Pyramid Network for Object Detection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.03284.pdf"], "doi": []}, {"id": "38b2e523828a1f23ad5ad4306a0f9fedca167c90", "title": "Satellite Imagery Multiscale Rapid Detection with Windowed Networks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.09978.pdf"], "doi": []}, {"id": "1b7a7d291235e4b6e5f97722124070feb26f3cc1", "title": "Learning Two-Branch Neural Networks for Image-Text Matching Tasks", "year": "2018", "pdf": ["https://arxiv.org/pdf/1704.03470.pdf"], "doi": []}, {"id": "cb4fc4d49783f2049c48a062169f04eb744443ec", "title": "Paying More Attention to Saliency: Image Captioning with Saliency and Context Attention", "year": "2018", "pdf": ["https://arxiv.org/pdf/1706.08474.pdf"], "doi": []}, {"id": "c27c2fe9642fb82a3dfc314ce6003fe7a88eb1ec", "title": "Interpretable R-CNN", "year": "2017", "pdf": [], "doi": []}]} \ No newline at end of file