1 files changed, 1 insertions, 0 deletions
diff --git a/site/datasets/unknown/voxceleb2.json b/site/datasets/unknown/voxceleb2.json
new file mode 100644
index 00000000..8a40e573
--- /dev/null
+++ b/site/datasets/unknown/voxceleb2.json
@@ -0,0 +1 @@
+{"id": "8875ae233bc074f5cd6c4ebba447b536a7e847a5", "citations": [{"id": "9461ae046dbbafbad095bbbc80d0b9e5931f6a72", "title": "Linkage Based Face Clustering via Graph Convolution Network", "year": "2019", "pdf": ["https://arxiv.org/pdf/1903.11306.pdf"], "doi": []}, {"id": "827fdecf6a292cefb21837b9d11533a0e40f9e08", "title": "The Conversation: Deep Audio-Visual Speech Enhancement", "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.04121.pdf"], "doi": []}, {"id": "073e98d1a443e7b5f8b65903f18cd05a2b884400", "title": "Unsupervised Learning of Object Landmarks through Conditional Image Generation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.07823.pdf"], "doi": []}, {"id": "a5985dfb2f90cd34d83613b4872ac72b298a876e", "title": "VoiceFilter: Targeted Voice Separation by Speaker-Conditioned Spectrogram Masking", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.04826.pdf"], "doi": []}, {"id": "4519b43e0d22934a2a3035d45f7626fe2e77643a", "title": "The VOiCES from a Distance Challenge 2019 Evaluation Plan", "year": "2019", "pdf": ["https://arxiv.org/pdf/1902.10828.pdf"], "doi": []}, {"id": "54eef13b3fe487cc48a74faf8f486a312f545cc9", "title": "Self-supervised speaker embeddings", "year": "2019", "pdf": ["https://arxiv.org/pdf/1904.03486.pdf"], "doi": []}, {"id": "56a01913e58f721e38b5ddf6b30a588c8d4dfc66", "title": "Noise Robust Speaker Recognition Based on Adaptive Frame Weighting in GMM for i-Vector Extraction", "year": "2019", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8653291"]}, {"id": "ff34d1ede03e99fb4694961d458bf0ca51cd4b31", "title": "A Near Real-Time Automatic Speaker Recognition Architecture for Voice-Based User Interface", "year": "2019", "pdf": ["https://pdfs.semanticscholar.org/ff34/d1ede03e99fb4694961d458bf0ca51cd4b31.pdf"], "doi": []}, {"id": "a2344004f0e1409c0c9473d071a5cfd74bff0a5d", "title": "Learnable PINs: Cross-modal Embeddings for Person Identity", "year": "2018", "pdf": ["https://arxiv.org/pdf/1805.00833.pdf"], "doi": []}, {"id": "bececcda7976be8e19abe19baf2e1f19483b5eaa", "title": "Additive Margin SincNet for Speaker Recognition", "year": "2019", "pdf": ["https://arxiv.org/pdf/1901.10826.pdf"], "doi": []}, {"id": "87061b07b04c55d426e5469935f7d679fe111ede", "title": "VoiceID Loss: Speech Enhancement for Speaker Verification", "year": "2019", "pdf": ["https://arxiv.org/pdf/1904.03601.pdf"], "doi": []}, {"id": "acc405806229912e9723c55bf61dc1d34059f5d1", "title": "Few Shot Speaker Recognition using Deep Neural Networks", "year": "2019", "pdf": ["https://arxiv.org/pdf/1904.08775.pdf"], "doi": []}, {"id": "7fe117f5bbf91f85a97720eca8be36da87a5f8a8", "title": "Revisiting Cross Modal Retrieval", "year": "2018", "pdf": ["https://arxiv.org/pdf/1807.07364.pdf"], "doi": []}, {"id": "a9cbd487b394d94d5f303c91d7f14dae14f70acf", "title": "Can We Use Speaker Recognition Technology to Attack Itself? Enhancing Mimicry Attacks Using Automatic Target Speaker Selection", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.03790.pdf"], "doi": []}, {"id": "b808cfac9c44f27d3716f9280dad4dc2a9bbc8df", "title": "FOR MULTI-SPEAKER CONVERSATIONS USING X-VECTORS", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/b808/cfac9c44f27d3716f9280dad4dc2a9bbc8df.pdf"], "doi": []}, {"id": "35a2ee8bb43ad14b966c294f599475c1edd5213e", "title": "Centroid-based deep metric learning for speaker recognition", "year": "2019", "pdf": ["https://arxiv.org/pdf/1902.02375.pdf"], "doi": []}, {"id": "f2640379adc41360adc1ff03c851b30e509e41b8", "title": "End-to-end losses based on speaker basis vectors and all-speaker hard negative mining for speaker verification", "year": "2019", "pdf": ["https://arxiv.org/pdf/1902.02455.pdf"], "doi": []}, {"id": "2fa77c103e2e4bb166cefab209a0be24c99914d8", "title": "UPC Multimodal Speaker Diarization System for the 2018 Albayzin Challenge", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/2fa7/7c103e2e4bb166cefab209a0be24c99914d8.pdf"], "doi": []}, {"id": "0e8cd058ae29c6f60a8750c1df3caa5dc0e99543", "title": "You Said That?: Synthesising Talking Faces from Audio", "year": "2019", "pdf": ["http://www.robots.ox.ac.uk/~vgg/publications/2019/Jamaludin19/jamaludin19.pdf"], "doi": ["https://doi.org/10.1007/s11263-019-01150-y"]}, {"id": "fa62bd7c8e29a7f2c2104f7b769b487ab9dad4fb", "title": "Deep Neural Network Embedding Learning with High-Order Statistics for Text-Independent Speaker Verification", "year": "2019", "pdf": ["https://arxiv.org/pdf/1903.12058.pdf"], "doi": []}, {"id": "37db8a78a6bb47f6de37f5e42f6df5cd7b1bd304", "title": "Who Do I Sound Like? Showcasing Speaker Recognition Technology by YouTube Voice Search", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.03293.pdf"], "doi": []}, {"id": "2bee6a78bbc3222a76cf1da0217567277aa3ed5f", "title": "Noise-tolerant Audio-visual Online Person Verification using an Attention-based Neural Network Fusion", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.10813.pdf"], "doi": []}, {"id": "78efccbfd1b1fca267c1b7903d3e4344c9d54ce3", "title": "Symbolic Tensor Neural Networks for Digital Media - from Tensor Processing via BNF Graph Rules to CREAMS Applications", "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.06582.pdf"], "doi": []}, {"id": "65a9d4b8740b7ae48127d7eae9443086613a83a7", "title": "FML: Face Model Learning from Videos", "year": "2018", "pdf": ["https://arxiv.org/pdf/1812.07603.pdf"], "doi": []}, {"id": "4da8fe0379af893cca721276e13db3622955b3e7", "title": "Fully Supervised Speaker Diarization", "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.04719.pdf"], "doi": []}, {"id": "5b65716709a2a7a4da2e2aeb611f82e7aacfbbf0", "title": "Large Margin Softmax Loss for Speaker Verification", "year": "2019", "pdf": ["https://arxiv.org/pdf/1904.03479.pdf"], "doi": []}, {"id": "23a109da0c4ce0314f6f016da679a4e1fd6960ef", "title": "JHU Diarization System Description", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/23a1/09da0c4ce0314f6f016da679a4e1fd6960ef.pdf"], "doi": []}, {"id": "70cfbf82cee2f007b3c65cf97373f66f13005b42", "title": "Multi-Task Learning with High-Order Statistics for X-vector based Text-Independent Speaker Verification", "year": "2019", "pdf": ["https://arxiv.org/pdf/1903.12058.pdf"], "doi": []}, {"id": "6f0ce4d957c4e9556b04b539105837a5db63b925", "title": "Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis", "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.04558.pdf"], "doi": []}, {"id": "b3918fab36f106e83e016a3e33d260ad656191c4", "title": "MCE 2018: The 1st Multi-target Speaker Detection and Identification Challenge Evaluation", "year": "2018", "pdf": ["https://arxiv.org/pdf/1904.04240.pdf"], "doi": []}, {"id": "e89cf011bb543137b961807924e0b765d536aa98", "title": "iQIYI-VID: A Large Dataset for Multi-modal Person Identification", "year": "2018", "pdf": ["https://arxiv.org/pdf/1811.07548.pdf"], "doi": []}]}
+\ No newline at end of file