summaryrefslogtreecommitdiff
path: root/site/datasets/final/voxceleb2.json
diff options
context:
space:
mode:
Diffstat (limited to 'site/datasets/final/voxceleb2.json')
-rw-r--r--site/datasets/final/voxceleb2.json2
1 files changed, 1 insertions, 1 deletions
diff --git a/site/datasets/final/voxceleb2.json b/site/datasets/final/voxceleb2.json
index 16ef3875..564401b1 100644
--- a/site/datasets/final/voxceleb2.json
+++ b/site/datasets/final/voxceleb2.json
@@ -1 +1 @@
-{"id": "8875ae233bc074f5cd6c4ebba447b536a7e847a5", "dataset": {"key": "voxceleb2", "name_short": "VoxCeleb2", "name_display": "VoxCeleb2", "name_full": "VoxCeleb2 Dataset", "purpose": "speaker recognition", "comment": "", "created_by": "", "funded_by": "", "funded_by_short": "", "used_by": "", "license": "", "url": "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox2.html", "dl_im": "N", "dl_meta": "", "dl_paper": "", "dl_web": "", "mp_pub": "N", "ft_share": "Y", "nyt_share": "N", "cooperative": "N", "indoor": "", "outdoor": "", "campus": "", "cyberspace": "Y", "parent": "", "source": "youtube", "usernames": "", "names": "", "flickr_meta": "", "year_start": "", "year_end": "", "year_published": "2018", "ongoing": "", "images": "", "videos": "1,128,246 ", "tracklets": "", "identities": "6,112 ", "img_per_person": "", "num_cameras": "", "faces_or_persons": "", "female": "", "male": "", "landmarks": "", "width": "", "height": "", "color": "", "gray": "", "tags": "", "size_gb": "", "agreement": "", "agreement_signed": "", "flickr": "", "facebook": "", "youtube": "Y", "vimeo": "", "google": "", "bing": "", "adam": "", "berit": "", "charlie": "", "notes": "", "derivative_of": "", "": ""}, "paper": {"paper_id": "8875ae233bc074f5cd6c4ebba447b536a7e847a5", "key": "voxceleb2", "title": "VoxCeleb2: Deep Speaker Recognition.", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/8875/ae233bc074f5cd6c4ebba447b536a7e847a5.pdf"], "address": "", "name": "VoxCeleb2", "doi": []}, "addresses": [], "additional_papers": [], "citations": [{"id": "171f8f1090ef0533ff470ed5a4d31ecfefcc74be", "title": "Audio-Visual Scene Analysis with Self-Supervised Multisensory Features", "addresses": [{"name": "UC Berkeley", "source_name": "UC Berkeley", "street_adddress": "Berkeley, CA, USA", "lat": "37.87189920", "lng": "-122.25853990", "type": "edu", "country": "United States"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.03641.pdf"], "doi": []}, {"id": "2c75e7ee01b362cc5f1ef2e49a84d56af93e6a3d", "title": "Training Speaker Recognition Models with Recording-Level Labels", "addresses": [{"name": "Tallinn University of Technology", "source_name": "Institute of Cybernetics, Tallinn University of Technology", "street_adddress": "Akadeemia tee 21, 12618 Tallinn, Estonia", "lat": "59.39791420", "lng": "24.66085570", "type": "edu", "country": "Estonia"}], "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8639601"]}]} \ No newline at end of file
+{"id": "8875ae233bc074f5cd6c4ebba447b536a7e847a5", "dataset": {"key": "voxceleb2", "name_short": "VoxCeleb2", "name_display": "VoxCeleb2", "name_full": "VoxCeleb2 Dataset", "purpose": "speaker recognition", "comment": "", "created_by": "", "funded_by": "", "funded_by_short": "", "used_by": "", "license": "", "url": "http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox2.html", "dl_im": "N", "dl_meta": "", "dl_paper": "", "dl_web": "", "mp_pub": "N", "ft_share": "Y", "nyt_share": "N", "cooperative": "N", "indoor": "", "outdoor": "", "campus": "", "cyberspace": "Y", "parent": "", "source": "youtube", "usernames": "", "names": "", "flickr_meta": "", "year_start": "", "year_end": "", "year_published": "2018", "ongoing": "", "images": "", "videos": "1,128,246 ", "tracklets": "", "identities": "6,112 ", "img_per_person": "", "num_cameras": "", "faces_or_persons": "", "female": "", "male": "", "landmarks": "", "width": "", "height": "", "color": "", "gray": "", "tags": "", "size_gb": "", "agreement": "", "agreement_signed": "", "flickr": "", "facebook": "", "youtube": "Y", "vimeo": "", "google": "", "bing": "", "adam": "", "berit": "", "charlie": "", "notes": "", "derivative_of": "", "": ""}, "paper": {"paper_id": "8875ae233bc074f5cd6c4ebba447b536a7e847a5", "key": "voxceleb2", "title": "VoxCeleb2: Deep Speaker Recognition.", "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/8875/ae233bc074f5cd6c4ebba447b536a7e847a5.pdf"], "address": "", "name": "VoxCeleb2", "doi": []}, "addresses": [], "additional_papers": [], "citations": [{"id": "827fdecf6a292cefb21837b9d11533a0e40f9e08", "title": "The Conversation: Deep Audio-Visual Speech Enhancement", "addresses": [{"name": "University of Oxford", "source_name": "University of Oxford", "street_adddress": "Radcliffe Camera, Radcliffe Square, Grandpont, Oxford, Oxon, South East, England, OX1 4AJ, UK", "lat": "51.75345380", "lng": "-1.25400997", "type": "edu", "country": "United Kingdom"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.04121.pdf"], "doi": []}, {"id": "073e98d1a443e7b5f8b65903f18cd05a2b884400", "title": "Unsupervised Learning of Object Landmarks through Conditional Image Generation", "addresses": [{"name": "University of Oxford", "source_name": "University of Oxford", "street_adddress": "Radcliffe Camera, Radcliffe Square, Grandpont, Oxford, Oxon, South East, England, OX1 4AJ, UK", "lat": "51.75345380", "lng": "-1.25400997", "type": "edu", "country": "United Kingdom"}, {"name": "University of Edinburgh", "source_name": "University of Edinburgh", "street_adddress": "New College, New College Courtyard, The Mound, Old Town, Edinburgh, City of Edinburgh, Scotland, EH1 2LX, UK", "lat": "55.94951105", "lng": "-3.19534913", "type": "edu", "country": "United Kingdom"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1806.07823.pdf"], "doi": []}, {"id": "a5985dfb2f90cd34d83613b4872ac72b298a876e", "title": "VoiceFilter: Targeted Voice Separation by Speaker-Conditioned Spectrogram Masking", "addresses": [{"name": "Google", "source_name": "Google, Inc.", "street_adddress": "1600 Amphitheatre Pkwy, Mountain View, CA 94043, USA", "lat": "37.42199990", "lng": "-122.08405750", "type": "company", "country": "United States"}, {"name": "IDIAP Research Institute", "source_name": "IDIAP Research Institute", "street_adddress": "Idiap Research Institute, Parking Centre du parc, Martigny, Valais/Wallis, 1920, Schweiz/Suisse/Svizzera/Svizra", "lat": "46.10923700", "lng": "7.08453549", "type": "edu", "country": "Switzerland"}, {"name": "EPFL", "source_name": "EPFL", "street_adddress": "Route Cantonale, 1015 Lausanne, Switzerland", "lat": "46.51905570", "lng": "6.56675760", "type": "edu", "country": "Switzerland"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1810.04826.pdf"], "doi": []}, {"id": "4519b43e0d22934a2a3035d45f7626fe2e77643a", "title": "The VOiCES from a Distance Challenge 2019 Evaluation Plan", "addresses": [{"name": "SRI International", "source_name": "SRI International", "street_adddress": "SRI International Building, West 1st Street, Menlo Park, San Mateo County, California, 94025, USA", "lat": "37.45857960", "lng": "-122.17560525", "type": "edu", "country": "United States"}], "year": "2019", "pdf": ["https://arxiv.org/pdf/1902.10828.pdf"], "doi": []}, {"id": "56a01913e58f721e38b5ddf6b30a588c8d4dfc66", "title": "Noise Robust Speaker Recognition Based on Adaptive Frame Weighting in GMM for i-Vector Extraction", "addresses": [{"name": "Tsinghua University", "source_name": "Tsinghua University", "street_adddress": "\u6e05\u534e\u5927\u5b66, 30, \u53cc\u6e05\u8def, \u4e94\u9053\u53e3, \u540e\u516b\u5bb6, \u6d77\u6dc0\u533a, 100084, \u4e2d\u56fd", "lat": "40.00229045", "lng": "116.32098908", "type": "edu", "country": "China"}], "year": "2019", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8653291"]}, {"id": "171f8f1090ef0533ff470ed5a4d31ecfefcc74be", "title": "Audio-Visual Scene Analysis with Self-Supervised Multisensory Features", "addresses": [{"name": "UC Berkeley", "source_name": "UC Berkeley", "street_adddress": "Berkeley, CA, USA", "lat": "37.87189920", "lng": "-122.25853990", "type": "edu", "country": "United States"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1804.03641.pdf"], "doi": []}, {"id": "b808cfac9c44f27d3716f9280dad4dc2a9bbc8df", "title": "FOR MULTI-SPEAKER CONVERSATIONS USING X-VECTORS", "addresses": [{"name": "Johns Hopkins University", "source_name": "Johns Hopkins University", "street_adddress": "Baltimore, MD 21218, USA", "lat": "39.32990130", "lng": "-76.62051770", "type": "edu", "country": "United States"}], "year": "2018", "pdf": ["https://pdfs.semanticscholar.org/b808/cfac9c44f27d3716f9280dad4dc2a9bbc8df.pdf"], "doi": []}, {"id": "2c75e7ee01b362cc5f1ef2e49a84d56af93e6a3d", "title": "Training Speaker Recognition Models with Recording-Level Labels", "addresses": [{"name": "Tallinn University of Technology", "source_name": "Institute of Cybernetics, Tallinn University of Technology", "street_adddress": "Akadeemia tee 21, 12618 Tallinn, Estonia", "lat": "59.39791420", "lng": "24.66085570", "type": "edu", "country": "Estonia"}], "year": "2018", "pdf": [], "doi": ["http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8639601"]}, {"id": "f2640379adc41360adc1ff03c851b30e509e41b8", "title": "End-to-end losses based on speaker basis vectors and all-speaker hard negative mining for speaker verification", "addresses": [{"name": "University of Seoul", "source_name": "University of Seoul, Seoul, Korea", "street_adddress": "163 Seoulsiripdae-ro, Jeonnong 2(i)-dong, Dongdaemun-gu, Seoul, South Korea", "lat": "37.58386570", "lng": "127.05877710", "type": "edu", "country": "South Korea"}], "year": "2019", "pdf": ["https://arxiv.org/pdf/1902.02455.pdf"], "doi": []}, {"id": "0e8cd058ae29c6f60a8750c1df3caa5dc0e99543", "title": "You Said That?: Synthesising Talking Faces from Audio", "addresses": [{"name": "University of Oxford", "source_name": "University of Oxford", "street_adddress": "Radcliffe Camera, Radcliffe Square, Grandpont, Oxford, Oxon, South East, England, OX1 4AJ, UK", "lat": "51.75345380", "lng": "-1.25400997", "type": "edu", "country": "United Kingdom"}], "year": "2019", "pdf": ["http://www.robots.ox.ac.uk/~vgg/publications/2019/Jamaludin19/jamaludin19.pdf"], "doi": ["https://doi.org/10.1007/s11263-019-01150-y"]}, {"id": "fa62bd7c8e29a7f2c2104f7b769b487ab9dad4fb", "title": "Deep Neural Network Embedding Learning with High-Order Statistics for Text-Independent Speaker Verification", "addresses": [{"name": "University of Science and Technology of China", "source_name": "University of Science and Technology of China", "street_adddress": "\u4e2d\u56fd\u79d1\u5b66\u6280\u672f\u5927\u5b66 \u4e1c\u6821\u533a, 96\u53f7, \u91d1\u5be8\u8def, \u6c5f\u6dee\u5316\u80a5\u5382\u5c0f\u533a, \u829c\u6e56\u8def\u8857\u9053, \u5408\u80a5\u5e02\u533a, \u5408\u80a5\u5e02, \u5b89\u5fbd\u7701, 230026, \u4e2d\u56fd", "lat": "31.83907195", "lng": "117.26420748", "type": "edu", "country": "China"}], "year": "2019", "pdf": ["https://arxiv.org/pdf/1903.12058.pdf"], "doi": []}, {"id": "78efccbfd1b1fca267c1b7903d3e4344c9d54ce3", "title": "Symbolic Tensor Neural Networks for Digital Media - from Tensor Processing via BNF Graph Rules to CREAMS Applications", "addresses": [{"name": "Warsaw University of Technology", "source_name": "Warsaw University of Technology", "street_adddress": "Politechnika Warszawska, 1, Plac Politechniki, VIII, \u015ar\u00f3dmie\u015bcie, Warszawa, mazowieckie, 00-661, RP", "lat": "52.22165395", "lng": "21.00735776", "type": "edu", "country": "Poland"}], "year": "2018", "pdf": ["https://arxiv.org/pdf/1809.06582.pdf"], "doi": []}, {"id": "5b65716709a2a7a4da2e2aeb611f82e7aacfbbf0", "title": "Large Margin Softmax Loss for Speaker Verification", "addresses": [{"name": "Tsinghua University", "source_name": "Tsinghua University", "street_adddress": "\u6e05\u534e\u5927\u5b66, 30, \u53cc\u6e05\u8def, \u4e94\u9053\u53e3, \u540e\u516b\u5bb6, \u6d77\u6dc0\u533a, 100084, \u4e2d\u56fd", "lat": "40.00229045", "lng": "116.32098908", "type": "edu", "country": "China"}], "year": "2019", "pdf": ["https://arxiv.org/pdf/1904.03479.pdf"], "doi": []}, {"id": "70cfbf82cee2f007b3c65cf97373f66f13005b42", "title": "Multi-Task Learning with High-Order Statistics for X-vector based Text-Independent Speaker Verification", "addresses": [{"name": "University of Science and Technology of China", "source_name": "University of Science and Technology of China", "street_adddress": "\u4e2d\u56fd\u79d1\u5b66\u6280\u672f\u5927\u5b66 \u4e1c\u6821\u533a, 96\u53f7, \u91d1\u5be8\u8def, \u6c5f\u6dee\u5316\u80a5\u5382\u5c0f\u533a, \u829c\u6e56\u8def\u8857\u9053, \u5408\u80a5\u5e02\u533a, \u5408\u80a5\u5e02, \u5b89\u5fbd\u7701, 230026, \u4e2d\u56fd", "lat": "31.83907195", "lng": "117.26420748", "type": "edu", "country": "China"}], "year": "2019", "pdf": ["https://arxiv.org/pdf/1903.12058.pdf"], "doi": []}]} \ No newline at end of file