summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoradamhrv <adam@ahprojects.com>2019-02-10 20:39:03 +0100
committeradamhrv <adam@ahprojects.com>2019-02-10 20:39:03 +0100
commit14727041f2b54dea9a37ff6e2dfef161b6243556 (patch)
tree62ba1aef6eb80900e67c5cc344300eefbf720ced
parentd213702d4baf7a8c776ef71383346c0d6402106a (diff)
add csv converter for citations
-rw-r--r--megapixels/app/models/citations.py17
-rw-r--r--megapixels/commands/datasets/citations_to_csv.py92
-rw-r--r--megapixels/commands/datasets/ijb_youtube_meta.py146
-rw-r--r--site/datasets/final/brainwash.json1
4 files changed, 255 insertions, 1 deletions
diff --git a/megapixels/app/models/citations.py b/megapixels/app/models/citations.py
new file mode 100644
index 00000000..b0e02fc7
--- /dev/null
+++ b/megapixels/app/models/citations.py
@@ -0,0 +1,17 @@
+from dataclasses import dataclass
+from dataclasses import dataclass
+from mashumaro import DataClassJSONMixin
+
+@dataclass
+class Paper(DataClassJSONMixin):
+ """One flattened paper/location record.
+
+ The citations_to_csv command builds these positionally and converts each
+ one with to_dict() before writing the CSV, so the field order below is part
+ of the interface and must not be rearranged.
+ """
+ # dataset key, e.g. 'brainwash'
+ key: str
+ # display name of the dataset, e.g. 'Brainwash'
+ dataset_name: str
+ # identifier of the paper this row describes
+ paper_id: str
+ title: str
+ # 'main' for the dataset's own paper, 'citation' for a citing paper
+ paper_type: str
+ # publication year; callers pass 0 when the source value is an empty string
+ # NOTE(review): the source JSON sometimes stores year as a string ("2015");
+ # nothing in this class coerces it - confirm whether that matters downstream
+ year: int
+ # callers fill this from the JSON 'pdf' entry
+ paper_url: str = ''
+ # address text of one institution attached to the paper
+ loc: str = ''
+ # address type taken from the JSON, e.g. 'edu'
+ loc_type: str = ''
+ # NOTE(review): the source JSON stores lat/lng as strings ("37.43131385");
+ # they are passed through as-is by the callers - confirm intended dtype
+ lat: float = 0.0
+ lng: float = 0.0 \ No newline at end of file
diff --git a/megapixels/commands/datasets/citations_to_csv.py b/megapixels/commands/datasets/citations_to_csv.py
new file mode 100644
index 00000000..431ee4cd
--- /dev/null
+++ b/megapixels/commands/datasets/citations_to_csv.py
@@ -0,0 +1,92 @@
+import click
+
+from app.utils import click_utils
+from app.utils.logger_utils import Logger
+from app.models.citations import Paper
+
+# Module-level logger handle obtained from the project's Logger helper
+log = Logger.getLogger()
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True,
+    help='Input citations JSON file')
+@click.option('-o', '--output', 'opt_fp_out',
+    help='Output CSV filepath (default: input path with a .csv suffix)')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out):
+    """Convert a dataset citations JSON file to CSV"""
+
+    # ctx is supplied by click.pass_context and is not used here.
+    # Only the imports this command actually uses are kept local.
+    import json
+    from pathlib import Path
+
+    import pandas as pd
+
+    log.info('Convert JSON to CSV')
+
+    # load
+    with open(opt_fp_in, 'r', encoding='utf-8') as fp:
+        json_data = json.load(fp)
+
+    # parse: the dataset's own paper first, then every citing paper
+    dataset_key = json_data['paper']['key']
+    dataset_name = json_data['paper']['name']
+    papers = get_orig_paper(json_data)
+    papers += get_citations(dataset_key, dataset_name, json_data)
+    rows = [p.to_dict() for p in papers]
+
+    # save
+    # Honour an explicit -o value. Previously fp_out was only bound when -o was
+    # omitted, so passing -o raised NameError. with_suffix() also avoids
+    # str.replace() returning the input path unchanged (and overwriting the
+    # JSON) when the input name does not contain '.json'.
+    fp_out = opt_fp_out or str(Path(opt_fp_in).with_suffix('.csv'))
+    log.info(fp_out)
+
+    df_papers = pd.DataFrame(rows)
+    df_papers.index.name = 'index'
+    df_papers.to_csv(fp_out)
+
+
+
+# ----------------------------------------------------------------------------
+# Helpers
+# ----------------------------------------------------------------------------
+def get_citations(dataset_key, dataset_name, json_data):
+    """Build one Paper per (citing paper, address) pair.
+
+    :param dataset_key: key of the dataset being cited, stored on every row
+    :param dataset_name: display name of the dataset, stored on every row
+    :param json_data: parsed dataset JSON; its 'citations' list is read
+    :returns: list of Paper records with paper_type 'citation'. A citation
+      without addresses yields a single record with the location fields left
+      at their defaults.
+    :raises KeyError: if 'citations', or a citation's 'id' or 'title', is missing
+    """
+    papers = []
+    d_type = 'citation'
+    for p in json_data['citations']:
+        # year arrives as an int, a numeric string, or ''; unknown becomes 0
+        try:
+            year = int(p.get('year') or 0)
+        except (TypeError, ValueError):
+            year = 0
+        pdf = p.get('pdf', '')
+        addresses = p.get('addresses') or []
+        if not addresses:
+            # The previous fallback referenced an undefined name and
+            # paper-level keys that citations do not carry, and mislabelled
+            # the row as 'main'.
+            papers.append(Paper(dataset_key, dataset_name, p['id'], p['title'],
+                                d_type, year, pdf))
+            continue
+        # Append inside the loop so every address produces a row; appending
+        # after the loop would keep only the last address.
+        for a in addresses:
+            papers.append(Paper(dataset_key, dataset_name, p['id'], p['title'],
+                                d_type, year, pdf,
+                                a['address'], a['type'], a['lat'], a['lng']))
+    return papers
+
+def get_orig_paper(json_data):
+    """Build the Paper record(s) for the dataset's own ('main') paper.
+
+    :param json_data: parsed dataset JSON; its 'paper' mapping is read
+    :returns: list of Paper records with paper_type 'main', one per address,
+      or a single record without location fields when no address is given
+    :raises KeyError: if 'paper' or one of its 'key', 'name', 'paper_id' or
+      'title' entries is missing
+    """
+    p = json_data['paper']
+    d_type = 'main'
+    # year arrives as an int, a numeric string, or ''; unknown becomes 0
+    try:
+        year = int(p.get('year') or 0)
+    except (TypeError, ValueError):
+        year = 0
+    pdf = p.get('pdf', '')
+    addresses = p.get('address') or []
+    # NOTE(review): the only sample visible here has address == ''. A single
+    # address mapping is wrapped so it is not iterated key by key - confirm
+    # the real schema against other datasets.
+    if isinstance(addresses, dict):
+        addresses = [addresses]
+    if not addresses:
+        return [Paper(p['key'], p['name'], p['paper_id'], p['title'], d_type, year, pdf)]
+    # One row per address; a single trailing append after the loop would keep
+    # only the last address.
+    return [
+        Paper(p['key'], p['name'], p['paper_id'], p['title'], d_type, year, pdf,
+              a['address'], a['type'], a['lat'], a['lng'])
+        for a in addresses
+    ]
\ No newline at end of file
diff --git a/megapixels/commands/datasets/ijb_youtube_meta.py b/megapixels/commands/datasets/ijb_youtube_meta.py
new file mode 100644
index 00000000..87df390c
--- /dev/null
+++ b/megapixels/commands/datasets/ijb_youtube_meta.py
@@ -0,0 +1,146 @@
+"""Scrape YouTube.com page metadata for video URLs in the IJB-C dataset
+
+TODO
+- grey out boxes in sidebar
+- resize driver screenshot area to include author text
+
+Installing webdrivers:
+
+Chrome
+wget https://chromedriver.storage.googleapis.com/73.0.3683.20/chromedriver_linux64.zip
+
+Firefox
+wget https://github.com/mozilla/geckodriver/releases/download/v0.24.0/geckodriver-v0.24.0-linux64.tar.gz
+
+PhantomJS
+npm install -g phantomjs
+"""
+
+import click
+
+from app.settings import app_cfg
+
+# Default input CSVs (cs3 and cs4 media lists); used as the default value of
+# the -i/--input option
+fp_default_in_a = '/data_store/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs3_media.csv'
+fp_default_in_b = '/data_store/datasets/people/ijb_c/downloads/tars/IJB/IJB-C/license/cs4_media.csv'
+fps_default_in = [fp_default_in_a, fp_default_in_b]
+# Default value of the -o/--output option; note this is a CSV file path
+fp_default_out = '/data_store/datasets/people/ijb_c/research/cs3_media_ytmeta.csv'
+
+@click.command()
+@click.option('-i', '--input', 'opt_fp_in', required=True, default=fps_default_in, multiple=True,
+    help='Input license data CSV (may be given more than once)')
+@click.option('-o', '--output', 'opt_fp_out', required=True, default=fp_default_out,
+    help='Output CSV filepath')
+@click.option('-t', '--threads', 'opt_threads', default=4,
+    help='Number of threads')
+@click.pass_context
+def cli(ctx, opt_fp_in, opt_fp_out, opt_threads):
+    """Scrape YouTube page metadata for IJB-C video sources"""
+
+    # ctx is supplied by click.pass_context and is not used here.
+    # Only the imports this command actually uses are kept local.
+    import re
+    import urllib.request
+    from multiprocessing.dummy import Pool as ThreadPool
+
+    import pandas as pd
+    from bs4 import BeautifulSoup
+    from tqdm import tqdm
+
+    from app.utils import file_utils, logger_utils
+
+    log = logger_utils.Logger.getLogger()
+
+    # Each entry maps a <meta> attribute name to
+    # (attribute value to look for on the page, output column name).
+    metavars = [
+        {'name': ('title', 'title')},
+        {'name': ('description', 'description')},
+        {'name': ('keywords', 'keywords')},
+        {'itemprop': ('paid', 'paid')},
+        {'itemprop': ('videoId', 'video_id')},
+        {'itemprop': ('duration', 'duration')},
+        {'itemprop': ('width', 'width')},
+        {'itemprop': ('height', 'height')},
+        {'itemprop': ('isFamilyFriendly', 'is_family_friendly')},
+        {'itemprop': ('interactionCount', 'views')},
+        {'itemprop': ('datePublished', 'date_published')},
+        {'itemprop': ('genre', 'genre')},
+        # previously written to 'genre', which overwrote the genre column
+        {'itemprop': ('unlisted', 'unlisted')},
+    ]
+
+    # ISO 8601 style duration as found in the page's meta tag, e.g. PT4M13S
+    re_duration = re.compile(r'^PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?$')
+
+    def parse_duration(duration):
+        """Return a 'PT#H#M#S' duration in whole seconds, or None if unparsable"""
+        match = re_duration.match(duration.strip())
+        if not match or not any(match.groups()):
+            return None
+        hours, mins, secs = (int(g or 0) for g in match.groups())
+        return hours * 3600 + mins * 60 + secs
+
+    def pool_process(media_item):
+        """Fetch one media URL and add the scraped meta values to its record"""
+        # threaded function; pbar is bound below before the pool starts
+        try:
+            url = media_item['media_url'].strip()
+            url = url.replace('http:', 'https:')
+            url = url.replace('www.youtube', 'youtube')
+            log.debug(f'get: {url}')
+            with urllib.request.urlopen(url, timeout=60) as response:
+                data = response.read()
+            soup = BeautifulSoup(data, 'lxml')
+            for metavar in metavars:
+                propname, (propval, colname) = next(iter(metavar.items()))
+                content = soup.find('meta', attrs={propname: propval})
+                if content:
+                    media_item[colname] = content.get('content', '')
+            if 'duration' in media_item:
+                # was mins + 60 * secs, which swapped the two units
+                seconds = parse_duration(media_item['duration'])
+                if seconds is not None:
+                    media_item['duration'] = seconds
+            if 'paid' in media_item:
+                media_item['paid'] = int(media_item['paid'] == 'True')
+            if 'is_family_friendly' in media_item:
+                media_item['is_family_friendly'] = int(media_item['is_family_friendly'] == 'True')
+        except Exception as e:
+            # Deliberately best-effort: a dead or private video must not abort
+            # the run; the record is returned with whatever was collected.
+            log.debug(f'Error: {e}, {media_item.get("media_url")}')
+        pbar.update(1)
+        return media_item
+
+    # read CSVs and get URLs
+    # Iterate the files given by -i; the module-level defaults were previously
+    # read unconditionally, so the option had no effect.
+    frames = []
+    for fp in opt_fp_in:
+        df = pd.read_csv(fp)
+        log.info(f'reading {len(df)} rows')
+        frames.append(df)
+    # DataFrame.append was removed in pandas 2.0; concat is the replacement
+    df_media = pd.concat(frames, ignore_index=True)
+
+    name_maps = {
+        'Media ID': 'media_id',
+        'Media URL': 'media_url',
+        'Source URL': 'source_url',
+        'Attribution': 'attribution',
+        'CC License': 'cc_license',
+    }
+    df_media = df_media.rename(columns=name_maps)
+    log.info(f'{len(df_media)} rows')
+    # keep only video entries; rows without a media_id are dropped
+    df_media = df_media[df_media.media_id.str.contains("video/", na=False)]
+    log.info(f'{len(df_media)} rows')
+    # reassign instead of inplace=True, which acted on a filtered slice
+    df_media = df_media.drop_duplicates(subset=['media_url'], keep='first')
+    log.info(f'{len(df_media)} rows')
+    media_items = df_media.to_dict('records')
+
+    # One progress bar and one pool, both released when the block exits
+    with ThreadPool(opt_threads) as pool, tqdm(total=len(media_items)) as pbar:
+        results = pool.map(pool_process, media_items)
+
+    # create DataFrame and save to CSV
+    file_utils.mkdirs(opt_fp_out)
+    df = pd.DataFrame(results)
+    df.index.name = 'index'
+    df.to_csv(opt_fp_out)
\ No newline at end of file
diff --git a/site/datasets/final/brainwash.json b/site/datasets/final/brainwash.json
deleted file mode 100644
index 2fab71cf..00000000
--- a/site/datasets/final/brainwash.json
+++ /dev/null
@@ -1 +0,0 @@
-{"id": "214c966d1f9c2a4b66f4535d9a0d4078e63a5867", "paper": {"paper_id": "214c966d1f9c2a4b66f4535d9a0d4078e63a5867", "key": "brainwash", "title": "Brainwash: A Data System for Feature Engineering", "year": 2013, "pdf": "http://pdfs.semanticscholar.org/ae44/8015b2ff2bd3b8a5c9a3266f954f5af9ffa9.pdf", "address": "", "name": "Brainwash"}, "address": "", "additional_papers": [], "citations": [{"id": "7ee17d2001c9fcef63e3a56610cacc743861d944", "title": "Extracting Databases from Dark Data with DeepDive", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}, {"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}], "year": 2016, "pdf": "http://cs.stanford.edu/people/chrismre/papers/modiv923-zhangA.pdf"}, {"id": "47de0569259e6a420c3eda69cdebf01bf85a1acd", "title": "An Integrated Development Environment for Faster Feature Engineering", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}], "year": 2014, "pdf": "http://pdfs.semanticscholar.org/733c/8af7e2211c9311054e5cef90976f4e9efc8c.pdf"}, {"id": "f312fce73aabd97bf4fc02fe2829f6959e251b1e", "title": "Runtime Support for Human-in-the-Loop Feature Engineering System", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/f312/fce73aabd97bf4fc02fe2829f6959e251b1e.pdf"}, {"id": "7033b916a7f2510ca9766b7a8ed15920a9f9e2f3", "title": "Which concepts are worth extracting?", "addresses": [{"address": "Oregon State University", "lat": "45.51982890", "lng": "-122.67797964", "type": "edu"}, {"address": "University of Illinois, Urbana-Champaign", "lat": "40.11116745", "lng": "-88.22587665", "type": "edu"}], "year": 2014, "pdf": "http://doi.acm.org/10.1145/2588555.2610496"}, {"id": "fb22404e46dd46b2c2cb9a85227a1ab6a8ae4f52", "title": "Micro-analytics for Student 
Performance Prediction Leveraging fine-grained learning analytics to predict performance", "addresses": [{"address": "Dublin City University", "lat": "53.38522185", "lng": "-6.25740874", "type": "edu"}], "year": "2015", "pdf": "https://pdfs.semanticscholar.org/fb22/404e46dd46b2c2cb9a85227a1ab6a8ae4f52.pdf"}, {"id": "533d91cbb5e306c96b71b6f776382f3956e5dc7d", "title": "Faster Feature Engineering by Approximate Evaluation", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/533d/91cbb5e306c96b71b6f776382f3956e5dc7d.pdf"}, {"id": "45619a2b7b41fea02345badf880530519d3d4c8f", "title": "Learning Generalized Linear Models Over Normalized Data", "addresses": [{"address": "University of Wisconsin Madison", "lat": "43.07982815", "lng": "-89.43066425", "type": "edu"}], "year": 2015, "pdf": "http://cseweb.ucsd.edu/~arunkk/orion/LearningOverJoinsSIGMOD.pdf"}, {"id": "f4065d13bcad78b563108075f650c29a2f3f1917", "title": "Cost Effective Conceptual Design for Semantic Annotation", "addresses": [{"address": "Oregon State University", "lat": "45.51982890", "lng": "-122.67797964", "type": "edu"}, {"address": "University of Illinois, Urbana-Champaign", "lat": "40.11116745", "lng": "-88.22587665", "type": "edu"}], "year": "2014", "pdf": "https://pdfs.semanticscholar.org/f406/5d13bcad78b563108075f650c29a2f3f1917.pdf"}, {"id": "58f445fa45d4bdafac43893a55b21348f9e1e6c2", "title": "To Join or Not to Join?: Thinking Twice about Joins before Feature Selection", "addresses": [{"address": "University of Wisconsin Madison", "lat": "43.07982815", "lng": "-89.43066425", "type": "edu"}], "year": 2016, "pdf": "http://cseweb.ucsd.edu/~arunkk/hamlet/OptFSSIGMOD.pdf"}, {"id": "40b2652cf3bdee159dacb6e18c761003c31f4205", "title": "Database Learning: Toward a Database that Becomes Smarter Every Time", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": 
"-83.71003894", "type": "edu"}], "year": 2017, "pdf": "http://web.eecs.umich.edu/~mozafari/php/data/uploads/dbl_techreport.pdf"}, {"id": "cb1e34d7fcb7fae914fcb65cb9cf25199d49cec9", "title": "SLAQ: quality-driven scheduling for distributed machine learning", "addresses": [{"address": "Princeton University", "lat": "40.34829285", "lng": "-74.66308325", "type": "edu"}], "year": "2017", "pdf": "https://arxiv.org/pdf/1802.04819.pdf"}, {"id": "8b162c2a15bc7aa56cdc1be9773611bc21536782", "title": "On Automating Basic Data Curation Tasks", "addresses": [{"address": "University of New South Wales", "lat": "-33.91758275", "lng": "151.23124025", "type": "edu"}], "year": 2017, "pdf": "http://pdfs.semanticscholar.org/8b16/2c2a15bc7aa56cdc1be9773611bc21536782.pdf"}, {"id": "22a12ec4258f223b43761e5c4729787d1aaa623b", "title": "Optimal Bloom Filters and Adaptive Merging for LSM-Trees", "addresses": [{"address": "Harvard University", "lat": "42.36782045", "lng": "-71.12666653", "type": "edu"}], "year": "2018", "pdf": "https://pdfs.semanticscholar.org/1dcd/a26904637114f5a035d60cdbc4c4eae1c2d2.pdf"}, {"id": "3027727790598d913a8ff9a1bab4538176ad9fc8", "title": "BlinkML : Approximate Machine Learning with Probabilistic Guarantees", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}], "year": "2018", "pdf": "https://pdfs.semanticscholar.org/3027/727790598d913a8ff9a1bab4538176ad9fc8.pdf"}, {"id": "c595863b90b904a7b3197667b62efa16b0fd5ff6", "title": "Are Key-Foreign Key Joins Safe to Avoid when Learning High-Capacity Classifiers?", "addresses": [{"address": "University of California, San Diego", "lat": "32.87935255", "lng": "-117.23110049", "type": "edu"}, {"address": "University of Wisconsin Madison", "lat": "43.07982815", "lng": "-89.43066425", "type": "edu"}], "year": "2017", "pdf": "https://arxiv.org/pdf/1704.00485.pdf"}, {"id": "36d858eb19bba43244b92f7faabfce47b13f2403", "title": "Materialization optimizations for feature 
selection workloads", "addresses": [{"address": "University of Wisconsin Madison", "lat": "43.07982815", "lng": "-89.43066425", "type": "edu"}, {"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2014, "pdf": "http://doi.acm.org/10.1145/2877204"}, {"id": "9d4ebcd84c4ba2241cca3242e22888558b62a0e0", "title": "Demonstration of Santoku: Optimizing Machine Learning over Normalized Data", "addresses": [{"address": "University of Wisconsin Madison", "lat": "43.07982815", "lng": "-89.43066425", "type": "edu"}], "year": 2015, "pdf": "http://pdfs.semanticscholar.org/9d4e/bcd84c4ba2241cca3242e22888558b62a0e0.pdf"}, {"id": "75f302f1372136c5e43e523bacc0a2ddf04c3237", "title": "Schema Independent Relational Learning", "addresses": [{"address": "Oregon State University", "lat": "45.51982890", "lng": "-122.67797964", "type": "edu"}], "year": "2017", "pdf": "https://arxiv.org/pdf/1508.03846.pdf"}, {"id": "11b45236b2798091ddab35c572a35f447bb8d717", "title": "The Case for Personal Data-Driven Decision Making", "addresses": [{"address": "MIT CSAIL", "lat": "42.36194070", "lng": "-71.09043780", "type": "edu"}], "year": 2014, "pdf": "http://pdfs.semanticscholar.org/4b6c/d79708cc7c1059dcad50b0217f9700ae0228.pdf"}, {"id": "75a54f49fccee29faca8931fa8ba700030dcaa75", "title": "Ringtail: A Generalized Nowcasting System", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}], "year": 2013, "pdf": "http://pdfs.semanticscholar.org/75a5/4f49fccee29faca8931fa8ba700030dcaa75.pdf"}, {"id": "c5ee2621e5a0692677890df9a10963293ab14fc2", "title": "Feature Engineering for Knowledge Base Construction", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}, {"address": "University of Wisconsin Madison", "lat": "43.07982815", "lng": "-89.43066425", "type": "edu"}], "year": 2014, "pdf": 
"http://pdfs.semanticscholar.org/c5ee/2621e5a0692677890df9a10963293ab14fc2.pdf"}, {"id": "48b9f9ddf17bd29b957b09f9000576e53acf8719", "title": "Ringtail: Feature Selection For Easier Nowcasting", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}], "year": 2013, "pdf": "http://pdfs.semanticscholar.org/48b9/f9ddf17bd29b957b09f9000576e53acf8719.pdf"}, {"id": "e4444820fb3f6d1f41c6ea51c6b2ab8ceb04a3a5", "title": "View-Driven Deduplication with Active Learning", "addresses": [{"address": "University of Washington", "lat": "47.65432380", "lng": "-122.30800894", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/e444/4820fb3f6d1f41c6ea51c6b2ab8ceb04a3a5.pdf"}, {"id": "0a850a9fc853c358aea1167e1f965cda8980b7fd", "title": "INDREX: in-database distributional relation extraction", "addresses": [{"address": "University of Toronto", "lat": "43.66333345", "lng": "-79.39769975", "type": "edu"}], "year": 2013, "pdf": "http://www.cs.toronto.edu/~periklis/pubs/dolap13.pdf"}, {"id": "0c6fa98b7b99d807df7c027e8e97751f1bbb9140", "title": "Data programming with DDLite: putting humans in a different part of the loop", "addresses": [{"address": "Stanford University", "lat": "37.43131385", "lng": "-122.16936535", "type": "edu"}], "year": 2016, "pdf": "http://cs.stanford.edu/people/chrismre/papers/DDL_HILDA_2016.pdf"}, {"id": "0a267d927cfae039cf0a9c995a59ded563344eb6", "title": "Model Selection Management Systems: The Next Frontier of Advanced Analytics", "addresses": [{"address": "University of Wisconsin Madison", "lat": "43.07982815", "lng": "-89.43066425", "type": "edu"}], "year": 2015, "pdf": "http://pages.cs.wisc.edu/~arun/vision/SIGMODRecord15.pdf"}, {"id": "67296e6cd0084c301339889c4ef1f71a04406b3d", "title": "The Periodic Table of Data Structures", "addresses": [{"address": "Harvard University", "lat": "42.36782045", "lng": "-71.12666653", "type": "edu"}], "year": "2018", "pdf": 
"https://pdfs.semanticscholar.org/be6a/719f7a90611d67cc66a961ea76f70af00cb1.pdf"}, {"id": "47f84928dd6e40797255fa1e1bbb3c12b2659a7c", "title": "Input selection for fast feature engineering", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}], "year": 2016, "pdf": "http://web.eecs.umich.edu/~mrander/pubs/ICDE16_research_459.pdf"}, {"id": "5b600cfabfb3c99085ca949fc432684e7ac86471", "title": "Representation Independent Analytics Over Structured Data", "addresses": [{"address": "Oregon State University", "lat": "45.51982890", "lng": "-122.67797964", "type": "edu"}, {"address": "Northeastern University", "lat": "42.33836680", "lng": "-71.08793524", "type": "edu"}, {"address": "University of Illinois, Urbana-Champaign", "lat": "40.11116745", "lng": "-88.22587665", "type": "edu"}], "year": 2014, "pdf": "http://pdfs.semanticscholar.org/a708/7033a0f649a13c30ae2eb35f670456988636.pdf"}, {"id": "8d7d02bdd3a6dfc01982468ed3eb4e66d99a302f", "title": "Data Curation APIs", "addresses": [{"address": "University of New South Wales", "lat": "-33.91758275", "lng": "151.23124025", "type": "edu"}], "year": 2016, "pdf": "http://pdfs.semanticscholar.org/8d7d/02bdd3a6dfc01982468ed3eb4e66d99a302f.pdf"}, {"id": "12dcb25f10d42ad2b4352ba9fe7a6a32ee2635a6", "title": "The Automatic Scientist will be a Data System", "addresses": [{"address": "Harvard University", "lat": "42.36782045", "lng": "-71.12666653", "type": "edu"}], "year": 2017, "pdf": "http://pdfs.semanticscholar.org/131d/9156cf5307c35b5186e6fa700d3a3d111059.pdf"}, {"id": "d0a188debff9baca296787dfb207f151cb78300a", "title": "Physical Representation-based Predicate Optimization for a Visual Analytics Database", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}, {"address": "Toyota Research Institute", "lat": "37.40253645", "lng": "-122.11655107", "type": "edu"}], "year": "2018", "pdf": 
"https://arxiv.org/pdf/1806.04226.pdf"}, {"id": "577c1d59e43f04a4bfda95b0b9e3b41d893bc0a2", "title": "Faster Evaluation of Labor-Intensive Features", "addresses": [{"address": "University of Michigan", "lat": "42.29421420", "lng": "-83.71003894", "type": "edu"}], "year": "2015", "pdf": "https://pdfs.semanticscholar.org/4711/5441c31029bd5ed6ba139c873d6a5aec2be4.pdf"}, {"id": "cada850299f0aa71ecd9b37a2496802ad8d48455", "title": "Cost-effective conceptual design using taxonomies", "addresses": [{"address": "University of Illinois, Urbana-Champaign", "lat": "40.11116745", "lng": "-88.22587665", "type": "edu"}, {"address": "Oregon State University", "lat": "45.51982890", "lng": "-122.67797964", "type": "edu"}], "year": "2018", "pdf": "https://arxiv.org/pdf/1503.05656.pdf"}]} \ No newline at end of file