diff options
| -rw-r--r-- | scraper/client/actions.js | 5 | ||||
| -rw-r--r-- | scraper/client/app.js | 7 | ||||
| -rw-r--r-- | scraper/client/common/common.css | 1 | ||||
| -rw-r--r-- | scraper/client/common/header.component.js | 21 | ||||
| -rw-r--r-- | scraper/client/common/table.component.js | 15 | ||||
| -rw-r--r-- | scraper/client/paper/index.js | 8 | ||||
| -rw-r--r-- | scraper/client/paper/paper.container.js | 31 | ||||
| -rw-r--r-- | scraper/client/paper/paper.css | 3 | ||||
| -rw-r--r-- | scraper/client/paper/paper.info.js | 57 | ||||
| -rw-r--r-- | scraper/client/store.js | 2 | ||||
| -rw-r--r-- | scraper/s2-final-report.py | 44 | ||||
| -rw-r--r-- | scraper/util.py | 26 |
12 files changed, 181 insertions, 39 deletions
diff --git a/scraper/client/actions.js b/scraper/client/actions.js index f281a34c..ee3511bc 100644 --- a/scraper/client/actions.js +++ b/scraper/client/actions.js @@ -18,6 +18,11 @@ export const getPapers = () => dispatch => { api(dispatch, get, 'papers', '/api/papers', {}) } +export const getPaperInfo = (key) => dispatch => { + api(dispatch, get, 'paperInfo', '/reports/datasets/final/' + key + '.json', {}) + api(dispatch, get, 'unknownCitations', '/reports/datasets/unknown/' + key + '.json', {}) +} + export const postAddress = data => dispatch => { api(dispatch, post, 'address', '/api/address', data) } diff --git a/scraper/client/app.js b/scraper/client/app.js index 81152c0c..5a0e5cc7 100644 --- a/scraper/client/app.js +++ b/scraper/client/app.js @@ -3,9 +3,7 @@ import { ConnectedRouter } from 'connected-react-router' import { Route, Switch } from 'react-router' import { Header, Footer } from './common' -// import * as Metadata from './metadata' -// import * as Search from './search' -// import * as Review from './review' +import * as Paper from './paper' export default class App extends Component { render() { @@ -15,6 +13,9 @@ export default class App extends Component { <Header /> <div className='app'> <div className='body'> + <Switch> + <Route exact path="/paper/:key/" component={Paper.Container} /> + </Switch> </div> </div> <Footer /> diff --git a/scraper/client/common/common.css b/scraper/client/common/common.css index 4b939df0..b014541a 100644 --- a/scraper/client/common/common.css +++ b/scraper/client/common/common.css @@ -65,6 +65,7 @@ header > section { display: flex; flex: 1 0; font-weight: bold; + padding: 10px; } header > section:last-of-type { justify-content: flex-end; diff --git a/scraper/client/common/header.component.js b/scraper/client/common/header.component.js index d82a8db0..5a100e90 100644 --- a/scraper/client/common/header.component.js +++ b/scraper/client/common/header.component.js @@ -1,8 +1,10 @@ import React, { Component } from 'react' -import { NavLink } from 'react-router-dom' +// import { NavLink } from 'react-router-dom' import { bindActionCreators } from 'redux' import { connect } from 'react-redux' +import { history } from '../store' + import * as actions from '../actions' class Header extends Component { @@ -11,19 +13,26 @@ class Header extends Component { this.props.actions.getPapers() } + pickPaper(e) { + console.log(e.target.value) + history.push('/paper/' + e.target.value) + // this.props.actions.getPaperData(e.target.value) + } + render() { - console.log(this.props) let { papers } = this.props.api.papers papers = papers || {} const paperOptions = Object.keys(papers).map(key => ( <option key={key} value={key}>{papers[key][1]}</option> )) - console.log(papers) return ( <header> - <select> - {paperOptions} - </select> + <section> + <select onChange={this.pickPaper.bind(this)}> + {paperOptions} + </select> + </section> + <section></section> </header> ) } diff --git a/scraper/client/common/table.component.js b/scraper/client/common/table.component.js index 76a1d57c..f9be0669 100644 --- a/scraper/client/common/table.component.js +++ b/scraper/client/common/table.component.js @@ -12,9 +12,12 @@ export function TableObject({ tag, object, order, summary }) { if (object.err) { return <div className='tableObject error'>{tag}{' Error: '}{object.err}</div> } - let objects = Object.keys(object) + let keys = Object.keys(object) if (order) { - const grouped = objects.reduce((a, b) => { + const grouped = keys.reduce((a, b) => { + if (summary && !object[b].trim().length) { + return a + } const index = order.indexOf(b) if (index !== -1) { a.order.push([index, b]) @@ -23,23 +26,23 @@ export function TableObject({ tag, object, order, summary }) { } return a }, { order: [], alpha: [] }) - objects = grouped.order + keys = grouped.order .sort((a, b) => a[0] - b[0]) .map(([i, s]) => s) if (!summary) { - objects = objects + keys = keys // .concat([__HR__]) .concat(grouped.alpha.sort()) } } else { - objects = objects.sort() + keys = keys.sort() } return ( <div> {tag && <h3>{tag}</h3>} <table className={'tableObject ' + tag}> <tbody> - {objects.map((key, i) => ( + {keys.map((key, i) => ( <TableRow key={key + '_' + i} name={key} value={object[key]} /> ))} </tbody> diff --git a/scraper/client/paper/index.js b/scraper/client/paper/index.js new file mode 100644 index 00000000..60206179 --- /dev/null +++ b/scraper/client/paper/index.js @@ -0,0 +1,8 @@ +import Container from './paper.container' +import './paper.css' + +// import './search.css' + +export { + Container +} diff --git a/scraper/client/paper/paper.container.js b/scraper/client/paper/paper.container.js new file mode 100644 index 00000000..eeb0dfa3 --- /dev/null +++ b/scraper/client/paper/paper.container.js @@ -0,0 +1,31 @@ +import React, { Component } from 'react' +import { bindActionCreators } from 'redux' +import { connect } from 'react-redux' + +import * as actions from '../actions' + +import { Loader } from '../common' +import PaperInfo from './paper.info' + +class PaperContainer extends Component { + componentDidMount() { + this.props.actions.getPaperInfo(this.props.match.params.key) + } + + render() { + if (this.props.api.paperInfo.loading) return <Loader /> + if (!this.props.api.paperInfo.dataset) return null + return ( + <PaperInfo /> + ) + } +} + +const mapStateToProps = state => ({ + api: state.api, +}) +const mapDispatchToProps = dispatch => ({ + actions: bindActionCreators({ ...actions }, dispatch), +}) + +export default connect(mapStateToProps, mapDispatchToProps)(PaperContainer) diff --git a/scraper/client/paper/paper.css b/scraper/client/paper/paper.css new file mode 100644 index 00000000..c1a775c1 --- /dev/null +++ b/scraper/client/paper/paper.css @@ -0,0 +1,3 @@ +.paperInfo { + padding: 10px; +}
\ No newline at end of file diff --git a/scraper/client/paper/paper.info.js b/scraper/client/paper/paper.info.js new file mode 100644 index 00000000..dab4ce5b --- /dev/null +++ b/scraper/client/paper/paper.info.js @@ -0,0 +1,57 @@ +import React, { Component } from 'react' +import { bindActionCreators } from 'redux' +import { connect } from 'react-redux' + +import * as actions from '../actions' + +import { TableObject } from '../common' + +class PaperInfo extends Component { + render() { + const { paperInfo, unknownCitations } = this.props.api + const { dataset, statistics, address } = paperInfo + return ( + <div className='paperInfo'> + <h2>{dataset.name_full}</h2> + <TableObject summary + tag="Dataset" + object={dataset} + order={['key', 'name_full', 'relevance', 'subset_of', 'superset_of']} + /> + <TableObject summary + tag="Statistics" + object={statistics} + order={['year_published', 'purpose_short', + 'wild', 'indoor', 'outdoor', 'cyberspace', + 'names', 'downloaded', + 'year_published', 'year_start', 'year_end', 'ongoing', 'images', 'videos', + 'faces_unique', 'total_faces', 'img_per_person', 'num_cameras', 'faces_persons', 'female', 'male', + 'landmarks', 'width', 'height', + 'comment', + ]} + /> + <TableObject + tag="Address" + object={address} + order={['address', 'type', 'lat', 'lng']} + /> + <TableObject summary + tag="Citations" + object={{ + 'geocoded': paperInfo.citations.length, + 'unknown': unknownCitations.citations ? unknownCitations.citations.length : 'Loading', + }} + /> + </div> + ) + } +} + +const mapStateToProps = state => ({ + api: state.api +}) +const mapDispatchToProps = dispatch => ({ + actions: bindActionCreators({ ...actions }, dispatch), +}) + +export default connect(mapStateToProps, mapDispatchToProps)(PaperInfo) diff --git a/scraper/client/store.js b/scraper/client/store.js index 9eae8939..3bf64114 100644 --- a/scraper/client/store.js +++ b/scraper/client/store.js @@ -11,6 +11,8 @@ const initialState = () => ({ institutions: {}, papers: {}, address: {}, + paperInfo: {}, + unknownCitations: {}, options: {} }) diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py index 58ac481f..283ca4fc 100644 --- a/scraper/s2-final-report.py +++ b/scraper/s2-final-report.py @@ -11,18 +11,18 @@ from util import * DIR_PUBLIC_CITATIONS = "../site/datasets/final" DIR_UNKNOWN_CITATIONS = "../site/datasets/unknown" +addresses = AddressBook() + @click.command() def s2_final_report(): - addresses = AddressBook() megapixels = load_megapixels_lookup() - ft_lookup = load_ft_lookup() - for key, row in megapixels.items(): - print(key) - ft_share = ft_lookup[key] - if ft_share: - paper_data = process_paper(row, addresses) + items = [] + for key, item in megapixels.items(): + if item['dataset']['ft_share'] == '1': + items.append((item,)) + parallelize(process_paper, items) -def process_paper(row, addresses): +def process_paper(row): aggregate_citations = {} unknown_citations = {} address = None @@ -39,6 +39,8 @@ def process_paper(row, addresses): with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f: json.dump({ 'id': papers[0]['paper_id'], + 'dataset': row['dataset'], + 'statistics': row['statistics'], 'paper': papers[0], 'address': address, 'additional_papers': papers[1:], @@ -47,7 +49,6 @@ def process_paper(row, addresses): with open('{}/{}.json'.format(DIR_UNKNOWN_CITATIONS, row['key']), 'w') as f: json.dump({ 'id': papers[0]['paper_id'], - 'paper': papers[0], 'citations': [unknown_citations[key] for key in unknown_citations.keys()], }, f) @@ -161,33 +162,30 @@ def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_ } return res -def load_ft_lookup(): - keys, rows = fetch_google_sheet('datasets') - lookup = {} - for row in rows: - rec = {} - for index, key in enumerate(keys): - rec[key] = row[index] - if rec['ft_share'] == '1' or rec['ft_share'] == 1: - lookup[rec['key']] = True - else: - lookup[rec['key']] = False - return lookup - def load_megapixels_lookup(): keys, rows = fetch_google_sheet('citation_lookup') + dataset_lookup = fetch_google_lookup('datasets') + statistics_lookup = fetch_google_lookup('statistics') lookup = {} for row in rows: rec = {} for index, key in enumerate(keys): rec[key] = row[index] - if rec['paper_id'] == "": + if rec['paper_id'] == "" or (rec['verified'] != 1 and rec['verified'] != '1'): continue paper_key = rec['key'] if paper_key not in lookup: rec['paper_ids'] = [] lookup[paper_key] = rec lookup[paper_key]['paper_ids'].append(rec['paper_id']) + if paper_key in dataset_lookup: + lookup[paper_key]['dataset'] = dataset_lookup[paper_key] + else: + print("not in datasets lookup:", paper_key) + if paper_key in statistics_lookup: + lookup[paper_key]['statistics'] = statistics_lookup[paper_key] + else: + print("not in statistics lookup:", paper_key) # recs.append(rec) return lookup diff --git a/scraper/util.py b/scraper/util.py index 9b47510a..6c671cec 100644 --- a/scraper/util.py +++ b/scraper/util.py @@ -319,8 +319,12 @@ def file_path(key, paper_id, fn): def parallelize(func, rows): print("Fetching {} items".format(len(rows))) + if hasattr(os, 'sched_getaffinity'): + processCount = len(os.sched_getaffinity(0)) + else: + processCount = 4 chunksize = 3 - with Pool(processes=len(os.sched_getaffinity(0))) as pool: + with Pool(processes=processCount) as pool: pool.starmap(func, rows, chunksize) def fetch_paper(s2, paper_id): @@ -359,3 +363,23 @@ def fetch_google_sheet(name="institutions"): keys = rows[0] lines = rows[1:] return keys, lines + +def fetch_google_sheet_objects(name): + keys, rows = fetch_google_sheet(name) + recs = [] + for row in rows: + rec = {} + for index, key in enumerate(keys): + rec[key] = row[index] + recs.append(rec) + return recs + +def fetch_google_lookup(name, item_key='key'): + keys, rows = fetch_google_sheet(name) + lookup = {} + for row in rows: + rec = {} + for index, key in enumerate(keys): + rec[key] = row[index] + lookup[rec[item_key]] = rec + return lookup |
