summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJules Laplace <julescarbon@gmail.com>2019-02-13 16:46:10 +0100
committerJules Laplace <julescarbon@gmail.com>2019-02-13 16:46:10 +0100
commitd0dc5cd83f1c436185d247600c3c5be9360bf1ca (patch)
tree92db65b2a525b6512fd7f5349da561c476fe997e
parent1563d1da307a78ddc388483fd95a68a511e18048 (diff)
displaying more info about the papers
-rw-r--r--scraper/client/actions.js5
-rw-r--r--scraper/client/app.js7
-rw-r--r--scraper/client/common/common.css1
-rw-r--r--scraper/client/common/header.component.js21
-rw-r--r--scraper/client/common/table.component.js15
-rw-r--r--scraper/client/paper/index.js8
-rw-r--r--scraper/client/paper/paper.container.js31
-rw-r--r--scraper/client/paper/paper.css3
-rw-r--r--scraper/client/paper/paper.info.js57
-rw-r--r--scraper/client/store.js2
-rw-r--r--scraper/s2-final-report.py44
-rw-r--r--scraper/util.py26
12 files changed, 181 insertions, 39 deletions
diff --git a/scraper/client/actions.js b/scraper/client/actions.js
index f281a34c..ee3511bc 100644
--- a/scraper/client/actions.js
+++ b/scraper/client/actions.js
@@ -18,6 +18,11 @@ export const getPapers = () => dispatch => {
api(dispatch, get, 'papers', '/api/papers', {})
}
+export const getPaperInfo = (key) => dispatch => {
+ api(dispatch, get, 'paperInfo', '/reports/datasets/final/' + key + '.json', {})
+ api(dispatch, get, 'unknownCitations', '/reports/datasets/unknown/' + key + '.json', {})
+}
+
export const postAddress = data => dispatch => {
api(dispatch, post, 'address', '/api/address', data)
}
diff --git a/scraper/client/app.js b/scraper/client/app.js
index 81152c0c..5a0e5cc7 100644
--- a/scraper/client/app.js
+++ b/scraper/client/app.js
@@ -3,9 +3,7 @@ import { ConnectedRouter } from 'connected-react-router'
import { Route, Switch } from 'react-router'
import { Header, Footer } from './common'
-// import * as Metadata from './metadata'
-// import * as Search from './search'
-// import * as Review from './review'
+import * as Paper from './paper'
export default class App extends Component {
render() {
@@ -15,6 +13,9 @@ export default class App extends Component {
<Header />
<div className='app'>
<div className='body'>
+ <Switch>
+ <Route exact path="/paper/:key/" component={Paper.Container} />
+ </Switch>
</div>
</div>
<Footer />
diff --git a/scraper/client/common/common.css b/scraper/client/common/common.css
index 4b939df0..b014541a 100644
--- a/scraper/client/common/common.css
+++ b/scraper/client/common/common.css
@@ -65,6 +65,7 @@ header > section {
display: flex;
flex: 1 0;
font-weight: bold;
+ padding: 10px;
}
header > section:last-of-type {
justify-content: flex-end;
diff --git a/scraper/client/common/header.component.js b/scraper/client/common/header.component.js
index d82a8db0..5a100e90 100644
--- a/scraper/client/common/header.component.js
+++ b/scraper/client/common/header.component.js
@@ -1,8 +1,10 @@
import React, { Component } from 'react'
-import { NavLink } from 'react-router-dom'
+// import { NavLink } from 'react-router-dom'
import { bindActionCreators } from 'redux'
import { connect } from 'react-redux'
+import { history } from '../store'
+
import * as actions from '../actions'
class Header extends Component {
@@ -11,19 +13,26 @@ class Header extends Component {
this.props.actions.getPapers()
}
+ pickPaper(e) {
+ console.log(e.target.value)
+ history.push('/paper/' + e.target.value)
+ // this.props.actions.getPaperData(e.target.value)
+ }
+
render() {
- console.log(this.props)
let { papers } = this.props.api.papers
papers = papers || {}
const paperOptions = Object.keys(papers).map(key => (
<option key={key} value={key}>{papers[key][1]}</option>
))
- console.log(papers)
return (
<header>
- <select>
- {paperOptions}
- </select>
+ <section>
+ <select onChange={this.pickPaper.bind(this)}>
+ {paperOptions}
+ </select>
+ </section>
+ <section></section>
</header>
)
}
diff --git a/scraper/client/common/table.component.js b/scraper/client/common/table.component.js
index 76a1d57c..f9be0669 100644
--- a/scraper/client/common/table.component.js
+++ b/scraper/client/common/table.component.js
@@ -12,9 +12,12 @@ export function TableObject({ tag, object, order, summary }) {
if (object.err) {
return <div className='tableObject error'>{tag}{' Error: '}{object.err}</div>
}
- let objects = Object.keys(object)
+ let keys = Object.keys(object)
if (order) {
- const grouped = objects.reduce((a, b) => {
+ const grouped = keys.reduce((a, b) => {
+ if (summary && !object[b].trim().length) {
+ return a
+ }
const index = order.indexOf(b)
if (index !== -1) {
a.order.push([index, b])
@@ -23,23 +26,23 @@ export function TableObject({ tag, object, order, summary }) {
}
return a
}, { order: [], alpha: [] })
- objects = grouped.order
+ keys = grouped.order
.sort((a, b) => a[0] - b[0])
.map(([i, s]) => s)
if (!summary) {
- objects = objects
+ keys = keys
// .concat([__HR__])
.concat(grouped.alpha.sort())
}
} else {
- objects = objects.sort()
+ keys = keys.sort()
}
return (
<div>
{tag && <h3>{tag}</h3>}
<table className={'tableObject ' + tag}>
<tbody>
- {objects.map((key, i) => (
+ {keys.map((key, i) => (
<TableRow key={key + '_' + i} name={key} value={object[key]} />
))}
</tbody>
diff --git a/scraper/client/paper/index.js b/scraper/client/paper/index.js
new file mode 100644
index 00000000..60206179
--- /dev/null
+++ b/scraper/client/paper/index.js
@@ -0,0 +1,8 @@
+import Container from './paper.container'
+import './paper.css'
+
+// import './search.css'
+
+export {
+ Container
+}
diff --git a/scraper/client/paper/paper.container.js b/scraper/client/paper/paper.container.js
new file mode 100644
index 00000000..eeb0dfa3
--- /dev/null
+++ b/scraper/client/paper/paper.container.js
@@ -0,0 +1,31 @@
+import React, { Component } from 'react'
+import { bindActionCreators } from 'redux'
+import { connect } from 'react-redux'
+
+import * as actions from '../actions'
+
+import { Loader } from '../common'
+import PaperInfo from './paper.info'
+
+class PaperContainer extends Component {
+ componentDidMount() {
+ this.props.actions.getPaperInfo(this.props.match.params.key)
+ }
+
+ render() {
+ if (this.props.api.paperInfo.loading) return <Loader />
+ if (!this.props.api.paperInfo.dataset) return null
+ return (
+ <PaperInfo />
+ )
+ }
+}
+
+const mapStateToProps = state => ({
+ api: state.api,
+})
+const mapDispatchToProps = dispatch => ({
+ actions: bindActionCreators({ ...actions }, dispatch),
+})
+
+export default connect(mapStateToProps, mapDispatchToProps)(PaperContainer)
diff --git a/scraper/client/paper/paper.css b/scraper/client/paper/paper.css
new file mode 100644
index 00000000..c1a775c1
--- /dev/null
+++ b/scraper/client/paper/paper.css
@@ -0,0 +1,3 @@
+.paperInfo {
+ padding: 10px;
+} \ No newline at end of file
diff --git a/scraper/client/paper/paper.info.js b/scraper/client/paper/paper.info.js
new file mode 100644
index 00000000..dab4ce5b
--- /dev/null
+++ b/scraper/client/paper/paper.info.js
@@ -0,0 +1,57 @@
+import React, { Component } from 'react'
+import { bindActionCreators } from 'redux'
+import { connect } from 'react-redux'
+
+import * as actions from '../actions'
+
+import { TableObject } from '../common'
+
+class PaperInfo extends Component {
+ render() {
+ const { paperInfo, unknownCitations } = this.props.api
+ const { dataset, statistics, address } = paperInfo
+ return (
+ <div className='paperInfo'>
+ <h2>{dataset.name_full}</h2>
+ <TableObject summary
+ tag="Dataset"
+ object={dataset}
+ order={['key', 'name_full', 'relevance', 'subset_of', 'superset_of']}
+ />
+ <TableObject summary
+ tag="Statistics"
+ object={statistics}
+ order={['year_published', 'purpose_short',
+ 'wild', 'indoor', 'outdoor', 'cyberspace',
+ 'names', 'downloaded',
+ 'year_published', 'year_start', 'year_end', 'ongoing', 'images', 'videos',
+ 'faces_unique', 'total_faces', 'img_per_person', 'num_cameras', 'faces_persons', 'female', 'male',
+ 'landmarks', 'width', 'height',
+ 'comment',
+ ]}
+ />
+ <TableObject
+ tag="Address"
+ object={address}
+ order={['address', 'type', 'lat', 'lng']}
+ />
+ <TableObject summary
+ tag="Citations"
+ object={{
+ 'geocoded': paperInfo.citations.length,
+ 'unknown': unknownCitations.citations ? unknownCitations.citations.length : 'Loading',
+ }}
+ />
+ </div>
+ )
+ }
+}
+
+const mapStateToProps = state => ({
+ api: state.api
+})
+const mapDispatchToProps = dispatch => ({
+ actions: bindActionCreators({ ...actions }, dispatch),
+})
+
+export default connect(mapStateToProps, mapDispatchToProps)(PaperInfo)
diff --git a/scraper/client/store.js b/scraper/client/store.js
index 9eae8939..3bf64114 100644
--- a/scraper/client/store.js
+++ b/scraper/client/store.js
@@ -11,6 +11,8 @@ const initialState = () => ({
institutions: {},
papers: {},
address: {},
+ paperInfo: {},
+ unknownCitations: {},
options: {}
})
diff --git a/scraper/s2-final-report.py b/scraper/s2-final-report.py
index 58ac481f..283ca4fc 100644
--- a/scraper/s2-final-report.py
+++ b/scraper/s2-final-report.py
@@ -11,18 +11,18 @@ from util import *
DIR_PUBLIC_CITATIONS = "../site/datasets/final"
DIR_UNKNOWN_CITATIONS = "../site/datasets/unknown"
+addresses = AddressBook()
+
@click.command()
def s2_final_report():
- addresses = AddressBook()
megapixels = load_megapixels_lookup()
- ft_lookup = load_ft_lookup()
- for key, row in megapixels.items():
- print(key)
- ft_share = ft_lookup[key]
- if ft_share:
- paper_data = process_paper(row, addresses)
+ items = []
+ for key, item in megapixels.items():
+ if item['dataset']['ft_share'] == '1':
+ items.append((item,))
+ parallelize(process_paper, items)
-def process_paper(row, addresses):
+def process_paper(row):
aggregate_citations = {}
unknown_citations = {}
address = None
@@ -39,6 +39,8 @@ def process_paper(row, addresses):
with open('{}/{}.json'.format(DIR_PUBLIC_CITATIONS, row['key']), 'w') as f:
json.dump({
'id': papers[0]['paper_id'],
+ 'dataset': row['dataset'],
+ 'statistics': row['statistics'],
'paper': papers[0],
'address': address,
'additional_papers': papers[1:],
@@ -47,7 +49,6 @@ def process_paper(row, addresses):
with open('{}/{}.json'.format(DIR_UNKNOWN_CITATIONS, row['key']), 'w') as f:
json.dump({
'id': papers[0]['paper_id'],
- 'paper': papers[0],
'citations': [unknown_citations[key] for key in unknown_citations.keys()],
}, f)
@@ -161,33 +162,30 @@ def process_single_paper(row, paper_id, addresses, aggregate_citations, unknown_
}
return res
-def load_ft_lookup():
- keys, rows = fetch_google_sheet('datasets')
- lookup = {}
- for row in rows:
- rec = {}
- for index, key in enumerate(keys):
- rec[key] = row[index]
- if rec['ft_share'] == '1' or rec['ft_share'] == 1:
- lookup[rec['key']] = True
- else:
- lookup[rec['key']] = False
- return lookup
-
def load_megapixels_lookup():
keys, rows = fetch_google_sheet('citation_lookup')
+ dataset_lookup = fetch_google_lookup('datasets')
+ statistics_lookup = fetch_google_lookup('statistics')
lookup = {}
for row in rows:
rec = {}
for index, key in enumerate(keys):
rec[key] = row[index]
- if rec['paper_id'] == "":
+ if rec['paper_id'] == "" or (rec['verified'] != 1 and rec['verified'] != '1'):
continue
paper_key = rec['key']
if paper_key not in lookup:
rec['paper_ids'] = []
lookup[paper_key] = rec
lookup[paper_key]['paper_ids'].append(rec['paper_id'])
+ if paper_key in dataset_lookup:
+ lookup[paper_key]['dataset'] = dataset_lookup[paper_key]
+ else:
+ print("not in datasets lookup:", paper_key)
+ if paper_key in statistics_lookup:
+ lookup[paper_key]['statistics'] = statistics_lookup[paper_key]
+ else:
+ print("not in statistics lookup:", paper_key)
# recs.append(rec)
return lookup
diff --git a/scraper/util.py b/scraper/util.py
index 9b47510a..6c671cec 100644
--- a/scraper/util.py
+++ b/scraper/util.py
@@ -319,8 +319,12 @@ def file_path(key, paper_id, fn):
def parallelize(func, rows):
print("Fetching {} items".format(len(rows)))
+ if hasattr(os, 'sched_getaffinity'):
+ processCount = len(os.sched_getaffinity(0))
+ else:
+ processCount = 4
chunksize = 3
- with Pool(processes=len(os.sched_getaffinity(0))) as pool:
+ with Pool(processes=processCount) as pool:
pool.starmap(func, rows, chunksize)
def fetch_paper(s2, paper_id):
@@ -359,3 +363,23 @@ def fetch_google_sheet(name="institutions"):
keys = rows[0]
lines = rows[1:]
return keys, lines
+
+def fetch_google_sheet_objects(name):
+ keys, rows = fetch_google_sheet(name)
+ recs = []
+ for row in rows:
+ rec = {}
+ for index, key in enumerate(keys):
+ rec[key] = row[index]
+ recs.append(rec)
+ return recs
+
+def fetch_google_lookup(name, item_key='key'):
+ keys, rows = fetch_google_sheet(name)
+ lookup = {}
+ for row in rows:
+ rec = {}
+ for index, key in enumerate(keys):
+ rec[key] = row[index]
+ lookup[rec[item_key]] = rec
+ return lookup