summary refs log tree commit diff
path: root/scraper
diff options
context:
space:
mode:
author jules@lens <julescarbon@gmail.com> 2019-04-02 20:36:51 +0200
committer jules@lens <julescarbon@gmail.com> 2019-04-02 20:36:51 +0200
commit 1d238346b5609e9454a4917c75631a550b5b43d1 (patch)
tree 8a936e721e78c7b5948b303e6a1686c96b882d51 /scraper
parent b4b58f2279fb01fa0240006c460c0b5ec95c1126 (diff)
parent f58d41731fc07d94d594d5582aef203564f990ec (diff)
Merge branch 'master' of asdf.us:megapixels_dev
Diffstat (limited to 'scraper')
-rw-r--r-- scraper/client/actions.js 4
-rw-r--r-- scraper/client/app.js 1
-rw-r--r-- scraper/client/common/header.component.js 9
-rw-r--r-- scraper/client/common/table.component.js 3
-rw-r--r-- scraper/client/paper/citationList.component.js 33
-rw-r--r-- scraper/client/paper/index.js 2
-rw-r--r-- scraper/client/paper/paper.chart.js 83
-rw-r--r-- scraper/client/paper/paper.citations.js 6
-rw-r--r-- scraper/client/paper/paper.css 21
-rw-r--r-- scraper/client/paper/paper.info.js 41
-rw-r--r-- scraper/client/paper/paper.manager.js 35
-rw-r--r-- scraper/client/paper/paper.unknown.js 41
-rw-r--r-- scraper/client/paper/paper.verify.js 29
-rw-r--r-- scraper/s2-geocode-server.py 28
14 files changed, 231 insertions, 105 deletions
diff --git a/scraper/client/actions.js b/scraper/client/actions.js
index b5c477f6..47ca6ff5 100644
--- a/scraper/client/actions.js
+++ b/scraper/client/actions.js
@@ -41,8 +41,8 @@ export const getVerificationsDataset = dataset => dispatch => (
api(dispatch, get, 'verifications', '/api/verifications/' + dataset, {})
)
-export const getVerification = sha256 => dispatch => (
- api(dispatch, get, 'verify', '/api/verify/' + sha256, {})
+export const getVerification = (dataset, sha256) => dispatch => (
+ api(dispatch, get, 'verify', '/api/verify/' + dataset + '/' + sha256, {})
)
export const postVerification = data => dispatch => (
diff --git a/scraper/client/app.js b/scraper/client/app.js
index b449d0d0..366d4098 100644
--- a/scraper/client/app.js
+++ b/scraper/client/app.js
@@ -17,7 +17,6 @@ export default class App extends Component {
<Switch>
<Route exact path="/paper/:key/" component={Paper.Info} />
<Route exact path="/paper/:key/citations/" component={Paper.Citations} />
- <Route exact path="/paper/:key/unknown/" component={Paper.UnknownCitations} />
<Route exact path="/paper/:key/info/" component={Paper.Info} />
<Route exact path="/paper/:key/random/" component={Paper.Random} />
<Route exact path="/paper/:key/address/:sha256" component={Paper.Address} />
diff --git a/scraper/client/common/header.component.js b/scraper/client/common/header.component.js
index 2f084979..c9825aab 100644
--- a/scraper/client/common/header.component.js
+++ b/scraper/client/common/header.component.js
@@ -1,5 +1,5 @@
import React, { Component } from 'react'
-// import { NavLink } from 'react-router-dom'
+import { Link } from 'react-router-dom'
import { bindActionCreators } from 'redux'
import { connect } from 'react-redux'
@@ -34,10 +34,9 @@ class Header extends Component {
{dataset &&
<div>
{dataset.name_full}{' - '}
- <a href={"/paper/" + dataset.key + "/info"}>Info</a>{' - '}
- <a href={"/paper/" + dataset.key + "/citations/"}>Citations</a>{' - '}
- <a href={"/paper/" + dataset.key + "/unknown/"}>Unknown</a>{' - '}
- <a href={"/paper/" + dataset.key + "/random/"}>Random</a>
+ <Link to={"/paper/" + dataset.key + "/info"}>Info</Link>{' - '}
+ <Link to={"/paper/" + dataset.key + "/citations/"}>Citations</Link>{' - '}
+ <Link to={"/paper/" + dataset.key + "/random/"}>Random</Link>
</div>
}
</section>
diff --git a/scraper/client/common/table.component.js b/scraper/client/common/table.component.js
index 96b62835..121c9841 100644
--- a/scraper/client/common/table.component.js
+++ b/scraper/client/common/table.component.js
@@ -118,6 +118,9 @@ export function TableCell({ value }) {
value = <TableObject nested tag={''} object={value} />
}
}
+ if (value && typeof value === 'string' && value.indexOf('http') === 0) {
+ value = <a href={value} target="_blank" rel="nofollower noopener">{value}</a>
+ }
return (
<td>{value}</td>
)
diff --git a/scraper/client/paper/citationList.component.js b/scraper/client/paper/citationList.component.js
index be8528bf..002c0dcb 100644
--- a/scraper/client/paper/citationList.component.js
+++ b/scraper/client/paper/citationList.component.js
@@ -9,38 +9,31 @@ import { TableObject, Loader } from '../common'
import { USES_DATASET } from '../types'
class CitationList extends Component {
- componentDidMount() {
- const { citations, api } = this.props
- const { paperInfo, unknownCitations, verifications } = api
- const { dataset } = paperInfo
- if (!dataset || !citations || !verifications[dataset.key]) {
- this.props.actions.setSortedCitations([])
- return
- }
- let verifiedLookup = verifications[dataset.key] || {}
- const sortedCitations = citations.map(citation => [
- citation.title,
- verifiedLookup[citation.id] ? verifiedLookup[citation.id].uses_dataset : USES_DATASET.NO_DATA,
- citation.pdf.length,
- citation
- ])
- .sort((a,b) => (b[1] - a[1] || b[2] - a[2] || (a[0].localeCompare(b[0]))))
- .map(tup => tup[3])
- this.props.actions.setSortedCitations(sortedCitations)
+ state = {
+ filter: USES_DATASET.YES,
}
render() {
const { citations, title, api } = this.props
const { paperInfo, unknownCitations, verifications, sortedCitations } = api
const { dataset } = paperInfo
+ const { filter } = this.state
if (!dataset || !citations || !verifications[dataset.key]) return <Loader />
let verifiedLookup = verifications[dataset.key] || {}
- // console.log(verifications)
+ let filteredCitations = sortedCitations.filter(citation => (
+ citation.verified === filter
+ ))
return (
<div className='citations'>
<h2>{title}</h2>
+ <div className='filter_buttons'>
+ <span className='verified' onClick={() => this.setState({ filter: USES_DATASET.YES })}>uses dataset</span>
+ <span className='unverified' onClick={() => this.setState({ filter: USES_DATASET.NO })}>{"doesn't use dataset"}</span>
+ <span className='not_enough_info' onClick={() => this.setState({ filter: USES_DATASET.UNKNOWN })}>{'not enough information'}</span>
+ <span className='unknown' onClick={() => this.setState({ filter: USES_DATASET.NO_DATA })}>{'unverified'}</span>
+ </div>
<ul>
- {(sortedCitations || []).map((citation, i) => {
+ {(filteredCitations || []).map((citation, i) => {
let cite = { ...citation }
cite.id = {
_raw: true,
diff --git a/scraper/client/paper/index.js b/scraper/client/paper/index.js
index 99672684..9346234a 100644
--- a/scraper/client/paper/index.js
+++ b/scraper/client/paper/index.js
@@ -1,7 +1,6 @@
import Manager from './paper.manager'
import Info from './paper.info'
import Citations from './paper.citations'
-import UnknownCitations from './paper.unknown'
import Random from './paper.random'
import Address from './paper.address'
import Verify from './paper.verify'
@@ -13,7 +12,6 @@ export {
Manager,
Info,
Citations,
- UnknownCitations,
Random,
Address,
Verify,
diff --git a/scraper/client/paper/paper.chart.js b/scraper/client/paper/paper.chart.js
new file mode 100644
index 00000000..01d8d0e8
--- /dev/null
+++ b/scraper/client/paper/paper.chart.js
@@ -0,0 +1,83 @@
+import React, { Component } from 'react'
+import { bindActionCreators } from 'redux'
+import { connect } from 'react-redux'
+import { toArray, toTuples } from '../util'
+import C3Chart from 'react-c3js'
+import 'c3/c3.css'
+
+class PaperChart extends Component {
+ render() {
+ const { rows, title } = this.props
+ if (!rows.length) return null
+ const colorPattern = [
+ "#00b200",
+ "#ff0000",
+ "#e0c200",
+ "#dddddd",
+ ]
+
+ return (
+ <div className='chart'>
+ <div>
+ <C3Chart
+ data={{
+ columns: rows,
+ type: 'pie',
+ }}
+ color={{
+ pattern: colorPattern,
+ }}
+ tooltip={{
+ format: {
+ value: value => value,
+ }
+ }}
+ size={{
+ height: rows.length < 4 ? 316 : 336,
+ }}
+ />
+ <span className='chartCaption'>{title}</span>
+ </div>
+ </div>
+ )
+ }
+}
+
+/*
+ legend={{
+ position: 'right'
+ }}
+ tooltip={{
+ contents: function (d, defaultTitleFormat, defaultValueFormat, color) {
+ const countriesByYearLookup = years[yearList[d[0].x]]
+ let countriesByYear = Object.keys(countriesByYearLookup).map(country => [country, countriesByYearLookup[country]]).sort((a,b) => b[1] - a[1])
+ let topCountriesForThisYear = countriesByYear.slice(0, topCountryCount)
+ let bottomTotal = countriesByYear.slice(topCountryCount).reduce((a,b) => (a + b[1]), 0)
+ // console.log(topCountriesForThisYear)
+ topCountriesForThisYear.push([otherCountriesLabel, bottomTotal])
+ const tableRows = topCountriesForThisYear.filter(pair => !!pair[1]).map(([country, total]) => {
+ let colorIndex = topCountries.indexOf(country)
+ if (colorIndex < 0) colorIndex = colorPattern.length - 1
+ const color = colorPattern[ colorIndex ]
+ return [
+ "<tr>",
+ "<td>",
+ "<span style='background-color:" + color + "' class='swatch'></span>",
+ country,
+ "</td>",
+ "<td>",
+ total,
+ "</td>",
+ "</tr>",
+ ].join('')
+ })
+ return [
+ "<table class='c3-tooltip'>",
+ ...tableRows,
+ "</table>",
+ ].join('')
+ }
+ }}
+*/
+
+export default PaperChart
diff --git a/scraper/client/paper/paper.citations.js b/scraper/client/paper/paper.citations.js
index f0e9ea26..c3a9cc61 100644
--- a/scraper/client/paper/paper.citations.js
+++ b/scraper/client/paper/paper.citations.js
@@ -11,12 +11,6 @@ import { USES_DATASET } from '../types'
import CitationList from './citationList.component'
class PaperCitations extends Component {
- componentDidUpdate(prevProps) {
- if (this.props.api.paperInfo.dataset !== prevProps.api.paperInfo.dataset) {
- this.props.actions.getVerificationsDataset(this.props.api.paperInfo.dataset.key)
- }
- }
-
render() {
const { paperInfo, unknownCitations, verifications } = this.props.api
const { dataset, citations } = paperInfo
diff --git a/scraper/client/paper/paper.css b/scraper/client/paper/paper.css
index 21df2df1..914077b6 100644
--- a/scraper/client/paper/paper.css
+++ b/scraper/client/paper/paper.css
@@ -12,6 +12,20 @@
width: 100%;
}
+.filter_buttons {
+ margin-left: 5px;
+ margin-bottom: 10px;
+}
+.filter_buttons span {
+ margin-right: 10px;
+ cursor: pointer;
+ opacity: 0.8;
+ transition: opacity 0.2;
+}
+.filter_buttons span:hover {
+ opacity: 1;
+}
+
.citations {
padding:40px;
}
@@ -82,6 +96,13 @@
padding:4px;
font-size:12px;
}
+.chartCaption {
+ display: block;
+ width: 100%;
+ font-size: 12px;
+ color: #333;
+ text-align: center;
+}
.param {
display: flex;
diff --git a/scraper/client/paper/paper.info.js b/scraper/client/paper/paper.info.js
index 35234617..25f4472f 100644
--- a/scraper/client/paper/paper.info.js
+++ b/scraper/client/paper/paper.info.js
@@ -3,21 +3,49 @@ import { bindActionCreators } from 'redux'
import { connect } from 'react-redux'
import * as actions from '../actions'
-
import { TableObject } from '../common'
+import { USES_DATASET } from '../types'
+
+import PaperChart from './paper.chart'
class PaperInfo extends Component {
render() {
- const { paperInfo, unknownCitations } = this.props.api
- const { dataset, address } = paperInfo
+ const { paperInfo, sortedCitations, unknownCitations } = this.props.api
+ const { dataset, paper, address } = paperInfo
if (!dataset) return null
+
+ let counts = {}
+ const citationLabels = ['Uses Dataset', 'Doesn\'t Use Dataset', 'Not Enough Information', 'Unknown']
+ const citationCountOrder = [ USES_DATASET.YES, USES_DATASET.NO, USES_DATASET.UNKNOWN, USES_DATASET.NO_DATA ]
+ citationCountOrder.forEach(v => counts[v] = 0)
+
+ sortedCitations.forEach(c => counts[c.verified] += 1)
+
+ let citationCounts = {}
+ let citationRows = []
+ citationCountOrder.forEach((v, i) => {
+ const count = counts[v]
+ const label = citationLabels[i]
+ citationCounts[label] = count
+ citationRows.push([ label, count ])
+ })
+
return (
<div className='paperInfo'>
<h2>{dataset.name_full}</h2>
+ <PaperChart
+ rows={citationRows}
+ title={'Dataset coverage'}
+ />
<TableObject summary
tag="Dataset"
object={dataset}
- order={['key', 'name_full', 'relevance', 'subset_of', 'superset_of']}
+ order={'key name_full purpose comment created_by funded_by funded_by_short license'.split(' ')}
+ />
+ <TableObject summary
+ tag="Paper"
+ object={paper}
+ order={"paper_id title year doi address".split(" ")}
/>
<TableObject summary
tag="Statistics"
@@ -38,10 +66,7 @@ class PaperInfo extends Component {
/>
<TableObject summary
tag="Citations"
- object={{
- 'geocoded': paperInfo.citations.length,
- 'unknown': unknownCitations.citations ? unknownCitations.citations.length : 'Loading',
- }}
+ object={citationCounts}
/>
</div>
)
diff --git a/scraper/client/paper/paper.manager.js b/scraper/client/paper/paper.manager.js
index 2ac03b01..8b25b1cc 100644
--- a/scraper/client/paper/paper.manager.js
+++ b/scraper/client/paper/paper.manager.js
@@ -3,6 +3,7 @@ import { bindActionCreators } from 'redux'
import { connect } from 'react-redux'
import * as actions from '../actions'
+import { USES_DATASET } from '../types'
import { Loader } from '../common'
@@ -15,6 +16,40 @@ class PaperManager extends Component {
if (this.props.match.params.key !== oldProps.match.params.key) {
this.props.actions.getPaperInfo(this.props.match.params.key)
}
+ console.log('whoms?')
+ if (this.props.api.paperInfo.dataset !== oldProps.api.paperInfo.dataset && this.props.api.paperInfo.dataset && this.props.api.paperInfo.dataset.key) {
+ console.log('vert?')
+ this.props.actions.getVerificationsDataset(this.props.api.paperInfo.dataset.key)
+ }
+ if (this.props.api.verifications !== oldProps.api.verifications && this.props.api.paperInfo.dataset) {
+ console.log('updated?')
+ this.updateSortedCitations()
+ }
+ }
+
+ updateSortedCitations() {
+ const { api } = this.props
+ const { paperInfo, unknownCitations, verifications } = api
+ const { dataset } = paperInfo
+ if (!dataset || !paperInfo.citations || !unknownCitations.citations || !verifications[dataset.key]) {
+ this.props.actions.setSortedCitations([])
+ return
+ }
+ const citations = paperInfo.citations.concat(unknownCitations.citations)
+ let verifiedLookup = verifications[dataset.key] || {}
+ const sortedCitations = citations.map(citation => [
+ citation.title,
+ verifiedLookup[citation.id] ? verifiedLookup[citation.id].uses_dataset : USES_DATASET.NO_DATA,
+ citation.pdf.length,
+ citation
+ ])
+ .sort((a,b) => (b[1] - a[1] || b[2] - a[2] || (a[0].localeCompare(b[0]))))
+ .map(tup => ({
+ ...tup[3],
+ verified: tup[1],
+ }))
+ console.log('updated')
+ this.props.actions.setSortedCitations(sortedCitations)
}
render() {
diff --git a/scraper/client/paper/paper.unknown.js b/scraper/client/paper/paper.unknown.js
deleted file mode 100644
index 876ac144..00000000
--- a/scraper/client/paper/paper.unknown.js
+++ /dev/null
@@ -1,41 +0,0 @@
-import React, { Component } from 'react'
-import { bindActionCreators } from 'redux'
-import { connect } from 'react-redux'
-import { Link } from 'react-router-dom'
-
-import * as actions from '../actions'
-
-import { Loader } from '../common'
-import { USES_DATASET } from '../types'
-
-import CitationList from './citationList.component'
-
-class PaperUnknown extends Component {
- componentDidUpdate(prevProps) {
- if (this.props.api.paperInfo.dataset !== prevProps.api.paperInfo.dataset) {
- this.props.actions.getVerificationsDataset(this.props.api.paperInfo.dataset.key)
- }
- }
-
- render() {
- const { paperInfo, unknownCitations, verifications } = this.props.api
- const { dataset, citations } = paperInfo
- if (!dataset || !citations || !verifications[dataset.key]) return <Loader />
-
- return (
- <CitationList
- title={dataset.name_full + ': Unknown Citations'}
- citations={unknownCitations.citations}
- />
- )
- }
-}
-
-const mapStateToProps = state => ({
- api: state.api
-})
-const mapDispatchToProps = dispatch => ({
- actions: bindActionCreators({ ...actions }, dispatch),
-})
-
-export default connect(mapStateToProps, mapDispatchToProps)(PaperUnknown)
diff --git a/scraper/client/paper/paper.verify.js b/scraper/client/paper/paper.verify.js
index 83eb5b44..813abb6d 100644
--- a/scraper/client/paper/paper.verify.js
+++ b/scraper/client/paper/paper.verify.js
@@ -36,7 +36,9 @@ class PaperVerify extends Component {
const { sha256 } = this.props.match.params
this.props.actions.getInstitutions()
this.props.actions.getAddress(sha256)
- this.props.actions.getVerification(sha256)
+ if (this.props.api.paperInfo.dataset) {
+ this.props.actions.getVerification(this.props.api.paperInfo.dataset.key, sha256)
+ }
const citationState = this.getCitationState(sha256)
// console.log('DID MOUNT')
this.setState(citationState)
@@ -51,15 +53,20 @@ class PaperVerify extends Component {
const paper = verify ? verify.paper : null
let newState = {}
+ if (this.props.api.paperInfo.dataset !== oldProps.api.paperInfo.dataset) {
+ this.props.actions.getVerification(this.props.api.paperInfo.dataset.key, sha256)
+ }
+
if (oldSha256 && sha256 !== oldSha256) {
- console.log('update verification')
+ // console.log('update verification')
this.props.actions.getAddress(sha256)
- this.props.actions.getVerification(sha256)
+ this.props.actions.getVerification(this.props.api.paperInfo.dataset.key, sha256)
const citationState = this.getCitationState(sha256)
newState = {
...initialState,
...citationState,
...address.paper,
+ pdfIndex: citationState.citation.pdf.findIndex(el => el.match(/^https:/)),
}
this.setState(newState)
} else if (verify && !verify.loading && verify.paper && (!oldPaper || oldPaper !== verify.paper)) {
@@ -70,6 +77,7 @@ class PaperVerify extends Component {
...initialState,
...citationState,
...address.paper,
+ pdfIndex: citationState.citation.pdf.findIndex(el => el.match(/^https:/)),
}
this.setState(newState)
} else {
@@ -79,6 +87,7 @@ class PaperVerify extends Component {
newState = {
...citationState,
...address.paper,
+ pdfIndex: citationState.citation.pdf.findIndex(el => el.match(/^https:/)),
uses_dataset: paper.uses_dataset,
images_in_paper: paper.images_in_paper,
verified_by: paper.verified_by,
@@ -156,7 +165,7 @@ class PaperVerify extends Component {
if (citationIndex < 0) {
history.push('/paper/' + key + '/info/')
} else {
- let nextId = citations[citationIndex].id
+ let nextId = sortedCitations[citationIndex].id
history.push('/paper/' + key + '/verify/' + nextId)
}
}
@@ -167,15 +176,16 @@ class PaperVerify extends Component {
const { paperInfo, sortedCitations } = this.props.api
const citations = sortedCitations || paperInfo.citations || []
let citationIndex = citations.findIndex(f => f.id === this.state.citation.id)
-
+ console.log(sortedCitations)
+ console.log('going to next', key, citationIndex)
if (citationIndex === -1) {
history.push('/paper/' + key + '/info/')
} else {
citationIndex += 1
- if (citationIndex >= citations.length) {
+ if (citationIndex >= sortedCitations.length) {
history.push('/paper/' + key + '/info/')
} else {
- let nextId = citations[citationIndex].id
+ let nextId = sortedCitations[citationIndex].id
history.push('/paper/' + key + '/verify/' + nextId)
}
}
@@ -414,7 +424,10 @@ class PaperVerify extends Component {
>{'Next >'}</button>
</div>
- <iframe className='pdfViewer' referrerPolicy='no-referrer' src={citation.pdf[this.state.pdf_index]} />
+ {citation.pdf.length
+ ? <iframe className='pdfViewer' referrerPolicy='no-referrer' src={citation.pdf[this.state.pdf_index] || "about:blank"} />
+ : ''
+ }
</div>
)
}
diff --git a/scraper/s2-geocode-server.py b/scraper/s2-geocode-server.py
index 3aeda881..ad3efd5d 100644
--- a/scraper/s2-geocode-server.py
+++ b/scraper/s2-geocode-server.py
@@ -106,28 +106,32 @@ def list_dataset_verifications(dataset):
dataset: verifications,
})
-@app.route('/api/verify/<sha256>', methods=['GET'])
-def find_verification(sha256):
+@app.route('/api/verify/<dataset>/<sha256>', methods=['GET'])
+def find_verification(dataset, sha256):
worksheet = fetch_worksheet('verifications')
+ keys = worksheet.row_values(1)
try:
- cell = worksheet.find(sha256)
+ cells = worksheet.findall(sha256)
except:
return jsonify({
'error': 'no_match'
})
- if cell and cell.row:
- keys = worksheet.row_values(1)
+ if not len(cells):
+ return jsonify({
+ 'error': 'no_match'
+ })
+ for cell in cells:
values_list = worksheet.row_values(cell.row)
lookup = {}
for key, value in zip(keys, values_list):
lookup[key] = value
- return jsonify({
- 'paper': lookup,
- })
- else:
- return jsonify({
- 'error': 'no_match'
- })
+ if lookup['dataset'] == dataset:
+ return jsonify({
+ 'paper': lookup,
+ })
+ return jsonify({
+ 'error': 'no_match'
+ })
@app.route('/api/verify/add', methods=['POST'])
def add_verification():