diff options
Diffstat (limited to 'scraper')
| -rw-r--r-- | scraper/client/actions.js | 4 | ||||
| -rw-r--r-- | scraper/client/app.js | 1 | ||||
| -rw-r--r-- | scraper/client/common/header.component.js | 9 | ||||
| -rw-r--r-- | scraper/client/common/table.component.js | 3 | ||||
| -rw-r--r-- | scraper/client/paper/citationList.component.js | 33 | ||||
| -rw-r--r-- | scraper/client/paper/index.js | 2 | ||||
| -rw-r--r-- | scraper/client/paper/paper.chart.js | 83 | ||||
| -rw-r--r-- | scraper/client/paper/paper.citations.js | 6 | ||||
| -rw-r--r-- | scraper/client/paper/paper.css | 21 | ||||
| -rw-r--r-- | scraper/client/paper/paper.info.js | 41 | ||||
| -rw-r--r-- | scraper/client/paper/paper.manager.js | 35 | ||||
| -rw-r--r-- | scraper/client/paper/paper.unknown.js | 41 | ||||
| -rw-r--r-- | scraper/client/paper/paper.verify.js | 29 | ||||
| -rw-r--r-- | scraper/s2-geocode-server.py | 28 |
14 files changed, 231 insertions, 105 deletions
diff --git a/scraper/client/actions.js b/scraper/client/actions.js index b5c477f6..47ca6ff5 100644 --- a/scraper/client/actions.js +++ b/scraper/client/actions.js @@ -41,8 +41,8 @@ export const getVerificationsDataset = dataset => dispatch => ( api(dispatch, get, 'verifications', '/api/verifications/' + dataset, {}) ) -export const getVerification = sha256 => dispatch => ( - api(dispatch, get, 'verify', '/api/verify/' + sha256, {}) +export const getVerification = (dataset, sha256) => dispatch => ( + api(dispatch, get, 'verify', '/api/verify/' + dataset + '/' + sha256, {}) ) export const postVerification = data => dispatch => ( diff --git a/scraper/client/app.js b/scraper/client/app.js index b449d0d0..366d4098 100644 --- a/scraper/client/app.js +++ b/scraper/client/app.js @@ -17,7 +17,6 @@ export default class App extends Component { <Switch> <Route exact path="/paper/:key/" component={Paper.Info} /> <Route exact path="/paper/:key/citations/" component={Paper.Citations} /> - <Route exact path="/paper/:key/unknown/" component={Paper.UnknownCitations} /> <Route exact path="/paper/:key/info/" component={Paper.Info} /> <Route exact path="/paper/:key/random/" component={Paper.Random} /> <Route exact path="/paper/:key/address/:sha256" component={Paper.Address} /> diff --git a/scraper/client/common/header.component.js b/scraper/client/common/header.component.js index 2f084979..c9825aab 100644 --- a/scraper/client/common/header.component.js +++ b/scraper/client/common/header.component.js @@ -1,5 +1,5 @@ import React, { Component } from 'react' -// import { NavLink } from 'react-router-dom' +import { Link } from 'react-router-dom' import { bindActionCreators } from 'redux' import { connect } from 'react-redux' @@ -34,10 +34,9 @@ class Header extends Component { {dataset && <div> {dataset.name_full}{' - '} - <a href={"/paper/" + dataset.key + "/info"}>Info</a>{' - '} - <a href={"/paper/" + dataset.key + "/citations/"}>Citations</a>{' - '} - <a href={"/paper/" + dataset.key + "/unknown/"}>Unknown</a>{' - '} - <a href={"/paper/" + dataset.key + "/random/"}>Random</a> + <Link to={"/paper/" + dataset.key + "/info"}>Info</Link>{' - '} + <Link to={"/paper/" + dataset.key + "/citations/"}>Citations</Link>{' - '} + <Link to={"/paper/" + dataset.key + "/random/"}>Random</Link> </div> } </section> diff --git a/scraper/client/common/table.component.js b/scraper/client/common/table.component.js index 96b62835..121c9841 100644 --- a/scraper/client/common/table.component.js +++ b/scraper/client/common/table.component.js @@ -118,6 +118,9 @@ export function TableCell({ value }) { value = <TableObject nested tag={''} object={value} /> } } + if (value && typeof value === 'string' && value.indexOf('http') === 0) { + value = <a href={value} target="_blank" rel="nofollower noopener">{value}</a> + } return ( <td>{value}</td> ) diff --git a/scraper/client/paper/citationList.component.js b/scraper/client/paper/citationList.component.js index be8528bf..002c0dcb 100644 --- a/scraper/client/paper/citationList.component.js +++ b/scraper/client/paper/citationList.component.js @@ -9,38 +9,31 @@ import { TableObject, Loader } from '../common' import { USES_DATASET } from '../types' class CitationList extends Component { - componentDidMount() { - const { citations, api } = this.props - const { paperInfo, unknownCitations, verifications } = api - const { dataset } = paperInfo - if (!dataset || !citations || !verifications[dataset.key]) { - this.props.actions.setSortedCitations([]) - return - } - let verifiedLookup = verifications[dataset.key] || {} - const sortedCitations = citations.map(citation => [ - citation.title, - verifiedLookup[citation.id] ? verifiedLookup[citation.id].uses_dataset : USES_DATASET.NO_DATA, - citation.pdf.length, - citation - ]) - .sort((a,b) => (b[1] - a[1] || b[2] - a[2] || (a[0].localeCompare(b[0])))) - .map(tup => tup[3]) - this.props.actions.setSortedCitations(sortedCitations) + state = { + filter: USES_DATASET.YES, } render() { const { citations, title, api } = this.props const { paperInfo, unknownCitations, verifications, sortedCitations } = api const { dataset } = paperInfo + const { filter } = this.state if (!dataset || !citations || !verifications[dataset.key]) return <Loader /> let verifiedLookup = verifications[dataset.key] || {} - // console.log(verifications) + let filteredCitations = sortedCitations.filter(citation => ( + citation.verified === filter + )) return ( <div className='citations'> <h2>{title}</h2> + <div className='filter_buttons'> + <span className='verified' onClick={() => this.setState({ filter: USES_DATASET.YES })}>uses dataset</span> + <span className='unverified' onClick={() => this.setState({ filter: USES_DATASET.NO })}>{"doesn't use dataset"}</span> + <span className='not_enough_info' onClick={() => this.setState({ filter: USES_DATASET.UNKNOWN })}>{'not enough information'}</span> + <span className='unknown' onClick={() => this.setState({ filter: USES_DATASET.NO_DATA })}>{'unverified'}</span> + </div> <ul> - {(sortedCitations || []).map((citation, i) => { + {(filteredCitations || []).map((citation, i) => { let cite = { ...citation } cite.id = { _raw: true, diff --git a/scraper/client/paper/index.js b/scraper/client/paper/index.js index 99672684..9346234a 100644 --- a/scraper/client/paper/index.js +++ b/scraper/client/paper/index.js @@ -1,7 +1,6 @@ import Manager from './paper.manager' import Info from './paper.info' import Citations from './paper.citations' -import UnknownCitations from './paper.unknown' import Random from './paper.random' import Address from './paper.address' import Verify from './paper.verify' @@ -13,7 +12,6 @@ export { Manager, Info, Citations, - UnknownCitations, Random, Address, Verify, diff --git a/scraper/client/paper/paper.chart.js b/scraper/client/paper/paper.chart.js new file mode 100644 index 00000000..01d8d0e8 --- /dev/null +++ b/scraper/client/paper/paper.chart.js @@ -0,0 +1,83 @@ +import React, { Component } from 'react' +import { bindActionCreators } from 'redux' +import { connect } from 'react-redux' +import { toArray, toTuples } from '../util' +import C3Chart from 'react-c3js' +import 'c3/c3.css' + +class PaperChart extends Component { + render() { + const { rows, title } = this.props + if (!rows.length) return null + const colorPattern = [ + "#00b200", + "#ff0000", + "#e0c200", + "#dddddd", + ] + + return ( + <div className='chart'> + <div> + <C3Chart + data={{ + columns: rows, + type: 'pie', + }} + color={{ + pattern: colorPattern, + }} + tooltip={{ + format: { + value: value => value, + } + }} + size={{ + height: rows.length < 4 ? 316 : 336, + }} + /> + <span className='chartCaption'>{title}</span> + </div> + </div> + ) + } +} + +/* + legend={{ + position: 'right' + }} + tooltip={{ + contents: function (d, defaultTitleFormat, defaultValueFormat, color) { + const countriesByYearLookup = years[yearList[d[0].x]] + let countriesByYear = Object.keys(countriesByYearLookup).map(country => [country, countriesByYearLookup[country]]).sort((a,b) => b[1] - a[1]) + let topCountriesForThisYear = countriesByYear.slice(0, topCountryCount) + let bottomTotal = countriesByYear.slice(topCountryCount).reduce((a,b) => (a + b[1]), 0) + // console.log(topCountriesForThisYear) + topCountriesForThisYear.push([otherCountriesLabel, bottomTotal]) + const tableRows = topCountriesForThisYear.filter(pair => !!pair[1]).map(([country, total]) => { + let colorIndex = topCountries.indexOf(country) + if (colorIndex < 0) colorIndex = colorPattern.length - 1 + const color = colorPattern[ colorIndex ] + return [ + "<tr>", + "<td>", + "<span style='background-color:" + color + "' class='swatch'></span>", + country, + "</td>", + "<td>", + total, + "</td>", + "</tr>", + ].join('') + }) + return [ + "<table class='c3-tooltip'>", + ...tableRows, + "</table>", + ].join('') + } + }} +*/ + +export default PaperChart diff --git a/scraper/client/paper/paper.citations.js b/scraper/client/paper/paper.citations.js index f0e9ea26..c3a9cc61 100644 --- a/scraper/client/paper/paper.citations.js +++ b/scraper/client/paper/paper.citations.js @@ -11,12 +11,6 @@ import { USES_DATASET } from '../types' import CitationList from './citationList.component' class PaperCitations extends Component { - componentDidUpdate(prevProps) { - if (this.props.api.paperInfo.dataset !== prevProps.api.paperInfo.dataset) { - this.props.actions.getVerificationsDataset(this.props.api.paperInfo.dataset.key) - } - } - render() { const { paperInfo, unknownCitations, verifications } = this.props.api const { dataset, citations } = paperInfo diff --git a/scraper/client/paper/paper.css b/scraper/client/paper/paper.css index 21df2df1..914077b6 100644 --- a/scraper/client/paper/paper.css +++ b/scraper/client/paper/paper.css @@ -12,6 +12,20 @@ width: 100%; } +.filter_buttons { + margin-left: 5px; + margin-bottom: 10px; +} +.filter_buttons span { + margin-right: 10px; + cursor: pointer; + opacity: 0.8; + transition: opacity 0.2; +} +.filter_buttons span:hover { + opacity: 1; +} + .citations { padding:40px; } @@ -82,6 +96,13 @@ padding:4px; font-size:12px; } +.chartCaption { + display: block; + width: 100%; + font-size: 12px; + color: #333; + text-align: center; +} .param { display: flex; diff --git a/scraper/client/paper/paper.info.js b/scraper/client/paper/paper.info.js index 35234617..25f4472f 100644 --- a/scraper/client/paper/paper.info.js +++ b/scraper/client/paper/paper.info.js @@ -3,21 +3,49 @@ import { bindActionCreators } from 'redux' import { connect } from 'react-redux' import * as actions from '../actions' - import { TableObject } from '../common' +import { USES_DATASET } from '../types' + +import PaperChart from './paper.chart' class PaperInfo extends Component { render() { - const { paperInfo, unknownCitations } = this.props.api - const { dataset, address } = paperInfo + const { paperInfo, sortedCitations, unknownCitations } = this.props.api + const { dataset, paper, address } = paperInfo if (!dataset) return null + + let counts = {} + const citationLabels = ['Uses Dataset', 'Doesn\'t Use Dataset', 'Not Enough Information', 'Unknown'] + const citationCountOrder = [ USES_DATASET.YES, USES_DATASET.NO, USES_DATASET.UNKNOWN, USES_DATASET.NO_DATA ] + citationCountOrder.forEach(v => counts[v] = 0) + + sortedCitations.forEach(c => counts[c.verified] += 1) + + let citationCounts = {} + let citationRows = [] + citationCountOrder.forEach((v, i) => { + const count = counts[v] + const label = citationLabels[i] + citationCounts[label] = count + citationRows.push([ label, count ]) + }) + return ( <div className='paperInfo'> <h2>{dataset.name_full}</h2> + <PaperChart + rows={citationRows} + title={'Dataset coverage'} + /> <TableObject summary tag="Dataset" object={dataset} - order={['key', 'name_full', 'relevance', 'subset_of', 'superset_of']} + order={'key name_full purpose comment created_by funded_by funded_by_short license'.split(' ')} + /> + <TableObject summary + tag="Paper" + object={paper} + order={"paper_id title year doi address".split(" ")} /> <TableObject summary tag="Statistics" @@ -38,10 +66,7 @@ class PaperInfo extends Component { /> <TableObject summary tag="Citations" - object={{ - 'geocoded': paperInfo.citations.length, - 'unknown': unknownCitations.citations ? unknownCitations.citations.length : 'Loading', - }} + object={citationCounts} /> </div> ) diff --git a/scraper/client/paper/paper.manager.js b/scraper/client/paper/paper.manager.js index 2ac03b01..8b25b1cc 100644 --- a/scraper/client/paper/paper.manager.js +++ b/scraper/client/paper/paper.manager.js @@ -3,6 +3,7 @@ import { bindActionCreators } from 'redux' import { connect } from 'react-redux' import * as actions from '../actions' +import { USES_DATASET } from '../types' import { Loader } from '../common' @@ -15,6 +16,40 @@ class PaperManager extends Component { if (this.props.match.params.key !== oldProps.match.params.key) { this.props.actions.getPaperInfo(this.props.match.params.key) } + console.log('whoms?') + if (this.props.api.paperInfo.dataset !== oldProps.api.paperInfo.dataset && this.props.api.paperInfo.dataset && this.props.api.paperInfo.dataset.key) { + console.log('vert?') + this.props.actions.getVerificationsDataset(this.props.api.paperInfo.dataset.key) + } + if (this.props.api.verifications !== oldProps.api.verifications && this.props.api.paperInfo.dataset) { + console.log('updated?') + this.updateSortedCitations() + } + } + + updateSortedCitations() { + const { api } = this.props + const { paperInfo, unknownCitations, verifications } = api + const { dataset } = paperInfo + if (!dataset || !paperInfo.citations || !unknownCitations.citations || !verifications[dataset.key]) { + this.props.actions.setSortedCitations([]) + return + } + const citations = paperInfo.citations.concat(unknownCitations.citations) + let verifiedLookup = verifications[dataset.key] || {} + const sortedCitations = citations.map(citation => [ + citation.title, + verifiedLookup[citation.id] ? verifiedLookup[citation.id].uses_dataset : USES_DATASET.NO_DATA, + citation.pdf.length, + citation + ]) + .sort((a,b) => (b[1] - a[1] || b[2] - a[2] || (a[0].localeCompare(b[0])))) + .map(tup => ({ + ...tup[3], + verified: tup[1], + })) + console.log('updated') + this.props.actions.setSortedCitations(sortedCitations) } render() { diff --git a/scraper/client/paper/paper.unknown.js b/scraper/client/paper/paper.unknown.js deleted file mode 100644 index 876ac144..00000000 --- a/scraper/client/paper/paper.unknown.js +++ /dev/null @@ -1,41 +0,0 @@ -import React, { Component } from 'react' -import { bindActionCreators } from 'redux' -import { connect } from 'react-redux' -import { Link } from 'react-router-dom' - -import * as actions from '../actions' - -import { Loader } from '../common' -import { USES_DATASET } from '../types' - -import CitationList from './citationList.component' - -class PaperUnknown extends Component { - componentDidUpdate(prevProps) { - if (this.props.api.paperInfo.dataset !== prevProps.api.paperInfo.dataset) { - this.props.actions.getVerificationsDataset(this.props.api.paperInfo.dataset.key) - } - } - - render() { - const { paperInfo, unknownCitations, verifications } = this.props.api - const { dataset, citations } = paperInfo - if (!dataset || !citations || !verifications[dataset.key]) return <Loader /> - - return ( - <CitationList - title={dataset.name_full + ': Unknown Citations'} - citations={unknownCitations.citations} - /> - ) - } -} - -const mapStateToProps = state => ({ - api: state.api -}) -const mapDispatchToProps = dispatch => ({ - actions: bindActionCreators({ ...actions }, dispatch), -}) - -export default connect(mapStateToProps, mapDispatchToProps)(PaperUnknown) diff --git a/scraper/client/paper/paper.verify.js b/scraper/client/paper/paper.verify.js index 83eb5b44..813abb6d 100644 --- a/scraper/client/paper/paper.verify.js +++ b/scraper/client/paper/paper.verify.js @@ -36,7 +36,9 @@ class PaperVerify extends Component { const { sha256 } = this.props.match.params this.props.actions.getInstitutions() this.props.actions.getAddress(sha256) - this.props.actions.getVerification(sha256) + if (this.props.api.paperInfo.dataset) { + this.props.actions.getVerification(this.props.api.paperInfo.dataset.key, sha256) + } const citationState = this.getCitationState(sha256) // console.log('DID MOUNT') this.setState(citationState) @@ -51,15 +53,20 @@ class PaperVerify extends Component { const paper = verify ? verify.paper : null let newState = {} + if (this.props.api.paperInfo.dataset !== oldProps.api.paperInfo.dataset) { + this.props.actions.getVerification(this.props.api.paperInfo.dataset.key, sha256) + } + if (oldSha256 && sha256 !== oldSha256) { - console.log('update verification') + // console.log('update verification') this.props.actions.getAddress(sha256) - this.props.actions.getVerification(sha256) + this.props.actions.getVerification(this.props.api.paperInfo.dataset.key, sha256) const citationState = this.getCitationState(sha256) newState = { ...initialState, ...citationState, ...address.paper, + pdfIndex: citationState.citation.pdf.findIndex(el => el.match(/^https:/)), } this.setState(newState) } else if (verify && !verify.loading && verify.paper && (!oldPaper || oldPaper !== verify.paper)) { @@ -70,6 +77,7 @@ class PaperVerify extends Component { ...initialState, ...citationState, ...address.paper, + pdfIndex: citationState.citation.pdf.findIndex(el => el.match(/^https:/)), } this.setState(newState) } else { @@ -79,6 +87,7 @@ class PaperVerify extends Component { newState = { ...citationState, ...address.paper, + pdfIndex: citationState.citation.pdf.findIndex(el => el.match(/^https:/)), uses_dataset: paper.uses_dataset, images_in_paper: paper.images_in_paper, verified_by: paper.verified_by, @@ -156,7 +165,7 @@ class PaperVerify extends Component { if (citationIndex < 0) { history.push('/paper/' + key + '/info/') } else { - let nextId = citations[citationIndex].id + let nextId = sortedCitations[citationIndex].id history.push('/paper/' + key + '/verify/' + nextId) } } @@ -167,15 +176,16 @@ class PaperVerify extends Component { const { paperInfo, sortedCitations } = this.props.api const citations = sortedCitations || paperInfo.citations || [] let citationIndex = citations.findIndex(f => f.id === this.state.citation.id) - + console.log(sortedCitations) + console.log('going to next', key, citationIndex) if (citationIndex === -1) { history.push('/paper/' + key + '/info/') } else { citationIndex += 1 - if (citationIndex >= citations.length) { + if (citationIndex >= sortedCitations.length) { history.push('/paper/' + key + '/info/') } else { - let nextId = citations[citationIndex].id + let nextId = sortedCitations[citationIndex].id history.push('/paper/' + key + '/verify/' + nextId) } } @@ -414,7 +424,10 @@ class PaperVerify extends Component { >{'Next >'}</button> </div> - <iframe className='pdfViewer' referrerPolicy='no-referrer' src={citation.pdf[this.state.pdf_index]} /> + {citation.pdf.length + ? <iframe className='pdfViewer' referrerPolicy='no-referrer' src={citation.pdf[this.state.pdf_index] || "about:blank"} /> + : '' + } </div> ) } diff --git a/scraper/s2-geocode-server.py b/scraper/s2-geocode-server.py index 3aeda881..ad3efd5d 100644 --- a/scraper/s2-geocode-server.py +++ b/scraper/s2-geocode-server.py @@ -106,28 +106,32 @@ def list_dataset_verifications(dataset): dataset: verifications, }) -@app.route('/api/verify/<sha256>', methods=['GET']) -def find_verification(sha256): +@app.route('/api/verify/<dataset>/<sha256>', methods=['GET']) +def find_verification(dataset, sha256): worksheet = fetch_worksheet('verifications') + keys = worksheet.row_values(1) try: - cell = worksheet.find(sha256) + cells = worksheet.findall(sha256) except: return jsonify({ 'error': 'no_match' }) - if cell and cell.row: - keys = worksheet.row_values(1) + if not len(cells): + return jsonify({ + 'error': 'no_match' + }) + for cell in cells: values_list = worksheet.row_values(cell.row) lookup = {} for key, value in zip(keys, values_list): lookup[key] = value - return jsonify({ - 'paper': lookup, - }) - else: - return jsonify({ - 'error': 'no_match' - }) + if lookup['dataset'] == dataset: + return jsonify({ + 'paper': lookup, + }) + return jsonify({ + 'error': 'no_match' + }) @app.route('/api/verify/add', methods=['POST']) def add_verification(): |
