From f8012f88641b0bb378ba79393f277c8918ebe452 Mon Sep 17 00:00:00 2001 From: Jules Laplace Date: Thu, 18 Apr 2019 16:50:01 +0200 Subject: better sorting of papers --- scraper/client/paper/paper.manager.js | 25 +++++++++++++++++-------- scraper/client/types.js | 6 ++++++ 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'scraper') diff --git a/scraper/client/paper/paper.manager.js b/scraper/client/paper/paper.manager.js index 8b25b1cc..2c5b79de 100644 --- a/scraper/client/paper/paper.manager.js +++ b/scraper/client/paper/paper.manager.js @@ -3,7 +3,7 @@ import { bindActionCreators } from 'redux' import { connect } from 'react-redux' import * as actions from '../actions' -import { USES_DATASET } from '../types' +import { USES_DATASET, ADDRESS_TYPE_ENUM } from '../types' import { Loader } from '../common' @@ -16,13 +16,10 @@ class PaperManager extends Component { if (this.props.match.params.key !== oldProps.match.params.key) { this.props.actions.getPaperInfo(this.props.match.params.key) } - console.log('whoms?') if (this.props.api.paperInfo.dataset !== oldProps.api.paperInfo.dataset && this.props.api.paperInfo.dataset && this.props.api.paperInfo.dataset.key) { - console.log('vert?') this.props.actions.getVerificationsDataset(this.props.api.paperInfo.dataset.key) } if (this.props.api.verifications !== oldProps.api.verifications && this.props.api.paperInfo.dataset) { - console.log('updated?') this.updateSortedCitations() } } @@ -37,13 +34,25 @@ class PaperManager extends Component { } const citations = paperInfo.citations.concat(unknownCitations.citations) let verifiedLookup = verifications[dataset.key] || {} - const sortedCitations = citations.map(citation => [ + // first sort by verification status, + // then by a combination of its geolocated sources and paper count. + // penalize papers with no PDF + const sortedCitations = citations + .map(citation => [ citation.title, - verifiedLookup[citation.id] ? verifiedLookup[citation.id].uses_dataset : USES_DATASET.NO_DATA, - citation.pdf.length, + verifiedLookup[citation.id] + ? verifiedLookup[citation.id].uses_dataset + : USES_DATASET.NO_DATA, + (citation.addresses || []) + .map(address => (ADDRESS_TYPE_ENUM[address.type] || 0)) + .reduce((a, b) => (a + b), 0) + + (citation.pdf.length > 0 + ? Math.max(citation.pdf.length, 2) + : -2 + ), citation ]) - .sort((a,b) => (b[1] - a[1] || b[2] - a[2] || (a[0].localeCompare(b[0])))) + .sort((a, b) => (b[1] - a[1] || b[2] - a[2] || (a[0].localeCompare(b[0])))) .map(tup => ({ ...tup[3], verified: tup[1], diff --git a/scraper/client/types.js b/scraper/client/types.js index 22c93a89..049b2761 100644 --- a/scraper/client/types.js +++ b/scraper/client/types.js @@ -22,3 +22,9 @@ export const USES_DATASET = { UNKNOWN: "0", NO_DATA: "-2", } + +export const ADDRESS_TYPE_ENUM = { + 'edu': 1, + 'company': 3, + 'mil': 10, +} \ No newline at end of file -- cgit v1.2.3-70-g09d2