From fe67fe503ac4a07ece372b37bd1cadad1e88eb46 Mon Sep 17 00:00:00 2001
From: Jules Laplace
Date: Fri, 2 Jun 2017 18:34:16 -0400
Subject: awdrone - scrape BIJ drones page every 12 hours

---
 index.js                 |   3 ++
 lib/awdrone/index.js     | 144 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/awdrone/package.json |  15 ++++++++
 3 files changed, 162 insertions(+)
 create mode 100644 lib/awdrone/index.js
 create mode 100644 lib/awdrone/package.json

diff --git a/index.js b/index.js
index d87836c..cbe8dda 100644
--- a/index.js
+++ b/index.js
@@ -116,6 +116,9 @@ var app = okcms.createApp({
       lib: require("./lib/awprint"),
       mongodbUrl: "mongodb://localhost/awprint",
     },
+    awdrone: {
+      lib: require("./lib/awdrone"),
+    },
     push: {
       lib: require("./lib/okpush"),
       mongodbUrl: "mongodb://localhost/okpush_hga",
diff --git a/lib/awdrone/index.js b/lib/awdrone/index.js
new file mode 100644
index 0000000..df947cd
--- /dev/null
+++ b/lib/awdrone/index.js
@@ -0,0 +1,144 @@
+const fetch = require('node-fetch')
+
+// setTimeout() delays are expressed in milliseconds.
+// NOTE(review): 60 ms is the original start-up delay; confirm it was not meant to be 60 s.
+const STARTUP_DELAY_MS = 60
+const REFRESH_INTERVAL_MS = 12 * 60 * 60 * 1000 // 12 hours
+
+/**
+ * Service that keeps the stored drone statistics in sync with the figures
+ * scraped from the drone-war page: once shortly after construction, then
+ * every REFRESH_INTERVAL_MS.
+ *
+ * @param {Object} options
+ * @param {Object} options.express - used to create the router (required)
+ * @param {Object} options.config - must be present; not otherwise read here
+ * @param {Object} options.db - passed to update() to load and save the record
+ * @throws {Error} if options.express or options.config is missing
+ */
+function AWDrone (options) {
+  if (!(this instanceof AWDrone))
+    return new AWDrone(options)
+
+  options = options || {}
+  if (!options.express)
+    throw new Error('Express not provided to AWDrone');
+  if (!options.config)
+    throw new Error('Configuration not provided to AWDrone');
+
+  const express = options.express
+  const router = express.Router()
+  const db = options.db
+
+  // GET / runs an update on demand: 200 on success, 500 on failure.
+  router.get('/', function (req, res) {
+    update(db).then( () => {
+      res.sendStatus(200)
+    }).catch( (err) => {
+      console.error('awdrone: update failed', err)
+      res.sendStatus(500)
+    })
+  })
+
+  function refresh () {
+    setTimeout(refresh, REFRESH_INTERVAL_MS)
+    // Nothing else observes this promise, so handle its rejection here.
+    update(db).catch( (err) => {
+      console.error('awdrone: scheduled update failed', err)
+    })
+  }
+
+  setTimeout(refresh, STARTUP_DELAY_MS)
+
+  this._router = router
+}
+
+/**
+ * Scrape the current figures and save them on the 'drone-statistics' record
+ * of the 'drone' resource.
+ *
+ * @param {Object} db - resource store exposing get(type) / get(type, id)
+ * @returns {Promise} resolves with no value once the record is saved; rejects
+ *   if the record cannot be loaded, the scrape does not yield exactly four
+ *   figures, or the save fails
+ */
+function update (db) {
+  const type = 'drone'
+  const id = 'drone-statistics'
+
+  // Start from a native promise so that a synchronous throw or a rejection
+  // while loading the record rejects the result instead of leaving it pending.
+  return Promise.resolve().then( () => {
+    return db.get(type).get(id)
+  }).then( data => {
+    return scrape().then( matches => {
+      if (matches.length !== 4) {
+        throw new Error('problem retrieving matches')
+      }
+
+      data.strikes = matches[0]
+      data.totalKilled = matches[1]
+      data.civiliansKilled = matches[2]
+      data.childrenKilled = matches[3]
+
+      const resource = db.get(type, id)
+      // Return the save promise so that a failed save rejects the result.
+      return resource.update(id, data)
+    })
+  }).then( () => {} )
+}
+
+/**
+ * Fetch the drone-war page and pull out the figure that follows each
+ * `stat__figure">` marker, in page order.
+ *
+ * @param {Function} [cb] - unused
+ * @returns {Promise<string[]>} the matched figures; [] when the request
+ *   fails, which callers see as a wrong-length result rather than a rejection
+ */
+function scrape (cb) {
+  return fetch('https://www.thebureauinvestigates.com/projects/drone-war')
+    .then(response => {
+      return response.text()
+    })
+    .then(body => {
+      const statRegexp = new RegExp('stat__figure">([-0-9,]+)<', 'g')
+      return getMatches(body, statRegexp)
+    })
+    .catch( err => {
+      // Best effort: report the failure instead of discarding it silently.
+      console.error('awdrone: scrape failed', err)
+      return []
+    })
+}
+
+/**
+ * Collect one capture group from every match of a regex in a string.
+ *
+ * @param {string} string - text to search
+ * @param {RegExp} regex - pattern; a non-global pattern is searched globally
+ * @param {number} [index=1] - capture group to collect (0 is the whole match)
+ * @returns {string[]} the selected group of each match, in order
+ */
+function getMatches(string, regex, index) {
+  index = index == null ? 1 : index
+  // exec() only advances on a global regex; without the 'g' flag the loop
+  // below would never end, so search with a global copy instead.
+  const pattern = regex.global ? regex : new RegExp(regex.source, regex.flags + 'g')
+  pattern.lastIndex = 0
+
+  const matches = []
+  let match
+  while ((match = pattern.exec(string)) !== null) {
+    matches.push(match[index])
+    // A zero-length match does not move lastIndex; step past it manually.
+    if (match[0] === '') pattern.lastIndex++
+  }
+  return matches
+}
+
+AWDrone.prototype.middleware = function () {
+  return this._router
+}
+
+module.exports = AWDrone
diff --git a/lib/awdrone/package.json b/lib/awdrone/package.json
new file mode 100644
index 0000000..e2ebb2d
--- /dev/null
+++ b/lib/awdrone/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "awdrone",
+  "version": "1.0.0",
+  "description": "aw drone scrape",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "author": "",
+  "license": "UNLICENSED",
+  "dependencies": {
+    "node-fetch": "^1.7.0",
+    "q": "^1.5.0"
+  }
+}
-- 
cgit v1.2.3-70-g09d2