summary refs log tree commit diff
path: root/scraper/content-script/check.js
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/content-script/check.js')
-rw-r--r-- scraper/content-script/check.js 102
1 files changed, 102 insertions, 0 deletions
diff --git a/scraper/content-script/check.js b/scraper/content-script/check.js
new file mode 100644
index 00000000..0644084d
--- /dev/null
+++ b/scraper/content-script/check.js
@@ -0,0 +1,102 @@
+(function(){
+
+ var listening = false, loaded = false
+ var TYPES = { Status: 0, Connected: 1, Disconnected: 2, Image: 3, Text: 4 }
+ var PUNCTUATION_REGEX = /[\[\]\{\}]/g // i.e. resembles code
+ var NULL_ARRAY = []
+ var seen = {}
+ const toArray = (a) => Array.prototype.slice.call(a)
+ const $ = (s) => document.querySelector(s)
+ const $$ = (s) => document.querySelectorAll(s)
+
+ function init () {
+ if (window.location.href.indexOf("lvh.me") !== -1 || window.location.href.indexOf("localhost") !== -1) return
+ bind()
+ }
+ function bind () {
+ chrome.extension.onMessage.addListener(onMessage)
+ chrome.extension.sendMessage({ type: TYPES.Status }, gotStatus)
+ }
+ function gotStatus (response) {
+ console.log('got status', response)
+ if (response && response.status === "on") {
+ console.log('its on', loaded)
+ if (! loaded) {
+ console.log('started')
+ setTimeout(() => { start() }, 5000)
+ // send(document.body.innerText)
+ // setInterval(function(){
+ // send(document.body.innerText)
+ // }, 10000)
+ }
+ loaded = true
+ }
+ }
+ function onMessage (request, sender, sendResponse) {
+ switch (request.method) {
+ case 'start':
+ start()
+ break
+ case 'stop':
+ listening = false
+ break
+ }
+ }
+ function start(){
+ console.log(window.location.href, listening)
+ if (window.location.href.indexOf('schol' + 'ar' + '.go' + 'og' + 'le') === -1) return
+ if (listening) return
+ listening = true
+ energize()
+ }
+
+ function energize(){
+ console.log('energize')
+ const records = toArray($$(".gs_r")).map((el, i) => {
+ let data = {}
+ let link = el.querySelector("h3 a")
+ if (link) {
+ data.link = link.href
+ }
+ let pdfLink = el.querySelector(".gs_or_ggsm a")
+ if (pdfLink) {
+ data.pdfLink = pdfLink.href
+ }
+ let attribution = el.querySelector('.gs_a')
+ if (attribution) {
+ data.attribution = attribution.innerText
+ data.attributionLinks = toArray(attribution.querySelectorAll('a')).map(a => ({
+ href: a.href,
+ name: a.innerText,
+ }))
+ }
+ let snippet = el.querySelector('.gs_a')
+ if (snippet) {
+ data.snippet = snippet.innerText
+ }
+ let citationLink = el.querySelector('.gs_fl a:nth-of-type(3)')
+ if (citationLink && citationLink.innerText.match('Cited by')) {
+ data.citationLink = citationLink.href
+ data.citationCount = parseInt(citationLink.innerText.replace(/^\s*Cited by /, ''), 10) || -1
+ }
+ return data
+ })
+ let record = {
+ title: document.querySelector('title').innerText,
+ url: window.location.href,
+ records: records,
+ }
+ send(JSON.stringify(record))
+ let nextLink = $("#gs_n td:last-child a")
+ if (nextLink) {
+ setTimeout(() => { nextLink.click() }, 19000 + (Math.random() * 21000))
+ }
+ send("done")
+ }
+ function send (text) {
+ chrome.extension.sendMessage({ type: TYPES.Text, data: text }, function(){})
+ }
+
+ init() // Run setup as soon as this script is evaluated.
+
+})()