diff options
Diffstat (limited to 'bin')
| -rw-r--r-- | bin/grep.js | 47 | ||||
| -rw-r--r-- | bin/group.js | 274 |
2 files changed, 306 insertions, 15 deletions
diff --git a/bin/grep.js b/bin/grep.js index af30bf2..b4eb3bd 100644 --- a/bin/grep.js +++ b/bin/grep.js @@ -60,7 +60,6 @@ const fs = require('fs') const parse = require('csv-parse') const stringify = require('csv-stringify') -const search = "AR-15" const fields = ("incident_id,date,state,city_or_county," + "address,n_killed,n_injured,incident_url,source_url," + "incident_url_fields_missing,congressional_district," @@ -73,13 +72,21 @@ const fields = ("incident_id,date,state,city_or_county," .split(',') .reduce((a,b,i) => { a[b] = i + return a }, {}) -const search = 'AR-15' +const search = 'good-samaritans' const regexp = new RegExp(search, 'i') function test(row){ - return row[fields.gun_type].match(regexp) + return ( + row + // && row[fields.incident_characteristics].indexOf('Non-Shooting Incident') === -1 + && row[fields.incident_characteristics].indexOf('Samaritan') !== -1 + // // && row[fields.participant_age_group].indexOf('Teen') !== -1 + // && row[fields.participant_age_group].indexOf('Child') !== -1 + // && row[fields.gun_type].indexOf('AR-15') !== -1 + ) } const input = fs.createReadStream('./data/gun_violence.csv') @@ -87,39 +94,49 @@ const parser = parse() const stringifier = stringify() const output = fs.createWriteStream('./data/' + search + '.csv') -stream.on('readable', function() { - var buf; - while ((buf = stream.read()) !== null) { +input.on('readable', function() { + let buf + while ((buf = input.read()) !== null) { parser.write(buf) } -}); -stream.on('finish', function(){ +}) +input.on('error', function(err){ + console.error('input error', err.message) +}) +input.on('finish', function(){ parser.end() }) +let i = 0 parser.on('readable', function(){ - let record - while (record = parser.read()) { - if (test(record[fields])) { - stringifier.write(record) + let row + while (row = parser.read()) { + if (i === 0) { + stringifier.write(row) + } + if ((++i % 10000) === 0) { + console.log(i + '...') + } + if (test(row)) { + stringifier.write(row) } } }) parser.on('error', function(err){ - console.error(err.message) + console.error('parser error', err.message) }) parser.on('end', function(){ stringifier.end() }) stringifier.on('readable', function(){ - let row; + let row while(row = stringifier.read()){ output.write(row) } }) stringifier.on('error', function(err){ - // console.error(err.message) + console.error('stringifier error', err.message) }) stringifier.on('finish', function(){ output.end() diff --git a/bin/group.js b/bin/group.js new file mode 100644 index 0000000..e2e1681 --- /dev/null +++ b/bin/group.js @@ -0,0 +1,274 @@ + +/* + This script parses the big gun violence CSV and filters for lines containing a particular string. + + 479363, + 2013-01-19,New Mexico,Albuquerque,2806 Long Lane, + 5,0, + http://www.gunviolencearchive.org/incident/479363, + http://hinterlandgazette.com/2013/01/pastor-greg-griego-identified-victims-killed-nehemiah-griego-albuquerque-nm-shooting.html, + False,1, + + gun_stolen, + 0::Unknown||1::Unknown, + + gun_type, + 0::22 LR||1::223 Rem [AR-15], + + incident_characteristics, + "Shot - Dead (murder, accidental, suicide) + ||Mass Shooting (4+ victims injured or killed excluding the subject/suspect/perpetrator, one location) + ||Domestic Violence", + + latitude, location_description, longitude, + 34.9791,,-106.716, + + n_guns_involved, notes, + 2,, + + participant_age, + 0::51||1::40||2::9||3::5||4::2||5::15, + + participant_age_group, + 0::Adult 18+||1::Adult 18+||2::Child 0-11||3::Child 0-11||4::Child 0-11||5::Teen 12-17, + + participant_gender, + 0::Male||1::Female||2::Male||3::Female||4::Female||5::Male, + + participant_name, + 0::Greg Griego||1::Sara Griego||2::Zephania Griego||3::Jael Griego||4::Angelina Griego||5::Nehemiah Griego, + + participant_relationship, + 5::Family, + + participant_status, + "0::Killed||1::Killed||2::Killed||3::Killed||4::Killed||5::Unharmed, Arrested", + + participant_type, + 0::Victim||1::Victim||2::Victim||3::Victim||4::Victim||5::Subject-Suspect, + + http://www.cbsnews.com/news/nehemiah-gringo-case-memorial-service-planned-for-family-allegedly-slain-by-new-mexico-teen/|| + http://www.thewire.com/national/2013/01/teenager-reportedly-used-ar-15-kill-five-new-mexico/61199/|| + http://bigstory.ap.org/article/officials-nm-teen-gunman-kills-5-inside-home|| + http://www.huffingtonpost.com/2013/01/21/nehemiah-griego-teen-shoots-parents-3-children_n_2519359.html|| + http://murderpedia.org/male.G/g/griego-nehemiah.htm|| + http://hinterlandgazette.com/2013/01/pastor-greg-griego-identified-victims-killed-nehemiah-griego-albuquerque-nm-shooting.html, + 10,14 +*/ + +const fs = require('fs') +const parse = require('csv-parse') +const stringify = require('csv-stringify') + +const field_names = ( + "incident_id,date,state,city_or_county," + + "address,n_killed,n_injured,incident_url,source_url," + + "incident_url_fields_missing,congressional_district," + + "gun_stolen,gun_type,incident_characteristics,latitude," + + "location_description,longitude,n_guns_involved,notes," + + "participant_age,participant_age_group,participant_gender," + + "participant_name,participant_relationship," + + "participant_status,participant_type," + + "sources,state_house_district,state_senate_district" +) +const fields = field_names.split(',') + .reduce((a,b,i) => { + a[b] = i + return a + }, {}) + +const filter_names = [ + "Date", + "Children incidents", + "Children count", + "Teen incidents", + "Teen count", + "AR-15", + "Shootouts", + "Samaritans", + "Domestic Violence", + "Dead Pigs", + "Perp Shot", + "Carjackings", + "School Shootings", + "Mass Shootings", + "Spree Shootings", + "Officer Involved", + "Surrenders", + "Dead Bystanders", +] +const filters = [ + (row, participants) => row[fields.participant_age_group].indexOf('Child') !== -1, + (row, participants) => participants[1].reduce((a, s) => ( (s && s.indexOf('Child')) !== -1 ? a + 1 : a ), 0), + (row, participants) => row[fields.participant_age_group].indexOf('Teen') !== -1, + (row, participants) => participants[1].reduce((a, s) => ( (s && s.indexOf('Teen')) !== -1 ? a + 1 : a ), 0), + (row, participants) => row[fields.gun_type].indexOf('AR-15') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('Shootout') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('Samaritan') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('Domestic') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('Officer shot') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('perpetrator shot') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('Car-jack') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('School Shooting') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('Mass Shooting') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('Spree Shooting') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('Officer Involved Shooting') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('surrender') !== -1, + (row, participants) => row[fields.incident_characteristics].indexOf('Bystander') !== -1, +] + +const groups = {} +const incident_counts = {} +for (let i = 2013; i <= 2018; i++) { + for (let j = 1; j <= 12; j++) { + let s = i + '-' + pad(j) + groups[s] = zeroes(filters.length + 1) + groups[s][0] = s + } +} + +function pad(n){ + if (n < 10) { + return '0' + n + } + return n +} +function test(row){ + return ( + row + && row[fields.incident_characteristics].indexOf('Non-Shooting Incident') === -1 + ) +} +function apply(row){ + const date = groupByMonth(row) + const p = groupParticipants(row) + + row[fields.incident_characteristics].split('|').forEach(s => { + if (!s) return + if (incident_counts[s]) { + incident_counts[s] += 1 + } else { + incident_counts[s] = 1 + } + }) + + filters.reduce((a, f, i) => { + a[i + 1] += Number(f(row, p)) + return a + }, groups[date]) +} +function groupParticipants(row){ + return [ + row[fields.participant_age], + row[fields.participant_age_group], + row[fields.participant_gender], + row[fields.participant_relationship], + row[fields.participant_status], + row[fields.participant_type], + ].map(field => field.split('||').map(s => s.split('::')[1])) +} +function groupByMonth(row){ + return row[fields.date].substr(0, 7) +} +function transpose(a) { + let i_len = a.length, j_len = a[0].length + let T = new Array(i_len) + for (let i = 0; i < i_len; i++) { + T[i] = new Array(j_len) + for (var j = 0; j < j_len; j++) { + T[i][j] = a[j][i] + } + } + return T +} +function zeroes(n) { + const a = new Array (n) + for (var k = 0; k < n; k++) { + a[k] = 0 + } + return a +} +// function transform([ +// incident_id, date, state, city_or_county, +// address, n_killed, n_injured, incident_url, source_url, +// incident_url_fields_missing, congressional_district, +// gun_stolen, gun_type, incident_characteristics, latitude, +// location_description, longitude, n_guns_involved, notes, +// participant_age, participant_age_group, participant_gender, +// participant_name, participant_relationship, +// participant_status, participant_type, +// sources, state_house_district, state_senate_district +// ]){ +// return [ +// ] +// } + +const input = fs.createReadStream('./data/gun_violence.csv') +const parser = parse() + +const output = fs.createWriteStream('./data/gun_violence_by_month.csv') +const stringifier = stringify() +stringifier.pipe(output) + +const incident_output = fs.createWriteStream('./data/incident_counts.csv') +const incident_stringifier = stringify() +incident_stringifier.pipe(incident_output) + +input.on('readable', function() { + let buf + while ((buf = input.read()) !== null) { + parser.write(buf) + } +}) +input.on('error', function(err){ + console.error('input error', err.message) +}) +input.on('end', function(){ + parser.write(zeroes(fields.length)) + parser.end() +}) + +let count = 0 +parser.on('readable', function(){ + let row + while (row = parser.read()) { + if (count === 0) { + // stringifier.write(row) + count += 1 + continue + } + if ((++count % 50000) === 0) { + console.log(count + '...') + } + if (row[0] === 0) { + parser.end() + } else if (test(row)) { + apply(row) + } + } +}) +parser.on('error', function(err){ + console.error('parser error', err.message) +}) +parser.on('end', function(){ + console.log('parser end') + // console.log(groups) + stringifier.write(filter_names) + for (let i = 2013; i <= 2018; i++) { + for (let j = 1; j <= 12; j++) { + let s = i + '-' + pad(j) + stringifier.write(groups[s]) + } + } + stringifier.end() + + Object.keys(incident_counts) + .sort((a,b) => incident_counts[b] - incident_counts[a]) + .map(key => { + incident_stringifier.write([key, incident_counts[key]]) + if (incident_counts[key] > 5) { + console.log(incident_counts[key] + "\t" + key) + } + }) + incident_stringifier.end() +}) |
