/*
  This script parses the big gun violence CSV and filters for lines
  containing a particular string.

  It reads ./data/gun_violence.csv, skips the header row and every record
  tagged 'Non-Shooting Incident', and writes two files:

    ./data/gun_violence_by_month.csv  one row per month (2013-01 .. 2018-12)
                                      with one tally column per filter below
    ./data/incident_counts.csv        how often each incident characteristic
                                      occurs, most frequent first

  Sample record (some column names are interleaved with the values):

  479363,
  2013-01-19,New Mexico,Albuquerque,2806 Long Lane,
  5,0,
  http://www.gunviolencearchive.org/incident/479363,
  http://hinterlandgazette.com/2013/01/pastor-greg-griego-identified-victims-killed-nehemiah-griego-albuquerque-nm-shooting.html,
  False,1,
  gun_stolen,
  0::Unknown||1::Unknown,
  gun_type,
  0::22 LR||1::223 Rem [AR-15],
  incident_characteristics,
  "Shot - Dead (murder, accidental, suicide) ||Mass Shooting (4+ victims injured or killed excluding the subject/suspect/perpetrator, one location) ||Domestic Violence",
  latitude, location_description, longitude,
  34.9791,,-106.716,
  n_guns_involved, notes,
  2,,
  participant_age,
  0::51||1::40||2::9||3::5||4::2||5::15,
  participant_age_group,
  0::Adult 18+||1::Adult 18+||2::Child 0-11||3::Child 0-11||4::Child 0-11||5::Teen 12-17,
  participant_gender,
  0::Male||1::Female||2::Male||3::Female||4::Female||5::Male,
  participant_name,
  0::Greg Griego||1::Sara Griego||2::Zephania Griego||3::Jael Griego||4::Angelina Griego||5::Nehemiah Griego,
  participant_relationship,
  5::Family,
  participant_status,
  "0::Killed||1::Killed||2::Killed||3::Killed||4::Killed||5::Unharmed, Arrested",
  participant_type,
  0::Victim||1::Victim||2::Victim||3::Victim||4::Victim||5::Subject-Suspect,
  http://www.cbsnews.com/news/nehemiah-gringo-case-memorial-service-planned-for-family-allegedly-slain-by-new-mexico-teen/||
  http://www.thewire.com/national/2013/01/teenager-reportedly-used-ar-15-kill-five-new-mexico/61199/||
  http://bigstory.ap.org/article/officials-nm-teen-gunman-kills-5-inside-home||
  http://www.huffingtonpost.com/2013/01/21/nehemiah-griego-teen-shoots-parents-3-children_n_2519359.html||
  http://murderpedia.org/male.G/g/griego-nehemiah.htm||
  http://hinterlandgazette.com/2013/01/pastor-greg-griego-identified-victims-killed-nehemiah-griego-albuquerque-nm-shooting.html,
  10,14
*/

const fs = require('fs')
const parse = require('csv-parse')
const stringify = require('csv-stringify')

// Inclusive range of years for which monthly buckets are created and written.
const FIRST_YEAR = 2013
const LAST_YEAR = 2018

// Position of participant_age_group in the array built by groupParticipants().
const AGE_GROUP = 1

// Column names in the order they appear in the input CSV.
const field_names = (
  "incident_id,date,state,city_or_county," +
  "address,n_killed,n_injured,incident_url,source_url," +
  "incident_url_fields_missing,congressional_district," +
  "gun_stolen,gun_type,incident_characteristics,latitude," +
  "location_description,longitude,n_guns_involved,notes," +
  "participant_age,participant_age_group,participant_gender," +
  "participant_name,participant_relationship," +
  "participant_status,participant_type," +
  "sources,state_house_district,state_senate_district"
)

// Column name -> zero-based column index.
const fields = field_names.split(',').reduce((index, name, i) => {
  index[name] = i
  return index
}, {})

// Header row of the monthly output: "Date" followed by one label per entry
// of `filters`, in the same order.
const filter_names = [
  "Date",
  "Children incidents",
  "Children count",
  "Teen incidents",
  "Teen count",
  "AR-15",
  "Shootouts",
  "Samaritans",
  "Domestic Violence",
  "Dead Pigs",
  "Perp Shot",
  "Carjackings",
  "School Shootings",
  "Mass Shootings",
  "Spree Shootings",
  "Officer Involved",
  "Surrenders",
  "Dead Bystanders",
]

// Each filter takes (row, participants) and returns a boolean or a number;
// apply() adds Number(result) to the matching column of the month's bucket.
const filters = [
  mentions('participant_age_group', 'Child'),
  countAgeGroup('Child'),
  mentions('participant_age_group', 'Teen'),
  countAgeGroup('Teen'),
  mentions('gun_type', 'AR-15'),
  mentions('incident_characteristics', 'Shootout'),
  mentions('incident_characteristics', 'Samaritan'),
  mentions('incident_characteristics', 'Domestic'),
  mentions('incident_characteristics', 'Officer shot'),
  mentions('incident_characteristics', 'perpetrator shot'),
  mentions('incident_characteristics', 'Car-jack'),
  mentions('incident_characteristics', 'School Shooting'),
  mentions('incident_characteristics', 'Mass Shooting'),
  mentions('incident_characteristics', 'Spree Shooting'),
  mentions('incident_characteristics', 'Officer Involved Shooting'),
  mentions('incident_characteristics', 'surrender'),
  mentions('incident_characteristics', 'Bystander'),
]

// 'YYYY-MM' -> [key, tally for filters[0], tally for filters[1], ...].
// Prototype-less so that arbitrary text from the data cannot hit inherited keys.
const groups = Object.create(null)
monthKeys().forEach(key => {
  groups[key] = [key].concat(zeroes(filters.length))
})

// Incident characteristic -> number of shooting records that carry it.
const incident_counts = Object.create(null)

/**
 * Returns the named column of a row as a string.
 * A missing or non-string cell yields '' so callers never touch undefined.
 */
function cell(row, name) {
  const value = row[fields[name]]
  return typeof value === 'string' ? value : ''
}

/** Builds a filter that is true when the named column contains `needle` (case-sensitive). */
function mentions(name, needle) {
  return row => cell(row, name).includes(needle)
}

/** Builds a filter that counts the participants whose age group contains `label`. */
function countAgeGroup(label) {
  return (row, participants) => participants[AGE_GROUP]
    .filter(group => Boolean(group) && group.includes(label))
    .length
}

/** Lists every 'YYYY-MM' key from FIRST_YEAR-01 to LAST_YEAR-12 in chronological order. */
function monthKeys() {
  const keys = []
  for (let year = FIRST_YEAR; year <= LAST_YEAR; year++) {
    for (let month = 1; month <= 12; month++) {
      keys.push(year + '-' + pad(month))
    }
  }
  return keys
}

/** Formats a number as a string, prefixing values below 10 with '0'. */
function pad(n) {
  return (n < 10 ? '0' : '') + n
}

/**
 * Decides whether a parsed row should be tallied: it must be an array that
 * has an incident_characteristics cell not tagged 'Non-Shooting Incident'.
 */
function test(row) {
  return (
    Array.isArray(row) &&
    typeof row[fields.incident_characteristics] === 'string' &&
    !row[fields.incident_characteristics].includes('Non-Shooting Incident')
  )
}

/**
 * Adds one row to the running totals: bumps the counter of each of its
 * incident characteristics, then adds every filter's result to the bucket
 * of the row's month.
 */
function apply(row) {
  // Characteristics are '||'-separated; splitting on '|' leaves empty
  // strings between them, which are skipped.
  cell(row, 'incident_characteristics').split('|').forEach(label => {
    if (!label) return
    incident_counts[label] = (incident_counts[label] || 0) + 1
  })

  // Only months created up front have a bucket; rows dated outside that
  // range (or with a malformed date) are left out of the monthly tallies.
  const bucket = groups[groupByMonth(row)]
  if (!bucket) return

  const participants = groupParticipants(row)
  filters.forEach((filter, i) => {
    bucket[i + 1] += Number(filter(row, participants))
  })
}

/**
 * Splits the per-participant columns ('0::a||1::b') into arrays of values,
 * in this order: age, age group, gender, relationship, status, type.
 * An entry without a '::' separator becomes undefined.
 */
function groupParticipants(row) {
  return [
    'participant_age',
    'participant_age_group',
    'participant_gender',
    'participant_relationship',
    'participant_status',
    'participant_type',
  ].map(name => cell(row, name).split('||').map(entry => entry.split('::')[1]))
}

/** Returns the first seven characters ('YYYY-MM') of the row's date column. */
function groupByMonth(row) {
  return cell(row, 'date').slice(0, 7)
}

/** Returns a new array holding n zeroes. */
function zeroes(n) {
  return new Array(n).fill(0)
}

/** Builds an 'error' listener that logs the message and marks the run as failed. */
function reportError(label) {
  return function (err) {
    console.error(label, err.message)
    process.exitCode = 1
  }
}

const input = fs.createReadStream('./data/gun_violence.csv')
const parser = parse()

const output = fs.createWriteStream('./data/gun_violence_by_month.csv')
const stringifier = stringify()
stringifier.pipe(output)

const incident_output = fs.createWriteStream('./data/incident_counts.csv')
const incident_stringifier = stringify()
incident_stringifier.pipe(incident_output)

input.on('error', reportError('input error'))
parser.on('error', reportError('parser error'))
output.on('error', reportError('output error'))
incident_output.on('error', reportError('output error'))

// pipe() forwards the file contents and ends the parser when the input ends,
// so no end-of-data sentinel row is needed.
input.pipe(parser)

// Number of rows read so far, header included.
let count = 0

parser.on('readable', function () {
  let row
  while ((row = parser.read()) !== null) {
    count += 1
    if (count === 1) {
      // The first row is the header, not data.
      continue
    }
    if ((count % 50000) === 0) {
      console.log(count + '...')
    }
    if (test(row)) {
      apply(row)
    }
  }
})

parser.on('end', function () {
  console.log('parser end')

  stringifier.write(filter_names)
  monthKeys().forEach(key => {
    stringifier.write(groups[key])
  })
  stringifier.end()

  Object.keys(incident_counts)
    .sort((a, b) => incident_counts[b] - incident_counts[a])
    .forEach(key => {
      incident_stringifier.write([key, incident_counts[key]])
      if (incident_counts[key] > 5) {
        console.log(incident_counts[key] + "\t" + key)
      }
    })
  incident_stringifier.end()
})