This post includes some follow-up R code to scrape 2018 election data from the New York Times webpage.
Yesterday, I scraped data for the 2014 gubernatorial election from the New York Times webpage. Since the reporting is almost complete in many locations, I also scraped the 2018 election data from this year’s New York Times election webpage, which pulls its data from the JSON file at this link.
The whole dataset for house, senate, and governor elections can be downloaded from this link as a CSV file. Once this file is merged with the dataset from the MIT Election Data and Science Lab, you are ready to dive in. I will post some R code in the following days for some basic analysis and graphs using the FL dataset.
For those interested, below is the source code for scraping the 2018 election data.
require(jsonlite)
require(Hmisc)
# Download the New York Times 2018 election feed and parse it into R objects.
a <- fromJSON("https://int.nyt.com/applications/elections/2018/api/1/races/2018-11-06.json")
# The sixth top-level element is used as the race table; it is addressed by
# position, so this breaks if the feed layout changes.
races <- a[[6]]
# Per-race candidate and county tables, indexed by the race's row position in
# `races` (presumably one data frame per race -- verify against the feed).
candidates <- races$candidates
counties <- races$counties
# Pulling Results for Governors
# Builds one data frame per governor race that lists more than one candidate
# (one row per county with Republican, Democratic, and all-other vote totals)
# and stacks them into `electiongov2018`.
gov <- which(races$race_type == "governor")
governor.race <- vector("list", length(gov))
# seq_along() does not iterate when `gov` is empty, whereas 1:length(gov)
# would run over c(1, 0).
for (i in seq_along(gov)) {
  name <- races[gov[i], ]$state_slug
  info <- candidates[[gov[i]]]
  # Races without a usable candidate table, or with a single listed candidate,
  # are skipped; their slot in `governor.race` stays NULL.
  if (is.data.frame(info) && nrow(info) > 1) {
    rep.key <- info[which(info$party_id == "republican"), ]$candidate_key
    dem.key <- info[which(info$party_id == "democrat"), ]$candidate_key
    others <- info$candidate_key[!info$candidate_key %in% c(rep.key, dem.key)]
    # County names followed by one vote column per candidate key.
    state <- cbind(counties[[gov[i]]]$name, counties[[gov[i]]]$results)
    colnames(state)[1] <- "county"
    # Sum the vote columns of each candidate group per county; a group with no
    # candidates contributes zeros.
    votes <- lapply(
      list(repu = rep.key, dem = dem.key, other = others),
      function(keys) {
        if (length(keys) >= 1) {
          rowSums(as.matrix(state[, keys]))
        } else {
          rep(0, nrow(state))
        }
      }
    )
    state <- cbind(state$county, as.data.frame(do.call(cbind, votes)))
    state <- cbind(capitalize(name), state)
    colnames(state) <- c("state", "county", "repgov18", "demgov18", "othergov18")
    governor.race[[i]] <- state
  }
}
# Stack all per-race frames in one call. The previous 2:length(gov) loop ran
# over c(2, 1) and failed when only one race was present, and grew the data
# frame on every iteration.
electiongov2018 <- do.call(rbind, Filter(Negate(is.null), governor.race))
# Pulling Results for Senate
# Builds one data frame per senate race that lists more than one candidate
# (one row per county with Republican, Democratic, and all-other vote totals)
# and stacks them into `electionsen2018`.
sen <- which(races$race_type == "senate")
senate.race <- vector("list", length(sen))
# seq_along() does not iterate when `sen` is empty, whereas 1:length(sen)
# would run over c(1, 0).
for (i in seq_along(sen)) {
  name <- races[sen[i], ]$state_slug
  info <- candidates[[sen[i]]]
  # Races without a usable candidate table, or with a single listed candidate,
  # are skipped; their slot in `senate.race` stays NULL.
  if (is.data.frame(info) && nrow(info) > 1) {
    rep.key <- info[which(info$party_id == "republican"), ]$candidate_key
    dem.key <- info[which(info$party_id == "democrat"), ]$candidate_key
    others <- info$candidate_key[!info$candidate_key %in% c(rep.key, dem.key)]
    # County names followed by one vote column per candidate key.
    state <- cbind(counties[[sen[i]]]$name, counties[[sen[i]]]$results)
    colnames(state)[1] <- "county"
    # Sum the vote columns of each candidate group per county; a group with no
    # candidates contributes zeros.
    votes <- lapply(
      list(repu = rep.key, dem = dem.key, other = others),
      function(keys) {
        if (length(keys) >= 1) {
          rowSums(as.matrix(state[, keys]))
        } else {
          rep(0, nrow(state))
        }
      }
    )
    state <- cbind(state$county, as.data.frame(do.call(cbind, votes)))
    state <- cbind(capitalize(name), state)
    colnames(state) <- c("state", "county", "repsen18", "demsen18", "othersen18")
    senate.race[[i]] <- state
  }
}
# Stack all per-race frames in one call. The previous 2:length(sen) loop ran
# over c(2, 1) and failed when only one race was present, and grew the data
# frame on every iteration.
electionsen2018 <- do.call(rbind, Filter(Negate(is.null), senate.race))
# Pulling Results for House
# Builds one data frame per house race that lists more than one candidate
# (one row per county with Republican, Democratic, and all-other vote totals),
# stacks them, and then collapses the result to one row per state/county.
house <- which(races$race_type == "house")
house.race <- vector("list", length(house))
# seq_along() does not iterate when `house` is empty, whereas 1:length(house)
# would run over c(1, 0).
for (i in seq_along(house)) {
  name <- races[house[i], ]$state_slug
  info <- candidates[[house[i]]]
  # Races without a usable candidate table, or with a single listed candidate,
  # are skipped; their slot in `house.race` stays NULL.
  if (is.data.frame(info) && nrow(info) > 1) {
    rep.key <- info[which(info$party_id == "republican"), ]$candidate_key
    dem.key <- info[which(info$party_id == "democrat"), ]$candidate_key
    others <- info$candidate_key[!info$candidate_key %in% c(rep.key, dem.key)]
    # County names followed by one vote column per candidate key.
    state <- cbind(counties[[house[i]]]$name, counties[[house[i]]]$results)
    colnames(state)[1] <- "county"
    # Sum the vote columns of each candidate group per county; a group with no
    # candidates contributes zeros.
    votes <- lapply(
      list(repu = rep.key, dem = dem.key, other = others),
      function(keys) {
        if (length(keys) >= 1) {
          rowSums(as.matrix(state[, keys]))
        } else {
          rep(0, nrow(state))
        }
      }
    )
    state <- cbind(state$county, as.data.frame(do.call(cbind, votes)))
    state <- cbind(capitalize(name), state)
    colnames(state) <- c("state", "county", "rephouse18", "demhouse18", "otherhouse18")
    house.race[[i]] <- state
  }
}
# Stack all non-skipped races in one call. The previous 2:length(house) loop
# ran over c(2, 1) and failed when only one race was present, and grew the
# data frame on every iteration.
electionhouse2018 <- do.call(rbind, Filter(Negate(is.null), house.race))
# The same state/county pair can occur in more than one house race
# (presumably counties that span several districts), so sum the totals to get
# a single row per state and county.
electionhouse2018 <- aggregate(
  cbind(rephouse18, demhouse18, otherhouse18) ~ state + county,
  data = electionhouse2018, FUN = sum
)
# Order the rows by state (first column).
electionhouse2018 <- electionhouse2018[order(electionhouse2018[, 1]), ]
##########################################################################################
# Merge Senate, House, and Governor Data
# Full outer join on state and county, folded left to right: governor results
# first, then senate, then house. Rows present in only some tables are kept.
election2018 <- Reduce(
  function(left, right) merge(left, right, by = c("state", "county"), all = TRUE),
  list(electiongov2018, electionsen2018, electionhouse2018)
)
For attribution, please cite this work as
Zopluoglu (2018, Nov. 7). Cengiz Zopluoglu: Scraping Election 2018 Data. Retrieved from https://github.com/czopluoglu/website/tree/master/docs/posts/scraping-2018-election-data/
BibTeX citation
@misc{zopluoglu2018scraping, author = {Zopluoglu, Cengiz}, title = {Cengiz Zopluoglu: Scraping Election 2018 Data}, url = {https://github.com/czopluoglu/website/tree/master/docs/posts/scraping-2018-election-data/}, year = {2018} }