########################################################################### # PUBLG100: Introduction to Quantitative Methods # # Week 9 Solutions: Binary models: Logit # # ## ----message = FALSE----------------------------------------------------- library(foreign) library(Zelig) library(texreg) library(lmtest) library(dplyr) ## ------------------------------------------------------------------------ rm(list = ls()) ## ------------------------------------------------------------------------ bes <- read.dta("bes.dta") bes <- filter(bes, !is.na(Turnout), !is.na(Income), !is.na(polinfoindex), !is.na(CivicDutyScores), !is.na(Influence), !is.na(PartyID), !is.na(Gender), !is.na(edu15), !is.na(edu17), !is.na(edu18), !is.na(edu19plus), !is.na(in_school), !is.na(in_uni)) ## ------------------------------------------------------------------------ bes$Gender <- factor(bes$Gender, levels = c(0, 1), labels = c("Female", "Male")) ## ------------------------------------------------------------------------ table(bes$PartyID) hist(bes$CivicDutyScores, main = "Distribution of Civic Duty", xlab = "Level of Civic Duty", ylab = "Frequency") ## ------------------------------------------------------------------------ hist(bes$Income, main = "Distribution of Income", xlab = "Income Level", ylab = "Frequency") hist(bes$polinfoindex, main = "Distribution of Knowledge of Politics", xlab = "Level of Knowledge", ylab = "Frequency", breaks = seq(0, 8, 1)) ## ------------------------------------------------------------------------ hist(bes$Influence, main = "Distribution of Influence", xlab = "Influence Level", ylab = "Frequency") ## ------------------------------------------------------------------------ full_model <- glm(Turnout ~ Income + polinfoindex + Influence + CivicDutyScores + PartyID + Gender + Age + edu15 + edu17 + edu18 + edu19plus + in_school + in_uni, family = binomial(link = "logit"), data = bes) ## ------------------------------------------------------------------------ model2 <- glm(Turnout ~ Income + polinfoindex + Influence + Gender + Age + edu15 + edu17 + edu18 + edu19plus + in_school + in_uni, family = binomial(link = "logit"), data = bes) # likelihood ratio test lrtest(model2, full_model) ## ------------------------------------------------------------------------ predicted_probs <- predict.glm(full_model, type = "response") expected <- as.numeric(predicted_probs > 0.5) observed <- bes$Turnout outcome <- table(observed, expected) outcome (outcome[1,1] + outcome[2,2]) / sum(outcome) ## ------------------------------------------------------------------------ outcome[1,2] ## ------------------------------------------------------------------------ outcome[2,1] ## ------------------------------------------------------------------------ z.out <- zelig(Turnout ~ Income + polinfoindex + Influence + CivicDutyScores + PartyID + Gender + Age + edu15 + edu17 + edu18 + edu19plus + in_school + in_uni, model = "logit", data = bes) # set covariates: no identification with a party x.partyid.no <- setx(z.out, PartyID = 0, Income = mean(bes$Income), polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), CivicDutyScores = mean(bes$CivicDutyScores), Gender = "Female", Age = mean(bes$Age), edu15 = 1, edu17 = 0, edu18 = 0, edu19plus = 0, in_school = 0, in_uni = 0) # set covariates: person who identifies with a party x.partyid.yes <- setx(z.out, PartyID = 1, Income = mean(bes$Income), polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), CivicDutyScores = mean(bes$CivicDutyScores), Gender = "Female", Age = mean(bes$Age), edu15 = 1, edu17 = 0, edu18 = 0, edu19plus = 0, in_school = 0, in_uni = 0) # make results replicable set.seed(123) s.out <- sim(z.out, x = x.partyid.no, x1 = x.partyid.yes) summary(s.out) ## ------------------------------------------------------------------------ screenreg(list (model2, full_model), custom.model.names = c("Model 2", "Full Final Model")) ## ------------------------------------------------------------------------ summary(bes$Income) # Income for 1st quartile: 4 x.low <- setx(z.out, Income = 4, polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), CivicDutyScores = mean(bes$CivicDutyScores), PartyID = 1, Gender = "Female", Age = mean(bes$Age), edu15 = 1, edu17 = 0, edu18 = 0, edu19plus = 0, in_school = 0, in_uni = 0) # Income for 3rd quartile: 7 x.high <- setx(z.out, Income = 7, polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), CivicDutyScores = mean(bes$CivicDutyScores), PartyID = 1, Gender = "Female", Age = mean(bes$Age), edu15 = 1, edu17 = 0, edu18 = 0, edu19plus = 0, in_school = 0, in_uni = 0) s.out <- sim(z.out, x = x.low, x1 = x.high) summary(s.out) ## ------------------------------------------------------------------------ summary(bes$polinfoindex) # polinfoindex for 1st quartile: 4 x.low <- setx(z.out, Income = mean(bes$Income), polinfoindex = 4, Influence = mean(bes$Influence), CivicDutyScores = mean(bes$CivicDutyScores), PartyID = 1, Gender = "Female", Age = mean(bes$Age), edu15 = 1, edu17 = 0, edu18 = 0, edu19plus = 0, in_school = 0, in_uni = 0) # polinfoindex for 3rd quartile: 7 x.high <- setx(z.out, Income = mean(bes$Income), polinfoindex = 7, Influence = mean(bes$Influence), CivicDutyScores = mean(bes$CivicDutyScores), PartyID = 1, Gender = "Female", Age = mean(bes$Age), edu15 = 1, edu17 = 0, edu18 = 0, edu19plus = 0, in_school = 0, in_uni = 0) s.out <- sim(z.out, x = x.low, x1 = x.high) summary(s.out) ## ------------------------------------------------------------------------ x.out <- setx(z.out, Income = 1:13, polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), CivicDutyScores = mean(bes$CivicDutyScores), PartyID = 1, Gender = "Female", Age = mean(bes$Age), edu15 = 1, edu17 = 0, edu18 = 0, edu19plus = 0, in_school = 0, in_uni = 0) s.out <- sim(z.out, x.out) ci.plot(s.out, ci = 95, xlab = "Income", ylab = "Predicted probability of Voting", main = "Effect of Income on turnout") ## ------------------------------------------------------------------------ x.out <- setx(z.out, Income = mean(bes$Income), polinfoindex = 0:8, Influence = mean(bes$Influence), CivicDutyScores = mean(bes$CivicDutyScores), PartyID = 1, Gender = "Female", Age = mean(bes$Age), edu15 = 1, edu17 = 0, edu18 = 0, edu19plus = 0, in_school = 0, in_uni = 0) s.out <- sim(z.out, x.out) ci.plot(s.out, ci = 95, xlab = "Knowledge about politics", ylab = "Predicted probability of Voting", main = "Effect of knowledge about politics on turnout") ## ------------------------------------------------------------------------ x.out <- setx(z.out, Income = mean(bes$Income), polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), CivicDutyScores = seq(min(bes$CivicDutyScores), max(bes$CivicDutyScores), 0.01), PartyID = 1, Gender = "Female", Age = mean(bes$Age), edu15 = 1, edu17 = 0, edu18 = 0, edu19plus = 0, in_school = 0, in_uni = 0) s.out <- sim(z.out, x.out) ci.plot(s.out, ci = 95, ylab = "Predicted probability of Voting", main = "Effect of civic duty on turnout")