rm(list=ls()) library(effects) library(foreign) library(MASS) library(ggplot2) library(stargazer) setwd("~/Dropbox/essex/2015-16/gv903/lab17/") dir() ter<-read.dta("terrismaoz05.dta") head(ter) summary(ter) summary(ter$mediate) summary(ter$medintrus) summary(ter$minreg302) summary(ter$caprat) summary(ter$ally1) summary(ter$lagprmed) summary(ter$cumversatil) names(ter) summary(ter[,c(5,6,7,15,17,24,36)]) terom <- ter[is.na(ter$mediate)==FALSE & is.na(ter$minreg302)==FALSE & is.na(ter$caprat)==FALSE & is.na(ter$ally1)==FALSE & is.na(ter$lagprmed)==FALSE,] terom <- na.omit(ter[,c(5,6,7,15,17,24,36)]) terom <- na.omit(ter) # Check factor and correct levels ter$medintrus levels(ter$medintrus) table(ter$medintrus) summary(ter$medintrus) # Models logit <- glm(mediate ~ minreg302 + caprat + ally1 + lagprmed + cumversatil, data = terom, family=binomial("logit")) ologit <- polr(medintrus ~ minreg302 + caprat + ally1 + lagprmed + cumversatil, data = terom) stargazer(logit,ologit, type = "text",model.names=T) summary(ologit) eff.cv <- effect(term = "cumversatil", mod = logit) eff.cv plot(eff.cv, rescale.axis = FALSE, xlab = "Conflict Versatility", ylab = "Pr(Mediation)", rug = FALSE) eff.cv.ologit <- effect(term = "cumversatil", mod = ologit) eff.cv.ologit plot(eff.cv.ologit, rescale.axis = FALSE, xlab = "Conflict Versatility", ylab = "Pr(medintrus=k)", rug = FALSE) plot(eff.cv.ologit, rescale.axis = FALSE, xlab = "Conflict Versatility", ylab = "Pr(medintrus=k)", rug = FALSE, style = "stacked") eff.cv.mr.max <- effect(term = "cumversatil", mod = ologit, given.values = c("ally1Alliance" = 0)) eff.cv.mr.min <- effect(term = "cumversatil", mod = ologit, given.values = c("ally1Alliance" = 1)) plot(eff.cv.mr.min, rescale.axis = FALSE, xlab = "Conflict Versatility", ylab = "Pr(medintrus=k)", rug = FALSE, style = "stacked") plot(eff.cv.mr.max, rescale.axis = FALSE, xlab = "Conflict Versatility", ylab = "Pr(medintrus=k)", rug = FALSE, style = "stacked") terom$logit.hat <- logit$fitted.values dim(ologit$fitted.values) terom$ologit.hat1 <- ologit$fitted.values[,1] terom$ologit.hat2 <- ologit$fitted.values[,2] terom$ologit.hat3 <- ologit$fitted.values[,3] head(terom) # Count data --------------- rm(list = ls()) abs <- read.dta("http://www.ats.ucla.edu/stat/stata/dae/nb_data.dta") #abs <- read.dta("abs_data") head(abs) # We have 3 diff programmes table(abs$prog) # Days of absence distribution g <- ggplot(abs, aes(x=daysabs)) + geom_histogram(binwidth=1) g # First let's see which variables we will include in the model. fit.1 <- glm(daysabs ~ math, data = abs, family = poisson) stargazer(fit.1, type="text") fit.2 <- glm(daysabs ~ math + gender, data = abs, family = poisson) stargazer(fit.1,fit.2, type="text") # We choose the 1st or the 2nd model? # What about prog? table(abs$prog) fit.3 <- glm(daysabs ~ math + gender + prog, data = abs, family = poisson) stargazer(fit.1, fit.2, fit.3, type="text") # This is completely wrong! You need to know why! abs$prog1 <- ifelse(abs$prog==1,1,0) abs$prog2 <- ifelse(abs$prog==2,1,0) abs$prog3 <- ifelse(abs$prog==3,1,0) abs$prog2 <- factor(abs$prog2) abs$prog3 <- factor(abs$prog3) # Do we actually need the 3 of them? fit.3 <- glm(daysabs ~ math + gender + prog2 + prog3, data = abs, family = poisson) stargazer(fit.3, type="text") # Or we could leave R to do this. abs$prog<-as.factor(abs$prog) fit.3 <- glm(daysabs ~ math + gender + prog, data = abs, family = poisson) stargazer(fit.3, type="text") # So compare the model with the rest: stargazer(fit.1,fit.2,fit.3, type="text") # necessary for glm.nb fit.4 <- glm.nb(daysabs ~ math + gender + prog2 + prog3 , data = abs) stargazer(fit.3,fit.4, type="text") eff1<- effect(term = "gender", mod = fit.4, given.values=c("prog2"=0,"prog3"=0)) eff1 eff2 <- effect(term = "prog2", mod = fit.4, given.values=c("gendermale"=1, "prog31"=0)) eff2 eff2 <- effect(term = "prog3", mod = fit.4, given.values=c("gendermale"=1, "prog21"=0)) eff2