###########################################################################
# PUBLG100: Introduction to Quantitative Methods
#
# Week 3 Seminar: T-test for Difference in Means and Hypothesis Testing
#
#

# Set your working directory
#
# CAUTION: Make sure the directory you specify here matches the working
# directory on your computer. We're using N:/PUBLG100 only for illustration
# purposes and it would only work if you're using a UCL desktop. If you're
# using your own laptop, then replace N:/PUBLG100 with the appropriate
# directory (or folder)
setwd("N:/PUBLG100")

# Verify that your working directory is set correctly
getwd()

## ------------------------------------------------------------------------
# clear workspace (removes every object from the global environment)
rm(list = ls())

## ------------------------------------------------------------------------
library(foreign) # to work with foreign file formats

# loading a STATA format dataset (remember to load the library foreign 1st);
# the file is looked up relative to the working directory set above
world.data <- read.dta("QoG2012.dta")

# the dimensions: rows (observations) and columns (variables)
dim(world.data)

# the variable names
names(world.data)

# let's look at the first few observations
head(world.data)

## ------------------------------------------------------------------------
# install dplyr only if it is not available yet, so that re-running the
# script does not download and reinstall the package every time
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

## ----message=FALSE-------------------------------------------------------
# load dplyr
library(dplyr)

## ------------------------------------------------------------------------
# general pattern: dataset.name <- rename(dataset.name, new.name = old.name)
# h_j = 1 means there is an independent judiciary
# rename h_j to judiciary

# rename a variable and save the result in our data frame
world.data <- rename(world.data, judiciary = h_j)

# check the result
names(world.data)

## ------------------------------------------------------------------------
# frequency table of binary independent variable
table(world.data$judiciary)

## ------------------------------------------------------------------------
# creating a factor variable
# turn the numeric judiciary indicator into a labelled factor;
# levels and labels are matched by position (1 -> "independent")
# NOTE(review): this assumes the other category is coded -5 in the raw
# data -- confirm against the frequency table of the raw variable
world.data$judiciary <- factor(
  world.data$judiciary,
  levels = c(1, -5),
  labels = c("independent", "controlled")
)

# checking the result
head(world.data)

# a frequency table of judiciary
table(world.data$judiciary)

## ------------------------------------------------------------------------
summary(world.data$wdi_gdpc)

## ------------------------------------------------------------------------
# creating subsets of our data based on the status of the judiciary
free.legal <- filter(world.data, judiciary == "independent")
controlled.legal <- filter(world.data, judiciary == "controlled")

## ------------------------------------------------------------------------
# mean income levels, we remove missings
mean(free.legal$wdi_gdpc, na.rm = TRUE)
mean(controlled.legal$wdi_gdpc, na.rm = TRUE)

## ------------------------------------------------------------------------
# t.test
# Interval DV (GDP per capita)
# Binary IV (independent judiciary)
# argument names are written out in full instead of relying on partial
# matching (alt -> alternative, conf -> conf.level)
t.test(
  wdi_gdpc ~ judiciary,
  data = world.data,
  mu = 0,
  alternative = "two.sided",
  conf.level = 0.95
)

## ------------------------------------------------------------------------
# renaming variables
world.data <- rename(
  world.data,
  hdi = undp_hdi,
  corruption.control = wbgi_cce
)

## ------------------------------------------------------------------------
# scatterplot
plot(
  x = world.data$corruption.control,
  y = world.data$hdi,
  xlim = c(-2, 3),
  ylim = c(0, 1),
  frame.plot = FALSE, # no box around the plotting region
  xlab = "World Bank Control of Corruption Index",
  ylab = "UNDP Human Development Index",
  main = "Relationship b/w Quality of Institutions and Quality of Life"
)

## ------------------------------------------------------------------------
# Pearson's r including test statistic
# cor.test() has no `use` argument; it drops incomplete pairs on its own,
# so nothing extra is needed to handle missing values here
cor.test(
  world.data$corruption.control,
  world.data$hdi,
  conf.level = 0.99
)