Last active
March 27, 2025 16:02
-
-
Save naomispence/84ffbd0850eb85ae7082ba2bd9175568 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Lab Report 5: Making and Comparing Confidence Intervals | |
# Packages loaded for this lab. ggplot2 draws the graphs; the other
# packages are attached in the same order as before.
library(ggplot2)
library(dplyr)
library(lsr)
library(descr)
library(Hmisc)
library("lehmansociology")

# Load the gss123 dataset into the workspace.
data(gss123)

# A large scipen penalty makes R print numbers in fixed notation rather
# than scientific notation.
options(scipen = 999)
#For this LR we will be looking at the | |
#relationship between a dichotomous nominal independent variable | |
#and an interval ratio dependent variable | |
#First, we have to look at our dichotomous variable and see | |
#what the two categories are. | |
#WHICH DICHOTOMOUS VARIABLE ARE YOU USING? | |
#Look at the two categories in your frequency table
#and decide what you will call each of your mini | |
#datasets. Choose names that make sense. | |
#Below, REPLACE maledata AND femaledata WITH THE NAMES OF YOUR TWO | |
#MINI DATASETS. REPLACE sex WITH THE NAME OF YOUR | |
#DICHOTOMOUS VARIABLE. REPLACE "Male" and "Female" | |
#WITH THE TWO CATEGORIES OF YOUR FREQUENCY TABLE. | |
#REMEMBER TO INCLUDE THEM IN QUOTATION MARKS AND | |
#TO WRITE THEM EXACTLY AS THEY APPEAR (FOR | |
#EXAMPLE, IF THEY ARE CAPITALIZED IN THE FREQUENCY | |
#TABLE THEY NEED TO BE CAPITALIZED IN THE CODE). | |
# Split gss123 into one mini dataset per category of the dichotomous
# variable. subset() keeps only the rows where the condition is TRUE, so
# rows with a missing value for sex end up in neither mini dataset.
maledata <- subset(gss123, sex == "Male")
femaledata <- subset(gss123, sex == "Female")
#Follow the code below for the rest of your | |
#LR but replace maledata and | |
#femaledata with the names of your two mini datasets | |
#and replace sei10 with the name of your | |
#interval ratio variable. | |
#Make sure to change titles and labels, too! | |
#Fill in your answers below | |
#WHICH INTERVAL-RATIO VARIABLE ARE YOU USING? | |
#WHAT IS YOUR RESEARCH QUESTION? | |
#Summary statistics and a histogram for your dependent variable | |
#for your first mini dataset | |
#CHANGE THIS CODE BY REPLACING maledata WITH THE NAME | |
#OF ONE OF YOUR TWO MINI DATASETS AND REPLACING | |
#sei10 WITH YOUR DEPENDENT VARIABLE | |
#AND BY EDITING THE TITLES AND LABELS FOR GRAPHS | |
#REMEMBER TO THINK ABOUT BINWIDTH AND WHETHER YOU WANT | |
#TO CHANGE IT TO SOMETHING BIGGER | |
# Summary statistics and standard deviation of the dependent variable
# for the first mini dataset, ignoring missing values.
summary(maledata$sei10, na.rm = TRUE)
sd(maledata$sei10, na.rm = TRUE)

# Histogram with the y-axis rescaled from raw counts to percent of cases.
# after_stat() replaces the deprecated ..count.. notation; the whole
# expression is evaluated on the bin counts computed by the stat.
ggplot(data = maledata, aes(x = sei10)) +
  geom_histogram(
    mapping = aes(y = after_stat(count / sum(count) * 100)),
    color = "blue", fill = "pink", binwidth = 1
  ) +
  ggtitle("Distribution of Males' Socioeconomic Index Score, GSS") +
  labs(y = "Percent", x = "Socioeconomic Index Score")
#INTERPRET THE SUMMARY STATISTICS AND HISTOGRAM | |
#Now we are getting summary statistics and a | |
#histogram for your second mini dataset. | |
#CHANGE THIS CODE BY REPLACING femaledata WITH THE NAME | |
#OF YOUR OTHER MINI DATASET AND REPLACING | |
#sei10 WITH YOUR DEPENDENT VARIABLE | |
#AND BY EDITING THE TITLES AND LABELS FOR GRAPHS | |
#REMEMBER TO THINK ABOUT BINWIDTH AND WHETHER YOU WANT | |
#TO CHANGE IT TO SOMETHING BIGGER | |
# Summary statistics and standard deviation of the dependent variable
# for the second mini dataset, ignoring missing values.
summary(femaledata$sei10, na.rm = TRUE)
sd(femaledata$sei10, na.rm = TRUE)

# Histogram with the y-axis rescaled from raw counts to percent of cases.
# after_stat() replaces the deprecated ..count.. notation; the whole
# expression is evaluated on the bin counts computed by the stat.
ggplot(data = femaledata, aes(x = sei10)) +
  geom_histogram(
    mapping = aes(y = after_stat(count / sum(count) * 100)),
    color = "blue", fill = "pink", binwidth = 1
  ) +
  ggtitle("Distribution of Females' Socioeconomic Index Score, GSS") +
  labs(y = "Percent", x = "Socioeconomic Index Score")
#INTERPRET THE SUMMARY STATISTICS AND HISTOGRAM | |
#Comparing means in a bar graph using the gss123 dataset | |
#CHANGE THE INDEPENDENT VARIABLE (replace sex) | |
#CHANGE THE DEPENDENT VARIABLE (replace sei10) | |
#CHANGE THE LABELS | |
# Bar graph of the mean of the dependent variable within each category
# of the independent variable. stat_summary() applies mean() to the
# y values of every x group and draws the result as a bar.
ggplot(data = gss123) +
  stat_summary(aes(x = sex, y = sei10), fun = mean, geom = "bar") +
  xlab("Sex") +
  ylab("Mean Socioeconomic Index Score") +
  # Tilt the category labels so longer names do not run into each other.
  theme(axis.text.x = element_text(angle = -45))
#ANSWER THIS: Compare the results you got for your two groups. | |
#Which has a bigger mean? Which has more variability? | |
#Based on the bar graph of means, | |
#does it seem like the two categories of your | |
#dichotomous variable | |
#differ in the dependent variable? | |
#USING INFERENTIAL STATISTICS TO COMPARE GROUPS
#Let's compare the confidence intervals for the mean of our | |
#dependent variable for the two categories of our independent variable. | |
#REPLACE maledata AND femaledata WITH THE NAMES OF YOUR MINI DATASETS | |
#REPLACE sei10 WITH THE NAME OF YOUR DEPENDENT VARIABLE | |
# Confidence intervals for the mean of the dependent variable, computed
# separately for each mini dataset so the two intervals can be compared.
# Missing values are dropped before each interval is computed.

# Comparing the CIs - 95%
ciMean(maledata$sei10, na.rm = TRUE, conf = 0.95)
ciMean(femaledata$sei10, na.rm = TRUE, conf = 0.95)

# Comparing the CIs - 99%
ciMean(maledata$sei10, na.rm = TRUE, conf = 0.99)
ciMean(femaledata$sei10, na.rm = TRUE, conf = 0.99)
#INTERPRET the confidence intervals and come to a conclusion about whether | |
#your independent and dependent variables are related. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment