naomispence · March 27, 2025 16:02
diff --git a/LR5: Making and Comparing Confidence Intervals b/LR5: Making and Comparing Confidence Intervals
 #Lab Report 5: Making and Comparing Confidence Intervals
 library(ggplot2)
 library(dplyr)
 library(lsr)
 library(descr)
 library(Hmisc)
 library('lehmansociology')
 #Load the gss123 dataset into the workspace
 #(presumably shipped with the lehmansociology package - confirm).
 data("gss123")
 #Large scipen penalty so numbers print in fixed rather than
 #scientific notation.
 options(scipen = 999)

 #For this LR we will be looking at the
 #relationship between a dichotomous nominal independent variable
 #and an interval ratio dependent variable
 #First, we have to look at our dichotomous variable and see
 #what the two categories are. 

 #WHICH DICHOTOMOUS VARIABLE ARE YOU USING?

 #Look at the two categories in your frequency table
 #and decide what you will call each of your mini
 #datasets. Choose names that make sense. 

 #Below, REPLACE maledata AND femaledata WITH THE NAMES OF YOUR TWO
 #MINI DATASETS. REPLACE sex WITH THE NAME OF YOUR 
 #DICHOTOMOUS VARIABLE. REPLACE "Male" and "Female"
 #WITH THE TWO CATEGORIES OF YOUR FREQUENCY TABLE.
 #REMEMBER TO INCLUDE THEM IN QUOTATION MARKS AND
 #TO WRITE THEM EXACTLY AS THEY APPEAR (FOR
 #EXAMPLE, IF THEY ARE CAPITALIZED IN THE FREQUENCY
 #TABLE THEY NEED TO BE CAPITALIZED IN THE CODE).

 #Build one mini dataset per category of the dichotomous variable.
 #subset() keeps only the rows where the condition is TRUE, so rows
 #where sex is missing (NA) end up in neither mini dataset.
 maledata <- subset(gss123, sex == "Male")
 femaledata <- subset(gss123, sex == "Female")

 #Follow the code below for the rest of your
 #LR but replace maledata and
 #femaledata with the names of your two mini datasets
 #and replace sei10 with the name of your 
 #interval ratio variable.
 #Make sure to change titles and labels, too!
 #Fill in your answers below

 #WHICH INTERVAL-RATIO VARIABLE ARE YOU USING?

 #WHAT IS YOUR RESEARCH QUESTION?

 #Summary statistics and a histogram for your dependent variable 
 #for your first mini dataset

 #CHANGE THIS CODE BY REPLACING maledata WITH THE NAME
 #OF ONE OF YOUR TWO MINI DATASETS AND REPLACING
 #sei10 WITH YOUR DEPENDENT VARIABLE
 #AND BY EDITING THE TITLES AND LABELS FOR GRAPHS
 #REMEMBER TO THINK ABOUT BINWIDTH AND WHETHER YOU WANT
 #TO CHANGE IT TO SOMETHING BIGGER

 #Minimum, quartiles, median and mean of the dependent variable for the
 #first mini dataset. summary() has no na.rm argument (it was silently
 #ignored before) and, for a numeric vector, reports the NA count itself.
 summary(maledata$sei10)
 #Standard deviation, with missing values dropped.
 sd(maledata$sei10, na.rm = TRUE)

 #Histogram with the y-axis shown as a percent of the plotted cases.
 #after_stat(count) replaces the deprecated ..count.. notation.
 ggplot(data = maledata, aes(x = sei10)) +
   geom_histogram(
     aes(y = after_stat(count / sum(count) * 100)),
     binwidth = 1, color = "blue", fill = "pink"
   ) +
   ggtitle("Distribution of Males' Socioeconomic Index Score, GSS") +
   labs(y = "Percent", x = "Socioeconomic Index Score")

 #INTERPRET THE SUMMARY STATISTICS AND HISTOGRAM

 #Now we are getting summary statistics and a
 #histogram for your second mini dataset. 

 #CHANGE THIS CODE BY REPLACING femaledata WITH THE NAME
 #OF YOUR OTHER MINI DATASET AND REPLACING
 #sei10 WITH YOUR DEPENDENT VARIABLE
 #AND BY EDITING THE TITLES AND LABELS FOR GRAPHS
 #REMEMBER TO THINK ABOUT BINWIDTH AND WHETHER YOU WANT
 #TO CHANGE IT TO SOMETHING BIGGER

 #Minimum, quartiles, median and mean of the dependent variable for the
 #second mini dataset. summary() has no na.rm argument (it was silently
 #ignored before) and, for a numeric vector, reports the NA count itself.
 summary(femaledata$sei10)
 #Standard deviation, with missing values dropped.
 sd(femaledata$sei10, na.rm = TRUE)

 #Histogram with the y-axis shown as a percent of the plotted cases.
 #after_stat(count) replaces the deprecated ..count.. notation.
 ggplot(data = femaledata, aes(x = sei10)) +
   geom_histogram(
     aes(y = after_stat(count / sum(count) * 100)),
     binwidth = 1, color = "blue", fill = "pink"
   ) +
   ggtitle("Distribution of Females' Socioeconomic Index Score, GSS") +
   labs(y = "Percent", x = "Socioeconomic Index Score")

 #INTERPRET THE SUMMARY STATISTICS AND HISTOGRAM

 #Comparing means in a bar graph using the gss123 dataset
 #CHANGE THE INDEPENDENT VARIABLE (replace sex)
 #CHANGE THE DEPENDENT VARIABLE (replace sei10) 
 #CHANGE THE LABELS
 #Bar chart: one bar per category of sex, bar height = mean of sei10.
 ggplot(data = gss123, mapping = aes(x = sex, y = sei10)) +
   stat_summary(geom = "bar", fun = mean) +
   labs(x = "Sex", y = "Mean Socioeconomic Index Score") +
   #Rotate the category labels on the x-axis by -45 degrees.
   theme(axis.text.x = element_text(angle = -45))

 #ANSWER THIS: Compare the results you got for your two groups. 
 #Which has a bigger mean? Which has more variability? 
 #Based on the bar graph of means, 
 #does it seem like the two categories of your 
 #dichotomous variable
 #differ in the dependent variable?

 #USING INFERENTIAL STATISTICS TO COMPARE GROUPS
 #Let's compare the confidence intervals for the mean of our
 #dependent variable for the two categories of our independent variable.
 #REPLACE maledata AND femaledata WITH THE NAMES OF YOUR MINI DATASETS
 #REPLACE sei10 WITH THE NAME OF YOUR DEPENDENT VARIABLE

 #Comparing the CIs- 95%
 #95% confidence interval for the mean in each mini dataset,
 #with missing values removed before computing.
 ciMean(maledata$sei10, conf = 0.95, na.rm = TRUE)
 ciMean(femaledata$sei10, conf = 0.95, na.rm = TRUE)

 #Comparing the CIs - 99%
 #Same two intervals at the 99% confidence level.
 ciMean(maledata$sei10, conf = 0.99, na.rm = TRUE)
 ciMean(femaledata$sei10, conf = 0.99, na.rm = TRUE)

 #INTERPRET the confidence intervals and come to a conclusion about whether
 #your independent and dependent variables are related.
	#Lab Report 5: Making and Comparing Confidence Intervals
	library(ggplot2)
	library(dplyr)
	library(lsr)
	library(descr)
	library(Hmisc)
	library('lehmansociology')
	#Load the gss123 dataset into the workspace
	#(presumably shipped with the lehmansociology package - confirm).
	data("gss123")
	#Large scipen penalty so numbers print in fixed rather than
	#scientific notation.
	options(scipen = 999)

	#For this LR we will be looking at the
	#relationship between a dichotomous nominal independent variable
	#and an interval ratio dependent variable
	#First, we have to look at our dichotomous variable and see
	#what the two categories are.

	#WHICH DICHOTOMOUS VARIABLE ARE YOU USING?

	#Look at the two categories in your frequency table
	#and decide what you will call each of your mini
	#datasets. Choose names that make sense.

	#Below, REPLACE maledata AND femaledata WITH THE NAMES OF YOUR TWO
	#MINI DATASETS. REPLACE sex WITH THE NAME OF YOUR
	#DICHOTOMOUS VARIABLE. REPLACE "Male" and "Female"
	#WITH THE TWO CATEGORIES OF YOUR FREQUENCY TABLE.
	#REMEMBER TO INCLUDE THEM IN QUOTATION MARKS AND
	#TO WRITE THEM EXACTLY AS THEY APPEAR (FOR
	#EXAMPLE, IF THEY ARE CAPITALIZED IN THE FREQUENCY
	#TABLE THEY NEED TO BE CAPITALIZED IN THE CODE).

	#Build one mini dataset per category of the dichotomous variable.
	#subset() keeps only the rows where the condition is TRUE, so rows
	#where sex is missing (NA) end up in neither mini dataset.
	maledata <- subset(gss123, sex == "Male")
	femaledata <- subset(gss123, sex == "Female")

	#Follow the code below for the rest of your
	#LR but replace maledata and
	#femaledata with the names of your two mini datasets
	#and replace sei10 with the name of your
	#interval ratio variable.
	#Make sure to change titles and labels, too!
	#Fill in your answers below

	#WHICH INTERVAL-RATIO VARIABLE ARE YOU USING?

	#WHAT IS YOUR RESEARCH QUESTION?

	#Summary statistics and a histogram for your dependent variable
	#for your first mini dataset

	#CHANGE THIS CODE BY REPLACING maledata WITH THE NAME
	#OF ONE OF YOUR TWO MINI DATASETS AND REPLACING
	#sei10 WITH YOUR DEPENDENT VARIABLE
	#AND BY EDITING THE TITLES AND LABELS FOR GRAPHS
	#REMEMBER TO THINK ABOUT BINWIDTH AND WHETHER YOU WANT
	#TO CHANGE IT TO SOMETHING BIGGER

	#Minimum, quartiles, median and mean of the dependent variable for the
	#first mini dataset. summary() has no na.rm argument (it was silently
	#ignored before) and, for a numeric vector, reports the NA count itself.
	summary(maledata$sei10)
	#Standard deviation, with missing values dropped.
	sd(maledata$sei10, na.rm = TRUE)

	#Histogram with the y-axis shown as a percent of the plotted cases.
	#after_stat(count) replaces the deprecated ..count.. notation.
	ggplot(data = maledata, aes(x = sei10)) +
	  geom_histogram(
	    aes(y = after_stat(count / sum(count) * 100)),
	    binwidth = 1, color = "blue", fill = "pink"
	  ) +
	  ggtitle("Distribution of Males' Socioeconomic Index Score, GSS") +
	  labs(y = "Percent", x = "Socioeconomic Index Score")

	#INTERPRET THE SUMMARY STATISTICS AND HISTOGRAM

	#Now we are getting summary statistics and a
	#histogram for your second mini dataset.

	#CHANGE THIS CODE BY REPLACING femaledata WITH THE NAME
	#OF YOUR OTHER MINI DATASET AND REPLACING
	#sei10 WITH YOUR DEPENDENT VARIABLE
	#AND BY EDITING THE TITLES AND LABELS FOR GRAPHS
	#REMEMBER TO THINK ABOUT BINWIDTH AND WHETHER YOU WANT
	#TO CHANGE IT TO SOMETHING BIGGER

	#Minimum, quartiles, median and mean of the dependent variable for the
	#second mini dataset. summary() has no na.rm argument (it was silently
	#ignored before) and, for a numeric vector, reports the NA count itself.
	summary(femaledata$sei10)
	#Standard deviation, with missing values dropped.
	sd(femaledata$sei10, na.rm = TRUE)

	#Histogram with the y-axis shown as a percent of the plotted cases.
	#after_stat(count) replaces the deprecated ..count.. notation.
	ggplot(data = femaledata, aes(x = sei10)) +
	  geom_histogram(
	    aes(y = after_stat(count / sum(count) * 100)),
	    binwidth = 1, color = "blue", fill = "pink"
	  ) +
	  ggtitle("Distribution of Females' Socioeconomic Index Score, GSS") +
	  labs(y = "Percent", x = "Socioeconomic Index Score")

	#INTERPRET THE SUMMARY STATISTICS AND HISTOGRAM

	#Comparing means in a bar graph using the gss123 dataset
	#CHANGE THE INDEPENDENT VARIABLE (replace sex)
	#CHANGE THE DEPENDENT VARIABLE (replace sei10)
	#CHANGE THE LABELS
	#Bar chart: one bar per category of sex, bar height = mean of sei10.
	ggplot(data = gss123, mapping = aes(x = sex, y = sei10)) +
	  stat_summary(geom = "bar", fun = mean) +
	  labs(x = "Sex", y = "Mean Socioeconomic Index Score") +
	  #Rotate the category labels on the x-axis by -45 degrees.
	  theme(axis.text.x = element_text(angle = -45))

	#ANSWER THIS: Compare the results you got for your two groups.
	#Which has a bigger mean? Which has more variability?
	#Based on the bar graph of means,
	#does it seem like the two categories of your
	#dichotomous variable
	#differ in the dependent variable?

	#USING INFERENTIAL STATISTICS TO COMPARE GROUPS
	#Let's compare the confidence intervals for the mean of our
	#dependent variable for the two categories of our independent variable.
	#REPLACE maledata AND femaledata WITH THE NAMES OF YOUR MINI DATASETS
	#REPLACE sei10 WITH THE NAME OF YOUR DEPENDENT VARIABLE

	#Comparing the CIs- 95%
	#95% confidence interval for the mean in each mini dataset,
	#with missing values removed before computing.
	ciMean(maledata$sei10, conf = 0.95, na.rm = TRUE)
	ciMean(femaledata$sei10, conf = 0.95, na.rm = TRUE)

	#Comparing the CIs - 99%
	#Same two intervals at the 99% confidence level.
	ciMean(maledata$sei10, conf = 0.99, na.rm = TRUE)
	ciMean(femaledata$sei10, conf = 0.99, na.rm = TRUE)

	#INTERPRET the confidence intervals and come to a conclusion about whether
	#your independent and dependent variables are related.