Forked from Lakens/p-value_misconceptions_figures.R
Created
October 28, 2018 07:31
-
-
Save praveenkumarpgiindia/bb9b5c7da80db5087f910236e2cf55c0 to your computer and use it in GitHub Desktop.
figures to explain p-value misconceptions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
options(scipen=999) #disable scientific notation for numbers | |
#Figure 1 & 2 (set to N <- 5000 for Figure 2) | |
# Set x-axis upper and lower scalepoint (to do: automate) | |
low_x<--1 | |
high_x<-1 | |
y_max<-2 | |
#Set sample size per group and effect size d (assumes equal sample sizes per group) | |
N<-50 #sample size per group for indepndent t-test | |
d<-0.5 #please enter positive d only | |
#Calculate non-centrality parameter - equals t-value from sample | |
ncp<-d*sqrt(N/2) | |
ncp | |
#or Cumming, page 305 | |
ncp<-d/(sqrt((1/N)+(1/N))) | |
#calc d-distribution | |
x=seq(low_x,high_x,length=10000) #create x values | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution | |
#Set max Y | |
y_max<-max(d_dist)+0.5 | |
#create plot | |
#png(file="Fig1.png",width=4000,height=2500, res = 500) | |
par(bg = "white") | |
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null-hypothesis for N = ",N)) | |
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1) | |
#lines(x,d_dist,col='black',type='l', lwd=2) | |
#add d = 0 line | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2) | |
lines(x,d_dist,col='black',type='l', lwd=2) | |
#Line for 0 | |
#abline(v=0) | |
#Line for critical t-value | |
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
#Add Type 1 error rate right | |
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(crit_d,10,length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5)) | |
#Add Type 1 error rate left | |
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(-10, crit_d, length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5)) | |
#dev.off() | |
#Figure 3 & 4 (set d <- 1.5 and high_x <- 1.5 for figure 4)----- | |
low_x<--1 | |
high_x<-3 | |
y_max<-2 | |
#Set sample size per group and effect size d (assumes equal sample sizes per group) | |
N<-50 #sample size per group for indepndent t-test | |
d<-0.5 #please enter positive d only | |
#Calculate non-centrality parameter - equals t-value from sample | |
ncp<-d*sqrt(N/2) | |
ncp | |
#or Cumming, page 305 | |
ncp<-d/(sqrt((1/N)+(1/N))) | |
#calc d-distribution | |
x=seq(low_x,high_x,length=10000) #create x values | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution | |
#Set max Y | |
y_max<-max(d_dist)+0.5 | |
#create plot | |
#png(file="Fig1.png",width=4000,height=2500, res = 500) | |
par(bg = "white") | |
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null- and alternative hypothesis for N = ",N)) | |
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1) | |
lines(x,d_dist,col='black',type='l', lwd=2) | |
#add d = 0 line | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2) | |
lines(x,d_dist,col='darkgrey',type='l', lwd=2) | |
#Line for 0 | |
#abline(v=0) | |
#Line for critical t-value | |
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
#Add Type 1 error rate right | |
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(crit_d,10,length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5)) | |
#Add Type 1 error rate left | |
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(-10, crit_d, length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5)) | |
#Add Type 2 error rate | |
#crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
#y=seq(-10,crit_d,length=10000) | |
#z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp=ncp)*sqrt(N/2)) #determine upperbounds polygon | |
#polygon(c(y,crit_d,crit_d),c(0,z,0),col=rgb(0, 0, 1,0.5)) | |
#Lines for Cohen's d and Hedges' g | |
# abline(v=d, lwd=2, col=rgb(0, 0, 1,0.5)) #Line at d | |
# abline(v=d*(1-(3/(4*(N-2)-1))), lwd=2, col=rgb(1, 0, 0,0.5)) #Line at Hedges g | |
#segments(crit_d, 0, crit_d, y_max-0.8, col= 'black', lwd=2) | |
#text(crit_d, y_max-0.5, paste("Effects larger than d = ",round(crit_d,2),"will be statistically significant"), cex = 1) | |
#dev.off() | |
#Figure 5---- | |
low_x<--1 | |
high_x<-1.5 | |
y_max<-2 | |
#Set sample size per group and effect size d (assumes equal sample sizes per group) | |
N<-50 #sample size per group for indepndent t-test | |
d<-0.5 #please enter positive d only | |
#Calculate non-centrality parameter - equals t-value from sample | |
ncp<-d*sqrt(N/2) | |
ncp | |
#or Cumming, page 305 | |
ncp<-d/(sqrt((1/N)+(1/N))) | |
#calc d-distribution | |
x=seq(low_x,high_x,length=10000) #create x values | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution | |
#Set max Y | |
y_max<-max(d_dist)+0.5 | |
#create plot | |
#png(file="Fig1.png",width=4000,height=2500, res = 500) | |
par(bg = "white") | |
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null- and alternative hypothesis for N = ",N)) | |
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1) | |
lines(x,d_dist,col='black',type='l', lwd=2) | |
#add d = 0 line | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2) | |
lines(x,d_dist,col='darkgrey',type='l', lwd=2) | |
#Line for 0 | |
#abline(v=0) | |
#Line for critical t-value | |
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
#Add Type 1 error rate right | |
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(crit_d,10,length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5)) | |
#Add Type 1 error rate left | |
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(-10, crit_d, length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5)) | |
#Add Type 2 error rate | |
#crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
#y=seq(-10,crit_d,length=10000) | |
#z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp=ncp)*sqrt(N/2)) #determine upperbounds polygon | |
#polygon(c(y,crit_d,crit_d),c(0,z,0),col=rgb(0, 0, 1,0.5)) | |
#Lines for Cohen's d and Hedges' g | |
# abline(v=d, lwd=2, col=rgb(0, 0, 1,0.5)) #Line at d | |
# abline(v=d*(1-(3/(4*(N-2)-1))), lwd=2, col=rgb(1, 0, 0,0.5)) #Line at Hedges g | |
segments(0.35, 0, 0.35, 2.2, col= 'black', lwd=2) | |
text(0.35, 2.4, paste("Observed mean difference"), cex = 1) | |
#dev.off() | |
#Figure 6 & & (y_max <- 100 m---- | |
# Set x-axis upper and lower scalepoint (to do: automate) | |
low_x<--1 | |
high_x<-1 | |
y_max<-2.5 | |
#Set sample size per group and effect size d (assumes equal sample sizes per group) | |
N<-50 #sample size per group for indepndent t-test | |
d<-0.0 #please enter positive d only | |
#Calculate non-centrality parameter - equals t-value from sample | |
ncp<-d*sqrt(N/2) | |
ncp | |
#or Cumming, page 305 | |
ncp<-d/(sqrt((1/N)+(1/N))) | |
#calc d-distribution | |
x=seq(low_x,high_x,length=10000) #create x values | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution | |
#create plot | |
#png(file="Fig1.png",width=4000,height=2500, res = 500) | |
par(bg = "white") | |
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null-hypothesis for N = ",N)) | |
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1) | |
#lines(x,d_dist,col='black',type='l', lwd=2) | |
#add d = 0 line | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2) | |
lines(x,d_dist,col='black',type='l', lwd=2) | |
#Line for 0 | |
#abline(v=0) | |
#Line for critical t-value | |
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
#Add Type 1 error rate right | |
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(crit_d,10,length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5)) | |
#Add Type 1 error rate left | |
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(-10, crit_d, length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5)) | |
segments(0.5, 0, 0.5, 2.2, col= 'black', lwd=2) | |
text(0.5, 2.4, paste("Observed mean difference"), cex = 1) | |
#segments(0.01, 0, 0.01, 93, col= 'black', lwd=2) | |
#text(0.01, 96, paste("Observed mean difference"), cex = 1) | |
#dev.off() | |
#Figure 8----- | |
low_x<--0.5 | |
high_x<-1 | |
y_max<-5 | |
#Set sample size per group and effect size d (assumes equal sample sizes per group) | |
N<-150 #sample size per group for indepndent t-test | |
d<-0.4175971 #please enter positive d only | |
#Calculate non-centrality parameter - equals t-value from sample | |
ncp<-d*sqrt(N/2) | |
ncp | |
#or Cumming, page 305 | |
ncp<-d/(sqrt((1/N)+(1/N))) | |
#calc d-distribution | |
x=seq(low_x,high_x,length=10000) #create x values | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = ncp)*sqrt(N/2) #calculate distribution of d based on t-distribution | |
#Set max Y | |
#y_max<-max(d_dist)+0.5 | |
#create plot | |
#png(file="Fig1.png",width=4000,height=2500, res = 500) | |
par(bg = "white") | |
plot(-10,xlim=c(low_x,high_x), ylim=c(0,y_max), xlab="Difference", ylab='',main=paste("Null- and alternative hypothesis for N = ",N)) | |
#abline(v = seq(low_x,high_x,0.1), h = seq(0,0.5,0.1), col = "lightgray", lty = 1) | |
lines(x,d_dist,col='black',type='l', lwd=2) | |
#add d = 0 line | |
d_dist<-dt(x*sqrt(N/2),df=(N*2)-2, ncp = 0)*sqrt(N/2) | |
lines(x,d_dist,col='darkgrey',type='l', lwd=2) | |
#Line for 0 | |
#abline(v=0) | |
#Line for critical t-value | |
# abline(v=abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
# abline(v=-abs(qt(0.05/2, N-2))/sqrt((N/2)*(N/2)/(N))) | |
#Add Type 1 error rate right | |
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(crit_d,10,length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(crit_d,y,10),c(0,z,0),col=rgb(1, 0, 0,0.5)) | |
#Add Type 1 error rate left | |
crit_d<--abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(-10, crit_d, length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(y,crit_d,crit_d),c(z,0,0),col=rgb(1, 0, 0,0.5)) | |
#Add Type 2 error rate | |
crit_d<-abs(qt(0.05/2, (N*2)-2))/sqrt(N/2) | |
y=seq(-10,crit_d,length=10000) | |
z<-(dt(y*sqrt(N/2),df=(N*2)-2, ncp=ncp)*sqrt(N/2)) #determine upperbounds polygon | |
polygon(c(y,crit_d,crit_d),c(0,z,0),col=rgb(0, 0, 1,0.5)) | |
#Lines for Cohen's d and Hedges' g | |
# abline(v=d, lwd=2, col=rgb(0, 0, 1,0.5)) #Line at d | |
# abline(v=d*(1-(3/(4*(N-2)-1))), lwd=2, col=rgb(1, 0, 0,0.5)) #Line at Hedges g | |
segments(0.228, 0, 0.228, 3.9, col= 'black', lwd=2) | |
text(0.225, 4.4, paste("Observed mean difference"), cex = 1) | |
#dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment