5 T test

5.1 Calculate t-statistic step by step in R

# Load the tips dataset shipped with reshape2
library(reshape2)
data(tips)
# Create the tip-percent variable (tip as a fraction of the total bill)
tips$percent <- tips$tip / tips$total_bill
# Split the dataset by sex for ease
splits <- split(tips, tips$sex)
# Save the per-group data sets; select by level name rather than by
# position so the result does not depend on the factor level order
women <- splits[["Female"]]
men <- splits[["Male"]]
# Squared standard error of each group mean: variance / group sample size
var_women <- var(women$percent) / length(women$percent)
var_men <- var(men$percent) / length(men$percent)
# Sum
total_variance <- var_women + var_men
# Square root: standard error of the difference in means
sqrt_variance <- sqrt(total_variance)

# Difference in group means divided by its unpooled (Welch) standard error
(mean(women$percent) - mean(men$percent)) / sqrt_variance
## [1] 1.143277
# Welch two-sample t-test, which is what t.test() runs by default
t.test(percent ~ sex, data = tips)
## 
##  Welch Two Sample t-test
## 
## data:  percent by sex
## t = 1.1433, df = 206.76, p-value = 0.2542
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006404119  0.024084498
## sample estimates:
## mean in group Female   mean in group Male 
##            0.1664907            0.1576505

5.1.1

Your data set is probably not the best illustrative example as far as the normality assumption is concerned, but here is some quick R code that reproduces some of the calculations performed by t.test().

5.1.1.1 Equal variances

# Preview the first six rows of the data, including the percent column
head(tips, n = 6L)
##   total_bill  tip    sex smoker day   time size    percent
## 1      16.99 1.01 Female     No Sun Dinner    2 0.05944673
## 2      10.34 1.66   Male     No Sun Dinner    3 0.16054159
## 3      21.01 3.50   Male     No Sun Dinner    3 0.16658734
## 4      23.68 3.31   Male     No Sun Dinner    2 0.13978041
## 5      24.59 3.61 Female     No Sun Dinner    4 0.14680765
## 6      25.29 4.71   Male     No Sun Dinner    4 0.18623962
# Two-sample t-test with the group variances treated as equal (pooled)
t.test(percent ~ sex, data = tips, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  percent by sex
## t = 1.0834, df = 242, p-value = 0.2797
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007232898  0.024913277
## sample estimates:
## mean in group Female   mean in group Male 
##            0.1664907            0.1576505
# Tip percentages and sample sizes for the two groups
x1 <- with(tips, percent[sex == "Female"])
x2 <- with(tips, percent[sex == "Male"])
n1 <- length(x1)
n2 <- length(x2)

# Pooled variance: the two sample variances averaged with weights equal to
# their degrees of freedom (n - 1)
var.pooled <- weighted.mean(
  x = c(var(x1), var(x2)),
  w = c(n1, n2) - 1
)

# t statistic: difference in means over its pooled standard error
se_pooled <- sqrt(var.pooled / n1 + var.pooled / n2)
t <- (mean(x1) - mean(x2)) / se_pooled
t
## [1] 1.083397
# Degrees of freedom of the pooled test
df <- n1 + n2 - 2
df
## [1] 242

5.1.1.2 Unequal variance

# Welch two-sample t-test: the group variances are not assumed to be equal
t.test(percent ~ sex, data = tips, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  percent by sex
## t = 1.1433, df = 206.76, p-value = 0.2542
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006404119  0.024084498
## sample estimates:
## mean in group Female   mean in group Male 
##            0.1664907            0.1576505
# Welch Two Sample t-test
# 
# data:  percent by sex
# t = 1.1433, df = 206.759, p-value = 0.2542
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
#   -0.006404119  0.024084498
# sample estimates:
#   mean in group Female   mean in group Male 
# 0.1664907            0.1576505 

# Tip percentages and sample sizes for the two groups
x1 <- with(tips, percent[sex == "Female"])
x2 <- with(tips, percent[sex == "Male"])
n1 <- length(x1)
n2 <- length(x2)

# Welch t statistic: each group keeps its own variance in the standard error
v1 <- var(x1)
v2 <- var(x2)
se_sq <- v1 / n1 + v2 / n2
t <- (mean(x1) - mean(x2)) / sqrt(se_sq)
t
## [1] 1.143277
# [1] 1.143277
# Welch-Satterthwaite approximation to the degrees of freedom
df.num <- se_sq^2
df.denom <- v1^2 / (n1^2 * (n1 - 1)) + v2^2 / (n2^2 * (n2 - 1))
df <- df.num / df.denom
df
## [1] 206.7587
# [1] 206.7587

5.2 Calculate the standard deviation in R

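The standard deviation is the square root of the sample variance: \(variance = \frac{\sum_{i=1}^{n}(x_i-\bar{x})^2}{n-1}\) and \(sd = \sqrt{variance}\), where \(\bar{x}\) = mean(x) and \(n\) = length(x).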

# Sample values
a <- c(179, 160, 136, 227)
# Built-in sample standard deviation
sd(a)
## [1] 38.57892
# Manual calculation: sum of squared deviations from the mean, divided by
# n - 1 (the sample variance), then the square root
manual_sd <- sqrt(sum((a - mean(a))^2) / (length(a) - 1))
manual_sd
## [1] 38.57892