5 T test
5.1 Calculate t-statistic step by step in R
# Load reshape2, which provides the `tips` data set used below
library(reshape2)
data(tips)

# Tip expressed as a fraction of the total bill
tips$percent <- tips$tip / tips$total_bill

# Split the data by sex and pick each group by name, so the result does not
# depend on the order of the factor levels
splits <- split(tips, tips$sex)
women <- splits[["Female"]]
men <- splits[["Male"]]

# Squared standard error of each group mean: sample variance / group size
var_women <- var(women$percent) / length(women$percent)
var_men <- var(men$percent) / length(men$percent)

# Squared standard error of the difference in means. The two variances are
# kept separate (not pooled), i.e. this is the Welch / unequal-variance form.
total_variance <- var_women + var_men

# Square root: standard error of the difference in means
sqrt_variance <- sqrt(total_variance)

# t-statistic: difference in group means divided by its standard error
(mean(women$percent) - mean(men$percent)) / sqrt_variance
## [1] 1.143277
# Cross-check with the built-in test; var.equal = FALSE is the default and
# gives the Welch two-sample t-test
t.test(percent ~ sex, data = tips, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: percent by sex
## t = 1.1433, df = 206.76, p-value = 0.2542
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.006404119 0.024084498
## sample estimates:
## mean in group Female mean in group Male
## 0.1664907 0.1576505
5.1.1
Your data set is probably not the best illustrative example in terms of the normality assumption, but here is some quick R code that reproduces some of the calculations performed by t.test().
5.1.1.1 Equal variances
# Preview the first six rows, including the derived `percent` column
head(tips, n = 6L)
## total_bill tip sex smoker day time size percent
## 1 16.99 1.01 Female No Sun Dinner 2 0.05944673
## 2 10.34 1.66 Male No Sun Dinner 3 0.16054159
## 3 21.01 3.50 Male No Sun Dinner 3 0.16658734
## 4 23.68 3.31 Male No Sun Dinner 2 0.13978041
## 5 24.59 3.61 Female No Sun Dinner 4 0.14680765
## 6 25.29 4.71 Male No Sun Dinner 4 0.18623962
# Two-sample t-test that treats both groups as having the same variance
t.test(percent ~ sex, data = tips, var.equal = TRUE)
##
## Two Sample t-test
##
## data: percent by sex
## t = 1.0834, df = 242, p-value = 0.2797
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.007232898 0.024913277
## sample estimates:
## mean in group Female mean in group Male
## 0.1664907 0.1576505
# Tip percentages of each group and the group sizes
x1 <- with(tips, percent[sex == "Female"])
x2 <- with(tips, percent[sex == "Male"])
n1 <- length(x1)
n2 <- length(x2)

# Pooled variance: the two sample variances averaged with weights equal to
# their degrees of freedom (n - 1)
var.pooled <- weighted.mean(
  x = c(var(x1), var(x2)),
  w = c(n1 - 1, n2 - 1)
)

# t-statistic: difference in means over the pooled standard error
mean_diff <- mean(x1) - mean(x2)
se_diff <- sqrt(var.pooled / n1 + var.pooled / n2)
t <- mean_diff / se_diff
t
## [1] 1.083397

# Degrees of freedom of the equal-variance test
df <- n1 + n2 - 2
df
## [1] 242
5.1.1.2 Unequal variance
# Welch two-sample t-test: the group variances are not assumed to be equal
t.test(percent ~ sex, data = tips, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: percent by sex
## t = 1.1433, df = 206.76, p-value = 0.2542
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.006404119 0.024084498
## sample estimates:
## mean in group Female mean in group Male
## 0.1664907 0.1576505
# Welch Two Sample t-test
#
# data: percent by sex
# t = 1.1433, df = 206.759, p-value = 0.2542
# alternative hypothesis: true difference in means is not equal to 0
# 95 percent confidence interval:
# -0.006404119 0.024084498
# sample estimates:
# mean in group Female mean in group Male
# 0.1664907 0.1576505
# Tip percentages of each group and the group sizes
x1 <- with(tips, percent[sex == "Female"])
x2 <- with(tips, percent[sex == "Male"])
n1 <- length(x1)
n2 <- length(x2)

# Sample variances, computed once and reused below
s2_1 <- var(x1)
s2_2 <- var(x2)

# Squared standard error of the difference in means (variances not pooled)
se2_diff <- s2_1 / n1 + s2_2 / n2

# t-statistic: difference in means over the unpooled standard error
t <- (mean(x1) - mean(x2)) / sqrt(se2_diff)
t
## [1] 1.143277
# [1] 1.143277

# Approximate degrees of freedom: squared sum of the per-group terms divided
# by the sum of each squared term over its own (n - 1)
df.num <- se2_diff^2
df.denom <- s2_1^2 / (n1^2 * (n1 - 1)) + s2_2^2 / (n2^2 * (n2 - 1))
df <- df.num / df.denom
df
## [1] 206.7587
# [1] 206.7587
5.2 Calculate the standard deviation in R
\(variance = \frac{\sum_{i=1}^{n} (x_i - \bar{x})^2}{n - 1}\), and the standard deviation is its square root: \(sd = \sqrt{variance}\)
# Example sample
a <- c(179, 160, 136, 227)

# Built-in standard deviation
sd(a)
## [1] 38.57892

# Same value by hand: squared deviations from the mean, each divided by
# (n - 1), summed, then square-rooted
deviations <- a - mean(a)
sqrt(sum(deviations^2 / (length(a) - 1)))
## [1] 38.57892
5.3 Reference
Calculate t-statistic step by step in R: https://stats.stackexchange.com/questions/141593/calculate-t-statistic-step-by-step-in-r