x<-rnorm(10, mean =0.25, sd =1)y<-rnorm(10, mean =1, sd =0.5)# 1群のt検定(平均値が0と有意に異なるかを検定する)t.test(x)## ## One Sample t-test## ## data: x## t = 1.5976, df = 9, p-value = 0.1446## alternative hypothesis: true mean is not equal to 0## 95 percent confidence interval:## -0.2533217 1.4711697## sample estimates:## mean of x ## 0.608924# 1群のt検定(平均値が4と有意に異なるかを検定する)t.test(x, mu =4)## ## One Sample t-test## ## data: x## t = -8.8967, df = 9, p-value = 9.383e-06## alternative hypothesis: true mean is not equal to 4## 95 percent confidence interval:## -0.2533217 1.4711697## sample estimates:## mean of x ## 0.608924# 2群のt検定(Welchのt検定、等分散性の仮定がなくても計算できる)t.test(x, y)## ## Welch Two Sample t-test## ## data: x and y## t = -0.52991, df = 10.419, p-value = 0.6073## alternative hypothesis: true difference in means is not equal to 0## 95 percent confidence interval:## -1.0873669 0.6676961## sample estimates:## mean of x mean of y ## 0.6089240 0.8187593
# p値の計算(t分布で1.5975513となる場合の累積分布関数から計算する)(1-pt(1.5975513, df =9))*2## [1] 0.1446073t_temp<-data.frame(x =seq(-5, 5, by=0.01), tvalue =dt(seq(-5, 5, by =0.01), df =9))# 自由度9のt分布t_temp<-t_temp|>mutate(col_t =if_else(x>result_t$statistic, tvalue, NA))t_temp|>ggplot()+geom_area(aes(x =x, y =tvalue), color =NA, fill ="#00BFC4", data =t_temp)+geom_area(aes(x =x, y =tvalue), color =NA, fill ="#F8766D", data =t_temp|>filter(is.na(col_t)))+geom_vline(xintercept =result_t$statistic)
t_temp<-data.frame(x=seq(-5, 5, by =0.01), tvalue =dt(seq(-5, 5, by =0.01), df =5))# 自由度5のt分布# 両側5%(片側2.5%)t_temp<-t_temp|>mutate(col_95 =if_else(x<qt(0.025, df =5)|x>qt(0.975, df =5), tvalue, NA))t_temp|>ggplot()+geom_area(aes(x =x, y =tvalue), color =NA, fill ="#00BFC4", data =t_temp)+geom_area(aes(x =x, y =tvalue), color =NA, fill ="#F8766D", data =t_temp|>filter(is.na(col_95)))
# 下側検定t.test(x, alternative ="less")## ## One Sample t-test## ## data: x## t = 1.5976, df = 9, p-value = 0.9277## alternative hypothesis: true mean is less than 0## 95 percent confidence interval:## -Inf 1.307635## sample estimates:## mean of x ## 0.608924# 上側検定t.test(x, alternative ="greater")## ## One Sample t-test## ## data: x## t = 1.5976, df = 9, p-value = 0.0723## alternative hypothesis: true mean is greater than 0## 95 percent confidence interval:## -0.08978688 Inf## sample estimates:## mean of x ## 0.608924
f<-rep(c("a", "b"), c(5, 5))|>factor()# fはaとbからなる因子d<-data.frame(x, f)d# 数値と因子のデータフレーム(aとbの2群)## x f## 1 1.51295428 a## 2 -0.07623336 a## 3 1.57979926 a## 4 1.52242932 a## 5 0.66464143 a## 6 -1.28995004 b## 7 -0.67856703 b## 8 -0.04472045 b## 9 0.24423283 b## 10 2.65465339 b# データフレームdのx列を目的変数、fを説明変数としてt検定t.test(x~f, data =d)## ## Welch Two Sample t-test## ## data: x by f## t = 1.1534, df = 5.7829, p-value = 0.2942## alternative hypothesis: true difference in means between group a and group b is not equal to 0## 95 percent confidence interval:## -0.985218 2.712395## sample estimates:## mean in group a mean in group b ## 1.0407182 0.1771297
power.t.test(n =10, delta =4, sd =5, sig.level =0.05)# powerを計算## ## Two-sample t test power calculation ## ## n = 10## delta = 4## sd = 5## sig.level = 0.05## power = 0.3949428## alternative = two.sided## ## NOTE: n is number in *each* grouppower.t.test(power =0.8, delta =4, sd =5, sig.level =0.05)# 例数を計算## ## Two-sample t test power calculation ## ## n = 25.52463## delta = 4## sd = 5## sig.level = 0.05## power = 0.8## alternative = two.sided## ## NOTE: n is number in *each* group
t_temp<-data.frame( x=seq(-7, 7, by =0.01), tvalue_h0 =dt(seq(-7, 7, by =0.01), df =15), tvalue_h1 =c(rep(0, 400), dt(seq(-5, 5, by =0.01), df =15)))# alphaとbetaの場所を指定t_temp<-t_temp|>mutate( alpha =if_else(x<1.75305, tvalue_h0, NA), beta =if_else(x>1.75305, tvalue_h1, NA))t_temp|>ggplot()+geom_line(aes(x =x, y =tvalue_h0), data =t_temp)+geom_line(aes(x =x, y =tvalue_h1), data =t_temp)+geom_area(aes(x =x, y =tvalue_h0), color =NA, fill ="#F8766D", alpha =0.5, data =t_temp|>filter(is.na(alpha)))+geom_area(aes(x =x, y =tvalue_h1), color =NA, fill ="#00BFC4", alpha =0.3, data =t_temp|>filter(is.na(beta)))+geom_vline(xintercept =1.75305)+labs(caption ="左の分布が帰無仮説での分布、右の分布がデータの分布、赤がα、青がβ")
set.seed(0)# 因子のデータフレームを作成d<-data.frame( x =rep(c("A", "B"), c(20, 60))|>factor()|>sample(size =25), y =rep(c("C", "D"), c(40, 40))|>factor()|>sample(size =25))head(d)## x y## 1 A C## 2 B C## 3 B D## 4 A D## 5 B D## 6 B Ctable(d)# これが検定の対象となる## y## x C D## A 4 3## B 10 8# 2x2の行列以外の場合には、オッズ比が計算できないので信頼区間は計算されないfisher.test(x =d$x, y =d$y)## ## Fisher's Exact Test for Count Data## ## data: d$x and d$y## p-value = 1## alternative hypothesis: true odds ratio is not equal to 1## 95 percent confidence interval:## 0.1333234 9.4910727## sample estimates:## odds ratio ## 1.06395chisq.test(x =d$x, y =d$y)## Warning in chisq.test(x = d$x, y = d$y): Chi-squared approximation may be## incorrect## ## Pearson's Chi-squared test with Yates' continuity correction## ## data: d$x and d$y## X-squared = 0, df = 1, p-value = 1
29.5 分散分析
正規分布する2群の平均値の差を評価する場合には、t検定を用います。t検定は2群の差を調べる際には用いることができますが、3群以上の平均値の差を検定することはできません。このような、3群以上の平均値の差を検定したい場合には、分散分析(Analysis of variance、ANOVA)を用います。
aov関数の返り値は、平方和(Sum of Squares)、自由度(Deg. of Freedom)、残差の標準誤差(Residual standard error)の3つで、F検定統計量もp値も表示されません。検定統計量やp値を表示させるためには、aov関数の返り値をsummary関数の引数に取る必要があります。aov関数の返り値をsummary関数の引数に取って実行すると、分散分析表(自由度、平方和、F検定統計量、p値を示した表)が表示されます。分散分析表のうち、F valueがF検定統計量、Pr(>F)がF分布で検定統計量が表の値以上の値を取る確率、つまりp値となります。
aov関数で一元分散分析
aov_iris<-aov(Sepal.Length~Species, data =iris)aov_iris# 単に返り値を表示しても、p値は表示されない## Call:## aov(formula = Sepal.Length ~ Species, data = iris)## ## Terms:## Species Residuals## Sum of Squares 63.21213 38.95620## Deg. of Freedom 2 147## ## Residual standard error: 0.5147894## Estimated effects may be unbalanced# summary関数の引数に取ると、p値が表示されるsummary(aov_iris)## Df Sum Sq Mean Sq F value Pr(>F) ## Species 2 63.21 31.606 119.3 <2e-16 ***## Residuals 147 38.96 0.265 ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ToothGrowth|>ggplot(aes(x =dose, y =len, color =supp))+geom_point(size =2)+geom_abline(intercept =11.55, slope =7.811, color ="red")+geom_abline(intercept =3.295, slope =11.716, color ="blue")+labs( title ="共分散分析のデータ例:ToothGrowthデータをグラフで表示", x ="投与したビタミンCの量", y ="モルモットの歯の長さ", color ="投与方法", caption ="OJ:オレンジジュースに加える、VC:ビタミンCそのまま")
# formulaは使えないpairwise.t.test(iris$Sepal.Length, iris$Species, p.adjust.method ="holm")## ## Pairwise comparisons using t tests with pooled SD ## ## data: iris$Sepal.Length and iris$Species ## ## setosa versicolor## versicolor 1.8e-15 - ## virginica < 2e-16 2.8e-09 ## ## P value adjustment method: holm
p値調整の手法ごとのp値の値を比較したグラフを以下に示します。
gatekeeping法
多重比較の方法には、この他にgatekeeping法と呼ばれるものもあります。gatekeeping法は研究デザインとの関連性が高い手法で、基礎研究ではまずお目にかからないものです。近年の第III相治験のような、ガチガチに研究デザインを固めてから行う大規模研究以外ではまず使うことはないでしょう。Rにはこのgatekeeping法の計算に関連したライブラリとしてMedianaパッケージ (Paux and Dmitrienko. 2019)や gMCPパッケージ (Bretz et al. 2011)があります。
x<-rlnorm(10, meanlog =0, sdlog =1)y<-rlnorm(10, meanlog =1, sdlog =0.5)f<-rep(c("a", "b"), c(5, 5))|>factor()# 2つのベクターを引数に取るwilcox.test(x, y)## ## Wilcoxon rank sum exact test## ## data: x and y## W = 23, p-value = 0.04326## alternative hypothesis: true location shift is not equal to 0# ベクターと因子を引数に取るwilcox.test(x~f)## ## Wilcoxon rank sum exact test## ## data: x by f## W = 22, p-value = 0.05556## alternative hypothesis: true location shift is not equal to 0
wb<-warpbreaks|>group_by(wool, tension)|>summarise(breaks =mean(breaks))## `summarise()` has grouped output by 'wool'. You can override using the## `.groups` argument.wb# 因子が2つあるデータフレーム## # A tibble: 6 × 3## # Groups: wool [2]## wool tension breaks## <fct> <fct> <dbl>## 1 A L 44.6## 2 A M 24 ## 3 A H 24.6## 4 B L 28.2## 5 B M 28.8## 6 B H 18.8wb|>pivot_wider(names_from =wool, values_from ="breaks")# 2因子のブロックデザインデータ## # A tibble: 3 × 3## tension A B## <fct> <dbl> <dbl>## 1 L 44.6 28.2## 2 M 24 28.8## 3 H 24.6 18.8friedman.test(breaks~wool|tension, data =wb)# woolの群間差## ## Friedman rank sum test## ## data: breaks and wool and tension## Friedman chi-squared = 0.33333, df = 1, p-value = 0.5637friedman.test(breaks~tension|wool, data =wb)# tensionの群間差## ## Friedman rank sum test## ## data: breaks and tension and wool## Friedman chi-squared = 1, df = 2, p-value = 0.6065
# xは成功回数、nは試行回数、pは成功確率(pのデフォルト値は0.5)binom.test(x =2, n =10, p =0.5)## ## Exact binomial test## ## data: 2 and 10## number of successes = 2, number of trials = 10, p-value = 0.1094## alternative hypothesis: true probability of success is not equal to 0.5## 95 percent confidence interval:## 0.02521073 0.55609546## sample estimates:## probability of success ## 0.2
binom.test(x =4, n =20, p =0.5, conf.level =0.9)|>_$conf.int## [1] 0.07135388 0.40102812## attr(,"conf.level")## [1] 0.9
binom.test関数はt.test関数と同様に片側、両側検定とすることもできます。
binom.test:片側検定
# 下側検定binom.test(x =5, n =20, p =0.5, alternative =c("less"))## ## Exact binomial test## ## data: 5 and 20## number of successes = 5, number of trials = 20, p-value = 0.02069## alternative hypothesis: true probability of success is less than 0.5## 95 percent confidence interval:## 0.0000000 0.4555824## sample estimates:## probability of success ## 0.25# 上側検定binom.test(x =15, n =20, p =0.5, alternative =c("greater"))## ## Exact binomial test## ## data: 15 and 20## number of successes = 15, number of trials = 20, p-value = 0.02069## alternative hypothesis: true probability of success is greater than 0.5## 95 percent confidence interval:## 0.5444176 1.0000000## sample estimates:## probability of success ## 0.75
# 正規分布同士の分散の均一性の検定(異なっていれば帰無仮説を棄却)var.test(rnorm(10), rnorm(10))## ## F test to compare two variances## ## data: rnorm(10) and rnorm(10)## F = 0.69878, num df = 9, denom df = 9, p-value = 0.602## alternative hypothesis: true ratio of variances is not equal to 1## 95 percent confidence interval:## 0.1735671 2.8132844## sample estimates:## ratio of variances ## 0.69878# 正規分布と一様分布の分散の均一性の検定var.test(rnorm(10), runif(10, min =-1, max =1))## ## F test to compare two variances## ## data: rnorm(10) and runif(10, min = -1, max = 1)## F = 4.9289, num df = 9, denom df = 9, p-value = 0.02628## alternative hypothesis: true ratio of variances is not equal to 1## 95 percent confidence interval:## 1.224275 19.843813## sample estimates:## ratio of variances ## 4.928922
pacman::p_load(outliers)# 正規乱数x<-c(rnorm(10), 10)chisq.out.test(x)## ## chi-squared test for outlier## ## data: x## X-squared = 7.9438, p-value = 0.004825## alternative hypothesis: highest value 10 is an outliergrubbs.test(x)## ## Grubbs test for one outlier## ## data: x## G = 2.81847, U = 0.12618, p-value = 0.0001353## alternative hypothesis: highest value 10 is an outlierdixon.test(x)## ## Dixon test for outliers## ## data: x## Q = 0.81998, p-value < 2.2e-16## alternative hypothesis: highest value 10 is an outlier
Altman, Naomi, and Martin Krzywinski. 2016. “Points of Significance: P Values and the Search for Significance.”Nature Methods 14 (1): 3–4. https://doi.org/10.1038/nmeth.4120.
Benjamin, Daniel J, James O Berger, Magnus Johannesson, Brian A Nosek, E-J Wagenmakers, Richard Berk, Kenneth A Bollen, et al. 2018. “Redefine Statistical Significance.”Nature Human Behaviour 2 (1): 6–10.
Bretz, Frank, Martin Posch, Ekkehard Glimm, Florian Klinglmueller, Willi Maurer, and Kornelius Rohmeyer. 2011. “Graphical Approaches for Multiple Comparison Procedures Using Weighted Bonferroni, Simes or Parametric Tests.”Biometrical Journal 53 (6): 894–913. https://doi.org/10.1002/bimj.201000239.