x <-rnorm(10, mean =0.25, sd =1)y <-rnorm(10, mean =1, sd =0.5)# 1群のt検定(平均値が0と有意に異なるかを検定する)t.test(x)## ## One Sample t-test## ## data: x## t = 1.5976, df = 9, p-value = 0.1446## alternative hypothesis: true mean is not equal to 0## 95 percent confidence interval:## -0.2533217 1.4711697## sample estimates:## mean of x ## 0.608924# 1群のt検定(平均値が4と有意に異なるかを検定する)t.test(x, mu =4)## ## One Sample t-test## ## data: x## t = -8.8967, df = 9, p-value = 9.383e-06## alternative hypothesis: true mean is not equal to 4## 95 percent confidence interval:## -0.2533217 1.4711697## sample estimates:## mean of x ## 0.608924# 2群のt検定(Welchのt検定、等分散性の仮定がなくても計算できる)t.test(x, y)## ## Welch Two Sample t-test## ## data: x and y## t = -0.52991, df = 10.419, p-value = 0.6073## alternative hypothesis: true difference in means is not equal to 0## 95 percent confidence interval:## -1.0873669 0.6676961## sample estimates:## mean of x mean of y ## 0.6089240 0.8187593
result_t <-t.test(x)result_t # t検定の結果## ## One Sample t-test## ## data: x## t = 1.5976, df = 9, p-value = 0.1446## alternative hypothesis: true mean is not equal to 0## 95 percent confidence interval:## -0.2533217 1.4711697## sample estimates:## mean of x ## 0.608924result_t$statistic # t統計量## t ## 1.597551result_t$p.value # p値## [1] 0.1446073result_t$estimate # 平均値## mean of x ## 0.608924result_t$conf.int # 差の95%信頼区間(0との差なので、xの95%信頼区間)## [1] -0.2533217 1.4711697## attr(,"conf.level")## [1] 0.95
# 下側検定t.test(x, alternative ="less")## ## One Sample t-test## ## data: x## t = 1.5976, df = 9, p-value = 0.9277## alternative hypothesis: true mean is less than 0## 95 percent confidence interval:## -Inf 1.307635## sample estimates:## mean of x ## 0.608924# 上側検定t.test(x, alternative ="greater")## ## One Sample t-test## ## data: x## t = 1.5976, df = 9, p-value = 0.0723## alternative hypothesis: true mean is greater than 0## 95 percent confidence interval:## -0.08978688 Inf## sample estimates:## mean of x ## 0.608924
f <-rep(c("a", "b"), c(5, 5)) |>factor() # fはaとbからなる因子d <-data.frame(x, f)d # 数値と因子のデータフレーム(aとbの2群)## x f## 1 1.51295428 a## 2 -0.07623336 a## 3 1.57979926 a## 4 1.52242932 a## 5 0.66464143 a## 6 -1.28995004 b## 7 -0.67856703 b## 8 -0.04472045 b## 9 0.24423283 b## 10 2.65465339 b# データフレームdのx列を目的変数、fを説明変数としてt検定t.test(x ~ f, data = d)## ## Welch Two Sample t-test## ## data: x by f## t = 1.1534, df = 5.7829, p-value = 0.2942## alternative hypothesis: true difference in means between group a and group b is not equal to 0## 95 percent confidence interval:## -0.985218 2.712395## sample estimates:## mean in group a mean in group b ## 1.0407182 0.1771297
power.t.test(n =10, delta =4, sd =5, sig.level =0.05) # powerを計算## ## Two-sample t test power calculation ## ## n = 10## delta = 4## sd = 5## sig.level = 0.05## power = 0.3949428## alternative = two.sided## ## NOTE: n is number in *each* grouppower.t.test(power =0.8, delta =4, sd =5, sig.level =0.05) # 例数を計算## ## Two-sample t test power calculation ## ## n = 25.52463## delta = 4## sd = 5## sig.level = 0.05## power = 0.8## alternative = two.sided## ## NOTE: n is number in *each* group
set.seed(0)# 因子のデータフレームを作成d <-data.frame(x =rep(c("A", "B"), c(20, 60)) |>factor() |>sample(size =25),y =rep(c("C", "D"), c(40, 40)) |>factor() |>sample(size =25))head(d)## x y## 1 A C## 2 B C## 3 B D## 4 A D## 5 B D## 6 B Ctable(d) # これが検定の対象となる## y## x C D## A 4 3## B 10 8# 2x2の行列以外の場合には、オッズ比が計算できないので信頼区間は計算されないfisher.test(x = d$x, y = d$y)## ## Fisher's Exact Test for Count Data## ## data: d$x and d$y## p-value = 1## alternative hypothesis: true odds ratio is not equal to 1## 95 percent confidence interval:## 0.1333234 9.4910727## sample estimates:## odds ratio ## 1.06395chisq.test(x = d$x, y = d$y)## Warning in chisq.test(x = d$x, y = d$y): Chi-squared approximation may be## incorrect## ## Pearson's Chi-squared test with Yates' continuity correction## ## data: d$x and d$y## X-squared = 0, df = 1, p-value = 1
25.5 分散分析
正規分布する2群の平均値の差を評価する場合には、t検定を用います。t検定は2群の差を調べる際には用いることができますが、3群以上の平均値の差を検定することはできません。このような、3群以上の平均値の差を検定したい場合には、分散分析(Analysis of variance、ANOVA)を用います。
aov関数の返り値は、平方和(Sum of Squares)、自由度(Deg. of Freedom)、残差の標準誤差(Residual standard error)の3つで、F検定統計量もp値も表示されません。検定統計量やp値を表示させるためには、aov関数の返り値をsummary関数の引数に取る必要があります。aov関数の返り値をsummary関数の引数に取って実行すると、分散分析表(自由度、平方和、F検定統計量、p値を示した表)が表示されます。分散分析表のうち、F valueがF検定統計量、Pr(>F)がF分布で検定統計量が表の値以上の値を取る確率、つまりp値となります。
aov関数で一元分散分析
aov_iris <-aov(Sepal.Length ~ Species, data = iris)aov_iris # 単に返り値を表示しても、p値は表示されない## Call:## aov(formula = Sepal.Length ~ Species, data = iris)## ## Terms:## Species Residuals## Sum of Squares 63.21213 38.95620## Deg. of Freedom 2 147## ## Residual standard error: 0.5147894## Estimated effects may be unbalanced# summary関数の引数に取ると、p値が表示されるsummary(aov_iris)## Df Sum Sq Mean Sq F value Pr(>F) ## Species 2 63.21 31.606 119.3 <2e-16 ***## Residuals 147 38.96 0.265 ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# formulaは使えないpairwise.t.test(iris$Sepal.Length, iris$Species, p.adjust.method ="holm")## ## Pairwise comparisons using t tests with pooled SD ## ## data: iris$Sepal.Length and iris$Species ## ## setosa versicolor## versicolor 1.8e-15 - ## virginica < 2e-16 2.8e-09 ## ## P value adjustment method: holm
p値調整の手法ごとのp値の値を比較したグラフを以下に示します。
gatekeeping法
多重比較の方法には、この他にgatekeeping法と呼ばれるものもあります。gatekeeping法は研究デザインとの関連性が高い手法で、基礎研究ではまずお目にかからないものです。近年の第III相治験のような、ガチガチに研究デザインを固めてから行う大規模研究以外ではまず使うことはないでしょう。Rにはこのgatekeeping法の計算に関連したライブラリとしてMedianaパッケージ (Paux and Dmitrienko. 2019)や gMCPパッケージ (Bretz et al. 2011)があります。
x <-rlnorm(10, meanlog =0, sdlog =1)y <-rlnorm(10, meanlog =1, sdlog =0.5)f <-rep(c("a", "b"), c(5, 5)) |>factor()# 2つのベクターを引数に取るwilcox.test(x, y)## ## Wilcoxon rank sum exact test## ## data: x and y## W = 23, p-value = 0.04326## alternative hypothesis: true location shift is not equal to 0# ベクターと因子を引数に取るwilcox.test(x ~ f)## ## Wilcoxon rank sum exact test## ## data: x by f## W = 22, p-value = 0.05556## alternative hypothesis: true location shift is not equal to 0
# 正規分布同士の分散の均一性の検定(異なっていれば帰無仮説を棄却)var.test(rnorm(10), rnorm(10))## ## F test to compare two variances## ## data: rnorm(10) and rnorm(10)## F = 0.69878, num df = 9, denom df = 9, p-value = 0.602## alternative hypothesis: true ratio of variances is not equal to 1## 95 percent confidence interval:## 0.1735671 2.8132844## sample estimates:## ratio of variances ## 0.69878# 正規分布と一様分布の分散の均一性の検定var.test(rnorm(10), runif(10))## ## F test to compare two variances## ## data: rnorm(10) and runif(10)## F = 19.716, num df = 9, denom df = 9, p-value = 0.0001373## alternative hypothesis: true ratio of variances is not equal to 1## 95 percent confidence interval:## 4.897098 79.375252## sample estimates:## ratio of variances ## 19.71569
pacman::p_load(outliers)# 正規乱数x <-c(rnorm(10), 10)chisq.out.test(x)## ## chi-squared test for outlier## ## data: x## X-squared = 7.9438, p-value = 0.004825## alternative hypothesis: highest value 10 is an outliergrubbs.test(x)## ## Grubbs test for one outlier## ## data: x## G = 2.81847, U = 0.12618, p-value = 0.0001353## alternative hypothesis: highest value 10 is an outlierdixon.test(x)## ## Dixon test for outliers## ## data: x## Q = 0.81998, p-value < 2.2e-16## alternative hypothesis: highest value 10 is an outlier
図1:データと対応する検定手法図1:第一の過誤と第二の過誤図1:交互作用の有無
Altman, Naomi, and Martin Krzywinski. 2016. “Points of Significance: P Values and the Search for Significance.”Nature Methods 14 (1): 3–4. https://doi.org/10.1038/nmeth.4120.
Benjamin, Daniel J, James O Berger, Magnus Johannesson, Brian A Nosek, E-J Wagenmakers, Richard Berk, Kenneth A Bollen, et al. 2018. “Redefine Statistical Significance.”Nature Human Behaviour 2 (1): 6–10.
Bretz, Frank, Martin Posch, Ekkehard Glimm, Florian Klinglmueller, Willi Maurer, and Kornelius Rohmeyer. 2011. “Graphical Approaches for Multiple Comparison Procedures Using Weighted Bonferroni, Simes or Parametric Tests.”Biometrical Journal 53 (6): 894–913. https://doi.org/10.1002/bimj.201000239.