분산분석 예제

1. 완전확률화설계

1.1. 반복수 동일한 경우

# Treatment labels: five groups with four replicates each.
group <- rep(1:5, each = 4)
# Responses, listed group by group in the same order as `group`.
y <- c(
  2.4, 2.7, 3.1, 3.1,
  0.7, 1.6, 1.7, 1.8,
  2.4, 3.1, 5.4, 6.1,
  0.3, 0.3, 2.4, 2.7,
  0.5, 0.9, 1.4, 2.0
)
# Two-column matrix of labels and responses; built before `group` is
# converted to a factor so the label column stays numeric.
sol <- cbind(group, y)
group <- as.factor(group)
# One-way ANOVA of y on group, followed by the ANOVA table.
aov1 <- aov(y ~ group)
summary(aov1)

## Df Sum Sq Mean Sq F value Pr(>F) ## group 4 27.01 6.752 5.966 0.00444 **## Residuals 15 16.98 1.132 ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Box plot of y for each level of group.
plot(y ~ group)


## ## Call:## aov(formula = y ~ group)## ## Residuals:## Min 1Q Median 3Q Max ## -1.8500 -0.7125 0.1750 0.4625 1.8500 ## ## Coefficients:## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 2.8250 0.5319 5.311 8.72e-05 ***## group2 -1.3750 0.7522 -1.828 0.0875 . ## group3 1.4250 0.7522 1.894 0.0776 . ## group4 -1.4000 0.7522 -1.861 0.0824 . ## group5 -1.6250 0.7522 -2.160 0.0473 * ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1## ## Residual standard error: 1.064 on 15 degrees of freedom## Multiple R-squared: 0.614, Adjusted R-squared: 0.5111 ## F-statistic: 5.966 on 4 and 15 DF, p-value: 0.004442

# Mean of y within each group.
tapply(X = y, INDEX = group, FUN = mean)

## 1 2 3 4 5 ## 2.825 1.450 4.250 1.425 1.200


# Bartlett test of homogeneity of variances across groups.
bartlett.test(y ~ group)

## ## Bartlett test of homogeneity of variances

## ## data: y by group## Bartlett's K-squared = 8.7824, df = 4, p-value = 0.06677

• 분산분석표에서 p-값 0.004442 < 0.05 이므로 처리의 평균이 동일하다고 할 수 없음

• 그룹별 상자그림, 그룹별 평균: 그룹 5 < 4 < 2 < 1 < 3

• 진단도표: (a) 잔차 vs 추정값 (b) 정규 QQ 도표 (c) 추정값 vs 표준화잔차 절댓값의 제곱근(0~2 범위) (d) 그룹별 표준화잔차 등에서 특별한 경향은 발견하기 어려움

• 등분산성 검정: p-값(0.06677) > 0.05 이므로 등분산이라는 귀무가설을 기각할 수 없음

# Mean of y within each group (shown again ahead of the pairwise tests).
tapply(X = y, INDEX = group, FUN = mean)

## 1 2 3 4 5 ## 2.825 1.450 4.250 1.425 1.200

# LSD: unadjusted pairwise t tests using the pooled SD.
# The argument is spelled out as `p.adjust.method` instead of relying on
# partial matching of `p.adjust`.
pairwise.t.test(y, group, p.adjust.method = "none", pool.sd = TRUE)

## ## Pairwise comparisons using t tests with pooled SD ## ## data: y and group ## ## 1 2 3 4 ## 2 0.0875 - - - ## 3 0.0776 0.0020 - - ## 4 0.0824 0.9739 0.0019 - ## 5 0.0473 0.7442 0.0010 0.7690## ## P value adjustment method: none

# Bonferroni-adjusted pairwise t tests using non-pooled SDs.
# The argument is spelled out as `p.adjust.method` instead of relying on
# partial matching of `p.adjust`.
pairwise.t.test(y, group, p.adjust.method = "bonferroni", pool.sd = FALSE)

## ## Pairwise comparisons using t tests with non-pooled SD ## ## data: y and group ## ## 1 2 3 4 ## 2 0.057 - - - ## 3 1.000 0.465 - - ## 4 1.000 1.000 0.463 - ## 5 0.085 1.000 0.349 1.000## ## P value adjustment method: bonferroni

# Tukey's HSD on the fitted one-way model; `ordered = TRUE` orders the
# factor levels before the pairwise differences are taken.
a.tukey <- TukeyHSD(aov1, ordered = TRUE)
# Print the comparisons (this statement had been fused into the comment).
a.tukey

## Tukey multiple comparisons of means## 95% family-wise confidence level## factor levels have been ordered## ## Fit: aov(formula = y ~ group)## ## $group## diff lwr upr p adj## 4-5 0.225 -2.0977953 2.547795 0.9980497## 2-5 0.250 -2.0727953 2.572795 0.9970635## 1-5 1.625 -0.6977953 3.947795 0.2466201## 3-5 3.050 0.7272047 5.372795 0.0078279## 2-4 0.025 -2.2977953 2.347795 0.9999997## 1-4 1.400 -0.9227953 3.722795 0.3776700## 3-4 2.825 0.5022047 5.147795 0.0139855## 1-2 1.375 -0.9477953 3.697795 0.3944760## 3-2 2.800 0.4772047 5.122795 0.0149150## 3-1 1.425 -0.8977953 3.747795 0.3612849


1.2. 반복수가 다른 경우

눈동자 색깔이 갈색, 초록, 파랑인 19명에 대하여 눈동자 깜박거림 횟수 데이터로 광원을


# Eye colour for each of the 19 subjects: 8 brown, 5 green, 6 blue.
Color <- as.factor(rep(c("Brown", "Green", "Blue"), times = c(8, 5, 6)))
# Flicker measurements, in the same subject order as Color.
Flicker <- c(
  26.8, 27.9, 23.7, 25, 26.3, 24.8, 25.7, 24.5,
  26.4, 24.2, 28.0, 26.9, 29.1,
  25.7, 27.2, 29.9, 28.5, 29.4, 28.3
)
plot(Flicker ~ Color)

# Mean and standard deviation of Flicker within each colour group.
by(Flicker, Color, FUN = function(x) {
  c(mean(x), sd(x))
})

## Color: Blue## [1] 28.166667 1.527962## -------------------------------------------------------- ## Color: Brown## [1] 25.587500 1.365323## -------------------------------------------------------- ## Color: Green## [1] 26.920000 1.843095

# One-way ANOVA of Flicker on Color, followed by the ANOVA table.
aov1 <- aov(Flicker ~ Color)
summary(aov1)

## Df Sum Sq Mean Sq F value Pr(>F) ## Color 2 23.00 11.499 4.802 0.0232 *## Residuals 16 38.31 2.394 ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


## ## Call:## aov(formula = Flicker ~ Color)## ## Residuals:## Min 1Q Median 3Q Max ## -2.7200 -0.8771 0.1125 1.1462 2.3125 ## ## Coefficients:## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 28.1667 0.6317 44.588 < 2e-16 ***## ColorBrown -2.5792 0.8357 -3.086 0.00708 ** ## ColorGreen -1.2467 0.9370 -1.331 0.20200 ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1## ## Residual standard error: 1.547 on 16 degrees of freedom## Multiple R-squared: 0.3751, Adjusted R-squared: 0.297 ## F-statistic: 4.802 on 2 and 16 DF, p-value: 0.02325

# Fitted value for every observation.
predict(aov1)

## 1 2 3 4 5 6 7 8 ## 25.58750 25.58750 25.58750 25.58750 25.58750 25.58750 25.58750 25.58750 ## 9 10 11 12 13 14 15 16 ## 26.92000 26.92000 26.92000 26.92000 26.92000 28.16667 28.16667 28.16667 ## 17 18 19 ## 28.16667 28.16667 28.16667

# Diagnostic plots for the fitted model.
plot(aov1)

# Multiple comparisons.
# LSD: unadjusted pairwise t tests using the pooled SD.
pairwise.t.test(Flicker, Color, p.adjust.method = "none", pool.sd = TRUE)

## ## Pairwise comparisons using t tests with pooled SD ## ## data: Flicker and Color ## ## Blue Brown ## Brown 0.0071 - ## Green 0.2020 0.1504## ## P value adjustment method: none

# Bonferroni-adjusted pairwise t tests using non-pooled SDs.
# The argument is spelled out as `p.adjust.method` instead of relying on
# partial matching of `p.adjust`.
pairwise.t.test(Flicker, Color, p.adjust.method = "bonferroni", pool.sd = FALSE)

## ## Pairwise comparisons using t tests with non-pooled SD ## ## data: Flicker and Color ## ## Blue Brown## Brown 0.025 - ## Green 0.789 0.621## ## P value adjustment method: bonferroni

# Tukey's HSD on the fitted one-way model; `ordered = TRUE` orders the
# factor levels before the pairwise differences are taken.
a.tukey <- TukeyHSD(aov1, ordered = TRUE)
# Print the comparisons (this statement had been fused into the comment).
a.tukey

## Tukey multiple comparisons of means## 95% family-wise confidence level## factor levels have been ordered## ## Fit: aov(formula = Flicker ~ Color)## ## $Color## diff lwr upr p adj## Green-Brown 1.332500 -0.9437168 3.608717 0.3124225## Blue-Brown 2.579167 0.4228360 4.735497 0.0183579## Blue-Green 1.246667 -1.1710626 3.664396 0.3994319


• 분산분석 결과 p-값(0.0232) < 0.05 이므로 눈의 색에 따라 눈동자 깜박거림 횟수의 평균이 같지 않음

• 진단 결과 특별한 문제는...

• 다중비교 결과: Blue와 Brown은 차이가 남

2. 확률화블럭설계

3가지 세제에 대해 박테리아 성장의 지연 효과를 비교. 하루에 3번 실험이 가능하므로 실


# Treatment factor: three treatments, four observations each.
trt <- as.factor(rep(1:3, each = 4))
# Block factor: blocks 1-4 repeated within every treatment.
block <- as.factor(rep(1:4, times = 3))
# Responses, ordered by treatment and then by block.
y <- c(20, 22, 18, 25, 16, 18, 17, 19, 30, 34, 29, 27)

# Visualization: response by treatment.
plot(y ~ trt)

# ANOVA with additive treatment and block effects.
fit <- aov(y ~ trt + block)
summary(fit)

## Df Sum Sq Mean Sq F value Pr(>F) ## trt 2 329.2 164.58 26.810 0.00102 **## block 3 20.9 6.97 1.136 0.40722 ## Residuals 6 36.8 6.14 ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# LSD: unadjusted pairwise t tests between treatments.
# NOTE(review): the pooled SD here is computed from the trt groups only and
# does not use the block term of the model above - confirm this is intended
# for the blocked design.
pairwise.t.test(y, trt, p.adjust.method = "none")

## ## Pairwise comparisons using t tests with pooled SD ## ## data: y and trt ## ## 1 2 ## 2 0.06580 - ## 3 0.00087 6.5e-05## ## P value adjustment method: none


• 그림에서 처리별 차이가 있는 것으로 보임. 블럭별로는 차이가 유의하지 않을 듯

• 분산분석표: 처리 효과는 유의하지만 블럭 효과는 유의하지 않음

• 잔차분석 결과 큰 문제 발견하지 못함

• LSD: 1-3, 2-3은 유의한 차이

3. 이원배치 분산분석(two-way ANOVA)

$$y_{ijk} = \mu + \alpha_i + \beta_j + (\alpha\beta)_{ij} + \epsilon_{ijk}$$

• 주효과: $\alpha$는 $I$개의 수준, $\beta$는 $J$개의 수준

• $n_{ij}$: $\alpha$와 $\beta$의 수준이 $i$와 $j$일 때 관측값의 개수

PVC 생산에 영향을 미치는 요인을 알아보기 위해 3명의 작업자에게 8가지 다른 장비를 사용하여 PVC를 생산하도록 함. 24가지 조합에서 두 번 반복

library(faraway)
data(pvc)

# Strip charts of particle size by resin and by operator.
with(pvc, stripchart(psize ~ resin, xlab = "Particle Size", ylab = "Resin Railcar"))

with(pvc, stripchart(psize ~ operator, xlab = "Particle Size", ylab = "Operator"))

# Interaction plots, once with each factor on the x axis.
with(pvc, interaction.plot(operator, resin, psize))

with(pvc, interaction.plot(resin, operator, psize))


# Full model: both main effects plus the operator-by-resin interaction.
g <- lm(psize ~ operator * resin, data = pvc)
anova(g)

## Analysis of Variance Table## ## Response: psize## Df Sum Sq Mean Sq F value Pr(>F) ## operator 2 20.718 10.359 7.0072 0.00401 ** ## resin 7 283.946 40.564 27.4388 5.661e-10 ***## operator:resin 14 14.335 1.024 0.6926 0.75987 ## Residuals 24 35.480 1.478 ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


# Residuals against fitted values for the full model.
plot(fitted(g), residuals(g), xlab = "Fitted", ylab = "Residuals")

# Refit after dropping rows 45 and 46, the two observations with large
# residuals.
g2 <- lm(psize ~ operator * resin, data = pvc[-c(45, 46), ])
anova(g2)

## Analysis of Variance Table## ## Response: psize## Df Sum Sq Mean Sq F value Pr(>F) ## operator 2 24.546 12.273 13.5063 0.000132 ***## resin 7 283.018 40.431 44.4936 6.945e-12 ***## operator:resin 13 11.020 0.848 0.9329 0.537167 ## Residuals 23 20.900 0.909 ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Main-effects-only model (no interaction term).
anova(lm(psize ~ operator + resin, data = pvc))

## Analysis of Variance Table## ## Response: psize## Df Sum Sq Mean Sq F value Pr(>F) ## operator 2 20.718 10.359 7.902 0.00135 ** ## resin 7 283.946 40.564 30.943 8.111e-14 ***## Residuals 38 49.815 1.311 ## ---## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey's HSD for both factors of the main-effects model.
TukeyHSD(aov(psize ~ operator + resin, data = pvc))

## Tukey multiple comparisons of means## 95% family-wise confidence level## ## Fit: aov(formula = psize ~ operator + resin, data = pvc)## ## $operator## diff lwr upr p adj## 2-1 -0.26250 -1.249747 0.7247472 0.7943575## 3-1 -1.50625 -2.493497 -0.5190028 0.0018126## 3-2 -1.24375 -2.230997 -0.2565028 0.0106800## ## $resin## diff lwr upr p adj## 2-1 -1.0333333 -3.1522815 1.0856149 0.7683288## 3-1 -5.8000000 -7.9189482 -3.6810518 0.0000000## 4-1 -6.1833333 -8.3022815 -4.0643851 0.0000000## 5-1 -4.8000000 -6.9189482 -2.6810518 0.0000003## 6-1 -5.4500000 -7.5689482 -3.3310518 0.0000000## 7-1 -2.9166667 -5.0356149 -0.7977185 0.0019046## 8-1 -0.1833333 -2.3022815 1.9356149 0.9999924## 3-2 -4.7666667 -6.8856149 -2.6477185 0.0000003

## 4-2 -5.1500000 -7.2689482 -3.0310518 0.0000001## 5-2 -3.7666667 -5.8856149 -1.6477185 0.0000379## 6-2 -4.4166667 -6.5356149 -2.2977185 0.0000018## 7-2 -1.8833333 -4.0022815 0.2356149 0.1127668## 8-2 0.8500000 -1.2689482 2.9689482 0.8984776## 4-3 -0.3833333 -2.5022815 1.7356149 0.9989372## 5-3 1.0000000 -1.1189482 3.1189482 0.7958917## 6-3 0.3500000 -1.7689482 2.4689482 0.9994110## 7-3 2.8833333 0.7643851 5.0022815 0.0022073## 8-3 5.6166667 3.4977185 7.7356149 0.0000000## 5-4 1.3833333 -0.7356149 3.5022815 0.4375901## 6-4 0.7333333 -1.3856149 2.8522815 0.9507745## 7-4 3.2666667 1.1477185 5.3856149 0.0003909## 8-4 6.0000000 3.8810518 8.1189482 0.0000000## 6-5 -0.6500000 -2.7689482 1.4689482 0.9741405## 7-5 1.8833333 -0.2356149 4.0022815 0.1127668## 8-5 4.6166667 2.4977185 6.7356149 0.0000007## 7-6 2.5333333 0.4143851 4.6522815 0.0098978## 8-6 5.2666667 3.1477185 7.3856149 0.0000000## 8-7 2.7333333 0.6143851 4.8522815 0.0042481

