R version 2.7.2 (2008-08-25) Copyright (C) 2008 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # r --vanilla > # ========================================================================== > # ===== java.util data ===================================================== > # ========================================================================== > > javaUtilData <- read.table("v11data-javaUtil-edit.txt", header=TRUE, sep=" ") > names(javaUtilData) [1] "SourceFile" "SLOC" "N10t" "RC10t" "Coverage10" [6] "N11t" "RC11t" "Coverage11" > > # ===== Sums of columns ==================================================== > > sum(javaUtilData$SLOC) [1] 3512 > sum(javaUtilData$N10t) [1] 1519 > sum(javaUtilData$RC10t) [1] 324 > sum(javaUtilData$Coverage10) [1] 2880 > sum(javaUtilData$N11t) [1] 2034 > sum(javaUtilData$RC11t) [1] 322 > sum(javaUtilData$Coverage11) [1] 2883 > > # ===== Coverage ratios ==================================================== > > sum(javaUtilData$Coverage10) / sum(javaUtilData$SLOC) [1] 0.8200456 > sum(javaUtilData$Coverage11) / sum(javaUtilData$SLOC) [1] 0.8208998 > > javaUtilData$Coverage10 / javaUtilData$SLOC [1] 0.93333333 0.02928870 0.96388889 0.95652174 0.91549296 0.84948980 [7] 0.93203883 1.00000000 0.99118943 0.76354680 0.40963855 1.00000000 [13] 0.93594306 0.95161290 0.97500000 0.88757396 > javaUtilData$Coverage11 / javaUtilData$SLOC [1] 0.9333333 0.0502092 0.9638889 0.9565217 0.9267606 0.8545918 0.9320388 [8] 
1.0000000 0.9911894 0.7241379 0.4096386 1.0000000 0.9341637 0.9516129 [15] 0.9800000 0.8875740 > > # ===== Means of columns =================================================== > > mean(javaUtilData$SLOC) [1] 219.5 > mean(javaUtilData$N10t) [1] 94.9375 > mean(javaUtilData$RC10t) [1] 20.25 > mean(javaUtilData$Coverage10) [1] 180 > mean(javaUtilData$N11t) [1] 127.125 > mean(javaUtilData$RC11t) [1] 20.125 > mean(javaUtilData$Coverage11) [1] 180.1875 > > # ===== Normality tests on column differences ============================== > # ===== small p value is evidence against normality ======================== > > shapiro.test( javaUtilData$N10t - + javaUtilData$N11t) Shapiro-Wilk normality test data: javaUtilData$N10t - javaUtilData$N11t W = 0.9162, p-value = 0.1463 > shapiro.test( javaUtilData$RC10t - + javaUtilData$RC11t) Shapiro-Wilk normality test data: javaUtilData$RC10t - javaUtilData$RC11t W = 0.7807, p-value = 0.001527 > shapiro.test( javaUtilData$Coverage10 - + javaUtilData$Coverage11) Shapiro-Wilk normality test data: javaUtilData$Coverage10 - javaUtilData$Coverage11 W = 0.7199, p-value = 0.0002822 > > # ===== t tests on columns ================================================= > # ===== small p value is evidence that mean difference is not 0 ============ > > t.test( javaUtilData$N10t, + javaUtilData$N11t, + alternative="two.sided", paired=TRUE, conf.level=0.95) Paired t-test data: javaUtilData$N10t and javaUtilData$N11t t = -1.874, df = 15, p-value = 0.08054 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -68.796461 4.421461 sample estimates: mean of the differences -32.1875 > t.test( javaUtilData$RC10t, + javaUtilData$RC11t, + alternative="two.sided", paired=TRUE, conf.level=0.95) Paired t-test data: javaUtilData$RC10t and javaUtilData$RC11t t = 0.0767, df = 15, p-value = 0.9399 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -3.349519 3.599519 sample 
estimates: mean of the differences 0.125 > t.test( javaUtilData$Coverage10, + javaUtilData$Coverage11, + alternative="two.sided", paired=TRUE, conf.level=0.95) Paired t-test data: javaUtilData$Coverage10 and javaUtilData$Coverage11 t = -0.2764, df = 15, p-value = 0.786 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -1.633365 1.258365 sample estimates: mean of the differences -0.1875 > > # ===== Paired Wilcoxon tests on columns =================================== > # ===== small p value is evidence that location shift is not 0 ============= > > wilcox.test( javaUtilData$N10t, + javaUtilData$N11t, + alternative="two.sided", paired=TRUE, conf.level=0.95) Wilcoxon signed rank test with continuity correction data: javaUtilData$N10t and javaUtilData$N11t V = 32, p-value = 0.06636 alternative hypothesis: true location shift is not equal to 0 > wilcox.test( javaUtilData$RC10t, + javaUtilData$RC11t, + alternative="two.sided", paired=TRUE, conf.level=0.95) Wilcoxon signed rank test with continuity correction data: javaUtilData$RC10t and javaUtilData$RC11t V = 67, p-value = 0.979 alternative hypothesis: true location shift is not equal to 0 > wilcox.test( javaUtilData$Coverage10, + javaUtilData$Coverage11, + alternative="two.sided", paired=TRUE, conf.level=0.95) Wilcoxon signed rank test with continuity correction data: javaUtilData$Coverage10 and javaUtilData$Coverage11 V = 7.5, p-value = 0.5992 alternative hypothesis: true location shift is not equal to 0 > > # ========================================================================== > > # ========================================================================== > # ===== Apache data ======================================================== > # ========================================================================== > > apacheData <- read.table("v11data-apache-edit.txt", header=TRUE, sep=" ") > names(apacheData) [1] "SourceFile" "SLOC" "Nt" "RCt" "Coverage" > > # ===== 
Sums of columns ==================================================== > > sum(apacheData$SLOC) [1] 4427 > sum(apacheData$Nt) [1] 2986 > sum(apacheData$RCt) [1] 717 > sum(apacheData$Coverage) [1] 3885 > > # ===== Means of columns =================================================== > > mean(apacheData$SLOC) [1] 130.2059 > mean(apacheData$Nt) [1] 87.82353 > mean(apacheData$RCt) [1] 21.08824 > mean(apacheData$Coverage) [1] 114.2647 > > # ===== Coverage ratio ===================================================== > > sum(apacheData$Coverage) / sum(apacheData$SLOC) [1] 0.8775695 > > apacheData$Coverage / apacheData$SLOC [1] 1.0000000 1.0000000 0.6509434 0.9932886 1.0000000 1.0000000 1.0000000 [8] 1.0000000 0.6470588 0.6413374 1.0000000 0.9393939 0.9600000 0.9213052 [15] 1.0000000 1.0000000 0.9267823 0.8643411 0.9097222 0.8055556 1.0000000 [22] 0.8508772 1.0000000 0.4375000 0.9275362 1.0000000 0.8316498 0.9830508 [29] 0.6333333 0.9299065 0.8181818 0.6923077 1.0000000 1.0000000 > >