\begin{thebibliography}{10}
% Fallback definitions for the helper macros that the entries below rely on.
% \providecommand only defines a macro that is not already defined, so a
% definition supplied by the document class or a loaded package is kept.
\providecommand{\url}[1]{#1}
% Invokes \url@rmstyle when it is defined (presumably by the url package --
% TODO confirm); otherwise \csname turns it into \relax, which does nothing.
\csname url@rmstyle\endcsname
\providecommand{\newblock}{\relax}
\providecommand{\bibinfo}[2]{#2}
% Standard spacing: a zero \spaceskip hands interword glue back to the font.
\providecommand\BIBentrySTDinterwordspacing{\spaceskip=0pt\relax}
% Multiplier applied to the font's interword stretch (\fontdimen3) below.
\providecommand\BIBentryALTinterwordstretchfactor{4}
% Alternate spacing used by entries that end in a long URL: natural width
% \fontdimen2, stretch = factor x \fontdimen3, shrink \fontdimen4.
\providecommand\BIBentryALTinterwordspacing{\spaceskip=\fontdimen2\font plus
\BIBentryALTinterwordstretchfactor\fontdimen3\font minus
  \fontdimen4\font\relax}
% \BIBforeignlanguage{<language>}{<text>}: typesets <text> inside a group,
% switching \language to \l@<language> when that control sequence is defined;
% otherwise it logs a warning and keeps the current hyphenation patterns.
% Every trailing percent sign below must stay the LAST character on its
% physical line: joined onto one line it comments out the remainder of the
% definition (including its closing braces) and all entries that follow.
\providecommand\BIBforeignlanguage[2]{{%
\expandafter\ifx\csname l@#1\endcsname\relax
\typeout{** WARNING: IEEEtran.bst: No hyphenation pattern has been}%
\typeout{** loaded for the language `#1'. Using the pattern for}%
\typeout{** the default language instead.}%
\else
\language=\csname l@#1\endcsname
\fi
#2}}

\bibitem{me06d}
T.~Menzies, Z.~Chen, J.~Hihn, and K.~Lum, ``Selecting best practices for effort
  estimation,'' \emph{IEEE Transactions on Software Engineering}, November
  2006, available from \url{http://menzies.us/pdf/06coseekmo.pdf}.

\bibitem{kitch07}
B.~A. Kitchenham, E.~Mendes, and G.~H. Travassos, ``Cross- vs.\ within-company
  cost estimation studies: A systematic review,'' \emph{IEEE Transactions on
  Software Engineering}, pp. 316--329, May 2007.

\bibitem{mendes07}
E.~Mendes, G.~Dinakaran, and N.~Mosley, ``How valuable is it for a web company
  to use a cross-company cost model, compared to using its own single-company
  model?'' in \emph{16th International World Wide Web Conference, Banff,
  Canada, May 8--12}, 2007, available from
  \url{http://www2007.org/paper326.php}.

\bibitem{abrahamsson07}
P.~Abrahamsson, R.~Moser, W.~Pedrycz, A.~Sillitti, and G.~Succi, ``Effort
  prediction in iterative software development processes -- incremental versus
  global prediction models,'' in \emph{First International Symposium on
  Empirical Software Engineering and Measurement (ESEM 2007)}, 2007, pp.
  344--353.

\bibitem{macdonell07} S.~MacDonell and M.~Shepperd, ``Comparing local and global software effort estimation models -- reflections on a systematic review,'' in \emph{Empirical Software Engineering and Measurement, ESEM 2007}, 2007, pp. 401--409. \bibitem{Premraj:2007p1626} \BIBentryALTinterwordspacing R.~Premraj and T.~Zimmermann, ``Building software cost estimation models using homogenous data,'' \emph{Empirical Software Engineering and Measurement}, Jan 2007. [Online]. Available: \url{http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4343767} \BIBentrySTDinterwordspacing \bibitem{boehm00a} B.~Boehm, ``Safe and simple software cost analysis,'' \emph{IEEE Software}, pp. 14--17, September/October 2000, available from \url{http://www.computer.org/certification/beta/Boehm_Safe.pdf}. \bibitem{me07b} T.~Menzies, J.~Greenwald, and A.~Frank, ``Data mining static code attributes to learn defect predictors,'' \emph{IEEE Transactions on Software Engineering}, January 2007, available from \url{http://menzies.us/pdf/06learnPredict.pdf}. \bibitem{mccabe76} T.~McCabe, ``A complexity measure,'' \emph{IEEE Transactions on Software Engineering}, vol.~2, no.~4, pp. 308--320, Dec. 1976. \bibitem{halstead77} M.~Halstead, \emph{Elements of Software Science}.\hskip 1em plus 0.5em minus 0.4em\relax Elsevier, 1977. \bibitem{shu02} F.~Shull, V.~Basili, B.~Boehm, A.~Brown, P.~Costa, M.~Lindvall, D.~Port, I.~Rus, R.~Tesoriero, and M.~Zelkowitz, ``What we have learned about fighting defects,'' in \emph{Proceedings of 8th International Software Metrics Symposium, Ottawa, Canada}, 2002, pp. 249--258, available from \url{http://fc-md.umd.edu/fcmd/Papers/shull_defects.ps}. \bibitem{fagan86} M.~Fagan, ``Advances in software inspections,'' \emph{IEEE Trans. on Software Engineering}, pp. 744--751, July 1986. \bibitem{shull00a} F.~Shull, I.~Rus, and V.~Basili, ``How perspective-based reading can improve requirements inspections,'' \emph{IEEE Computer}, vol.~33, no.~7, pp. 
73--79, 2000, available from \url{http://www.cs.umd.edu/projects/SoftEng/ESEG/papers/82.77.pdf}. \bibitem{fagan76} M.~Fagan, ``Design and code inspections to reduce errors in program development,'' \emph{IBM Systems Journal}, vol.~15, no.~3, 1976. \bibitem{conf/icse/NagappanB05a} \BIBentryALTinterwordspacing N.~Nagappan and T.~Ball, ``Static analysis tools as early indicators of pre-release defect density,'' in \emph{ICSE}, 2005, pp. 580--586. [Online]. Available: \url{http://doi.acm.org/10.1145/1062558} \BIBentrySTDinterwordspacing \bibitem{me02f} T.~Menzies, D.~Raffo, S.~on~Setamanit, Y.~Hu, and S.~Tootoonian, ``Model-based tests of truisms,'' in \emph{Proceedings of IEEE ASE 2002}, 2002, available from \url{http://menzies.us/pdf/02truisms.pdf}. \bibitem{chapman02} M.~Chapman and D.~Solomon, ``The relationship of cyclomatic complexity, essential complexity and error rates,'' 2002, proceedings of the NASA Software Assurance Symposium, Coolfont Resort and Conference Center in Berkeley Springs, West Virginia. Available from \url{http://www.ivv.nasa.gov/business/research/osmasas/conclusion2002/Mike_Chapman_The_Relationship_of_Cyclomatic_Complexity_Essential_Complexity_and_Error_Rates.ppt}. \bibitem{me04g} T.~Menzies, J.~DiStefano, A.~Orrego, and R.~Chapman, ``Assessing predictors of software defects,'' in \emph{Proceedings, workshop on Predictive Software Models, Chicago}, 2004, available from \url{http://menzies.us/pdf/04psm.pdf}. \bibitem{polyspace} ``Polyspace verifier\textsuperscript{\textregistered},'' 2005, available from \url{http://www.di.ens.fr/~cousot/projects/DAEDALUS/synthetic_summary/POLYSPACE/polyspace-daedalus.htm}. \bibitem{hall00} G.~Hall and J.~Munson, ``Software evolution: code delta and code churn,'' \emph{Journal of Systems and Software}, pp. 111--118, 2000. \bibitem{nikora03} A.~Nikora and J.~Munson, ``Developing fault predictors for evolving software systems,'' in \emph{Ninth International Software Metrics Symposium (METRICS'03)}, 2003.
\bibitem{khoshgoftaar01} T.~Khoshgoftaar, ``An application of zero-inflated Poisson regression for software fault prediction,'' in \emph{Proceedings of the 12th International Symposium on Software Reliability Engineering, Hong Kong}, Nov 2001, pp. 66--73. \bibitem{Khoshgoftaar:2004p1877} T.~Khoshgoftaar and N.~Seliya, ``Comparative assessment of software quality classification techniques: An empirical case study,'' \emph{Empirical Software Engineering}, vol.~9, no.~3, pp. 229--257, 2004. \bibitem{conf/ictai/TangK04} \BIBentryALTinterwordspacing W.~Tang and T.~M. Khoshgoftaar, ``Noise identification with the k-means algorithm,'' in \emph{ICTAI}, 2004, pp. 373--378. [Online]. Available: \url{http://doi.ieeecomputersociety.org/10.1109/ICTAI.2004.93} \BIBentrySTDinterwordspacing \bibitem{journals/ese/KhoshgoftaarS03} \BIBentryALTinterwordspacing T.~M. Khoshgoftaar and N.~Seliya, ``Fault prediction modeling for software quality estimation: Comparing commonly used techniques,'' \emph{Empirical Software Engineering}, vol.~8, no.~3, pp. 255--283, 2003. [Online]. Available: \url{http://dx.doi.org/10.1023/A:1024424811345} \BIBentrySTDinterwordspacing \bibitem{me03a} T.~Menzies, J.~D. Stefano, K.~Ammar, K.~McGill, P.~Callis, R.~Chapman, and D.~J, ``When can we test less?'' in \emph{IEEE Metrics'03}, 2003, available from \url{http://menzies.us/pdf/03metrics.pdf}. \bibitem{me02e} T.~Menzies, J.~S. DiStefano, M.~Chapman, and K.~McGill, ``Metrics that matter,'' in \emph{27th NASA SEL workshop on Software Engineering}, 2002, available from \url{http://menzies.us/pdf/02metrics.pdf}. \bibitem{me03k} T.~Menzies, J.~D. Stefano, and M.~Chapman, ``Learning early lifecycle {IVV} quality indicators,'' in \emph{IEEE Metrics '03}, 2003, available from \url{http://menzies.us/pdf/03early.pdf}. \bibitem{me03q} T.~Menzies and J.~S.~D.
Stefano, ``How good is your blind spot sampling policy?'' in \emph{2004 IEEE Conference on High Assurance Software Engineering}, 2003, available from \url{http://menzies.us/pdf/03blind.pdf}. \bibitem{porter90} A.~Porter and R.~Selby, ``Empirically guided software development using metric-based classification trees,'' \emph{IEEE Software}, pp. 46--54, March 1990. \bibitem{tiang95} J.~Tian and M.~Zelkowitz, ``Complexity measure evaluation and selection,'' \emph{IEEE Transactions on Software Engineering}, vol.~21, no.~8, pp. 641--649, Aug. 1995. \bibitem{khoshgoftaar99} T.~Khoshgoftaar and E.~Allen, ``Model software quality with classification trees,'' in \emph{Recent Advances in Reliability and Quality Engineering}, H.~Pham, Ed.\hskip 1em plus 0.5em minus 0.4em\relax World Scientific, 2001, pp. 247--270. \bibitem{srinivasan95} K.~Srinivasan and D.~Fisher, ``Machine learning approaches to estimating software development effort,'' \emph{IEEE Trans. Soft. Eng.}, pp. 126--137, February 1995. \bibitem{rakitin01} S.~Rakitin, \emph{Software Verification and Validation for Practitioners and Managers, Second Edition}.\hskip 1em plus 0.5em minus 0.4em\relax Artech House, 2001. \bibitem{graves00} T.~L. Graves, A.~F. Karr, J.~S. Marron, and H.~P. Siy, ``Predicting fault incidence using software change history,'' \emph{IEEE Trans. Software Eng.}, vol.~26, no.~7, pp. 653--661, 2000, available on-line at \url{www.niss.org/technicalreports/tr80.pdf}. \bibitem{Blake+Merz:1998} C.~Blake and C.~Merz, ``{UCI} repository of machine learning databases,'' 1998, URL: \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}. \bibitem{fenton96} N.~E. Fenton and S.~Pfleeger, \emph{Software Metrics: A Rigorous \& Practical Approach (second edition)}.\hskip 1em plus 0.5em minus 0.4em\relax International Thomson Press, 1995. \bibitem{shepperd94} M.~Shepperd and D.~Ince, ``A critique of three metrics,'' \emph{The Journal of Systems and Software}, vol.~26, no.~3, pp. 197--210, September 1994.
\bibitem{brooks95} F.~P. Brooks, \emph{The Mythical Man-Month, Anniversary edition}.\hskip 1em plus 0.5em minus 0.4em\relax Addison-Wesley, 1995. \bibitem{nach08} N.~Nagappan, B.~Murphy, and V.~Basili, ``The influence of organizational structure on software quality: An empirical case study,'' in \emph{ICSE'08}, 2008. \bibitem{jiang07} Y.~Jiang, B.~Cukic, and T.~Menzies, ``Fault prediction using early lifecycle data,'' in \emph{ISSRE'07}, 2007, available from \url{http://menzies.us/pdf/07issre.pdf}. \bibitem{me07c} G.~Boetticher, T.~Menzies, and T.~Ostrand, ``The {PROMISE} {R}epository of {E}mpirical {S}oftware {E}ngineering {D}ata,'' 2007, \url{http://promisedata.org/repository}. \bibitem{kim08} S.~Kim, J.~Whitehead, and Y.~Zhang, ``Classifying software changes: Clean or buggy?'' \emph{IEEE TSE}, pp. 181--196, March/April 2008. \bibitem{basili02} V.~Basili, F.~McGarry, R.~Pajerski, and M.~Zelkowitz, ``Lessons learned from 25 years of process improvement: The rise and fall of the {NASA} software engineering laboratory,'' in \emph{Proceedings of the 24th International Conference on Software Engineering (ICSE) 2002, Orlando, Florida}, 2002, available from \url{http://www.cs.umd.edu/projects/SoftEng/ESEG/papers/83.88.pdf}. \bibitem{drummond03} C.~Drummond and R.~C. Holte, ``C4.5, class imbalance, and cost sensitivity: why under-sampling beats over-sampling,'' in \emph{Workshop on Learning from Imbalanced Datasets II}, 2003. \bibitem{cohen95r} W.~Cohen, ``Fast effective rule induction,'' in \emph{ICML'95}, 1995, pp. 115--123, available on-line from \url{http://www.cs.cmu.edu/~wcohen/postscript/ml-95-ripper.ps}. \bibitem{quinlan92} R.~Quinlan, \emph{C4.5: Programs for Machine Learning}.\hskip 1em plus 0.5em minus 0.4em\relax Morgan Kaufmann, 1992, ISBN: 1558602380. \bibitem{holte93} R.~Holte, ``Very simple classification rules perform well on most commonly used datasets,'' \emph{Machine Learning}, vol.~11, p.~63, 1993.
\bibitem{brieman96} L.~Breiman, ``Bagging predictors,'' \emph{Machine Learning}, vol.~24, no.~2, pp. 123--140, 1996. \bibitem{FreSch97} Y.~Freund and R.~Schapire, ``A decision-theoretic generalization of on-line learning and an application to boosting,'' \emph{JCSS: Journal of Computer and System Sciences}, vol.~55, 1997. \bibitem{lessmann09} S.~Lessmann, B.~Baesens, C.~Mues, and S.~Pietsch, ``Benchmarking classification models for software defect prediction: A proposed framework and novel findings,'' \emph{accepted for publication IEEE Transactions on Software Engineering}, 2009. \bibitem{witten05} I.~H. Witten and E.~Frank, \emph{Data mining. 2nd edition}.\hskip 1em plus 0.5em minus 0.4em\relax Los Altos, US: Morgan Kaufmann, 2005. \bibitem{turhan07qsic} B.~Turhan and A.~Bener, ``A multivariate analysis of static code attributes for defect prediction,'' in \emph{Proceedings of the Seventh International Conference on Quality Software}.\hskip 1em plus 0.5em minus 0.4em\relax Los Alamitos, CA, USA: IEEE Computer Society, 2007, pp. 231--237. \bibitem{domingos97optimality} \BIBentryALTinterwordspacing P.~Domingos and M.~J. Pazzani, ``On the optimality of the simple Bayesian classifier under zero-one loss,'' \emph{Machine Learning}, vol.~29, no.~2--3, pp. 103--130, 1997. [Online]. Available: \url{citeseer.ist.psu.edu/domingos97optimality.html} \BIBentrySTDinterwordspacing \bibitem{me07e} T.~Menzies, A.~Dekhtyar, J.~Distefano, and J.~Greenwald, ``Problems with precision,'' \emph{IEEE Transactions on Software Engineering}, September 2007, \url{http://menzies.us/pdf/07precision.pdf}. \bibitem{mann47} H.~B. Mann and D.~R. Whitney, ``On a test of whether one of two random variables is stochastically larger than the other,'' \emph{Ann. Math. Statist.}, vol.~18, no.~1, pp. 50--60, 1947, available on-line at \url{http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.aoms/1177730491}.
\bibitem{demsar06} J.~Dem\v{s}ar, ``Statistical comparisons of classifiers over multiple data sets,'' \emph{Journal of Machine Learning Research}, vol.~7, pp. 1--30, 2006, available from \url{http://jmlr.csail.mit.edu/papers/v7/demsar06a.html}. \bibitem{hayes06} \BIBentryALTinterwordspacing J.~H. Hayes, A.~Dekhtyar, and S.~K. Sundaram, ``Advancing candidate link generation for requirements tracing: The study of methods,'' \emph{IEEE Trans. Software Eng.}, vol.~32, no.~1, pp. 4--19, 2006. [Online]. Available: \url{http://doi.ieeecomputersociety.org/10.1109/TSE.2006.3} \BIBentrySTDinterwordspacing \bibitem{fenton97} N.~E. Fenton and S.~Pfleeger, \emph{Software Metrics: A Rigorous \& Practical Approach}.\hskip 1em plus 0.5em minus 0.4em\relax International Thomson Press, 1997. \bibitem{me08d} T.~Menzies, B.~Turhan, A.~Bener, G.~Gay, B.~Cukic, and Y.~Jiang, ``Implications of ceiling effects in defect predictors,'' in \emph{Proceedings of PROMISE 2008 Workshop (ICSE)}, 2008, available from \url{http://menzies.us/pdf/08ceiling.pdf}. \bibitem{orrego04} A.~Orrego, ``Sawtooth: Learning from huge amounts of data,'' Master's thesis, Computer Science, West Virginia University, 2004. \bibitem{quinlan92b} J.~R. Quinlan, ``Learning with {C}ontinuous {C}lasses,'' in \emph{5th Australian Joint Conference on Artificial Intelligence}, 1992, pp. 343--348, available from \url{http://citeseer.nj.nec.com/quinlan92learning.html}. \bibitem{john95} G.~John and P.~Langley, ``Estimating continuous distributions in Bayesian classifiers,'' in \emph{Proceedings of the Eleventh Conference on Uncertainty in Artificial Intelligence Montreal, Quebec: Morgan Kaufmann}, 1995, pp. 338--345, available from \url{http://citeseer.ist.psu.edu/john95estimating.html}. \end{thebibliography}