\relax \citation{weyeuker08,halstead77,mccabe76,chapman02,nagappan05,hall00,nikora03,conf/icse/NagappanB05a,khoshgoftaar01,conf/ictai/TangK04,journals/ese/KhoshgoftaarS03,porter90,tiang95,khoshgoftaar99,srinivasan95} \newlabel{@firstpg}{{}{1}} \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}} \citation{me07b} \citation{lessmann09} \citation{milton08} \@writefile{toc}{\contentsline {section}{\numberline {2}Background}{2}} \newlabel{sec:back}{{2}{2}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Blind Spots}{2}} \citation{leven95} \citation{lutz03} \citation{voas95} \citation{lowrey98} \citation{me99q} \citation{bradley98scaling} \citation{fenton99} \citation{domingos97optimality} \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Lightweight Sampling}{3}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.1}Data Mining}{3}} \citation{cohen95} \citation{quinlan92} \citation{briemann01,jiang08a} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Static code features.}}{4}} \newlabel{fig:features}{{1}{4}} \newlabel{sec:scf}{{2.2.2}{4}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.2}Static Code Features}{4}} \citation{nikora04} \citation{ostrand04} \citation{mockus05,zimmermann09} \@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Frequently Asked Questions}{5}} \newlabel{sec:xbin}{{2.3.1}{5}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.3.1}Why Binary Classifications?}{5}} \newlabel{sec:xregress}{{2.3.2}{5}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.3.2}Why Not Use Regression?}{5}} \citation{ostrand04} \citation{quinlan92b} \citation{lessmann09} \citation{me03q} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Sample of percentage of defects seen in different modules. Note that only a very small percentage of modules have more than one defect. For more details on these data sets, see \fig {data}. }}{6}} \newlabel{fig:pmod}{{2}{6}} \citation{fenton94} \citation{musa87,littlewood97} \citation{hall00} \citation{nach08} \citation{weyeuker08} \citation{nagappan05} \citation{me02f} \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.3.3}Why Static Code Features?}{7}} \citation{rakitin01} \citation{fenton97} \citation{shepperd94} \citation{fenton97} \citation{ism06} \newlabel{sec:xstatic}{{2.3.4}{8}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.3.4}What Can be Learned from Static Code Features?}{8}} \citation{tosun09} \citation{tosun10} \citation{me09b} \citation{me07e} \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Tables of data.}}{10}} \newlabel{fig:data}{{3}{10}} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Performance measures}}{10}} \newlabel{fig:perform}{{4}{10}} \citation{me09b} \citation{me09b} \citation{shu02} \citation{fagan86} \citation{shull00a} \citation{fagan76} \citation{raffo05} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Results of round-robin and self experiments.}}{11}} \newlabel{fig:rrself}{{5}{11}} \citation{porter90} \citation{me07b} \citation{jiang08a} \citation{jiang08a} \citation{lessmann09} \citation{lessmann09} \citation{jiang08a} \citation{yang06} \citation{brieman96} \citation{FreSch97} \citation{cover67} \citation{quinlan92} \citation{cohen95r} \citation{breiman84} \citation{briemann01} \@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Summary}{12}} \newlabel{sec:summary}{{2.4}{12}} \@writefile{toc}{\contentsline {section}{\numberline {3}Ceiling Effects in Defect Predictors}{12}} \newlabel{sec:ceiling}{{3}{12}} \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Box plot for AUC(pf,pd) seen with 9 learners.}}{13}} \newlabel{fig:logandnom}{{6}{13}} \@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Range of AUC(pf,pd) ranks seen in 19 learners.}}{13}} \newlabel{fig:all}{{7}{13}} \citation{lessmann09} \@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces $Pf$-vs-$pd$-vs-$effort$.}}{14}} \newlabel{fig:pdpf}{{8}{14}} \@writefile{toc}{\contentsline {section}{\numberline {4}Breaking Through the Ceiling}{14}} \newlabel{sec:b}{{4}{14}} \citation{arisholm06} \citation{jiang08b,me07b} \citation{cohen95} \citation{koru07,koru08,koru09} \@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Experimental Set Up}{16}} \newlabel{sec:exauc}{{4.1.1}{16}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {4.1.1}Operationalizing AUC(effort,pd)}{16}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {4.1.2}Upper and Lower Bounds on Performance}{16}} \citation{me02f} \@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces $Effort$-vs-$PD$.}}{17}} \newlabel{fig:effort}{{9}{17}} \newlabel{sec:xdetails}{{4.1.3}{17}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {4.1.3}Details}{17}} \citation{quinlan92} \citation{elkan01} \citation{fawcett01} \citation{jiang08b} \citation{milton08} \@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Initial results}{18}} \@writefile{toc}{\contentsline {section}{\numberline {5}WHICH}{18}} \citation{Blake+Merz:1998} \citation{Blake+Merz:1998} \citation{Blake+Merz:1998} \citation{milton08} \@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Top-of-stack scores of the WHICH}}{19}} \newlabel{fig:picks}{{10}{19}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Details}{19}} \newlabel{eq:pprime}{{1}{20}} \newlabel{eq:weights}{{2}{20}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Results}{20}} \citation{me07b} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.1}Overall Results}{21}} \citation{lessmann09} \citation{me07b} \@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces Results from all data sets of Figure\nobreakspace {}3\hbox {}.}}{22}} \newlabel{fig:allr}{{11}{22}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.2}Individual Results}{22}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.3}External Validity}{22}} \@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces Four examples of pattern \#1}}{23}} \newlabel{fig:bands1a}{{12}{23}} \@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Three more examples of pattern \#1}}{24}} \newlabel{fig:bands1b}{{13}{24}} \@writefile{toc}{\contentsline {section}{\numberline {6}Discussion}{24}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Methodological Issues}{24}} \newlabel{sec:xmethod}{{6.1}{24}} \@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Two examples of pattern \#2.}}{25}} \newlabel{fig:bands2}{{14}{25}} \@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces The only example of pattern \#3.}}{25}} \newlabel{fig:bands3}{{15}{25}} \citation{pugh90} \@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Algorithmic Issues}{26}} \@writefile{toc}{\contentsline {section}{\numberline {7}Conclusion}{26}} \bibstyle{acmtrans} \bibdata{refs} \bibcite{arisholm06}{\citeauthoryear {Arisholm and Briand}{Arisholm and Briand}{2006}} \bibcite{tosun10}{\citeauthoryear {Ayse\nobreakspace {}Tosun1}{Ayse\nobreakspace {}Tosun1}{2010}} \bibcite{Blake+Merz:1998}{\citeauthoryear {Blake and Merz}{Blake and Merz}{1998}} \bibcite{bradley98scaling}{\citeauthoryear {Bradley, Fayyad, and Reina}{Bradley et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{1998}} \bibcite{breiman84}{\citeauthoryear {Breiman, Friedman, Olshen, and Stone}{Breiman et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{1984}} \bibcite{briemann01}{\citeauthoryear {Breimann}{Breimann}{2001}} \bibcite{brieman96}{\citeauthoryear {Brieman}{Brieman}{1996}} \bibcite{chapman02}{\citeauthoryear {Chapman and Solomon}{Chapman and Solomon}{2002}} \bibcite{cohen95}{\citeauthoryear {Cohen}{Cohen}{1995a}} \bibcite{cohen95r}{\citeauthoryear {Cohen}{Cohen}{1995b}} \bibcite{cover67}{\citeauthoryear {Cover and Hart}{Cover and Hart}{1967}} \bibcite{demsar06}{\citeauthoryear {Demsar}{Demsar}{2006}} \bibcite{dietterich97}{\citeauthoryear {Dietterich}{Dietterich}{1997}} \bibcite{domingos97optimality}{\citeauthoryear {Domingos and Pazzani}{Domingos and Pazzani}{1997}} \bibcite{elkan01}{\citeauthoryear {Elkan}{Elkan}{2001}} \bibcite{fagan76}{\citeauthoryear {Fagan}{Fagan}{1976}} \bibcite{fagan86}{\citeauthoryear {Fagan}{Fagan}{1986}} \bibcite{fawcett01}{\citeauthoryear {Fawcett}{Fawcett}{2001}} \bibcite{fenton94}{\citeauthoryear {Fenton, Pfleeger, and Glass}{Fenton et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{1994}} \bibcite{fenton99}{\citeauthoryear {Fenton and Neil}{Fenton and Neil}{1999}} \bibcite{fenton96}{\citeauthoryear {Fenton and Pfleeger}{Fenton and Pfleeger}{1995}} \bibcite{fenton97}{\citeauthoryear {Fenton and Pfleeger}{Fenton and Pfleeger}{1997}} \bibcite{FreSch97}{\citeauthoryear {Freund and Schapire}{Freund and Schapire}{1997}} \bibcite{hall00}{\citeauthoryear {Hall and Munson}{Hall and Munson}{2000}} \bibcite{halstead77}{\citeauthoryear {Halstead}{Halstead}{1977}} \bibcite{huang05}{\citeauthoryear {Huang and Ling}{Huang and Ling}{2005}} \bibcite{jiang08b}{\citeauthoryear {Jiang, Cukic, and Ma}{Jiang et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2008}} \bibcite{jiang08a}{\citeauthoryear {Jiang, Cukic, and Menzies}{Jiang et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2008}} \bibcite{khoshgoftaar01}{\citeauthoryear {Khoshgoftaar}{Khoshgoftaar}{2001}} \bibcite{khoshgoftaar99}{\citeauthoryear {Khoshgoftaar and Allen}{Khoshgoftaar and Allen}{2001}} \bibcite{journals/ese/KhoshgoftaarS03}{\citeauthoryear {Khoshgoftaar and Seliya}{Khoshgoftaar and Seliya}{2003}} \bibcite{koru08}{\citeauthoryear {Koru, Emam, Zhang, Liu, and Mathew}{Koru et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2008}} \bibcite{koru09}{\citeauthoryear {Koru, Zhang, El\nobreakspace {}Emam, and Liu}{Koru et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2009}} \bibcite{koru07}{\citeauthoryear {Koru, Zhang, and Liu}{Koru et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2007}} \bibcite{lessmann09}{\citeauthoryear {Lessmann, Baesens, Mues, and Pietsch}{Lessmann et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2008}} \bibcite{leven95}{\citeauthoryear {Leveson}{Leveson}{1995}} \bibcite{littlewood97}{\citeauthoryear {Littlewood and Wright}{Littlewood and Wright}{1997}} \bibcite{lowrey98}{\citeauthoryear {Lowry, Boyd, and Kulkarni}{Lowry et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{1998}} \bibcite{lutz03}{\citeauthoryear {Lutz and Mikulski}{Lutz and Mikulski}{2003}} \bibcite{mccabe76}{\citeauthoryear {McCabe}{McCabe}{1976}} \bibcite{me99q}{\citeauthoryear {Menzies and Cukic}{Menzies and Cukic}{2000}} \bibcite{me07e}{\citeauthoryear {Menzies, Dekhtyar, Distefano, and Greenwald}{Menzies et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2007}} \bibcite{me07b}{\citeauthoryear {Menzies, Greenwald, and Frank}{Menzies et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2007}} \bibcite{me02f}{\citeauthoryear {Menzies, Raffo, on\nobreakspace {}Setamanit, Hu, and Tootoonian}{Menzies et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2002}} \bibcite{me03q}{\citeauthoryear {Menzies and Stefano}{Menzies and Stefano}{2003}} \bibcite{milton08}{\citeauthoryear {Milton}{Milton}{2008}} \bibcite{mockus05}{\citeauthoryear {Mockus, Zhang, and Li}{Mockus et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2005}} \bibcite{musa87}{\citeauthoryear {Musa, Iannino, and Okumoto}{Musa et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{1987}} \bibcite{nagappan05}{\citeauthoryear {Nagappan and Ball}{Nagappan and Ball}{2005a}} \bibcite{conf/icse/NagappanB05a}{\citeauthoryear {Nagappan and Ball}{Nagappan and Ball}{2005b}} \bibcite{nach08}{\citeauthoryear {Nagappan, Murphy, and V}{Nagappan et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2008}} \bibcite{nikora04}{\citeauthoryear {Nikora}{Nikora}{2004}} \bibcite{nikora03}{\citeauthoryear {Nikora and Munson}{Nikora and Munson}{2003}} \bibcite{ostrand04}{\citeauthoryear {Ostrand, Weyuker, and Bell}{Ostrand et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2004}} \bibcite{porter90}{\citeauthoryear {Porter and Selby}{Porter and Selby}{1990}} \bibcite{pugh90}{\citeauthoryear {Pugh}{Pugh}{1990}} \bibcite{quinlan92b}{\citeauthoryear {Quinlan}{Quinlan}{1992a}} \bibcite{quinlan92}{\citeauthoryear {Quinlan}{Quinlan}{1992b}} \bibcite{raffo05}{\citeauthoryear {Raffo}{Raffo}{2005}} \bibcite{rakitin01}{\citeauthoryear {Rakitin}{Rakitin}{2001}} \bibcite{shepperd94}{\citeauthoryear {Shepperd and Ince}{Shepperd and Ince}{1994}} \bibcite{shu02}{\citeauthoryear {Shull, ad\nobreakspace {}B.\nobreakspace {}Boehm, Brown, Costa, Lindvall, Port, Rus, Tesoriero, and Zelkowitz}{Shull et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2002}} \bibcite{shull00a}{\citeauthoryear {Shull, Rus, and Basili}{Shull et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2000}} \bibcite{srinivasan95}{\citeauthoryear {Srinivasan and Fisher}{Srinivasan and Fisher}{1995}} \bibcite{zimmermann09}{\citeauthoryear {T.\nobreakspace {}Zimmermann and Murphy}{T.\nobreakspace {}Zimmermann and Murphy}{2009}} \bibcite{conf/ictai/TangK04}{\citeauthoryear {Tang and Khoshgoftaar}{Tang and Khoshgoftaar}{2004}} \bibcite{tiang95}{\citeauthoryear {Tian and Zelkowitz}{Tian and Zelkowitz}{1995}} \bibcite{tosun09}{\citeauthoryear {Tosun, Bener, and Turhan}{Tosun et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2009}} \bibcite{me09b}{\citeauthoryear {Turhan, Menzies, Bener, and Distefano}{Turhan et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2009}} \bibcite{ism06}{\citeauthoryear {Turner}{Turner}{2006}} \bibcite{voas95}{\citeauthoryear {Voas and Miller}{Voas and Miller}{1995}} \bibcite{weyeuker08}{\citeauthoryear {Weyuker, Ostrand, and Bell}{Weyuker et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2008}} \bibcite{witten05}{\citeauthoryear {Witten and Frank}{Witten and Frank}{2005}} \bibcite{yang06}{\citeauthoryear {Yang, Webb, Cerquides, Korb, Boughton, and Ting}{Yang et\nobreakspace {}al\unhbox \voidb@x \hbox {.}}{2006}} \citation{witten05} \citation{witten05} \citation{domingos97optimality} \citation{domingos97optimality} \citation{quinlan92} \citation{witten05} \citation{quinlan92} \citation{cohen95r} \citation{dietterich97} \citation{halstead77} \citation{mccabe76} \citation{fenton96} \citation{fenton96} \citation{fenton96} \citation{demsar06} \citation{demsar06,huang05} \citation{demsar06} \newlabel{@lastpg}{{7}{32}}