\relax \citation{lessmann08} \citation{journals/ese/KhoshgoftaarS03} \citation{me07b} \citation{zhang07} \citation{me07e} \citation{me07e} \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{\thepage }} \citation{musa87} \citation{littlewood97} \citation{boehm88} \citation{leven95} \citation{lutz03} \citation{voas95} \citation{me00q} \citation{lowrey98} \citation{me99q} \citation{me07b} \citation{shu02} \citation{fagan86} \citation{shull00a} \citation{fagan76} \citation{raffo05} \citation{nagappan05} \citation{me02f} \citation{ism06} \@writefile{toc}{\contentsline {section}{\numberline {2}Background}{\thepage }} \@writefile{toc}{\contentsline {subsection}{\numberline {2.1}How can these defect predictors be used?}{\thepage }} \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}But Does It Work?}{\thepage }} \citation{me07b} \citation{tosun09} \citation{tosun10} \citation{nach08} \citation{weyuker08} \citation{hall00} \citation{journals/ese/KhoshgoftaarS03} \citation{lessmann08} \citation{milton08} \citation{me10a} \citation{me07b} \citation{lessmann08} \citation{me07e} \citation{zhang07} \citation{me07i} \citation{me07b} \citation{menziesgay08} \citation{me09b} \@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Building Better Detectors}{\thepage }} \@writefile{toc}{\contentsline {section}{\numberline {3}The Ceiling Effect}{\thepage }} \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Defect Prediction Mathematics}{\thepage }} \newlabel{sec:bal}{{3.1}{\thepage }} \newlabel{one}{{1}{\thepage }} \newlabel{two}{{2}{\thepage }} \newlabel{three}{{3}{\thepage }} \citation{me08e} \citation{me08e} \citation{witten05} \citation{quinlan92} \citation{drummond03} \citation{kamei07} \citation{lessmann08} \citation{me08e} \citation{drummond03} \citation{kamei07} \citation{zhang08} \citation{zhang09} \citation{fenton00b} \citation{Andersson07} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The change of $pf$ with $precision$ when $neg/pos = 15$}}{\thepage }} \newlabel{fig:precison_change}{{1}{\thepage }} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Data used in\nobreakspace {}\cite {me08e}. }}{\thepage }} \newlabel{fig:data}{{2}{\thepage }} \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Changing the $neg/pos$ Ratio}{\thepage }} \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Over- \& under- \& no sampling results. Sorted descending by median $balance$ results ($balance$ is defined in \S 3.1\hbox {}). The right-hand side show median values (as a circle) within a 25\% to 75\% percentile range. The {\em rank}, shown left-hand-side, come from the statistical analysis of Figure\nobreakspace {}4\hbox {}. Three methods share top rank: NB/none, NB/under, j48/under. }}{\thepage }} \newlabel{fig:bands}{{3}{\thepage }} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Statistical tests on the Figure\nobreakspace {}3\hbox {} results: sorted in ascending order on the number of losses (so {\em better} methods appear at the {\em top} of the table. First column shows a comparison of one treatment against the other eight. Two treatments have the same rank if their median ranks are statistically insignificantly different (Mann-Whitney, 95\% confidence). }}{\thepage }} \newlabel{fig:stats}{{4}{\thepage }} \@writefile{toc}{\contentsline {section}{\numberline {4}Breaking Through the Ceiling}{\thepage }} \@writefile{toc}{\contentsline {subsection}{\numberline {4.1}The Distribution of Defects}{\thepage }} \@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Learning from Defect Dense Components - An Experiment}{\thepage }} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces The Distribution of Defects in CM1 and KC1}}{\thepage }} \newlabel{fig:dist}{{5}{\thepage }} \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Component defect distribution example}}{\thepage }} \newlabel{fig:kc1-defects}{{6}{\thepage }} \citation{lessmann08} \@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces The component data for PROMISE defect datasets.}}{\thepage }} \newlabel{fig:components}{{1}{\thepage }} \@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Training on Dense Components Vs. All Components}}{\thepage }} \newlabel{fig:dense-comp-experiment}{{7}{\thepage }} \@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Experimental Results}{\thepage }} \@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Dense Component vs. All Component learning - $recall$ - All Data}}{\thepage }} \newlabel{fig:pds-all}{{8}{\thepage }} \@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Dense Component vs. All Component learning - $pf$ - All Data}}{\thepage }} \newlabel{fig:pfs-all}{{9}{\thepage }} \@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Dense Component vs. All Component learning - $precision$ - All Data}}{\thepage }} \newlabel{fig:prec-all}{{10}{\thepage }} \bibstyle{abbrv} \bibdata{timm1,timm2,hongyu/refs} \bibcite{Andersson07}{1} \bibcite{boehm88}{2} \bibcite{drummond03}{3} \bibcite{fagan76}{4} \bibcite{fagan86}{5} \bibcite{fenton00b}{6} \@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Result statistics per data set. The numeric value next to each treatment represents its Mann-Whitney rank, to the right of each treatment lies the quartile chart for each. Each metric is either sorted by ranking, or in the case of a tie, descending $pd$ and $prec$ or ascending $pf$.}}{\thepage }} \newlabel{fig:metrics}{{2}{\thepage }} \@writefile{toc}{\contentsline {section}{\numberline {5}Conclusion}{\thepage }} \@writefile{toc}{\contentsline {section}{\numberline {6}References}{\thepage }} \bibcite{hall00}{7} \bibcite{kamei07}{8} \bibcite{journals/ese/KhoshgoftaarS03}{9} \bibcite{lessmann08}{10} \bibcite{leven95}{11} \bibcite{littlewood97}{12} \bibcite{lowrey98}{13} \bibcite{lutz03}{14} \bibcite{me99q}{15} \bibcite{me07e}{16} \bibcite{me07b}{17} \bibcite{me00q}{18} \bibcite{me02f}{19} \bibcite{me07i}{20} \bibcite{menziesgay08}{21} \bibcite{me08e}{22} \bibcite{musa87}{23} \bibcite{nagappan05}{24} \bibcite{quinlan92}{25} \bibcite{shu02}{26} \@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces Dense vs. All vs. Sampling - all metrics per data set.}}{\thepage }} \newlabel{fig:plus-minus}{{11}{\thepage }} \bibcite{shull00a}{27} \bibcite{voas95}{28} \bibcite{weyuker08}{29} \bibcite{witten05}{30} \bibcite{zhang08}{31} \bibcite{zhang09}{32} \bibcite{zhang07}{33}