\relax \citation{me07c} \citation{Arisholm:2006p516,Bell:2006p517,Ostrand:2007p519,me07e,chen05,dekhtyar04,jiang07,nagappan05} \citation{marcus08} \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{2}} \@writefile{toc}{\contentsline {section}{\numberline {2}Motivation}{3}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Why Study WC vs CC?}{3}} \citation{musa87} \citation{boehm88} \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Why Study Defect Predictors?}{4}} \citation{goseva07} \@writefile{toc}{\contentsline {section}{\numberline {3}Methodology}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Data}{5}} \@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Descriptions of the ten software projects used in this paper. The rows labeled ``NASA'' come from NASA aerospace projects, while the rows labeled ``SOFTLAB'' come from a Turkish software company writing applications for domestic appliances.}}{5}} \newlabel{tbl:data1}{{1}{5}} \citation{me07b} \citation{dou95} \@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Summary of data from the ten software projects of Table\nobreakspace {}1\hbox {}, sorted by number of functional units.}}{6}} \newlabel{tbl:data2}{{2}{6}} \newlabel{eq:ent}{{1}{6}} \citation{me07b} \citation{me07b} \citation{me07b} \@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Static code features available in the Table\nobreakspace {}2\hbox {} projects.}}{7}} \newlabel{tbl:attr1}{{3}{7}} \newlabel{eq:infogain}{{2}{7}} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Best defect predictors learned in\nobreakspace {}\cite {me07b}. Mean results from Naive Bayes after 10 repeats of (i)\nobreakspace {}randomizing the order of the data; (ii)\nobreakspace {}dividing that data into ten 90\%:10\% training:test splits. Prior to learning, all numerics were replaced with their logarithms. InfoGain was then used to select the best two or three attributes shown in the right-hand column (if ``three'' performed as well as ``two'', this figure shows the results using ``two'').}}{8}} \newlabel{fig:best}{{1}{8}} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Attributes used in Figure\nobreakspace {}1\hbox {}.}}{8}} \newlabel{fig:attrs}{{2}{8}} \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces InfoGain for KC3 attributes, calculated from Equation\nobreakspace {}2\hbox {}. Lines show means and t-bars show standard deviations after 10 trials on 90\% of the training data (randomly selected).}}{8}} \newlabel{fig:sorted}{{3}{8}}
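For reference, the entropy and InfoGain measures behind the labels eq:ent and eq:infogain above (and plotted in Figure 3) can be reconstructed as follows. This is a sketch assuming the standard Shannon formulation used in defect-prediction work such as \cite {me07b}; Equations 1 and 2 of the main text are authoritative.
\[
  H(C) \;=\; -\sum_{c \in C} p(c)\,\log_2 p(c)
\]
\[
  \mathit{InfoGain}(A) \;=\; H(C) \;-\; \sum_{a \in A} p(a)\,H(C \mid a)
\]
Here $C$ is the class attribute (defective vs. defect-free) and $a$ ranges over the values of static code feature $A$; features with higher InfoGain are more informative about defects.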
\citation{shepperd94} \@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Static code features shared by the NASA and SOFTLAB projects.}}{9}} \newlabel{tbl:attr2}{{4}{9}} \citation{me07b} \citation{lessmann09} \citation{duda76} \citation{domingos97optimality} \citation{domingos97optimality} \citation{yang02} \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Naive Bayes Classifier}{10}} \newlabel{eq:nb}{{3}{10}} \citation{yang02} \citation{witten05} \citation{me07b} \citation{me07b,me07e} \newlabel{eq:b}{{4}{11}} \newlabel{eq:laplace}{{5}{11}} \newlabel{eq:normal}{{6}{11}} \citation{me07b,me07e} \citation{mann47} \citation{demsar06} \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Performance Evaluation}{12}} \newlabel{eq:bal}{{9}{12}} \@writefile{toc}{\contentsline {section}{\numberline {4}Analysis \#1: Are CC data ever useful for organizations?}{12}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Design}{12}} \@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces Pseudocode for Analysis 1}}{13}} \newlabel{tbl:exp1}{{5}{13}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Results from Analysis \#1}{13}} \citation{koru08} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Analysis \#1 results averaged over seven NASA tables. Numeric results on the left; quartile charts on the right. ``Q1'' and ``Q3'' denote the 25th and 75th percentiles, respectively. The upper quartile of the first row is not visible since it runs from 100\% to 100\%; i.e., it has zero length.}}{14}} \newlabel{fig:ccwsnum}{{4}{14}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Checking the Analysis \#1 Results}{14}} \citation{hayes06} \@writefile{lot}{\contentsline {table}{\numberline {6}{\ignorespaces Summary of Mann-Whitney U-test results (95\% confidence) for moving from WC to CC. For all projects' results, see\nobreakspace {}Figure\nobreakspace {}5\hbox {}.}}{15}} \newlabel{tbl:ccwc}{{6}{15}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Discussion of Analysis \#1}{15}} \@writefile{toc}{\contentsline {section}{\numberline {5}Analysis \#2: How can companies filter CC data for local tuning?}{15}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Design}{15}} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Project-wise Analysis \#1 results for the NASA projects.}}{16}} \newlabel{fig:ccwsprojs}{{5}{16}} \citation{baker07} \@writefile{lot}{\contentsline {table}{\numberline {7}{\ignorespaces Pseudocode for Analysis 2}}{17}} \newlabel{tbl:exp2}{{7}{17}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {5.1.1}Nearest Neighbor (NN) Filtering}{17}} \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Analysis \#2 PD results where $NN_{pd} \ge WC_{pd}$. Rankings computed via Mann-Whitney (95\% confidence), comparing each row to all other rows.}}{18}} \newlabel{fig:pd1}{{6}{18}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Results}{18}} \@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Analysis \#2 PD results where $NN_{pd} < WC_{pd}$.}}{19}} \newlabel{fig:pd2}{{7}{19}}
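A hedged reconstruction of the formulas behind the labels eq:nb, eq:normal, and eq:bal above, assuming the standard Naive Bayes formulation of \cite {duda76} and the pd/pf-based balance measure of \cite {me07b}; the main text's Equations 3 through 9 are authoritative.
\[
  P(H \mid E) \;=\; \frac{P(H)}{P(E)} \prod_i P(E_i \mid H)
  \qquad\text{with Gaussian likelihoods}\qquad
  f(x) \;=\; \frac{1}{\sigma\sqrt{2\pi}}\,e^{-\frac{(x-\mu)^2}{2\sigma^2}}
\]
\[
  \mathit{balance} \;=\; 1 \;-\; \frac{\sqrt{(0-\mathit{pf})^2 + (1-\mathit{pd})^2}}{\sqrt{2}}
\]
Balance rewards predictors that land near the ideal point of $\mathit{pd}=1$, $\mathit{pf}=0$.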
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Analysis \#2 PF results where $NN_{pf} \le WC_{pf}$.}}{19}} \newlabel{fig:pf1}{{8}{19}} \@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Analysis \#2 PF results where $NN_{pf} > WC_{pf}$.}}{19}} \newlabel{fig:pf2}{{9}{19}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Discussion of Analysis \#2}{20}} \@writefile{toc}{\contentsline {section}{\numberline {6}Analysis \#3: What is the smallest amount of local data needed for constructing a model?}{20}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Design}{20}} \@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Results from Analysis \#3. Training set size grows in units of 100 examples, moving left to right over the x-axis. The MC2 results only appear at the maximum x-value since MC2 has fewer than 200 examples.}}{21}} \newlabel{fig:inc}{{10}{21}} \@writefile{lot}{\contentsline {table}{\numberline {8}{\ignorespaces Pseudocode for Analysis 3}}{21}} \newlabel{tbl:exp3}{{8}{21}} \citation{me08d} \citation{orrego04} \citation{quinlan92} \citation{quinlan92b} \citation{Blake+Merz:1998} \citation{orrego04} \citation{quinlan92} \citation{quinlan92b} \citation{Blake+Merz:1998} \@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces The y-axis shows the plateau point after learning from data sets with up to $X$ examples (from\nobreakspace {}\cite {orrego04}). The left plot shows results from using Naive Bayes (nbk) or a decision tree learner (j48)\nobreakspace {}\cite {quinlan92} to predict discrete classes. The right plot shows results from using linear regression (lsr) or model trees (m5)\nobreakspace {}\cite {quinlan92b} to learn predictors for continuous classes. In this study, data sets were drawn from the UC Irvine data repository\nobreakspace {}\cite {Blake+Merz:1998}.}}{22}} \newlabel{fig:plateau}{{11}{22}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Results from Analysis \#3}{22}} \citation{john95} \citation{orrego04} \citation{boehm00a} \@writefile{lot}{\contentsline {table}{\numberline {9}{\ignorespaces An estimate of the effort required to build and test 100 modules.}}{23}} \newlabel{tbl:estimate}{{9}{23}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Checking the Analysis \#3 Results}{23}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.4}Discussion of Analysis \#3}{23}} \@writefile{toc}{\contentsline {section}{\numberline {7}Replication: Can our results be generalized?}{23}} \newlabel{sec:exp4}{{7}{23}} \@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces Analysis \#1 results for the SOFTLAB projects. Both overall and individual project results are shown.}}{24}} \newlabel{fig:ccwcturkey}{{12}{24}} \@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Analysis \#2 PD results for the SOFTLAB projects where $NN_{pd} \ge WC_{pd}$. Rankings computed via Mann-Whitney (95\% confidence), comparing each row to all other rows.}}{25}} \newlabel{fig:arpd1}{{13}{25}} \@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Analysis \#2 PF results for the SOFTLAB projects where $NN_{pf} \le WC_{pf}$.}}{25}} \newlabel{fig:arpf1}{{14}{25}} \@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces Analysis \#2 PF results for the SOFTLAB projects where $NN_{pf} > WC_{pf}$.}}{25}} \newlabel{fig:arpf2}{{15}{25}}
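A minimal illustrative sketch of the nearest-neighbor (NN) filtering named in Section 5.1.1: for each within-company (WC) instance, keep the $k$ nearest cross-company (CC) instances by Euclidean distance over the shared static code features of Table 4, then train only on the retained CC rows. The helper name \texttt{nn\_filter}, the choice $k=10$, and the NumPy data layout are assumptions for illustration; Table 7 gives the authoritative pseudocode.
\begin{verbatim}
import numpy as np

def nn_filter(cc_X, wc_X, k=10):
    # For every within-company (WC) row, find the k nearest
    # cross-company (CC) rows by Euclidean distance, then return
    # the union of their indices (duplicates collapsed).
    keep = set()
    for wc_row in wc_X:
        dists = np.linalg.norm(cc_X - wc_row, axis=1)
        keep.update(np.argsort(dists)[:k].tolist())
    return sorted(keep)

# Usage: train the defect predictor on cc_X[nn_filter(cc_X, wc_X)]
# rather than on all CC rows.
\end{verbatim}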
\citation{lessmann09} \citation{shepperd97} \citation{khoshgoftaar03} \citation{moser08} \citation{shu02} \citation{fagan86} \citation{shull00a} \citation{fagan76} \@writefile{toc}{\contentsline {section}{\numberline {8}Related Work}{26}} \@writefile{toc}{\contentsline {subsection}{\numberline {8.1}Neighborhood Reasoning}{26}} \@writefile{toc}{\contentsline {subsection}{\numberline {8.2}On Defect Prediction Using Static Code Features}{26}} \newlabel{sec:dp}{{8.2}{26}} \citation{moser08} \citation{conf/icse/NagappanB05a} \citation{me02f} \citation{halstead77,mccabe76,chapman02,me04g,polyspace,hall00,nikora03,conf/icse/NagappanB05a,Khoshgoftaar:2004p1877,conf/ictai/TangK04,porter90,tiang95,srinivasan95} \citation{rakitin01} \citation{Blake+Merz:1998} \citation{Blake+Merz:1998} \citation{fenton96} \citation{fenton96} \citation{shepperd94} \@writefile{lot}{\contentsline {table}{\numberline {10}{\ignorespaces Some representative $pd$s and $pf$s for prediction problems from the UC Irvine machine learning database\nobreakspace {}\cite {Blake+Merz:1998}. These values were generated using the standard settings of a state-of-the-art decision tree learner (J48). For each data set, ten experiments were conducted in which a decision tree was learned on 90\% of the data, then tested on the remaining 10\%. The numbers shown here are the average results across those ten experiments.}}{27}} \newlabel{tbl:uci}{{10}{27}} \citation{Blake+Merz:1998} \citation{arisholm06} \citation{brooks95} \@writefile{toc}{\contentsline {section}{\numberline {9}Practical Implications}{28}} \citation{kitch07} \citation{me07b} \citation{lessmann09} \citation{lessmann09} \citation{lessmann09} \citation{graves00} \citation{me07b} \citation{basili02} \@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces The x-axis shows the rank of the data miners listed on the y-axis. The methods whose top ranks span 4 to 12 are not statistically significantly different. From\nobreakspace {}\cite {lessmann09}.}}{30}} \newlabel{fig:lessmann}{{16}{30}}
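The repeated 90\%:10\% holdout described in the Figure 1 and Table 10 captions above can be sketched as follows. The definitions $pd = TP/(TP+FN)$ and $pf = FP/(FP+TN)$ are the usual ones; the \texttt{learner} callable and the \texttt{(features, label)} row layout are illustrative assumptions, not the paper's code.
\begin{verbatim}
import random

def pd_pf(actual, predicted):
    # pd = probability of detection, pf = probability of false alarm,
    # computed from parallel lists of 0/1 defect labels.
    tp = sum(1 for a, p in zip(actual, predicted) if a and p)
    fn = sum(1 for a, p in zip(actual, predicted) if a and not p)
    fp = sum(1 for a, p in zip(actual, predicted) if not a and p)
    tn = sum(1 for a, p in zip(actual, predicted) if not a and not p)
    return tp / max(tp + fn, 1), fp / max(fp + tn, 1)

def holdout(data, learner, repeats=10, train_frac=0.9):
    # Repeat: shuffle the rows; train on 90%; test on the rest.
    results = []
    for _ in range(repeats):
        rows = list(data)
        random.shuffle(rows)
        cut = int(train_frac * len(rows))
        model = learner(rows[:cut])          # returns a predict function
        actual = [y for _, y in rows[cut:]]
        predicted = [model(x) for x, _ in rows[cut:]]
        results.append(pd_pf(actual, predicted))
    return results                           # list of (pd, pf) pairs
\end{verbatim}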
\@writefile{toc}{\contentsline {section}{\numberline {10}Threats to validity}{30}} \@writefile{toc}{\contentsline {section}{\numberline {11}Conclusion}{31}} \bibcite{me07c}{1} \bibcite{Arisholm:2006p516}{2} \bibcite{Bell:2006p517}{3} \bibcite{Ostrand:2007p519}{4} \bibcite{me07e}{5} \bibcite{chen05}{6} \bibcite{dekhtyar04}{7} \bibcite{jiang07}{8} \bibcite{nagappan05}{9} \bibcite{marcus08}{10} \bibcite{musa87}{11} \bibcite{boehm88}{12} \bibcite{goseva07}{13} \bibcite{me07b}{14} \bibcite{dou95}{15} \bibcite{shepperd94}{16} \bibcite{lessmann09}{17} \bibcite{duda76}{18} \bibcite{domingos97optimality}{19} \bibcite{yang02}{20} \bibcite{witten05}{21} \bibcite{mann47}{22} \bibcite{demsar06}{23} \bibcite{koru08}{24} \bibcite{hayes06}{25} \bibcite{baker07}{26} \bibcite{me08d}{27} \bibcite{orrego04}{28} \bibcite{quinlan92}{29} \bibcite{quinlan92b}{30} \bibcite{Blake+Merz:1998}{31} \bibcite{john95}{32} \bibcite{boehm00a}{33} \bibcite{shepperd97}{34} \bibcite{khoshgoftaar03}{35} \bibcite{moser08}{36} \bibcite{shu02}{37} \bibcite{fagan86}{38} \bibcite{shull00a}{39} \bibcite{fagan76}{40} \bibcite{conf/icse/NagappanB05a}{41} \bibcite{me02f}{42} \bibcite{halstead77}{43} \bibcite{mccabe76}{44} \bibcite{chapman02}{45} \bibcite{me04g}{46} \bibcite{polyspace}{47} \bibcite{hall00}{48} \bibcite{nikora03}{49} \bibcite{Khoshgoftaar:2004p1877}{50} \bibcite{conf/ictai/TangK04}{51} \bibcite{porter90}{52} \bibcite{tiang95}{53} \bibcite{srinivasan95}{54} \bibcite{rakitin01}{55} \bibcite{fenton96}{56} \bibcite{arisholm06}{57} \bibcite{brooks95}{58} \bibcite{kitch07}{59} \bibcite{graves00}{60} \bibcite{basili02}{61}