\relax
\citation{Menzies2009b}
\citation{Menzies2009b}
\citation{Menzies2007c}
\citation{Boehm1981}
\citation{Fenton2007c}
\citation{Fenton2007b}
\citation{Kaariainen2006,Dasgupta2008,Hassan2010}
\citation{Kaariainen2006}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}}
\newlabel{sect:introduction}{{1}{1}}
\citation{Hassan2010}
\citation{Menzies2009b,Menzies2007c,Boehm1981,Fenton2007c,Fenton2007b}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Contributions}{2}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Motivation}{2}}
\newlabel{sect:motivation}{{2}{2}}
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{fig:expected}{{1a}{2}}
\newlabel{sub@fig:expected}{{(a)}{a}}
\newlabel{fig:actual}{{1b}{2}}
\newlabel{sub@fig:actual}{{(b)}{b}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Expected and actual topologies for the purpose of demonstration. Assumption-all assumes that \textit {all} instances are used in estimation, hence the topology would look like (a). Assumption-pop states that only the \textit {popular} instances (filled squares) are used for estimation.\relax }}{2}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Expected}}}{2}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Actual}}}{2}}
\newlabel{fig:topologies}{{1}{2}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Background}{2}}
\newlabel{sect:background}{{3}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Software Effort Estimation}{2}}
\citation{Li2009}
\citation{Jor2004e}
\citation{Jor2005b}
\citation{shepperd96}
\citation{Mendes2003,Li2009,Kadoda2000}
\citation{Keung2011}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Ratio of the instances used for prediction in a closest-neighbor setting to the dataset size. Note that the median percentage value is $25\%$, meaning that only a limited number of instances are the closest neighbors of other instances and are useful in estimation.\relax }}{3}}
\newlabel{fig:selec-perc}{{2}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Active Learning}{3}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Methodology}{3}}
\newlabel{sect:methodology}{{4}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Algorithms}{3}}
\newlabel{equation:normalization}{{1}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Building a Guidance System}{4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {4.2.1}Toy Example}{4}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces The projects of the toy example. Our hypothetical dataset consists of 3 projects described by 1 independent variable (KLOC) and 1 dependent variable (effort in man-months).\relax }}{4}}
\newlabel{fig:toy-dataset}{{3}{4}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Visualization of the projects on a linear scale, where the axis shows KLOC values.\relax }}{4}}
\newlabel{fig:linear-scale}{{4}{4}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces The distance matrix of the projects $P_1$, $P_2$ and $P_3$.\relax }}{4}}
\newlabel{fig:toy-distance-matrix}{{5}{4}}
\citation{shepperd97}
\citation{foss03}
\citation{foss03}
\citation{shepperd97}
\citation{foss03}
\citation{Foss}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces The $E(k)$ matrix resulting from the distance matrix of Figure\nobreakspace {}5\hbox {}. The cells with a value of $na$ mean that the ordering for that cell is \textit {not applicable}.\relax }}{5}}
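% A minimal sketch of the normalization referenced by the label
% `equation:normalization' above (Eq. 1, Sect. 4.1). This file records only
% the label, so the min-max form below is an assumption based on common
% practice in this literature, not a verbatim copy of the paper's Eq. 1:
%   \begin{equation}
%     normalized(x_i) = \frac{x_i - \min(x)}{\max(x) - \min(x)}
%     \label{equation:normalization}
%   \end{equation}
% i.e., each independent variable is mapped onto the interval $[0,1]$ so that
% no single attribute dominates the distance calculations of Figure 5.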
\newlabel{fig:ek-matrix}{{6}{5}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces The $E(1)$ matrix and the popularity indices of the toy example. Note that the popularity index is the sum of the columns of the $E(1)$ matrix.\relax }}{5}}
\newlabel{fig:toy-pop-index}{{7}{5}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces The change of the active pool for the toy example. Note that in an actual setting the transition from $Round_i$ to $Round_{i+1}$ is governed by the stopping rules.\relax }}{5}}
\newlabel{fig:active-pool}{{8}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Performance Measures}{5}}
\newlabel{ar}{{2}{5}}
\newlabel{one}{{3}{5}}
\newlabel{onex}{{4}{5}}
\newlabel{one}{{5}{5}}
\citation{keung2008c,shepperd97}
\citation{keung2008c,shepperd97,Finnie1997}
\citation{Menzies2006,Lum2008,Kocaguneli2010}
\citation{Kocaguneli2010}
\citation{Menzies2006,Lum2008,Kocaguneli2010}
\citation{Kocaguneli2010}
\citation{keung2008c,Li2009,Li2009a,shepperd97,Shepperd1996,Finnie1997}
\citation{Menzies2006,Bakir2009,Kocaguneli2010}
\citation{Menzies2006,Lum2008,Kocaguneli2010}
\citation{Kocaguneli2010}
\citation{Keung2008,keung2008c,keung2008b,Kocaguneli2010,shepperd97,Li2008,Kadoda2000,Kirsopp2002,Li2009,Li2009a}
\citation{Li2009a,Sentas2005}
\citation{Kultur2008,Turhan2007}
\citation{Menzies2006,Lum2008,Kocaguneli2010}
\citation{Miyazaki1994}
\citation{Menzies2006,Lum2008,Kocaguneli2010}
\citation{Briand1999,shepperd97}
\citation{Menzies2006,Lum2008,Kocaguneli2010,Boehm1981}
\citation{Menzies2006,Lum2008,Kocaguneli2010}
\citation{Bakir2009}
\citation{Boehm1981}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces The 699 projects used in this study come from 20 data sets. Indentation in column one denotes that the indented dataset is a subset of its non-indented parent.\relax }}{6}}
\newlabel{fig:datasets}{{9}{6}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Comparing algorithms (\textit {i},\textit {j}) on performance measures ($P_i$,$P_j$). The ``better'' predicate changes according to $P$: for error measures like MRE, ``better'' means lower medians, whereas for PRED(25) it means higher medians.\relax }}{6}}
\newlabel{fig:pseudocode-wtl}{{10}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Experiments}{6}}
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces A sample of effort estimation papers that use the data sets explored in this paper.\relax }}{6}}
\newlabel{fig:dataset-paper}{{11}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Datasets}{6}}
\citation{Boehm1981}
\citation{Keung2011}
\citation{Keung2011}
\@writefile{toc}{\contentsline {section}{\numberline {5}Results}{7}}
\newlabel{sect:results}{{5}{7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Performance}{7}}
\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces The distribution of the datasets into result-categories. The last column shows the number of datasets in each category. Note that $12$ out of $19$ datasets fall into the \textit {Pro-Active} category, meaning that for $63\%$ of the datasets \textit {activeNN} is a substitute for \textit {passiveNN}.\relax }}{7}}
\newlabel{fig:dataset-to-category}{{12}{7}}
\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces The $win-loss$ values. The datasets where \textit {activeNN} has lost to another learner (i.e., a negative $win-loss$ value) are highlighted for convenience.\relax }}{7}}
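% A minimal sketch of the performance measures behind the labels `ar', `one'
% and `onex' above (Eqs. 2-5, Sect. 4.3) and of the ``better'' predicate of
% Figure 10. Only MRE and PRED(25) are named in this file, so the exact forms
% below are assumptions based on their standard definitions in the
% effort-estimation literature, with $x_i$ the actual and $\hat{x}_i$ the
% predicted effort:
%   AR_i = | x_i - \hat{x}_i |                       (absolute residual)
%   MRE_i = \frac{| x_i - \hat{x}_i |}{x_i}          (magnitude of relative error)
%   PRED(25) = \frac{100}{N} \sum_{i=1}^{N} [\, MRE_i \le 0.25 \,]
% Under these measures, ``better'' in the win-tie-loss comparison of Figure 10
% means a lower median for error measures such as MRE, and a higher median
% for accuracy measures such as PRED(25); the $win-loss$ values of Figure 13
% then tally, per learner, wins minus losses over all such comparisons.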
\newlabel{fig:win-tie-loss}{{13}{7}}
\citation{Keung2011}
\citation{Keung2011}
\newlabel{fig:cat-active}{{14a}{8}}
\newlabel{sub@fig:cat-active}{{(a)}{a}}
\newlabel{fig:cat-cart}{{14b}{8}}
\newlabel{sub@fig:cat-cart}{{(b)}{b}}
\newlabel{fig:cat-con-active}{{14c}{8}}
\newlabel{sub@fig:cat-con-active}{{(c)}{c}}
\newlabel{fig:cat-con-cart}{{14d}{8}}
\newlabel{sub@fig:cat-con-cart}{{(d)}{d}}
\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Sample plots for the different categories of results. The line parallel to the y-axis indicates the stopping point.\relax }}{8}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(a)}{\ignorespaces {Pro-Active: desharnais}}}{8}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Pro-CART: albrecht}}}{8}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(c)}{\ignorespaces {Con-Active: maxwell}}}{8}}
\@writefile{lof}{\contentsline {subfigure}{\numberline{(d)}{\ignorespaces {Con-CART: cocomo81s}}}{8}}
\newlabel{fig:category-plots}{{14}{8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Labeling Effort Reduction}{8}}
\citation{Alpaydin2004}
\citation{Milic2004}
\citation{Robson2002}
\citation{Kitchenham2001}
\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces The percentage of instances that are labeled at the stopping point. The median percentage value is $38.8\%$. The implication of this table is that it is possible to reduce the effort of labeling activities substantially.\relax }}{9}}
\newlabel{fig:stopping-percentage}{{15}{9}}
\@writefile{toc}{\contentsline {section}{\numberline {6}Threats To Validity}{9}}
\newlabel{sect:threats-to-validity}{{6}{9}}
\@writefile{toc}{\contentsline {section}{\numberline {7}Conclusions}{9}}
\newlabel{sect:conclusions}{{7}{9}}
\bibstyle{IEEEtran}
\bibdata{library}
\bibcite{Menzies2009b}{1}
\bibcite{Menzies2007c}{2}
\bibcite{Boehm1981}{3}
\bibcite{Fenton2007c}{4}
\bibcite{Fenton2007b}{5}
\bibcite{Kaariainen2006}{6}
\bibcite{Dasgupta2008}{7}
\bibcite{Hassan2010}{8}
\bibcite{Li2009}{9}
\bibcite{Mendes2003}{10}
\bibcite{Kadoda2000}{11}
\bibcite{Keung2011}{12}
\bibcite{shepperd97}{13}
\bibcite{Foss}{14}
\bibcite{keung2008c}{15}
\bibcite{Finnie1997}{16}
\bibcite{Menzies2006}{17}
\bibcite{Lum2008}{18}
\bibcite{Kocaguneli2010}{19}
\bibcite{Li2009a}{20}
\bibcite{Shepperd1996}{21}
\bibcite{Bakir2009}{22}
\bibcite{Keung2008}{23}
\bibcite{keung2008b}{24}
\bibcite{Li2008}{25}
\bibcite{Kirsopp2002}{26}
\bibcite{Sentas2005}{27}
\bibcite{Kultur2008}{28}
\bibcite{Turhan2007}{29}
\@writefile{toc}{\contentsline {section}{\numberline {8}Future Work}{10}}
\@writefile{toc}{\contentsline {section}{References}{10}}
\bibcite{Miyazaki1994}{30}
\bibcite{Briand1999}{31}
\bibcite{Alpaydin2004}{32}
\bibcite{Milic2004}{33}
\bibcite{Robson2002}{34}
\bibcite{Kitchenham2001}{35}
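% A worked sketch of the toy example of Figures 3-8. The KLOC values below
% are hypothetical (the actual values live in the paper, not in this file)
% and are chosen only to make the arithmetic easy to follow:
%   Assume $P_1 = 2$ KLOC, $P_2 = 3$ KLOC, $P_3 = 7$ KLOC, giving the
%   pairwise distances of Figure 5:
%     d(P_1,P_2) = 1,  d(P_1,P_3) = 5,  d(P_2,P_3) = 4.
%   The $E(1)$ matrix (Figure 6) marks each project's single closest neighbor:
%     closest(P_1) = P_2,  closest(P_2) = P_1,  closest(P_3) = P_2.
%   Summing the columns of $E(1)$ gives the popularity indices (Figure 7):
%     pop(P_1) = 1,  pop(P_2) = 2,  pop(P_3) = 0,
%   so $P_2$ is the most popular instance and enters the active pool first
%   (Figure 8); labeling then proceeds round by round until the stopping
%   rules fire.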