\relax \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}} \citation{DasguptaL09} \citation{DasguptaL09} \citation{NIELSEN1993} \@writefile{toc}{\contentsline {section}{\numberline {2}Active Learning}{2}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.1}What is Active Learning?}{2}} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces An Active Learning Heuristic suggested by Dasgupta \cite {DasguptaL09}}}{2}} \newlabel{algorithm:ALHeuristic}{{1}{2}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Motivation for Active Learning}{2}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Applying Active Learning to Software Engineering}{2}} \citation{Menzies08} \citation{Menzies08} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces A function call graph generated in GrammaTech's CodeSurfer source-code analysis tool}}{3}} \newlabel{fig:CallGraph}{{2}{3}} \citation{Smith05} \citation{Smith05} \citation{Binkley10} \@writefile{toc}{\contentsline {section}{\numberline {3}Feature Subset Selection}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Motivation}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Principle Component Analysis}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Term Frequency-Inverse Document Frequency}{5}} \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces An example function \cite {Binkley10}.}}{5}} \newlabel{fig:Function}{{3}{5}} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Formulas for tf, idf, and the final tf-idf feature vector.}}{6}} \newlabel{fig:FormulasTFIDF}{{4}{6}} \citation{Dasgupta2008} \citation{Cohn1994} \citation{Wallace2010} \citation{Wallace2010} \@writefile{toc}{\contentsline {section}{\numberline {4}Sampling Techniques}{7}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.1}The Problems of Random Sampling}{7}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Exploration vs. Exploitation}{7}} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces An random sampling technique favoring exploration (left) vs. a simple sampling algorithm that favors exploitation (right) \cite {Wallace2010}}}{7}} \newlabel{fig:ExploreVsExploit}{{5}{7}} \citation{Dasgupta2008} \citation{Dasgupta2008} \citation{Dasgupta2004} \@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Hierarchical Sampling}{8}} \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Hierarchical Sampling Algorithm \cite {Dasgupta2008}}}{8}} \newlabel{algorithm:Hierarchical}{{6}{8}} \citation{Dasgupta2004} \citation{Balcan2006} \citation{FREUND1997} \@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Greedy Sampling}{9}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Agnostic Sampling}{9}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.6}Query by Committee}{9}} \@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces $A^2$ Agnostic Sampling algorithm \cite {Balcan2006} }}{10}} \newlabel{algorithm:Agnostic}{{7}{10}} \@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Query by Committee algorithm for a committee of two \cite {FREUND1997}}}{10}} \newlabel{algorithm:QueryBC}{{8}{10}} \citation{Warmuth2002} \@writefile{toc}{\contentsline {subsection}{\numberline {4.7}Iterative Approach to Selective Sampling}{11}} \citation{Gupta04} \citation{Gupta04} \@writefile{toc}{\contentsline {section}{\numberline {5}Clustering Methods}{12}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Motivation}{12}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.2}k-means}{12}} \@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Pseudo-code for K-means Algorithm}}{12}} \newlabel{algorithm:Kmeans}{{9}{12}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.3}GenIc}{12}} \citation{McCallum00} \citation{McCallum00} \citation{McCullen00} \@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Pseudo-code for GenIc Algorithm \cite {Gupta04}}}{13}} \newlabel{algorithm:GenIc}{{10}{13}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.4}Canopy}{13}} \citation{McCullen00} \@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces An example of five canopies \cite {McCallum00}}}{14}} \newlabel{fig:Canopies}{{11}{14}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.5}Compass}{14}} \citation{Foss} \@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces Pseudo-code for Compass Clustering Algorithm}}{15}} \newlabel{algorithm:Compass}{{12}{15}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.6}Comparison of Clustering Techniques}{15}} \@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Results from a text mining experiment comparing combinations of feature subset reduction and clustering algorithms to more sophisticated methods.}}{15}} \newlabel{tab:ReducerCluster}{{1}{15}} \citation{Foss} \@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Win-Loss-Tie Results for the COCOMO81 Data Set}}{16}} \newlabel{tab:COCOMO81}{{2}{16}} \@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Win-Loss-Tie Results for the Maxwell Data Set}}{16}} \newlabel{tab:Maxwell}{{3}{16}} \citation{COMPTON1995} \citation{SHAW1989} \citation{COMPTON1995} \citation{SHAW1989} \citation{SHAW1989} \@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Pseudo-code for Win-Tie-Loss calculation between variants $i$ and $j$}}{17}} \newlabel{WinTieLoss}{{13}{17}} \@writefile{toc}{\contentsline {section}{\numberline {6}Domain Experts}{17}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Motivation}{17}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.2}The Use of Simulated Experts in Evaluating Knowledge Acquisition}{17}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Comparing Conceptual Structures Between Experts}{17}} \citation{SHAW1989} \citation{SHAW1989} \citation{QUINLAN1986} \citation{QUINLAN1986} \citation{QUINLAN1986} \@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces An example of a reperatory (entity-attribute) grid \cite {SHAW1989}}}{18}} \newlabel{fig:Reperatory}{{14}{18}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.4}ID3 and C4.5}{18}} \citation{Yang03} \@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces An example of a difference grid \cite {SHAW1989}}}{19}} \newlabel{fig:Difference}{{15}{19}} \@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces An example data set to be processed by ID3 \cite {QUINLAN1986}}}{19}} \newlabel{fig:ID3ExampleData}{{16}{19}} \@writefile{toc}{\contentsline {subsection}{\numberline {6.5}Naive Bayes}{19}} \citation{DOMINGOS97} \citation{DOMINGOS97} \@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces An example decision tree produced by the ID3 algorithm \cite {QUINLAN1986}}}{20}} \newlabel{fig:ID3ExampleTree}{{17}{20}} \@writefile{lof}{\contentsline {figure}{\numberline {18}{\ignorespaces Comparison of Naive Bayes to other algorithms \cite {DOMINGOS97}}}{20}} \newlabel{fig:BayesResults}{{18}{20}} \@writefile{toc}{\contentsline {section}{\numberline {7}Summary}{21}} \bibstyle{ieeetr} \bibdata{references} \bibcite{DasguptaL09}{1} \bibcite{NIELSEN1993}{2} \bibcite{Menzies08}{3} \bibcite{Smith05}{4} \bibcite{Binkley10}{5} \bibcite{Dasgupta2008}{6} \bibcite{Cohn1994}{7} \bibcite{Wallace2010}{8} \bibcite{Dasgupta2004}{9} \bibcite{Balcan2006}{10} \bibcite{FREUND1997}{11} \bibcite{Warmuth2002}{12} \bibcite{Gupta04}{13} \bibcite{McCallum00}{14} \bibcite{Foss}{15} \bibcite{COMPTON1995}{16} \bibcite{SHAW1989}{17} \bibcite{QUINLAN1986}{18} \bibcite{Yang03}{19} \bibcite{DOMINGOS97}{20} \citation{DasguptaL09} \citation{Binkley10} \citation{Wallace2010} \citation{Dasgupta2008} \citation{Balcan2006} \citation{FREUND1997} \citation{Gupta04} \citation{McCallum00} \citation{SHAW1989} \citation{SHAW1989} \citation{QUINLAN1986} \citation{QUINLAN1986} \citation{DOMINGOS97}