\begin{thebibliography}{10} \bibitem{awkbook} Alfred~V. Aho, Brian~W. Kernighan, and Peter~J. Weinberger. \newblock {\em The AWK Programming Language}. \newblock Addison-Wesley, 1988. \bibitem{me05c} Zhihao Chen, Tim Menzies, Dan Port, and Barry Boehm. \newblock Finding the right data for software cost modeling. \newblock {\em IEEE Software}, Nov 2005. \bibitem{Eisenstein04} Jacob Eisenstein and Randall Davis. \newblock Visual and linguistic information in gesture classification. \newblock In {\em ICMI}, pages 113--120, 2004. \newblock Available from \url{http://iccle.googlecode.com/svn/trunk/share/pdf/eisenstein04.pdf}. \bibitem{ferri09} César Ferri, José Hernández-Orallo, and R.~Modroiu. \newblock An experimental comparison of performance measures for classification. \newblock {\em Pattern Recognition Letters}, 30(1):27--38, 2009. \bibitem{Freund99thealternating} Yoav Freund and Llew Mason. \newblock The alternating decision tree learning algorithm. \newblock In {\em Machine Learning: Proceedings of the Sixteenth International Conference}, pages 124--133. Morgan Kaufmann, 1999. \bibitem{gay09} Greg Gay, Tim Menzies, and Bojan Cukic. \newblock How to build repeatable experiments. \newblock In {\em PROMISE '09: Proceedings of the 5th International Conference on Predictor Models in Software Engineering}, pages 1--9, New York, NY, USA, 2009. ACM. \bibitem{genic04} Chetan Gupta and Robert Grossman. \newblock Genic: A single pass generalized incremental algorithm for clustering. \newblock In {\em SIAM Int.\ Conf.\ on Data Mining}. SIAM, 2004. \bibitem{kitch07} B.~A. Kitchenham, E.~Mendes, and G.~H. Travassos. \newblock Cross- vs.\ within-company cost estimation studies: A systematic review. \newblock {\em IEEE Transactions on Software Engineering}, pages 316--329, May 2007. \bibitem{lessmann08} S.~Lessmann, B.~Baesens, C.~Mues, and S.~Pietsch. \newblock Benchmarking classification models for software defect prediction: A proposed framework and novel findings.
\newblock {\em IEEE Transactions on Software Engineering}, May 2008. \newblock Available from \url{http://iccle.googlecode.com/svn/trunk/share/pdf/lessmann08.pdf}. \bibitem{gawkai} R.~Loui. \newblock Gawk for AI. \newblock {\em Class Lecture}. \newblock Available from \url{http://menzies.us/cs591o/?lecture=gawk}. \bibitem{matheny09} A.~Matheny. \newblock Scaling up text mining, 2009. \newblock Master's thesis, Lane Department of Computer Science and Electrical Engineering, West Virginia University. \bibitem{canopies00} Andrew McCallum, Kamal Nigam, and Lyle~H. Ungar. \newblock Efficient clustering of high-dimensional data sets with application to reference matching. \newblock In {\em KDD '00: Proceedings of the sixth ACM SIGKDD international conference on Knowledge discovery and data mining}, pages 169--178, New York, NY, USA, 2000. ACM. \bibitem{me00v} T.~Menzies. \newblock Evaluation issues for visual programming languages, 2002. \newblock Available from \url{http://menzies.us/pdf/00vp.pdf}. \bibitem{me05d} T.~Menzies, D.~Port, Z.~Chen, J.~Hihn, and S.~Stukes. \newblock Specialization and extrapolation of induced domain models: Case studies in software effort estimation. \newblock 2005. \newblock IEEE ASE, 2005. Available from \url{http://menzies.us/pdf/05learncost.pdf}. \bibitem{me07i} T.~Menzies, B.~Turhan, A.~Bener, and J.~Distefano. \newblock Cross- vs within-company defect prediction studies. \newblock 2007. \newblock Available from \url{http://menzies.us/pdf/07ccwc.pdf}. \bibitem{me06d} Tim Menzies, Zhihao Chen, Jairus Hihn, and Karen Lum. \newblock Selecting best practices for effort estimation. \newblock {\em IEEE Transactions on Software Engineering}, November 2006. \newblock Available from \url{http://menzies.us/pdf/06coseekmo.pdf}. \bibitem{me07b} Tim Menzies, Jeremy Greenwald, and Art Frank. \newblock Data mining static code attributes to learn defect predictors. \newblock {\em IEEE Transactions on Software Engineering}, January 2007.
\newblock Available from \url{http://menzies.us/pdf/06learnPredict.pdf}. \bibitem{mie06} I.~Mierswa, M.~Wurst, and R.~Klinkenberg. \newblock Yale: Rapid prototyping for complex data mining tasks. \newblock In {\em KDD'06}, 2006. \bibitem{orrego04} A.S. Orrego. \newblock Sawtooth: Learning from huge amounts of data, 2004. \bibitem{ramey94} Chet Ramey. \newblock Bash, the Bourne-Again shell. \newblock 1994. \newblock Available from \url{http://tiswww.case.edu/php/chet/bash/rose94.pdf}. \bibitem{ramos03} Juan Ramos. \newblock Using TF-IDF to determine word relevance in document queries. \newblock In {\em Proceedings of the First Instructional Conference on Machine Learning}, 2003. \newblock Available from \url{http://www.cs.rutgers.edu/~mlittman/courses/ml03/iCML03/papers/ramos.pdf}. \bibitem{ritthoff01} O.~Ritthoff, R.~Klinkenberg, S.~Fischer, I.~Mierswa, and S.~Felske. \newblock Yale: Yet another learning environment. \newblock In {\em LLWA 01 - Tagungsband der GI-Workshop-Woche, Dortmund, Germany}, pages 84--92, October 2001. \newblock Available from \url{http://ls2-www.cs.uni-dortmund.de/~fischer/publications/YaleLLWA01.pdf}. \bibitem{turhan08} Burak Turhan, Tim Menzies, Ayse~B. Bener, and Justin~Di Stefano. \newblock On the relative value of cross-company and within-company data for defect prediction. \newblock {\em Empirical Software Engineering}, 2009. \newblock Available from \url{http://menzies.us/pdf/08ccwc.pdf}. \end{thebibliography}