% Table-of-contents entries, one \contentsline per line.
% Each entry has the form \contentsline {<level>}{<title>}{<page>}; numbered
% entries carry their number inside the title via \numberline {<number>}.
% Indentation mirrors the sectioning depth and is for readability only.
%
% NOTE(review): this looks like a LaTeX-generated .toc file. If so, it is
% rewritten on every run, so the two title fixes marked below must also be
% made in the corresponding sectioning commands of the source .tex. Confirm.

% --- Chapter 1 ---
\contentsline {chapter}{\numberline {1}Introduction}{1}
  \contentsline {section}{\numberline {1.1}Motivation}{2}
  \contentsline {section}{\numberline {1.2}Statement of Thesis}{3}
  \contentsline {section}{\numberline {1.3}Contributions}{3}
  \contentsline {section}{\numberline {1.4}About This Document}{4}

% --- Chapter 2 ---
\contentsline {chapter}{\numberline {2}Background: Data and Learners}{5}
  \contentsline {section}{\numberline {2.1}Data and Data Mining}{5}
    \contentsline {subsection}{\numberline {2.1.1}Data}{5}
    \contentsline {subsection}{\numberline {2.1.2}Data Mining}{7}
  \contentsline {section}{\numberline {2.2}Classification}{8}
    \contentsline {subsection}{\numberline {2.2.1}Decision Tree Learners}{9}
    \contentsline {subsection}{\numberline {2.2.2}Naive Bayes}{13}
    \contentsline {subsection}{\numberline {2.2.3}Other Classification Methods}{16}
      % The subsubsection entries have no \numberline, i.e. they are unnumbered.
      \contentsline {subsubsection}{1-R}{16}
      \contentsline {subsubsection}{Rule Learners}{18}
      \contentsline {subsubsection}{Instance-Based Learning}{20}
  \contentsline {section}{\numberline {2.3}Summary}{21}
    \contentsline {subsection}{\numberline {2.3.1}Data Mining and Classification}{21}
    \contentsline {subsection}{\numberline {2.3.2}Classifier Selection}{22}

% --- Chapter 3 ---
\contentsline {chapter}{\numberline {3}Discretization}{24}
  \contentsline {section}{\numberline {3.1}General Discretization}{24}
  \contentsline {section}{\numberline {3.2}Equal Width Discretization (EWD)}{28}
  % Fixed: added the missing space before "(EFD)" to match the sibling titles.
  \contentsline {section}{\numberline {3.3}Equal Frequency Discretization (EFD)}{29}
  \contentsline {section}{\numberline {3.4}Bin Logging}{30}
  \contentsline {section}{\numberline {3.5}Entropy-based Discretization}{30}
  \contentsline {section}{\numberline {3.6}Proportional k-Interval Discretization}{32}
  \contentsline {section}{\numberline {3.7}Weighted Proportional k-Interval Discretization (WPKID)}{35}
  \contentsline {section}{\numberline {3.8}Non-Disjoint Discretization (NDD)}{35}
  \contentsline {section}{\numberline {3.9}Weighted Non-Disjoint Discretization (WNDD)}{36}
  \contentsline {section}{\numberline {3.10}Other Methods}{37}
  \contentsline {section}{\numberline {3.11}DiscTree Algorithm}{37}
    \contentsline {subsection}{\numberline {3.11.1}Trees}{38}
    \contentsline {subsection}{\numberline {3.11.2}Binary Trees}{40}
    \contentsline {subsection}{\numberline {3.11.3}Binary Search Trees}{42}
    \contentsline {subsection}{\numberline {3.11.4}Randomized Binary Search Trees}{45}
    \contentsline {subsection}{\numberline {3.11.5}DiscTree}{46}

% --- Chapter 4 ---
\contentsline {chapter}{\numberline {4}Experiment}{51}
  \contentsline {section}{\numberline {4.1}Test Data}{51}
  \contentsline {section}{\numberline {4.2}Cross-Validation}{53}
  \contentsline {section}{\numberline {4.3}Classifier Performance Measurement}{54}
  \contentsline {section}{\numberline {4.4}Mann-Whitney}{56}

% --- Chapter 5 ---
\contentsline {chapter}{\numberline {5}Experimental Results}{60}
  \contentsline {section}{\numberline {5.1}DiscTree Variant Selection}{60}
    \contentsline {subsection}{\numberline {5.1.1}Accuracy Results}{61}
    \contentsline {subsection}{\numberline {5.1.2}Balance Results}{67}
    \contentsline {subsection}{\numberline {5.1.3}Precision Results}{73}
    \contentsline {subsection}{\numberline {5.1.4}Probability of Detection Results}{79}
    \contentsline {subsection}{\numberline {5.1.5}Probability of Not False Alarm}{85}
    \contentsline {subsection}{\numberline {5.1.6}Decision Tree Method Selection}{91}
  \contentsline {section}{\numberline {5.2}Discretization Method Comparison}{91}
    \contentsline {subsection}{\numberline {5.2.1}Accuracy Results}{91}
    \contentsline {subsection}{\numberline {5.2.2}Balance Results}{98}
    \contentsline {subsection}{\numberline {5.2.3}Precision Results}{105}
    \contentsline {subsection}{\numberline {5.2.4}Probability of Detection Results}{112}
    \contentsline {subsection}{\numberline {5.2.5}Probability of Not False Alarm}{119}
  \contentsline {section}{\numberline {5.3}Summary}{126}

% --- Chapter 6 ---
\contentsline {chapter}{\numberline {6}Conclusion}{127}
  \contentsline {section}{\numberline {6.1}Overview}{127}
  \contentsline {section}{\numberline {6.2}Conclusions}{128}
  \contentsline {section}{\numberline {6.3}Future Work}{129}

% --- Appendices (chapters lettered A-F) ---
\contentsline {chapter}{\numberline {A}disctree Source Code}{132}
\contentsline {chapter}{\numberline {B}crossval Source Code}{141}
\contentsline {chapter}{\numberline {C}tenbins Source Code}{143}
\contentsline {chapter}{\numberline {D}Script for PKID}{144}
\contentsline {chapter}{\numberline {E}Entropy-Minimization Method Script}{145}
\contentsline {chapter}{\numberline {F}Performance Measure $U$-test Tables}{146}
  % Fixed: "By" lowercased to "by" so F.1 matches F.2-F.5.
  \contentsline {section}{\numberline {F.1}Accuracy $U$-test by Data Set}{146}
  \contentsline {section}{\numberline {F.2}Balance $U$-test by Data Set}{151}
  \contentsline {section}{\numberline {F.3}Precision $U$-test by Data Set}{156}
  \contentsline {section}{\numberline {F.4}Probability of Detection $U$-test by Data Set}{161}
  \contentsline {section}{\numberline {F.5}Probability of Not False Alarm $U$-test by Data Set}{166}