\relax \citation{provost99survey} \citation{Blake+Merz:1998} \citation{witten99} \citation{quinlan92} \citation{john95} \citation{witten99} \citation{quinlan92b} \citation{Blake+Merz:1998} \citation{Blake+Merz:1998} \citation{provost99survey} \citation{catlett91} \@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Incremental cross validation experiments on soybean (10 repeats, 10 divisions per repeat). Error bars show ${\pm}1$ standard deviation for the accuracies over the repeats.}}{1}} \newlabel{fig:summ}{{1}{1}} \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces {\em R=10*N=10} incremental cross validation experiments on 20 UCI data sets\nobreakspace {}\cite {Blake+Merz:1998}. A:heart-c; B:zoo; C:vote; D:heart-statlog; E:lymph; F:autos; G:ionosphere; H:diabetes; I:balance-scale; J:soybean; K:bodyfat; L:cloud; M:fishcatch; N:sensory; O:pwLinear; Q:strike; R:pbc; S:autoMpg; T:housing. Data sets A..J have discrete classes while data sets K..T have continuous classes. Data sets are sorted according to how many instances were required to reach a plateau using nbk (left-hand side) or M5' (right-hand side).}}{1}} \newlabel{fig:sim1}{{2}{1}} \citation{oates97} \citation{widmer96learning} \citation{yang02} \citation{yang02} \citation{webb00a} \citation{hall03,dou95} \@writefile{toc}{\contentsline {section}{\numberline {II}Related Work}{2}} \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces A Bayes Classifier. ``?'' denotes ``missing values''. In {\tt classify}, the probabilities are multiplied together using logarithms to avoid numeric errors when handling very small numbers. The $m$ and $k$ variables handle low-frequency counts using Laplace and M-estimates\nobreakspace {}\cite [\S 3.1]{yang02}. This code computes $L(H\,|\,E)$; i.e., class {\em likelihoods}, not probabilities. Likelihoods become probabilities when they are normalized over the sum of all likelihoods. However, since maximum probability comes from maximum likelihood, this code only needs to return the class with maximum $L(H\,|\,E)$.}}{2}} \newlabel{fig:class}{{3}{2}} \citation{gama00} \citation{chai02bayesian} \citation{john95} \citation{FayIra93Multi} \citation{dou95,YanWeb02Comparative} \citation{dou95} \citation{dou95} \citation{YanWeb02Comparative} \citation{john95} \citation{john95} \citation{dou95} \citation{dou95} \@writefile{toc}{\contentsline {section}{\numberline {III}Handling Numeric Attributes with SPADE}{3}} \@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Comparing SPADE and kernel estimation. Data sets: A=vowel, B=iris, C=ionosphere, D=echo, E=horse-colic, F=anneal, G=hypothyroid, H=hepatitis, I=heart-c, J=diabetes, K=auto-mpg, L=waveform-5000, M=vehicle, N=labor, O=segment.}}{3}} \newlabel{fig:spade}{{4}{3}} \@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces SAWTOOTH and the KDD'99 data}}{3}} \newlabel{fig:kdd99}{{5}{3}} \citation{provost99survey} \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Mean $\pm$ standard deviation seen in 10*10-way cross validation experiments on UCI Irvine data sets. ``NB'' and ``nbk'' denote Na\"{\i}ve Bayes classifiers that use Gaussians to model continuous attributes.
``NB'' uses a single Gaussian while ``nbk'' uses a sum of Gaussians in the method recommended by John and Langley\nobreakspace {}\cite {john95}. The top-right plot sorts the differences in the accuracies found by SAWTOOTH and all the other learners. Some of those differences are not statistically significant: a ``+'' or ``-'' in the left-hand-side table denotes a mean that is significantly different from SAWTOOTH's at the $\alpha =0.05$ level. The significant differences between all the learners are shown in the win-loss statistics of the bottom-right table.}}{4}} \newlabel{fig:uci}{{6}{4}} \@writefile{toc}{\contentsline {section}{\numberline {IV}Experiments}{4}} \@writefile{toc}{\contentsline {subsection}{\numberline {IV-A}KDD'99 Data}{4}} \@writefile{toc}{\contentsline {subsection}{\numberline {IV-B}UCI Data}{4}} \citation{holte93} \citation{provost99survey} \bibstyle{IEEEbib} \bibdata{../../refs} \@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces SAWTOOTH and Concept Drift}}{5}} \newlabel{fig:flight}{{7}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {IV-C}Data with Concept Drift}{5}} \@writefile{toc}{\contentsline {section}{\numberline {V}Conclusion}{5}} \@writefile{toc}{\contentsline {section}{References}{5}}
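The Figure~3 caption above describes the {\tt classify} routine in enough detail to sketch it. The Python below is {\em not} the code from the figure (which is not reproduced in this file); it is a minimal illustration, under the assumption of a dictionary-of-counts representation, of the two points the caption makes: likelihoods $L(H\,|\,E)$ are accumulated as sums of logarithms to avoid numeric underflow, and low-frequency counts are smoothed via the $m$ and $k$ corrections (Laplace and M-estimates). The names {\tt counts}, {\tt class\_freq}, and {\tt n\_instances}, and the exact form of the smoothing, are illustrative assumptions.
\begin{verbatim}
import math

def classify(instance, counts, class_freq, n_instances, m=2.0, k=1.0):
    """Minimal sketch (not the paper's code) of a naive Bayes classify step.

    counts[(cls, attr, value)] -> frequency of `value` for `attr` in class cls
    class_freq[cls]            -> number of training instances in class cls
    Returns the class with the largest log-likelihood L(H|E); normalization is
    unnecessary because argmax likelihood equals argmax probability.
    """
    n_classes = len(class_freq)
    best_class, best_like = None, -math.inf
    for cls, freq in class_freq.items():
        # Laplace-style (k) correction on the class prior
        prior = (freq + k) / (n_instances + k * n_classes)
        like = math.log(prior)
        for attr, value in instance.items():
            if value == "?":                       # "?" denotes a missing value: skip it
                continue
            c = counts.get((cls, attr, value), 0)  # observed frequency in this class
            # One common M-estimate variant: blend the observed frequency with the
            # class prior, weighted by m (the smoothing in Fig. 3 may differ).
            like += math.log((c + m * prior) / (freq + m))
        if like > best_like:
            best_class, best_like = cls, like
    return best_class
\end{verbatim}
Training under this representation would simply increment {\tt counts}, {\tt class\_freq}, and {\tt n\_instances} one instance at a time, which is what makes such a classifier naturally incremental.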