\relax 
% LaTeX auxiliary file (machine-generated; regenerated on every LaTeX run).
% It carries table-of-contents (toc) and list-of-figures (lof) entries,
% cross-reference labels, citation keys, and the bibliography style/database.
% Layout rule: one record per physical line. BibTeX scans this file line by
% line and only recognises \citation, \bibstyle and \bibdata when they start
% a line, so these records must not be merged onto shared lines.
%
% hyperref compatibility shim: if \hyper@anchor is undefined (hyperref is not
% loaded for this run), wrap \contentsline and \newlabel so that the extra
% arguments present in the records below are dropped: \contentsline forwards
% only its first three arguments, \newlabel forwards only the first two
% fields of its value. The original definitions are restored at end of document.
\ifx\hyper@anchor\@undefined
\global \let \oldcontentsline\contentsline
\gdef \contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global \let \oldnewlabel\newlabel
\gdef \newlabel#1#2{\newlabelxx{#1}#2}
\gdef \newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\let \contentsline\oldcontentsline \let \newlabel\oldnewlabel}
\else
\global \let \hyper@last\relax 
\fi
% ---- Section 1: Introduction ----
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}}
\citation{ritthoff01}
\citation{mie06}
% NOTE(review): "toolit" in the caption below looks like a typo for "toolkit";
% fix it in the .tex source, since this file is regenerated from it.
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The WEKA toolit running the J48 decision tree learner.}}{2}{figure.1}}
\newlabel{fig:weka}{{1}{2}{Introduction\relax }{figure.1}{}}
\citation{me00v}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Orange's visual programming environment. Green lines indicate data streams passed between functions.}}{3}{figure.2}}
\newlabel{fig:orange}{{2}{3}{Introduction\relax }{figure.2}{}}
\citation{ramey94}
\citation{awkbook}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Rapid-I's operator trees. From \nobreakspace {}\cite {mie06}. Internally, this tree is a nested XML expression that is traversed top-down to complete an experiment.}}{4}{figure.3}}
\newlabel{fig:yale}{{3}{4}{Introduction\relax }{figure.3}{}}
\citation{gay09}
% NOTE(review): "workds" in the caption below looks like a typo for "words";
% fix it in the .tex source, since this file is regenerated from it.
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces An OURMINE function to clean text documents and collect the results. $Tokes$ is a tokenizer; $caps$ sends all words to lower case; $stops$ removes the stop workds listed in "\$Lists/stops.txt"; and $stems$ performs Porter's stemming algorithm (removes confusing suffixes). }}{5}{figure.4}}
\newlabel{fig:clean}{{4}{5}{OURMINE\relax }{figure.4}{}}
% ---- Section 2: OURMINE ----
\@writefile{toc}{\contentsline {section}{\numberline {2}OURMINE}{5}{section.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Built-in Data and Functions}{5}{subsection.2.1}}
\citation{ramos03}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces A small sample of the available OURMINE functions. Built-in functions give the user something with which to start and begin running demos and experiments immediately. For a detailed list of available tools, please see the appendix.}}{6}{figure.5}}
\newlabel{fig:functions}{{5}{6}{Built-in Data and Functions\relax }{figure.5}{}}
\citation{Eisenstein04}
\citation{orrego04}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces A GAWK implementation of TF-IDF.}}{7}{figure.6}}
\newlabel{fig:tfidf}{{6}{7}{Built-in Data and Functions\relax }{figure.6}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Learning and Teaching with OURMINE}{7}{subsection.2.2}}
\citation{gawkai}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces A Naive Bayes classifier for a CSV file, where the class label is found in the last column.}}{8}{figure.7}}
\newlabel{fig:nbc}{{7}{8}{Learning and Teaching with OURMINE\relax }{figure.7}{}}
\citation{turhan08,gay09,me07b,me06d,me05c,me05d}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Four Introductory OURMINE programming exercises.}}{9}{figure.8}}
\newlabel{fig:ourmine101}{{8}{9}{Learning and Teaching with OURMINE\relax }{figure.8}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Function help in OURMINE.}}{9}{figure.9}}
\newlabel{fig:help}{{9}{9}{Learning and Teaching with OURMINE\relax }{figure.9}{}}
% ---- Section 3: Using Ourmine for Research ----
\@writefile{toc}{\contentsline {section}{\numberline {3}Using Ourmine for Research}{9}{section.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces A demo OURMINE experiment. This worker function begins by being called by the top level function {\em demo004} on lines 1-4. Noteworthy sections of the demo code are at: line 19, where training sets and test sets are built from 90\% and 10\% of the data respectively, lines 25-27 in which values such as {\em pd,pf} and {\em balance} are computed via the {\em abcd} function that computes values from the confusion matrix, and line 34 in which a {\em Wilcoxon} test is performed on each learner in the experiment using {\em pd} as the performance measure.}}{10}{figure.10}}
\newlabel{fig:demo004}{{10}{10}{Learning and Teaching with OURMINE\relax }{figure.10}{}}
\citation{turhan08}
\citation{kitch07}
\citation{matheny09}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Experiment I}{11}{subsection.3.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.1}Building the Experiment}{11}{subsubsection.3.1.1}}
\citation{turhan08}
\citation{lessmann08}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.2}Results}{12}{subsubsection.3.1.2}}
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces The OURMINE script used in conducting the WC vs. CC experiment.}}{13}{figure.11}}
\newlabel{fig:promiseExp}{{11}{13}{Building the Experiment\relax }{figure.11}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces Probability of Detection (PD) results, sorted by median values. }}{14}{figure.12}}
\newlabel{fig:pds}{{12}{14}{Results\relax }{figure.12}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Probability of False Alarm (PF) results, sorted by median values. }}{14}{figure.13}}
\newlabel{fig:pfs}{{13}{14}{Results\relax }{figure.13}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Experiment II}{14}{subsection.3.2}}
\newlabel{sec:tmine}{{3.2}{14}{Experiment II\relax }{subsection.3.2}{}}
\citation{genic04}
\citation{canopies00}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.1}Classes of Methods }{15}{subsubsection.3.2.1}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.2}The Algorithms}{15}{subsubsection.3.2.2}}
\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces A PCA dimension feature.}}{16}{figure.14}}
\newlabel{fig:pca}{{14}{16}{The Algorithms\relax }{figure.14}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.3}Building the Experiment}{17}{subsubsection.3.2.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces An OURMINE worker function to cluster data using the K-means algorithm. Note that experiments using other clustering methods (such as GenIc and Canopy), could be conducted by calling line 16 above in much the same way, but with varying flags to represent the clusterer.}}{18}{figure.15}}
\newlabel{fig:clusterworker}{{15}{18}{Building the Experiment\relax }{figure.15}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Results}{18}{subsection.3.3}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Similarities}{18}{subsubsection.3.3.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces An OURMINE worker function to reduce the data using TF-IDF.}}{19}{figure.16}}
\newlabel{fig:tfidfworker}{{16}{19}{Building the Experiment\relax }{figure.16}{}}
% ---- Section 4: Related Work ----
\@writefile{toc}{\contentsline {section}{\numberline {4}Related Work}{19}{section.4}}
\@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces Similarity values normalized according to the combination of most rigorous reducer and clusterer.}}{20}{figure.17}}
\newlabel{fig:sims}{{17}{20}{Similarities\relax }{figure.17}{}}
% ---- Section 5: Conclusions ----
\@writefile{toc}{\contentsline {section}{\numberline {5}Conclusions}{21}{section.5}}
% ---- Bibliography: style, database, and citation-key -> number map ----
\bibstyle{plain}
\bibdata{refs}
\bibcite{awkbook}{1}
\bibcite{me05c}{2}
\bibcite{Eisenstein04}{3}
\bibcite{gay09}{4}
\bibcite{genic04}{5}
\bibcite{kitch07}{6}
\bibcite{lessmann08}{7}
\bibcite{gawkai}{8}
\bibcite{matheny09}{9}
\bibcite{canopies00}{10}
\bibcite{me00v}{11}
\bibcite{me05d}{12}
\bibcite{me06d}{13}
\bibcite{me07b}{14}
\bibcite{mie06}{15}
\bibcite{orrego04}{16}
\bibcite{ramey94}{17}
\bibcite{ramos03}{18}
\bibcite{ritthoff01}{19}
\bibcite{turhan08}{20}