;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; This file is part of "NOVA": NOVA = search + COCOMO tools ; Copyright, 2008, Tim Menzies tim@menzies.us ; ; NOVA is free software: you can redistribute it and/or modify ; it under the terms of the GNU General Public License as published by ; the Free Software Foundation, either version 3 of the License, or ; (at your option) any later version. ; ; NOVA is distributed in the hope that it will be useful, ; but WITHOUT ANY WARRANTY; without even the implied warranty of ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ; GNU General Public License for more details. ; You should have received a copy of the GNU General Public License ; a long with NOVA. If not, see . ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; copyright and usage stuff. (defun gpl3-string (what when who email) (format nil "This file is part of ~a: NOVA = search + COCOMO tools Copyright, ~a, ~a ~a. ~a is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. ~a is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License a long with ~a. If not, see ." what when who email what what what)) (defun about-string (what when who email) (format nil "~a Copyright (C) ~a ~a ~a This program comes with ABSOLUTELY NO WARRANTY- for details type (copyright). This is free software, and you are welcome to redistribute it under certain conditions; type (copyright)," what when who email)) ;;;; random stuff (defparameter *seed0* 10013) (defparameter *seed* *seed0*) (defun reset-seed () (setf *seed* *seed0*)) (defun park-miller-randomizer () "The Park-Miller multiplicative congruential randomizer (CACM, October 88, Page 1195). Creates pseudo random floating point numbers in the range 0.0 < x <= 1.0." (let ((multiplier 16807.0d0);16807 is (expt 7 5) (modulus 2147483647.0d0)) ;2147483647 is (- (expt 2 31) 1) (let ((temp (* multiplier *seed*))) (setf *seed* (mod temp modulus)) (/ *seed* modulus)))) (defun my-random (n) "Returns a pseudo random floating-point number in range 0.0 <= number < n" (let ((random-number (park-miller-randomizer))) ;; We subtract the randomly generated number from 1.0 ;; before scaling so that we end up in the range ;; 0.0 <= x < 1.0, not 0.0 < x <= 1.0 (* n (- 1.0d0 random-number)))) (defun my-random-int (n) "Returns a pseudo-random integer in the range 0 <= n-1." (let ((random-number (/ (my-random 1000.0) 1000))) (floor (* n random-number)))) (defun random-demo () (let (counts out) (labels ((sorter (x y) (< (car x) (car y))) (zap () (setf out nil) (reset-seed) (setf counts (make-hash-table))) (inc (n) (setf (gethash n counts) (1+ (gethash n counts 0)))) (cache (k v) (push (list k v) out))) (dotimes (i 2 t) (zap) (dotimes (i 10000) (inc (my-random-int 5))) (maphash #'cache counts) (print (sort out #'sorter)))))) (egs :my-random (eg '(dotimes (i 5) (format t "~a~%" (my-random -23)))) (eg '(random-demo))) ;;;; line stuff (defstruct line "A 'line' runs between two points 'x1@y1' to 'x2@y2' with gradient 'm' and y-intercept 'b'. Also, some runs are 'verticalp'; i.e. run vertically." x1 y1 x2 y2 m b verticalp) (defun point-to-line (x1 y1 x2 y2) "Create a line from two points." (if (> x1 x2) (point-to-line x2 y2 x1 y1) (let* ((rise (- y2 y1)) (run (- x2 x1)) (verticalp (zerop run)) m b) (unless verticalp (setf m (/ rise run) b (- y2 (* m x2)))) (make-line :x1 x1 :y1 y1 :x2 x2 :y2 y2 :m m :b b :verticalp verticalp)))) (defun line-y (x l) "Return the 'y' point associated with 'x' on line 'l'." (if (line-verticalp l) (line-y1 l) (+ (* (line-m l) x) (line-b l)))) (defun interpolate (x x1 y1 x2 y2 &optional too-big ) "interpolate between the points x1@y2 and x2@y2; returns 'too-big' if x > x2 and returns x1 if x 1.4142135 Example2: (euclidean 10 10 10 10) => 20.0" (let ((sumsq 0)) (dolist (x l (sqrt sumsq)) (incf sumsq (* x x))))) (defun chars (n &optional (c "*") (str t)) "print a string of characters 'c' to stream 'str' (and the default stream is standard output)." (if (> n 0) (dotimes (i n) (princ c str)))) (let (last-loaded) (defmacro l (&optional (f last-loaded)) `(progn (setf last-loaded ',f) (load (string-downcase (format nil "~a.lisp" ',f))) ',f))) (defmacro sym (&rest args) `(sym-prim ',args)) (defun sym-prim (l) (intern (format nil "~a~{-~a~}" (car l) (cdr l)))) ;;;; stats stuff (defun list2stdev (l) "Return the standard deviation of a list of numbers." (let ((n 0) (sum 0) (sumSq 0)) (dolist (x l (stdev n sum sumSq)) (incf n) (incf sum x) (incf sumSq (* x x ))))) (defun stdev (n sum sumSq) "Compute the mean and standard deviation." (sqrt (/ (- sumSq(/ (* sum sum) n)) (- n 1)))) (defun sum (l) "Sum a list of numbers." (let ((sum 0)) (dolist (x l sum) (incf sum x)))) (defun mean (l) "Return the mean and sum of list of numbers." (let ((sum 0) (n 0)) (dolist (one l (/ sum n)) (incf n) (incf sum one)))) (defun mean-sd (l) "Return the mean and standard deviation of a list of numbers" (let ((sum 0) (n 0) (sumSq 0)) (labels ((mean () (/ sum n)) (sd () (sqrt (/ (- sumSq(/ (* sum sum) n)) (- n 1) )))) (dolist (x l) (incf n) (incf sum x) (incf sumSq (* x x))) (values (mean) (sd))))) (defun median (nums) "Return 50% and (75-50)% values, the 25% value" (labels ((mean (x y) (/ (+ x y ) 2))) (let* ((n1 (sort nums #'<)) (l (length n1)) (mid (floor (/ l 2))) (midval (nth mid n1)) (25percent (nth (floor (* l 0.25)) n1)) (75percent (nth (floor (* l 0.75)) n1)) (50percent (if (oddp l) midval (mean midval (nth (- mid 1) n1))))) (values 50percent (- 75percent 50percent) 75percent 25percent)))) (egs :average (eg '(sum '(1 1 1 1 2 2 4 100))) (eg '(median '(1 1 1 1 2 2 4 100))) (eg '(mean '(1 1 1 1 2 2 4 100)))) ;;;; ttests (defun ttest-from-lists (one two &optional (conf 95)) "Given two lists of numbers 'a' and 'b', return the ttest result." (let ((as 0) (asq 0) (an 0)(bs 0) (bsq 0) (bn 0)) (dolist (a one) (incf an) (incf as a) (incf asq (* a a))) (dolist (b two) (incf bn) (incf bs b) (incf bsq (* b b))) (ttest as asq an bs bsq bn :conf conf))) (defun ttest (as asq an bs bsq bn &key (conf 95)) "Compares means of two indpendent samples a,b of sizes an,nb, with sums as,bs and sumsquared asq,bsq at either a 95 or 99% confidence" (labels ((less () (< (/ as an) (/ bs bn))) (same () (let* ((tcrit (tcritical (+ an bn -2) conf)) (ssa (- asq (/ (* as as) an))) (ssb (- bsq (/ (* bs bs) bn))) (pooled (/ (+ ssa ssb) (+ bn an -2))) (sxasb (sqrt (* pooled (+ (/ 1 an) (/ 1 bn))))) (tvalue (abs (/ (- (/ bs bn) (/ as an)) sxasb)))) (oo tcrit tvalue) (> tcrit tvalue)))) (cond ((same) 0) ; H0 : mean of a same as mean of b ((less) -1) ; H1a : mean of a < mean of b (t 1)))) ; H1b : mean of a > mean of b (let ((ttable '((95 . (( 1 . 12.70 ) ( 3 . 3.1820 ) ( 5 . 2.5710 ) ( 10 . 2.2280 ) ( 20 . 2.0860 ) ( 80 . 1.99 ) ( 320 . 1.97 ))) (99 . (( 1 . 63.6570 ) ( 3 . 5.8410 ) ( 5 . 4.0320 ) ( 10 . 3.1690 ) ( 20 . 2.8450 ) ( 80 . 2.64 ) ( 320 . 2.58 )))))) (defun tcritical (n conf) "Returns the t-test critical values. Keeps those values as a piecewise set of lines and intermediary values are interpolated between the points." (interpolates n (geta conf ttable))) ) (defun ttest-demo (&optional (fudge 1)) "implements the demo described at http://www.cas.buffalo.edu/classes/psy/segal/2072001/ttests/t-tests1.html" (let ((one '(105 112 96 124 103 92 97 108 105 110)) (two '( 98 108 114 106 117 118 126 116 122 108))) (setf one (mapcar #'(lambda (x) (* x fudge)) one)) (ttest-from-lists one two))) (egs :ttests (eg '(ttest-demo ) :out 0) (eg '(ttest-demo 1.1) :out 0) (eg '(ttest-demo 1.2) :out 1) ) ;;;; report stuff (defstruct dist (min most-positive-double-float) (max most-negative-double-float) (bars '()) (sum 0) (sumsq 0) (n 0) (sorted t) (fuzz 1) (fat nil) (all '()) ) (defun dist-compare (d1 d2 &key (conf 95)) "compares means of two independent samples d1 d2" (ttest (dist-sum d1) (dist-sumsq d1) (dust-n d1) (dist-sum d2) (dist-sumsq d2) (dust-n d2) :conf conf)) (defun as-dist (l) "Return a dist with all the numbers of l" (if (eq 'dist (type-of l)) l (dist-adds l))) (defun dist-adds (l &optional (d (make-dist))) "Add the numbers in the list 'l' to a dist." (dolist (x l d) (dist-add x d))) (defun dist-mean (d) (/ (dist-sum d) (dist-n d))) (defun dist-sd (d) (stdev (dist-n d) (dist-sum d) (dist-sumsq d))) (defun dist-add (num &optional (d (make-dist))) (incf (dist-n d)) (incf (dist-sum d) num) (incf (dist-sumsq d) (* num num)) (if (dist-fat d) (push num (dist-all d))) (if (< num (dist-min d)) (setf (dist-min d) num)) (if (> num (dist-max d)) (setf (dist-max d) num)) (let* ((fuzz (dist-fuzz d)) (num1 (* fuzz (round (/ num fuzz)))) (val (cdr (assoc num1 (dist-bars d))))) (if val (setf (cdr (assoc num1 (dist-bars d))) (1+ val)) (push (cons num1 1) (dist-bars d))) (setf (dist-sorted d) nil)) d) (defun dist-sort (d) (labels ((car< (a b) (< (car a ) (car b)))) (unless (dist-sorted d) (setf (dist-bars d) (sort (dist-bars d) #'car<) (dist-sorted d) t)) d)) (defun distogram (d0 &key header (lwidth 5) (decimals 2) (rwidth 5) (shrink 1) (on "-") (pad 4) (str t)) "Prints the dist 'd0' on stream 'str', shrinking the right hand side bars by 'shrink'. 'Header' is some text to show on top. The other variables control details of how each line is printed." (let ((d (as-dist d0))) (unless (dist-sorted d) (dist-sort d)) (let ((sum (dist-sum d)) (bars (dist-bars d)) (min (dist-min d)) (max (dist-max d)) (n (dist-n d)) (fuzz (dist-fuzz d)) (percentSum 0) (fmt (format nil " ~~~a,~af * ~~~ad = ~~3d% ~~a " lwidth decimals rwidth))) (if header (format t "~a~%" header)) (dolist (bar bars t) (let* ((key (car bar)) (value (cdr bar)) (percent (floor (* 100 (/ value n)))) (stars (round (/ value shrink))) (halfp (< percentSum 50))) (incf percentSum percent) (format str fmt (+ key (/ fuzz 2)) value percent (if halfp "<" ">") ) (chars stars on) (terpri str)))))) (defun demo-distogram () (let ((log (make-dist :fuzz 2))) (dotimes (i 1000) (dist-add (sqrt (random 100)) log)) (distogram log :shrink 10) )) (egs :distogram (eg '(demo-distogram))) ;;;; ranking statistics (defun showh (tmp) (let (out) (maphash #'(lambda (k v) (push (cons k v) out)) tmp) (mapc 'print (sort out #'(lambda (a b) (< (car a) (car b))))))) (defun rank (l &optional (ranks (make-hash-table)) (n 0)) "Return a hash of the ranks in the sorted list of numbers 'l'. All numbers in a run of repeated entries get the average rank of that run." (if (null l) ranks (let (repeats sum now) (labels ((walk () (incf n) (pop l)) (new () (setf repeats 1) (setf sum n)) (same () (incf sum n) (incf repeats)) (spin () (when (eql now (car l)) (walk) (same) (spin)))) (setf now (walk)) (new) (spin) (setf (gethash now ranks) (/ sum repeats)) (rank l ranks n))))) (defun mann-whitney-demo () (mann-whitney '(4.6 4.7 4.9 5.1 5.2 5.5 5.8 6.1 6.5 6.5 7.2) '(5.2 5.3 5.4 5.6 6.2 6.3 6.8 7.7 8.0 8.1))) (defun mann-whitney-demo-big (&optional (fudge 1)) "Generate two lists of 10000 random floats. Multiple the second list by 'fudge'. Check if the median ranks of list one is the same as list two (return '0'), smaller than list two (return '-1'), or larger than list two (return '1')." (labels ((big (n s) (let (out) (dotimes (i n out) (push (* s (random 100)) out))))) (let ((one (big 10000 1)) (two (big 10000 fudge))) (time (mann-whitney one two))))) (defun mann-whitney (a b &optional (conf 95)) "The 'method 1' of http://faculty.vassar.edu/lowry/ch11a.html." (labels ((as-ranks (l r) (mapcar #'(lambda (x) (gethash x r)) l)) (sum (l) (let ((s 0)) (dolist (x l s) (incf s x)))) (median (l n) (let ((sorted (sort l #'<)) (midv (floor (/ n 2)))) (if (oddp n) (nth midv sorted) (/ (+ (nth (1- midv) sorted) (nth midv sorted)) 2))))) (let* ((all (sort (copy-list (append a b)) #'<)) (ranks (rank all)) (ranksa (as-ranks a ranks)) (ranksb (as-ranks b ranks)) (na (length a)) (nb (length b)) (n (+ na nb)) (tcrict (tcritical n conf)) (suma (* 1.0 (sum ranksa))) (ta (/ (* na (+ n 1)) 2.0)) (sigma (sqrt (/ (* na nb (+ n 1)) 12.0))) (za (/ (+ (- suma ta) 0.5) sigma))) ;(oo suma ta sigma tcrict) ;debug line (cond ((< (abs za) tcrict) 0) ((< (median ranksa na) (median ranksb nb)) -1) (t 1)))))