;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; This file is part of "NOVA":  NOVA = search + COCOMO tools
; Copyright, 2008, Tim Menzies tim@menzies.us
;
; NOVA is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation, either version 3 of the License, or
; (at your option) any later version.
;
; NOVA is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
; You should have received a copy of the GNU General Public License
; a long with NOVA.  If not, see <http://www.gnu.org/licenses/>.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; copyright and usage stuff

(defun gpl3-string (what when who email)
  (format nil "This file is part of \"~a\":  NOVA = search + COCOMO tools
Copyright, ~a, ~a ~a

~a is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

~a is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
a long with ~a.  If not, see <http://www.gnu.org/licenses/>."
          what when who email what what what))

(defun about-string (what when who email)
  (format nil "~a
Copyright (C) ~a, ~a ~a

This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it
under certain conditions.  For details, type (copyright)."
	  what when who email))

;;; random stuff

(defparameter *seed0* 10013)
(defparameter *seed*  *seed0*)

(defun reset-seed () (setf *seed* *seed0*))

(defun park-miller-randomizer ()
  "The Park-Miller multiplicative congruential randomizer (CACM, October 88,
   Page 1195).  Creates pseudorandom floating-point numbers in the range
   0 < x <= 1."
  (let ((multiplier 16807d0)  ; 16807 = (expt 7 5)
	(modulus 2147483647d0))  ; 2147483647 = (- (expt 2 31) 1)
    (let ((temp (* multiplier *seed*)))
      (setf *seed* (mod temp modulus))
      (/ *seed* modulus))))

(defun my-random (n)
  "Returns a pseudorandom floating-point number in the range 0 <= x < n."
  (let ((x (park-miller-randomizer)))
    
    ;; x is in the range 0 < x <= 1
    ;; subtract from 1 to end up in the range 0 <= x < 1
    (* n (- 1 x))))

(defun my-random-int (n)
  "Returns a pseudorandom integer in the range 0 <= x <= n - 1."
  (let ((x (my-random 1)))
    (floor (* n x))))

(defun my-random-demo ()
  "Demonstrates seeding the randomizer and generating random numbers."
  (let (counts out)
    (labels
	((sorter (x y) (< (car x) (car y)))
         (zap    ()    (setf out nil)
		       (reset-seed)
	               (setf counts (make-hash-table)))
	 (inc    (n)   (setf (gethash n counts)
			     (1+ (gethash n counts 0)))) 
	 (cache  (k v) (push (list k v) out)))
      (dotimes (i 2 t)
	(zap)
	(dotimes (i 10000) (inc (my-random-int 5)))
	(maphash #'cache counts)
	(print (sort out #'sorter))))))

(egs :my-random
     (eg '(dotimes (i 5) (format t "~a~%" (my-random 100)))
	 :of "generating random floating-point numbers")
     (eg '(my-random-demo)
	 :of "demonstrating random functions"))

;;; line stuff

(defstruct line 
  "A line runs from (x1, y1) to (x2, y2) with slope m and y-intercept b.  Some
   lines are verticalp (i.e., vertical lines with no defined slope)."
  x1 y1 x2 y2 m b verticalp)

(defun point-to-line (x1 y1 x2 y2) 
  "Creates a line from two points."
  (if (> x1 x2)
      (point-to-line x2 y2 x1 y1)
      (let* ((rise      (- y2 y1))
	     (run       (- x2 x1))
	     (verticalp (zerop run))
	     m 
	     b)
	(unless verticalp
	  (setf m (/ rise run)
		b (- y2 (* m x2))))
	(make-line :x1 x1 :y1 y1 :x2 x2 :y2 y2
		   :m m :b b :verticalp verticalp))))

(defun line-y (x line)
  "Returns the y value for the given x value on the line."
  (if (line-verticalp line)
      (line-y1 line)
      (+ (* (line-m line) x)
	 (line-b line))))

(defun interpolate (x x1 y1 x2 y2 &optional too-big)
  "Calculates the y value for the given x value using linear interpolation
   between the given points.  Returns too-big if x > x2, y1 if x < x1."
  (cond ((<   x x1) y1)
	((eql x x1) y1)
	((eql x x2) y2)
	((>   x x2) too-big)
	(t          (line-y x (point-to-line x1 y1 x2 y2)))))

(defun interpolates (x points)
  "Calculates the y value for the given x value using linear interpolation
   between the (x . y) pairs in points."
  (let ((one (pop points))
	(two (car points)))
    (or (if (null points) (cdr one))
	(interpolate x (car one) (cdr one) (car two) (cdr two))
	(interpolates x points))))

(egs :line
     (eg '(point-to-line 0 0 5 10)
	 :of "converting two points to a line")
     (eg '(line-y 2.5 (point-to-line 0 0 5 10))
	 :of "calculating the y value on a line"
	 :out 5)
     (eg '(interpolates 8 '((0 . 0) (5 . 5) (10 . 15)))
	 :of "interpolating between (x . y) pairs"
	 :out 11))

;;; list stuff

(defun as-list (x)
  "Ensures that x is a list."
  (if (listp x)
      x
      (list x)))

(defun geta (key alist &optional default)
  "Returns a value from an association list.  If the key is absent, returns a
   default value."
  (or (cdr (assoc key alist))
      default))

(defun puta (key value alist)
  "Adds or replaces a value in an association list."
  (if (assoc key alist)
      (setf (cdr (assoc key alist)) value)
      (push (cons key value) alist)))

(egs :geta
     (eg '(geta 'fred '((fred  . 1) (jane . 2)))
	 :of "getting a value from an association list"
	 :out 1)
     (eg '(geta 'fred '((frodo . 1) (jane . 2)) 3)
	 :of "attempting to get a non-existent value from an association list"
	 :out 3))

(defmacro move-elt (x from to)
  "Moves an item from one list to another."
  (let ((x-name (gensym)))
    `(let ((,x-name ,x))
      (when (find ,x-name ,from :test #'equal)
	(setf ,from (remove ,x-name ,from :test #'equal :count 1))
	(setf ,to (append ,to (list ,x-name)))))))

(defmacro move-elts (l from to)
  "Moves a list of items from one list to another."
  `(dolist (x ,l ,to)
    (move-elt x ,from ,to)))

(defmacro switch-elt (x l1 l2)
  "Swaps an item between two lists."
  `(or (move-elt ,x ,l1 ,l2) (move-elt ,x ,l2 ,l1)))

(defmacro switch-elts (l l1 l2)
  "Swaps a list of items between two lists."
  `(dolist (x ,l)
    (switch-elt x ,l1 ,l2)))

;;; misc stuff

(defun my-command-line ()
  "Accesses the command line."
  (or #+CMU       extensions:*command-line-words*
      #+LISPWORKS system:*line-arguments-list*
      #+SBCL      *posix-argv*
      nil))

(defun my-getenv (name &optional default)
  "Returns a variable value from the environment outside of Lisp."
  #+CMU
  (let ((x (assoc name ext:*environment-list* :test #'string=)))
    (if x (cdr x) default))
  #-CMU
  (or #+Allegro   (sys:getenv name)
      #+CLISP     (ext:getenv name)
      #+ECL       (si:getenv name)
      #+LISPWORKS (lispworks:environment-variable name)
      #+SBCL      (sb-unix::posix-getenv name)
      default))

(defun euclidean (&rest coordinates)
  "Returns the distance of a point in n-dimensional space from the origin."
  (let ((sum-sq 0))
    (dolist (x coordinates (sqrt sum-sq))
      (incf sum-sq (* x x)))))

(egs :euclidean
     (eg '(euclidean 1 1)
	 :of "calculating distance to the origin in 2-space"
	 :out (sqrt 2))
     (eg '(euclidean 10 10 10 10)
	 :of "calculating distance to the origin in 4-space"
	 :out 20))

(defun chars (n &optional (c "*") (str t))
  "Prints a string of n identical characters to the given stream."
  (dotimes (i n) (princ c str)))

(let (last-loaded)
  (defmacro l (&optional (f last-loaded))
    `(progn (setf last-loaded ',f)
      (load (string-downcase (format nil "~a.lisp" ',f)))
      ',f)))

(defmacro sym (&rest args) `(sym-prim ',args))

(defun sym-prim (l) (intern (format nil "~a~{-~a~}" (car l) (cdr l))))

;;; stats stuff

(defun sum (l)
  "Computes the sum of a list of numbers."
  (let ((sum 0))
    (dolist (x l sum)
      (incf sum x))))  

(defun sum-sq (l)
  "Computes the sum of squares of a list of numbers."
  (let ((sum 0))
    (dolist (x l sum)
      (incf sum (* x x)))))

(defun mean (l)
  "Computes the mean of a list of numbers."
  (let ((n   0)
	(sum 0))
    (dolist (x l (/ sum n))
      (incf n)
      (incf sum x))))

(defun stdev-from-list (l)
  "Returns the standard deviation of a list of numbers."
  (let ((n      0)
	(sum    0)
	(sum-sq 0))
    (dolist (x l (stdev n sum sum-sq))
      (incf n)
      (incf sum x)
      (incf sum-sq (* x x)))))

(defun stdev (n sum sum-sq)
  "Computes standard deviation."
  (sqrt (/ (- sum-sq (/ (* sum sum) n)) (- n 1))))

(defun median (l)
  "Computes the median, spread, and quartiles of a list of numbers."
  ; also shamelessly hacks in the sum, sum of squares, and size
  (labels ((mean (x y) (/ (+ x y) 2)))
    (let* ((sorted    (sort (copy-list l) #'<))
	   (len       (length sorted))
	   (mid-pos   (floor (/ len 2)))
	   (mid-val   (nth mid-pos sorted))
	   (25percent (nth (floor (* len 0.25)) sorted))
	   (75percent (nth (floor (* len 0.75)) sorted))
	   (50percent (if (oddp len)
			  mid-val
			  (mean mid-val (nth (1- mid-pos) sorted)))))
      (values 50percent
	      (- 75percent 50percent)
	      75percent
	      25percent
              (sum l)
              (sum-sq l)
              len))))

(defun percentile (value data)
  "Computes value's percentile in the given set of data."
  (let ((num-total (length data))
        (num-less  (length (remove-if-not #'(lambda (x) (< x value)) data))))
    (* 100 (/ num-less num-total))))

(egs :stats
     (eg '(sum '(1 1 1 1 2 2 4 100))
	 :of "computing the sum of a list of numbers"
	 :out 112)
     (eg '(mean '(1 1 1 1 2 2 4 100))
	 :of "computing the mean of a list of numbers"
	 :out 14)
     (eg '(median '(1 1 1 1 2 2 4 100))
	 :of "computing the median of a list of numbers"
	 :out 3/2))

;;; ttest stuff

(defun ttest-from-lists (one two &optional (conf 95))
  "Returns the ttest result from two lists."
  (let ((as  0)
	(asq 0)
	(an  0)
	(bs  0)
	(bsq 0)
	(bn  0))
    (dolist (a one)
      (incf an)
      (incf as a)
      (incf asq (* a a)))
    (dolist (b two)
      (incf bn)
      (incf bs b)
      (incf bsq (* b b)))
    (ttest as asq an bs bsq bn conf)))

(defun ttest (as asq an bs bsq bn &optional (conf 95))
  "Compares means of two independent samples a and b.
     as, bs   = sample sums
     asq, bsq = sample sums of squares
     an, bn   = sample sizes
     conf     = confidence level (95 or 99)"
  (labels 
      ((less () (< (/ as an) (/ bs bn)))
       (same () (let* ((tcrit  (tcritical (+ an bn -2) conf))
		       (ssa    (- asq (/ (* as as) an)))
		       (ssb    (- bsq (/ (* bs bs) bn)))
		       (pooled (/ (+ ssa ssb) (+ bn  an -2)))
		       (sxasb  (sqrt (* pooled (+ (/ 1 an) (/ 1 bn)))))
		       (tvalue (abs (/ (- (/ bs bn) (/ as an)) sxasb))))
		  ; (oo tcrit tvalue)
		  (> tcrit tvalue))))
    (cond ((same)  0)    ; H0  : mean of a = mean of b
	  ((less) -1)    ; H1a : mean of a < mean of b
	  (t       1)))) ; H1b : mean of a > mean of b

(let ((ttable '((95 . ((  1 . 12.70  )
		       (  3 .  3.1820)
		       (  5 .  2.5710)
		       ( 10 .  2.2280)
		       ( 20 .  2.0860)
		       ( 80 .  1.99  )
		       (320 .  1.97  )))
		(99 . ((  1 . 63.6570)
		       (  3 .  5.8410)
		       (  5 .  4.0320)
		       ( 10 .  3.1690)
		       ( 20 .  2.8450)
		       ( 80 .  2.64  )
		       (320 .  2.58  ))))))
  (defun tcritical (n conf)
    "Returns the ttest critical values.  Keeps those values as a set of lines,
     and intermediary values are interpolated between the points."
    (interpolates n (geta conf ttable))))

(defun ttest-demo (&optional (fudge 1))
  "Implements the demo described at 
   http://www.cas.buffalo.edu/classes/psy/segal/2072001/ttests/t-tests1.html."
  (let ((one '(105 112  96 124 103  92  97 108 105 110))
	(two '( 98 108 114 106 117 118 126 116 122 108)))
    (setf one (mapcar #'(lambda (x) (* x fudge)) one))
    (ttest-from-lists one two)))

(egs :ttest
     (eg '(ttest-demo)
	 :of "running a ttest where the means are statistically equal"
	 :out 0)
     (eg '(ttest-demo 1.1)
	 :of "running a ttest where the means are statistically equal"
	 :out 0)
     (eg '(ttest-demo 1.2)
	 :of "running a ttest where the means are statistically unequal"
	 :out 1))

;;; report stuff

(defstruct dist 
  (min    most-positive-double-float) 
  (max    most-negative-double-float) 
  (bars   nil)
  (sum    0)
  (sumsq  0)
  (n      0)
  (sorted t)
  (fuzz   1)
  (fat    nil)
  (all    nil))

(defun dist-compare (d1 d2 &optional (conf 95))
  "Compares the means of two independent samples."
  (ttest (dist-sum d1) (dist-sumsq d1) (dist-n d1)
	 (dist-sum d2) (dist-sumsq d2) (dist-n d2)
	 conf))

(defun as-dist (l)
  "Return a dist with all the numbers in the given list."
  (if (eq 'dist (type-of l))
      l
      (dist-adds l)))

(defun dist-adds (l &optional (d (make-dist)))
  "Add the numbers in the given list to a dist."
  (dolist (x l d)
    (dist-add x d)))

(defun dist-mean (d)
  "Returns the mean of a dist."
  (/ (dist-sum d) (dist-n d)))

(defun dist-sd (d)
  "Returns the standard deviation of a dist."
  (stdev (dist-n d) (dist-sum d) (dist-sumsq d)))

(defun dist-add (num &optional (d (make-dist)))
  "Adds a number to a dist."
  (incf (dist-n     d))
  (incf (dist-sum   d) num)
  (incf (dist-sumsq d) (* num num))
  (if (dist-fat d)
      (push num (dist-all d)))
  (if (< num (dist-min d))
      (setf (dist-min d) num))
  (if (> num (dist-max d))
      (setf (dist-max d) num))
  (let* ((fuzz (dist-fuzz d))
	 (num1 (* fuzz (round (/ num fuzz))))
	 (val  (cdr (assoc num1 (dist-bars d)))))
    (if val
	(setf (cdr (assoc num1 (dist-bars d))) (1+ val))
	(push (cons num1 1) (dist-bars d)))
    (setf (dist-sorted d) nil))
  d)

(defun dist-sort (d)
  "Sorts a dist."
  (labels ((car< (a b) (< (car a ) (car b))))
    (unless (dist-sorted d)
      (setf (dist-bars d) (sort (dist-bars d) #'car<)
	    (dist-sorted d) t))
    d))

(defun distogram (d &key header (lwidth 5) (decimals 2) (rwidth 5)
		  (shrink 1) (on "-") (pad 4) (str t))
  "Prints a dist on stream str, shrinking the right hand side bars by shrink.
   header is some text to show on top.  The other variables control how each
   line is printed."
  (let ((d (as-dist d)))
    (unless (dist-sorted d)
      (dist-sort d))
    (let ((sum         (dist-sum  d))
	  (bars        (dist-bars d))
	  (min         (dist-min  d))
	  (max         (dist-max  d))
	  (n           (dist-n    d))
	  (fuzz        (dist-fuzz d))
	  (percent-sum 0)
	  (fmt         (format nil " ~~~a,~af * ~~~ad = ~~3d% ~~a "
			       lwidth decimals rwidth)))
      (if header
	  (format str "~a~%" header))
      (dolist (bar bars t)
	(let* ((key     (car bar))
	       (value   (cdr bar))
	       (percent (floor (* 100 (/ value n))))
	       (stars   (round (/ value shrink)))
	       (halfp   (< percent-sum 50)))
	  (incf percent-sum percent)
	  (format str fmt
		  (+ key (/ fuzz 2)) 
		  value 
		  percent
		  (if halfp "<" ">"))
	  (chars stars on)
	  (terpri str))))))

(defun demo-distogram ()
  "Creates a dist with random numbers and displays it."
  (let ((log (make-dist :fuzz 2)))
    (dotimes (i 1000)
      (dist-add (sqrt (random 100)) log))
    (distogram log :shrink 10)))

(egs :distogram
     (eg '(demo-distogram)
	 :of "displaying a random dist"))

;;; ranking stats stuff

(defun showh (h)
  "Prints hash table entries sorted by key."
  (let (l)
    (maphash #'(lambda (k v) (push (cons k v) l)) h)
    (mapc #'print (sort l #'(lambda (a b) (< (car a) (car b)))))))

(defun rank (l &optional (ranks (make-hash-table)) (n 0))
  "Returns a hash of the ranks in a sorted list.  All numbers in a run of
   repeated entries get the average rank of that run."
  (if (null l)
      ranks
      (let (repeats sum now)
	(labels ((walk () (incf n) (pop l))
		 (new  () (setf repeats 1) (setf sum n))
		 (same () (incf sum n) (incf repeats))
		 (spin () (when (eql now (car l))
			    (walk) (same) (spin))))
	  (setf now (walk))
	  (new)
	  (spin)
	  (setf (gethash now ranks) (/ sum repeats))
	  (rank l ranks n)))))

(defun mann-whitney-demo ()
  "Performs a Mann-Whitney test."
  (mann-whitney '(4.6 4.7 4.9 5.1 5.2 5.5 5.8 6.1 6.5 6.5 7.2)
		'(5.2 5.3 5.4 5.6 6.2 6.3 6.8 7.7 8.0 8.1)))

(defun mann-whitney-demo-big (&optional (fudge 1)) 
  "Generates two lists of 10,000 random floats.  Multiplies the second list by
   fudge.  Performs a Mann-Whitney test on the two lists."
  (labels ((big (n s) (let (out) (dotimes (i n out) 
				   (push (* s (random 100)) out)))))
    (let ((one (big 10000 1))
	  (two (big 10000 fudge)))
      (time (mann-whitney one two)))))

(defun mann-whitney (a b &optional (conf 95))
  "Performs a Mann-Whitney test as described in method 1 of
   http://faculty.vassar.edu/lowry/ch11a.html."
  (labels ((as-ranks (l r) (mapcar #'(lambda (x) (gethash x r)) l))
	   (sum      (l)   (let ((s 0)) (dolist (x l s) (incf s x))))
	   (median   (l n) (let ((sorted (sort l #'<))
				 (midv (floor (/ n 2))))
			     (if (oddp n)
				 (nth midv sorted)
				 (/ (+ (nth (1- midv) sorted) 
				       (nth midv sorted)) 2)))))
    (let* ((all    (sort (copy-list (append a b)) #'<))
	   (ranks  (rank all))
	   (ranksa (as-ranks a ranks))
	   (ranksb (as-ranks b ranks))
	   (na     (length a))
	   (nb     (length b))
	   (n      (+ na nb))	   
	   (tcrit  (tcritical n conf))
	   (suma   (* 1.0 (sum ranksa)))
	   (ta     (/ (* na (+ n 1)) 2.0))
	   (sigma  (sqrt (/  (* na nb (+ n 1)) 12.0)))
	   (za     (/ (+ (- suma ta) 0.5) sigma)))
      ; (oo suma ta sigma tcrit)
      (cond  ((< (abs za) tcrit)                         0)
	     ((< (median ranksa na) (median ranksb nb)) -1)
	     (t                                          1)))))