/timm's /charming /python /tricks

bore.py

Download bore.py.
Read more on How to be Charming (in Python).


001: import sys,math,random
002: sys.dont_write_bytecode = True
003: 
004: class Sym:
005:   "An Accumulator for syms"
006:   def __init__(i): 
007:     i.n=i.most=0; i.mode= None; i.counts={}
008:   def __add__(i,x):
009:     i.n += 1    
010:     new  = i.counts.get(x,0) + 1
011:     if new > i.most:
012:       i.most, i.mode = new, x
013:     i.counts[x] = new
014: 
015: def chops(lst,n):
016:   "Equal frequency binning."
017:   out,i,j,last = [],0,0,None
018:   for bins in range(n,1,-1):
019:     if j >= len(lst) : 
020:       out[-1] += lst[i:]
021:       break
022:     if i: 
023:       out += [lst[i:j]]
024:       last = out[-1][-1]
025:     i  = j
026:     j += int((len(lst) - j)*1.0 /bins)
027:     # jump runs of similar values
028:     while j < len(lst) and lst[j] == last
029:       j += 1
030:   return out
031: 
032: class Num:
033:   "An Accumulator for numbers"
034:   def __init__(i,init=[],cache=128): 
035:     i.n = i.m2 = i.mu = 0.0
036:     i.hi, i.lo = -1*10**32, 10**32
037:     i.some = Sample(cache=cache)
038:     for x in init: i + x
039:   def s(i) : return (i.m2/(i.n - 1))**0.5
040:   def __lt__(i,j): return i.mu < j.mu
041:   def __add__(i,x):
042:     i.some + x
043:     if x > i.hi: i.hi = x
044:     if x < i.lo: i.lo = x
045:     i.n   += 1    
046:     delta  = x - i.mu
047:     i.mu  += delta*1.0/i.n
048:     i.m2  += delta*(x - i.mu)
049: 
050: class Sample:
051:   "Keep a random sample of stuff seen so far."
052:   def __init__(i,cache=128):
053:     i._cache, i.size, i.n = [],cache, 0.0
054:   def chop(i,bins):
055:     return chop(sorted(i._cache),bins)
056:   def __add__(i,x):
057:     i.n += 1
058:     if len(i._cache) < i.size : # if cache not full
059:       i._cache += [x]           # then add
060:     else: # otherwise, maybe replace an old item
061:       if random.random() <= i.size/i.n:
062:         i._cache[int(random.random()*i.size)] = x
063: 
064: def scottknott(data,cohen=0.3,small=3):
065:   "Split to maximize change in mean."
066:   def cutHere(parts,all,big,same):
067:     cut,left,right = None,None,None
068:     before, mu     =  0, all.mu
069:     for i,l,r in leftRight(parts):
070:       if big(l.n) and big(r.n) and not same(l,r):
071:         n   = all.n * 1.0
072:         now = l.n/n*(mu- l.mu)**2 + r.n/n*(mu- r.mu)**2  
073:         if now > before:
074:           before,cut,left,right = now,i,l,r
075:     return cut,left,right  
076:   all  = reduce(lambda x,y:x+y,data)
077:   same = lambda l,r: abs(l.mu - r.mu) <= all.s*cohen
078:   big  = lambda    n: n > small    
079:   return rdiv(data,all,cutHere,big,same)
080: 
081: def rdiv(data,all,div,big,same): 
082:   "Recursive splits"
083:   def recurse(parts,all,rank=0):
084:     cut,left,right = div(parts,all,big,same)
085:     if cut:  # if cut, rank "right" higher than "left"
086:       rank = 1 + recurse(parts[:cut],left,rank) 
087:       rank =     recurse(parts[cut:],right,rank)
088:     else:  # if no cut, then all get same rank
089:       for part in parts: 
090:         part.rank = rank
091:     return rank
092:   recurse(sorted(data),all)
093:   return data
094: 
095: def leftRight(accumulators):
096:   "For all items, yield everything below,above it."
097:   # step1: pre-process: accumulate, right to left
098:   rights = {} 
099:   n = j = len(accumulators) - 1
100:   while j > 0:
101:     rights[j] = accumulators[j]
102:     if j < n: rights[j] += rights[j+1]
103:     j -=1
104:   # step2: output: report left, right accumulation
105:   left = accumulators[0]
106:   for i,one in enumerate(accumulators):
107:     if i> 0: 
108:       yield i,left,rights[i] 
109:       left += one # update the left accumulator
110: 
111: def go(f):
112:   "A decorator that runs code at load time."
113:   print "\n# ---|", f.__name__,"|-----------------"
114:   if f.__doc__: print "#", f.__doc__
115:   f()
116: 
117: #@go
118: def _leftRight(n=20):
119:   for lst in [ [11,12,13,14,21,22,23,24,31,32,33,34],
120:                [11,11,11,11,11,11,11,11,11],
121:               [1,1,11,11,11,11,11],
122:               [11,11,11,11,11,11,11,31,32,33,34],
123:              ]:
124:     lst1 = lst[:]
125:     random.shuffle(lst1)
126:     print rdiv(lst1)
127:     return 1
128:   
129: 

This file is part of Timm's charming Python tricks.
© 2014, Tim Menzies: tim.menzies@gmail.com, http://menzies.us.

Timm's charming Python tricks are free software: you can redistribute it and/or modify it under the terms of the GNU Lesser Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

Timm's charming Python tricks are distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU Lesser Public License along with Foobar. If not, see http://www.gnu.org/licenses.