/timm's /charming /python /tricks
Download
bore.py.
Read more on How to be Charming (in Python).
001: import sys,math,random
002: sys.dont_write_bytecode = True
003:
004: class Sym:
005: "An Accumulator for syms"
006: def __init__(i):
007: i.n=i.most=0; i.mode= None; i.counts={}
008: def __add__(i,x):
009: i.n += 1
010: new = i.counts.get(x,0) + 1
011: if new > i.most:
012: i.most, i.mode = new, x
013: i.counts[x] = new
014:
015: def chops(lst,n):
016: "Equal frequency binning."
017: out,i,j,last = [],0,0,None
018: for bins in range(n,1,-1):
019: if j >= len(lst) :
020: out[-1] += lst[i:]
021: break
022: if i:
023: out += [lst[i:j]]
024: last = out[-1][-1]
025: i = j
026: j += int((len(lst) - j)*1.0 /bins)
027: # jump runs of similar values
028: while j < len(lst) and lst[j] == last
029: j += 1
030: return out
031:
032: class Num:
033: "An Accumulator for numbers"
034: def __init__(i,init=[],cache=128):
035: i.n = i.m2 = i.mu = 0.0
036: i.hi, i.lo = -1*10**32, 10**32
037: i.some = Sample(cache=cache)
038: for x in init: i + x
039: def s(i) : return (i.m2/(i.n - 1))**0.5
040: def __lt__(i,j): return i.mu < j.mu
041: def __add__(i,x):
042: i.some + x
043: if x > i.hi: i.hi = x
044: if x < i.lo: i.lo = x
045: i.n += 1
046: delta = x - i.mu
047: i.mu += delta*1.0/i.n
048: i.m2 += delta*(x - i.mu)
049:
050: class Sample:
051: "Keep a random sample of stuff seen so far."
052: def __init__(i,cache=128):
053: i._cache, i.size, i.n = [],cache, 0.0
054: def chop(i,bins):
055: return chop(sorted(i._cache),bins)
056: def __add__(i,x):
057: i.n += 1
058: if len(i._cache) < i.size : # if cache not full
059: i._cache += [x] # then add
060: else: # otherwise, maybe replace an old item
061: if random.random() <= i.size/i.n:
062: i._cache[int(random.random()*i.size)] = x
063:
064: def scottknott(data,cohen=0.3,small=3):
065: "Split to maximize change in mean."
066: def cutHere(parts,all,big,same):
067: cut,left,right = None,None,None
068: before, mu = 0, all.mu
069: for i,l,r in leftRight(parts):
070: if big(l.n) and big(r.n) and not same(l,r):
071: n = all.n * 1.0
072: now = l.n/n*(mu- l.mu)**2 + r.n/n*(mu- r.mu)**2
073: if now > before:
074: before,cut,left,right = now,i,l,r
075: return cut,left,right
076: all = reduce(lambda x,y:x+y,data)
077: same = lambda l,r: abs(l.mu - r.mu) <= all.s*cohen
078: big = lambda n: n > small
079: return rdiv(data,all,cutHere,big,same)
080:
081: def rdiv(data,all,div,big,same):
082: "Recursive splits"
083: def recurse(parts,all,rank=0):
084: cut,left,right = div(parts,all,big,same)
085: if cut: # if cut, rank "right" higher than "left"
086: rank = 1 + recurse(parts[:cut],left,rank)
087: rank = recurse(parts[cut:],right,rank)
088: else: # if no cut, then all get same rank
089: for part in parts:
090: part.rank = rank
091: return rank
092: recurse(sorted(data),all)
093: return data
094:
095: def leftRight(accumulators):
096: "For all items, yield everything below,above it."
097: # step1: pre-process: accumulate, right to left
098: rights = {}
099: n = j = len(accumulators) - 1
100: while j > 0:
101: rights[j] = accumulators[j]
102: if j < n: rights[j] += rights[j+1]
103: j -=1
104: # step2: output: report left, right accumulation
105: left = accumulators[0]
106: for i,one in enumerate(accumulators):
107: if i> 0:
108: yield i,left,rights[i]
109: left += one # update the left accumulator
110:
111: def go(f):
112: "A decorator that runs code at load time."
113: print "\n# ---|", f.__name__,"|-----------------"
114: if f.__doc__: print "#", f.__doc__
115: f()
116:
117: #@go
118: def _leftRight(n=20):
119: for lst in [ [11,12,13,14,21,22,23,24,31,32,33,34],
120: [11,11,11,11,11,11,11,11,11],
121: [1,1,11,11,11,11,11],
122: [11,11,11,11,11,11,11,31,32,33,34],
123: ]:
124: lst1 = lst[:]
125: random.shuffle(lst1)
126: print rdiv(lst1)
127: return 1
128:
129:
This file is part of Timm's charming Python tricks.
© 2014, Tim Menzies:
tim.menzies@gmail.com,
http://menzies.us.
Timm's charming Python tricks are free software: you can redistribute it and/or modify it under the terms of the GNU Lesser Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
Timm's charming Python tricks are distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU Lesser Public License along with Foobar. If not, see http://www.gnu.org/licenses.