/timm's /charming /python /tricks
Download
bore.py.
Read more on How to be Charming (in Python).
001: import sys,math,random 002: sys.dont_write_bytecode = True 003: 004: class Sym: 005: "An Accumulator for syms" 006: def __init__(i): 007: i.n=i.most=0; i.mode= None; i.counts={} 008: def __add__(i,x): 009: i.n += 1 010: new = i.counts.get(x,0) + 1 011: if new > i.most: 012: i.most, i.mode = new, x 013: i.counts[x] = new 014: 015: def chops(lst,n): 016: "Equal frequency binning." 017: out,i,j,last = [],0,0,None 018: for bins in range(n,1,-1): 019: if j >= len(lst) : 020: out[-1] += lst[i:] 021: break 022: if i: 023: out += [lst[i:j]] 024: last = out[-1][-1] 025: i = j 026: j += int((len(lst) - j)*1.0 /bins) 027: # jump runs of similar values 028: while j < len(lst) and lst[j] == last 029: j += 1 030: return out 031: 032: class Num: 033: "An Accumulator for numbers" 034: def __init__(i,init=[],cache=128): 035: i.n = i.m2 = i.mu = 0.0 036: i.hi, i.lo = -1*10**32, 10**32 037: i.some = Sample(cache=cache) 038: for x in init: i + x 039: def s(i) : return (i.m2/(i.n - 1))**0.5 040: def __lt__(i,j): return i.mu < j.mu 041: def __add__(i,x): 042: i.some + x 043: if x > i.hi: i.hi = x 044: if x < i.lo: i.lo = x 045: i.n += 1 046: delta = x - i.mu 047: i.mu += delta*1.0/i.n 048: i.m2 += delta*(x - i.mu) 049: 050: class Sample: 051: "Keep a random sample of stuff seen so far." 052: def __init__(i,cache=128): 053: i._cache, i.size, i.n = [],cache, 0.0 054: def chop(i,bins): 055: return chop(sorted(i._cache),bins) 056: def __add__(i,x): 057: i.n += 1 058: if len(i._cache) < i.size : # if cache not full 059: i._cache += [x] # then add 060: else: # otherwise, maybe replace an old item 061: if random.random() <= i.size/i.n: 062: i._cache[int(random.random()*i.size)] = x 063: 064: def scottknott(data,cohen=0.3,small=3): 065: "Split to maximize change in mean." 066: def cutHere(parts,all,big,same): 067: cut,left,right = None,None,None 068: before, mu = 0, all.mu 069: for i,l,r in leftRight(parts): 070: if big(l.n) and big(r.n) and not same(l,r): 071: n = all.n * 1.0 072: now = l.n/n*(mu- l.mu)**2 + r.n/n*(mu- r.mu)**2 073: if now > before: 074: before,cut,left,right = now,i,l,r 075: return cut,left,right 076: all = reduce(lambda x,y:x+y,data) 077: same = lambda l,r: abs(l.mu - r.mu) <= all.s*cohen 078: big = lambda n: n > small 079: return rdiv(data,all,cutHere,big,same) 080: 081: def rdiv(data,all,div,big,same): 082: "Recursive splits" 083: def recurse(parts,all,rank=0): 084: cut,left,right = div(parts,all,big,same) 085: if cut: # if cut, rank "right" higher than "left" 086: rank = 1 + recurse(parts[:cut],left,rank) 087: rank = recurse(parts[cut:],right,rank) 088: else: # if no cut, then all get same rank 089: for part in parts: 090: part.rank = rank 091: return rank 092: recurse(sorted(data),all) 093: return data 094: 095: def leftRight(accumulators): 096: "For all items, yield everything below,above it." 097: # step1: pre-process: accumulate, right to left 098: rights = {} 099: n = j = len(accumulators) - 1 100: while j > 0: 101: rights[j] = accumulators[j] 102: if j < n: rights[j] += rights[j+1] 103: j -=1 104: # step2: output: report left, right accumulation 105: left = accumulators[0] 106: for i,one in enumerate(accumulators): 107: if i> 0: 108: yield i,left,rights[i] 109: left += one # update the left accumulator 110: 111: def go(f): 112: "A decorator that runs code at load time." 113: print "\n# ---|", f.__name__,"|-----------------" 114: if f.__doc__: print "#", f.__doc__ 115: f() 116: 117: #@go 118: def _leftRight(n=20): 119: for lst in [ [11,12,13,14,21,22,23,24,31,32,33,34], 120: [11,11,11,11,11,11,11,11,11], 121: [1,1,11,11,11,11,11], 122: [11,11,11,11,11,11,11,31,32,33,34], 123: ]: 124: lst1 = lst[:] 125: random.shuffle(lst1) 126: print rdiv(lst1) 127: return 1 128: 129:
This file is part of Timm's charming Python tricks.
© 2014, Tim Menzies:
tim.menzies@gmail.com,
http://menzies.us.
Timm's charming Python tricks are free software: you can redistribute it and/or modify it under the terms of the GNU Lesser Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
Timm's charming Python tricks are distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU Lesser Public License along with Foobar. If not, see http://www.gnu.org/licenses.