/timm's /charming /python /tricks

about.py

Download about.py.
Read more on How to be Charming (in Python).


001: import sys
002: sys.dont_write_bytecode=True
003: from base import *
004: from lib import *
005: 
006: import the
007:     
class About(Charmed):
  "Common parent of the feature summaries in this file (Sym, Num); adds nothing to Charmed."
009: 
class Sym(About):
  """What we know about discrete features.

  Keeps a frequency count per symbol (counts), the total weight seen
  (sum), and the most frequent symbol so far (mode) with its count (most).
  """
  def __init__(i,name='', inits=(),pos=0):
    # name/pos are stored as given; inits is any iterable of symbols that
    # is fed through seen().  The default is an immutable empty tuple
    # rather than a shared mutable list.
    i.name,  i.pos = name, pos
    i.counts, i.most, i.mode, i.sum = {},-1,0,0.0
    for x in inits: i.seen(x)
  def any(i):
    "Pick one of the symbols seen so far using the `one` helper."
    # list(...) gives `one` a real sequence: on Python 3 dict.keys() is a
    # view that cannot be indexed.  On Python 2 this is the same list.
    # NOTE(review): assumes `one` selects an item from a sequence -- confirm.
    return one(list(i.counts))
  def approx(i,x) : return x          # symbols are returned unchanged
  def centroid(i) : return i.mode     # central tendency = most common symbol
  def norm(i,x)   : return x          # symbols are not rescaled
  def sample(i,enough=10**32):
    "Delegate to the `some` helper with the counts table and `enough`."
    return some(i.counts,enough)
  def seen(i,x,inc=1.0):
    "Add `inc` to the count of symbol `x`, update the mode; return `x`."
    tmp = i.counts[x] = i.counts.get(x,0) + inc
    i.sum += inc
    if tmp > i.most:                  # strictly more: ties keep the old mode
      i.most, i.mode = tmp, x
    return x
028: 
class Num(About):
  """What we know about numeric features.

  Every value passed to seen() updates the count (n), the running mean
  (mu), the standard deviation (s) and the smallest/largest value (lo/hi).
  Each value is also handed to a Sample (kept); bins, medians and samples
  are computed from what that Sample holds.
  """
  def __init__(i,name='',inits=(),keep=The.chop.keep,
               bins=The.chop.bins,pos=0,
               lo=The.math.inf,hi=The.math.ninf):
    # inits: any iterable of numbers, fed through seen().  The default is
    #        an immutable empty tuple rather than a shared mutable list.
    # keep : size handed to Sample;  bins: number of bins wanted by bins().
    # lo/hi start at +inf/-inf (per their names) so the first value seen
    # replaces both.
    i.n = i.m2 = i.mu = i.s = 0.0
    i.kept,i._bins,i.nbins= Sample(size=keep),[],bins
    i.lo,i.hi  = lo, hi
    i.name,    i.pos = name,  pos
    for x in inits: i.seen(x)
  def __add__(i,j):
    "Return a new Num (named after `i`) fed with the cached values of both."
    # Only the name is carried over; the new Num uses default keep/bins/pos
    # and its statistics reflect the cached values only.
    k = Num(i.name)
    for n in i.cached(): k.seen(n)
    for n in j.cached(): k.seen(n)
    return k
  def __lt__(i,j):
    "Order Nums by their means."
    return i.mu < j.mu
  def any(i):
    "Call the module-level `any` with the observed range (lo, hi)."
    # NOTE(review): inside a method `any` is the global name, not this
    # method.  The builtin any() takes a single iterable, so this only
    # works if a star import rebinds `any` to a two-argument function
    # (e.g. a uniform random pick) -- confirm in base/lib.
    return any(i.lo,i.hi)
  def cached(i)  : return i.kept._cache   # the values held by the Sample
  def cache(i,x) : return i.kept.seen(x)  # offer one value to the Sample
  def centroid(i): return i.mu            # central tendency = mean
  def bins(i) :
    "Return (and memoize) break points: every m-th sorted cached value."
    if not i._bins:
      lst     = i.cached()
      i._bins = sorted(lst)
      # A slice step must be an integer.  Plain `/` yields a float on
      # Python 3 (TypeError in the slice below), so floor-divide instead.
      m       = int(len(lst) // i.nbins)
      if m > 0:                   # fewer values than bins: keep them all
        i._bins = i._bins[::m]
    return i._bins
  def hedges(i,j,small=The.math.hedges):
    """Hedges effect size test.

    Returns True when the corrected, standardized difference between the
    two means is below `small`.  Divides by the pooled standard deviation
    and by (i.n + j.n - 2), so it raises ZeroDivisionError when both
    Nums have n == 1 or when the pooled spread is zero.
    """
    num   = (i.n - 1)*i.s**2 + (j.n - 1)*j.s**2
    denom = (i.n - 1) + (j.n - 1)
    sp    = sqrt( num / denom )                   # pooled standard deviation
    delta = abs(i.mu - j.mu) / sp                 # standardized difference
    c     = 1 - 3.0 / (4*(i.n + j.n - 2) - 1)     # correction factor
    return delta * c < small
  def norm(i,x):
    "Rescale `x` relative to the observed lo..hi range."
    # The.math.tiny is added to the denominator; presumably a small
    # constant that avoids dividing by zero when hi == lo -- confirm.
    return (x- i.lo)*1.0/(i.hi- i.lo + The.math.tiny)
  def median(i):
    "Median of the kept values, via the module-level `median` helper."
    # NOTE(review): meaning of the second argument (True) is defined by
    # that helper, not visible here.
    return median(i.kept.sorted(),True)
  def sample(i,enough=10**32):
    "Yield up to `enough` values, each picked by `one` from the cache."
    # A counting loop instead of range(enough): on Python 2 range() builds
    # a list and overflows for the default 10**32.
    made = 0
    while made < enough:
      yield one(i.kept._cache)
      made += 1
  def saw(i):
    "Return the values held by the Sample (same list as cached())."
    return i.kept._cache
  def seen(i,x):
    "Coerce `x` to float, fold it into all statistics, and return it."
    i._bins = None   # the old bins are now out-dated
    x = float(x)
    i.cache(x)    # keep a random sample of the seens
    if x < i.lo: i.lo = x     # update the min value
    if x > i.hi: i.hi = x     # update the max value
    i.n   += 1    # incrementally update mean & stdev
    delta  = x - i.mu
    i.mu  += delta*1.0/i.n
    i.m2  += delta*(x - i.mu)   # uses the already-updated mean
    if i.n > 1:
      i.s = (i.m2/(i.n - 1))**0.5
    return x
088: 
class Nums(AutoDict):
  "An AutoDict that is initialised with the Num class."
  # NOTE(review): presumably AutoDict calls Num() to build the value for a
  # key it has not seen before -- confirm against AutoDict's definition.
  def __init__(i):
    AutoDict.__init__(i, Num)
092: 
def discretize(bins,num):
  """Round `num` to the midpoint of the bin it falls into.

  `bins` is a sequence of break points; each pair of neighbours forms one
  bin.  Pairs are tried left to right and the midpoint of the first pair
  whose upper break point exceeds `num` is returned, so anything below
  the first break point maps to the first midpoint.  If no pair matches,
  the last midpoint is returned.  With fewer than two break points there
  are no bins and the result is None.
  """
  mid = None
  for lower, upper in zip(bins, bins[1:]):
    mid = (lower + upper)*0.5
    if num < lower or lower <= num < upper:
      return mid
  return mid
103: 

This file is part of Timm's charming Python tricks.
© 2014, Tim Menzies: tim.menzies@gmail.com, http://menzies.us.

Timm's charming Python tricks are free software: you can redistribute them and/or modify them under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

Timm's charming Python tricks are distributed in the hope that they will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along with Timm's charming Python tricks. If not, see http://www.gnu.org/licenses.