/timm's /charming /python /tricks
Download about.py.
Read more on How to be Charming (in Python).
001: import sys
002: sys.dont_write_bytecode=True
003: from base import *
004: from lib import *
005:
006: import the
007:
class About(Charmed):
    "Common base class of Sym and Num; adds nothing to Charmed."
009:
class Sym(About):
    "What we know about discrete features."
    def __init__(i, name='', inits=(), pos=0):
        """Start an empty summary, then absorb every item of `inits`.

        name  : label stored on the instance.
        inits : initial values, each passed to `seen`.
        pos   : stored as-is (presumably a column index -- confirm with callers).
        """
        # The default for `inits` is an immutable tuple rather than a shared
        # list, so one call can never leak state into the next.
        i.name, i.pos = name, pos
        # counts: value -> accumulated weight; most: largest weight so far;
        # mode: the value holding that weight; sum: total weight seen.
        i.counts, i.most, i.mode, i.sum = {}, -1, 0, 0.0
        for x in inits:
            i.seen(x)
    def any(i):
        "Return whatever `one` picks from the values seen so far."
        # list(...) so `one` gets an indexable sequence even where
        # dict.keys() returns a view (Python 3).
        return one(list(i.counts.keys()))
    def approx(i, x):
        "Symbols are not approximated: return `x` unchanged."
        return x
    def centroid(i):
        "Return the most frequently seen value."
        return i.mode
    def norm(i, x):
        "Symbols are not normalized: return `x` unchanged."
        return x
    def sample(i, enough=10**32):
        "Delegate to `some(i.counts, enough)`."
        return some(i.counts, enough)
    def seen(i, x, inc=1.0):
        "Add `inc` to the count of `x`, update the mode, and return `x`."
        tmp = i.counts[x] = i.counts.get(x, 0) + inc
        i.sum += inc
        # Strict '>' means that on a tie the earlier mode is kept.
        if tmp > i.most:
            i.most, i.mode = tmp, x
        return x
028:
class Num(About):
    "What we know about numeric features."
    def __init__(i, name='', inits=(), keep=The.chop.keep,
                 bins=The.chop.bins, pos=0,
                 lo=The.math.inf, hi=The.math.ninf):
        """Start an empty summary, then absorb every item of `inits`.

        name  : label stored on the instance.
        inits : initial values, each passed to `seen`.
        keep  : size handed to the `Sample` that caches seen values.
        bins  : number of bins used by `bins()`.
        pos   : stored as-is (presumably a column index -- confirm with callers).
        lo,hi : starting bounds; `seen` lowers `lo` and raises `hi`.
        """
        # The default for `inits` is an immutable tuple rather than a shared
        # list, so one call can never leak state into the next.
        # n: count, mu: running mean, m2: running sum of squared
        # deviations, s: standard deviation.
        i.n = i.m2 = i.mu = i.s = 0.0
        i.kept, i._bins, i.nbins = Sample(size=keep), [], bins
        i.lo, i.hi = lo, hi
        i.name, i.pos = name, pos
        for x in inits:
            i.seen(x)
    def __add__(i, j):
        "Return a new Num fed with the cached values of both `i` and `j`."
        # Only `i.name` is carried over; every other setting of the new
        # Num comes from the constructor defaults.
        k = Num(i.name)
        for n in i.cached():
            k.seen(n)
        for n in j.cached():
            k.seen(n)
        return k
    def __lt__(i, j):
        "Order Nums by their means."
        return i.mu < j.mu
    def any(i):
        "Return `any(i.lo, i.hi)`."
        # NOTE(review): inside this body `any` resolves to the module-level
        # name, not to this method.  The builtin any() takes one iterable
        # and raises TypeError when given two arguments, so this only
        # works if a two-argument `any` is exported by `lib` or `base` --
        # confirm.
        return any(i.lo, i.hi)
    def cached(i):
        "Return the cache of kept values."
        return i.kept._cache
    def cache(i, x):
        "Offer `x` to the kept sample."
        return i.kept.seen(x)
    def centroid(i):
        "Return the mean."
        return i.mu
    def bins(i):
        """Return (and memoize) break points taken from the cached values.

        The cached values are sorted and then every m-th one is kept,
        where m = len(cache) // nbins.  When m is 0 (fewer cached values
        than bins) all sorted values are kept.
        """
        if not i._bins:
            lst = i.cached()
            i._bins = sorted(lst)
            # Floor division keeps the stride an integer on Python 3 as
            # well; with '/' the slice below raises TypeError there.
            m = len(lst) // i.nbins
            if m > 0:
                i._bins = i._bins[::m]
        return i._bins
    def hedges(i, j, small=The.math.hedges):
        """Hedges effect size test.

        Returns True when the corrected effect size between `i` and `j`
        is below `small`.  Raises ZeroDivisionError when i.n + j.n == 2
        or when the pooled standard deviation is zero.
        """
        num = (i.n - 1)*i.s**2 + (j.n - 1)*j.s**2
        denom = (i.n - 1) + (j.n - 1)
        sp = sqrt(num / denom)              # pooled standard deviation
        delta = abs(i.mu - j.mu) / sp
        c = 1 - 3.0 / (4*(i.n + j.n - 2) - 1)   # small-sample correction
        return delta * c < small
    def norm(i, x):
        "Scale `x` by the range lo..hi seen so far."
        # The.math.tiny keeps the divisor non-zero when hi == lo.
        return (x - i.lo)*1.0/(i.hi - i.lo + The.math.tiny)
    def median(i):
        "Return the module-level `median` of the sorted kept values."
        # As with `any`, the name `median` here is the module-level
        # function (presumably from `lib`), not this method.
        return median(i.kept.sorted(), True)
    def sample(i, enough=10**32):
        "Yield up to `enough` picks made by `one` from the cached values."
        # A counting loop rather than range(enough): under Python 2,
        # range() cannot build a list of 10**32 items.
        count = 0
        while count < enough:
            yield one(i.kept._cache)
            count += 1
    def saw(i):
        "Return the cache of kept values (same object as `cached`)."
        return i.kept._cache
    def seen(i, x):
        "Absorb `x` (coerced to float) into the summary and return it."
        i._bins = None          # the old bins are now out-dated
        x = float(x)
        i.cache(x)              # keep a random sample of the seens
        if x < i.lo: i.lo = x   # update the min value
        if x > i.hi: i.hi = x   # update the max value
        i.n += 1                # incrementally update mean & stdev
        delta = x - i.mu
        i.mu += delta*1.0/i.n
        i.m2 += delta*(x - i.mu)
        if i.n > 1:
            i.s = (i.m2/(i.n - 1))**0.5
        return x
088:
class Nums(AutoDict):
    "An AutoDict set up with Num as its constructor argument."
    def __init__(i):
        # NOTE(review): presumably AutoDict builds a fresh Num for each
        # missing key -- confirm against AutoDict's definition.
        AutoDict.__init__(i, Num)
092:
def discretize(bins, num):
    """Round `num` to the midpoint of the pair of adjacent bins around it.

    Adjacent pairs of `bins` are scanned left to right.  A `num` below
    the first bin maps to the first midpoint, a `num` at or beyond the
    last bin maps to the last midpoint, and None is returned when
    `bins` holds fewer than two items.
    """
    middle = None
    for right in range(1, len(bins)):
        lower, upper = bins[right - 1], bins[right]
        middle = (lower + upper) * 0.5
        if num < lower or lower <= num < upper:
            return middle
    return middle
103:
This file is part of Timm's charming Python tricks.
© 2014, Tim Menzies:
tim.menzies@gmail.com,
http://menzies.us.
Timm's charming Python tricks are free software: you can redistribute them and/or modify them under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
Timm's charming Python tricks are distributed in the hope that they will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with Timm's charming Python tricks. If not, see http://www.gnu.org/licenses.