/timm's /charming /python /tricks
Download
about.py.
Read more on How to be Charming (in Python).
import sys
sys.dont_write_bytecode = True
from base import *
from lib import *

import the


class About(Charmed):
    "Common parent of the column summaries defined below (Sym and Num)."
    pass


class Sym(About):
    "What we know about discrete features."

    def __init__(i, name='', inits=(), pos=0):
        """Start an empty summary, then feed it every item of `inits`.

        `inits` defaults to an immutable empty tuple; it is only iterated.
        """
        i.name, i.pos = name, pos
        # counts: symbol -> accumulated weight
        # most / mode: largest accumulated weight so far and its symbol
        # sum: total of all weights passed to seen()
        i.counts, i.most, i.mode, i.sum = {}, -1, 0, 0.0
        for x in inits:
            i.seen(x)

    def any(i):
        "Return `one` of the symbols seen so far (`one` is defined elsewhere)."
        return one(i.counts.keys())

    def approx(i, x):
        "Symbols are not approximated: return `x` unchanged."
        return x

    def centroid(i):
        "Return the most frequently seen symbol."
        return i.mode

    def norm(i, x):
        "Symbols are not normalized: return `x` unchanged."
        return x

    def sample(i, enough=10**32):
        "Delegate to `some(counts, enough)` (`some` is defined elsewhere)."
        return some(i.counts, enough)

    def seen(i, x, inc=1.0):
        "Add weight `inc` to symbol `x`, refresh the mode, and return `x`."
        tmp = i.counts[x] = i.counts.get(x, 0) + inc
        i.sum += inc
        if tmp > i.most:
            i.most, i.mode = tmp, x
        return x


class Num(About):
    "What we know about numeric features."

    def __init__(i, name='', inits=(), keep=The.chop.keep,
                 bins=The.chop.bins, pos=0,
                 lo=The.math.inf, hi=The.math.ninf):
        """Start an empty summary, then feed it every item of `inits`.

        `keep` is passed to `Sample(size=keep)`; `bins` is the divisor used
        by bins() when thinning the sorted sample; `lo` and `hi` seed the
        running minimum and maximum.
        """
        # n: count, mu: running mean, m2: running sum of squared deviations,
        # s: running standard deviation.
        i.n = i.m2 = i.mu = i.s = 0.0
        i.kept, i._bins, i.nbins = Sample(size=keep), [], bins
        i.lo, i.hi = lo, hi
        i.name, i.pos = name, pos
        for x in inits:
            i.seen(x)

    def __add__(i, j):
        """Return a new Num fed with the cached values of `i`, then of `j`.

        Only the values held in each operand's cache are replayed.
        """
        k = Num(i.name)
        for n in i.cached():
            k.seen(n)
        for n in j.cached():
            k.seen(n)
        return k

    def __lt__(i, j):
        "Order Nums by their means."
        return i.mu < j.mu

    def any(i):
        # NOTE(review): this resolves to a module-level, two-argument `any`
        # (the builtin takes a single iterable) -- presumably supplied by
        # the star imports above; confirm.
        return any(i.lo, i.hi)

    def cached(i):
        "Return the values currently held by the sample cache."
        return i.kept._cache

    def cache(i, x):
        "Offer `x` to the sample cache."
        return i.kept.seen(x)

    def centroid(i):
        "Return the running mean."
        return i.mu

    def bins(i):
        """Return the bin boundaries, rebuilding them if they are stale.

        The boundaries are the sorted cached values thinned to every m-th
        item, where m = len(cache) // nbins. When m is zero (fewer cached
        values than bins) every sorted value is kept.
        """
        if not i._bins:
            lst = i.cached()
            i._bins = sorted(lst)
            # Floor division keeps the slice step an integer under true
            # division as well as under classic integer division.
            m = len(lst) // i.nbins
            if m > 0:
                i._bins = i._bins[::m]
        return i._bins

    def hedges(i, j, small=The.math.hedges):
        """Hedges effect size test.

        Return True when the standardized difference between the means of
        `i` and `j`, scaled by the small-sample correction factor, is below
        `small`.
        """
        denom = (i.n - 1) + (j.n - 1)
        num = (i.n - 1) * i.s ** 2 + (j.n - 1) * j.s ** 2
        # Pooled standard deviation; it cannot be estimated when there are
        # too few samples (denom <= 0).
        sp = sqrt(num / denom) if denom > 0 else 0
        if sp == 0:
            # With no spread the standardized difference is undefined
            # (division by zero), so the difference counts as small only
            # when the two means are identical.
            return i.mu == j.mu
        delta = abs(i.mu - j.mu) / sp
        c = 1 - 3.0 / (4 * (i.n + j.n - 2) - 1)
        return delta * c < small

    def norm(i, x):
        "Rescale `x` by lo..hi; `The.math.tiny` guards against a zero range."
        return (x - i.lo) * 1.0 / (i.hi - i.lo + The.math.tiny)

    def median(i):
        # The call below resolves to a module-level `median` function
        # (defined elsewhere), not to this method.
        return median(i.kept.sorted(), True)

    def sample(i, enough=10**32):
        """Yield `enough` items, each drawn with `one` from the cache.

        A counting loop is used instead of range(enough) because a
        list-building range() cannot represent the default of 10**32.
        """
        count = 0
        while count < enough:
            yield one(i.kept._cache)
            count += 1

    def saw(i):
        "Return the values currently held by the sample cache."
        return i.kept._cache

    def seen(i, x):
        "Update every statistic with `x`; return `x` converted to float."
        i._bins = None            # the old bins are now out of date
        x = float(x)
        i.cache(x)                # offer x to the kept sample
        if x < i.lo:
            i.lo = x              # new minimum
        if x > i.hi:
            i.hi = x              # new maximum
        i.n += 1                  # incrementally update mean & stdev
        delta = x - i.mu
        i.mu += delta * 1.0 / i.n
        i.m2 += delta * (x - i.mu)
        if i.n > 1:
            i.s = (i.m2 / (i.n - 1)) ** 0.5
        return x


class Nums(AutoDict):
    "An AutoDict initialised with Num as its argument."

    def __init__(i):
        AutoDict.__init__(i, Num)


def discretize(bins, num):
    """Round `num` to the midpoint of the bin it falls into.

    `bins` is a sequence of boundaries, walked as consecutive (old, new)
    pairs. The midpoint of the first pair with `num < old` or
    `old <= num < new` is returned. If no pair matches, the midpoint of the
    last pair is returned; with fewer than two boundaries the result is
    None.
    """
    mid = None
    for j in range(1, len(bins)):
        old, new = bins[j - 1], bins[j]
        mid = (old + new) * 0.5
        if num < old or old <= num < new:
            return mid
    return mid
This file is part of Timm's charming Python tricks.
© 2014, Tim Menzies:
tim.menzies@gmail.com,
http://menzies.us.
Timm's charming Python tricks are free software: you can redistribute them and/or modify them under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
Timm's charming Python tricks are distributed in the hope that they will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with Timm's charming Python tricks. If not, see http://www.gnu.org/licenses.