import sys
sys.dont_write_bytecode=True
from base import *
from lib import *

import the
    
class About(Charmed):
  # Shared parent of the column summaries in this file (Sym and Num
  # subclass it); it adds nothing of its own on top of Charmed.
  pass

class Sym(About):
  """What we know about discrete features.

  Keeps a count per symbol (`counts`), the total weight seen (`sum`),
  and the symbol with the largest count so far (`mode`, whose count
  is `most`).
  """
  def __init__(i,name='', inits=(),pos=0):
    "Start empty, then pass every item of `inits` to `seen`."
    i.name,  i.pos = name, pos
    # Before anything is seen: no counts, `most` below any real count,
    # and `mode` holding the placeholder 0.
    i.counts, i.most, i.mode, i.sum = {},-1,0,0.0
    for x in inits: i.seen(x)
  def any(i):
    "Return whatever `one` picks from the symbols seen so far."
    # Hand `one` a real list: on Python 3 `dict.keys()` is a view that
    # cannot be indexed (presumably `one` indexes its argument -- confirm).
    return one(list(i.counts))
  def approx(i,x):
    "Symbols are not approximated: return `x` unchanged."
    return x
  def centroid(i):
    "Return the most frequent symbol seen so far."
    return i.mode
  def norm(i,x):
    "Symbols are not normalized: return `x` unchanged."
    return x
  def sample(i,enough=10**32):
    "Return the result of `some(i.counts,enough)`."
    # NOTE(review): `some` is defined outside this file; presumably it
    # draws up to `enough` items from the counts -- confirm.
    return some(i.counts,enough)
  def seen(i,x,inc=1.0):
    "Add `inc` to the count of `x`, refresh the mode, and return `x`."
    tmp = i.counts[x] = i.counts.get(x,0) + inc
    i.sum += inc
    # Strictly greater: on a tie the earlier mode is kept.
    if tmp > i.most:
      i.most, i.mode = tmp, x
    return x

class Num(About):
  """What we know about numeric features.

  Incrementally tracks the count (`n`), mean (`mu`), standard
  deviation (`s`) and range (`lo`..`hi`) of the numbers given to
  `seen`, and passes each number to a `Sample` held in `kept`.
  """
  # The `The.*` defaults below are read once, when the class is defined.
  def __init__(i,name='',inits=(),keep=The.chop.keep,
               bins=The.chop.bins,pos=0,
               lo=The.math.inf,hi=The.math.ninf):
    "Start empty, then pass every item of `inits` to `seen`."
    i.n = i.m2 = i.mu = i.s = 0.0
    i.kept,i._bins,i.nbins= Sample(size=keep),[],bins
    i.lo,i.hi  = lo, hi
    i.name,    i.pos = name,  pos
    for x in inits: i.seen(x)
  def __add__(i,j):
    "Return a new Num, named after `i`, fed the cached values of both."
    # Only the name is carried over; every other setting of the new Num
    # comes from the constructor defaults.
    k = Num(i.name)
    for n in i.cached(): k.seen(n)
    for n in j.cached(): k.seen(n)
    return k
  def __lt__(i,j):
    "Order Nums by their means."
    return i.mu < j.mu
  def any(i):
    "Return `any(i.lo,i.hi)`."
    # NOTE(review): inside this body `any` is the module-level name, not
    # this method; presumably a two-argument helper from the star
    # imports (the builtin takes a single iterable) -- confirm.
    return any(i.lo,i.hi)
  def cached(i):
    "Return the values currently held by the kept sample."
    return i.kept._cache
  def cache(i,x):
    "Offer `x` to the kept sample; return what its `seen` returns."
    return i.kept.seen(x)
  def centroid(i):
    "Return the mean."
    return i.mu
  def bins(i):
    "Return break points: the sorted cache, thinned to every m-th value."
    if not i._bins:
      lst     = i.cached()
      i._bins = sorted(lst)
      # Floor division keeps the slice step an integer on Python 3 too,
      # where `/` would produce a float and break the slice below.
      m       = int(len(lst) // i.nbins)
      if m > 0:
        i._bins = i._bins[::m]
    return i._bins
  def hedges(i,j,small=The.math.hedges):
    """Hedges effect size test.

    Returns True when the corrected effect size between `i` and `j`
    is below `small`. There is no guard against a zero denominator
    (e.g. when the pooled standard deviation is zero).
    """
    num   = (i.n - 1)*i.s**2 + (j.n - 1)*j.s**2
    denom = (i.n - 1) + (j.n - 1)
    sp    = sqrt( num / denom )              # pooled standard deviation
    delta = abs(i.mu - j.mu) / sp
    c     = 1 - 3.0 / (4*(i.n + j.n - 2) - 1)  # small-sample correction
    return delta * c < small
  def norm(i,x):
    "Scale `x` by the seen range; `The.math.tiny` keeps the divisor non-zero."
    return (x- i.lo)*1.0/(i.hi- i.lo + The.math.tiny)
  def median(i):
    "Return the module-level `median` of the sorted kept sample."
    # As with `any`, the name below is the module-level function, not
    # this method.
    return median(i.kept.sorted(),True)
  def sample(i,enough=10**32):
    "Yield `enough` values, each picked by `one` from the kept sample."
    # A counting loop rather than range(enough): on Python 2, range
    # builds a list and cannot hold the default 10**32 items.
    n = 0
    while n < enough:
      yield one(i.kept._cache)
      n += 1
  def saw(i):
    "Return the values currently held by the kept sample (same as `cached`)."
    return i.cached()
  def seen(i,x):
    "Fold `x` (converted to float) into the summary and return it."
    i._bins = None   # the old bins are now out-dated
    x = float(x)
    i.cache(x)    # offer x to the kept sample
    if x < i.lo: i.lo = x     # update the min value
    if x > i.hi: i.hi = x     # update the max value
    i.n   += 1    # incrementally update mean & stdev
    delta  = x - i.mu
    i.mu  += delta*1.0/i.n
    i.m2  += delta*(x - i.mu)
    if i.n > 1:
      i.s = (i.m2/(i.n - 1))**0.5
    return x

class Nums(AutoDict):
  "An AutoDict set up with `Num` as its constructor argument."
  # NOTE(review): presumably AutoDict builds a fresh Num for each
  # missing key -- confirm against AutoDict's definition.
  def __init__(i):
    AutoDict.__init__(i, Num)

def discretize(bins,num):
  """Round `num` to the midpoint of a pair of adjacent break points.

  Walks the adjacent pairs of `bins` in order and returns the midpoint
  of the first pair whose upper value exceeds `num` (or whose lower
  value does). If no pair matches, the midpoint of the last pair is
  returned; with fewer than two break points the result is None.
  """
  mid = None
  for lo, hi in zip(bins, bins[1:]):
    mid = (lo + hi)*0.5
    if num < lo or lo <= num < hi:
      return mid
  return mid
