# rank.awk -- assign ranks to a list of numbers, one number per input record.
#
# Worked example (tied values share the average of the positions they occupy):
# n:     1 2 3 4   5   6 7 8 9 10 11 12   13
# data:  1 1 1 2   2   3 4 5 5 5  6  8    8
# ranks: 2 2 2 4.5 4.5 6 7 9 9 9  11 12.5 12.5
#
# takes about 1/8th of a second to rank 10,000 numbers
#
# demos
#   cat rank1.dat | gawk -f rank.awk | sort -n
#   time gawk -f rank.awk --source 'BEGIN{_stress(); exit}'

# Main rule: append the first field of every record to Data.
# Data[0] doubles as the running element count.
{ Data[++Data[0]]=$1}

# After all input is read: rank the collected values and print each
# index of Ranks with its rank. `for (I in Ranks)` visits indices in an
# unspecified order, which is why the demo pipes the output through `sort -n`.
END {
  rank(Data,Ranks)
  for(I in Ranks) print I, Ranks[I]
}

# _stress: timing helper used by the second demo command.
# Fills a local array with 10000 values of the form int(rand()/r)*r
# (rand() quantised to multiples of r=0.05, so many values tie) and ranks them.
# Names after the extra space in the parameter list are locals, not arguments.
# NOTE(review): local `i` is declared but never used in this function.
function _stress( n,r,i,data,ranks) {
  n=10000
  r=0.05
  while(n--) data[++data[0]]=int(rand() / r) * r
  rank(data,ranks)
}

# rank(data, ranks): sorts `data` and works out ranks for its values.
#   data  - array of values; slot 0 (the callers' counter) is removed first.
#           The array is modified: asort() replaces it with its sorted values.
#   ranks - output array (how it is filled is in the part of the function
#           that is not visible in this excerpt).
# Names after the extra space in the parameter list are locals.
# NOTE(review): `j`, used in the inner loop below, is not in the local list,
#               so it is a global variable -- confirm that is intended.
function rank(data,ranks, starter,n,old,start,skipping,sum,i,r) {
  # Drop the element counter so it is not sorted as if it were data.
  delete data[0]
  # Sentinel meaning "no previous value seen yet".
  # NOTE(review): a data value equal to "x" would compare equal to the
  #               sentinel -- confirm inputs are always numeric.
  starter="x";
  # gawk asort(): sorts the values in place, reindexes them 1..n, returns n.
  n = asort(data)
  old = starter
  # Index at which the current run of equal values starts.
  start = 1;
  for(i=1;i<=n;i++) {
    # Still inside a run when nothing has been seen yet or the value repeats.
    skipping = (old == starter) || (data[i] == old);
    if (skipping) {
      # Accumulate the positions belonging to the current run.
      sum += i
    } else {
      # NOTE(review): this looks like leftover debug output; it is written to
      #               stdout together with the ranks printed by END -- confirm.
      print "sum ",sum," i ",i," start ", start
      # Sum of accumulated positions divided by (i - start).
      r = sum/(i - start)
      for(j=start;j