BEGIN { # command-line options
	# Defaults for the user-tunable globals.
	Samples  = 20          # handed to worker() as `samples`
	K1       = 5           # handed to worker() as `k1`
	K2       = 15          # handed to worker() as `k2`
	Tests    = 0.33        # a data row goes to Test when rand() <= Tests, else to Train
	Seed     = 1           # seed given to srand() by inits(); a false value falls back to 1
	AutoStop = 1           # NOTE(review): not read in this part of the file - confirm use
	Report   = 0           # NOTE(review): not read in this part of the file - confirm use
	OutFile  = "log.txt"   # NOTE(review): presumably an output path - confirm against its reader
}
BEGIN { # internal options
	# Settings the rest of the script relies on; not meant to be tuned per run.
	_          = SUBSEP    # short alias used to hand-build multi-part array keys
	Inf        = 10^32     # large sentinel used to initialise the min/max trackers
	CONVFMT    = "%.8g"    # number-to-string conversion format
	OFS        = ","       # output field separator
	IGNORECASE = 1         # gawk: regex matching ignores case
}

##################################################################
# main program

function main() {
	# Entry point: run worker() with the option globals Samples, K1 and K2.
	worker(Samples, K1, K2)
}
function worker(samples, k1,k2,        rankeds, ranked) {
	# Print the run settings, then run train() followed by test().
	#   samples, k1, k2 : forwarded unchanged to train() and test()
	#   rankeds, ranked : locals; train() returns `rankeds` and fills `ranked`,
	#                     and both are then handed to test()

	# -- banner: echo the settings in effect
	print "samples         : " samples
	print "k1              : " k1
	print "k2              : " k2
	print "%test           : " Tests*100
	if (Nomograms)
		print "Contrast Method : " "Nomograms"
	else
		print "Contrast Method : " "BSquared"
	print ""

	# -- training phase (Train[0] is the Train row count kept by cells())
	print "Training results on " Train[0] " historical examples (what looks useful):"
	rankeds = train(samples, k1, k2, ranked)
	#saya(ranked,"ranked")

	# -- test phase (Test[0] is the Test row count kept by cells())
	print "Test results on " Test[0] " new projects (applying the training results to new data):\n"
	test(samples, k1, k2, rankeds, ranked)
}
function train(samples,k1,k2,ranked,      \
			 projects, neighbors, memos, best, rest,\
             knearest,rankeds) {
	# Training pipeline over the Train table. Each step writes into the local
	# arrays named in its trailing arguments; the next step reads them.
	#   samples, k1, k2 : forwarded to the helper steps
	#   ranked          : array filled by rank() for the caller
	# Returns whatever rank() returns.

	# build the sample projects from Train
	projects(Train, samples, projects)
#	saya(projects,"P")

	# relate the sample projects to the Train[0] rows of Train
	neighbors(samples, projects, Train[0], Train, neighbors, memos)

	# keep the k1+k2 nearest Train rows
	knn(k1 + k2, samples, neighbors, memos, knearest)

	# split those rows into the `best` and `rest` groups
	bestRest(knearest, k1, best, rest)

	# contrast best against rest; the result goes straight back to the caller
	return rank(k1, k2, best, rest, ranked)
}
function test(samples,k1,k2,rankeds,ranked,    \
			          i,projects,neighbors,memos,knearest,\
					  m,n,sorted,kloc,row,col,data,\
					  k,r,rowKlasses) {
	# Test pipeline: pick the k1+k2 nearest Test rows, print their baseline
	# score distribution, then apply the ranges found during training.
	#   samples, k1, k2 : forwarded to the helper steps
	#   rankeds, ranked : training results, forwarded to selects()
	# Everything from `i` onward is a local. `k`, `r` and `rowKlasses` used to
	# leak into the global namespace; `r` was also never reset, so each row's
	# class values landed at ever-growing indices instead of 1..n.
	# Side effects: prints the report; refills the globals Previous and Baseline.
	projects(Test,samples,                      projects)         # different example2 projects
	neighbors(samples,projects,Test[0],Test,    neighbors,memos)  # distances example2 to Test set
	knn(k1+k2,samples,neighbors,memos,          knearest)         # knearest Test instances row numbers to example2 projects
	for(row=1;row<=Test[0];row++)
		if (row in knearest) {
			data[0]++

			# gather this row's class-column values at indices 1..r
			split("",rowKlasses,"")
			r = 0
			for (k in Klasses) {
				rowKlasses[++r] = Test[row,Klasses[k]]
			}

			kloc[++n]= int(score(rowKlasses))
			for(col=1;col<=Cols;col++)
				data[data[0],col]=Test[row,col]               # convert row numbers to their data rows
	}
	m=asort(kloc,sorted)                                          # report baseline distributions
#	print "Baseline (estimates without any project changes): "
	for(i=1;i<=m;i++)
		printf("%s ", sorted[i])

	print "\n\t\t\t\t25%\t50%\t75%"
	print "\t   Baseline:\t\t"find25(sorted) "\t" findMedian(sorted) "\t" find75(sorted)

	# publish the sorted baseline scores through the globals Previous and
	# Baseline; only Previous gets a count in slot 0
	split("",Previous,"")
	split("",Baseline,"")
	Previous[0] = m
	for (k=1; k<=Previous[0]; k++) {
		Previous[k] = sorted[k]
		Baseline[k] = sorted[k]
	}


	print "\nResults of applying the  top n-th ranges found during training\n"
	selects(k1,data,rankeds,ranked)                               # try the tricks found during training on the knearest Test instances
}
##################################################################
# read in data

              { gsub(/%.*/,"") }   # strip "%" comments; rule order below is significant
# Skip lines that are empty or whitespace-only (including lines emptied by the
# comment strip above). The old pattern only matched a line holding exactly one
# tab, so blank lines inside the data section were stored as all-empty rows.
/^[ \t]*$/    { next }
/^@project/   { In = 0 }           # leaves data mode
# While in data mode, send each record to Test with probability Tests, else Train.
In            { rand() <= Tests ? cells(Test,Cols) : cells(Train,Cols) }
/^@relation/  { Relation=$2 }
/^@attribute/ { def($2) }
/^@class/     { defclass($2) }
# Placed after the "In" rule so the "@data" line itself is not read as a row.
/^@data/      { In = 1; inits(Cols) }
/^@/          { next }

function inits(cols,  i) {
	# Called when "@data" is seen: seed the random number generator and prime
	# the per-column min/max trackers of both tables.
	#   cols : number of columns to prime (1..cols)
#	Klass = Klass < 0 ? cols + Klass + 1 : Klass
	if (Seed)
		srand(Seed)
	else
		srand(1)
	# start min at +Inf and max at -Inf so the first real value replaces both
	for (i = 1; i <= cols; i++) {
		Train["min",i] = Inf
		Test["min",i]  = Inf
		Train["max",i] = -1*Inf
		Test["max",i]  = -1*Inf
	}
}
function def(name,  a,i,goalp) {
	# Register the attribute `name` from the current "@attribute" record.
	#   name : attribute name ($2); the first "?" in it is removed, and its
	#          presence marks the column in Goal
	# Locals: a = column index, i = field cursor, goalp = 1 if "?" was found.
	# Side effects: may extend Name/Eman/Cols; replaces the column's "range"
	# stack in both Train and Test with fields $3..$NF; may set Goal[a].
	goalp  = sub(/\?/,"",name)      # "?" escaped: a bare leading "?" is not portable
	if (name in Name)  {
		a = Name[name]              # re-declaration: reuse the column index
	} else {
		a = Name[name] = ++Cols     # new attribute: next free column
		Eman[Cols]=name             # reverse map, index -> name
	}

	# Reset both range stacks. A dangling `if` used to guard only the Train
	# call; clearStack() already copes with an empty stack, so both calls are
	# now unconditional and symmetric.
	clearStack(Train, "range" _ a)
	clearStack(Test, "range" _ a)

	for(i=3;i<=NF;i++) {
		Train["range",a, ++Train["range",a,0]] = $i
		Test[ "range",a, ++Test[ "range",a,0]] = $i
	}
	if (goalp) Goal[a]=1
}	
function defclass(name,    a,i) {
	# Register the class column `name` from the current "@class" record.
	#   name : class column name ($2)
	# Locals: a = column index, i = field cursor. Both were undeclared before
	# and leaked into the global namespace.
	# Side effects: may extend Name/Eman/Cols; replaces the column's "range"
	# stack in both Train and Test with fields $3..$NF; records the column
	# index in Klasses[name].
	if (name in Name)  {
		a = Name[name]              # re-declaration: reuse the column index
	} else {
		a = Name[name] = ++Cols     # new column: next free index
		Eman[Cols]=name             # reverse map, index -> name
	}

	# Reset both range stacks. A dangling `if` used to guard only the Train
	# call; clearStack() already copes with an empty stack, so both calls are
	# now unconditional and symmetric.
	clearStack(Train, "range" _ a)
	clearStack(Test, "range" _ a)

	for(i=3;i<=NF;i++) {
		Train["range",a, ++Train["range",a,0]] = $i
		Test[ "range",a, ++Test[ "range",a,0]] = $i
	}
	Klasses[name] = Name[name]
}
function clearStack(a, key,    i, max) {
	# Empty the stack stored in `a` under `key`: slot 0 holds the element
	# count and slots 1..count hold the elements.
	#   a   : array holding the stack
	#   key : key prefix; slots are addressed as key SUBSEP index
	max = a[key _ 0]
	if (max) {
		i = 1
		while (i <= max) {
			delete a[key _ i]
			i++
		}
	}
	a[key _ 0] = 0      # count is always left at 0
}
function cells(data,cols,      col) {
	# Append the current input record to the table `data` as a new row and
	# update the per-column running max/min.
	#   data : table array; data[0] is the row count
	#   cols : number of fields to copy ($1..$cols)
	data[0] += 1
	col = 0
	while (++col <= cols) {
		data["max",col]   = max(data["max",col], $col)
		data["min",col]   = min(data["min",col], $col)
		data[data[0],col] = $col
	}
}