#Convert the "value[memo]" tokens of a rewritten project string into
#entries of the project array.
#	rewritten - space-separated string of value[memo] tokens
#	project   - array filled in place: project[getRating(memo)] = value
#Tokens are visited in awk's for-in order, which is unspecified.
function getRatings(rewritten, project, 	words,k,parts,memo) {
	split(rewritten, words, " ")
	for (k in words) {
		#parts[1] is the value, parts[2] is "memo]"
		split(words[k], parts, "[")
		memo = parts[2]
		sub(/\]/, "", memo) #drop the closing bracket
		project[getRating(memo)] = parts[1]
	}
}

#Memos serve to "remember" where a terminal rewrite came from.
#Any rhs token defined in the grammar with a preceding asterisk
#will be appended to each subsequent rewrite of that token without
#affecting further rewrites.
#
#eg:
#pets -> dogs *cats parrots
#cats -> fluffy delicious lol
#When cats is rewritten to a terminal, say fluffy, that terminal
#will also contain the token "cats" in the form "fluffy[cats]".
#This string can be processed using getRatings(rewrittenStr, array)
#to index array using the memos.
#
#rewrite(sentence) rewrites every space-separated token of sentence:
#	- a token found in the global grammar array is expanded through
#	  rewriteToken(token, memo)
#	- any other token is terminal and is emitted as token memo,
#	  i.e. "token[memo]" when a memo is attached
#The pieces are accumulated with append() and the result is returned.
function rewrite(sentence,	token,memo,new,tokens,tokenIndx,count) {
	new = "" #empty
	count = split(sentence,tokens," ")

	#walk the tokens left to right; the order of for (x in array) is
	#unspecified in awk, which made the rewrite order unpredictable
	for (tokenIndx = 1; tokenIndx <= count; tokenIndx++) {
		token = tokens[tokenIndx]

		#process memos
		memo = ""
		if (token ~ /\*/) { #new memo found
			sub(/\*/,"",token) #remove '*'
			memo = append("["token"]",memo)
		}

		#old memo found: a "[...]" suffix carried over from an earlier
		#rewrite. The pattern must escape the brackets; an unescaped
		#[.*] is a bracket expression matching a literal '.' or '*'.
		if (match(token, /\[.*\]/)) {
			#keep the brackets so a terminal is emitted as token[memo]
			memo = substr(token, RSTART, RLENGTH)
			sub(/\[.*\]/,"",token)
		}

		#If token can be rewritten, rewrite it
		if (grammar[token])
			new = append(rewriteToken(token, memo), new)
		#otherwise, we've terminated, return the token[memo]
		else
			new = append(token memo, new)
	}
	return new
}

#Choose one production of token from the global grammar and rewrite it.
#	token - lhs whose productions are stored in grammar[token]
#	memo  - memo text concatenated onto the chosen production before
#	        it is passed back to rewrite()
#With a single production it is used directly. With several, the odds
#are first adjusted by adjustOddsForRisk(), one production is drawn at
#random in proportion to its odds, and markNewRisks() is called for the
#choice after the production has been rewritten.
#Returns the rewritten string.
function rewriteToken(token, memo,	options,total,cumulative,pick,dart,k,result,n) {
	getRewrites(grammar[token], options)
	n = options[size()]

	#nothing to choose between: no roll, no risks recorded
	if (!(n > 1))
		return rewrite(options[getProd(1)] memo)

	adjustOddsForRisk(token, options)

	#running totals of the odds: cumulative[k] = odds(1) + ... + odds(k)
	total = 0
	split("",cumulative,"")
	k = 1
	while (k <= n) {
		total += options[getOdds(k)]
		cumulative[k] = total
		k++
	}

	#the first running total above the roll wins; default to production 1
	dart = rand() * total
	pick = 1
	k = 1
	while (k <= n) {
		if (cumulative[k] > dart) {
			pick = k
			break
		}
		k++
	}

	#rewrite first, then record the risks this choice introduces
	result = rewrite(options[getProd(pick)] memo)
	markNewRisks(token, pick)

	return result
}

#Scale down the odds of productions that the global projRisks array
#marks as risky for attr, then normalize the odds so they sum to 1.
#	attr     - lhs token whose productions are being weighed
#	rewrites - array as built by getRewrites(); the getOdds(i) entries
#	           are modified in place
#projRisks[attr,i] weights of 4, 2 and 1 multiply the odds of
#production i by 0.001, 0.01 and 0.1 respectively; any other value
#leaves the odds untouched.
function adjustOddsForRisk(attr, rewrites, 	sum,i,n,risk) {
	n = rewrites[size()]
	sum = 0

	for(i = 1; i <= n; i++) {
		#membership test avoids creating empty projRisks entries
		risk = ((attr,i) in projRisks) ? projRisks[attr,i] : 0

		if (risk == 4)
			rewrites[getOdds(i)] = rewrites[getOdds(i)] * 0.001
		else if (risk == 2)
			rewrites[getOdds(i)] = rewrites[getOdds(i)] * 0.01
		else if (risk == 1)
			rewrites[getOdds(i)] = rewrites[getOdds(i)] * 0.1

		sum += rewrites[getOdds(i)]
	}

	#nothing to normalize; dividing by zero is a fatal error in awk
	if (sum == 0)
		return

	#normalize every production, including the last one
	for(i = 1; i <= n; i++) {
		rewrites[getOdds(i)] /= sum
	}
}

#Record in the global projRisks array every risk that the global
#modelRisks array lists for attr having been rewritten to value.
#	attr  - token that was rewritten
#	value - the rewrite that was chosen for it
#modelRisks[attr,value] is a space-separated list of risk strings; each
#is decoded by splitRisk() and stored as
#	projRisks[riskAttr, riskValue] = riskWeight
function markNewRisks(attr, value,	k,pending,parsed,riskAttr,riskValue) {
	split(modelRisks[attr,value], pending, " ")

	for (k in pending) {
		splitRisk(pending[k], parsed)
		riskAttr = parsed[getAttr()]
		riskValue = parsed[getValue()]

#		print "because " attr " was rewritten to " value " the attr: " riskAttr " is bad if rewritten to " riskValue " by a weight of " parsed[getWeight()]

		projRisks[riskAttr, riskValue] = parsed[getWeight()]
	}
}

#Decode a risk string of the form "attr@value=weight" into riskArr.
#	riskStr - the encoded risk
#	riskArr - filled in place under the keys getAttr(), getValue()
#	          and getWeight()
function splitRisk(riskStr, riskArr,	halves,pair) {
	#text before '@' is the attribute
	split(riskStr, halves, "@")
	riskArr[getAttr()] = halves[1]

	#text after '@' is "value=weight"
	split(halves[2], pair, "=")
	riskArr[getValue()] = pair[1]
	riskArr[getWeight()] = pair[2]
}


#Unpack oddStr, formatted as "rhs1:odd1|rhs2:odd2|rhs3:odd3", into the
#array result:
#	result[size()]     - the number of rewrites
#	result[getProd(i)] - the ith production
#	result[getOdds(i)] - the odds of production i
function getRewrites(oddStr, result, 	chunks,fields,count,k) {
	#one chunk per production:odds pair
	count = split(oddStr, chunks, "|")
	result[size()] = count

	k = 0
	while (++k <= count) {
		#fields[1] is the production, fields[2] its odds
		split(chunks[k], fields, ":")
		result[getProd(k)] = fields[1]
		result[getOdds(k)] = fields[2]
	}
}

#Convert the grammar file grammarIn to this form, written to the file
#formalized:
#	odds lhs -> rhs
#Input rules are whitespace-separated "lhs op rhs..." lines:
#	lhs -> rhs...   is copied through with odds 1.0
#	lhs -%> other   becomes one rule "odds lhs -> x" for every rhs
#	                token x collected for other, with equal odds
#Lines whose first field starts with '#' and blank lines are skipped.
#Side effects: sets FS and OFS to " " and, because it reads with plain
#getline, overwrites $0 and NF.
function formalize(grammarIn, formalized, 	rules,lhs,options,num,odds,i,tmp) {
	FS = OFS = " "

	#pass 1: blindly grab every rewrite, keyed by lhs
	while ((getline < grammarIn ) > 0) {
		if ($1 ~ /^[ \t]*#/) { continue } #skip comments
		if ($0 ~ /^[ \t]*$/) { continue } #skip blank lines

		for (i = 3; i <= NF; i++) {
			rules[$1] = rules[$1] " " $i
		}
	}

	#close so the second pass starts again from the first line
	close(grammarIn)

	#pass 2: output formalized grammar file
	while ((getline < grammarIn ) > 0) {
		if ($1 ~ /^[ \t]*#/) { continue } #skip comments
		if ($0 ~ /^[ \t]*$/) { continue } #skip blank lines

		if ($2 ~ "->") { print "1.0 " $0 > formalized }
		if ($2 ~ "-%>") {
			lhs = $1
			options = rules[$3]
			num = split(options,tmp," ")
			if (num == 0) {
				#no rewrites were collected for $3; 1 / num would be a
				#fatal division by zero, so report and skip the rule
				print "formalize: no rewrites known for '" $3 "', skipping: " $0 > "/dev/stderr"
				continue
			}
			odds = 1 / num
			for (i = 1; i <=num; i++) {
				print odds " " lhs " -> " tmp[i] > formalized
			}
		}
	}

	close(formalized)
	close(grammarIn)
}

#Load the formalized grammar file into an internal data structure.
#Each key in grammar gives all possible rewrites and their odds
#in the format grammar[lhs] = "rhs1:odds|rhs2:odds|rhs3:odds".
#The user accesses this structure using getRewrites(grammar[lhs], arr).
#	formalized - file of "odds lhs -> rhs..." lines
#	grammar    - array filled in place
#Because it reads with plain getline, $0 and NF are overwritten.
function readGrammar(formalized, grammar,	lhs,rhs,prob,i) {
	while ((getline < formalized) > 0) {
		#blank or truncated lines carry no rule; storing them would
		#create junk entries such as grammar[""]
		if (NF < 3)
			continue

		#everything from $4 onward goes into rhs
		rhs = $4
		for (i = 5; i <= NF; i++)
			rhs = rhs " " $i
		lhs = $2
		prob = $1
		if (grammar[lhs]) #append
			grammar[lhs] = grammar[lhs]"|"rhs":"prob
		else
			grammar[lhs] = rhs":"prob
	}

	#release the file so it can be read again from the start later
	close(formalized)
}