# Probabilistic grammar rewriting.
#
# Helpers used below but defined elsewhere in the project: append(),
# size(), getProd(), getOdds(), getRating(), getAttr(), getValue(),
# getWeight().  They are treated as opaque here.
# Globals read/written below: grammar[], projRisks[], modelRisks[].

# Convert the "value[memo]" tokens of the project string into
# project[getRating(memo)] = value.
#
#   rewritten - space-separated token string, as produced by rewrite()
#   project   - output array, keyed through getRating()
# Everything after "project" is a local.
#
# Tokens are visited left to right, so if the same memo occurs twice the
# right-most value wins.
# NOTE(review): a token without a "[memo]" suffix is stored under
# getRating("") - confirm that this is what callers expect.
function getRatings(rewritten, project,    tokens,indx,tmp,value,attr,count) {
    count = split(rewritten, tokens, " ")
    for (indx = 1; indx <= count; indx++) {
        split(tokens[indx], tmp, "[")
        value = tmp[1]
        sub(/\]/, "", tmp[2])        # drop the closing bracket of the memo
        attr = tmp[2]
        project[getRating(attr)] = value
    }
}

# Memos serve to "remember" where a terminal rewrite came from.
# Any rhs token defined in the grammar with a preceding asterisk
# will be appended to each subsequent rewrite of that token without
# affecting further rewrites.
#
# eg:
#   pets -> dogs *cats parrots
#   cats -> fluffy delicious lol
# When cats is rewritten to a terminal, say fluffy, that terminal
# will also contain the token "cats" in the form "fluffy[cats]".
# This string can be processed using getRatings(rewrittenStr, array)
# to index array using the memos, in this case, array[cats] = fluffy.
#
#   sentence - space-separated tokens to rewrite
# Returns the rewritten string, assembled with append().
# Everything after "sentence" is a local.
function rewrite(sentence,    token,memo,out,tokens,tokenIndx,count,memoStart,memoEnd) {
    out = ""
    count = split(sentence, tokens, " ")

    # Indexed loop: "for (i in tokens)" has no defined order in awk, and
    # the order decides both the output and which earlier risks are seen.
    for (tokenIndx = 1; tokenIndx <= count; tokenIndx++) {
        token = tokens[tokenIndx]
        memo = ""

        # New memo: "*name" becomes token "name" carrying memo "[name]".
        if (token ~ /\*/) {
            sub(/\*/, "", token)
            memo = append("[" token "]", memo)
        }

        # Old memo: "name[memo]".  The memo keeps its brackets so that a
        # terminal is emitted as "name[memo]", the form getRatings() parses.
        # An old memo replaces a memo created just above.
        memoStart = index(token, "[")
        memoEnd = index(token, "]")
        if (memoStart > 0 && memoEnd > memoStart) {
            memo = substr(token, memoStart, memoEnd - memoStart + 1)
            sub(/\[.*\]/, "", token)
        }

        # "in" avoids creating an empty grammar entry for every terminal.
        if ((token in grammar) && grammar[token] != "")
            out = append(rewriteToken(token, memo), out)    # rewritable
        else
            out = append(token memo, out)                   # terminal
    }
    return out
}

# Pick one production of "token" at random, weighted by its (risk
# adjusted) odds, and rewrite it recursively.
#
#   token - a key of grammar[]
#   memo  - memo text concatenated onto the chosen production
# Returns the rewritten string.  Everything after "memo" is a local.
function rewriteToken(token, memo,    rewrites,sumodds,weightedOdds,choice,roll,i,rewritten,count) {
    getRewrites(grammar[token], rewrites)
    count = rewrites[size()]

    if (count > 1) {
        adjustOddsForRisk(token, rewrites)

        # Cumulative odds: weightedOdds[i] = odds[1] + ... + odds[i].
        sumodds = 0
        split("", weightedOdds, "")
        for (i = 1; i <= count; i++) {
            sumodds += rewrites[getOdds(i)]
            weightedOdds[i] = sumodds
        }

        # First production whose cumulative odds exceed the roll; falls
        # back to production 1 when none does.
        roll = rand() * sumodds
        choice = 1
        for (i = 1; i <= count; i++) {
            if (weightedOdds[i] > roll) {
                choice = i
                break
            }
        }

        rewritten = rewrite(rewrites[getProd(choice)] memo)
        # Risks are recorded only after the chosen production has been
        # rewritten, so they affect later rewrites, not this subtree.
        markNewRisks(token, choice)
    } else {
        rewritten = rewrite(rewrites[getProd(1)] memo)
    }
    return rewritten
}

# Scale down the odds of productions flagged in projRisks[attr, i], then
# normalize all odds so they sum to 1.
#
#   attr     - the token being rewritten
#   rewrites - array filled by getRewrites(); its odds are modified in place
# Risk weight 4 scales by 0.001, weight 2 by 0.01, weight 1 by 0.1.
# Everything after "rewrites" is a local.
function adjustOddsForRisk(attr, rewrites,    sum,i,count,risk) {
    count = rewrites[size()]
    sum = 0
    for (i = 1; i <= count; i++) {
        risk = projRisks[attr, i]
        if (risk == 4)
            rewrites[getOdds(i)] = rewrites[getOdds(i)] * 0.001
        else if (risk == 2)
            rewrites[getOdds(i)] = rewrites[getOdds(i)] * 0.01
        else if (risk == 1)
            rewrites[getOdds(i)] = rewrites[getOdds(i)] * 0.1
        sum += rewrites[getOdds(i)]
    }

    # Normalize every production, including the last one.  A zero sum is
    # left untouched because division by zero is fatal in awk.
    if (sum != 0) {
        for (i = 1; i <= count; i++)
            rewrites[getOdds(i)] /= sum
    }
}

# Record in projRisks[] every risk that modelRisks[attr, value] lists.
#
#   attr  - the token that was rewritten
#   value - index of the production that was chosen
# modelRisks[attr, value] is a space-separated list of risk strings in the
# form parsed by splitRisk().  Everything after "value" is a local.
function markNewRisks(attr, value,    r,newRisks,risk,count) {
    count = split(modelRisks[attr, value], newRisks, " ")
    for (r = 1; r <= count; r++) {
        splitRisk(newRisks[r], risk)
        # print "because " attr " was rewritten to " value " the attr: " risk[getAttr()] " is bad if rewritten to " risk[getValue()] " by a weight of " risk[getWeight()]
        projRisks[risk[getAttr()], risk[getValue()]] = risk[getWeight()]
    }
}

# Split a risk string "attr@value=weight" into riskArr[getAttr()],
# riskArr[getValue()] and riskArr[getWeight()].
# Everything after "riskArr" is a local.
function splitRisk(riskStr, riskArr,    tmp1,tmp2) {
    split(riskStr, tmp1, "@")
    split(tmp1[2], tmp2, "=")
    riskArr[getAttr()] = tmp1[1]
    riskArr[getValue()] = tmp2[1]
    riskArr[getWeight()] = tmp2[2]
}

# Split oddStr "rhs1:odd1|rhs2:odd2|rhs3:odd3" into the array result
#   where result[size()] is the number of rewrites
#   where result[getProd(i)] is the ith production
#   where result[getOdds(i)] is the odds of production i
# Everything after "result" is a local.
function getRewrites(oddStr, result,    pairs,prod,odds,tmp,i) {
    result[size()] = split(oddStr, pairs, "|")    # production:odds pairs
    for (i = 1; i <= result[size()]; i++) {
        split(pairs[i], tmp, ":")                 # production, odds
        prod = tmp[1]
        odds = tmp[2]
        result[getProd(i)] = prod
        result[getOdds(i)] = odds
    }
}

# Convert the grammar file grammarIn to the form
#   odds lhs -> rhs
# and write it to the file formalized.
#
# Two kinds of input rule are handled:
#   lhs ->  rhs...   copied through with odds 1.0
#   lhs -%> sym      expanded to one rule per rhs token collected for sym
#                    anywhere in the file, each with equal odds
# Lines whose first field starts with "#" and blank lines are skipped.
# Side effects: sets the globals FS and OFS to " " and overwrites $0.
# Everything after "formalized" is a local.
function formalize(grammarIn, formalized,    rules,lhs,options,num,odds,i,tmp) {
    FS = OFS = " "

    # Pass 1: blindly grab every rhs token, keyed by lhs.
    while ((getline < grammarIn) > 0) {
        if ($1 ~ /^[ \t]*#/) { continue }    # skip comments
        if ($0 ~ /^[ \t]*$/) { continue }    # skip blank lines
        for (i = 3; i <= NF; i++) {
            rules[$1] = rules[$1] " " $i
        }
    }
    close(grammarIn)

    # Pass 2: output the formalized grammar file.
    while ((getline < grammarIn) > 0) {
        if ($1 ~ /^[ \t]*#/) { continue }    # skip comments
        if ($0 ~ /^[ \t]*$/) { continue }    # skip blank lines
        if ($2 ~ "->") {
            print "1.0 " $0 > formalized
        }
        if ($2 ~ "-%>") {
            lhs = $1
            options = rules[$3]
            num = split(options, tmp, " ")
            # No options collected: emit nothing instead of dividing by 0.
            if (num > 0) {
                odds = 1 / num
                for (i = 1; i <= num; i++) {
                    print odds " " lhs " -> " tmp[i] > formalized
                }
            }
        }
    }
    close(formalized)
    close(grammarIn)
}

# Shove the grammar into an internal data structure.
# Each key in grammar gives all possible rewrites and their odds
# in the format grammar[lhs] = rhs1:odds|rhs2:odds|rhs3:odds
# User accesses this structure using getRewrites(grammar[lhs]).
#
#   formalized - file written by formalize(), lines "odds lhs -> rhs..."
#   grammar    - output array
# Everything after "grammar" is a local.
function readGrammar(formalized, grammar,    lhs,rhs,prob,i) {
    while ((getline < formalized) > 0) {
        # everything from $4 onward goes into rhs
        rhs = $4
        for (i = 5; i <= NF; i++)
            rhs = rhs " " $i
        lhs = $2
        prob = $1
        if ((lhs in grammar) && grammar[lhs] != "")
            grammar[lhs] = grammar[lhs] "|" rhs ":" prob    # append
        else
            grammar[lhs] = rhs ":" prob
    }
    # Close so the file can be read again from the start later in the run.
    close(formalized)
}