#!/usr/bin/gawk -f
# Select the parameters that minimize RMSE between the predicted and
# actual values over all the data.
# Tim Menzies & Scott Chen, 2005
#
# Data layout (the original header note here was truncated):
# assumes that 0 or more exponential scale factors are shown in the
# left-hand columns, followed by the effort multipliers, followed by
# program size (in Kloc) and, finally, actual effort.
#
# Fitted model, in log space:
#     log(effort) = A + Eaf + B * log(Kloc)
# where Eaf is the sum of the (transformed) scale-factor and
# effort-multiplier columns of the row.
#
# Variables read here but not initialised in BEGIN -- A, B, A0, B0, Asd,
# Bsd, Inc, Pass, Seed -- are presumably supplied as command-line
# assignments (e.g. "Pass=1 train.csv Pass=2 test.csv"); confirm against
# the calling scripts.

BEGIN {
    Repeats      = 1
    IGNORECASE   = 1
    FS = OFS     = ","
    Pred         = 30              # a row "hits" when its MRE is below Pred percent
    E            = 2.71828182846
    ScaleFactors = 0               # set to zero for cocomo-I stuff
    Skip         = 0               # set to 1 if this is a csv file and we should skip the header line
    PredN = Tests = Trains = 0
}

# One-time setup on the very first input record: a supplied A is moved
# into log space, and the random number generator is seeded.
NR == 1 {
    if (A)
        A = log(A)
    Seed = Seed ? srand(Seed) : srand(1)
}

# Strip everything from a "%" to the end of the line.
{ sub(/%.*/, "") }

# Ignore lines that are empty or whitespace-only.
/^[ \t]*$/ { next }

# Count the "@attribute" lines of each file and skip every line from
# "@relation" through "@data" (matched case-insensitively: IGNORECASE=1).
FNR == 1            { Attrs = 0 }
/@attribute/        { Attrs++ }
/@relation/,/@data/ { next }

# Skip the first Skip lines of each file.
FNR <= Skip { next }

# Transform the row in place:
#   - every column after the scale factors becomes its natural log
#     (zero or empty cells become 0);
#   - Kloc and Pm are the logged last-but-one and last columns;
#   - each scale factor becomes 0.01 * value * Kloc;
#   - Eaf is the sum of every column except the last two.
{
    for (I = ScaleFactors + 1; I <= NF; I++)
        $I = $I ? log($I) : 0
    Kloc = $(NF - 1)
    Pm   = $NF
    for (I = 1; I <= ScaleFactors; I++)
        $I = 0.01 * ($I) * Kloc
    Eaf = 0
    for (I = 1; I <= (NF - 2); I++)
        Eaf += $I
}

# Pass 1: accumulate the sums needed for a least-squares fit of
# (Pm - Eaf) against Kloc.
Pass == 1 {
    Trains++
    Sum1 += Kloc
    Sum2 += Kloc * Kloc
    Sum3 += Pm - Eaf
    Sum4 += (Pm - Eaf) * Kloc
}

# Pass 2: predict effort for the row and score the prediction.
Pass == 2 {
    for (R = 1; R <= Repeats; R++) {
        Tests++
        if (Trains) {
            if (!A || !B) {
                # Shared denominator of the least-squares solution. It is
                # exactly zero with one training row or when every row has
                # the same size, which previously died with awk's
                # "division by zero" fatal error.
                Denom = Trains * Sum2 - Sum1 * Sum1
                if (Denom == 0)
                    fail("cannot fit A and B: training sizes do not vary")
            }
            if (!A) A = (Sum2 * Sum3 - Sum1 * Sum4) / Denom
            if (!B) B = (Trains * Sum4 - Sum1 * Sum3) / Denom
        }

        # Optional sampling of A and/or B from normal distributions.
        if (Asd) A = log(normal(A0, Asd))
        if (Bsd) B = normal(B0, Bsd)

        Got  = A + Eaf + B * Kloc
        Got  = E ^ Got              # back from log space
        Want = E ^ Pm
        Re   = (Got - Want) / Want
        Mre  = Re < 0 ? -1 * Re : Re
        SumMre += Mre
        if (Mre < (Pred / 100))
            PredN++

        if (Asd || Bsd) {
            # An explicit format keeps a "%" inside Inc from being
            # interpreted as a conversion specifier.
            if (Inc)
                printf "%s", Inc OFS Attrs OFS Repeats OFS
            print E^A, B, Mre, (Mre < (Pred / 100) ? 1 : 0)
            # NOTE(review): "next" abandons the remaining repeats for this
            # row; "continue" may have been intended -- confirm.
            next
        }
        if (Inc) {
            print Inc "," E^A, B, Got, Want
        }
    }
}

END {
    if (Failed)
        exit 1
    # NOTE(review): this is true when EITHER Asd or Bsd is unset, while the
    # per-row output above triggers when either is set; "!Asd && !Bsd" may
    # have been intended -- confirm before changing.
    if ((!Asd || !Bsd) && !Inc && Pass == 2)
        report()
}

# Print the summary line: exp(A), B, mean MRE (percent) and the percentage
# of tests whose MRE was below Pred percent.
function report() {
    if (Tests == 0)
        fail("no test rows were read; nothing to report")
    if (Inc)
        printf "%s", Inc OFS Attrs OFS Tests OFS
    print E^A, B, 100 * SumMre / Tests, 100 * PredN / Tests
}

# Write an error message to stderr and terminate with exit status 1.
# The Failed flag makes the END rule exit straight away instead of
# printing a report.
function fail(msg) {
    print "error: " msg > "/dev/stderr"
    Failed = 1
    exit 1
}

# Return mean + box_muller() * standardDev.
function normal(mean, standardDev) {
    return mean + box_muller() * standardDev
}

# Return one random deviate computed with the polar Box-Muller method.
# x1, x2 and w are locals (extra-parameter idiom); callers pass nothing.
function box_muller(    x1, x2, w) {
    w = 1
    # Reject points outside the unit circle, and the origin itself, where
    # log(w)/w is undefined.
    while (w >= 1 || w == 0) {
        x1 = 2.0 * rand() - 1
        x2 = 2.0 * rand() - 1
        w  = x1 * x1 + x2 * x2
    }
    w = sqrt((-2.0 * log(w)) / w)
    return x1 * w
}