#!/usr/bin/gawk -f
# Start-up configuration: input and output are both comma-separated, and the
# two report files are written under fixed names in the current directory.
BEGIN {
	# Output files: per-comparison Z scores, and per-key tie/win/loss counts.
	RanksFileName = "rankResults";
	RawResultsFileName = "rawResults";

	# Same separator for reading fields and for joining print arguments.
	OFS = ",";
	FS = OFS;
}

# Per-record rule for lines of the form  key,got,want .
# Computes the magnitude of the relative error of "got" against "want" as a
# percentage and appends it to the list of observations kept for that key:
#   MreArray[key,i]  i-th observation for key (i = 1..N[key])
#   MreArray[key,0]  running count used to generate the next index
#   N[key]           number of observations stored for key
# Records whose third field is zero or non-numeric are skipped with a warning,
# because dividing by it would make gawk abort with a fatal
# "division by zero attempted" error (this also covers a textual header row).
NF==3 {
	# NOTE(review): Results is not read by any code visible in this file.
	Results[0,0] = 0;
	key = $1;
	Got = $2;
	Want = $3;
	if (Want+0 == 0)
	{
		printf("%s:%d: skipping record with zero or non-numeric expected value: %s\n", FILENAME, FNR, $0) > "/dev/stderr";
	}
	else
	{
		Mre = (Got-Want)/Want;
		# absolute value, scaled to a percentage
		Mre = (Mre < 0 ? -1*Mre : Mre)*100;
		MreArray[key,++MreArray[key,0]] = Mre;
		N[key]++;
	}
}

# After all input has been read: compare every unordered pair of keys once
# with analyze(), then write the per-key tie/win/loss summary.
# Output:
#   RawResultsFileName  header line here; analyze() adds one line per pair
#   RanksFileName       header line plus one "median" line per key
#   stdout              analyze() prints a running pair counter; a newline is
#                       printed here once all pairs are done
END {
	print "keyCounter, firstKey, N[firstKey], secondKey, N[secondKey], firstKeyZ, secondKeyZ" > RawResultsFileName;

	for (firstKey in N)
	{
		for (secondKey in N)
		{
			if (firstKey == secondKey)
				continue;

			# A pair is recorded under the order in which it was first seen;
			# checking both orders ensures two keys are never compared twice.
			# "in" is used so that the membership test itself does not create
			# empty elements in tempKeyArray.
			newKey = firstKey FS secondKey;
			newKeyReverse = secondKey FS firstKey;
			if ((newKey in tempKeyArray) || (newKeyReverse in tempKeyArray))
				continue;

			keyCounter++;
			tempKeyArray[newKey] = 1;
			analyze(firstKey, secondKey);
		}
	}
	print "";

	# The same ">" redirection is used for every line written to the ranks
	# file: awk opens the file on the first print and keeps writing to that
	# open stream, so mixing ">" and ">>" for one file is unnecessary.
	print "key,ties,wins,losses,comparison" > RanksFileName;
	for (key in N)
	{
#		print key, tie[key]+0, winSign[key]+0, lossSign[key]+0, "sign" > RanksFileName;
		# "+0" turns a never-incremented (empty) counter into a printed 0
		print key, tie[key]+0, winMedianRank[key]+0, lossMedianRank[key]+0, "median" > RanksFileName;
	}
}

# analyze(firstKey, secondKey)
#
# Compares the observations stored for two keys with a rank-sum
# (Mann-Whitney U) test using the normal approximation, and updates the
# global tallies accordingly.
#
# Reads globals:   N[key], MreArray[key,1..N[key]], keyCounter,
#                  RawResultsFileName
# Updates globals: tie[key], winMedianRank[key], lossMedianRank[key]
# Output:          one line "keyCounter,firstKey,N,secondKey,N,Z1,Z2" to
#                  RawResultsFileName and "keyCounter " to stdout
#
# Decision rule: if the larger of the two Z scores is within [0, 1.96]
# (95% confidence) both keys get a tie. Otherwise the key whose median
# observation has the lower rank in the combined ranking wins (lower error
# is better); equal median ranks also count as a tie for both.
#
# Only the first two parameters are meant to be passed by callers; every
# other name in the parameter list is an awk-style local variable.
function analyze(firstKey, secondKey, tempArray, tempRanksArray, tempFirstArray, tempSecondArray,
		m, n, total, counter, sameRankIndex, tempIndexSum, tempCounter, newRankIndex,
		searchKey, searchCounter, searchRankSum, firstKeySum, secondKeySum,
		firstKeyU, secondKeyU, meanU, sdU, firstKeyZ, secondKeyZ, Z,
		firstMedianIndex, secondMedianIndex, firstMedianRank, secondMedianRank)
{
	m = N[firstKey];
	n = N[secondKey];
	total = m + n;

	#put the raw data from the two sets together: first group in 1..m,
	#second group in m+1..m+n
	for (counter = 1; counter <= m; counter++)
	{
		tempArray[counter] = MreArray[firstKey,counter];
	}
	for (counter = 1; counter <= n; counter++)
	{
		tempArray[m+counter] = MreArray[secondKey,counter];
	}

	#sort the array containing both sets' raw data (ascending)
	asort(tempArray);

	#generate the ranks; a run of equal values [counter..sameRankIndex] shares
	#the average of the positions it occupies. The run is allowed to extend
	#up to and including the last element.
	for (counter = 1; counter <= total; counter = sameRankIndex + 1)
	{
		sameRankIndex = counter;
		tempIndexSum = counter;
		while (sameRankIndex < total && tempArray[sameRankIndex+1] == tempArray[counter])
		{
			sameRankIndex++;
			tempIndexSum += sameRankIndex;
		}

		newRankIndex = tempIndexSum / (sameRankIndex - counter + 1);
		for (tempCounter = counter; tempCounter <= sameRankIndex; tempCounter++)
		{
			tempRanksArray[tempCounter,1] = tempArray[counter];
			tempRanksArray[tempCounter,2] = newRankIndex;
		}
	}

	#sum the ranks of the smaller group (for speed); the other group's sum
	#follows from the total of all ranks, total*(total+1)/2.
	#The sum runs over the group's own observations, so a value that also
	#occurs in the other group is counted only as often as it occurs here.
	if (m <= n)
		searchKey = firstKey;
	else
		searchKey = secondKey;

	searchRankSum = 0;
	for (searchCounter = 1; searchCounter <= N[searchKey]; searchCounter++)
	{
		searchRankSum += analyzeRankOf(tempRanksArray, total, MreArray[searchKey,searchCounter]);
	}

	if (m <= n)
	{
		firstKeySum = searchRankSum;
		secondKeySum = total*(total+1)/2 - searchRankSum;
	}
	else
	{
		firstKeySum = total*(total+1)/2 - searchRankSum;
		secondKeySum = searchRankSum;
	}

	#U statistics and their normal approximation
	firstKeyU = firstKeySum - m*(m+1)/2;
	secondKeyU = secondKeySum - n*(n+1)/2;

	meanU = m*n/2;
	sdU = (m*n*(m+n+1)/12)^(0.5);

	firstKeyZ = (firstKeyU - meanU)/sdU;
	secondKeyZ = (secondKeyU - meanU)/sdU;

	print keyCounter, firstKey, m, secondKey, n, firstKeyZ, secondKeyZ > RawResultsFileName;
	printf ("%d ", keyCounter);

	#since the two Z values are equal in magnitude but opposite in sign, the
	#non-negative one is enough to be compared to the critical value of 1.96
	#at 95% confidence
	if (firstKeyZ > secondKeyZ)
		Z = firstKeyZ;
	else
		Z = secondKeyZ;

	if (Z >= 0 && Z <= 1.96)
	{
		tie[firstKey]++;
		tie[secondKey]++;
		return;
	}

	#statistically different: decide win vs. loss based on the rank of each
	#group's median observation
	for (counter = 1; counter <= m; counter++)
	{
		tempFirstArray[counter] = MreArray[firstKey,counter];
	}
	for (counter = 1; counter <= n; counter++)
	{
		tempSecondArray[counter] = MreArray[secondKey,counter];
	}

	asort(tempFirstArray);
	asort(tempSecondArray);

	#middle element for odd sizes, lower of the two middle elements for even
	firstMedianIndex = int((m+1)/2);
	secondMedianIndex = int((n+1)/2);

	firstMedianRank = analyzeRankOf(tempRanksArray, total, tempFirstArray[firstMedianIndex]);
	secondMedianRank = analyzeRankOf(tempRanksArray, total, tempSecondArray[secondMedianIndex]);

	#since the ranking is about MREs, the lower rank (lower MRE) is the winner
	if (firstMedianRank < secondMedianRank)
	{
		winMedianRank[firstKey]++;
		lossMedianRank[secondKey]++;
	}
	else if (firstMedianRank > secondMedianRank)
	{
		lossMedianRank[firstKey]++;
		winMedianRank[secondKey]++;
	}
	else
	{
		#identical median ranks: counted as a tie for both keys
		tie[firstKey]++;
		tie[secondKey]++;
	}
}

# Helper for analyze(): returns the rank stored for value in a ranking table
# where ranks[i,1] is the i-th sorted value and ranks[i,2] its (tie-averaged)
# rank, for i = 1..total. All equal values share one rank, so the first match
# is sufficient. Returns 0 if the value is not present. "i" is a local.
function analyzeRankOf(ranks, total, value,    i)
{
	for (i = 1; i <= total; i++)
	{
		if (ranks[i,1] == value)
			return ranks[i,2];
	}
	return 0;
}