# runTimeVsK <csv> <outName> <titlePrefix>
#
# Plots run-time against K (number of clusters) for the genic, kmeans and
# canopy rows of a results CSV.
#   $1  results CSV; lines containing '#' are skipped when collecting K values
#   $2  output file name, created under $Ourmine/data/results/step/graphs/
#   $3  text placed at the start of the plot title
# Field 5 supplies the K value (every 'k' character is stripped from the data
# line before plotting) and field 7 the run-time. Each point is whatever the
# external `median` filter (not defined in this file section) prints for the
# matching rows.
# Side effects: sets the globals `file` and `sizes`; creates and then removes
# tmp.dat and tmp.plt in the current directory; runs gnuplot.
runTimeVsK() {
	local data=$1
	file=$Ourmine/data/results/step/graphs/$2
	sizes=`cat $data | grep -v \# | cut -d, -f 5 | sort -n | uniq`
	(for size in $sizes; do
		# NOTE(review): index() is a substring test, so a size of "k2" also
		# selects rows whose field 5 is "k20" -- confirm this is intended.
		m1=`cat $data | grep genic | gawk 'BEGIN{FS=","} index($5,k)!=0 {print $7}' k=$size - | median`
		m2=`cat $data | grep kmeans | gawk 'BEGIN{FS=","} index($5,k)!=0 {print $7}' k=$size - | median`
		m3=`cat $data | grep canopy | gawk 'BEGIN{FS=","} index($5,k)!=0 {print $7}' k=$size - | median`
		# Progress goes to stderr: stdout of this subshell is sorted straight
		# into tmp.dat, so echoing to stdout would add a bogus data row.
		echo "size= $size, m1 = $m1, m2 = $m2, m3 = $m3" >&2
		line=${size}X${m1}X${m2}X${m3}
		echo $line | sed 's/X/\t/g' | sed 's/k//g'
	done) | sort -n > tmp.dat

	echo "set terminal postscript" > tmp.plt
#	echo "set log x" >> tmp.plt
#	echo "set log y" >> tmp.plt
	echo "set autoscale x" >> tmp.plt
	echo "set autoscale y" >> tmp.plt
	echo "set xlabel \"K (number of clusters)\"" >> tmp.plt
	echo "set ylabel \"Run-time (seconds)\"" >> tmp.plt
	echo "set output '$file'" >> tmp.plt
	echo "set title '$3 --- Number of Clusters vs. Run-time'" >> tmp.plt
	echo "plot 'tmp.dat' using 1:2 with linespoints lt 1 pt 8  t \"genic\"," \
		 "'tmp.dat' using 1:3 with linespoints lt 1 pt 4 t \"kmeans\"" \
		 ",'tmp.dat' using 1:4 with linespoints lt 1 pt 6 t \"canopy\"" >> tmp.plt
	echo "load 'tmp.plt'" | gnuplot
	rm tmp.plt tmp.dat
}

# runTimeVsN <csv> <outName> <titlePrefix>
#
# Plots run-time against N (number of attributes) for the genic, kmeans and
# canopy rows of <csv>. Field 10 supplies the attribute count ('n' and '_'
# characters are stripped from the data line) and field 7 the run-time, reduced
# with the external `median` filter. The plot goes to
# $Ourmine/data/results/step/graphs/<outName>; tmp.dat and tmp.plt are
# scratch files in the current directory and are removed afterwards.
# Sets the globals `file` and `numAtts`.
runTimeVsN() {
	local data=$1
	# awk filter: rows whose field 10 contains n -> print their field 7
	local pickTime='BEGIN{FS=","} index($10,n)!=0 {print $7}'
	file=$Ourmine/data/results/step/graphs/$2
	numAtts=$(cat $data | grep -v \# | cut -d, -f 10 | sort -n | uniq)

	# One tab-separated row per attribute count: N, genic, kmeans, canopy.
	for num in $numAtts; do
		m1=$(cat $data | grep genic | gawk "$pickTime" n=$num - | median)
		m2=$(cat $data | grep kmeans | gawk "$pickTime" n=$num - | median)
		m3=$(cat $data | grep canopy | gawk "$pickTime" n=$num - | median)
		blabln "attrCount=$num, m1 = $m1, m2 = $m2, m3 = $m3"
		echo ${num}X${m1}X${m2}X${m3} | sed 's/X/\t/g' | sed 's/n//g' | sed 's/_//g'
	done | sort -n > tmp.dat

	# Build the gnuplot script in one go (log-scale axes are not enabled).
	{
		echo "set terminal postscript"
		echo "set autoscale x"
		echo "set autoscale y"
		echo "set xlabel \"N (number of attributes)\""
		echo "set ylabel \"Run-time (seconds)\""
		echo "set output '$file'"
		echo "set title '$3 --- Attribute Count vs. Run-time'"
		echo "plot 'tmp.dat' using 1:2 with linespoints lt 1 pt 8  t \"genic\"," \
			 "'tmp.dat' using 1:3 with linespoints lt 1 pt 4 t \"kmeans\"" \
			 ",'tmp.dat' using 1:4 with linespoints lt 1 pt 6 t \"canopy\""
	} > tmp.plt

	echo "load 'tmp.plt'" | gnuplot
	rm tmp.plt tmp.dat
}

# aggSimVsN <outName> <titlePrefix>
#
# Plots similarity against N (number of attributes) for the k-means runs on
# dimension-reduced data (PCA and FastMap result files).
#   $1  output file name, created under $Ourmine/data/results/step/graphs/
#   $2  text placed at the start of the plot title
# full.dat (genic/kmeans/canopy on the full data; field 8 = external,
# field 9 = internal similarity, keyed by field 10) and ica.dat are still
# generated, but their plot clauses are currently commented out.
# Side effects: sets the globals f1..f4, file and numAtts; leaves full.dat,
# pca.dat, ica.dat, fm.dat and tmp.plt in the current directory (the cleanup
# line is commented out); runs gnuplot.
aggSimVsN() {
	f1=$Ourmine/data/results/step/step_multiN_full.csv
	f2=$Ourmine/data/results/step/kmeansDimReduce/pcaKmeans.csv
	f3=$Ourmine/data/results/step/kmeansDimReduce/icaKmeans.csv
	f4=$Ourmine/data/results/step/kmeansDimReduce/fmKmeans.csv
	file=$Ourmine/data/results/step/graphs/$1

	numAtts=`cat $f1 | grep -v \# | cut -d, -f 10 | sort -n | uniq`
	(for num in $numAtts; do
		#external sim
		m1=`cat $f1 | grep genic | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $8}' n=$num - | medianBounded 0 1`
		m2=`cat $f1 | grep kmeans | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $8}' n=$num - | medianBounded 0 1`
		m3=`cat $f1 | grep canopy | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $8}' n=$num - | medianBounded 0 1`

		#internal sim
		m4=`cat $f1 | grep genic | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $9}' n=$num - | medianBounded 0 1`
		m5=`cat $f1 | grep kmeans | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $9}' n=$num - | medianBounded 0 1`
		m6=`cat $f1 | grep canopy | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $9}' n=$num - | medianBounded 0 1`
		blabln "attrCount=$num, m1 = $m1, m2 = $m2, m3 = $m3, m4 = $m4, m5 = $m5, m6 = $m6"
		line=${num}X${m1}X${m2}X${m3}X${m4}X${m5}X${m6}
		echo $line | sed 's/X/\t/g' | sed 's/n//g' | sed 's/_//g'
	done) | sort -n > full.dat

	# Helper rows are: N, internal, external (hence column 3 = external below).
	aggSimVsN_helper $f2 > pca.dat
	aggSimVsN_helper $f3 > ica.dat
	aggSimVsN_helper $f4 > fm.dat

	echo "set terminal postscript" > tmp.plt
#	echo "set log x" >> tmp.plt
#	echo "set log y" >> tmp.plt
	echo "set autoscale x" >> tmp.plt
	echo "set autoscale y" >> tmp.plt
	echo "set xlabel \"N (number of attributes)\"" >> tmp.plt
	echo "set ylabel \"Similarity\"" >> tmp.plt
	echo "set output '$file'" >> tmp.plt
	# The y axis is similarity, so the title says so (it used to read
	# "Run-time", copied from the run-time plot).
	echo "set title '$2 --- Attribute Count vs. Similarity'" >> tmp.plt
#	echo "plot 'full.dat' using 1:2 with linespoints lt 1 pt 8  t \"genic: external\"," \
#		"'full.dat' using 1:5 with linespoints lt 2 pt 8  t \"internal\"," \
#		" 'full.dat' using 1:3 with linespoints lt 1 pt 4 t \"kmeans: external\"," \
#		" 'full.dat' using 1:6 with linespoints lt 2 pt 4 t \"internal\"," \
#		" 'full.dat' using 1:4 with linespoints lt 1 pt 6 t \"canopy: external\"," \
#		" 'full.dat' using 1:7 with linespoints lt 2 pt 6 t \"internal\"," \
	echo " plot 'pca.dat' using 1:3 with linespoints lt 1 pt 21 t \"kmeans-PCA: external\"," \
		" 'pca.dat' using 1:2 with linespoints lt 2 pt 21 t \"internal\"," \
		" 'fm.dat' using 1:3 with linespoints lt 1 pt 16 t \"kmeans-FastMap: external\"," \
		" 'fm.dat' using 1:2 with linespoints lt 2 pt 16 t \"internal\""  >> tmp.plt
#		" 'ica.dat' using 1:3 with linespoints lt 1 pt 19 t \"kmeans-ICA: external\"," \
#		" 'ica.dat' using 1:2 with linespoints lt 2 pt 19 t \"internal\"" >> tmp.plt
	echo "load 'tmp.plt'" | gnuplot
#	rm tmp.plt tmp.dat	
}

# aggSimVsK <outName> <titlePrefix>
#
# Plots similarity against K (number of clusters): genic/kmeans/canopy on the
# full data (full.dat) together with k-means on PCA- and FastMap-reduced data.
#   $1  output file name, created under $Ourmine/data/results/step/graphs/
#   $2  text placed at the start of the plot title
# In the full CSV, field 5 is the K key ('k' and '_' are stripped from the
# data line), field 8 external and field 9 internal similarity.
# Side effects: sets the globals f1..f4, file and numAtts; leaves full.dat,
# pca.dat, ica.dat, fm.dat and tmp.plt in the current directory (the cleanup
# line is commented out); runs gnuplot.
aggSimVsK() {
	f1=$Ourmine/data/results/step/step_multiN_full.csv
	f2=$Ourmine/data/results/step/kmeansDimReduce/pcaKmeans.csv
	f3=$Ourmine/data/results/step/kmeansDimReduce/icaKmeans.csv
	f4=$Ourmine/data/results/step/kmeansDimReduce/fmKmeans.csv
	file=$Ourmine/data/results/step/graphs/$1

	numAtts=`cat $f1 | grep -v \# | cut -d, -f 5 | sort -n | uniq`
	(for num in $numAtts; do
		#external sim
		m1=`cat $f1 | grep genic | gawk 'BEGIN{FS=","} index($5,n)!=0  {print $8}' n=$num - | medianBounded 0 1`
		m2=`cat $f1 | grep kmeans | gawk 'BEGIN{FS=","} index($5,n)!=0 {print $8}' n=$num - | medianBounded 0 1`
		m3=`cat $f1 | grep canopy | gawk 'BEGIN{FS=","} index($5,n)!=0 {print $8}' n=$num - | medianBounded 0 1`

		#internal sim
		m4=`cat $f1 | grep genic | gawk 'BEGIN{FS=","} index($5,n)!=0 {print $9}' n=$num - | medianBounded 0 1`
		m5=`cat $f1 | grep kmeans | gawk 'BEGIN{FS=","} index($5,n)!=0 {print $9}' n=$num - | medianBounded 0 1`
		m6=`cat $f1 | grep canopy | gawk 'BEGIN{FS=","} index($5,n)!=0 {print $9}' n=$num - | medianBounded 0 1`
		blabln "attrCount=$num, m1 = $m1, m2 = $m2, m3 = $m3, m4 = $m4, m5 = $m5, m6 = $m6"
		line=${num}X${m1}X${m2}X${m3}X${m4}X${m5}X${m6}
		echo $line | sed 's/X/\t/g' | sed 's/k//g' | sed 's/_//g'
	done) | sort -n > full.dat

	# Helper rows are: K, internal, external (hence column 3 = external below).
	aggSimVsK_helper $f2 > pca.dat
	aggSimVsK_helper $f3 > ica.dat
	aggSimVsK_helper $f4 > fm.dat

	echo "set terminal postscript" > tmp.plt
#	echo "set log x" >> tmp.plt
#	echo "set log y" >> tmp.plt
	echo "set autoscale x" >> tmp.plt
	echo "set autoscale y" >> tmp.plt
	# The x values come from the K column, so label the axis and title with K
	# (they used to read "N (number of attributes)" / "Run-time").
	echo "set xlabel \"K (number of clusters)\"" >> tmp.plt
	echo "set ylabel \"Similarity\"" >> tmp.plt
	echo "set output '$file'" >> tmp.plt
	echo "set title '$2 --- Number of Clusters vs. Similarity'" >> tmp.plt
	echo "plot 'full.dat' using 1:2 with linespoints lt 1 pt 8  t \"genic: external\"," \
		"'full.dat' using 1:5 with linespoints lt 2 pt 8  t \"internal\"," \
		" 'full.dat' using 1:3 with linespoints lt 1 pt 4 t \"kmeans: external\"," \
		" 'full.dat' using 1:6 with linespoints lt 2 pt 4 t \"internal\"," \
		" 'full.dat' using 1:4 with linespoints lt 1 pt 6 t \"canopy: external\"," \
		" 'full.dat' using 1:7 with linespoints lt 2 pt 6 t \"internal\"," \
	 	" 'pca.dat' using 1:3 with linespoints lt 1 pt 21 t \"kmeans-PCA: external\"," \
		" 'pca.dat' using 1:2 with linespoints lt 2 pt 21 t \"internal\"," \
		" 'fm.dat' using 1:3 with linespoints lt 1 pt 16 t \"kmeans-FastMap: external\"," \
		" 'fm.dat' using 1:2 with linespoints lt 2 pt 16 t \"internal\""  >> tmp.plt
#		" 'ica.dat' using 1:3 with linespoints lt 1 pt 19 t \"kmeans-ICA: external\"," \
#		" 'ica.dat' using 1:2 with linespoints lt 2 pt 19 t \"internal\"" >> tmp.plt
	echo "load 'tmp.plt'" | gnuplot
#	rm tmp.plt tmp.dat	
}


# aggSimVsN_helper <csv>
#
# Prints one tab-separated row per distinct field-3 value of <csv>, sorted
# numerically: key, internal value, external value. The internal value is
# taken from |field 7| and the external one from |field 6|, each reduced with
# the external `medianBounded 0 1` filter. 'n' and '_' characters are removed
# from every output row. Sets the global `numAtts`.
aggSimVsN_helper(){
	# awk filters: rows with field 3 == n -> absolute value of field 7 / 6
	local absInternal='BEGIN{FS=","} $3==n {print $7 >= 0 ? $7 : -$7}'
	local absExternal='BEGIN{FS=","} $3==n {print $6 >= 0 ? $6 : -$6}'

	numAtts=$(cat $1 | grep -v \# | cut -d, -f 3 | sort -n | uniq)
	for num in $numAtts; do
		m1=$(cat $1 | gawk "$absInternal" n=$num - | medianBounded 0 1)
		m2=$(cat $1 | gawk "$absExternal" n=$num - | medianBounded 0 1)
		blabln "attrCount=$num, m1 = $m1, m2=$m2"
		echo ${num}X${m1}X${m2} | sed 's/X/\t/g' | sed 's/n//g' | sed 's/_//g'
	done | sort -n
}

# aggSimVsK_helper <csv>
#
# Same as the N variant but keyed on field 2: prints one tab-separated row per
# distinct field-2 value of <csv>, sorted numerically: key, internal value
# (|field 7|), external value (|field 6|), each reduced with the external
# `medianBounded 0 1` filter. Sets the global `numAtts`.
# NOTE(review): the rows are stripped of 'n' and '_' (not 'k'); that looks
# copied from the N helper -- confirm against the real field-2 format.
aggSimVsK_helper(){
	# awk filters: rows with field 2 == n -> absolute value of field 7 / 6
	local absInternal='BEGIN{FS=","} $2==n {print $7 >= 0 ? $7 : -$7}'
	local absExternal='BEGIN{FS=","} $2==n {print $6 >= 0 ? $6 : -$6}'

	numAtts=$(cat $1 | grep -v \# | cut -d, -f 2 | sort -n | uniq)
	for num in $numAtts; do
		m1=$(cat $1 | gawk "$absInternal" n=$num - | medianBounded 0 1)
		m2=$(cat $1 | gawk "$absExternal" n=$num - | medianBounded 0 1)
		blabln "attrCount=$num, m1 = $m1, m2=$m2"
		echo ${num}X${m1}X${m2} | sed 's/X/\t/g' | sed 's/n//g' | sed 's/_//g'
	done | sort -n
}

# aggRunTimeVsN <outName> <titlePrefix>
#
# Plots run-time against N (number of attributes) for genic/kmeans/canopy on
# the full data plus k-means on PCA- and FastMap-reduced data.
#   $1  output file name, created under $Ourmine/data/results/step/graphs/
#   $2  text placed at the start of the plot title
# ica.dat is generated but not plotted. Sets the globals f1..f4, file and
# numAtts; leaves full.dat, pca.dat, ica.dat, fm.dat and tmp.plt behind in the
# current directory; runs gnuplot.
aggRunTimeVsN() {
	f1=$Ourmine/data/results/step/step_multiN_full.csv
	f2=$Ourmine/data/results/step/kmeansDimReduce/pcaKmeans.csv
	f3=$Ourmine/data/results/step/kmeansDimReduce/icaKmeans.csv
	f4=$Ourmine/data/results/step/kmeansDimReduce/fmKmeans.csv
	file=$Ourmine/data/results/step/graphs/$1
	# awk filter: rows whose field 10 contains n -> print their field 7
	local timeOf='BEGIN{FS=","} index($10,n)!=0 {print $7}'

	# Full-data rows: N, genic, kmeans, canopy.
	numAtts=$(cat $f1 | grep -v \# | cut -d, -f 10 | sort -n | uniq)
	for num in $numAtts; do
		m1=$(cat $f1 | grep genic | gawk "$timeOf" n=$num - | median)
		m2=$(cat $f1 | grep kmeans | gawk "$timeOf" n=$num - | median)
		m3=$(cat $f1 | grep canopy | gawk "$timeOf" n=$num - | median)
		blabln "attrCount=$num, m1 = $m1, m2 = $m2, m3 = $m3"
		echo ${num}X${m1}X${m2}X${m3} | sed 's/X/\t/g' | sed 's/n//g' | sed 's/_//g'
	done | sort -n > full.dat

	# Reduced-data rows: N, run-time.
	aggRunTimeVsN_helper $f2 > pca.dat
	aggRunTimeVsN_helper $f3 > ica.dat
	aggRunTimeVsN_helper $f4 > fm.dat

	{
		echo "set terminal postscript"
		echo "set autoscale x"
		echo "set autoscale y"
		echo "set xlabel \"N (number of attributes)\""
		echo "set ylabel \"Run-time (seconds)\""
		echo "set output '$file'"
		echo "set title '$2 --- Attribute Count vs. Run-time'"
		echo "plot 'full.dat' using 1:2 with linespoints lt 1 pt 8  t \"genic\"," \
			 "'full.dat' using 1:3 with linespoints lt 1 pt 4 t \"kmeans\"," \
			 "'full.dat' using 1:4 with linespoints lt 1 pt 6 t \"canopy\"," \
			 "'pca.dat' using 1:2 with linespoints lt 1 pt 21 t \"kmeans - PCA\"," \
			 "'fm.dat' using 1:2 with linespoints lt 1 pt 16 t \"kmeans - FastMap\""
	} > tmp.plt

	echo "load 'tmp.plt'" | gnuplot
}

# aggRunTimeVsN_helper <csv>
#
# Prints one tab-separated row per distinct field-3 value of <csv>, sorted
# numerically: key, run-time. The run-time is |field 5| of the rows whose
# field 3 equals the key, reduced with the external `median` filter.
# 'n' and '_' characters are removed from every output row.
# Sets the global `numAtts`.
aggRunTimeVsN_helper(){
	numAtts=`cat $1 | grep -v \# | cut -d, -f 3 | sort -n | uniq`
	(for num in $numAtts; do
		# The ternary is parenthesised so that awks which treat a bare '>'
		# inside print as output redirection still parse it as a comparison.
		m1=`cat $1 | gawk 'BEGIN{FS=","} $3==n {print ($5 >= 0 ? $5 : -$5)}' n=$num - | median`
		# Log the value that was just computed (the old message printed a
		# variable this function never set).
		blabln "attrCount=$num, m1 = $m1"
		line=${num}X${m1}
		echo $line | sed 's/X/\t/g' | sed 's/n//g' | sed 's/_//g'
	done) | sort -n
}

# simVsK <csv> <outName> <titlePrefix>
#
# Plots external and internal similarity against K (number of clusters) for
# the genic, kmeans and canopy rows of <csv>, on a log-scaled y axis limited
# to [0.01:1].
#   $1  results CSV; lines containing '#' are skipped when collecting K values
#   $2  output file name, created under $Ourmine/data/results/step/graphs/
#   $3  text placed at the start of the plot title
# Field 5 is the K key (its first 'k' is stripped from the data row), field 8
# the external and field 9 the internal similarity; values are reduced with
# the external `medianBounded 0 1` filter.
# Sets the globals `file` and `sizes`; tmp.dat / tmp.plt are scratch files in
# the current directory and are removed afterwards; runs gnuplot.
simVsK() {
	local data=$1
	file=$Ourmine/data/results/step/graphs/$2
	sizes=`cat $data | grep -v \# | cut -d, -f 5 | sort -n | uniq`
	(for size in $sizes; do 
		m1=`cat $data | grep genic | gawk 'BEGIN{FS=","} index($5,k)!=0 {print $8}' k=$size - | medianBounded 0 1`
		m2=`cat $data | grep kmeans | gawk 'BEGIN{FS=","} index($5,k)!=0 {print $8}' k=$size - | medianBounded 0 1`
		m3=`cat $data | grep canopy | gawk 'BEGIN{FS=","} index($5,k)!=0 {print $8}' k=$size - | medianBounded 0 1`
		m4=`cat $data | grep genic | gawk 'BEGIN{FS=","} index($5,k)!=0 {print $9}' k=$size - | medianBounded 0 1`
		m5=`cat $data | grep kmeans | gawk 'BEGIN{FS=","} index($5,k)!=0 {print $9}' k=$size - | medianBounded 0 1`
		m6=`cat $data | grep canopy | gawk 'BEGIN{FS=","} index($5,k)!=0 {print $9}' k=$size - | medianBounded 0 1`
		line=${size}X${m1}X${m2}X${m3}X${m4}X${m5}X${m6}
		echo $line | sed 's/X/\t/g' | sed 's/k//'
	done) | sort -n > tmp.dat

	echo "set terminal postscript " > tmp.plt
	echo "set autoscale x" >> tmp.plt
	echo "set yrange [0.01:1]" >> tmp.plt
	echo "set logscale y" >> tmp.plt
	echo "set xlabel \"k (number of clusters)\"" >> tmp.plt
	# Axis label spelling corrected (was "Similarlity").
	echo "set ylabel \"Similarity\"" >> tmp.plt
	echo "set output '$file'" >> tmp.plt
	echo "set title '$3 - Similarity Vs. K'" >> tmp.plt
	echo "plot 'tmp.dat' using 1:2 with linespoints lt 1 pt 8  t \"genic: external\"," \
		"'tmp.dat' using 1:5 with linespoints lt 2 pt 8  t \"internal \n\"," \
		" 'tmp.dat' using 1:3 with linespoints lt 1 pt 4 t \"kmeans: external\"," \
		" 'tmp.dat' using 1:6 with linespoints lt 2 pt 4 t \"internal\"," \
		" 'tmp.dat' using 1:4 with linespoints lt 1 pt 6 t \"canopy: external\"," \
		" 'tmp.dat' using 1:7 with linespoints lt 2 pt 6 t \"internal\"" >> tmp.plt
	echo "load 'tmp.plt'" | gnuplot
	rm tmp.dat tmp.plt
}

# kVsNVsSim <csv> <titlePrefix> <fileSuffix>
#
# Writes three 3-D surface plots (genic, kmeans, canopy) of external and
# internal similarity over K (number of clusters) and N (number of
# attributes).
#   $1  results CSV; field 5 = K key, field 10 = N key,
#       field 8 = external similarity, field 9 = internal similarity
#   $2  text placed at the start of each plot title
#   $3  suffix appended to the output base name "kVsNvsExternalSim"
# Output: kVsNvsExternalSim<$3>_{genic,kmeans,canopy}.ps in the current
# directory. tmp.dat rows are: K, N, three external values (genic, kmeans,
# canopy), three internal values; 'n', '_' and 'k' are stripped from each row.
# Sets the globals file, ks, ns, f1, f2 and f3; leaves tmp.dat and the
# tmp*.plt scripts behind (the cleanup line is commented out).
kVsNVsSim() {
	local data=$1
	file="kVsNvsExternalSim"$3
	ks=`cat $data | grep -v \# | cut -d, -f 5 | sort -n | uniq`
	ns=`cat $data | grep -v \# | cut -d, -f 10 | sort -n | uniq`
	(for k in $ks; do
		for n in $ns; do
			# NOTE(review): rows are selected with plain grep on the whole line, so
			# e.g. "k2" also matches rows for "k20" -- confirm this is acceptable.
			# External similarity is field 8 and internal is field 9, as in the
			# other similarity plots; the columns below now match their legends.
			m1=`cat $data | grep $n | grep $k | grep genic | cut -d, -f8 | medianBounded 0 1`
			m2=`cat $data | grep $n | grep $k | grep kmeans | cut -d, -f8 | medianBounded 0 1`
			m3=`cat $data | grep $n | grep $k | grep canopy | cut -d, -f8 | medianBounded 0 1`
			m4=`cat $data | grep $n | grep $k | grep genic | cut -d, -f9 | medianBounded 0 1`
			m5=`cat $data | grep $n | grep $k | grep kmeans | cut -d, -f9 | medianBounded 0 1`
			m6=`cat $data | grep $n | grep $k | grep canopy | cut -d, -f9 | medianBounded 0 1`
			line=${k}X${n}X${m1}X${m2}X${m3}X${m4}X${m5}X${m6}
			echo $line | sed 's/X/\t/g' | sed 's/\(n\|_\)//g' | sed 's/k//g'
		done
	done) | sort -n > tmp.dat

	echo "set terminal postscript " > tmp.plt
#	echo "set autoscale x" >> tmp.plt
#	echo "set autoscale y" >> tmp.plt
#	echo "set autoscale z" >> tmp.plt
#	echo "set zrange [0.01:1]" >> tmp.plt
#	echo "set logscale y" >> tmp.plt
	# Column 1 of tmp.dat is K and column 2 is N, and the splots use 1:2:z,
	# so x is K and y is N.
	echo "set xlabel \"K (number of clusters)\"" >> tmp.plt
	echo "set ylabel \"N (number of attributes)\"" >> tmp.plt
	echo "set zlabel \"Similarity\"" >> tmp.plt
	echo "set dgrid3d 30,30" >> tmp.plt
#	echo "set hidden3d" >> tmp.plt
	# Shared preamble, then one script per clusterer.
	cp tmp.plt tmp_g.plt
	cp tmp.plt tmp_k.plt
	cp tmp.plt tmp_c.plt

	echo "set title '$2 - K vs N vs Similarity - genic'" >> tmp_g.plt
	f1=$file"_genic.ps"
	echo "set output '$f1'" >> tmp_g.plt
	echo "splot 'tmp.dat' u 1:2:3 with lines t \"genic: external\"," \
		"'tmp.dat' u 1:2:6 with lines lt 2 t \"internal\"" >> tmp_g.plt
	echo "load 'tmp_g.plt'" | gnuplot

	echo "set title '$2 - K vs N vs Similarity - kmeans'" >> tmp_k.plt
	f2=$file"_kmeans.ps"
	echo "set output '$f2'" >> tmp_k.plt
	echo "splot 'tmp.dat' u 1:2:4 with lines t \"kmeans: external\"," \
		"'tmp.dat' u 1:2:7 with lines lt 2 t \"internal\"" >> tmp_k.plt
	echo "load 'tmp_k.plt'" | gnuplot

	echo "set title '$2 - K vs N vs Similarity - canopy'" >> tmp_c.plt
	f3=$file"_canopy.ps"
	echo "set output '$f3'" >> tmp_c.plt
	echo "splot 'tmp.dat' u 1:2:5 with lines t \"canopy: external\"," \
		"'tmp.dat' u 1:2:8 with lines lt 2 t \"internal\"" >> tmp_c.plt
	echo "load 'tmp_c.plt'" | gnuplot

#	rm tmp.dat tmp.plt tmp_g.plt tmp_k.plt tmp_c.plt

}

# simVsN <csv> <outName> <titlePrefix>
#
# Plots external and internal similarity against N (number of attributes) for
# the genic, kmeans and canopy rows of <csv>, on a log-scaled y axis limited
# to [0.01:1].
#   $1  results CSV; lines containing '#' are skipped when collecting N values
#   $2  output file name, created under $Ourmine/data/results/step/graphs/
#   $3  text placed at the start of the plot title
# Field 10 is the N key ('n' and '_' are stripped from the data row), field 8
# the external and field 9 the internal similarity; values are reduced with
# the external `medianBounded 0 1` filter.
# Sets the globals `file` and `sizes`; tmp.dat / tmp.plt are scratch files in
# the current directory and are removed afterwards; runs gnuplot.
simVsN() {
	local data=$1
	file=$Ourmine/data/results/step/graphs/$2
	sizes=`cat $data | grep -v \# | cut -d, -f 10 | sort -n | uniq`
	(for size in $sizes; do 
		m1=`cat $data | grep genic | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $8}' n=$size - | medianBounded 0 1`
		m2=`cat $data | grep kmeans | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $8}' n=$size - | medianBounded 0 1`
		m3=`cat $data | grep canopy | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $8}' n=$size - | medianBounded 0 1`
		m4=`cat $data | grep genic | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $9}' n=$size - | medianBounded 0 1`
		m5=`cat $data | grep kmeans | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $9}' n=$size - | medianBounded 0 1`
		m6=`cat $data | grep canopy | gawk 'BEGIN{FS=","} index($10,n)!=0 {print $9}' n=$size - | medianBounded 0 1`
		line=${size}X${m1}X${m2}X${m3}X${m4}X${m5}X${m6}
		echo $line | sed 's/X/\t/g' | sed 's/\(n\|_\)//g'
	done) | sort -n > tmp.dat

	echo "set terminal postscript " > tmp.plt
	echo "set autoscale x" >> tmp.plt
	echo "set yrange [0.01:1]" >> tmp.plt
	echo "set logscale y" >> tmp.plt
	echo "set xlabel \"N (number of attributes)\"" >> tmp.plt
	# Axis label spelling corrected (was "Similarlity").
	echo "set ylabel \"Similarity\"" >> tmp.plt
	echo "set output '$file'" >> tmp.plt
	echo "set title '$3 - Similarity Vs. N'" >> tmp.plt
	echo "plot 'tmp.dat' using 1:2 with linespoints lt 1 pt 8  t \"genic: external\"," \
		"'tmp.dat' using 1:5 with linespoints lt 2 pt 8  t \"internal \n\"," \
		" 'tmp.dat' using 1:3 with linespoints lt 1 pt 4 t \"kmeans: external\"," \
		" 'tmp.dat' using 1:6 with linespoints lt 2 pt 4 t \"internal\"," \
		" 'tmp.dat' using 1:4 with linespoints lt 1 pt 6 t \"canopy: external\"," \
		" 'tmp.dat' using 1:7 with linespoints lt 2 pt 6 t \"internal\"" >> tmp.plt
	# This must be its own command: when it was fused onto the line above, the
	# words became arguments of the plot echo and gnuplot received no input.
	echo "load 'tmp.plt'" | gnuplot
	rm tmp.dat tmp.plt
}

# speedPlotsPerDS <csv>
#
# For every distinct dataset name in field 1 of <csv>, renders a run-time plot
# (genic / kmeans / canopy, field 7 averaged with the external `mean` filter
# per field-5 value) to $Ourmine/data/results/step/graphs/overall_runTime.png
# and then moves it into $Ourmine/data/runTimeGraphs/.
# Sets the globals ds, file, sizes, size, m1..m3 and line; tmp.dat / tmp.plt
# are scratch files in the current directory.
# NOTE(review): the rows are not filtered by dataset and every iteration
# writes the same output file name, so each dataset produces the same plot and
# overwrites the previous one -- confirm whether per-dataset filtering and
# file names were intended.
speedPlotsPerDS() {
	local data=$1
	local dataSets=`cat $data | grep -v \# | cut -d, -f1 | sort | uniq`
	for ds in $dataSets; do
		file=$Ourmine/data/results/step/graphs/'overall_runTime.png'
		# Skip '#' lines here as well, like the dataset list above.
		sizes=`cat $data | grep -v \# | cut -d, -f 5 | sort -n | uniq`
		# Start from an empty data file: rows are appended below, so a stale
		# tmp.dat from an earlier run would otherwise end up in the plot.
		: > tmp.dat
		for size in $sizes; do
			m1=`cat $data | grep genic | gawk 'BEGIN{FS=","} $5==k {print $7}' k=$size - | mean`
			m2=`cat $data | grep kmeans | gawk 'BEGIN{FS=","} $5==k {print $7}' k=$size - | mean`
			m3=`cat $data | grep canopy | gawk 'BEGIN{FS=","} $5==k {print $7}' k=$size - | mean`
			echo "size= $size, m1 = $m1, m2 = $m2, m3 = $m3"
			line=${size}X${m1}X${m2}X${m3}
			echo $line | sed 's/X/\t/g' >> tmp.dat
		done
		echo "set terminal png " > tmp.plt
		echo "set log x" >> tmp.plt
		echo "set output '$file'" >> tmp.plt
		# The title has to be set before the plot command to appear on it.
		echo "set title '$ds --- Run-time Comparison'" >> tmp.plt
		echo "plot 'tmp.dat' using 1:2 with lines lt 3  t \"genic\", 'tmp.dat' using 1:3 with lines lt 2 t \"kmeans\", 'tmp.dat' using 1:4 with lines lt 5 t \"canopy\"" >> tmp.plt
		echo "load 'tmp.plt'" | gnuplot
		rm tmp.dat tmp.plt
		mv $file $Ourmine/data/runTimeGraphs/
	done
}


# kVsNVsPureEnt <csv> <titlePrefix>
#
# Writes a 3-D surface plot (kVsNvsEntropyPurity.ps in the current directory)
# of two series labelled "entropy" and "purity" over K (number of clusters)
# and N (number of attributes).
#   $1  results CSV; field 3 = K key (with a 'k' prefix), field 4 = N key
#   $2  text placed at the start of the plot title
# tmp.dat rows are: K, N, entropy, purity ("dim" and 'k' are stripped from
# each row). Both values are reduced with the external `median` filter.
# Sets the globals file, ks, ns and f1; leaves tmp.dat and tmp.plt behind.
# NOTE(review): entropy is read from field 6 of the *genic* rows and purity
# from field 7 of the *kmeans* rows -- confirm that mixing clusterers is
# intended.
kVsNVsPureEnt() {
	local data=$1
	file="kVsNvsEntropyPurity"
	ks=`cat $data | grep -v \# | cut -d, -f 3 | sed 's/k//g' | sort -n | uniq`
	ns=`cat $data | grep -v \# | cut -d, -f 4 | sort -n | uniq`
	(for k in $ks; do
		for n in $ns; do
			ent=`cat $data | grep $n | grep "k"$k | grep genic | cut -d, -f6 | median`
			pur=`cat $data | grep $n | grep "k"$k | grep kmeans | cut -d, -f7 | median`
			line=${k}X${n}X${ent}X${pur}X
			echo $line | sed 's/X/\t/g' | sed 's/dim//g' | sed 's/k//g'
		done
	done) | sort -n > tmp.dat

	echo "set terminal postscript " > tmp.plt
#	echo "set autoscale x" >> tmp.plt
#	echo "set autoscale y" >> tmp.plt
#	echo "set autoscale z" >> tmp.plt
#	echo "set zrange [0.01:1]" >> tmp.plt
#	echo "set logscale y" >> tmp.plt
	# Column 1 of tmp.dat is K and column 2 is N, and the splot uses 1:2:z,
	# so x is K and y is N.
	echo "set xlabel \"K (number of clusters)\"" >> tmp.plt
	echo "set ylabel \"N (number of attributes)\"" >> tmp.plt
	echo "set zlabel \"Purity/Entropy\"" >> tmp.plt
	echo "set dgrid3d 30,30" >> tmp.plt
#	echo "set hidden3d" >> tmp.plt

	echo "set title '$2 - K vs N vs Purity'" >> tmp.plt
	f1=$file".ps"
	echo "set output '$f1'" >> tmp.plt
	echo "splot 'tmp.dat' u 1:2:3 with lines t \"entropy\"," \
		"'tmp.dat' u 1:2:4 with lines lt 2 t \"purity\"" >> tmp.plt
	echo "load 'tmp.plt'" | gnuplot


#	rm tmp.dat tmp.plt tmp_g.plt tmp_k.plt tmp_c.plt

}


# naturalDatasetSimilarityGraph
#
# For every dataset named in field 1 of $Safe/orig_sim_clean.csv, plots
# external (field 5) and internal (field 4) similarity against the field-3
# value for the tfidf, pca and fastmap rows.
# Output: <dataset>_natural_sim.eps, <dataset>.tmp.dat and tmp.plt, all inside
# ./plot_data (which must already exist).
# Data rows are: N, then (external, internal) for tfidf, pca, fastmap; "dim"
# and 'k' are stripped from each row. Values are reduced with the external
# `median` filter.
# Side effects: sets the globals ns, datas, data, n, m, line, eSim, iSim; the
# shell is left inside plot_data when the function returns.
# Returns 1 if plot_data cannot be entered.
naturalDatasetSimilarityGraph() {
	local file=$Safe/orig_sim_clean.csv
	local meths="tfidf pca fastmap"
	ns=`cat $file | grep -v \# | cut -d, -f 3 | sort -n | uniq`
	datas=`cat $file | grep -v \# | cut -d, -f 1 | sort -n | uniq`
	# Without this check a missing directory would scatter files in the cwd.
	cd plot_data || return 1
	for data in $datas; do
		(for n in $ns; do
			line=${n}
			for m in $meths; do
				eSim=`cat $file | grep $n | grep $m | grep $data | cut -d, -f5 | median`
				# Restrict to the current dataset, like eSim above; previously
				# this mixed the internal similarity of all datasets.
				iSim=`cat $file | grep $n | grep $m | grep $data | cut -d, -f4 | median`
				line=${line}X${eSim}X${iSim}
			done
			echo $line | sed 's/X/\t/g' | sed 's/dim//g' | sed 's/k//g'
		done) | sort -n > $data.tmp.dat
	    local output=$data"_natural_sim"
	

		echo "set terminal postscript " > tmp.plt
		echo "set xrange [3:10000]" >> tmp.plt
		echo "set xtics 0,5" >> tmp.plt
		echo "set logscale x" >> tmp.plt
		echo "set xlabel \"N (number of attributes)\"" >> tmp.plt
		echo "set ylabel \"Internal & External Similarity\"" >> tmp.plt

		echo "set title 'Similarity of natural clusterings - $data'" >> tmp.plt
		echo "set output '$output.eps'" >> tmp.plt
		echo "plot '$data.tmp.dat' u 1:2 with linespoints lt -1 pt 6 t \"tfidf - external_sim\"," \
			 "'$data.tmp.dat' u 1:3 with linespoints lt -1 pt 4 t \"tfidf - internal_sim\"," \
			 "'$data.tmp.dat' u 1:4 with linespoints lt 0 pt 6 t \"pca - external_sim\"," \
			 "'$data.tmp.dat' u 1:5 with linespoints lt 0 pt 4 t  \"pca - internal_sim\"," \
			 "'$data.tmp.dat' u 1:6 with linespoints lt 1 pt 6 t \"fastmap - external_sim\"," \
			 "'$data.tmp.dat' u 1:7 with linespoints lt 1 pt 4 t \"fastmap - internal_sim\"" >> tmp.plt
		echo "load 'tmp.plt'" | gnuplot


	#	rm tmp.dat tmp.plt tmp_g.plt tmp_k.plt tmp_c.plt

	done

}


# superClusterSplot <csv> <title> <Xfield> <Xlabel> <Yfield> <Ylabel> <Zlabel>
#                   [<seriesLabel> <seriesField>]...
#
# Generic 3-D surface plot: for every (x, y) pair of distinct values found in
# fields <Xfield> and <Yfield> of <csv>, each series is whatever the external
# `median` filter prints for <seriesField> over the lines containing both
# values. Rows go to tmp.dat ('k' and 'd' characters are stripped), the
# gnuplot script to tmp.plt; both stay in the current directory. No
# "set output" is written, so the plot goes wherever gnuplot's default is.
# Sets the globals Zlabels, Zfields, xs, ys and line.
# Returns 1 (usage on stderr) when fewer than 7 arguments are given or the
# trailing label/field arguments are not in pairs; previously an unpaired
# argument made the parsing loop spin forever.
superClusterSplot() {
	if [ $# -lt 7 ] || [ $(( ($# - 7) % 2 )) -ne 0 ]; then
		echo "usage: superClusterSplot <csv> <title> <Xfield> <Xlabel> <Yfield> <Ylabel> <Zlabel> [<seriesLabel> <seriesField>]..." >&2
		return 1
	fi

	local data=$1
	local title=$2
	local Xfield=$3
	local Xlabel=$4
	local Yfield=$5
	local Ylabel=$6
	local Zlabel=$7
	
	local numZs=0
	# Drop series left over from an earlier call.
	Zlabels=()
	Zfields=()
	shift 7
	while [ $# -gt 0 ]; do
		Zlabels[$numZs]=$1;
		Zfields[$numZs]=$2;
		shift 2
		numZs=$(( $numZs + 1 ))
	done
		
	xs=`cat $data | grep -v \# | cut -d, -f $Xfield  | sort -n | uniq`
	ys=`cat $data | grep -v \# | cut -d, -f $Yfield | sort -n | uniq`
	line=""
	( for x in $xs; do
		for y in $ys; do
			line=${x}X${y}
			for i in `seq 0 $(($numZs - 1 ))`; do
				med=`cat $data | grep $x | grep $y | cut -d, -f ${Zfields[$i]} | median`
				line=${line}X${med}
			done
			echo $line | sed 's/X/\t/g' | sed 's/k//g' | sed 's/d//g' 
		done
	done ) | sort -n > tmp.dat

	echo "set terminal postscript eps enhanced color " > tmp.plt
#	echo "set autoscale x" >> tmp.plt
#	echo "set autoscale y" >> tmp.plt
#	echo "set autoscale z" >> tmp.plt
#	echo "set zrange [0.01:1]" >> tmp.plt
#	echo "set logscale y" >> tmp.plt
	echo "set xlabel \"$Xlabel\"" >> tmp.plt
	echo "set ylabel \"$Ylabel\"" >> tmp.plt
	echo "set zlabel \"$Zlabel\"" >> tmp.plt
	echo "set surface" >> tmp.plt
	echo "set dgrid3d 30,30" >> tmp.plt

	echo "set title '$title'" >> tmp.plt
	
	# Series i lives in column i+3 of tmp.dat; the splot command is built up
	# piecewise on a single line.
	echo -n "splot 'tmp.dat' u 1:2:3 with lines lt 2 t \"${Zlabels[0]}\"" >> tmp.plt
	for i in `seq 1 $(( $numZs -1 ))`; do
		echo -n ", 'tmp.dat' u 1:2:$(($i + 3)) with lines lc $i lt $(($i+2)) t \"${Zlabels[$i]}\"" >> tmp.plt
	done
	echo "load 'tmp.plt'" | gnuplot
}

# superClusterPlot <csv> <title> <Xfield> <Xlabel> <Ylabel>
#                  [<seriesLabel> <seriesField>]...
#
# Generic 2-D line plot: for every distinct value found in field <Xfield> of
# <csv>, each series is whatever the external `median` filter prints for
# <seriesField> over the lines containing that value. Rows go to tmp.dat ('k'
# and 'd' characters are stripped), the gnuplot script to tmp.plt; both stay
# in the current directory. No "set output" is written, so the plot goes
# wherever gnuplot's default is.
# Sets the globals Ylabels, Yfields, xs and line.
# Returns 1 (usage on stderr) when fewer than 5 arguments are given or the
# trailing label/field arguments are not in pairs; previously an unpaired
# argument made the parsing loop spin forever.
superClusterPlot() {
	if [ $# -lt 5 ] || [ $(( ($# - 5) % 2 )) -ne 0 ]; then
		echo "usage: superClusterPlot <csv> <title> <Xfield> <Xlabel> <Ylabel> [<seriesLabel> <seriesField>]..." >&2
		return 1
	fi

	local data=$1
	local title=$2
	local Xfield=$3
	local Xlabel=$4
	local Ylabel=$5
	
	local numZs=0
	# Drop series left over from an earlier call.
	Ylabels=()
	Yfields=()
	shift 5;
	while [ $# -gt 0 ]; do
		Ylabels[$numZs]=$1;
		Yfields[$numZs]=$2;
		shift 2
		numZs=$(( $numZs + 1 ))
	done
		
	xs=`cat $data | grep -v \# | cut -d, -f $Xfield  | sort -n | uniq`
	line=""
	( for x in $xs; do
		line=${x}
		for i in `seq 0 $(($numZs - 1 ))`; do
			med=`cat $data | grep $x | cut -d, -f ${Yfields[$i]} | median`
			line=${line}X${med}
		done
		echo $line | sed 's/X/\t/g' | sed 's/k//g' | sed 's/d//g' 
	done ) | sort -n > tmp.dat

	echo "set terminal postscript " > tmp.plt
#	echo "set autoscale x" >> tmp.plt
#	echo "set autoscale y" >> tmp.plt
#	echo "set autoscale z" >> tmp.plt
#	echo "set zrange [0.01:1]" >> tmp.plt
#	echo "set logscale y" >> tmp.plt
	echo "set xlabel \"$Xlabel\"" >> tmp.plt
	echo "set ylabel \"$Ylabel\"" >> tmp.plt

	echo "set title '$title'" >> tmp.plt
	
	# Series i lives in column i+2 of tmp.dat; the plot command is built up
	# piecewise on a single line.
	echo -n "plot 'tmp.dat' u 1:2 with lines lt 2 t \"${Ylabels[0]}\"" >> tmp.plt
	for i in `seq 1 $(( $numZs -1 ))`; do
		echo -n ", 'tmp.dat' u 1:$(($i + 2)) with lines lt $(($i+2)) t \"${Ylabels[$i]}\"" >> tmp.plt
	done
	echo "load 'tmp.plt'" | gnuplot
}


# plot2D <arff> <note> <outPng>
#
# Scatter-plots the first two data columns of an ARFF file to a PNG.
#   $1  ARFF file (also shown in the title)
#   $2  free text appended to the title
#   $3  output PNG path
# Leaves tmp.dat and tmp.plt in the current directory.
plot2D(){
	# The converter is spelled buildGnuPlot... (capital P); the old lowercase
	# spelling named a function that does not exist, leaving tmp.dat empty.
	buildGnuPlotDataFromArff $1 2 > tmp.dat
	echo "set terminal png " > tmp.plt
	echo "set output '$3'" >> tmp.plt
	echo "set title '$1 - $2'" >> tmp.plt
	echo "plot 'tmp.dat' with points lt 1 pt 12 t \"\"" >> tmp.plt;
	# echo "load 'save.plt'" >> tmp.plt;                                                                           
	echo "load 'tmp.plt'" | gnuplot
}

# buildGnuPlotDataFromArff <arff> <dims>
#
# Prints the first <dims> comma-separated columns of every data row of an
# ARFF file (the rows after the @data line) to stdout, each value followed by
# a tab. Header lines, blank lines and '%' comment lines are not printed.
# Fixes: the awk for-loop lacked its increment clause (a syntax error), and
# fields are now split on ',' so that <dims> really limits the column count.
buildGnuPlotDataFromArff() {
	gawk -v dims="$2" '
		BEGIN { FS = "," }
		# Data rows only; skip blank lines and comments.
		data && $0 !~ /^[ \t]*$/ && $0 !~ /^[ \t]*%/ {
			line = ""
			for (i = 1; i <= dims; i++)
				line = line $i "\t"
			print line
		}
		# Everything after this marker is data (the keyword is matched
		# case-insensitively).
		tolower($0) ~ /@data/ { data = 1 }
	' "$1"
}

# buildGnuPlotDataFromSparff <sparseArff> <dims>
#
# Expands the sparse data rows ("{index value, index value, ...}") that follow
# the @data line of <sparseArff> into dense rows of <dims> tab-terminated
# values, indices 0 .. dims-1, using 0 for indices that are absent.
# NOTE: the result is written to tmp.dat in the current directory, not to
# stdout.
# Entries are split on any run of whitespace, so "{0 1, 2 5}" (space after the
# comma) is parsed the same as "{0 1,2 5}". Blank lines are skipped and the
# @data keyword is matched case-insensitively.
buildGnuPlotDataFromSparff() {
gawk 'BEGIN{OFS="\t"} 
	  data && $0 !~ /^[ \t]*$/ {
		s=$0; gsub(/[{}]/, "", s); n = split(s, ary, /,/);
		for(j = 1; j <= n; j++) {
			# " " means default field splitting: leading blanks are ignored
			split(ary[j], d2, " ")
			line[d2[1]] = d2[2]
		}

		lineStr=""
		for(i = 0; i<dims ; i++) {
			if(line[i] == "")
				lineStr= lineStr 0 "\t"
			else
				lineStr= lineStr line[i] "\t"
		}
		print lineStr

		# reset the per-row index -> value map
		for(d in line)
			delete line[d]
	  } 
	  tolower($0) ~ /@data/ {data=1}' dims=$2 $1 > tmp.dat;
}

# plot3D <arff> <note> <outPng>
#
# 3-D scatter plot (splot) of the first three data columns of an ARFF file.
#   $1  ARFF file (also shown in the title)
#   $2  free text appended to the title
#   $3  output PNG path
# Leaves tmp.dat and tmp.plt in the current directory.
plot3D(){
	# The converter is spelled buildGnuPlot... (capital P); the old lowercase
	# spelling named a function that does not exist, leaving tmp.dat empty.
	buildGnuPlotDataFromArff $1 3 > tmp.dat
	echo "set terminal png " > tmp.plt
	echo "set output '$3'" >> tmp.plt
	echo "set title '$1 - $2'" >> tmp.plt
	echo "splot 'tmp.dat' with points lt 1 pt 12 t \"\"" >> tmp.plt;
	# echo "load 'save.plt'" >> tmp.plt;                                                                           
	echo "load 'tmp.plt'" | gnuplot
}


# plot2D_sparff <sparseArff> <note> <outPng>
#
# Scatter-plots the first two dimensions of a sparse ARFF file to a PNG.
#   $1  sparse ARFF file (also shown in the title)
#   $2  free text appended to the title
#   $3  output PNG path
# Leaves tmp.dat and tmp.plt in the current directory.
# The plot used to be generated twice in a row into the same output file; only
# the second pass (title "<file> - <note>") survived, so that one is kept.
plot2D_sparff(){
	# The converter is spelled buildGnuPlot... (capital P); it writes tmp.dat
	# itself, the redirect is kept as a harmless safeguard.
	buildGnuPlotDataFromSparff $1 2 > tmp.dat

	echo "set terminal png " > tmp.plt
	echo "set output '$3'" >> tmp.plt
	echo "set title '$1 - $2'" >> tmp.plt
	echo "plot 'tmp.dat' with points lt 1 pt 12 t \"\"" >> tmp.plt;
	# echo "load 'save.plt'" >> tmp.plt;                                                                           
	echo "load 'tmp.plt'" | gnuplot
}

# plot3D_sparff <sparseArff> <note> <outPng>
#
# 3-D scatter plot (splot) of the first three dimensions of a sparse ARFF
# file.
#   $1  sparse ARFF file (also shown in the title)
#   $2  free text appended to the title
#   $3  output PNG path
# Leaves tmp.dat and tmp.plt in the current directory.
plot3D_sparff(){
	# The converter is spelled buildGnuPlot... (capital P); it writes tmp.dat
	# itself, the redirect is kept as a harmless safeguard.
	buildGnuPlotDataFromSparff $1 3 > tmp.dat
	echo "set terminal png " > tmp.plt
	echo "set output '$3'" >> tmp.plt
	echo "set title '$1 - $2'" >> tmp.plt
	echo "splot 'tmp.dat' with points lt 1 pt 12 t \"\"" >> tmp.plt;
	# echo "load 'save.plt'" >> tmp.plt;                                                                           
	echo "load 'tmp.plt'" | gnuplot
}

# buildColoredClusterPlots
#
# Unfinished: for each dataset (ap203, ap214) and each reducer (tfidf, pca,
# fastmap) it currently only prints the lines of ~/Dropbox/data/step.csv that
# contain both names. No plot is produced yet.
# Sets the globals `d` and `r`.
buildColoredClusterPlots() {
	local baseFile=~/Dropbox/data/step.csv
	local -a datas=(ap203 ap214)
	local -a reducers=(tfidf pca fastmap)

	for d in "${datas[@]}"; do
		for r in "${reducers[@]}"; do
			# NOT DONE YET -- only the row selection exists so far.
			cat $baseFile | grep $d | grep $r
		done
	done
}


# buildGraphsForTim_reduction
#
# For each dataset pattern, extracts the matching rows of
# $Ourmine/data/results/step/stepDimReduce/step_dimReduce_runTimes.csv, splits
# them by reducer (pca / tfidf / fastmap) and plots run-time (field 4) against
# number of features (field 3) into
# graphs_for_tim/<pattern>/step_reduction_runtimes_N.eps, then converts the
# result with ps2pdf.
# Side effects: sets the global `d`; creates graphs_for_tim/ under the current
# directory and leaves the shell inside it on return.
# Returns 1 if graphs_for_tim cannot be created or entered.
buildGraphsForTim_reduction(){

	# Each entry is used as a grep pattern AND as a directory name. The last
	# one matches any line containing an alphanumeric character. Keeping the
	# list in an array and quoting every use stops the shell from expanding
	# that pattern against single-character file names.
	local datasets=(ap203 ap214 '[0-9a-zA-Z]')
	mkdir -p graphs_for_tim || return 1
	cd graphs_for_tim || return 1

	#buildDataFileForPlot <dataFile> XfieldNo YfieldNo [YfieldNo2 YFieldNo3 ...
	local baseFile=$Ourmine/data/results/step/stepDimReduce/step_dimReduce_runTimes.csv

	for d in "${datasets[@]}"; do	
		# Skip the dataset if its directory is unusable; otherwise the
		# "cd .." at the end of the loop would climb out of graphs_for_tim.
		mkdir -p "$d" || continue
		cd "$d" || continue
	
		cat $baseFile | grep "$d" > "$d.csv"


		cat "$d.csv" | grep pca > pca.csv
		cat "$d.csv" | grep tfidf > tfidf.csv
		cat "$d.csv" | grep fastmap > fastmap.csv

		buildDataFileForPlot pca.csv 3 4 | uniq > pcaN.dat
		buildDataFileForPlot tfidf.csv 3 4 | uniq > tfidfN.dat
		buildDataFileForPlot fastmap.csv 3 4 | uniq > fastmapN.dat
	
		echo "set terminal postscript eps 'Helvetica' 20" > buildNplot.plt
		echo "set size 0.5, 0.5" >> buildNplot.plt
		echo "set xtics 0,20" >> buildNplot.plt
		echo "set logscale y" >> buildNplot.plt
		echo "set key bottom right" >> buildNplot.plt
		echo "set xlabel \"N (number of features)\"" >> buildNplot.plt
		echo "set ylabel \"Runtime (seconds)\"" >> buildNplot.plt
		echo "set title '$d - Dimension Reduction - Number of Dimensions vs Runtimes'" >> buildNplot.plt
		echo "set output 'step_reduction_runtimes_N.eps'" >> buildNplot.plt
	
		# NOTE(review): "with linesp 0 0" looks like the old positional
		# linetype/pointtype form -- confirm the installed gnuplot accepts it.
		echo -n "plot 'pcaN.dat' u 1:2 with linesp 0 0 t \"PCA\"" >> buildNplot.plt
		echo -n ", 'tfidfN.dat' u 1:2 with linesp 1 0 t \"TFIDF\"" >> buildNplot.plt
		echo -n ", 'fastmapN.dat' u 1:2 with linesp 1 1 t \"Fastmap\"" >> buildNplot.plt
		echo "load 'buildNplot.plt'" | gnuplot
		
		ps2pdf step_reduction_runtimes_N.eps

		rm "$d.csv"
	
		cd ..
	done
}

# buildGraphsForTim_clustering
#
# For each dataset (currently only ap214), extracts the matching rows of
# ~/Dropbox/data/step.csv, splits them by clusterer (genic / kmeans / canopy)
# and writes two run-time plots into graphs_for_tim_clust/<dataset>/:
#   step_cluster_runtimes_N.eps  field 8 against field 5 (number of features)
#   step_cluster_runtimes_K.eps  field 8 against field 6 (number of clusters)
# Each is converted with ps2pdf.
# Side effects: sets the global `d`; creates graphs_for_tim_clust/ under the
# current directory and leaves the shell inside it on return.
# Returns 1 if graphs_for_tim_clust cannot be created or entered.
buildGraphsForTim_clustering(){

	local datasets="ap214"
	mkdir -p graphs_for_tim_clust || return 1
	cd graphs_for_tim_clust || return 1

	#buildDataFileForPlot <dataFile> XfieldNo YfieldNo [YfieldNo2 YFieldNo3 ...
	local baseFile=~/Dropbox/data/step.csv

	for d in $datasets; do	
		# Skip the dataset if its directory is unusable; otherwise the
		# "cd .." at the end of the loop would climb out of the output tree.
		mkdir -p "$d" || continue
		cd "$d" || continue
	
		cat $baseFile | grep $d > $d.csv 


		cat $d.csv | grep genic > genic.csv
		cat $d.csv | grep kmeans > kmeans.csv
		cat $d.csv | grep canopy > canopy.csv

		# --- run-time vs. number of features ---
		buildDataFileForPlot genic.csv 5 8 | uniq > genicN.dat
		buildDataFileForPlot kmeans.csv 5 8 | uniq > kmeansN.dat
		buildDataFileForPlot canopy.csv 5 8 | uniq > canopyN.dat
	
		echo "set terminal postscript eps 'Helvetica' 20" > buildNplot_clust.plt
		echo "set size 1, 1" >> buildNplot_clust.plt
		echo "set xtics 0,20" >> buildNplot_clust.plt
		echo "set logscale y" >> buildNplot_clust.plt
		echo "set xlabel \"N (number of features)\"" >> buildNplot_clust.plt
		echo "set ylabel \"Runtime (seconds)\"" >> buildNplot_clust.plt
		echo "set title '$d - Clustering - Number of Dimensions vs Runtimes'" >> buildNplot_clust.plt
		echo "set output 'step_cluster_runtimes_N.eps'" >> buildNplot_clust.plt
	
		echo -n "plot 'genicN.dat' u 1:2 with lines lt -1 t \"GenIc\"" >> buildNplot_clust.plt
		echo -n ", 'kmeansN.dat' u 1:2 with lines lt 2 t \"K-Means\"" >> buildNplot_clust.plt
		echo -n ", 'canopyN.dat' u 1:2 with lines lt 1 t \"Canopy\"" >> buildNplot_clust.plt
		echo "load 'buildNplot_clust.plt'" | gnuplot
		ps2pdf step_cluster_runtimes_N.eps

		# --- run-time vs. number of clusters ---
		buildDataFileForPlot genic.csv 6 8 | uniq > genicK.dat
		buildDataFileForPlot kmeans.csv 6 8 | uniq > kmeansK.dat
		buildDataFileForPlot canopy.csv 6 8 | uniq > canopyK.dat
	
		echo "set terminal postscript eps 'Helvetica' 20" > buildKplot_clust.plt
		echo "set size 1, 1" >> buildKplot_clust.plt
		echo "set logscale y" >> buildKplot_clust.plt
		echo "set xlabel \"K (number of clusters)\"" >> buildKplot_clust.plt
		echo "set ylabel \"Runtime (seconds)\"" >> buildKplot_clust.plt
		echo "set title '$d - Clustering - Number of Clusters vs Runtimes'" >> buildKplot_clust.plt
		echo "set output 'step_cluster_runtimes_K.eps'" >> buildKplot_clust.plt
	
		echo -n "plot 'genicK.dat' u 1:2 with lines lt -1 t \"GenIc\"" >> buildKplot_clust.plt
		echo -n ", 'kmeansK.dat' u 1:2 with lines lt 2 t \"K-Means\"" >> buildKplot_clust.plt
		echo -n ", 'canopyK.dat' u 1:2 with lines lt 1 t \"Canopy\"" >> buildKplot_clust.plt
		echo "load 'buildKplot_clust.plt'" | gnuplot
		ps2pdf step_cluster_runtimes_K.eps
	
		cd ..
	done
}


# buildOverlappingSplots
#
# For the bbcsports dataset and each metric (InternalSimilarity = field 10,
# ExternalSimilarity = field 9, Purity = field 12), overlays one 3-D surface
# per clusterer/reducer combination (kmeans, canopy, genic x pca, tfidf,
# fastmap) in a single plot <dataset>_<metric>_3d.eps. Rows come from
# ~/Dropbox/data/<dataset>.csv_new.csv; the x/y/z columns are produced by
# buildDataFileForSplot from fields 5, 6 and the metric field.
# Side effects: sets the globals datas, reductionMethods, clusterers, data, i,
# metric, field, base, name, j, c and r; creates ./plot_data, writes all
# intermediate files there and leaves the shell inside it on return.
# Returns 1 if plot_data cannot be created or entered.
buildOverlappingSplots() {

	datas="bbcsports"
	reductionMethods="pca tfidf fastmap"
	clusterers="kmeans canopy genic"
	local metrics=( InternalSimilarity ExternalSimilarity Purity )
	local fields=( 10 9 12 )

	# -p: re-running must not fail because the directory already exists.
	mkdir -p plot_data || return 1
	cd plot_data || return 1

	for data in $datas; do
		for i in `seq 0 2`; do
			metric=${metrics[$i]}
			field=${fields[$i]}

			base=$data"_"$metric"_3d.tmp"
			name=$data"_"$metric"_3d.eps"

			echo "set terminal postscript eps enhanced color" > $base.plt
	#			echo "set logscale x" >> $base.plt
	#			echo "set xrange [3:10000]" >> $base.plt
			echo "set xlabel \"N (number of features)\"" >> $base.plt
			echo "set ylabel \"K (number of clusters)\"" >> $base.plt
			echo "set zlabel \"$metric\"" >> $base.plt
			# Closing quote added to the title string.
			echo "set title 'Trade offs of Dimensionality Vs Number of Clusters Vs. $metric'" >> $base.plt
			echo "set output \"$name\"" >> $base.plt
			echo "set surface" >> $base.plt
			echo "set dgrid3d 30,30" >> $base.plt
			echo "set hidden3d" >> $base.plt
			echo "set isosample 40" >> $base.plt

			# Collect one splot clause per clusterer/reducer pair; j numbers the
			# line colours.
			j=0
			echo -n "" > tmp.file
			for c in $clusterers; do
				for r in $reductionMethods; do
					echo "$metric  -  $c  -  $r "
					cat ~/Dropbox/data/$data.csv_new.csv | grep $c | grep $r > $base.$c.$r.1
					buildDataFileForSplot $base.$c.$r.1 5 6 $field > $base.$c.$r.2
					echo "'$base.$c.$r.2' u 1:2:3 with lines lc $j t \"$c-$r\"" >> tmp.file
					j=$(( $j + 1 ))
				done
			done


			# Join the clauses into a single "splot a , b , c" line.
			cat tmp.file | gawk ' BEGIN{ORS=" " } NR==1 { print "splot " $0; next }
										{ print ", " $0;} ' - > splotfile

			cat splotfile >> $base.plt

			echo "load '$base.plt'" | gnuplot

			echo "$metric done"
		done	
	done

}


buildGraphs(){
	# Renders the STEP result graphs. For each result file (full set, AP 203,
	# AP 214) it calls simVsK, simVsN, runTimeVsK and runTimeVsN, in that
	# order, passing the CSV path under $Ourmine, the PNG name and the title.
	local -a _bgTags=( full ap203 ap214 )
	# PNG name suffix and graph title, index-aligned with _bgTags
	local -a _bgSuffixes=( "" _ap203 _ap214 )
	local -a _bgTitles=( "STEP Datasets" "AP 203" "AP 214" )
	local _bgIdx _bgPlotter

	for _bgIdx in 0 1 2; do
		for _bgPlotter in simVsK simVsN runTimeVsK runTimeVsN; do
			# $Ourmine stays unquoted, exactly as it has always been passed
			$_bgPlotter $Ourmine/data/results/step/step_multiN_${_bgTags[$_bgIdx]}.csv \
				${_bgPlotter}${_bgSuffixes[$_bgIdx]}.png "${_bgTitles[$_bgIdx]}"
		done
	done
}

makeLatex() {
	# Writes a minimal LaTeX article to the file $2 containing one centered
	# \includegraphics line per (non-hidden) entry of the current directory
	# whose name matches the grep pattern $1.
	#   $1  grep pattern selecting the file names, e.g. "\.eps"
	#   $2  path of the .tex file to write (overwritten)
	local pattern=$1
	local entry

	{
		# printf '%s\n' keeps the backslashes literal whatever the echo settings are
		printf '%s\n' '\documentclass[12pt]{article}'
		printf '%s\n' '\usepackage{graphicx}'

		printf '%s\n' '\begin{document}'

		printf '%s\n' '\begin{center}'
		# Glob instead of parsing `ls`, so a name containing blanks stays one entry.
		for entry in *; do
			# an unmatched glob is left as the literal "*": skip it
			[ -e "$entry" ] || [ -L "$entry" ] || continue
			if printf '%s\n' "$entry" | grep -q -e "$pattern"; then
				printf '\\includegraphics{%s}\n' "$entry"
			fi
		done
		printf '%s\n' '\end{center}'
		printf '%s\n' '\end{document}'
	} > "$2"

}

buildDataFileForPlot() {
	#buildDataFileForPlot <dataFile> XfieldNo YfieldNo [YfieldNo2 YFieldNo3 ...]
	#
	# Prints one tab-separated row per distinct value found in CSV column
	# XfieldNo: that X value followed, for every requested Y column, by what
	# the external `median` filter outputs for the rows carrying the X value.
	# Every 'k' and 'd' character is removed from the row (labels such as
	# k10 or d5 become plain numbers) and the rows are sorted numerically.
	# Lines containing '#' are ignored.
	# Returns 1 (usage on stderr) when fewer than three arguments are given.
	if [ $# -lt 3 ]; then
		echo "usage: buildDataFileForPlot <dataFile> XfieldNo YfieldNo [YfieldNo2 ...]" >&2
		return 1
	fi

	local data=$1
	local Xfield=$2
	shift 2
	local -a Yfields=( "$@" )
	local xs x yField med row

	xs=$(grep -v '#' "$data" | cut -d, -f "$Xfield" | sort -n | uniq)

	# word splitting of $xs is intended: one X value per word
	for x in $xs; do
		row=$x
		for yField in "${Yfields[@]}"; do
			# Select rows whose X column IS $x (string comparison, blanks
			# trimmed). The former `grep $x` matched substrings anywhere in
			# the line, so e.g. k1 also swallowed the k10 and k100 rows.
			med=$(gawk -F, -v xcol="$Xfield" -v want="$x" '
				/#/ { next }
				{
					v = $xcol
					gsub(/^[ \t]+|[ \t]+$/, "", v)
					if ((v "") == (want "")) print
				}' "$data" | cut -d, -f "$yField" | median)
			row+=$'\t'$med
		done
		printf '%s\n' "${row//[kd]/}"
	done | sort -n
}

buildDataFileForSplot() {
	#buildDataFileForSplot <dataFile> XfieldNo YfieldNo ZfieldNo
	#
	# Prints one tab-separated "x y z" row for every combination of the
	# distinct values of CSV columns XfieldNo and YfieldNo; z is what the
	# external `median` filter outputs for column ZfieldNo over the rows
	# carrying exactly that X and Y value (empty when no row does).
	# Every 'k' and 'd' character is removed from the row and the rows are
	# sorted numerically. Lines containing '#' are ignored.
	# Returns 1 (usage on stderr) when fewer than four arguments are given.
	if [ $# -lt 4 ]; then
		echo "usage: buildDataFileForSplot <dataFile> XfieldNo YfieldNo ZfieldNo" >&2
		return 1
	fi

	local data=$1
	local Xfield=$2
	local Yfield=$3
	local Zfield=$4
	local xs ys x y med row

	xs=$(grep -v '#' "$data" | cut -d, -f "$Xfield" | sort -n | uniq)
	ys=$(grep -v '#' "$data" | cut -d, -f "$Yfield" | sort -n | uniq)

	# word splitting of $xs / $ys is intended: one value per word
	for x in $xs; do
		for y in $ys; do
			# Compare the two columns themselves (as strings, blanks
			# trimmed). The former `grep $x | grep $y` matched substrings
			# anywhere in the line, mixing e.g. the d5 and d50 rows.
			med=$(gawk -F, -v xcol="$Xfield" -v ycol="$Yfield" \
				-v wantX="$x" -v wantY="$y" '
				/#/ { next }
				{
					a = $xcol; gsub(/^[ \t]+|[ \t]+$/, "", a)
					b = $ycol; gsub(/^[ \t]+|[ \t]+$/, "", b)
					if ((a "") == (wantX "") && (b "") == (wantY "")) print
				}' "$data" | cut -d, -f "$Zfield" | median)
			row=$x$'\t'$y$'\t'$med
			printf '%s\n' "${row//[kd]/}"
		done
	done | sort -n
}


plotSuperClusterRunTime() {
	# For every dataset/clusterer pair this writes a gnuplot script under
	# ./plot_data that plots, per reducer (tfidf, pca, fastmap), the
	# normalized run time plus InternalSimilarity and Purity against K, and
	# renders it to plot_data/<clusterer>_<dataset>_<$1>.eps. Afterwards it
	# builds <$1>.tex from the .eps files via makeLatex, runs dvipdf on
	# <$1>.dvi and moves <$1>.pdf next to plot_data.
	#   $1  tag used in the .eps names and as base name of the .tex/.dvi/.pdf
	# Reads ~/Dropbox/data/<dataset>.csv and relies on buildDataFileForPlot,
	# normalize and makeLatex, which are defined elsewhere.
	#
	# NOTE(review): nothing here turns <$1>.tex into <$1>.dvi, so dvipdf only
	# succeeds if that file already exists - presumably a latex run is
	# missing before dvipdf; confirm.
	local datasets="bbcsports ngBias3"
	local clusterers="kmeans genic canopy"
	local reducers=( tfidf pca fastmap )
	# gnuplot line type per reducer, index-aligned with reducers
	local lts=( -1 2 1 )
	# plotted series and their point types; series j>0 is column j+2 of the .dat file
	local metrics=( RunTime InternalSimilarity Purity )
	local pts=( 7 4 2 )
	local d c i j r lt m pt start base name file

	# -p: a plot_data directory left behind by an earlier run is not an error
	mkdir -p plot_data

	for d in $datasets; do
		file=~/Dropbox/data/$d.csv

		for c in $clusterers; do

			base="plot_data/${d}_${c}.tmp"
			name="plot_data/${c}_${d}_$1.eps"

			{
				echo "set terminal postscript "
				echo "set logscale x"
				echo "set xtics 0,5"
				echo "set xrange [3:1500]"
				echo "set xlabel \"K (number of clusters)\""
				echo "set title 'Trade offs of Runtime Vs Cluster Validity assessments with $c in $d'"
				echo "set output \"$name\""
			} > "$base.plt"

			# Collect "<reducer>,<k>,<runtime>" of all reducers in one file so
			# that the run times are normalized together.
			: > "$base.all.rt.dat"
			# (this loop header used to lack the ';' before 'do', a syntax error)
			for i in 0 1 2; do
				r=${reducers[$i]}

				grep "$c" "$file" | grep "$r" > "$base.$r.csv"
				#                                  k rt is pur
				buildDataFileForPlot "$base.$r.csv" 6 8 10 12 > "$base.$r.dat"
				gawk -v r="$r" '{ print r "\t" $0 }' "$base.$r.dat" \
					| tr '\t' ',' | cut -d, -f 1,2,3 >> "$base.all.rt.dat"
			done

			#normalize runtimes
			cut -d, -f 3 "$base.all.rt.dat" | normalize > "$base.rt.norm"
			# replace column 3 by the normalized value found on the same line number
			gawk 'BEGIN { FS = "," }
				NR == FNR { a[FNR] = $0; next }
				{ print $1 ", " $2 ", " a[FNR] }' "$base.rt.norm" "$base.all.rt.dat" > "$base.rt.norm.dat"

			for i in 0 1 2; do
				r=${reducers[$i]}
				lt=${lts[$i]}

				# literal tab in the replacement: "\t" in sed is a GNU extension
				grep "$r" "$base.rt.norm.dat" | cut -d, -f 2,3 | sed $'s/, /\t/g' > "$base.$r.rt"

				# The first reducer opens the plot command with its run-time
				# series; every later series is appended after a comma.
				start=0
				if [[ $i == 0 ]]; then
					printf '%s' "plot '$base.$r.rt' u 1:2 with linespoints pt 7 lt $lt t \"Runtime - $r\"" >> "$base.plt"
					start=1
				fi
				for (( j = start; j <= 2; j++ )); do
					m=${metrics[$j]}
					pt=${pts[$j]}
					if [[ $j == 0 ]]; then
						printf '%s' ", '$base.$r.rt' u 1:2 with linespoints pt $pt lt $lt t \"$m - $r\"" >> "$base.plt"
					else
						printf '%s' ", '$base.$r.dat' u 1:$(( j + 2 )) with linespoints pt $pt lt $lt t \"$m - $r\"" >> "$base.plt"
					fi
				done
			done
			echo "load '$base.plt'" | gnuplot
		done
	done

	cd plot_data || return 1
	makeLatex "\.eps" "$1.tex"
	echo "	dvipdf $1.dvi"
	dvipdf "$1.dvi"
	mv "$1.pdf" ../
	cd ..
}

plotSuperClusterRunTime3d() {
	# For every metric name and dataset this writes one gnuplot script
	# plot_data/<dataset>_<metric>_3d.tmp.plt and renders it to
	# plot_data/<dataset>_<metric>_3d.eps. The script is a single 2D `plot`
	# command holding, for each clusterer/reducer pair, the normalized run
	# time and the four validity measures against column 5 of
	# ~/Dropbox/data/<dataset>.csv_new.csv.
	# Takes no arguments; relies on buildDataFileForPlot and normalize, which
	# are defined elsewhere.
	#
	# Fixed compared with the previous version:
	#  - the dataset name was used in the file names before the dataset loop
	#    had set it;
	#  - `for metric in $metrics` only ever saw the first array element, and
	#    the series list reused the name `metrics`;
	#  - `start` was computed but ignored, so Runtime was plotted twice;
	#  - every clusterer appended another `plot` keyword to the same line and
	#    overwrote the .dat files of the previous clusterer.
	# NOTE(review): each graph carries the same series; the metric only
	# changes the title and file name. The axis labels and the "3d" naming
	# look copied from the splot variant - confirm what was intended.
	local datasets="bbcsports"
	local clusterers="kmeans genic canopy"
	local reducers=( tfidf pca fastmap )

	local metrics=( InternalSimilarity ExternalSimilarity Purity )
	# gnuplot line type per reducer, index-aligned with reducers
	local lts=( -1 2 1 )
	# plotted series and their point types; series j is column j+2 of the .dat file
	local series=( RunTime InternalSimilarity ExternalSimilarity Entropy Purity )
	local pts=( 7 4 6 2 12 )
	local metric d c i j r lt base name file sep

	# -p: a plot_data directory left behind by an earlier run is not an error
	mkdir -p plot_data

	for metric in "${metrics[@]}"; do
		for d in $datasets; do
			file=~/Dropbox/data/$d.csv_new.csv

			base="plot_data/${d}_${metric}_3d.tmp"
			name="plot_data/${d}_${metric}_3d.eps"

			{
				echo "set terminal postscript eps enhanced color"
				echo "set xlabel \"N (number of features)\""
				echo "set ylabel \"K (number of clusters)\""
				echo "set title 'Trade offs of Dimensionality Vs Number of Clusters Vs. $metric'"
				echo "set output \"$name\""
			} > "$base.plt"

			# the very first series opens the command, all others follow a comma
			sep="plot "
			for c in $clusterers; do
				for i in 0 1 2; do
					r=${reducers[$i]}
					lt=${lts[$i]}

					grep "$c" "$file" | grep "$r" > "$base.csv"
					#                                k rt is es ent pur
					buildDataFileForPlot "$base.csv" 5 8 10 9 11 12 > "$base.$c.$r.dat"

					#normalize runtimes (column 2), matching rows by line number
					gawk 'BEGIN { FS = "\t" } { print $2 }' "$base.$c.$r.dat" | normalize > "$base.rt.norm"
					gawk 'BEGIN { FS = "\t"; OFS = "\t" }
						NR == FNR { a[FNR] = $1; next }
						{ $2 = a[FNR]; print $0 }' "$base.rt.norm" "$base.$c.$r.dat" > "$base.norm.dat"
					mv "$base.norm.dat" "$base.$c.$r.dat"

					for j in 0 1 2 3 4; do
						printf '%s' "$sep'$base.$c.$r.dat' u 1:$(( j + 2 )) with linespoints pt ${pts[$j]} lt $lt t \"${series[$j]} - $c - $r\"" >> "$base.plt"
						sep=", "
					done
				done
			done
			printf '\n' >> "$base.plt"
			echo "load '$base.plt'" | gnuplot
		done
	done

#	cd plot_data
#	makeLatex "\.eps" $1".tex"
#	echo "	dvipdf $1".dvi
#	dvipdf $1".dvi"
#	mv $1".pdf" ../
#	cd ../

}

plotSuperCluster() {
	# Splits the result file $1 into per-clusterer, per-reducer and
	# per-(clusterer, reducer) subsets (tmp_*.csv in the current directory)
	# and renders the whole battery of 2D (superClusterPlot) and 3D
	# (superClusterSplot) graphs, each redirected into "<$2>_<...>.ps".
	#   $1  CSV file with the experiment results
	#   $2  dataset label used as prefix of every output file name
	# superClusterPlot / superClusterSplot are defined elsewhere; their
	# arguments are passed exactly as before. The tmp_*.csv files are left
	# in place.
	#
	# Fix: the "K vs" overview plots and the "N vs" overview plots used the
	# same output names, so the former were overwritten. The "N vs" plots keep
	# the historical *_2d_KvNvsEntropyPurity.ps names (those files always ended
	# up holding them); the "K vs" plots now go to *_2d_KvsClustValidity.ps.
	#
	# NOTE(review): column 9 is labelled InternalSim and 10 ExternalSim here,
	# whereas buildOverlappingSplots maps InternalSimilarity to column 10 and
	# ExternalSimilarity to 9 - confirm which is right.
	# NOTE(review): the "K vs Validity" section plots against column 5
	# ("NumDimensions"), and several titles / file names are irregular
	# ("TFIDFK_vs", "_3d_NvsBothSims"); all are kept byte-for-byte.
	local dataset=$2

	# recurring argument groups
	local -a _pscSims=( "InternalSim" 9 "ExternalSim" 10 )
	local -a _pscEntPur=( "Entropy" 11 "Purity" 12 )
	local -a _pscValidity=( "${_pscSims[@]}" "${_pscEntPur[@]}" )
	local -a _pscByN=( 5 "NumDimensions" "VariousValidityMeasures" )
	local -a _pscGrid=( 5 "NumDimensions" 6 "NumClusters" )

	grep tfidf "$1" | grep genic > tmp_genicTfidf.csv
	grep fastmap "$1" | grep genic > tmp_genicFastmap.csv
	grep pca "$1" | grep genic > tmp_genicPCA.csv
	grep tfidf "$1" | grep kmeans > tmp_kmeansTfidf.csv
	grep fastmap "$1" | grep kmeans > tmp_kmeansFastmap.csv
	grep pca "$1" | grep kmeans > tmp_kmeansPCA.csv
	grep tfidf "$1" | grep canopy > tmp_canopyTfidf.csv
	grep fastmap "$1" | grep canopy > tmp_canopyFastmap.csv
	grep pca "$1" | grep canopy > tmp_canopyPCA.csv

	grep genic "$1" > tmp_genic.csv
	grep canopy "$1" > tmp_canopy.csv
	grep kmeans "$1" > tmp_kmeans.csv

	grep fastmap "$1" > tmp_Fastmap.csv
	grep pca "$1" > tmp_PCA.csv
	grep tfidf "$1" > tmp_Tfidf.csv


	#plots
	#N vs Validity
	#each treatment with clust validity (both sims, purity, and entropy) on Y
	superClusterPlot tmp_genicTfidf.csv "Genic_with_TFIDFK_vs_N_vs_Inter/Intra_Sim" "${_pscByN[@]}" "${_pscValidity[@]}" > "${dataset}_genic_tfidf_NvsClustValidity.ps"
	superClusterPlot tmp_genicFastmap.csv "Genic_with_Fastmap_-_N_vs_Inter/Intra_Sim" "${_pscByN[@]}" "${_pscValidity[@]}" > "${dataset}_genic_fastmap_NvsClustValidity.ps"
	superClusterPlot tmp_genicPCA.csv "Genic_with_PCA_-_N_vs_Inter/Intra_Sim" "${_pscByN[@]}" "${_pscValidity[@]}" > "${dataset}_genic_pca_NvsClustValidity.ps"
	superClusterPlot tmp_kmeansTfidf.csv "Kmeans_with_TFIDF_-_N_vs_Inter/Intra_Sim" "${_pscByN[@]}" "${_pscValidity[@]}" > "${dataset}_kmeans_tfidf_NvsClustValidity.ps"
	superClusterPlot tmp_kmeansFastmap.csv "Kmeans_with_Fastmap_-_N_vs_Inter/Intra_Sim" "${_pscByN[@]}" "${_pscValidity[@]}" > "${dataset}_kmeans_fastmap_NvsClustValidity.ps"
	superClusterPlot tmp_kmeansPCA.csv "Kmeans_with_PCA_-_N_vs_Inter/Intra_Sim" "${_pscByN[@]}" "${_pscValidity[@]}" > "${dataset}_kmeans_pca_NvsClustValidity.ps"
	superClusterPlot tmp_canopyTfidf.csv "Canopy_with_TFIDF_-_N_vs_Inter/Intra_Sim" "${_pscByN[@]}" "${_pscValidity[@]}" > "${dataset}_canopy_tfidf_NvsClustValidity.ps"
	superClusterPlot tmp_canopyFastmap.csv "Canopy_with_Fastmap_-_N_vs_Inter/Intra_Sim" "${_pscByN[@]}" "${_pscValidity[@]}" > "${dataset}_canopy_fastmap_NvsClustValidity.ps"
	superClusterPlot tmp_canopyPCA.csv "Canopy_with_PCA_-_N_vs_Inter/Intra_Sim" "${_pscByN[@]}" "${_pscValidity[@]}" > "${dataset}_canopy_pca_NvsClustValidity.ps"

	#K vs Validity
	# Entropy/Purity are passed twice, as before
	superClusterPlot tmp_genicTfidf.csv "Genic_with_TFIDFK_vs_N_vs_Entropy/Purity" "${_pscByN[@]}" "${_pscEntPur[@]}" "${_pscEntPur[@]}" > "${dataset}_genic_tfidf_KvsEntropyPurity.ps"
	superClusterPlot tmp_genicFastmap.csv "Genic_with_Fastmap_-_K_vs_N_vs_Entropy/Purity" "${_pscByN[@]}" "${_pscEntPur[@]}" "${_pscEntPur[@]}" > "${dataset}_genic_fastmap_KvsEntropyPurity.ps"
	superClusterPlot tmp_genicPCA.csv "Genic_with_PCA_-_K_vs_N_vs_Entropy/Purity" "${_pscByN[@]}" "${_pscEntPur[@]}" "${_pscEntPur[@]}" > "${dataset}_genic_pca_KvsEntropyPurity.ps"
	superClusterPlot tmp_kmeansTfidf.csv "Kmeans_with_TFIDF_-_K_vs_N_vs_Entropy/Purity" "${_pscByN[@]}" "${_pscEntPur[@]}" "${_pscEntPur[@]}" > "${dataset}_kmeans_tfidf_KvsEntropyPurity.ps"
	superClusterPlot tmp_kmeansFastmap.csv "Kmeans_with_Fastmap_-_K_vs_N_vs_Entropy/Purity" "${_pscByN[@]}" "${_pscEntPur[@]}" "${_pscEntPur[@]}" > "${dataset}_kmeans_fastmap_KvsEntropyPurity.ps"
	superClusterPlot tmp_kmeansPCA.csv "Kmeans_with_PCA_-_K_vs_N_vs_Entropy/Purity" "${_pscByN[@]}" "${_pscEntPur[@]}" "${_pscEntPur[@]}" > "${dataset}_kmeans_pca_KvsEntropyPurity.ps"
	superClusterPlot tmp_canopyTfidf.csv "Canopy_with_TFIDF_-_K_vs_N_vs_Entropy/Purity" "${_pscByN[@]}" "${_pscEntPur[@]}" "${_pscEntPur[@]}" > "${dataset}_canopy_tfidf_KvsEntropyPurity.ps"
	superClusterPlot tmp_canopyFastmap.csv "Canopy_with_Fastmap_-_K_vs_N_vs_Entropy/Purity" "${_pscByN[@]}" "${_pscEntPur[@]}" "${_pscEntPur[@]}" > "${dataset}_canopy_fastmap_KvsEntropyPurity.ps"
	superClusterPlot tmp_canopyPCA.csv "Canopy_with_PCA_-_K_vs_N_vs_Entropy/Purity" "${_pscByN[@]}" "${_pscEntPur[@]}" "${_pscEntPur[@]}" > "${dataset}_canopy_pca_KvsEntropyPurity.ps"

	#splots
	#similarity
	superClusterSplot tmp_genicTfidf.csv "Genic_with_TFIDFK_vs_N_vs_Inter/Intra_Sim" "${_pscGrid[@]}" "Similarity" "${_pscSims[@]}" > "${dataset}_genic_tfidf_3d_KvNvsBothSims.ps"
	superClusterSplot tmp_genicFastmap.csv "Genic_with_Fastmap_-_K_vs_N_vs_Inter/Intra_Sim" "${_pscGrid[@]}" "Similarity" "${_pscSims[@]}" > "${dataset}_genic_fastmap_3d_KvNvsBothSims.ps"
	superClusterSplot tmp_genicPCA.csv "Genic_with_PCA_-_K_vs_N_vs_Inter/Intra_Sim" "${_pscGrid[@]}" "Similarity" "${_pscSims[@]}" > "${dataset}_genic_pca_3d_KvNvsBothSims.ps"
	superClusterSplot tmp_kmeansTfidf.csv "Kmeans_with_TFIDF_-_K_vs_N_vs_Inter/Intra_Sim" "${_pscGrid[@]}" "Similarity" "${_pscSims[@]}" > "${dataset}_kmeans_tfidf_3d_NvsBothSims.ps"
	superClusterSplot tmp_kmeansFastmap.csv "Kmeans_with_Fastmap_-_K_vs_N_vs_Inter/Intra_Sim" "${_pscGrid[@]}" "Similarity" "${_pscSims[@]}" > "${dataset}_kmeans_fastmap_3d_KvNvsBothSims.ps"
	superClusterSplot tmp_kmeansPCA.csv "Kmeans_with_PCA_-_K_vs_N_vs_Inter/Intra_Sim" "${_pscGrid[@]}" "Similarity" "${_pscSims[@]}" > "${dataset}_kmeans_pca_3d_KvNvsBothSims.ps"
	superClusterSplot tmp_canopyTfidf.csv "Canopy_with_TFIDF_-_K_vs_N_vs_Inter/Intra_Sim" "${_pscGrid[@]}" "Similarity" "${_pscSims[@]}" > "${dataset}_canopy_tfidf_3d_KvNvsBothSims.ps"
	superClusterSplot tmp_canopyFastmap.csv "Canopy_with_Fastmap_-_K_vs_N_vs_Inter/Intra_Sim" "${_pscGrid[@]}" "Similarity" "${_pscSims[@]}" > "${dataset}_canopy_fastmap_3d_KvNvsBothSims.ps"
	superClusterSplot tmp_canopyPCA.csv "Canopy_with_PCA_-_K_vs_N_vs_Inter/Intra_Sim" "${_pscGrid[@]}" "Similarity" "${_pscSims[@]}" > "${dataset}_canopy_pca_3d_KvNvsBothSims.ps"

	#purity/entropy
	superClusterSplot tmp_genicTfidf.csv "Genic_with_TFIDFK_vs_N_vs_Entropy/Purity" "${_pscGrid[@]}" "Entropy/Purity" "${_pscEntPur[@]}" > "${dataset}_genic_tfidf_3d_KvNvsEntropyPurity.ps"
	superClusterSplot tmp_genicFastmap.csv "Genic_with_Fastmap_-_K_vs_N_vs_Entropy/Purity" "${_pscGrid[@]}" "Entropy/Purity" "${_pscEntPur[@]}" > "${dataset}_genic_fastmap_3d_KvNvsEntropyPurity.ps"
	superClusterSplot tmp_genicPCA.csv "Genic_with_PCA_-_K_vs_N_vs_Entropy/Purity" "${_pscGrid[@]}" "Entropy/Purity" "${_pscEntPur[@]}" > "${dataset}_genic_pca_3d_KvNvsEntropyPurity.ps"
	superClusterSplot tmp_kmeansTfidf.csv "Kmeans_with_TFIDF_-_K_vs_N_vs_Entropy/Purity" "${_pscGrid[@]}" "Entropy/Purity" "${_pscEntPur[@]}" > "${dataset}_kmeans_tfidf_3d_NvsEntropyPurity.ps"
	superClusterSplot tmp_kmeansFastmap.csv "Kmeans_with_Fastmap_-_K_vs_N_vs_Entropy/Purity" "${_pscGrid[@]}" "Entropy/Purity" "${_pscEntPur[@]}" > "${dataset}_kmeans_fastmap_3d_KvNvsEntropyPurity.ps"
	superClusterSplot tmp_kmeansPCA.csv "Kmeans_with_PCA_-_K_vs_N_vs_Entropy/Purity" "${_pscGrid[@]}" "Entropy/Purity" "${_pscEntPur[@]}" > "${dataset}_kmeans_pca_3d_KvNvsEntropyPurity.ps"
	superClusterSplot tmp_canopyTfidf.csv "Canopy_with_TFIDF_-_K_vs_N_vs_Entropy/Purity" "${_pscGrid[@]}" "Entropy/Purity" "${_pscEntPur[@]}" > "${dataset}_canopy_tfidf_3d_KvNvsEntropyPurity.ps"
	superClusterSplot tmp_canopyFastmap.csv "Canopy_with_Fastmap_-_K_vs_N_vs_Entropy/Purity" "${_pscGrid[@]}" "Entropy/Purity" "${_pscEntPur[@]}" > "${dataset}_canopy_fastmap_3d_KvNvsEntropyPurity.ps"
	superClusterSplot tmp_canopyPCA.csv "Canopy_with_PCA_-_K_vs_N_vs_Entropy/Purity" "${_pscGrid[@]}" "Entropy/Purity" "${_pscEntPur[@]}" > "${dataset}_canopy_pca_3d_KvNvsEntropyPurity.ps"


	# K vs validity over all rows of one clusterer / reducer (new, collision-free names)
	superClusterPlot tmp_kmeans.csv "All_Kmeans_-_K_vs_vs_ClusterValidity" 6 "NumClusters" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_kmeans_2d_KvsClustValidity.ps"
	superClusterPlot tmp_canopy.csv "All_Canopy_-_K_vs_vs_ClusterValidity" 6 "NumClusters" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_canopy_2d_KvsClustValidity.ps"
	superClusterPlot tmp_genic.csv "All_Genic_-_K_vs_vs_ClusterValidity" 6 "NumClusters" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_genic_2d_KvsClustValidity.ps"
	superClusterPlot tmp_Tfidf.csv "All_TFIDF_-_K_vs_vs_ClusterValidity" 6 "NumClusters" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_tfidf_2d_KvsClustValidity.ps"
	superClusterPlot tmp_PCA.csv "All_PCA_-_K_vs_vs_ClusterValidity" 6 "NumClusters" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_pca_2d_KvsClustValidity.ps"
	superClusterPlot tmp_Fastmap.csv "All_Fastmap_-_K_vs_vs_ClusterValidity" 6 "NumClusters" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_fastmap_2d_KvsClustValidity.ps"


	# N vs validity over all rows of one clusterer / reducer (historical names kept)
	superClusterPlot tmp_kmeans.csv "All_Kmeans_-_N_vs_vs_ClusterValidity" 5 "NumDimensions" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_kmeans_2d_KvNvsEntropyPurity.ps"
	superClusterPlot tmp_canopy.csv "All_Canopy_-_N_vs_vs_ClusterValidity" 5 "NumDimensions" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_canopy_2d_KvNvsEntropyPurity.ps"
	superClusterPlot tmp_genic.csv "All_Genic_-_N_vs_vs_ClusterValidity" 5 "NumDimensions" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_genic_2d_KvNvsEntropyPurity.ps"
	superClusterPlot tmp_Tfidf.csv "All_TFIDF_-_N_vs_vs_ClusterValidity" 5 "NumDimensions" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_tfidf_2d_KvNvsEntropyPurity.ps"
	superClusterPlot tmp_PCA.csv "All_PCA_-_N_vs_vs_ClusterValidity" 5 "NumDimensions" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_pca_2d_KvNvsEntropyPurity.ps"
	superClusterPlot tmp_Fastmap.csv "All_Fastmap_-_N_vs_vs_ClusterValidity" 5 "NumDimensions" "ClusterValidity" "${_pscValidity[@]}" > "${dataset}_fastmap_2d_KvNvsEntropyPurity.ps"


}