#!/usr/bin/gawk -f
########################################################################
# static locomo : row pruning method using nearest neighbor and local calibration with fixed neighborhoods
# Copyright (C) 2007 Omid Jalali
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
########################################################################
#
# Prints the Neighborhood training rows that lie closest to a query row.
# Distance is the sum, over every column except the last, of the squared
# difference between the training value and the query value, each
# difference being divided by that column's observed (Max - Min) range.
#
# The input is comma separated and is read in three passes selected by
# the Pass variable:
#   Pass=1  the query row; its values become Core[].
#   Pass=2  the training rows; values are cached and per-column Min/Max
#           are collected.
#   Pass=3  the same training rows again, in the same order; distances
#           are computed and the raw rows are remembered for printing.
#
# Presumed invocation (inferred from the pass structure -- confirm
# against the calling scripts):
#   gawk -f <script> Pass=1 query.csv Pass=2 train.csv Pass=3 train.csv
#
# Neighborhood (default 10) may be overridden either with
# "-v Neighborhood=N" or with a "Neighborhood=N" operand before the files.
# A value equal to Ignore ("?") marks a missing cell; a column is left
# out of a row's distance when either the training cell or the query
# cell is missing.
########################################################################

BEGIN {
    FS = ",";
    # Only apply the default when the caller did not supply a value, so
    # that -v Neighborhood=N is honoured instead of being overwritten.
    if (Neighborhood == "") Neighborhood = 10;
    Ignore = "?";
}

# Pass 1: remember the query row and reset the per-column extremes.
Pass == 1 {
    TotalFields = NF - 1;          # the last column is not part of the distance
    for (j = 1; j <= TotalFields; j++) {
        Max[j] = -10^20;
        Min[j] = 10^20;
        Core[j] = $j;
    }
}

# Pass 2: cache every training value and track each column's Min/Max.
Pass == 2 {
    TrainRows++;                   # own counter: no dependence on NR offsets
    for (j = 1; j <= TotalFields; j++) {
        Input[TrainRows, j] = $j;
        if ($j == Ignore) continue;    # missing cells do not affect the range
        if (Max[j] < $j) Max[j] = $j;
        if (Min[j] > $j) Min[j] = $j;
    }
}

# Pass 3: compute the normalised squared distance of each training row
# to the query row and keep the raw line so it can be printed later.
Pass == 3 {
    Current++;
    Row[Current] = $0;
    Distance[Current] = 0;
    for (j = 1; j <= TotalFields; j++) {
        # Skip the column when either side is missing; otherwise "?"
        # would be coerced to 0 in the subtraction below.
        if (Input[Current, j] == Ignore || Core[j] == Ignore) continue;
        # The tiny constant keeps the divisor non-zero for constant columns.
        Distance[Current] += ( (Input[Current, j] - Core[j]) / (Max[j] - Min[j] + 10^(-20)) )^2;
    }
}

# Emit the closest rows, nearest first.  Each training row is its own
# candidate (identical rows are not merged) and ties are resolved by
# input order, so exactly min(Neighborhood, rows read) lines are printed.
END {
    for (n = 1; n <= Neighborhood && n <= Current; n++) {
        best = 0;
        for (i = 1; i <= Current; i++) {
            if (i in Printed) continue;
            # Strict "<" keeps the earliest row among equal distances.
            if (best == 0 || Distance[i] < Distance[best]) best = i;
        }
        print Row[best];
        Printed[best] = 1;
    }
}