# Exploratory tables and plots relating RET1/RET2/RET3 to HS GPA and
# financial-aid variables. Reads one ARFF file and three discretised CSV
# files from the working directory and writes three PDF figures there.
#
# NOTE(review): several functions used below are not provided by the packages
# this script loads; confirm and add the matching library() calls:
#   xtable()               - presumably the 'xtable' package
#   format.df(), na.include - presumably 'Hmisc'
#   lrm()                  - presumably 'rms' (or its predecessor 'Design')

library(foreign)
library(stats)
library(lattice) # histogram() and densityplot()

# NOTE(review): machine-specific absolute path; every relative file name below
# (inputs and PDF outputs) is resolved against it.
setwd("C:/Documents and Settings/anandesh/My Documents/Dissertation/Proposal/FYdata")

# Value used for the dotted red reference line on the RET3 dot charts.
# Presumably the overall RET3 percentage -- TODO confirm.
ret3_ref_pct <- 54.78

# TERMB values selected for the RET2 and RET3 analyses on the full data set.
ret2_terms <- c(
  "199980", "200080", "200180", "200280",
  "200380", "200480", "200580", "200680"
)
ret3_terms <- c(
  "199980", "200080", "200180", "200280",
  "200380", "200480", "200580"
)

# Read one of the discretised CSV exports; "?" marks a missing value.
read_disc <- function(path) {
  read.csv(file = path, header = TRUE, sep = ",", na.strings = "?")
}

d <- read.arff("RTData100609.arff")

# Present the summary stats of numeric and categorical variables.
# print() is explicit so the tables also appear when the file is source()d.
is_num <- vapply(d, is.numeric, logical(1))
is_fac <- vapply(d, is.factor, logical(1))
print(xtable(format.df(t(summary(d[, is_num])), 1)))
print(xtable(t(summary(d[, is_fac]))))

# The original attach()ed dd1, dd2 and dd3 in turn. Every later reference is
# qualified with `data =` or `$`, so the attach() calls were dropped to avoid
# the three data frames masking each other's columns on the search path.
dd1 <- read_disc("RTData100609_RET1_Disc.csv")
mosaicplot(~ HS_GPA + FinAidSTUDENT_TA + RET1, data = dd1,
           color = c("gray", "white"))
mosaicplot(~ HS_GPA + FinAidMOTHER_ED + RET1, data = dd1,
           color = c("gray", "white"))

dd2 <- read_disc("RTData100609_RET2_Disc.csv")
mosaicplot(~ HS_GPA + FinAidSTUDENT_TA + RET2, data = dd2,
           color = c("gray", "white"))
mosaicplot(~ HS_GPA + FinAidMOTHER_ED + RET2, data = dd2,
           color = c("gray", "white"))

dd3 <- read_disc("RTData100609_RET3_Disc.csv")
par(mfrow = c(2, 2))
mosaicplot(~ FinAidSTUDENT_TA + HS_GPA + RET3, data = dd3,
           color = c("gray", "white"), las = 1, off = 0)
mosaicplot(~ FinAidPARENT_HOU + HS_GPA + RET3, data = dd3,
           color = c("gray", "white"), las = 1)
mosaicplot(~ FinAidMOTHER_ED + HS_GPA + RET3, data = dd3,
           color = c("gray", "orange"), las = 1, off = 0)
mosaicplot(~ FinAidSTUDENT_MA + HS_GPA + RET3, data = dd3,
           color = c("gray", "white"), las = 1)

# Code to generate RET3 vs tax form / HS GPA ----
# NOTE(review): the original opened this section with a C-style "/*" marker
# that was never closed before the code; it is treated here as a section
# header (like the two sections below), so the code is live. Confirm.

# Row-wise proportions of RET3 within each HS_GPA bin, one table per
# FinAidSTUDENT_TA value 1..4.
dd3S_TA1 <- subset(dd3, FinAidSTUDENT_TA == 1)
hsgparetdd3TA1prop <- prop.table(table(dd3S_TA1$HS_GPA, dd3S_TA1$RET3),
                                 margin = 1)
dd3S_TA2 <- subset(dd3, FinAidSTUDENT_TA == 2)
hsgparetdd3TA2prop <- prop.table(table(dd3S_TA2$HS_GPA, dd3S_TA2$RET3),
                                 margin = 1)
dd3S_TA3 <- subset(dd3, FinAidSTUDENT_TA == 3)
hsgparetdd3TA3prop <- prop.table(table(dd3S_TA3$HS_GPA, dd3S_TA3$RET3),
                                 margin = 1)
dd3S_TA4 <- subset(dd3, FinAidSTUDENT_TA == 4)
hsgparetdd3TA4prop <- prop.table(table(dd3S_TA4$HS_GPA, dd3S_TA4$RET3),
                                 margin = 1)
hsgparetdd3allTAprop <- cbind(
  hsgparetdd3TA1prop, hsgparetdd3TA2prop,
  hsgparetdd3TA3prop, hsgparetdd3TA4prop
)

# Keep every second column of the combined table. Presumably the second RET3
# level is the "Y" outcome (hence the name) and RET3 has exactly two levels in
# every subset -- TODO confirm.
onlyYs <- hsgparetdd3allTAprop[, seq(2, ncol(hsgparetdd3allTAprop), 2)]
colnames(onlyYs) <- 1:4
rownames(onlyYs) <- c(
  "-inf-2.035", "2.035-2.645", "2.645-2.905", "2.905-3.015",
  "3.015-3.345", "3.345-3.625", "3.625-inf"
)
dd3$FinAidSTUDENT_TA <- factor(dd3$FinAidSTUDENT_TA,
                               levels = 1:4,
                               labels = c("1", "2", "3", "4"))

pdf("RET3HSGPAS_TA.pdf")
layout(matrix(c(1, 2), 2, 2, byrow = TRUE), widths = c(2, 1))
dotchart(t(onlyYs * 100), cex.sub = 0.8)
title(main = "Student Tax Form Type vs. RET3 \n Grouped by HS GPA",
      xlab = "RET3 Percentage",
      ylab = "HS GPA and Student Tax Form Type")
abline(v = ret3_ref_pct, col = "red", lty = 3)
plot(dd3$FinAidSTUDENT_TA, log = "y", ylim = c(1, 20000), col = "white",
     xlab = "Student Tax Form Type", ylab = "log(Frequency)")
dev.off()

# Disabled in the original (prefixed with "--"): font embedding for the PDF.
# CM <- Type1Font("CM",c(paste("cm-lgc/fonts/afm/public/cm-lgc/",c("fcmr8a.afm", "fcmb8a.afm","fcmri8a.afm", "fcmbi8a.afm"),sep=""),"cmsyase/cmsyase.afm"))
# pdfFonts(CM=CM)
# pdf('t.pdf',family="CM")
# embedFonts("tpdf",outfile="RET3HSGPAS_TA.pdf",fontpaths=c("cm-lgc/fonts/type1/public/cm-lgc","cmsyase"))
# Code end ----

# Code to generate parent's household size vs RET3 percentage ----
ttt <- subset(dd3, select = c("RET3", "FinAidPARENT_HOU"))
ttt$FinAidPARENT_HOU <- factor(ttt$FinAidPARENT_HOU,
                               levels = c(0:14, 17),
                               ordered = TRUE)
tttret3prop <- prop.table(table(ttt$FinAidPARENT_HOU, ttt$RET3), margin = 1)

pdf("ParentHouseHoldSizeRET3.pdf")
par(mfrow = c(1, 2))
dotchart(tttret3prop[, 2] * 100, yaxt = "n")
axis(2, at = seq(0, 17, length.out = 6), las = 1)
title(sub = "Parent's Household Size vs. RET3",
      xlab = "RET3 Percentage",
      ylab = "Parent's Household Size")
abline(v = ret3_ref_pct, col = "red", lty = 3)
# NOTE(review): "Historgram" in the subtitle looks like a typo for
# "Histogram"; the label is left unchanged here.
hist(dd3$FinAidPARENT_HOU, las = 1, main = NULL, xlab = NULL,
     sub = "Historgram of Parent's Household Size", right = TRUE)
dev.off()
# Code end ----

# Code to generate parent's education level vs RET3 percentage ----
ed_labels <- c(
  "Middle school/Jr. high", "High school",
  "College or beyond", "Other/unknown"
)
ttt <- subset(dd3, select = c("RET3", "FinAidFATHER_ED", "FinAidMOTHER_ED"))
ttt$FinAidFATHER_ED <- factor(ttt$FinAidFATHER_ED,
                              levels = 1:4, ordered = TRUE)
ttt$FinAidMOTHER_ED <- factor(ttt$FinAidMOTHER_ED,
                              levels = 1:4, ordered = TRUE)
tttret3propF <- prop.table(table(ttt$FinAidFATHER_ED, ttt$RET3), margin = 1)
tttret3propM <- prop.table(table(ttt$FinAidMOTHER_ED, ttt$RET3), margin = 1)
pedY <- cbind(tttret3propF[, 2] * 100, tttret3propM[, 2] * 100)
colnames(pedY) <- c("Father's Education Level", "Mother's Education Level")
rownames(pedY) <- ed_labels

pdf("ParentEducationvsRET3.pdf")
# Dot chart fills the left column; the two frequency plots stack on the right.
layout(matrix(c(1, 2, 1, 3), 2, 2, byrow = TRUE), widths = c(3, 1))
dotchart(pedY)
title(main = "Parent's Education Level vs. RET3", xlab = "RET3 Percentage")
abline(v = ret3_ref_pct, col = "red", lty = 3)
plot(ttt$FinAidFATHER_ED, horiz = TRUE, names.arg = ed_labels, las = 1)
plot(ttt$FinAidMOTHER_ED, horiz = TRUE, names.arg = ed_labels, las = 1)
dev.off()
# Code end ----

# Tax form type x HS GPA mosaics for RET1..RET3, side by side.
# The original passed `na.action(na.include)` as a positional argument, which
# calls the na.action() generic instead of setting the option; it is now
# passed by name. NOTE(review): na.include is presumably Hmisc's -- confirm.
par(mfrow = c(1, 3))
mosaicplot(~ FinAidSTUDENT_TA + HS_GPA + RET1, data = dd1,
           color = c("gray", "white"), las = 1, na.action = na.include)
mosaicplot(~ FinAidSTUDENT_TA + HS_GPA + RET2, data = dd2,
           color = c("gray", "white"), las = 1, na.action = na.include)
mosaicplot(~ FinAidSTUDENT_TA + HS_GPA + RET3, data = dd3,
           color = c("gray", "white"), las = 1, na.action = na.include)

# Rows of the full data set restricted to the TERMB values used for RET2/RET3.
d_ret2 <- d[d$TERMB %in% ret2_terms, ]
d_ret3 <- d[d$TERMB %in% ret3_terms, ]

# Fits of each RET outcome on HS_GPA; printed explicitly so the results also
# appear when the file is source()d.
print(lrm(formula = RET1 ~ HS_GPA, data = d, na.action = na.pass))
print(lrm(formula = RET2 ~ HS_GPA, data = d_ret2, na.action = na.pass))
print(lrm(formula = RET3 ~ HS_GPA, data = d_ret3, na.action = na.pass))

# (A stray, incomplete `plo` token stood here in the original; removed.)
coplot(d$HS_GPA ~ d$RET1 | d$FinAidPARENT_HOU)

par(mfrow = c(2, 2))
plot(d$RET1, d$HS_GPA, sub = "HS GPA vs RET1",
     xlab = "RET1", ylab = "HS GPA", las = 1)
plot(d$FinAidPARENT_HOU, d$RET1, sub = "Parent Household Size vs RET1",
     xlab = "Household Size", ylab = "RET1", las = 1)
# NOTE(review): "Marital Size" is probably meant to read "Marital Status";
# the label is left unchanged here.
plot(d$FinAidPARENT_MAR, d$RET1, sub = "Parent Marital Size vs RET1",
     xlab = "Marital Status", ylab = "RET1", las = 1)
plot(d$RET1, d$PercentileRankMaxACT, sub = "ACT Percentile vs RET1",
     xlab = "RET1", ylab = "ACT Percentile", las = 1)

# Lattice plots are only drawn when printed, so print() is explicit to make
# them render under source() as well as at the console.
print(histogram(RET1 ~ HS_GPA | factor(FinAidPARENT_MAR), data = d))
print(densityplot(RET1 ~ HS_GPA | factor(FinAidPARENT_MAR), data = d,
                  plot.points = FALSE, ref = TRUE))
print(densityplot(RET2 ~ HS_GPA | factor(FinAidPARENT_MAR), data = d_ret2,
                  plot.points = FALSE, ref = TRUE))
print(densityplot(RET3 ~ HS_GPA | factor(FinAidPARENT_MAR), data = d_ret3,
                  plot.points = FALSE, ref = TRUE))

# NOTE(review): par(mfrow) configures base graphics; lattice output does not
# use it, so the three plots below will not share one page. Kept because the
# setting stays in force for later base plots.
par(mfrow = c(1, 3))
print(densityplot(RET1 ~ HS_GPA | factor(FinAidSTUDENT_TA), data = d,
                  plot.points = FALSE, ref = TRUE))
print(densityplot(RET2 ~ HS_GPA | factor(FinAidSTUDENT_TA), data = d_ret2,
                  plot.points = FALSE, ref = TRUE))
print(densityplot(RET3 ~ HS_GPA | factor(FinAidSTUDENT_TA), data = d_ret3,
                  plot.points = FALSE, ref = TRUE))

# Row-wise RET percentages within each HS_GPA bin, one dot chart per outcome.
par(mfrow = c(3, 1), cex.lab = 0.5)
hsgparet1prop <- prop.table(table(dd1$HS_GPA, dd1$RET1), margin = 1)
dotchart(hsgparet1prop * 100,
         main = "Percentage of RET1 \n Grouped by HS GPA",
         xlab = "Percentage", pch = 19, las = 1, xlim = c(10, 90))
hsgparet2prop <- prop.table(table(dd2$HS_GPA, dd2$RET2), margin = 1)
dotchart(hsgparet2prop * 100,
         main = "Percentage of RET2 \n Grouped by HS GPA",
         xlab = "Percentage", pch = 19, las = 1, xlim = c(10, 90))
hsgparet3prop <- prop.table(table(dd3$HS_GPA, dd3$RET3), margin = 1)
# The original immediately overwrote hsgparet3prop with the table below, so
# the RET3 charts labelled "Grouped by HS GPA" / "by HS GPA" actually showed
# tax form type. Disabled so the data matches the labels and the RET1/RET2
# charts; re-enable (and retitle) if the tax-form view was intended.
# hsgparet3prop <- prop.table(table(dd3$FinAidSTUDENT_TA, dd3$RET3), margin = 1)
dotchart(hsgparet3prop * 100,
         main = "Percentage of RET3 \n Grouped by HS GPA",
         xlab = "Percentage", pch = 19, las = 1, xlim = c(10, 90))

# The same proportions as stacked bars.
par(mfrow = c(3, 1))
barplot(t(hsgparet1prop * 100), legend.text = rownames(t(hsgparet1prop)),
        sub = "RET1 Percentage by HS GPA", cex.names = 0.65)
barplot(t(hsgparet2prop * 100), legend.text = rownames(t(hsgparet2prop)),
        sub = "RET2 Percentage by HS GPA", cex.names = 0.65)
barplot(t(hsgparet3prop * 100), legend.text = rownames(t(hsgparet3prop)),
        sub = "RET3 Percentage by HS GPA", cex.names = 0.65)
plot(t(hsgparet1prop * 100))