/************************polynomial regression***********************/ /**Let's look at the relationship between test perfromance and anxiety**/ data anxiety; input anxiety perform; cards; 1 11 1 13 2 24 2 20 3 42 3 36 4 48 4 42 5 46 5 38 6 23 6 19 7 9 7 11 run; proc sgplot data=anxiety; scatter x=anxiety y=perform; run; proc reg data=anxiety; model perform=anxiety; run; data anxiety; set anxiety; anxiety2= anxiety *anxiety ; run; /**************run a 2nd order polynomial regression***************/ proc reg data=anxiety; model perform=anxiety anxiety2 /vif pcorr2 clb influence; OUTPUT OUT=outreg1 P=y_hat R=resid RSTUDENT=rstudent student=standardized COOKD=cookd h=leverage dffits=dffit; run; quit; /*****plot best fitted line*************/ PROC SGPLOT DATA=OUTREG1; scatter X=anxiety Y=perform; series x=anxiety y=y_hat; yaxis values=(0 to 50 by 5) ; xaxis values=(0 to 8 by 1) ; RUN; /*********plot observed versus predicted values *************/ proc sgplot data=outreg1; scatter x=perform y=y_hat; lineparm x=0 y=0 slope=1; xAXIS LABEL="Observed Values" grid; yAXIS LABEL="Predicted Values" grid; RUN; /*****compare full versus reduced model****/ /*************Reduced Model************/ proc mixed data=anxiety method=ml; model perform=anxiety; run;QUIT; /*************Full Model************/ proc mixed data=anxiety method=ml; model perform=anxiety anxiety2; run;QUIT; /************************* /**LRT=2[L(F)-L(R)] =-2L(R)-[-2L(F)] =113-82.1=30.9 ΔG2 = G2 from Reduced model −G2 from Full model LRT~CHI-SQUARE(df=#of the difference in the number of arameters between the full and reduced model) **********************/ proc iml; title 'LRT Statistic'; LRT= 113-82.1; print LRT; run;quit; proc reg data=anxiety outest=est ; m1: model perform=anxiety/aic; m2: model perform=anxiety anxiety2/aic; RUN;quit; proc print data=est; run; /*******LRT= (119-2*3)-(90.1-2*4)=30.9****/ /****ΔG2 = G2 from Reduced model −G2 from Full model***/ /*********SAS calculates AIC=-2LL + 2q where q=p+1***/ /**********************************Hypothesis Testing**************/ /*** /***H0: The Reduced model is adequate (true) ***/ /***H1: The Full model is adequate (true) ****/ /*** /***Alternatively/Equivalently: /*** /***H0: beta2=0 ***/ /***H1: beta2!=0 ****/ /******************************************************************/ /*LRT~CHI-SQUARE(df=#of the difference in the number of arameters between the full and reduced model)*/ proc iml; title 'Calculate p-value for the Likelihood Ratio Test'; LRT= 30.9; pval = 1-probchi(LRT,1); print pval; run;quit; /***************F-test F-statistic=([SSE(R) − SSE(F)]/[DF(R) − DF(F)])/[SSE(F)/DF(F)] F-statistic~F(DF(R) − DF(F),DF(F)) F-statistic=((2621.07143-288.47619)/(12-11))/(288.47619/11)=88.94511 *********************/ proc iml; title 'F Statistic'; F= ((2621.07143-288.47619)/(12-11))/(288.47619/11); print F; run;quit; /**F-statistic~F(DF(R) − DF(F),DF(F))**/ proc iml; title 'Calculate p-value for the F-Test'; F= 88.945114; pval = 1-probF(F,1,11); print pval; run;quit; /***Test the Assumptions of normality and constant variance****/ /* a. Checking Normality of Residuals*/ title "Checking Residuals for Normality"; proc capability data=outreg1 graphics normaltest; var resid; histogram resid/normal; run; proc capability data=outreg1 graphics normaltest; var standardized; histogram standardized/normal; run; ods graphics on; proc univariate data=outreg1 ; var standardized; histogram / normal; qqplot /normal(mu=est sigma=est color=red) square;; run;quit; /* b. Checking Homoscedasticity of Residuals**/ title "Checking Homoscedasticity of Residuals"; PROC SGPLOT DATA=OUTREG1; SCATTER X=Y_HAT Y=resid; refline 0 / axis=Y; RUN; PROC SGPLOT DATA=OUTREG1; SCATTER X=Y_HAT Y=standardized; refline 0 2 -2 / axis=Y; RUN; /****Breusch-Pagan test for heteroskedasticity Ho: Constant variance******/ title "Breusch-Pagan test for heteroskedasticity"; proc model data=anxiety; parms beta0 beta1 beta2 ; perform=beta0+beta1*anxiety+beta2*anxiety2; fit perform /white pagan=(1 anxiety anxiety2); run; /******Check for outliers and influential points***/ /***a. identify outlier points (in the dependent variable) with standardized or studentized residuals greater than 2 in magnitude****/ title "outlier points (in the dependent variable)"; PROC PRINT DATA=OUTREG1; WHERE abs(rstudent) > 2 | abs(standardized) > 2; RUN; /***b. identify outliers (in the independent variables)***/ /***maximum acceptable leverage is 2p/n or (2k+2)/n : p=# of parameters, k=# of predictors***/ title "outliers (in the independent variables)"; PROC PRINT DATA=OUTREG1; WHERE leverage GT 2*3/14; RUN; /***c. identify influential points***/ /****highly influential points are points with di>1 or (to be conservative) points with di>4/n*****/ title "influential points"; PROC PRINT DATA=OUTREG1; WHERE cookd GT 1; RUN; PROC PRINT DATA=OUTREG1; WHERE cookd GT 4/14; RUN; PROC PRINT DATA=OUTREG1; WHERE cookd GT 6/14; RUN;