├── data
    ├── lgd.sas7bdat
    ├── hmeq.sas7bdat
    └── ratings.sas7bdat
├── README.md
└── code
    ├── create_library.sas
    ├── CRA_tutorial.sas
    └── exploratory_data_analysis.sas

/data/lgd.sas7bdat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hatemr/SAS-code/master/data/lgd.sas7bdat
--------------------------------------------------------------------------------
/data/hmeq.sas7bdat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hatemr/SAS-code/master/data/hmeq.sas7bdat
--------------------------------------------------------------------------------
/data/ratings.sas7bdat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hatemr/SAS-code/master/data/ratings.sas7bdat
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SAS-code
2 | I have been learning SAS by following the book "Credit Risk Analytics" by Baesens et al. The book discusses credit risk and risk parameters (PD, LGD, EAD) and provides data and SAS code for hands-on practice.
3 | 
--------------------------------------------------------------------------------
/code/create_library.sas:
--------------------------------------------------------------------------------
1 | /*create a library in "My Libraries" called data */
2 | /*used this site as guide: https://communities.sas.com/t5/SAS-Analytics-U/creating-library-in-SAS-University-Edition-Help/td-p/153829*/
3 | 
4 | /*LIBNAME is a global statement and takes effect immediately, so no RUN is needed*/
5 | libname data '/folders/myfolders/sasuser.v94';
6 | 
7 | /*turn the mortgage table from csv to SAS table DATA.MORTGAGE*/
8 | /*this only needs to be run once to create the SAS table*/
9 | data data.mortgage;
10 |     /*firstobs=2 skips the header row*/
11 |     /*dsd: two consecutive commas are read as a missing value instead of being*/
12 |     /*     collapsed into one delimiter, which would shift every later column*/
13 |     /*truncover: a short record leaves the remaining variables missing instead*/
14 |     /*     of continuing the INPUT on the next record*/
15 |     infile '/folders/myfolders/sasuser.v94/mortgage.csv' dsd dlm=',' firstobs=2 truncover;
16 |     /*all columns are read as numeric, in file order*/
17 |     input
18 |         id time orig_time first_time mat_time
19 |         balance_time LTV_time interest_rate_time hpi_time gdp_time uer_time
20 |         REtype_CO_orig_time REtype_PU_orig_time REtype_SF_orig_time
21 |         investor_orig_time balance_orig_time FICO_orig_time LTV_orig_time
22 |         Interest_Rate_orig_time hpi_orig_time
23 |         default_time payoff_time status_time;
24 | run;
--------------------------------------------------------------------------------
/code/CRA_tutorial.sas:
--------------------------------------------------------------------------------
1 | /*This code follows the book Credit Risk Analytics by Baesens et. 
al.*/
2 | /*Run using SAS Studio (free version in University Edition)*/
3 | 
4 | /*Create new table called 'EXAMPLE' in WORK*/
5 | DATA example;
6 |     SET data.mortgage;
7 | 
8 |     /*filter data: WHERE is applied while observations are read, regardless of*/
9 |     /*where it appears in the step, so it is kept next to SET*/
10 |     WHERE default_time=1;
11 | 
12 |     /*delete some observations*/
13 |     IF FICO_orig_time < 500 THEN DELETE;
14 | 
15 |     /*generate new variable; >= so that a score of exactly 500 is categorised*/
16 |     /*(with > 500 it survived the DELETE above but was left with FICO_cat missing)*/
17 |     IF FICO_orig_time >= 500 THEN FICO_cat=1;
18 |     IF FICO_orig_time > 700 THEN FICO_cat=2;
19 | 
20 |     /*drop variable*/
21 |     DROP status_time;
22 | RUN;
23 | 
24 | /*Show summary using PROC MEANS*/
25 | PROC MEANS DATA=data.mortgage;
26 |     VAR default_time FICO_orig_time ltv_orig_time gdp_time;
27 | RUN;
28 | 
29 | /*Example of linear regression (p.21)*/
30 | /*PROC REG is interactive: RUN fits the model, QUIT ends the procedure*/
31 | PROC REG DATA=data.mortgage;
32 |     MODEL default_time = FICO_orig_time ltv_orig_time gdp_time;
33 | RUN;
34 | QUIT;
35 | 
36 | /*Define a macro for linear regression*/
37 | /*  datain: input data set, lhs: dependent variable, rhs: space-separated regressors*/
38 | %MACRO example(datain, lhs, rhs);
39 |     PROC REG DATA=&datain;
40 |         MODEL &lhs = &rhs;
41 |     RUN;
42 |     QUIT;
43 | %MEND example;
44 | 
45 | /*Calling macro for linear regression*/
46 | %example(datain=data.mortgage, lhs=default_time, rhs=FICO_orig_time );
47 | %example(datain=data.mortgage, lhs=default_time, rhs=FICO_orig_time ltv_orig_time);
48 | %example(datain=data.mortgage, lhs=default_time, rhs=FICO_orig_time ltv_orig_time gdp_time);
49 | 
50 | /*Build model and save parameters as 'parameters' in the WORK library*/
51 | /*The listing destination is closed while the model runs and reopened afterwards*/
52 | ODS LISTING CLOSE;
53 | ODS OUTPUT PARAMETERESTIMATES=parameters;
54 | PROC REG DATA=data.mortgage;
55 |     MODEL default_time = FICO_orig_time ltv_orig_time gdp_time;
56 | RUN;
57 | QUIT;
58 | ODS OUTPUT CLOSE;
59 | ODS LISTING;
60 | 
61 | /*Export 'parameters' as export.csv (p.23)*/
62 | PROC EXPORT DATA=parameters
63 |     REPLACE
64 |     DBMS=CSV
65 |     OUTFILE='/folders/myfolders/sasuser.v94/export.csv';
66 | RUN;
67 | 
68 | /*Use IML to find the square root of a number accurate to 3 decimals*/
69 | PROC IML;
70 | 
71 |     /*MySqrt: start from y=1 and repeatedly average y with x/y; the loop stops*/
72 |     /*once the change between successive iterates (w) is below 1e-3*/
73 |     START MySqrt(x);
74 |         y = 1;
75 |         DO UNTIL (w<1e-3);
76 |             z = y;
77 |             y = 0.5#(z+x/z);
78 |             w = ABS(y-z);
79 |         END;
80 |         RETURN(y);
81 |     FINISH;
82 |     t = MySqrt({3,4,7,9}); *apply our function;
83 |     s = SQRT({3,4,7,9}); *apply built-in function;
84 |     diff = t - s; *compare the two outputs;
85 |     PRINT t s diff;
86 | QUIT;
87 | 
88 | PROC SETINIT;
89 | RUN;
--------------------------------------------------------------------------------
/code/exploratory_data_analysis.sas:
--------------------------------------------------------------------------------
1 | /*Chapter 3 of Credit Risk Analytics*/
2 | 
3 | /*Tabulate frequencies of default_time using PROC FREQ*/
4 | PROC FREQ DATA=data.mortgage;
5 |     TABLES default_time;
6 | RUN;
7 | 
8 | /*Plot CDFs and histograms with PROC UNIVARIATE*/
9 | ODS GRAPHICS ON;
10 | PROC UNIVARIATE DATA=data.mortgage;
11 |     VAR FICO_orig_time LTV_orig_time;
12 |     CDFPLOT FICO_orig_time LTV_orig_time;
13 |     HISTOGRAM FICO_orig_time LTV_orig_time;
14 | RUN;
15 | ODS GRAPHICS OFF;
16 | 
17 | /*Show summary statistics using PROC MEANS (option is MAXDEC=, it was misspelled MAXDE=)*/
18 | PROC MEANS DATA=data.mortgage
19 |     N MEAN MEDIAN MODE P1 P99 MAXDEC=4;
20 |     VAR default_time FICO_orig_time LTV_orig_time;
21 | RUN;
22 | 
23 | /*Show Q-Q plot using PROC UNIVARIATE*/
24 | ODS GRAPHICS ON;
25 | PROC UNIVARIATE DATA=data.mortgage NOPRINT;
26 |     QQPLOT FICO_orig_time / NORMAL(MU=EST SIGMA=EST COLOR=LTGREY);
27 | RUN;
28 | ODS GRAPHICS OFF;
29 | 
30 | /*Show variance stats using PROC MEANS*/
31 | PROC MEANS DATA=data.mortgage
32 |     N MIN MAX RANGE QRANGE VAR STD CV MAXDEC=4;
33 |     VAR default_time FICO_orig_time LTV_orig_time;
34 | RUN;
35 | 
36 | /*Bin the FICO and make 2-D Contingency table*/
37 | DATA mortgage1;
38 |     SET data.mortgage;
39 | RUN;
40 | /*No RANKS statement, so in QUINT the variable FICO_orig_time holds the group number, not the score*/
41 | PROC RANK DATA = mortgage1
42 |     GROUPS=5
43 |     OUT=quint(KEEP=id time FICO_orig_time);
44 |     VAR FICO_orig_time;
45 | RUN;
46 | /*NOTE(review): the BY-merge needs both tables ordered by id and time - confirm data.mortgage is stored that way*/
47 | DATA new;
48 |     MERGE mortgage1 quint;
49 |     BY id time;
50 | RUN;
51 | 
52 | PROC FREQ DATA=new;
53 |     TABLES default_time*FICO_orig_time;
54 | RUN;
55 | 
56 | /*Show box plots using PROC BOXPLOT; the input is first sorted by the group variable default_time*/
57 | PROC SORT DATA=mortgage1;
58 |     BY default_time;
59 | RUN;
60 | 
61 | /*Box plot of 
FICO_orig_time*/
62 | ods graphics on;
63 | proc boxplot data=mortgage1;
64 |     /* one box of FICO_orig_time for each value of default_time */
65 |     plot FICO_orig_time*default_time
66 |         / boxwidth=10 cboxes=black idheight=2 idsymbol=circle;
67 | run;
68 | ods graphics off;
69 | 
70 | /* Same box plot, this time for LTV_orig_time */
71 | ods graphics on;
72 | proc boxplot data=mortgage1;
73 |     plot LTV_orig_time*default_time
74 |         / boxwidth=10 cboxes=black idheight=2 idsymbol=circle;
75 | run;
76 | ods graphics off;
77 | 
78 | /* Cross-tabulate default_time against FICO_orig_time in NEW and request chi-square statistics */
79 | proc freq data=new;
80 |     tables default_time*FICO_orig_time / chisq;
81 | run;
82 | 
83 | /* Correlation coefficients, computed on a random subset of the mortgage table: */
84 | /* a row is written only when its uniform draw (seed 123456) falls below 0.01   */
85 | data sample;
86 |     set data.mortgage;
87 |     if ranuni(123456) < 0.01 then output;
88 | run;
89 | 
90 | /* Kendall and Spearman coefficients plus a scatter plot of the two variables */
91 | ods graphics on;
92 | proc corr data=sample
93 |     kendall spearman
94 |     plots(maxpoints=none)=scatter(nvar=2 alpha=.20 .30);
95 |     var FICO_orig_time LTV_orig_time;
96 | run;
97 | ods graphics off;
98 | 
99 | /* Confidence limits for LTV_orig_time (ALPHA=.01); only the BasicIntervals table is shown */
100 | ods select basicintervals;
101 | proc univariate data=data.mortgage cibasic(alpha=.01);
102 |     var LTV_orig_time;
103 | run;
104 | 
105 | /* Location tests of LTV_orig_time against MU0=60; only the TestsForLocation table is shown */
106 | ods graphics on;
107 | ods select testsforlocation;
108 | proc univariate data=data.mortgage mu0=60;
109 |     var LTV_orig_time;
110 | run;
111 | ods graphics off;
--------------------------------------------------------------------------------