/***************************************************************************************/ /* SAS Training - INTACCT */ /* Tilburg, October 22-24 2007 */ /* Philip Joos & Christof Beuselinck */ /* */ /* REPLICATION / EXTENSION OF COLLINS, MAYDEW, WEISS (1997) */ /* "Changes in the value-relevance of earnings and book */ /* values over the past forty years" JAE 24 pp.39-67 */ /***************************************************************************************/ /*---------------------------------------------------------------------*/ /*PART 1: SOME INFO ON SAS DATA FILES STRUCTURE AND WORKING ENVIRONMENT*/ /*---------------------------------------------------------------------*/ /*NOTE: Command lines are in blue-black while explanations are between '/*' and in green*/ /*Create a 'Library' where you store and/or read data from*/ /* local directory Christof */ libname intacct 'C:\1_files-CB\INTACCT\prep CB'; run; libname intacct 'M:\public_html\INTACCT\docs'; run; /* local directory Philip */ libname intacct 'F:\teaching\INTACCT\TILBURGOCT07\SAS\collins'; /* +++++++ INFLATION DATA FILE ++++++ */ /* Getting inflation data from web site of Robert Shiller: http://www.econ.yale.edu/~shiller/data.htm download file "ie_data.xls" convert the excel spreadsheet to SAS format using STATTRANSFER data shiller; set intacct.shiller_inflation; year=input(substr(left(date),1,4),4.0); month0=scan(date,2,"."); if compress(month0)="1" then month0="10"; month2=input(month0,2.0); month=month2; drop month0 month2; attrib date2 format=date9.; date2=intnx('month',mdy(month,15,year),0,'end'); date=" "; attrib date format=date9.; date=date2; drop date2; cpifactor=cpi/208.755; attrib cpifactor label="CPI adjfactor July 2007"; run; data intacct.inflation; set shiller; run; */ /* Now: download data file from 'http://lyrawww.uvt.nl/~beuselic/intacct/inflation.sas7bdat' */ data inflation; set intacct.inflation; run; /*Create contents list of all your variables + relevant info on variable format*/ proc contents data = inflation; run; proc contents data =inflation out=variables; run; /*--------------------------------------------*/ /* PART 2: DOWNLOAD DATA FROM WRDS SERVER */ /*--------------------------------------------*/ /*NOTE: this procedure is based on PC SAS and executes your SAS statements directly on the WRDS server*/ /*More info can be found via the WRDS website or on the document 'PCSAS' on the workshop webpage*/ /*options nocenter nodate nonumber ls=max ps=max msglevel=i; */ %let wrds = wrds.wharton.upenn.edu 4016; options comamid=TCP remote=WRDS; signon username=_prompt_; run; /* retrieve the required data:*/ /*DATA6:total assets, DATA18=INCOME BEFORE EXTRAORD ITEMS, DATA172:net income, DATA60:common equity, DATA25:common shares, DATA27: ADJUSTMENT FACTOR, DATA199:price end fiscal year, DATA17:special items, DATA66: discontinued operations, DATA192:extraordinary items list of data items: http://www.wooster.edu/economics/archive/cstatitems.html or: http://wrds.wharton.upenn.edu/ds/comp/inda/doc.shtml */ rsubmit; libname complib '/wrds/compustat/sasdata'; data collinswrds; set complib.compann ; if data172; if data60; if data25; if data6; if fyr>0 and fyr<=5 then cyear=yeara+1; if fyr>=6 then cyear=yeara; if cyear >=1996 and cyear<=2005; keep CNUM DNUM CONAME GVKEY DATA6 DATA18 DATA46 DATA53 DATA172 DATA60 DATA25 DATA27 DATA199 DATA17 DATA66 DATA192 DATA234 YEARA FYR DATE CYEAR ; run; proc download data=collinswrds out=intacct.collinsdata; endrsubmit; signoff; /* sign off from WRDS */ /*Print only first 50 lines*/ proc print data=intacct.collinsdata(obs=50); var coname fyr date cyear data6 data172 data234; format data6 data172 8.3 date year2.; run; /* ++++ creating/calculating variables of interest ++++ */ /*See footnotes Table 1 (p. 47) for definitions*/ /*Note that we have saved a dataset called 'collinsdata' on our hard-drive and that all following datasets are stored in temporary work-files*/ data comp; set intacct.collinsdata; PPS=data199; if PPS; EPS=data172/data25; /*NOTE: equals the value of data53*/ EPS2=data53; CORE=(data172-data17-data66-data192)/data25; BV=data60/data25; ONE=(data17-data66-data192)/data25; abs_ONE_NI=abs(data17-data66-data192)/abs(CORE); BTM=BV/PPS; NEGNIDUM=0; if EPS<0 then NEGNIDUM=1; CYEARD=mdy(1,1,cyear); * defining SIC industries: 1, 2, 3 digit; if length(compress(dnum))=3 then do; sic1=0; sic2=input(substr(left(dnum),1,1),8.0); sic3=input(substr(left(dnum),1,2),8.0); end; if length(compress(dnum))=4 then do; sic1=input(substr(left(dnum),1,1),8.0); sic2=input(substr(left(dnum),1,2),8.0); sic3=input(substr(left(dnum),1,3),8.0); end; * defining high-tech industries - according to Francis-Schipper 1999; hitech = 0; if SIC3 in (283,357,360,361,362,363,364,365,366,367,368,481,737,873) then hitech=1; attrib hitech label="Hightech dummy"; * intangible intensive - according to Collins et al; intangind = 0; if sic3 in (282,283,357,367) or sic2 in (48,73,87) then intangind=1; attrib intangind label="Intangib-intens industry"; if pps^=. and eps^=. and core^=. and bv^=. and one^=.; run; /*---------------------------------------------------*/ /* PART 3: GETTING SOME BASIC DESCRIPTIVE VALUES */ /*---------------------------------------------------*/ /**** 1. FREQUENCY TABLES ****/ /**************************************/ /* What is the number of observations per calendar year*/ /* PROC FREQ: http://support.sas.com/onlinedoc/913/getDoc/en/procstat.hlp/freq_index.htm */ proc freq data=comp; table cyear; run; /* Frequency of loss firms per year (see Collins p.60) */ proc freq data=comp; table cyear*NEGNIDUM /list; run; proc freq data=comp; table cyear*NEGNIDUM /nocol norow nopercent; run; proc freq data=comp; table cyear*NEGNIDUM /nocol nopercent ; run; /* industry SIC codes */ proc freq data=comp; tables sic1 sic2; run; /* getting output from proc freq in new dataset */ proc freq data=comp /* noprint*/ ; table cyeard*NEGNIDUM / outpct out=freqout ; format cyeard year4.; run; proc sort data=freqout; by negnidum cyeard; run; proc print data=freqout; var cyeard negnidum count pct_row; format cyeard year4. pct_row 8.1; run; /* plotting the loss frequencies */ /* see http://support.sas.com/onlinedoc/913/getDoc/en/graphref.hlp/gplot-plot.htm */ goptions reset=all gunit=pct noborder cback=white ctext=black ftitle=swissb cpattern=black lfactor=25 ftext=swiss htitle=3 htext=2.5 gsfname=outgraph gsfmode=replace device=cgmof97l ; filename outgraph 'C:\1_files-CB\INTACCT\prep CB\loss_frequencies.cgm'; run; symbol1 color=red interpol=join /*spline*/ value=dot height=3 line=1 width=1.5 ; axis1 label=(angle=90 rotate=0 height=3 'Frequency' justify=center) order=(20 to 100 by 10) ; axis2 label=(height=3 'Year' justify=center) order=('1JAN1996'd to '1jan2005'd by year) major=(height=2) minor=none /* minor=(height=1)*/ offset=(1cm); %let opts= vaxis=axis1 vminor=1 vref=40 60 80 lvref=2 hminor=0 overlay haxis=axis2 noframe; proc gplot data=freqout(where=(negnidum=1)); format pct_row 6.0 cyeard year4.; title1 'Loss firms in period 1996-2005'; footnote1 h=1.5 j=l 'Source: Compustat US 2007' j=r 'Intacct Program Oct 22, 2007'; plot pct_row*cyeard / &opts ; run; quit; footnote1; title1 ' '; run;/* this clears footnote1 and title1 */ /* PLOT Frequencies of loss firms and hitech firms in one graph */ proc freq data=comp /*noprint*/ ; table cyeard*hitech / outpct out=freqout2 ; run; data freqout2; set freqout2; if hitech=1; pct_tech=pct_row; keep cyeard pct_tech; run; data freqout; set freqout; if negnidum=1; pct_negni=pct_row; keep cyeard pct_negni; run; data freqall; merge freqout freqout2; by cyeard; run; filename outgraph /*'M:\public_html\INTACCT\docs\losstech_frequencies.cgm'*/'C:\1_files-CB\INTACCT\prep CB\losstech_frequencies.cgm'; run; symbol1 color=red interpol=join value=dot height=3 line=1 width=1.5 ; symbol2 color=black interpol=join value=star height=5 line=4 width=1.5 ; legend1 label=none position=(top center inside) shape=symbol(8,2) value=('loss firms' 'high-tech firms') /*across=2*/ mode=share; axis1 label=(angle=90 rotate=0 height=3 'Frequency' justify=center) order=(20 to 60 by 10) ; axis2 label=(height=3 'Year' justify=center) order=('1JAN1996'd to '1jan2005'd by year) major=(height=2) minor=none offset=(1cm); %let opts= vaxis=axis1 vminor=1 vref=30 40 50 lvref=2 hminor=0 overlay haxis=axis2 noframe legend=legend1 ; proc gplot data=freqall; format pct_negni pct_tech 6.0 cyeard year2.; title1 'Frequencies in period 1996-2005'; footnote1 h=1.5 j=l 'Source: Compustat US 2007' j=r 'Intacct Program Oct 22, 2007'; plot pct_negni*cyeard pct_tech*cyeard / &opts ; run; quit; footnote1; title1 ' '; run;/* this clears footnote1 and title1 */ /**** 2. DESCRIPTIVE STATISTICS OF VARIABLES OF INTEREST ****/ /*****************************************************************/ /* Simple stats: NOBS and AVG/MIN/MAX of EPS per year */ /* program this via an SQL-statement: a very flexible programming tool */ /* http://support.sas.com/onlinedoc/913/getDoc/en/proc.hlp/a000086336.htm */ /* More info on SQL programming language can be found in the manual */ /* on the workshop homepage. */ /* SQL is particularly helpful in merging datasets */ proc sql; create table epsstats as select count(*) as nobs label "nobs", avg(EPS) as avg_eps format 8.3, min(EPS) as min_eps format 8.3, max(eps) as max_eps format 8.3 from comp group by cyear; quit; proc print data=epsstats; run; /* Using PROC UNIVARIATE: 1 variable case */ proc univariate data=comp; var eps; run; /* histogram : http://support.sas.com/onlinedoc/913/getDoc/en/basess.hlp/a001404321.htm */ title 'EPS Histogram period 1996-2005'; run; proc univariate data=comp(where=(eps>-7 and eps<7)) noprint; histogram eps / cframe=white cfill=blue noframe endpoints= -7 to 7 by .25; run; /* multiple variables - descriptives */ /* http://support.sas.com/onlinedoc/913/getDoc/en/proc.hlp/a000146734.htm */ proc means data=comp n mean min p1 p25 median p75 p99 max std; var PPS EPS CORE BV ONE; run; /*You can export this info into a separate output-file, which you may want to export to excel. Note that only basic statistics can be summarized by running these statements (no medians, p1, pp99, etc.)*/ /*Notice command: OUTPUT OUT = XXX*/ proc sort data=comp; by cyear; run; proc means data=comp noprint ; class cyear; var PPS EPS CORE BV ONE; output out=descrip0 n=nobs mean=avg std=std / autoname; output out=descrip1 min=min p1=p1 p25=p25 median=med p75=p75 p99=p99 max=max / autoname; run; /* different format; varname _stat_ value var1 min 1 var1 max 3 var2 min 2 var2 max 8 */ proc means data =comp noprint; by cyear; var PPS EPS CORE BV ONE; ; output out=summary (drop = _freq_ _type_ ) n= mean= std= min= p1= median= p99= max= / autoname ; run ; data summary2( keep = varname cyear _stat_ value ) ; length varname $32 _stat_ $10 ; set summary ; array q (*) _numeric_ ; do i = 1 to dim ( q ) ; varname = vname ( q[i] ) ; x = 33 - index ( reverse ( varname ) , "_" ) ; _stat_ = substr ( varname , x + 1 ) ; varname = substr ( varname , 1 , x - 1 ) ; value = q[i] ; output ; end ; run ; /* different format; varname min max var1 1 3 var2 2 8 */ proc sort data=summary2; by cyear varname ; run; proc transpose data=summary2 out=summary3; by cyear varname; id _stat_; run; data summary3; set summary3; if varname='cyear' then delete; drop _name_; run; proc export data=summary3 dbms=excel2000 replace /*outfile="C:\1_files-CB\INTACCT\prep CB\descripstats.xls";*/ outfile="M:\public_html\INTACCT\docs\descripstats.xls"; run;quit; /* more detailed statistics can be obtained via proc univariate - per variable */ proc univariate data=comp ; var PPS EPS CORE BV ONE BTM ABS_ONE_NI; run; /*----------------------------------------------------------------*/ /* PART 4: OUTLIER REMOVAL PROCEDURES */ /*----------------------------------------------------------------*/ /****OUTLIER DELETION AND DATA CLEAINING***/ /*From the descriptives above: outliers!*/ /*We need to remove/winsorize these in a second 'CLEAN' dataset, which has non-outlier values*/ /*Create an output file, based on the proc univariate statement, in which you identify p1 & p99 (or other percentiles). This dataset then needs to be merged with your original dataset - BY CYEAR! - and 'non-outlier' values of interest can then be calculated easily*/ /*for more info visit: http://support.sas.com/onlinedoc/913/getDoc/nl/procstat.hlp/univariate_sect15.htm*/ /* for example: use percentile info P0.5 P1 P2 P3 P5 P95 P97 P98 P99 P99.5 */ proc sort data=comp; by cyear; run; proc univariate data=comp noprint; var eps btm abs_one_ni ; by cyear; output out=outliers pctlpts=0.5 1 2 3 5 95 97 98 99 99.5 pctlpre= eps_ btm_ abs_one_ni_; run; /* add the percentiles to the original set */ proc sql; create table temp as select a.*,b.* from comp as a left join outliers as b on a.cyear=b.cyear; quit; data comp2; set temp; if eps>eps_0_5 and epsbtm_0_5 and btm See later*/ /*----------------------------------------------------------------*/ /* PART 5: REGRESSION ANALYSIS */ /*----------------------------------------------------------------*/ /* ++++++ Estimating a regression as in Table 3 ++++++++ */ title 'Table 3: regression analysis'; run; proc reg data=comp2; model pps = eps bv; run; quit; /* MACRO TO REMOVE REGRESSION OUTLIERS - see Collins et al p.47 */ /* STEP 1: estimate regression STEP 2: take the studentized residuals from STEP 1 and delete extreme observations i.e. RSTUDENT>4 STEP 3: re-estimate regression excluding extreme observations see: http://support.sas.com/onlinedoc/913/getDoc/en/statug.hlp/reg_sect39.htm */ %MACRO OLSREG; proc sort data=&data; by &class; run; proc reg data=&data outest=params tableout noprint; by &class; model &dvar = &ivar / noint adjrsq; output out=c rstudent=rstudent; run; data df; set c; if not(nmiss(rstudent)=0) then delete; mergevar=1; run; proc sort; by &class; run; data params; set params; if _TYPE_="PARMS"; n=_P_+_EDF_; k=_P_; keep n k &class ; run; data samen; merge df params; by &class; run; data c1 c2; set samen;if abs(rstudent) > &rstudentcutoff then output c1; else output c2; proc reg data=c2 outest=params tableout noprint; by &class; model &dvar = &ivar / noint adjrsq ; proc reg data=c2 outest=params2 tableout noprint; by &class; model &dvar = &ivar / adjrsq ; data params; set params; if _TYPE_="PARMS" or _TYPE_="T"; OBS=_P_+_EDF_; drop _RMSE_ _MODEL_ _DEPVAR_ &dvar _P_ _IN_ _EDF_ _RSQ_ _ADJRSQ_; data params2; set params2; if _TYPE_="PARMS" or _TYPE_="T"; OBS=_P_+_EDF_; keep _ADJRSQ_; data params3; merge params params2; run; title "OLS: REGRESSING &dvar on &ivar"; title2 "SAMPLE DATASET: &data"; proc print data=params3(where=(_TYPE_="PARMS")); format &ivar _ADJRSQ_ 9.3; proc print data=params3(where=(_TYPE_="T")); format &ivar 9.3; /* proc means data=c2; var &dvar &ivar; run; */ proc datasets; delete c1 samen params params2 c df ; run; quit; %MEND OLSREG; /* Estimate annual regressions : 3 REGRESSION MODELS MODEL 1: P = a + b EPS + c BV + epsilon MODEL 2: P = a + b EPS + epsilon MODEL 3: P = a + c BV + epsilon ================================================ */ data comp2; set comp2; onevector=1; run; proc sort data=comp2; by cyear; run; * MODEL 1; %let rstudentcutoff = 4; * consistent with the deletion rule in Collins et al; %let class = cyear; %let data=comp2; %let dvar = pps; %let ivar = onevector eps bv ; %OLSREG; * check how many outliers were removed; proc freq data=comp2; tables cyear; run; * save dataset for further use; data model1data; set c2; model1=1; keep gvkey date model1; run; * MODEL 2; %let class = cyear; %let data=comp2; %let dvar = pps; %let ivar = onevector eps ; %OLSREG; * save dataset for further use; data model2data; set c2; model2=1; keep gvkey date model2; run; * MODEL 3; %let class = cyear; %let data=comp2; %let dvar = pps; %let ivar = onevector bv ; %OLSREG; * save dataset for further use; data model3data; set c2; model3=1; keep gvkey date model3; run; /* Creating the FINAL dataset */ proc sql; create table finaldata as select a.*, b.model1, c.model2, d.model3 from comp2 as a left join model1data as b on a.gvkey=b.gvkey and a.date=b.date left join model2data as c on a.gvkey=c.gvkey and a.date=c.date left join model3data as d on a.gvkey=d.gvkey and a.date=d.date order by a.gvkey, a.date; quit; data finaldata; set finaldata; if model1=. then model1=0; if model2=. then model2=0; if model3=. then model3=0; run; * overview of useful observations from 3 models; proc freq data=finaldata; tables model1*model2*model3; run; data finaldata; set finaldata; if model1=1 and model2=1 and model3=1; run; /*----------------------------------------------------------------*/ /* PART 6: ALL TABLES TOGETHER */ /*----------------------------------------------------------------*/ /* FINAL TABLE 1 : DESCRIPTIVES ON FINAL DATASET */ /* +++++++++++++++++++++++++++++++++++++++++++++ */ proc means data =finaldata ; /*by cyear;*/ var PPS EPS CORE BV ONE; output out=summary(drop = _freq_ _type_ ) n= mean= std= min= p25= median= p75= / autoname ; run ; data summary1( keep = varname /*cyear*/ _stat_ value ) ; length varname $32 _stat_ $10 ; set summary ; array q (*) _numeric_ ; do i = 1 to dim ( q ) ; varname = vname ( q[i] ) ; x = 33 - index ( reverse ( varname ) , "_" ) ; _stat_ = substr ( varname , x + 1 ) ; varname = substr ( varname , 1 , x - 1 ) ; value = q[i] ; output ; end ; run ; proc sort data=summary1; by /*cyear*/ varname ; run; proc transpose data=summary1 out=summary2; by /*cyear*/ varname; id _stat_; run; data summary2; set summary2; if varname='cyear' then delete; drop _name_; run; proc datasets; delete summary summary1; quit; proc export data=summary2 dbms=excel2000 replace /*outfile="C:\1_files-CB\INTACCT\prep CB\descripstatsfinal.xls";*/ outfile="M:\public_html\INTACCT\docs\descripstats2.xls"; run;quit; /* FINAL TABLE 2 : CORRELATIONS ON FINAL DATASET */ /* +++++++++++++++++++++++++++++++++++++++++++++ */ proc corr data=finaldata spearman pearson; var PPS EPS CORE BV ONE; run; ods listing close; ods trace on; proc corr data=finaldata spearman pearson; ods output spearmancorr=spearman pearsoncorr=pearson; var PPS EPS CORE BV ONE; run; ods trace off; ods listing; proc print data=spearman; run; data spearman; set spearman; typevar="S"||variable; run; data pearson; set pearson; typevar="P"||variable; run; data corrs; /*Retain function puts your typevar in front; this is what we want*/ retain typevar; set pearson spearman; run; proc export data=corrs dbms=excel2000 replace /*outfile="C:\1_files-CB\INTACCT\prep CB\corrsfinal.xls";*/ outfile="M:\public_html\INTACCT\docs\corrsfinal.xls"; run;quit; /* TABLE 3: FINAL ANNUAL REGRESSION */ /* +++++++++++++++++++++++++++++++++ */ %let rstudentcutoff = 4; * IF 120: set a very high cutoff such that no observation is removed; * MODEL 1; %let class = cyear; %let data=finaldata; %let dvar = pps; %let ivar = onevector eps bv ; %OLSREG; data model1param; set params3; run; * MODEL 2; %let class = cyear; %let data=finaldata; %let dvar = pps; %let ivar = onevector eps ; %OLSREG; data model2param; set params3; run; * MODEL 3; %let class = cyear; %let data=finaldata; %let dvar = pps; %let ivar = onevector bv ; %OLSREG; data model3param; set params3; run; /* comparing R-squared between 3 models */ proc sql; create table rsquared as select a.cyear, mdy(1,1,a.cyear) as cyeard, a._adjrsq_ as adjrmodel1, b._adjrsq_ as adjrmodel2, c._adjrsq_ as adjrmodel3, a._adjrsq_-c._adjrsq_ as incremEPS, a._adjrsq_-b._adjrsq_ as incremBV from model1param as a left join model2param as b on a.cyear=b.cyear left join model3param as c on a.cyear=c.cyear where a._adjrsq_^=. and c._adjrsq_^=. and b._adjrsq_^=. ; quit; title 'Comparing Adjusted R-squared between the 3 models'; run; proc print data=rsquared(drop= cyeard); format adjrmodel1-adjrmodel3 incremEPS incremBV 8.2; run; goptions reset=all gunit=pct noborder cback=white ctext=black ftitle=swissb cpattern=black lfactor=25 ftext=swiss htitle=3 htext=2.5 gsfname=outgraph gsfmode=replace device=cgmof97l ; filename outgraph 'M:\public_html\INTACCT\docs\rsquared_fig1.cgm'; run; symbol1 color=black interpol=join line=1 width=1.5 ; symbol2 color=blue interpol=join line=2 width=1.5 ; symbol3 color=green interpol=join line=4 width=1.5 ; legend1 label=none position=(top center inside) shape=symbol(8,2) value=('total R2' 'Incr EARN' 'Incr BV') /*across=2*/ mode=share; axis1 label=(angle=90 rotate=0 height=3 'R-squared' justify=center) order=(0 to 0.8 by 0.10) ; axis2 label=(height=3 'Year' justify=center) order=('1JAN1996'd to '1jan2005'd by year) major=(height=2) minor=none offset=(1cm); %let opts= vaxis=axis1 vminor=1 vref=.30 .40 .50 lvref=2 hminor=0 overlay haxis=axis2 noframe legend=legend1 ; proc gplot data=rsquared; format adjrmodel1 incremeps incrembv 6.2 cyeard year2.; title1 'Annual regressions in period 1996-2005'; footnote1 h=1.5 j=l 'Source: Compustat US 2007' j=r 'Intacct Program Oct 22, 2007'; plot adjrmodel1*cyeard incremEPS*cyeard incremBV*cyeard / &opts ; run; quit; footnote1; title1 ' '; run; /* this clears footnote1 and title1 */ /* TABLE 4: ANNUAL REGRESSION CONTROLLING FOR INDUSTRY, ONE-TIME ITEMS, etc. */ /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ /* HIGH-TECH INDUSTRIES (ALSO EXCLUDING BANKS/INSURANCE FIRMS -> SIC1 = 6)*/ %let rstudentcutoff = 120; * set a very high cutoff such that no observation is removed; proc sort data=finaldata; by cyear hitech; run; * MODEL 1; %let class = cyear hitech; %let data=finaldata(where=(sic1^=6)); %let dvar = pps; %let ivar = onevector eps bv ; %OLSREG; data model1param; set params3; run; * MODEL 2; %let class = cyear hitech; %let data=finaldata(where=(sic1^=6)); %let dvar = pps; %let ivar = onevector eps ; %OLSREG; data model2param; set params3; run; * MODEL 3; %let class = cyear hitech; %let data=finaldata(where=(sic1^=6)); %let dvar = pps; %let ivar = onevector bv ; %OLSREG; data model3param; set params3; run; proc sql; create table rsquared as select a.cyear, mdy(1,1,a.cyear) as cyeard, a.hitech, a._adjrsq_ as adjrmodel1, b._adjrsq_ as adjrmodel2, c._adjrsq_ as adjrmodel3, a._adjrsq_-c._adjrsq_ as incremEPS, a._adjrsq_-b._adjrsq_ as incremBV from model1param as a left join model2param as b on a.cyear=b.cyear and a.hitech=b.hitech left join model3param as c on a.cyear=c.cyear and a.hitech=c.hitech where a._adjrsq_^=. and c._adjrsq_^=. and b._adjrsq_^=. ; quit; title 'Comparing Adjusted R-squared between the 3 models'; run; proc print data=rsquared(drop= cyeard); format adjrmodel1-adjrmodel3 incremEPS incremBV 8.2; run; /* ++++++++++++++++++++++++++++++*/ /* EXTRA */ /* ++++++++++++++++++++++++++++++*/ /* INFLATION ADJUSTED DATA: When you report dollar values (such as market value of equity, book value, etc.) over long horizons, you should adjust your time series for inflation (using CPI) Use: inflation dataset (monthly inflation data) */ proc sql; create table finaldataCPI as select a.*, b.cpifactor, pps*cpifactor as ppscpi label='CPI adj price per share' from finaldata as a left join inflation as b on a.cyear=b.year and a.fyr=b.month; quit;