********************************************************************;
* audit.sas - Dataset Audit Macro. *;
* Macro for auditing a SAS dataset. Produces a report in a *;
* format suitable for HTML output. *;
* *;
* Date. User Comments *;
* 30/08/2006 ChrisW Initial Version. *;
* 27/10/2006 ChrisW Fixed 2 issues. 1st for datasets with no *;
* labels. 2nd Warning with long quoted string. *;
* 05/06/2007 ChrisW Added extra columns, lengths of variables. *;
* Added option to switch off Character or *;
* Number report. *;
* *;
********************************************************************;
/**********
How to Run the macro.
libname test 'C:\tempsas';
title 'JN000000 - Bodgit and Legit';
ods listing close;
ods html path='C:\' (URL=NONE)
file='Audit_report.htm'
style=styles.pshtml;
%audit2(SASHELP.dmgmsg);
ods html close;
ods listing;
***********/
%macro audit2(indset,X=50,debug=0,charrep=Y,numrep=Y,format=htm);
/* Get current Options settings prior to changing them for the macro. */
proc sql noprint;
select setting
INTO :options separated by ' '
from dictionary.options
where optname in('NOTES','SOURCE','SOURCE2','MLOGIC','MPRINT','SYMBOLGEN'
,'DATE','NUMBER','QUOTELENMAX')
;
quit;
%if &debug = 0 %then
%do;
options nomprint nosymbolgen nomlogic nonotes nosource nosource2 nodate nonumber noquotelenmax;
%end;
%else
%do;
options mprint symbolgen mlogic notes source source2 nodate nonumber noquotelenmax;
%end;
/* turn off proc title, represses the "Frequency Procedure" title from Proc Freq */
ods noproctitle;
data _null_;
length today $5;
today = put(today(),5.);
call symput('AUDNUM',today);
run;
/* In case job has been killed partway through and restarted, drop the report */
/* tables if they exist. */
/* If this isnt done, the proc append steps will fail. */
%if %sysfunc(exist(ALLCHAR_MSG_&AUDNUM)) %then %do;
proc sql;
drop table allchar_msg_&AUDNUM;
quit;
%end;
%if %sysfunc(exist(ALLNUM_MSG_&AUDNUM)) %then %do;
proc sql;
drop table allnum_msg_&AUDNUM;
quit;
%end;
/* Set up rundate for report*/
%let rundt = %sysfunc(date(),date9.);
/* Get Dataset name and libname fields */
%let dset = %UPCASE(%scan(&INDSET,-1,'.'));
%if %UPCASE(%scan(&INDSET,1,'.')) = &DSET %then %let libname = WORK;
%else %let libname = %UPCASE(%scan(&INDSET,1,'.'));
%put *****************************************;
%put * Unique Value count threshold = &X;
%put * Library = &LIBNAME;
%put * Dataset = &DSET;
%put * Run Date of Report = &RUNDT;
%put * Tempfile suffix = &AUDNUM;
%put *****************************************;
%put NB: If you cancel this run before the macro completes, submit the following;
%put line to restore your options to their previous state:-;
%put options &options %STR(;);
%put ;
/*Create Formats*/
proc format;
value $pop
' ' = 'Missing'
other = 'Populated'
;
value pop
. = 'Missing'
other = 'Populated'
;
quit;
%local CHARS NUMS LABEL;
/* Get list of Character Variables ... */
proc sql noprint;
select UPCASE(name)
INTO :nums separated by ' '
from dictionary.columns
where LIBNAME = UPCASE("&LIBNAME")
and MEMNAME = UPCASE("&DSET")
and type = 'num'
and name ^= 'cwpsgroup000'
;
/* ... and numeric variables */
select UPCASE(name)
INTO :chars separated by ' '
from dictionary.columns
where LIBNAME = UPCASE("&LIBNAME")
and MEMNAME = UPCASE("&DSET")
and type = "char"
and name ^= 'cwpsgroup000'
;
quit;
title2 "Data Audit on &libname..&dset run on &RUNDT";
%put Analysing Character Variables...;
%if "&CHARS" ne "" %then
%do;
/* Analyse the Character Variables */
%let i = 1;
%let var = %scan(&chars,&i);
/* loop through all the character variables ... */
%do %until (&var = );
%put -- &VAR;
proc sql noprint;
select count(distinct &VAR)
,n(&VAR)
,nmiss(&VAR)
,max(length(&VAR))
into :distinct_values, :pop, :miss, :ALENGTH
from &libname..&dset
;
select label
,length
INTO :label, :VLENGTH
from dictionary.columns
where LIBNAME = UPCASE("&LIBNAME")
and MEMNAME = UPCASE("&DSET")
and UPCASE(NAME) = UPCASE("&VAR")
;
quit;
/* Write out message to the log file */
data tmp_msg_&AUDNUM;
length variable $32 values 8 type $10 label $50 miss pop 8;
variable = "&VAR";
type = 'Character';
Values = &distinct_values;
pop = &pop;
miss = &miss;
label = "&label";
vlength = &VLENGTH;
alength = &ALENGTH;
run;
%if &CHARREP = Y %then
%do;
/* If there's <= the threshold set, do a simple freq */
%if %eval(&distinct_values) LE %eval(&X) %then
%do;
proc freq data=&libname..&dset;
table &VAR /missing;
title3 "Character Variable: &VAR";
run;
%end;
/* otherwise just check the population rate... */
%else
%do;
proc freq data=&libname..&dset;
table &VAR /missing;
format &VAR $pop.;
title3 "Character Variable: &VAR";
run;
%end;
%end;
/* Increment the counter variable, get next macro var to work with */
%let i = %eval(&i + 1);
%let VAR = %scan(&chars,&i);
proc append base=allchar_msg_&AUDNUM data=tmp_msg_&AUDNUM;
run;
%end;
%END;
/* Get work copy of the dataset, add in a mock group variable */
data WORK.audit_numvars_&AUDNUM;
set &libname..&dset;
cwpsgroup000 = 1;
run;
%put Analysing Numeric Variables...;
%if format = pdf %then
%do;
goptions ftext='HELVETICA'
ctext=BLACK
htext=1
htitle=2
device=pdfc
hsize=22cm
vsize=20cm
;
%end;
%else
%do;
goptions ftext='HELVETICA'
ctext=BLACK
htext=1
htitle=2
device=html
hsize=15cm
vsize=15cm
;
%end;
axis1 label=none value=none major=none;
symbol color = salmon;
%if "&NUMS" ne "" %then
%do;
/* Loop through the numeric variables */
%let i = 1;
%let var = %scan(&nums,&i);
%do %until (&var = );
%put -- &VAR;
proc sql noprint;
select count(distinct &VAR)
,n(&VAR)
,nmiss(&VAR)
,min(&VAR)
,max(&VAR)
,median(&VAR)
into :distinct_values, :pop, :miss, :min, :max, :median
from WORK.audit_numvars_&AUDNUM
;
select label
INTO :label
from dictionary.columns
where LIBNAME = UPCASE("&LIBNAME")
and MEMNAME = UPCASE("&DSET")
and UPCASE(NAME) = UPCASE("&VAR")
;
quit;
/* Write out message to the log file */
data tmp_msg_&AUDNUM;
length variable $32 values 8 type $10 label $50 miss pop 8;
variable = "&VAR";
type = 'Numeric';
Values = &distinct_values;
pop = &pop;
miss = &miss;
min = &min;
max = &max;
median = &median;
label = "&label";
run;
%if &NUMREP = Y %then
%do;
%if &FORMAT = pdf %then
%do;
ods pdf startpage=NOW;
ODS layout start rows=2;
ODS region;
%end;
proc boxplot data=WORK.audit_numvars_&AUDNUM;
plot &VAR*cwpsgroup000 / cframe = vligb
boxstyle = schematic
cboxes = dagr
cboxfill = ywh
haxis=axis1
idcolor = red
;
inset nobs='Populated Records'
mean='Mean'
/ header = 'Overall Stats' position=NE cfill=ywh;
title3 h=2 "Numeric Variable: &VAR";
run;
quit;
%if &FORMAT = pdf %then
%do;
ODS layout end;
%end;
%end;
%let i = %eval(&i + 1);
%let VAR = %scan(&nums,&i);
proc append base=allnum_msg_&AUDNUM data=tmp_msg_&AUDNUM;
run;
%end;
%END;
%if %sysfunc(exist(ALLCHAR_MSG_&AUDNUM)) %then %do;
proc report data=allchar_msg_&AUDNUM nowd split='*';
columns variable label type Values pop miss vlength alength;
define variable / display 'Variable*Name';
define label / display 'Label';
define type / display 'Variable*Type';
define Values / display 'Number of*Distinct*Values';
define pop / display 'Number of*Populated*Records';
define miss / display 'Number of*Unpopulated*Records';
define vlength / display 'Length of*Variable';
define alength / display 'Maximum*Length of Values';
title3 'Overview of Character Variables';
run;
%end;
%if %sysfunc(exist(ALLNUM_MSG_&AUDNUM)) %then %do;
proc report data=allnum_msg_&AUDNUM nowd split='*';
columns variable label type Values pop miss min max median;
define variable / display 'Variable*Name';
define label / display 'Label';
define type / display 'Variable*Type';
define Values / display 'Number of*Distinct*Values';
define pop / display 'Number of*Populated*Records';
define miss / display 'Number of*Unpopulated*Records';
define min / display 'Minimum*Value';
define max / display 'Maximum*Value';
define median / display 'Median*Value';
title3 'Overview of Numeric Variables';
run;
%end;
%if %sysfunc(exist(ALLCHAR_MSG_&AUDNUM)) %then %do;
proc sql;
drop table allchar_msg_&AUDNUM;
quit;
%end;
%if %sysfunc(exist(ALLNUM_MSG_&AUDNUM)) %then %do;
proc sql;
drop table allnum_msg_&AUDNUM;
drop table audit_numvars_&AUDNUM;
quit;
%end;
%if %sysfunc(exist(tmp_msg_&AUDNUM)) %then %do;
proc sql;
drop table tmp_msg_&AUDNUM;
quit;
%end;
%put ;
%put *****************************************;
%put Done!;
%put *****************************************;
%put ;
%put ;
options &OPTIONS;
%mend audit2;