Sorry, but this is my first post and it's long. But I'm desperate for help at this point because I'm already behind schedule.
We have two production macros that our load files go through -- PRAVALIDATE and PRALOAD.
The way it works, sometimes more than 1 file will be in the FARM landing zone that the file is pushed to from our Unica environment. The file naming convention is “datetime_ci00cmpid_userid.fileext” (i.e. 210329:05:52:35_ci006638_testtmp15.pending15) The PRAVALIDATE reads in all the filenames in the landing zone and runs macro code to properly parse the filename and create a load file in SAS format that then runs through the PRALOADmacro. However, it only validates that the fields in the ECMA file are the correct ones (and in the correct order) for the praload file, depending on the file extension type. It puts the parsed portion of the fields into the PRALOAD macro call line (i.e. %praload(indata =final,campaign_id=&campid,dest=&filetype,usr=&ecma_uid); The PRALOAD macro is the one that validates the actual data.
We wanted to add extra validation in PRAVALIDATE to catch the following errors in the actual file names:
1. missing the two 00's in the campaign_id OR
2. had too many "words" (i.e. more than two underscores before file extension)
Currently, no error reports are produced if the load fails due to the above errors. (Also, this being a production job, we don’t get any SAS logs.) If the file has the 00 error, there is a condition in PRALOAD that will cause the program to fail. However, the extra word error is still allowing the file to pass the existing validations in PRALOAD. The file won't get loaded, but there's no error report produced to tell us WHY it wasn’t loaded. We are not only trying to fix that but also produce a report that tells you what's wrong with the filename.
The new code I created in the PRAVALIDATEmacro was/is technically good. It identifies the error and spits out the error report. The following issues only seem to occur if there is more than 1 file in the landing zone:
1. I haven’t figured out how to get the code to abend for the bad file after spitting out the report without also stopping the processing of any other files that may be in line after it.
2. if there is an instance that more than 1 bad file being read in at once, the values going into the output file name of the 2nd bad file overwrites the values of the first but the error message itself from the 1st file is retained in the report. Depending on how quickly SAS processes the files, I may end up with only 1 report or two reports with filenames that are identical except for a minute difference in the timestamp. I've attached screenshots of the output
Below is the macro code (I just put one example of the validation by file extension (there are 24 possible types!):
%LET indir = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/input; /*input files directory*/
%LET bkdir = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/backup; /*backup dir post file processing*/
%LET bkdir_err = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/bkup_err/; /*backup files with wrong*/
%LET bkdir_zero = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/bkup_err; /*backup empty files*/
%let outaud = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/pra_out/audit;
filename indata pipe "ls &indir";
data filein;
infile indata truncover; /* infile statement for file names */
input fname :$100.; /* read the file names from the directory */
call symput ('num_files',strip(_n_)); /* store the record number in a macro variable */
run;
data filein2;
set filein;
length fname $100 file_type $3 file_ext $10 ci_pos 3 camp_id cid 6;
file_ext = scan(strip(fname), -1);
ci_pos = find(compress(upcase(fname)),'CI');
camp_id = substr(compress(fname),ci_pos +2,6);
* cid = input(camp_id, 6.);
userid_start = find(compress(fname),'_',20);
userid_end = find(compress(fname),'.');
ecma_uid = substr(compress(fname),userid_start+1, userid_end - userid_start -1);
/* error validation fields */
chkname = index(lowcase(fname),'ci00');
period = index(fname,'.');
fname2 = substr(fname,1,period-1);
chkchar = count(fname2,'_');
if chkchar > 2 then do;
cid = input(camp_id,6.);
errmsg = "Too many words in filename";
end;
else if chkname = 0 then do;
cid = input(compress('99'||camp_id),6.);
errmsg = "Missing leading zeroes in filename CI";
end;
else do;
cid = input(camp_id,6.);
errmsg = '';
end;
if file_ext = 'outcome' then file_type = 'oc';
else if file_ext = 'vetting' then file_type = 'vet';
else if file_ext = 'control' then file_type = 'ctl';
else if file_ext = 'cancel1' then file_type = 'c1';
else if file_ext = 'cancel2' then file_type = 'c2';
else if file_ext = 'cancel3' then file_type = 'c3';
else if file_ext in ('pending' ,'pending0' ,'pending1' ,'pending2' ,'pending3','pending4' ,
'pending5' ,'pending6' ,'pending7' ,'pending8' ,'pending9','pending10',
'pending11','pending12','pending13','pending14','pending15') then file_type = 'pd'; /* AdlC - add template 15 */
else file_type = 'inv';
run;
/* Create a macro to iterate over the filenames, read them in, and process the file */
%macro doit;
%do filecounter=1 %to &num_files;
/*========================================================
START - VALIDATE FILE NAME AdlC 31Mar2021
*/
proc sql;
select * from filein2 where errmsg is not null;
quit;
%if &sqlobs > 0 %then %do;
%let dte = %sysfunc (today(), yymmdd6.);
%let tme = %sysfunc (time(), tod8.);
data _null_;
set filein2;
filename1 = "CMAUI_audit_output_" || "&dte" || ':' || "&tme" || '_filein2_' || strip(camp_id) || '_' || strip(ecma_uid) || ".pdf" ;
call symput ('afilnme1', filename1);
run;
filename outaudt1 "&outaud./&afilnme1";
ods listing close;
ods pdf file= outaudt1;
proc print data=filein2 (obs=1);
var fname errmsg;
title "check file name for mistakes";
run;
ods pdf close;
run;
%end;
/*
END - VALIDATE FILE NAME AdlC 31Mar2021
*========================================================*/
proc sql noprint;
select fname, file_ext, file_type, cid, camp_id, ecma_uid into :filename, :fileext, :filetype, :campid, :camp_id, :ecma_uid
from filein2 (firstobs = &filecounter obs = &filecounter);
quit;
filename fm "&indir/&filename";
%if %sysfunc(compress("&fileext")) = "pending" %then %do; /*pd, oc, vet file*/
data ds;
infile fm delimiter='|' dsd truncover lrecl=1500;
attrib
campaign_id length= 4 format= 6. label= 'Campaign ID'
channel_type_id length= $8 format= $8. label= 'Channel Type ID'
connection_number length= 6 format= 10. label= 'Connection Number'
portfolio_number length= 4 format= 8. label= 'Portfolio Number'
cell_number length= $6 format= $6. label= 'Campaign Cell Number'
treatment_id length= 4 label= 'Treatment Id'
topic_id length= 4 label= 'Topic ID'
creation_datetime length= 8 format= datetime20. label= 'PRA Creation Date/Time'
effective_date length= 8 format= date9. label= 'PRA effective date'
expiry_date length= 8 format= date9. label= 'PRA expiry date'
expected_benefit length= 3 format= 7.2 label= 'Expected Benefit'
acct_transit_number length= 4 format= Z5. label= 'Account Transit Number'
account_number length= $20 format= $20. label= 'Account Number'
related_treatment_id length= 4 label= 'Related Treatment Id'
ecma_instance_id length= 6 format= z9. label= 'ECMA INSTANCE ID'
;
input var1 : $11.
CONNECTION_NUMBER : 10.
PORTFOLIO_NUMBER : 8.
CAMPAIGN_ID : 4.
TOPIC_ID : 5.
TREATMENT_ID : 5.
CELL_NUMBER : $char6.
CHANNEL_TYPE_ID : 3.
EFFECTIVE_DATE : date9.
EXPIRY_DATE : date9.
CREATION_DATETIME : datetime20.
EXPECTED_BENEFIT : 7.2
ACCOUNT_NUMBER : $char20.
ACCT_TRANSIT_NUMBER : 5.
RELATED_TREATMENT_ID : 5.
ECMA_INSTANCE_ID : 9.
;
run;
data final tr;
set ds;
if compress(var1) = 'TRAILER' then output tr;
else output final;
run;
data final(drop=var1);
set final;
party_number = input(var1, 11.);
if party_number = 0 then delete;
if portfolio_number < 1 or connection_number < 1 then delete;
if portfolio_number > 99999 then portfolio_number = 99999;
run;
data IVR REST;
set final;
if channel_type_id in (3,17) then output IVR;
else output REST;
run;
proc sort data=IVR;
by party_number channel_type_id;
run;
data IVR;
set IVR;
by party_number;
if first.party_number;
run;
data final;
set rest ivr;
run;
proc sort data=final;
by party_number;
run;
%LET FILECNT = 0;
%LET DSID = %SYSFUNC(OPEN(final));
%LET FILECNT = %SYSFUNC(ATTRN(&DSID,NOBS));
%LET DSID = %SYSFUNC(CLOSE(&DSID));
%if &FILECNT = 0 %then %do;
/*Move files with 0 or 1 record to BKUP0*/
data _null_;
x "mv /&indir/&filename/&bkdir_zero";
run;
%end;
%else %do;
%test_praload(indata =final,campaign_id=&campid,dest=&filetype,usr=&ecma_uid);
%end;
%end;
…more
%else %do;
%put %sysfunc(compress("&fileext")) 'INVALID file';
data _null_;
x "mv /&indir/&filename/&bkdir_zero";
run;
%end;
%end;
%mend;
%doit;
We have two production macros that our load files go through -- PRAVALIDATE and PRALOAD.
The way it works, sometimes more than 1 file will be in the FARM landing zone that the file is pushed to from our Unica environment. The file naming convention is “datetime_ci00cmpid_userid.fileext” (i.e. 210329:05:52:35_ci006638_testtmp15.pending15) The PRAVALIDATE reads in all the filenames in the landing zone and runs macro code to properly parse the filename and create a load file in SAS format that then runs through the PRALOADmacro. However, it only validates that the fields in the ECMA file are the correct ones (and in the correct order) for the praload file, depending on the file extension type. It puts the parsed portion of the fields into the PRALOAD macro call line (i.e. %praload(indata =final,campaign_id=&campid,dest=&filetype,usr=&ecma_uid); The PRALOAD macro is the one that validates the actual data.
We wanted to add extra validation in PRAVALIDATE to catch the following errors in the actual file names:
1. missing the two 00's in the campaign_id OR
2. had too many "words" (i.e. more than two underscores before file extension)
Currently, no error reports are produced if the load fails due to the above errors. (Also, this being a production job, we don’t get any SAS logs.) If the file has the 00 error, there is a condition in PRALOAD that will cause the program to fail. However, the extra word error is still allowing the file to pass the existing validations in PRALOAD. The file won't get loaded, but there's no error report produced to tell us WHY it wasn’t loaded. We are not only trying to fix that but also produce a report that tells you what's wrong with the filename.
The new code I created in the PRAVALIDATEmacro was/is technically good. It identifies the error and spits out the error report. The following issues only seem to occur if there is more than 1 file in the landing zone:
1. I haven’t figured out how to get the code to abend for the bad file after spitting out the report without also stopping the processing of any other files that may be in line after it.
2. if there is an instance that more than 1 bad file being read in at once, the values going into the output file name of the 2nd bad file overwrites the values of the first but the error message itself from the 1st file is retained in the report. Depending on how quickly SAS processes the files, I may end up with only 1 report or two reports with filenames that are identical except for a minute difference in the timestamp. I've attached screenshots of the output
Below is the macro code (I just put one example of the validation by file extension (there are 24 possible types!):
%LET indir = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/input; /*input files directory*/
%LET bkdir = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/backup; /*backup dir post file processing*/
%LET bkdir_err = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/bkup_err/; /*backup files with wrong*/
%LET bkdir_zero = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/bkup_err; /*backup empty files*/
%let outaud = /sasdata/cdm/mktgecicm/CAMPAIGNS/ADHOC/TESTS/PRA_MACROS_UPDATE_TEST2/pra_out/audit;
filename indata pipe "ls &indir";
data filein;
infile indata truncover; /* infile statement for file names */
input fname :$100.; /* read the file names from the directory */
call symput ('num_files',strip(_n_)); /* store the record number in a macro variable */
run;
data filein2;
set filein;
length fname $100 file_type $3 file_ext $10 ci_pos 3 camp_id cid 6;
file_ext = scan(strip(fname), -1);
ci_pos = find(compress(upcase(fname)),'CI');
camp_id = substr(compress(fname),ci_pos +2,6);
* cid = input(camp_id, 6.);
userid_start = find(compress(fname),'_',20);
userid_end = find(compress(fname),'.');
ecma_uid = substr(compress(fname),userid_start+1, userid_end - userid_start -1);
/* error validation fields */
chkname = index(lowcase(fname),'ci00');
period = index(fname,'.');
fname2 = substr(fname,1,period-1);
chkchar = count(fname2,'_');
if chkchar > 2 then do;
cid = input(camp_id,6.);
errmsg = "Too many words in filename";
end;
else if chkname = 0 then do;
cid = input(compress('99'||camp_id),6.);
errmsg = "Missing leading zeroes in filename CI";
end;
else do;
cid = input(camp_id,6.);
errmsg = '';
end;
if file_ext = 'outcome' then file_type = 'oc';
else if file_ext = 'vetting' then file_type = 'vet';
else if file_ext = 'control' then file_type = 'ctl';
else if file_ext = 'cancel1' then file_type = 'c1';
else if file_ext = 'cancel2' then file_type = 'c2';
else if file_ext = 'cancel3' then file_type = 'c3';
else if file_ext in ('pending' ,'pending0' ,'pending1' ,'pending2' ,'pending3','pending4' ,
'pending5' ,'pending6' ,'pending7' ,'pending8' ,'pending9','pending10',
'pending11','pending12','pending13','pending14','pending15') then file_type = 'pd'; /* AdlC - add template 15 */
else file_type = 'inv';
run;
/* Create a macro to iterate over the filenames, read them in, and process the file */
%macro doit;
%do filecounter=1 %to &num_files;
/*========================================================
START - VALIDATE FILE NAME AdlC 31Mar2021
*/
proc sql;
select * from filein2 where errmsg is not null;
quit;
%if &sqlobs > 0 %then %do;
%let dte = %sysfunc (today(), yymmdd6.);
%let tme = %sysfunc (time(), tod8.);
data _null_;
set filein2;
filename1 = "CMAUI_audit_output_" || "&dte" || ':' || "&tme" || '_filein2_' || strip(camp_id) || '_' || strip(ecma_uid) || ".pdf" ;
call symput ('afilnme1', filename1);
run;
filename outaudt1 "&outaud./&afilnme1";
ods listing close;
ods pdf file= outaudt1;
proc print data=filein2 (obs=1);
var fname errmsg;
title "check file name for mistakes";
run;
ods pdf close;
run;
%end;
/*
END - VALIDATE FILE NAME AdlC 31Mar2021
*========================================================*/
proc sql noprint;
select fname, file_ext, file_type, cid, camp_id, ecma_uid into :filename, :fileext, :filetype, :campid, :camp_id, :ecma_uid
from filein2 (firstobs = &filecounter obs = &filecounter);
quit;
filename fm "&indir/&filename";
%if %sysfunc(compress("&fileext")) = "pending" %then %do; /*pd, oc, vet file*/
data ds;
infile fm delimiter='|' dsd truncover lrecl=1500;
attrib
campaign_id length= 4 format= 6. label= 'Campaign ID'
channel_type_id length= $8 format= $8. label= 'Channel Type ID'
connection_number length= 6 format= 10. label= 'Connection Number'
portfolio_number length= 4 format= 8. label= 'Portfolio Number'
cell_number length= $6 format= $6. label= 'Campaign Cell Number'
treatment_id length= 4 label= 'Treatment Id'
topic_id length= 4 label= 'Topic ID'
creation_datetime length= 8 format= datetime20. label= 'PRA Creation Date/Time'
effective_date length= 8 format= date9. label= 'PRA effective date'
expiry_date length= 8 format= date9. label= 'PRA expiry date'
expected_benefit length= 3 format= 7.2 label= 'Expected Benefit'
acct_transit_number length= 4 format= Z5. label= 'Account Transit Number'
account_number length= $20 format= $20. label= 'Account Number'
related_treatment_id length= 4 label= 'Related Treatment Id'
ecma_instance_id length= 6 format= z9. label= 'ECMA INSTANCE ID'
;
input var1 : $11.
CONNECTION_NUMBER : 10.
PORTFOLIO_NUMBER : 8.
CAMPAIGN_ID : 4.
TOPIC_ID : 5.
TREATMENT_ID : 5.
CELL_NUMBER : $char6.
CHANNEL_TYPE_ID : 3.
EFFECTIVE_DATE : date9.
EXPIRY_DATE : date9.
CREATION_DATETIME : datetime20.
EXPECTED_BENEFIT : 7.2
ACCOUNT_NUMBER : $char20.
ACCT_TRANSIT_NUMBER : 5.
RELATED_TREATMENT_ID : 5.
ECMA_INSTANCE_ID : 9.
;
run;
data final tr;
set ds;
if compress(var1) = 'TRAILER' then output tr;
else output final;
run;
data final(drop=var1);
set final;
party_number = input(var1, 11.);
if party_number = 0 then delete;
if portfolio_number < 1 or connection_number < 1 then delete;
if portfolio_number > 99999 then portfolio_number = 99999;
run;
data IVR REST;
set final;
if channel_type_id in (3,17) then output IVR;
else output REST;
run;
proc sort data=IVR;
by party_number channel_type_id;
run;
data IVR;
set IVR;
by party_number;
if first.party_number;
run;
data final;
set rest ivr;
run;
proc sort data=final;
by party_number;
run;
%LET FILECNT = 0;
%LET DSID = %SYSFUNC(OPEN(final));
%LET FILECNT = %SYSFUNC(ATTRN(&DSID,NOBS));
%LET DSID = %SYSFUNC(CLOSE(&DSID));
%if &FILECNT = 0 %then %do;
/*Move files with 0 or 1 record to BKUP0*/
data _null_;
x "mv /&indir/&filename/&bkdir_zero";
run;
%end;
%else %do;
%test_praload(indata =final,campaign_id=&campid,dest=&filetype,usr=&ecma_uid);
%end;
%end;
…more
%else %do;
%put %sysfunc(compress("&fileext")) 'INVALID file';
data _null_;
x "mv /&indir/&filename/&bkdir_zero";
run;
%end;
%end;
%mend;
%doit;