/* pobf.sas                                                            */
/* Read a dBase (dbf) file, keep the rows for selected vendor-number   */
/* prefixes, and print:                                                */
/*   1. overall statistics of invamoun for the extract,                */
/*   2. per-vendor statistics of invamoun (vendor total >= 1000),      */
/*   3. a per-vendor leading-digit count of invamoun next to the       */
/*      count expected from the log10(1 + 1/d) distribution.           */
/* Original header note: data source is the dma files at               */
/* http://oig.hhs.gov/fraud/exclusions/database.html                   */

filename file1 '/temp/po.dbf';
libname db3 '\temp\po';
options compress=yes linesize=72;

/* Convert the dbf file into the SAS data set db3.po. */
proc dbf db4=file1 out=db3.po;
run;

options obs=max;

/* ------------------------------------------------------------------ */
/* Extract: keep only vendors whose (blank-stripped) number starts     */
/* with E1, E2, E3, G2 or with a digit.                                */
/* ------------------------------------------------------------------ */
data sel;
    set db3.po;
    length kflag 4;
    length lnam2 $2;
    length lnam1 $1;
    length lnam3 $3;
    length vendno $6;

    /* compress() removes every blank, so vendno never has a leading  */
    /* blank. The former tests against ' E1', ' E2', ' E3', ' G2' and */
    /* ' 0'..' 9' could therefore never be true and were removed.     */
    vendno = compress(vendor_n);
    lnam1 = substr(vendno, 1, 1);
    lnam2 = substr(vendno, 1, 2);
    lnam3 = substr(vendno, 1, 3);  /* kept so the layout of sel is unchanged */

    kflag = 0;
    if lnam2 in ("E1", "E2", "E3", "G2") then kflag = 1;
    if lnam1 >= "0" and lnam1 <= "9" then kflag = 1;

    if kflag = 0 then delete;
run;

proc contents data=sel;
run;

/* Overall n / sum / min / max of the invoice amount. */
proc summary data=sel;
    var invamoun;
    output out=s n=n sum=tot min=min max=max;
run;

proc print data=s;
    title1 'extract statistics';
run;

/* ------------------------------------------------------------------ */
/* Per-vendor statistics.                                              */
/* ------------------------------------------------------------------ */
proc sort data=sel;
    by vendno;
run;

proc summary data=sel;
    by vendno;
    var invamoun;
    output out=s2 n=n sum=tot min=min max=max;
run;

/* Drop vendors whose total is below 1000 (a total of exactly 1000 is */
/* kept; a missing total compares low and is dropped).                */
data s2a;
    set s2;
    if tot < 1000 then delete;
run;

proc print data=s2a;
    title1 'summary statistics by vendor';
    title2 'only vendors with more than $1,000';
run;

options obs=max;

/* ------------------------------------------------------------------ */
/* Leading-digit extraction.                                           */
/* ------------------------------------------------------------------ */
data test;
    set sel;
    length ct 4;
    length amt $9;
    length digit $1;
    length tdigit $1;

    amt = put(invamoun, 9.2);

    /* digit = first character of amt that is '1'..'9'. Blanks,       */
    /* zeros, the decimal point and (new) a minus sign are skipped,   */
    /* so a negative amount no longer yields digit '-'.               */
    /* The loop always visits all nine positions so that tdigit ends  */
    /* as the ninth character, exactly as before.                     */
    /* NOTE(review): zero or missing amounts leave digit blank and    */
    /* still count toward the per-vendor total n4 below - confirm     */
    /* that this is intended.                                         */
    digit = ' ';
    do i = 1 to 9;
        tdigit = substr(amt, i, 1);
        if digit = ' ' and '1' <= tdigit <= '9' then digit = tdigit;
    end;
    drop i;

    ct = 1;
run;

proc sort data=test;
    by vendno digit;
run;

/* s3: number of rows per vendor and leading digit. */
proc summary data=test;
    by vendno digit;
    var ct;
    output out=s3 n=n sum=tot;
run;

/* s4: number of rows per vendor. */
proc summary data=test;
    by vendno;
    var ct;
    output out=s4 n=n4 sum=tot4;
run;

/* Attach the vendor total to each digit row; ignore vendors with     */
/* fewer than 10 rows.                                                */
data both;
    merge s3(in=s) s4(in=t);
    by vendno;
    if s and t;
    if n4 < 10 then delete;
run;

/* ------------------------------------------------------------------ */
/* Expected count per leading digit d: int(log10(1 + 1/d) * n4).       */
/* The base-10 log is required: log() is the natural log in SAS, and   */
/* the proportions ln(1 + 1/d) for d = 1..9 sum to ln(10) (about 2.30) */
/* instead of 1, which overstated every expected count.               */
/* Rows whose digit is not '1'..'9' keep a missing expect.             */
/* ------------------------------------------------------------------ */
data both2;
    set both;
    length expect 4;
    keep vendno digit n n4 expect;
    if '1' <= digit <= '9' then
        expect = int(log10(1 + (1 / input(digit, 1.))) * n4);
run;

proc print data=both2;
    title1 'leading digit summary by vendor';
run;