• SAS 数据集自动检验


    OPTION  NOCENTER LS=MAX PS=MAX OBS=MAX MACROGEN;
    
    LIBNAME A '.';
    
    OPTIONS FMTSEARCH=(A);
    
        
    /*附录宏*/
    %MACRO M_APPENDIX_FORMAT(N1);
        
        %IF %INDEX(&LVARNAME,&N1.) %THEN %DO;
            %DO K=1 %TO &LA_LEN.;                                
                %LET AN=%SCAN(&LVARNAME.,&K.);
                %LET AC=%SCAN(&LA.,&K.);
                %IF &AN.=&N1. %THEN %DO;
                    FORMAT &N1. $&AC._FMT.;
                    
                %END;
            %END;
        %END;
        %ELSE %DO;
            FORMAT &N1. $3.;
        %END;
                
    %MEND;
    
    
    %LET NL=%SYSFUNC(COUNTW(&NLIST.));/*变量个数*/
    %LET LVARNAME=IDENTITEY_TYPE BUSINESS_ORG_TYPE BUSINESS_TYPE PAYBACK_PERSON_TYPE CLASS5_STATE CURRENCY PAYMENT_STAUTS;/*附录变量*/
    %LET LA=A1 A18 A20 A44 A58 A32 A33;/*变量对应附录*/
    %LET LA_LEN=%SYSFUNC(COUNTW(&LVARNAME.));
    
    %M_APPENDIX_FORMAT(IDENTITEY_TYPE);
    
    LIBNAME XLS EXCEL  '36.xls';
    
    %LET OUTPUTDATA=PCR_BASEINFO;
    
    %MACRO CHECK;
    
    	/*1、检查表结构-保留字段:变量序号、变量名、变量标签、变量类型、变量长度*/
    	PROC CONTENTS DATA=A.&OUTPUTDATA OUT=temp_t1(KEEP=VARNUM NAME LABEL TYPE LENGTH) NOPRINT VARNUM; 
    	RUN;
    	
    	PROC SORT DATA=temp_t1;
    		BY VARNUM;
    	RUN;
    	
    	DATA XLS.CONTENTS;
    		SET temp_t1;
    	RUN;
    	
    	/*2、打印前100条记录查看*/
    	DATA XLS.TOP100;
    		SET A.&OUTPUTDATA(OBS=100);
    	RUN;
    	
    	/*3、唯一性检查和关联关系检查*/
    
    	PROC SORT DATA=A.&OUTPUTDATA(KEEP=REPORT_NO IDENTITEY_TYPE BUSINESS_ORG_TYPE BUSINESS_ORG BUSINESS_TYPE) OUT=NODUP DUPOUT=XLS.REPORT_NO_DUP NODUPKEY;
    		BY REPORT_NO IDENTITEY_TYPE BUSINESS_ORG_TYPE BUSINESS_ORG BUSINESS_TYPE;
    	RUN;
    	
    	DATA XLS.DATE_XC;
    		SET A.&OUTPUTDATA;
    		MON=INTCK("MONTH",OPEN_DATE,PUTOUT_DATE);
    		IF MON<0 AND MON^=.;
    	RUN;
    			
    	/*4、查看数据集频数分布*/
    	
    	PROC SQL NOPRINT;
    		SELECT NAME INTO:NLIST SEPARATED BY ' ' FROM temp_t1;/*变量列表*/
    		SELECT TYPE INTO:TLIST SEPARATED BY ' ' FROM temp_t1;/*变量类型*/
    	QUIT;
    	
    	%LET NL=%SYSFUNC(COUNTW(&NLIST.));/*变量个数*/
    	%LET LVARNAME=IDENTITEY_TYPE BUSINESS_ORG_TYPE BUSINESS_TYPE PAYBACK_PERSON_TYPE CLASS5_STATE CURRENCY PAYMENT_STAUTS;/*附录变量*/
    	%LET LA=A1 A18 A20 A44 A58 A32 A33;/*变量对应附录*/
    	%LET LA_LEN=%SYSFUNC(COUNTW(&LVARNAME.));
    
    	
    	/*计算字符型变量长度*/
    	DATA temp_t2;
    		SET A.&OUTPUTDATA(DROP=);
    		
    		%DO I=1 %TO &NL.;
    			%LET N1=%SCAN(&NLIST.,&I.);
    			%LET T1=%SCAN(&TLIST.,&I.);
    			%IF &T1=2 %THEN %DO;
    				&N1._len=LENGTH(&N1.);
    			%END;
    		%END;
    		
    	RUN;
    	
    	
    	%DO I=1 %TO &NL.;
    		%LET N1=%SCAN(&NLIST.,&I.);
    		%LET T1=%SCAN(&TLIST.,&I.);
    		%IF &T1=1 %THEN %DO;/*数值型-检查统计量*/
    			
    			PROC SUMMARY DATA=A.&OUTPUTDATA N NMISS MAX MIN ;
    				VAR &N1;
    				OUTPUT OUT=XLS.&N1.;
    			RUN;
    			
     			%END;
    			
    		%ELSE %DO;/*字符型-检查变量值前两位和变量值长度*/
    			
    			PROC FREQ DATA=temp_t2;
    				TABLES &N1./MISSING OUT=XLS.&N1.;
    				%M_APPENDIX_FORMAT(&N1.);
    			RUN;
    			
    			PROC FREQ DATA=temp_t2 NOPRINT;
    				TABLES &N1._len/MISSING OUT=XLS.&N1._len;
    			RUN;
    			
    		%END;
    	%END;
    	
    	
    %MEND;
    
    %CHECK;
    

      

  • 相关阅读:
    第五周作业
    第四周作业
    第三周作业
    第二周作业
    Linux常用命令使用格式及实例
    总结linux哲学思想
    配置环境变量,实现执行history的时候可以看到执行命令的时间
    安装Centos7.6操作系统后,创建一个自己名字的用户名,并可以正常登陆
    各系列的Linux发行版及不同发行版之间的联系与区别
    python2使用Tkinter制作GUI内嵌matplotlib时,找不到NavigationToolbar2Tk的问题
  • 原文地址:https://www.cnblogs.com/wdkshy/p/12455635.html
Copyright © 2020-2023  润新知