• Hadoop_UDAF示例


    UDAF: 多进一出

    GenericUDAFEvaluator : 就是根据job的不同阶段执行不同的方法
    Hive通过GenericUDAFEvaluator.Mode来确定job的执行阶段
    PARTIAL1: 从原始数据到部分聚合数据,会调用iterate和terminatePartial
    PARTIAL2: 从部分聚合数据到部分聚合数据,会调用merge和terminatePartial
    FINAL: 从部分聚合数据到完全聚合,会调用merge和terminate
    COMPLETE: 从原始数据直接到完全聚合,会调用iterate和terminate
    除了上面提到的iterate、merge、terminatePartial和terminate以外,还有init(初始化,并返回该阶段输出值的类型)、
    getNewAggregationBuffer(获取新的buffer,也就是在方法间传递中间结果的对象)和reset(重置buffer对象)
    需求: 实现一个自定义的sum函数,要求该函数支持整型和浮点型的sum操作
    

    简单示例,重写SUM函数

    package com.hive.udaf;
    
    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.parse.SemanticException;
    import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
    import org.apache.hadoop.hive.serde2.io.DoubleWritable;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveWritableObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
    import org.apache.hadoop.io.LongWritable;
    
    /**
     * Hive UDAF resolver for a custom {@code sum} aggregate.
     *
     * <p>Integral arguments (BYTE, SHORT, INT, LONG) are summed as {@code long} by
     * {@link udafLong}; FLOAT and DOUBLE arguments are summed as {@code double} by
     * {@link udafDouble}. Any other argument type is rejected when the evaluator is resolved.
     *
     * @author liuwl
     */
    public class mysum extends AbstractGenericUDAFResolver {

      /**
       * Validates the call signature and picks the evaluator matching the argument type.
       *
       * @param info description of the arguments the function was invoked with
       * @return a {@link udafLong} for integral arguments, a {@link udafDouble} for FLOAT/DOUBLE
       * @throws SemanticException if the function is called with all columns, with a number of
       *     arguments other than one, with a non-primitive argument, or with an unsupported
       *     primitive category
       */
      @Override
      public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) throws SemanticException {

        // the "all columns" form is not supported
        if (info.isAllColumns()) {
          throw new SemanticException("this function is not support all parameters");
        }
        // exactly one column argument is accepted
        ObjectInspector[] inspectors = info.getParameterObjectInspectors();
        if (inspectors.length != 1) {
          throw new SemanticException("the parameters is only one clomun");
        }
        if (inspectors[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
          throw new SemanticException("the parameters must be Basic data types");
        }
        // Only the primitive category is needed here, so cast to the PrimitiveObjectInspector
        // interface; casting to a concrete writable inspector class would fail for primitive
        // inspectors that are not writable-backed.
        PrimitiveObjectInspector inputInspector = (PrimitiveObjectInspector) inspectors[0];
        switch (inputInspector.getPrimitiveCategory()) {
          case INT:
          case LONG:
          case BYTE:
          case SHORT:
            return new udafLong();
          case FLOAT:
          case DOUBLE:
            return new udafDouble();
          default:
            throw new SemanticException("the parameter's Category is not support");
        }
      }

      /**
       * Evaluator that sums integral values into a {@code long}.
       */
      public static class udafLong extends GenericUDAFEvaluator {

        /** Inspector used to decode the values handed to {@code iterate} and {@code merge}. */
        public PrimitiveObjectInspector longInputor;

        /** Aggregation state: the running sum and whether any non-null value has been seen. */
        static class sumlongagg implements AggregationBuffer {
          long sum;
          boolean empty;
        }

        /**
         * Records the inspector for this stage's input and declares a writable long result.
         *
         * @param m the aggregation mode this evaluator instance is initialised for
         * @param parameters inspectors of the incoming values; exactly one is expected
         * @return the writable long object inspector
         * @throws HiveException if the number of inspectors is not one
         */
        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {

          super.init(m, parameters);
          if (parameters.length != 1) {
            throw new UDFArgumentException("Argument Exception");
          }
          // Always take the inspector passed for the current mode. Keeping one cached from an
          // earlier init call would decode this stage's values with the wrong inspector.
          this.longInputor = (PrimitiveObjectInspector) parameters[0];
          return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
        }

        /**
         * Creates a buffer in its reset state (sum 0, marked empty).
         */
        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {

          sumlongagg buffer = new sumlongagg();
          this.reset(buffer);
          return buffer;
        }

        /**
         * Resets the buffer to sum 0 and marks it empty.
         */
        @Override
        public void reset(AggregationBuffer agg) throws HiveException {

          sumlongagg buffer = (sumlongagg) agg;
          buffer.sum = 0;
          buffer.empty = true;
        }

        /**
         * Adds one input value to the buffer; null values are ignored.
         *
         * @throws HiveException if the number of parameters is not one
         */
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {

          if (parameters.length != 1) {
            throw new UDFArgumentException("Argument Exception");
          }
          // adding a raw value and adding a partial sum are the same operation
          this.merge(agg, parameters[0]);
        }

        /**
         * Returns the partial result, which has the same form as the final result.
         */
        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
          return this.terminate(agg);
        }

        /**
         * Adds a value (raw input or partial sum) to the buffer; a null value is ignored.
         */
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {

          if (partial == null) {
            return;
          }
          sumlongagg buffer = (sumlongagg) agg;
          buffer.sum += PrimitiveObjectInspectorUtils.getLong(partial, longInputor);
          buffer.empty = false;
        }

        /**
         * Returns the sum as a {@link LongWritable}, or {@code null} if no non-null value was added.
         */
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {

          sumlongagg buffer = (sumlongagg) agg;
          if (buffer.empty) {
            return null;
          }
          return new LongWritable(buffer.sum);
        }
      }

      /**
       * Evaluator that sums FLOAT/DOUBLE values into a {@code double}.
       */
      public static class udafDouble extends GenericUDAFEvaluator {

        /** Inspector used to decode the values handed to {@code iterate} and {@code merge}. */
        public PrimitiveObjectInspector doubleInputor;

        /** Aggregation state: the running sum and whether any non-null value has been seen. */
        static class sumdoubleagg implements AggregationBuffer {
          double sum;
          boolean empty;
        }

        /**
         * Records the inspector for this stage's input and declares a writable double result.
         *
         * @param m the aggregation mode this evaluator instance is initialised for
         * @param parameters inspectors of the incoming values; exactly one is expected
         * @return the writable double object inspector
         * @throws HiveException if the number of inspectors is not one
         */
        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {

          super.init(m, parameters);
          if (parameters.length != 1) {
            throw new UDFArgumentException("Argument Exception");
          }
          // Always take the inspector passed for the current mode. Keeping one cached from an
          // earlier init call would decode this stage's values with the wrong inspector.
          this.doubleInputor = (PrimitiveObjectInspector) parameters[0];
          return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
        }

        /**
         * Creates a buffer in its reset state (sum 0, marked empty).
         */
        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {

          sumdoubleagg buffer = new sumdoubleagg();
          this.reset(buffer);
          return buffer;
        }

        /**
         * Resets the buffer to sum 0 and marks it empty.
         */
        @Override
        public void reset(AggregationBuffer agg) throws HiveException {

          sumdoubleagg buffer = (sumdoubleagg) agg;
          buffer.sum = 0;
          buffer.empty = true;
        }

        /**
         * Adds one input value to the buffer; null values are ignored.
         *
         * @throws HiveException if the number of parameters is not one
         */
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {

          if (parameters.length != 1) {
            throw new UDFArgumentException("Argument Exception");
          }
          // adding a raw value and adding a partial sum are the same operation
          this.merge(agg, parameters[0]);
        }

        /**
         * Returns the partial result, which has the same form as the final result.
         */
        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
          return this.terminate(agg);
        }

        /**
         * Adds a value (raw input or partial sum) to the buffer; a null value is ignored.
         */
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {

          if (partial == null) {
            return;
          }
          sumdoubleagg buffer = (sumdoubleagg) agg;
          buffer.sum += PrimitiveObjectInspectorUtils.getDouble(partial, doubleInputor);
          buffer.empty = false;
        }

        /**
         * Returns the sum as a {@link DoubleWritable}, or {@code null} if no non-null value was added.
         */
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {

          sumdoubleagg buffer = (sumdoubleagg) agg;
          if (buffer.empty) {
            return null;
          }
          return new DoubleWritable(buffer.sum);
        }
      }
    }
    

     测试

    hive (workdb)> add jar /home/liuwl/opt/datas/mysum.jar;
    hive (workdb)> create temporary function mysum as 'com.hive.udaf.mysum';
    hive (workdb)> select sum(deptno),mysum(deptno) from emp;
    结果: _c0  _c1
         310  310
    
  • 相关阅读:
    隐马尔科夫模型(HMM)
    各大IT企业招聘所须要求技能
    Java NIO和IO的主要差别
    Css 选择器总结
    程序猿生存定律-六个程序猿的故事(3)
    JVM学习心得
    APUE读书笔记-第14章-高级I/O
    《Word排版艺术》读后感,兼谈LaTeX
    LaTeX:Figures, Tables, and Equations 插入图表和公式
    LaTeX 的对参考文献的处理
  • 原文地址:https://www.cnblogs.com/eRrsr/p/6096989.html
Copyright © 2020-2023  润新知