• Hive UDAF source code analysis


    GenericUDAFSum

    /**
     * Licensed to the Apache Software Foundation (ASF) under one
     * or more contributor license agreements.  See the NOTICE file
     * distributed with this work for additional information
     * regarding copyright ownership.  The ASF licenses this file
     * to you under the Apache License, Version 2.0 (the
     * "License"); you may not use this file except in compliance
     * with the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    package org.apache.hadoop.hive.ql.udf.generic;
    
    import java.util.HashSet;
    
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.exec.Description;
    import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.parse.SemanticException;
    import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
    import org.apache.hadoop.hive.ql.util.JavaDataModel;
    import org.apache.hadoop.hive.serde2.io.DoubleWritable;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorObject;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
    import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
    import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.util.StringUtils;
    
    /**
     * GenericUDAFSum.
     *
     */
    @Description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers")
    public class GenericUDAFSum extends AbstractGenericUDAFResolver {
    
      static final Logger LOG = LoggerFactory.getLogger(GenericUDAFSum.class.getName());
    
      @Override
      public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
          throws SemanticException {
        if (parameters.length != 1) {
          throw new UDFArgumentTypeException(parameters.length - 1,
              "Exactly one argument is expected.");
        }
    
        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
          throw new UDFArgumentTypeException(0,
              "Only primitive type arguments are accepted but "
                  + parameters[0].getTypeName() + " is passed.");
        }
        switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
          return new GenericUDAFSumLong();
        case TIMESTAMP:
        case FLOAT:
        case DOUBLE:
        case STRING:
        case VARCHAR:
        case CHAR:
          return new GenericUDAFSumDouble();
        case DECIMAL:
          return new GenericUDAFSumHiveDecimal();
        case BOOLEAN:
        case DATE:
        default:
          throw new UDFArgumentTypeException(0,
              "Only numeric or string type arguments are accepted but "
                  + parameters[0].getTypeName() + " is passed.");
        }
      }
    
      @Override
      public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info)
          throws SemanticException {
        TypeInfo[] parameters = info.getParameters();
    
        GenericUDAFSumEvaluator eval = (GenericUDAFSumEvaluator) getEvaluator(parameters);
        eval.setWindowing(info.isWindowing());
        eval.setSumDistinct(info.isDistinct());
    
        return eval;
      }
    
      public static PrimitiveObjectInspector.PrimitiveCategory getReturnType(TypeInfo type) {
        if (type.getCategory() != ObjectInspector.Category.PRIMITIVE) {
          return null;
        }
        switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
          case BYTE:
          case SHORT:
          case INT:
          case LONG:
            return PrimitiveObjectInspector.PrimitiveCategory.LONG;
          case TIMESTAMP:
          case FLOAT:
          case DOUBLE:
          case STRING:
          case VARCHAR:
          case CHAR:
            return PrimitiveObjectInspector.PrimitiveCategory.DOUBLE;
          case DECIMAL:
            return PrimitiveObjectInspector.PrimitiveCategory.DECIMAL;
        }
        return null;
      }
    
      /**
       * The base type for sum operator evaluator
       *
       */
      public static abstract class GenericUDAFSumEvaluator<ResultType extends Writable> extends GenericUDAFEvaluator {
        static abstract class SumAgg<T> extends AbstractAggregationBuffer {
          boolean empty;
          T sum;
          HashSet<ObjectInspectorObject> uniqueObjects; // Unique rows.
        }
    
        protected PrimitiveObjectInspector inputOI;
        protected PrimitiveObjectInspector outputOI;
        protected ResultType result;
        protected boolean isWindowing;
        protected boolean sumDistinct;
    
        public void setWindowing(boolean isWindowing) {
          this.isWindowing = isWindowing;
        }
    
        public void setSumDistinct(boolean sumDistinct) {
          this.sumDistinct = sumDistinct;
        }
    
        protected boolean isWindowingDistinct() {
          return isWindowing && sumDistinct;
        }
    
        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
          if (isWindowingDistinct()) {
            throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial");
          } else {
            return terminate(agg);
          }
        }
    
        /**
         * Check if the input object is eligible to contribute to the sum: skip it
         * if it is null, or, in the SUM(DISTINCT) case, if it has the same value
         * as a previously seen object.
         * @param input the input object
         * @return true if sumDistinct is false, or if the non-null input differs from all previously seen objects
         */
        protected boolean isEligibleValue(SumAgg agg, Object input) {
          if (input == null) {
            return false;
          }
    
          if (isWindowingDistinct()) {
            HashSet<ObjectInspectorObject> uniqueObjs = agg.uniqueObjects;
            ObjectInspectorObject obj = input instanceof ObjectInspectorObject ?
                (ObjectInspectorObject)input :
                new ObjectInspectorObject(
                ObjectInspectorUtils.copyToStandardObject(input, inputOI, ObjectInspectorCopyOption.JAVA),
                outputOI);
            if (!uniqueObjs.contains(obj)) {
              uniqueObjs.add(obj);
              return true;
            }
    
            return false;
          }
    
          return true;
        }
      }
    
      /**
       * GenericUDAFSumHiveDecimal.
       *
       */
      public static class GenericUDAFSumHiveDecimal extends GenericUDAFSumEvaluator<HiveDecimalWritable> {
    
        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
          assert (parameters.length == 1);
          super.init(m, parameters);
          result = new HiveDecimalWritable(0);
          inputOI = (PrimitiveObjectInspector) parameters[0];
          // The output precision is 10 greater than the input which should cover at least
          // 10b rows. The scale is the same as the input.
          DecimalTypeInfo outputTypeInfo = null;
          if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
            int precision = Math.min(HiveDecimal.MAX_PRECISION, inputOI.precision() + 10);
            outputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(precision, inputOI.scale());
          } else {
            outputTypeInfo = (DecimalTypeInfo) inputOI.getTypeInfo();
          }
          ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(outputTypeInfo);
          outputOI = (PrimitiveObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(
              oi, ObjectInspectorCopyOption.JAVA);
    
          return oi;
        }
    
        /** class for storing decimal sum value. */
        @AggregationType(estimable = false) // hard to know exactly for decimals
        static class SumHiveDecimalWritableAgg extends SumAgg<HiveDecimalWritable> {
        }
    
        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
          SumHiveDecimalWritableAgg agg = new SumHiveDecimalWritableAgg();
          reset(agg);
          return agg;
        }
    
        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
          SumAgg<HiveDecimalWritable> bdAgg = (SumAgg<HiveDecimalWritable>) agg;
          bdAgg.empty = true;
          bdAgg.sum = new HiveDecimalWritable(0);
          bdAgg.uniqueObjects = new HashSet<ObjectInspectorObject>();
        }
    
        boolean warned = false;
    
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
          assert (parameters.length == 1);
          try {
            if (isEligibleValue((SumHiveDecimalWritableAgg) agg, parameters[0])) {
              ((SumHiveDecimalWritableAgg)agg).empty = false;
              ((SumHiveDecimalWritableAgg)agg).sum.mutateAdd(
                  PrimitiveObjectInspectorUtils.getHiveDecimal(parameters[0], inputOI));
            }
          } catch (NumberFormatException e) {
            if (!warned) {
              warned = true;
              LOG.warn(getClass().getSimpleName() + " "
                  + StringUtils.stringifyException(e));
              LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
            }
          }
        }
    
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
          if (partial != null) {
            SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) agg;
            if (myagg.sum == null || !myagg.sum.isSet()) {
              return;
            }
    
            myagg.empty = false;
            if (isWindowingDistinct()) {
              throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial");
            } else {
              myagg.sum.mutateAdd(PrimitiveObjectInspectorUtils.getHiveDecimal(partial, inputOI));
            }
          }
        }
    
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
          SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) agg;
          if (myagg.empty || myagg.sum == null || !myagg.sum.isSet()) {
            return null;
          }
          DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo)outputOI.getTypeInfo();
          myagg.sum.mutateEnforcePrecisionScale(decimalTypeInfo.getPrecision(), decimalTypeInfo.getScale());
          if (!myagg.sum.isSet()) {
            LOG.warn("The sum of a column with data type HiveDecimal is out of range");
            return null;
          }
    
          result.set(myagg.sum);
          return result;
        }
    
        @Override
        public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrameDef) {
          // Don't use streaming for distinct cases
          if (sumDistinct) {
            return null;
          }
    
          return new GenericUDAFStreamingEvaluator.SumAvgEnhancer<HiveDecimalWritable, HiveDecimal>(
              this, wFrameDef) {
    
            @Override
            protected HiveDecimalWritable getNextResult(
                org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<HiveDecimalWritable, HiveDecimal>.SumAvgStreamingState ss)
                throws HiveException {
              SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) ss.wrappedBuf;
              HiveDecimal r = myagg.empty ? null : myagg.sum.getHiveDecimal();
              HiveDecimal d = ss.retrieveNextIntermediateValue();
              if (d != null ) {
                r = r == null ? null : r.subtract(d);
              }
    
              return r == null ? null : new HiveDecimalWritable(r);
            }
    
            @Override
            protected HiveDecimal getCurrentIntermediateResult(
                org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<HiveDecimalWritable, HiveDecimal>.SumAvgStreamingState ss)
                throws HiveException {
              SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) ss.wrappedBuf;
              return myagg.empty ? null : myagg.sum.getHiveDecimal();
            }
    
          };
        }
      }
    
      /**
       * GenericUDAFSumDouble.
       *
       */
      public static class GenericUDAFSumDouble extends GenericUDAFSumEvaluator<DoubleWritable> {
        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
          assert (parameters.length == 1);
          super.init(m, parameters);
          result = new DoubleWritable(0);
          inputOI = (PrimitiveObjectInspector) parameters[0];
          outputOI = (PrimitiveObjectInspector)ObjectInspectorUtils.getStandardObjectInspector(inputOI,
              ObjectInspectorCopyOption.JAVA);
          return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
        }
    
        /** class for storing double sum value. */
        @AggregationType(estimable = true)
        static class SumDoubleAgg extends SumAgg<Double> {
          @Override
          public int estimate() { return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2; }
        }
    
        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
          SumDoubleAgg result = new SumDoubleAgg();
          reset(result);
          return result;
        }
    
        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
          SumDoubleAgg myagg = (SumDoubleAgg) agg;
          myagg.empty = true;
          myagg.sum = 0.0;
          myagg.uniqueObjects = new HashSet<ObjectInspectorObject>();
        }
    
        boolean warned = false;
    
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
          assert (parameters.length == 1);
          try {
            if (isEligibleValue((SumDoubleAgg) agg, parameters[0])) {
              ((SumDoubleAgg)agg).empty = false;
              ((SumDoubleAgg)agg).sum += PrimitiveObjectInspectorUtils.getDouble(parameters[0], inputOI);
            }
          } catch (NumberFormatException e) {
            if (!warned) {
              warned = true;
              LOG.warn(getClass().getSimpleName() + " "
                  + StringUtils.stringifyException(e));
              LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
            }
          }
        }
    
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
          if (partial != null) {
            SumDoubleAgg myagg = (SumDoubleAgg) agg;
            myagg.empty = false;
            if (isWindowingDistinct()) {
              throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial");
            } else {
              myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial, inputOI);
            }
          }
        }
    
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
          SumDoubleAgg myagg = (SumDoubleAgg) agg;
          if (myagg.empty) {
            return null;
          }
          result.set(myagg.sum);
          return result;
        }
    
        @Override
        public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrameDef) {
          // Don't use streaming for distinct cases
          if (sumDistinct) {
            return null;
          }
    
          return new GenericUDAFStreamingEvaluator.SumAvgEnhancer<DoubleWritable, Double>(this,
              wFrameDef) {
    
            @Override
            protected DoubleWritable getNextResult(
                org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<DoubleWritable, Double>.SumAvgStreamingState ss)
                throws HiveException {
              SumDoubleAgg myagg = (SumDoubleAgg) ss.wrappedBuf;
              Double r = myagg.empty ? null : myagg.sum;
              Double d = ss.retrieveNextIntermediateValue();
              if (d != null) {
                r = r == null ? null : r - d;
              }
    
              return r == null ? null : new DoubleWritable(r);
            }
    
            @Override
            protected Double getCurrentIntermediateResult(
                org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<DoubleWritable, Double>.SumAvgStreamingState ss)
                throws HiveException {
              SumDoubleAgg myagg = (SumDoubleAgg) ss.wrappedBuf;
              return myagg.empty ? null : new Double(myagg.sum);
            }
    
          };
        }
    
      }
    
      /**
       * GenericUDAFSumLong.
       *
       */
      public static class GenericUDAFSumLong extends GenericUDAFSumEvaluator<LongWritable> {
        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
          assert (parameters.length == 1);
          super.init(m, parameters);
          result = new LongWritable(0);
          inputOI = (PrimitiveObjectInspector) parameters[0];
          outputOI = (PrimitiveObjectInspector)ObjectInspectorUtils.getStandardObjectInspector(inputOI,
              ObjectInspectorCopyOption.JAVA);
          return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
        }
    
        /** class for storing long sum value. */
        @AggregationType(estimable = true)
        static class SumLongAgg extends SumAgg<Long> {
          @Override
          public int estimate() { return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2; }
        }
    
        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
          SumLongAgg result = new SumLongAgg();
          reset(result);
          return result;
        }
    
        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
          SumLongAgg myagg = (SumLongAgg) agg;
          myagg.empty = true;
          myagg.sum = 0L;
          myagg.uniqueObjects = new HashSet<ObjectInspectorObject>();
        }
    
        private boolean warned = false;
    
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
          assert (parameters.length == 1);
          try {
            if (isEligibleValue((SumLongAgg) agg, parameters[0])) {
              ((SumLongAgg)agg).empty = false;
              ((SumLongAgg)agg).sum += PrimitiveObjectInspectorUtils.getLong(parameters[0], inputOI);
            }
          } catch (NumberFormatException e) {
            if (!warned) {
              warned = true;
              LOG.warn(getClass().getSimpleName() + " "
                  + StringUtils.stringifyException(e));
            }
          }
        }
    
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
          if (partial != null) {
            SumLongAgg myagg = (SumLongAgg) agg;
            myagg.empty = false;
            if (isWindowingDistinct()) {
              throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial");
            } else {
                myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI);
            }
          }
        }
    
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
          SumLongAgg myagg = (SumLongAgg) agg;
          if (myagg.empty) {
            return null;
          }
          result.set(myagg.sum);
          return result;
        }
    
        @Override
        public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrameDef) {
          // Don't use streaming for distinct cases
          if (isWindowingDistinct()) {
            return null;
          }
    
          return new GenericUDAFStreamingEvaluator.SumAvgEnhancer<LongWritable, Long>(this,
              wFrameDef) {
    
            @Override
            protected LongWritable getNextResult(
                org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<LongWritable, Long>.SumAvgStreamingState ss)
                throws HiveException {
              SumLongAgg myagg = (SumLongAgg) ss.wrappedBuf;
              Long r = myagg.empty ? null : myagg.sum;
              Long d = ss.retrieveNextIntermediateValue();
              if (d != null) {
                r = r == null ? null : r - d;
              }
    
              return r == null ? null : new LongWritable(r);
            }
    
            @Override
            protected Long getCurrentIntermediateResult(
                org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<LongWritable, Long>.SumAvgStreamingState ss)
                throws HiveException {
              SumLongAgg myagg = (SumLongAgg) ss.wrappedBuf;
              return myagg.empty ? null : new Long(myagg.sum);
            }
          };
        }
      }
    }
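
    To see how the resolver and the evaluators above fit together, here is a minimal
    hand-written driver (a sketch, assuming the hive-exec jar is on the classpath; Hive
    itself performs these calls from GroupByOperator, so the demo class name
    SumEvaluatorDemo and the literal input values are illustrative only). It runs
    GenericUDAFSumLong in COMPLETE mode, i.e. iterate() over original rows followed by
    terminate():

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    import org.apache.hadoop.io.LongWritable;

    public class SumEvaluatorDemo {
      public static void main(String[] args) throws Exception {
        // 1. The resolver picks an evaluator from the argument type: LONG -> GenericUDAFSumLong.
        GenericUDAFEvaluator eval = new GenericUDAFSum()
            .getEvaluator(new TypeInfo[] { TypeInfoFactory.longTypeInfo });

        // 2. COMPLETE mode: original rows in, final result out (iterate() + terminate()).
        ObjectInspector inputOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector;
        eval.init(GenericUDAFEvaluator.Mode.COMPLETE, new ObjectInspector[] { inputOI });

        // 3. One aggregation buffer per group: feed the rows, then terminate.
        AggregationBuffer buf = eval.getNewAggregationBuffer();
        for (long v : new long[] { 1, 2, 3 }) {
          eval.iterate(buf, new Object[] { new LongWritable(v) });
        }
        System.out.println(eval.terminate(buf));   // prints the LongWritable sum: 6
      }
    }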
    


    GenericUDAFEvaluator
    /**
     * Licensed to the Apache Software Foundation (ASF) under one
     * or more contributor license agreements.  See the NOTICE file
     * distributed with this work for additional information
     * regarding copyright ownership.  The ASF licenses this file
     * to you under the Apache License, Version 2.0 (the
     * "License"); you may not use this file except in compliance
     * with the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    
    package org.apache.hadoop.hive.ql.udf.generic;
    
    import java.io.Closeable;
    import java.io.IOException;
    import java.lang.annotation.Retention;
    import java.lang.annotation.RetentionPolicy;
    
    import org.apache.hadoop.hive.ql.exec.MapredContext;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
    import org.apache.hadoop.hive.ql.udf.UDFType;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hive.common.util.AnnotationUtils;
    
    /**
     * A Generic User-defined aggregation function (GenericUDAF) for use with
     * Hive.
     *
     * New GenericUDAF classes need to inherit from this GenericUDAFEvaluator class.
     *
     * GenericUDAFs are superior to normal UDAFs in the following ways:
     * 1. They can accept arguments of complex types, and return complex types.
     * 2. They can accept a variable number of arguments.
     * 3. They can accept an infinite number of function signatures - for example,
     *    it is easy to write a GenericUDAF that accepts array<int>,
     *    array<array<int>> and so on (arbitrary levels of nesting).
     */
    @UDFType(deterministic = true)
    public abstract class GenericUDAFEvaluator implements Closeable {
    
      @Retention(RetentionPolicy.RUNTIME)
      public static @interface AggregationType {
        boolean estimable() default false;
      }
    
      public static boolean isEstimable(AggregationBuffer buffer) {
        if (buffer instanceof AbstractAggregationBuffer) {
          Class<? extends AggregationBuffer> clazz = buffer.getClass();
          AggregationType annotation = AnnotationUtils.getAnnotation(clazz, AggregationType.class);
          return annotation != null && annotation.estimable();
        }
        return false;
      }
    
      /**
       * Mode.
       *
       */
      public static enum Mode {
        /**
         * PARTIAL1: from original data to partial aggregation data: iterate() and
         * terminatePartial() will be called.
         */
        PARTIAL1,  // equivalent to the map phase: iterate() and terminatePartial() are called
        /**
         * PARTIAL2: from partial aggregation data to partial aggregation data:
         * merge() and terminatePartial() will be called.
         */
        PARTIAL2,  // equivalent to the combiner phase: merge() and terminatePartial() are called
        /**
         * FINAL: from partial aggregation to full aggregation: merge() and
         * terminate() will be called.
         */
        FINAL,  // equivalent to the reduce phase: merge() and terminate() are called
        /**
         * COMPLETE: from original data directly to full aggregation: iterate() and
         * terminate() will be called.
         */
        COMPLETE  // equivalent to a map-only job (no reduce phase): iterate() and terminate() are called
      };
    
      Mode mode;
    
      /**
       * The constructor.
       */
      public GenericUDAFEvaluator() {
      }
    
      /**
       * Additionally setup GenericUDAFEvaluator with MapredContext before initializing.
       * This is only called in runtime of MapRedTask.
       *
       * @param mapredContext context
       */
      public void configure(MapredContext mapredContext) {
      }
    
      /**
       * Initialize the evaluator.
       *
       * @param m
       *          The mode of aggregation. (Annotation: the mode determines which of
       *          the four abstract methods are called and how the evaluator is
       *          initialized.)
       * @param parameters
       *          The ObjectInspector for the parameters: In PARTIAL1 and COMPLETE
       *          mode, the parameters are original data (PARTIAL1 is the map stage,
       *          and COMPLETE is a reduce without a map); In PARTIAL2 and FINAL
       *          mode, the parameters are just partial aggregations, i.e. data that
       *          has already been aggregated (in that case, the array will always
       *          have a single element).
       * @return The ObjectInspector for the return value. In PARTIAL1 and PARTIAL2
       *         mode, the ObjectInspector for the return value of
       *         terminatePartial() call; In FINAL and COMPLETE mode, the
       *         ObjectInspector for the return value of terminate() call.
       * 
       *         NOTE: We need ObjectInspector[] (in addition to the TypeInfo[] in
       *         GenericUDAFResolver) for 2 reasons: 1. ObjectInspector contains
       *         more information than TypeInfo. 2. We call
       *         GenericUDAFResolver.getEvaluator at compilation time, and
       *         GenericUDAFEvaluator.init at execution time.
       */
      public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
        // This function should be overriden in every sub class
        // And the sub class should call super.init(m, parameters) to get mode set.
        mode = m;
        return null;
      }
    
      /**
       * The interface for a class that is used to store the aggregation result
       * during the process of aggregation.
       * 
       * We split this piece of data out because there can be millions of instances
       * of this Aggregation in hash-based aggregation process, and it's very
       * important to conserve memory.
       * 
       * In the future, we may completely hide this class inside the Evaluator and
       * use integer numbers to identify which aggregation we are looking at.
       *
       * @deprecated use {@link AbstractAggregationBuffer} instead
       */
      public static interface AggregationBuffer {
      };
    
      public static abstract class AbstractAggregationBuffer implements AggregationBuffer {
        /**
         * Estimate the size of memory occupied by the aggregation buffer.
         * Currently, Hive assumes that primitive types occupy 16 bytes each and that
         * every Java object has 64 bytes of overhead. For a map, each entry also has 64 bytes of overhead.
         */
        public int estimate() { return -1; }
      }
    
      /**
       * Get a new aggregation object.
       */
      public abstract AggregationBuffer getNewAggregationBuffer() throws HiveException;
    
      /**
       * Reset the aggregation. This is useful if we want to reuse the same
       * aggregation.
       */
      public abstract void reset(AggregationBuffer agg) throws HiveException;
    
      /**
       * Close GenericUDFEvaluator.
       * This is only called in runtime of MapRedTask.
       */
      public void close() throws IOException {
      }
    
      /**
       * This function will be called by GroupByOperator when it sees a new input
       * row.
       * 
       * @param agg
       *          The object to store the aggregation result.
       * @param parameters
       *          The row, can be inspected by the OIs passed in init().
       */
      public void aggregate(AggregationBuffer agg, Object[] parameters) throws HiveException {
        if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
          iterate(agg, parameters);
        } else {
          assert (parameters.length == 1);
          merge(agg, parameters[0]);
        }
      }
    
      /**
       * This function will be called by GroupByOperator when it needs the
       * aggregation result (partial or final, depending on the mode).
       *
       * @param agg
       *          The object that stores the aggregation result.
       */
      public Object evaluate(AggregationBuffer agg) throws HiveException {
        if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
          return terminatePartial(agg);
        } else {
          return terminate(agg);
        }
      }
    
      /**
       * Iterate through original data.
       * 
       * @param parameters
       *          The objects of parameters.
       */
      public abstract void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException;
    
      /**
       * Get partial aggregation result.
       * 
       * @return partial aggregation result.
       */
      public abstract Object terminatePartial(AggregationBuffer agg) throws HiveException;
    
      /**
       * Merge with partial aggregation result. NOTE: null might be passed in case
       * there is no input data.
       * 
       * @param partial
       *          The partial aggregation result.
       */
      public abstract void merge(AggregationBuffer agg, Object partial) throws HiveException;
    
      /**
       * Get final aggregation result.
       * 
       * @return final aggregation result.
       */
      public abstract Object terminate(AggregationBuffer agg) throws HiveException;
    
      /**
       * When evaluating an aggregate over a fixed Window, the naive way to compute
       * results is to compute the aggregate for each row. But often there is a way
       * to compute results in a more efficient manner. This method enables the
       * basic evaluator to provide a function object that does the job in a more
       * efficient manner.
       * <p>
       * This method is called after this Evaluator is initialized. The returned
       * Function must be initialized. It is passed the 'window' of aggregation for
       * each row.
       * 
       * @param wFrmDef
       *          the Window definition in play for this evaluation.
       * @return null implies that this fn cannot be processed in Streaming mode. So
       *         each row is evaluated independently.
       */
      public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) {
        return null;
      }
    
    }
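
    The Mode annotations above (PARTIAL1 = map, PARTIAL2 = combiner, FINAL = reduce,
    COMPLETE = map-only) can be made concrete by driving the sum evaluator through a
    two-stage run by hand: each "map" side calls iterate() and terminatePartial(), and
    the "reduce" side combines the partial results with merge() and terminate(). As with
    the previous example, this is only a sketch under the assumption that hive-exec is on
    the classpath; the class name TwoStageSumDemo and the input values are made up for
    illustration, and in a real job GroupByOperator drives these calls.

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    import org.apache.hadoop.io.LongWritable;

    public class TwoStageSumDemo {

      static final ObjectInspector LONG_OI =
          PrimitiveObjectInspectorFactory.writableLongObjectInspector;

      static GenericUDAFEvaluator newSumEvaluator() throws Exception {
        // Resolver dispatch on the argument type, same as in the COMPLETE-mode example.
        return new GenericUDAFSum()
            .getEvaluator(new TypeInfo[] { TypeInfoFactory.longTypeInfo });
      }

      // "Map" side, Mode.PARTIAL1: iterate() over original rows, emit terminatePartial().
      static Object mapStage(long[] rows) throws Exception {
        GenericUDAFEvaluator eval = newSumEvaluator();
        eval.init(GenericUDAFEvaluator.Mode.PARTIAL1, new ObjectInspector[] { LONG_OI });
        AggregationBuffer buf = eval.getNewAggregationBuffer();
        for (long v : rows) {
          eval.iterate(buf, new Object[] { new LongWritable(v) });
        }
        return eval.terminatePartial(buf);        // partial aggregation (a LongWritable here)
      }

      public static void main(String[] args) throws Exception {
        // Two "mappers" each produce a partial sum.
        Object partial1 = mapStage(new long[] { 1, 2, 3 });
        Object partial2 = mapStage(new long[] { 10, 20 });

        // "Reduce" side, Mode.FINAL: merge() the partials, then terminate() for the final result.
        GenericUDAFEvaluator reducer = newSumEvaluator();
        reducer.init(GenericUDAFEvaluator.Mode.FINAL, new ObjectInspector[] { LONG_OI });
        AggregationBuffer buf = reducer.getNewAggregationBuffer();
        reducer.merge(buf, partial1);
        reducer.merge(buf, partial2);
        System.out.println(reducer.terminate(buf));  // prints 36
      }
    }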
    

      http://paddy-w.iteye.com/blog/2081409
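
    The getWindowingEvaluator() / SumAvgEnhancer path shown in both files implements the
    optimization described in the javadoc above: instead of re-summing the whole frame
    for every row of a fixed window, the evaluator keeps one running aggregate and
    subtracts the contribution of rows that slide out of the frame. Below is a plain-Java
    sketch of that idea only (not the Hive class itself; the array values and window size
    are made up for illustration):

    // Incremental sliding-window sum: one running total, subtract the expired row,
    // instead of recomputing the sum of the whole window for every row.
    public class SlidingWindowSumSketch {
      public static void main(String[] args) {
        long[] rows = { 1, 2, 3, 4, 5 };
        int windowSize = 3;                    // e.g. ROWS BETWEEN 2 PRECEDING AND CURRENT ROW

        long running = 0;
        for (int i = 0; i < rows.length; i++) {
          running += rows[i];                  // the new row enters the frame
          if (i >= windowSize) {
            running -= rows[i - windowSize];   // the expired row leaves the frame
          }
          System.out.println("sum over window ending at row " + i + " = " + running);
        }
      }
    }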

  • Original article: https://www.cnblogs.com/itxuexiwang/p/6263233.html