• 计算平均数UDAF


    package com.suning.udf;
    
    
    import org.apache.hadoop.hive.ql.exec.UDAF;
    import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
    
    /**
     * Hive UDAF that computes the arithmetic mean of the non-null {@code Double}
     * values it is fed.
     */
    public class Avg extends UDAF {

        /**
         * Running aggregation state: how many non-null values have been seen and
         * their sum. This is also the object handed over as the partial result.
         */
        public static class AvgState {
            private long mCount;
            private double mSum;
        }

        /**
         * Evaluator holding the aggregation logic. The comments below describe
         * each method's role as stated by the original author.
         */
        public static class AvgEvaluator implements UDAFEvaluator {
            AvgState state;

            /**
             * Allocates a fresh state object and resets it via {@link #init()}.
             */
            public AvgEvaluator() {
                super();
                state = new AvgState();
                init();
            }

            /**
             * Works like a constructor for the aggregation: resets the counter
             * and the sum to zero.
             */
            public void init() {
                state.mCount = 0;
                state.mSum = 0;
            }

            /**
             * Consumes one input value and folds it into the running state.
             * {@code null} inputs are ignored.
             *
             * @param o the next value, possibly {@code null}
             * @return always {@code true}
             */
            public boolean iterate(Double o) {
                if (o == null) {
                    return true;
                }
                state.mSum += o;
                state.mCount++;
                return true;
            }

            /**
             * Takes no arguments; called once iteration is finished to hand back
             * the intermediate data, similar to a Hadoop Combiner.
             *
             * @return the current state, or {@code null} when no value was counted
             */
            public AvgState terminatePartial() {
                if (state.mCount == 0) {
                    return null;
                }
                return state;
            }

            /**
             * Receives a result of {@link #terminatePartial()} and merges it into
             * this evaluator's state. A {@code null} partial is ignored.
             *
             * @param avgState partial state to merge, possibly {@code null}
             * @return always {@code true}
             */
            public boolean merge(AvgState avgState) {
                if (avgState == null) {
                    return true;
                }
                state.mSum += avgState.mSum;
                state.mCount += avgState.mCount;
                return true;
            }

            /**
             * Produces the final result of the aggregate function.
             *
             * @return the sum divided by the count, or {@code null} when no value
             *         was counted
             */
            public Double terminate() {
                if (state.mCount == 0) {
                    return null;
                }
                return Double.valueOf(state.mSum / state.mCount);
            }
        }
    }
  • 相关阅读:
    sleep(),wait(),yield(),notify()
    (三)终结任务
    (二)共享受限资源
    (一)基本的线程机制
    (十八)多线程
    (十七)泛型程序设计
    视频监控中带宽及存储容量的计算
    turtle实例
    python ——钟表
    Python Tkinter Grid布局管理器详解
  • 原文地址:https://www.cnblogs.com/yin-fei/p/10879719.html
Copyright © 2020-2023  润新知