• MR Case Study: Base Stations 01


    Field reference — product_no: the user's phone number; lac_id: the base station the user is attached to; start_time: when the user arrived at this base station; staytime: how long the user stayed at this base station.

    product_no lac_id moment start_time user_id county_id staytime city_id
    13429100031 22554 8 2013-03-11 08:55:19.151754088 571 571 282 571
    13429100082 22540 8 2013-03-11 08:58:20.152622488 571 571 270 571
    13429100082 22691 8 2013-03-11 08:56:37.149593624 571 571 103 571
    13429100087 22705 8 2013-03-11 08:56:51.139539816 571 571 220 571
    13429100087 22540 8 2013-03-11 08:55:45.150276800 571 571 66 571
    13429100082 22540 8 2013-03-11 08:55:38.140225200 571 571 133 571
    13429100140 26642 9 2013-03-11 09:02:19.151754088 571 571 18 571
    13429100082 22691 8 2013-03-11 08:57:32.151754088 571 571 287 571
    13429100189 22558 8 2013-03-11 08:56:24.139539816 571 571 48 571
    13429100349 22503 8 2013-03-11 08:54:30.152622440 571 571 211 571

    Requirement: lac_id together with start_time gives the user's position at a given moment, and staytime gives the dwell time at each base station. Merge the staytime of consecutive records at the same base station along the user's trail, so the final result lists, per user and in time order, the dwell time at each base station.
    Expected output (note how the two adjacent lac_id 22691 records of 13429100082 are merged: 103 + 287 = 390):

    13429100082 22540 8 2013-03-11 08:58:20.152622488 571 571 270 571
    13429100082 22691 8 2013-03-11 08:56:37.149593624 571 571 390 571
    13429100082 22540 8 2013-03-11 08:55:38.140225200 571 571 133 571
    13429100087 22705 8 2013-03-11 08:56:51.139539816 571 571 220 571
    13429100087 22540 8 2013-03-11 08:55:45.150276800 571 571 66 571

    Analysis: sort each product_no's records by start_time (descending in this example). Whenever two adjacent entries share the same lac_id, add the first entry's staytime into the second and remove the first.
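
    Before the MapReduce versions, here is a minimal standalone sketch of just this sort-and-merge step (no Hadoop; the Record class, its field names, and the hard-coded sample values are assumptions for illustration). One detail matters: after a merge the scan must re-check the same position, otherwise a run of three or more records on the same lac_id would only be partially merged.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class MergeSketch {
        static class Record {
            String lac_id;      // base station id
            String start_time;  // sortable timestamp string
            int staytime;       // dwell time at this base station
            Record(String lac_id, String start_time, int staytime) {
                this.lac_id = lac_id;
                this.start_time = start_time;
                this.staytime = staytime;
            }
        }

        // assumes the list holds one product_no's records, already sorted by start_time desc
        static List<Record> mergeAdjacent(List<Record> sorted) {
            List<Record> list = new ArrayList<Record>(sorted);
            for (int i = 0; i < list.size() - 1; i++) {
                if (list.get(i).lac_id.equals(list.get(i + 1).lac_id)) {
                    // fold the earlier list entry's staytime into the later one
                    list.get(i + 1).staytime += list.get(i).staytime;
                    list.remove(i);
                    i--; // re-check this position: the new neighbour may match too
                }
            }
            return list;
        }

        public static void main(String[] args) {
            // the four sample records of 13429100082, time-descending
            List<Record> trail = Arrays.asList(
                    new Record("22540", "08:58:20", 270),
                    new Record("22691", "08:57:32", 287),
                    new Record("22691", "08:56:37", 103),
                    new Record("22540", "08:55:38", 133));
            for (Record r : mergeAdjacent(trail)) {
                // prints: 22540 270 / 22691 390 (= 287 + 103) / 22540 133
                System.out.println(r.lac_id + " " + r.staytime);
            }
        }
    }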

    Complete code, v1: this version uses only a Map phase. map() parses each line into a custom RecordWritable, adds it to a List, and re-sorts the List. cleanup() then sums the staytime of adjacent entries whose product_no and lac_id both match.

    Drawback: the entire input is buffered in the List, which cannot scale to large data volumes.

    package demo0902;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class Demo090203 {
        final static String INPUT_PATH = "hdfs://10.16.17.182:9000/test/in/0902/";
        final static String OUT_PATH = "hdfs://10.16.17.182:9000/test/out/0902/06";
    
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
            job.setJarByClass(Demo090203.class);
    
            // wire up the map-only job
            job.setMapperClass(Demo090201Mapper.class);
            job.setMapOutputKeyClass(RecordWritable.class);
            job.setMapOutputValueClass(NullWritable.class);
            job.setOutputKeyClass(RecordWritable.class);
            job.setOutputValueClass(NullWritable.class);

            FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
            FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));

            job.waitForCompletion(true);
        }

        //map
        public static class Demo090201Mapper extends Mapper<LongWritable, Text, RecordWritable, NullWritable>{
            // buffers every parsed record seen by this map task
            ArrayList<RecordWritable> list = new ArrayList<RecordWritable>();

            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                String[] splited = value.toString().split(" ");

                // assemble one line into a record
                RecordWritable record = new RecordWritable();
                record.product_no=splited[0];
                record.lac_id=splited[1];
                record.moment=Integer.parseInt(splited[2]);
                record.start_time=splited[3];
                record.user_id=splited[4];
                record.county_id=splited[5];
                record.staytime=Integer.parseInt(splited[6]);
                record.city_id=splited[7];

                list.add(record);

                // keep the list sorted with a custom comparator
                // (sorting once in cleanup() would also be enough)
                Collections.sort(list, new Comparator<RecordWritable>() {
                    @Override
                    public int compare(RecordWritable r1, RecordWritable r2) {
                        // delegate to RecordWritable.compareTo()
                        return (r1.compareTo(r2));
                    }
                });
            }

            @Override
            protected void cleanup(Context context)
                    throws IOException, InterruptedException {
                // debug: dump the sorted list
                for(RecordWritable r : list){
                    System.out.println(r.toString());
                }

                for(int i=0; i<list.size() ;i++){
                    // skip the last record, which has no successor
                    if(i != list.size()-1){
                        // take two adjacent records
                        RecordWritable record_pre = list.get(i);
                        RecordWritable record_next = list.get(i+1);

                        // add staytime only when both phone number and base station match
                        if(record_pre.product_no.equals(record_next.product_no)
                                && record_pre.lac_id.equals(record_next.lac_id)){
                            // fold the earlier entry's staytime into the later one
                            record_next.staytime += record_pre.staytime;

                            // remove the merged record and re-check this position,
                            // so runs of three or more identical lac_ids also merge
                            list.remove(record_pre);
                            i--;
                        }
                    }
                }

                for(RecordWritable record : list){
                    context.write(record, NullWritable.get());
                }
            }
        }

        // custom Writable type holding one record
        public static class RecordWritable implements WritableComparable<RecordWritable>{
            String product_no;
            String lac_id;
            int moment;
            String start_time;
            String user_id;
            String county_id;
            int staytime;
            String city_id;

            @Override
            public int compareTo(RecordWritable o) {
                // by phone number first, ascending
                int value = this.product_no.compareTo(o.product_no);
                if(value==0)
                    // then by start_time, descending
                    return o.start_time.compareTo(this.start_time);
                return value;
            }

            @Override
            public void write(DataOutput out) throws IOException {
                out.writeUTF(product_no);
                out.writeUTF(lac_id);
                out.writeInt(moment);
                out.writeUTF(start_time);
                out.writeUTF(user_id);
                out.writeUTF(county_id);
                out.writeInt(staytime);
                out.writeUTF(city_id);
            }

            @Override
            public void readFields(DataInput in) throws IOException {
                product_no=in.readUTF();
                lac_id=in.readUTF();
                moment=in.readInt();
                start_time=in.readUTF();
                user_id=in.readUTF();
                county_id=in.readUTF();
                staytime=in.readInt();
                city_id=in.readUTF();
            }

            @Override
            public String toString() {
                return this.product_no+" "+this.lac_id+" "+this.moment+" "+this.start_time+" "+user_id+" "+county_id+" "+staytime+" "+city_id;
            }
        }
    }

    Complete code, v2: the Map phase emits product_no as the key and the whole line as the value; the Reduce phase then does essentially what v1's map phase did, one key at a time.

    Advantage: compared with v1, each reduce() call only handles the records of a single product_no, easing the memory pressure caused by large inputs. (A further optimization, secondary sort, is sketched after the v2 code below.)

    package demo0902;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    
    public class Demo090204 {
        final static String INPUT_PATH = "hdfs://10.16.17.182:9000/test/in/0902/";
        final static String OUT_PATH = "hdfs://10.16.17.182:9000/test/out/0902/02";
    
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
        job.setJarByClass(Demo090204.class);
    
            job.setMapperClass(Demo090201Mapper.class);
            job.setReducerClass(Demo090201Reducer.class);
    
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
    
            job.setOutputKeyClass(RecordWritable.class);
            job.setOutputValueClass(NullWritable.class);
    
            FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
            FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
    
            job.waitForCompletion(true);
    
        }
        //map
        public static class Demo090201Mapper extends Mapper<LongWritable, Text, Text, Text>{
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
            String[] splited = value.toString().split("\t");
    
                context.write(new Text(splited[0]), new Text(value));
            }
        }
    
        //reduce
        public static class Demo090201Reducer extends Reducer<Text, Text, RecordWritable, NullWritable>{
            @Override
            protected void reduce(Text key, Iterable<Text> v2s, Context context)
                    throws IOException, InterruptedException {
    
                ArrayList<RecordWritable> list = new ArrayList<RecordWritable>();
    
                for(Text text : v2s){
                String[] splited = text.toString().split("\t");
    
                    RecordWritable record = new RecordWritable();
                    record.product_no=splited[0];
                    record.lac_id=splited[1];
                    record.moment=Integer.parseInt(splited[2]);
                    record.start_time=splited[3];
                    record.user_id=splited[4];
                    record.county_id=splited[5];
                    record.staytime=Integer.parseInt(splited[6]);
                    record.city_id=splited[7];
    
                    list.add(record);
                }
    
            // sort the list with a custom comparator
            Collections.sort(list, new Comparator<RecordWritable>() {
                @Override
                public int compare(RecordWritable r1, RecordWritable r2) {
                    // delegate to RecordWritable.compareTo()
                    return (r1.compareTo(r2));
                }
            });
    
            for(int i=0; i<list.size() ;i++){

                // skip the last record, which has no successor
                if(i != list.size()-1){

                    // take two adjacent records
                    RecordWritable record_pre = list.get(i);
                    RecordWritable record_next = list.get(i+1);

                    if(record_pre.lac_id.equals(record_next.lac_id)){

                        // fold the earlier entry's staytime into the later one
                        record_next.staytime += record_pre.staytime;

                        // remove the merged record and re-check this position,
                        // so runs of three or more identical lac_ids also merge
                        list.remove(record_pre);
                        i--;
                    }
                }
            }
                for(RecordWritable record : list){
                    context.write(record, NullWritable.get());
                }    
            }
        }
    // custom Writable type holding one record
        public static class RecordWritable implements WritableComparable<RecordWritable>{
            String product_no;
            String lac_id;
            int moment;
            String start_time;
            String user_id;
            String county_id;
            int staytime;
            String city_id;
    
        @Override
        public int compareTo(RecordWritable o) {
            // by phone number first, ascending
            int value = this.product_no.compareTo(o.product_no);
            if(value==0)
                // then by start_time, descending
                return o.start_time.compareTo(this.start_time);
            return value;
        }
    
            @Override
            public void write(DataOutput out) throws IOException {
                out.writeUTF(product_no);
                out.writeUTF(lac_id);
                out.writeInt(moment);
                out.writeUTF(start_time);
                out.writeUTF(user_id);
                out.writeUTF(county_id);
                out.writeInt(staytime);
                out.writeUTF(city_id);            
            }
    
            @Override
            public void readFields(DataInput in) throws IOException {
                product_no=in.readUTF();
                lac_id=in.readUTF();
                moment=in.readInt();
                start_time=in.readUTF();
                user_id=in.readUTF();
                county_id=in.readUTF();
                staytime=in.readInt();
                city_id=in.readUTF();                
            }
    
            @Override
            public String toString() {
                return this.product_no+" "+this.lac_id+" "+this.moment+" "+this.start_time+" "+user_id+" "+county_id+" "+ staytime+" "+city_id;
            }
        }
    }
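
    Both versions still buffer one user's whole trail in an in-memory List before sorting it. A common further step, sketched below and not part of the original post, is MapReduce secondary sort: a composite key carries product_no and start_time, while a custom partitioner and grouping comparator work on product_no alone, so reduce() receives each user's records already time-ordered and can merge adjacent base stations while streaming. Class and field names here are illustrative assumptions.

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;
    import org.apache.hadoop.mapreduce.Partitioner;

    public class SecondarySortSketch {
        // composite key: sort by product_no asc, then start_time desc
        public static class UserTimeKey implements WritableComparable<UserTimeKey> {
            String product_no;
            String start_time;

            @Override
            public int compareTo(UserTimeKey o) {
                int c = product_no.compareTo(o.product_no);
                return c != 0 ? c : o.start_time.compareTo(start_time);
            }

            @Override
            public void write(DataOutput out) throws IOException {
                out.writeUTF(product_no);
                out.writeUTF(start_time);
            }

            @Override
            public void readFields(DataInput in) throws IOException {
                product_no = in.readUTF();
                start_time = in.readUTF();
            }
        }

        // route all keys of one user to the same reduce task
        public static class UserPartitioner extends Partitioner<UserTimeKey, Text> {
            @Override
            public int getPartition(UserTimeKey key, Text value, int numPartitions) {
                return (key.product_no.hashCode() & Integer.MAX_VALUE) % numPartitions;
            }
        }

        // group all of one user's keys into a single reduce() call
        public static class UserGroupingComparator extends WritableComparator {
            protected UserGroupingComparator() {
                super(UserTimeKey.class, true);
            }

            @Override
            public int compare(WritableComparable a, WritableComparable b) {
                return ((UserTimeKey) a).product_no.compareTo(((UserTimeKey) b).product_no);
            }
        }
    }

    The driver would register these with job.setPartitionerClass(UserPartitioner.class) and job.setGroupingComparatorClass(UserGroupingComparator.class); the reducer then only needs to remember the previous lac_id and a running staytime instead of buffering a List.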
  • Original post: https://www.cnblogs.com/skyl/p/4780151.html