• ChainMapper和ChainReducer


    The ChainMapper class allows to use multiple Mapper classes within a single Map task. 

    The ChainReducer class allows to chain multiple Mapper classes after a Reducer within the Reducer task.

     http://www.oratea.net/?p=371

     通过ChainMapper可以将多个map类合并成一个map任务。

    下面个这个例子没什么实际意思,但是很好的演示了ChainMapper的作用。

    源文件
    100 tom 90
    101 mary 85
    102 kate 60

    map00的结果,过滤掉100的记录
    101 mary 85
    102 kate 60

    map01的结果,过滤掉101的记录
    102 kate 60

    reduce结果
    102 kate 60

     package org.myorg;


    import java.io.IOException;
    import java.util.*;
    import java.lang.String;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.conf.*;
    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapred.*;
    import org.apache.hadoop.util.*;
    import org.apache.hadoop.mapred.lib.*;

    public class WordCount
    {

        
    public static class Map00 extends MapReduceBase implements Mapper
        {

            
    public void map(Text key, Text value, OutputCollector output, Reporter reporter) throws IOException
            {

                Text ft 
    = new Text(“100″);

                
    if(!key.equals(ft))
                {
                    output.collect(key, value);
                }
            }
        }

        
    public static class Map01 extends MapReduceBase implements Mapper
        {

            
    public void map(Text key, Text value, OutputCollector output, Reporter reporter) throws IOException
            {

                Text ft 
    = new Text(“101″);

                
    if(!key.equals(ft))
                {
                    output.collect(key, value);
                }
            }
        }

        
    public static class Reduce extends MapReduceBase implements Reducer
        {
            
    public void reduce(Text key, Iterator values, OutputCollector output, Reporter reporter) throws IOException
            {

                
    while(values.hasNext())
                {
                    output.collect(key, values.next());
                }

            }
        }

        
    public static void main(String[] args) throws Exception
        {

            JobConf conf 
    = new JobConf(WordCount.class);
            conf.setJobName(“wordcount00″);

            conf.setInputFormat(KeyValueTextInputFormat.
    class);
            conf.setOutputFormat(TextOutputFormat.
    class);

            ChainMapper cm 
    = new ChainMapper();

            JobConf mapAConf 
    = new JobConf(false);
            cm.addMapper(conf, Map00.
    class, Text.class, Text.class, Text.class, Text.classtrue, mapAConf);

            JobConf mapBConf 
    = new JobConf(false);
            cm.addMapper(conf, Map01.
    class, Text.class, Text.class, Text.class, Text.classtrue, mapBConf);

            conf.setReducerClass(Reduce.
    class);

            conf00.setOutputKeyClass(Text.
    class);
            conf00.setOutputValueClass(Text.
    class);

            FileInputFormat.setInputPaths(conf, 
    new Path(args[0]));
            FileOutputFormat.setOutputPath(conf, 
    new Path(args[1]));

            JobClient.runJob(conf);

        }
    }
     

    另外一个例子,代码很多,其实很简单,Conn几个类都是相同的

    http://yixiaohuamax.iteye.com/blog/684244 

    package com.oncedq.code;

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    import java.text.SimpleDateFormat;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;
    import org.apache.hadoop.mapred.jobcontrol.Job;
    import org.apache.hadoop.mapred.jobcontrol.JobControl;
    import org.apache.hadoop.mapred.lib.ChainMapper;

    import com.oncedq.code.util.DateUtil;

    public class ProcessSample {
        
    public static class ExtractMappper extends MapReduceBase implements
                Mapper
    <LongWritable, Text, LongWritable, Conn1> {

            @Override
            
    public void map(LongWritable arg0, Text arg1,
                    OutputCollector
    <LongWritable, Conn1> arg2, Reporter arg3)
                    
    throws IOException {
                String line 
    = arg1.toString();
                String[] strs 
    = line.split(";");
                Conn1 conn1 
    = new Conn1();
                conn1.orderKey 
    = Long.parseLong(strs[0]);
                conn1.customer 
    = Long.parseLong(strs[1]);
                conn1.state 
    = strs[2];
                conn1.price 
    = Double.parseDouble(strs[3]);
                conn1.orderDate 
    = DateUtil.getDateFromString(strs[4], "yyyy-MM-dd");
                LongWritable lw 
    = new LongWritable(conn1.orderKey);
                arg2.collect(lw, conn1);
            }

        }

        
    private static class Conn1 implements WritableComparable<Conn1> {
            
    public long orderKey;
            
    public long customer;
            
    public String state;
            
    public double price;
            
    public java.util.Date orderDate;

            @Override
            
    public void readFields(DataInput in) throws IOException {
                orderKey 
    = in.readLong();
                customer 
    = in.readLong();
                state 
    = Text.readString(in);
                price 
    = in.readDouble();
                orderDate 
    = DateUtil.getDateFromString(Text.readString(in),
                        
    "yyyy-MM-dd");
            }

            @Override
            
    public void write(DataOutput out) throws IOException {
                out.writeLong(orderKey);
                out.writeLong(customer);
                Text.writeString(out, state);
                out.writeDouble(price);
                Text.writeString(out, DateUtil.getDateStr(orderDate, 
    "yyyy-MM-dd"));
            }

            @Override
            
    public int compareTo(Conn1 arg0) {
                
    // TODO Auto-generated method stub
                return 0;
            }

        }

        
    public static class Filter1Mapper extends MapReduceBase implements
                Mapper
    <LongWritable, Conn1, LongWritable, Conn2> {

            @Override
            
    public void map(LongWritable inKey, Conn1 c2,
                    OutputCollector
    <LongWritable, Conn2> collector, Reporter report)
                    
    throws IOException {
                
    if (c2.state.equals("F")) {
                    Conn2 inValue 
    = new Conn2();
                    inValue.customer 
    = c2.customer;
                    inValue.orderDate 
    = c2.orderDate;
                    inValue.orderKey 
    = c2.orderKey;
                    inValue.price 
    = c2.price;
                    inValue.state 
    = c2.state;
                    collector.collect(inKey, inValue);
                }
            }

        }

        
    private static class Conn2 implements WritableComparable<Conn1> {
            
    public long orderKey;
            
    public long customer;
            
    public String state;
            
    public double price;
            
    public java.util.Date orderDate;

            @Override
            
    public void readFields(DataInput in) throws IOException {
                orderKey 
    = in.readLong();
                customer 
    = in.readLong();
                state 
    = Text.readString(in);
                price 
    = in.readDouble();
                orderDate 
    = DateUtil.getDateFromString(Text.readString(in),
                        
    "yyyy-MM-dd");
            }

            @Override
            
    public void write(DataOutput out) throws IOException {
                out.writeLong(orderKey);
                out.writeLong(customer);
                Text.writeString(out, state);
                out.writeDouble(price);
                Text.writeString(out, DateUtil.getDateStr(orderDate, 
    "yyyy-MM-dd"));
            }

            @Override
            
    public int compareTo(Conn1 arg0) {
                
    // TODO Auto-generated method stub
                return 0;
            }

        }

        
    public static class RegexMapper extends MapReduceBase implements
                Mapper
    <LongWritable, Conn2, LongWritable, Conn3> {

            @Override
            
    public void map(LongWritable inKey, Conn2 c3,
                    OutputCollector
    <LongWritable, Conn3> collector, Reporter report)
                    
    throws IOException {
                c3.state 
    = c3.state.replaceAll("F""Find");
                Conn3 c2 
    = new Conn3();
                c2.customer 
    = c3.customer;
                c2.orderDate 
    = c3.orderDate;
                c2.orderKey 
    = c3.orderKey;
                c2.price 
    = c3.price;
                c2.state 
    = c3.state;
                collector.collect(inKey, c2);
            }
        }

        
    private static class Conn3 implements WritableComparable<Conn1> {
            
    public long orderKey;
            
    public long customer;
            
    public String state;
            
    public double price;
            
    public java.util.Date orderDate;

            @Override
            
    public void readFields(DataInput in) throws IOException {
                orderKey 
    = in.readLong();
                customer 
    = in.readLong();
                state 
    = Text.readString(in);
                price 
    = in.readDouble();
                orderDate 
    = DateUtil.getDateFromString(Text.readString(in),
                        
    "yyyy-MM-dd");
            }

            @Override
            
    public void write(DataOutput out) throws IOException {
                out.writeLong(orderKey);
                out.writeLong(customer);
                Text.writeString(out, state);
                out.writeDouble(price);
                Text.writeString(out, DateUtil.getDateStr(orderDate, 
    "yyyy-MM-dd"));
            }

            @Override
            
    public int compareTo(Conn1 arg0) {
                
    // TODO Auto-generated method stub
                return 0;
            }

        }

        
    public static class LoadMapper extends MapReduceBase implements
                Mapper
    <LongWritable, Conn3, LongWritable, Conn3> {

            @Override
            
    public void map(LongWritable arg0, Conn3 arg1,
                    OutputCollector
    <LongWritable, Conn3> arg2, Reporter arg3)
                    
    throws IOException {
                arg2.collect(arg0, arg1);
            }

        }

        
    public static void main(String[] args) {
            JobConf job 
    = new JobConf(ProcessSample.class);
            job.setJobName(
    "ProcessSample");
            job.setNumReduceTasks(
    0);
            job.setInputFormat(TextInputFormat.
    class);
            job.setOutputFormat(TextOutputFormat.
    class);
            JobConf mapper1 
    = new JobConf();
            JobConf mapper2 
    = new JobConf();
            JobConf mapper3 
    = new JobConf();
            JobConf mapper4 
    = new JobConf();
            ChainMapper cm 
    = new ChainMapper();
            cm.addMapper(job, ExtractMappper.
    class, LongWritable.class, Text.class,
                    LongWritable.
    class, Conn1.classtrue, mapper1);
            cm.addMapper(job, Filter1Mapper.
    class, LongWritable.class, Conn1.class,
                    LongWritable.
    class, Conn2.classtrue, mapper2);
            cm.addMapper(job, RegexMapper.
    class, LongWritable.class, Conn2.class,
                    LongWritable.
    class, Conn3.classtrue, mapper3);
            cm.addMapper(job, LoadMapper.
    class, LongWritable.class, Conn3.class,
                    LongWritable.
    class, Conn3.classtrue, mapper4);
            FileInputFormat.setInputPaths(job, 
    new Path("orderData"));
            FileOutputFormat.setOutputPath(job, 
    new Path("orderDataOutput"));
            Job job1;
            
    try {
                job1 
    = new Job(job);
                JobControl jc 
    = new JobControl("test");
                jc.addJob(job1);
                jc.run();
            } 
    catch (IOException e) {
                
    // TODO Auto-generated catch block
                e.printStackTrace();
            }

        }
    } 

  • 相关阅读:
    CodeSmith中SchemaExplorer类详解
    配置 TransactSQL 调试器
    获取SQL所有数据库名、所有表名、所有字段名、表字段长度
    SQL SERVER数据类型与C#数据类型对照表
    C# orm linq 真的不错
    游戏外挂教程
    RDLC钻取式报表开发
    脚本整理
    访问Exchange Mail
    本人开发的带提示的TextBox控件
  • 原文地址:https://www.cnblogs.com/xuxm2007/p/2165774.html
Copyright © 2020-2023  润新知