• hive数据文件简单合并


    MR代码:

    package merge;
    import java.io.IOException;
    import java.util.Iterator;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;
    
    public class merge
    {
        public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text>
        {
            private Text word=new Text("");
            public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
                    throws IOException
            {
                output.collect(value,word);
            }
        }
    
    
        public static void main(String[] args) throws Exception
        {
            JobConf conf = new JobConf(merge.class);
            conf.setJobName("wordcount");
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);
            conf.setMapperClass(Map.class);
            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);
            FileInputFormat.setInputPaths(conf, new Path(args[0]));
            FileOutputFormat.setOutputPath(conf, new Path(args[1]));
            JobClient.runJob(conf);
        }
    }

    Eclipse自动生成.class文件,打包命令:

    jar打包:在项目的bin目录下
    Dev-Fac:bin ce-pc$ jar -cvf hive-merge.jar -C  ../ .

    合并命令:

    hadoop jar /tmp/hive-merge.jar merge.merge /user/hive/warehouse/table1 /user/hive/warehouse/table1/out
    
    # merge.merge 表示 merge 包下的 merge 类(即"包名.类名")
  • 相关阅读:
    2012暑期川西旅游之第六天(四姑娘山达维夹金山邛崃青城山)
    实现元素拖拽
    asp.net学习资源
    080402 晴れ
    送给Rain的文章
    两天后见
    若水三千
    关于下一代的教育
    20080408 大雨
    Missing You
  • 原文地址:https://www.cnblogs.com/ggzone/p/10121217.html
Copyright © 2020-2023  润新知