• Hadoop in Practice


    Hadoop in practice. The first code block below reads and writes a text file on HDFS.
    Keys are ordered by Hadoop's built-in shuffle sort; the values sharing a key can then be sorted inside reduce.
    A custom partitioner can assign key ranges to reducers in order, which keeps the overall output sorted and prevents any single reducer from receiving too much data.
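    For example, with split points at 100 and 500, values below 100 go to reducer 0, values in [100, 500) to reducer 1, and the rest to reducer 2; since each reducer also sorts its own keys, concatenating the reducer outputs in order yields a fully sorted result.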
    package com.company;

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class Main {
        public static void main(String[] args) throws Exception {
            try {
                Configuration config = new Configuration();
                FileSystem hdfs = FileSystem.get(config);
                // Open an existing HDFS file for reading and create another for writing.
                FSDataInputStream hdfsInStream = hdfs.open(new Path("input"));
                BufferedReader br = new BufferedReader(new InputStreamReader(hdfsInStream, "utf-8"));
                FSDataOutputStream os = hdfs.create(new Path("output"));

                // Read line by line; each line is tab-separated.
                String line = br.readLine();
                while (line != null) {
                    String[] sp = line.split("\t", -1);
                    for (int i = 0; i < sp.length; i++) {
                        System.out.print(sp[i] + "   ");
                    }
                    System.out.println();
                    line = br.readLine();
                }
                br.close();
                hdfsInStream.close();

                os.write("outputtext".getBytes("UTF-8"));
                os.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    Next, the MapReduce job itself: a mapper, a partitioner driven by precomputed split points, and a reducer that sorts the values for each key.
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.util.*;

    import org.apache.hadoop.conf.Configurable;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Partitioner;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    public class WordCount {

        public static class Map extends Mapper<LongWritable, Text, Text, Text> {

            @Override
            public void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                // parts[0] holds the label, uid and ip; parts[1] holds the count.
                String[] parts = value.toString().split("\t", -1);
                if (parts.length != 2) {
                    return;
                }
                // Left-pad the count to 9 digits so string order matches numeric order.
                String num = parts[1];
                int ned = 9 - num.length();
                while (ned-- > 0) {
                    num = "0" + num;
                }
                int dex1 = parts[0].indexOf(' ');
                int dex2 = parts[0].indexOf('1');
                String label = parts[0].substring(0, dex1);
                String ip = parts[0].substring(dex2 + 1);
                String uid = parts[0].substring(dex1 + 1, dex2);
                context.write(new Text(uid + " " + label), new Text(num));
            }
        }

        public static class KeyPartitioner extends Partitioner<Text, Text> implements Configurable {
            int[] partition = new int[15];

            @Override
            public void setConf(Configuration config) {
                // Via the Configurable interface, this runs once before partitioning starts.
                try {
                    FileSystem hdfs = FileSystem.get(config);
                    // Read the precomputed split points from HDFS; each line is "splitValue bucketIndex".
                    FSDataInputStream hdfsInStream = hdfs.open(new Path("input"));
                    BufferedReader br = new BufferedReader(new InputStreamReader(hdfsInStream, "utf-8"));

                    String line = br.readLine();
                    while (line != null) {
                        String[] sp = line.split(" ", -1);
                        int x = Integer.parseInt(sp[0]);
                        int y = Integer.parseInt(sp[1]);
                        partition[y] = x;
                        line = br.readLine();
                    }
                    // Sentinels: smallest and largest possible values.
                    partition[0] = 0;
                    partition[10] = 65535;
                    br.close();
                    hdfsInStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }

            @Override
            public Configuration getConf() {
                return null;
            }

            // Lower bound: index of the first split point >= k.
            public int low(int k) {
                int l = 0, r = 10;
                while (l <= r) {
                    int mid = (l + r) >> 1;
                    if (partition[mid] < k) {
                        l = mid + 1;
                    } else {
                        r = mid - 1;
                    }
                }
                return r + 1;
            }

            // Upper bound: index of the last split point <= k.
            public int up(int k) {
                int l = 0, r = 10;
                while (l <= r) {
                    int mid = (l + r) >> 1;
                    if (partition[mid] > k) {
                        r = mid - 1;
                    } else {
                        l = mid + 1;
                    }
                }
                return l - 1;
            }

            @Override
            public int getPartition(Text key, Text value, int numPartitions) {
                int keynumber = Integer.parseInt(value.toString());
                int left = low(keynumber);
                int right = up(keynumber);
                int len = right - left + 1;
                if (len > 1) {
                    // The value equals several split points: spread it randomly
                    // across the matching reducers to balance the load.
                    return (int) (Math.random() * 1000) % len + left - 1;
                } else {
                    return left - 1;
                }
            }
        }

        public static class Reduce extends Reducer<Text, Text, Text, Text> {

            @Override  // @Override guards against mistyping; the signature must match exactly.
            public void reduce(Text key, Iterable<Text> values, Context context)
                    throws IOException, InterruptedException {
                List<Integer> tips = new ArrayList<Integer>();
                for (Text value : values) {
                    tips.add(Integer.parseInt(value.toString()));
                }
                Collections.sort(tips);
                for (int i = 0; i < tips.size(); i++) {
                    System.out.print(tips.get(i) + ";");  // visible in the task logs on the cluster
                    context.write(key, new Text(String.valueOf(tips.get(i))));
                }
            }
        }

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();

            Job job = Job.getInstance(conf, "wordcount");
            job.setJarByClass(WordCount.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            job.setMapperClass(Map.class);
            job.setReducerClass(Reduce.class);

            job.setNumReduceTasks(10);
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            job.setPartitionerClass(KeyPartitioner.class);

            FileInputFormat.setInputPaths(job, new Path("datainfile"));
            FileOutputFormat.setOutputPath(job, new Path("dataoutfile"));

            job.waitForCompletion(true);
        }
    }
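    The two binary searches are the subtle part: low() returns the first split-point index not below the value, up() the last one not above it, and when a value equals several (duplicated) split points the record is spread randomly across all matching reducers. A minimal standalone sketch with toy split points (made up for illustration, not taken from any real job) shows the effect:

    public class PartitionDemo {
        // Toy split points: duplicates mean several reducers share the same boundary value.
        static int[] partition = {0, 3, 7, 7, 7, 12, 65535};

        static int low(int k) {  // first index with partition[index] >= k
            int l = 0, r = partition.length - 1;
            while (l <= r) {
                int mid = (l + r) >> 1;
                if (partition[mid] < k) l = mid + 1; else r = mid - 1;
            }
            return r + 1;
        }

        static int up(int k) {  // last index with partition[index] <= k
            int l = 0, r = partition.length - 1;
            while (l <= r) {
                int mid = (l + r) >> 1;
                if (partition[mid] > k) r = mid - 1; else l = mid + 1;
            }
            return l - 1;
        }

        public static void main(String[] args) {
            // 7 appears three times, so low(7) = 2 and up(7) = 4: a record with
            // value 7 may go to reducer 1, 2 or 3, exactly how getPartition()
            // keeps one hot value from overloading a single reducer.
            System.out.println(low(7) + " " + up(7));  // prints "2 4"
        }
    }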

     Finally, the overall pipeline:

    The logs contain many lines of the form
    url 1...
    so a first wordcount pass produces
    url sum (the total count per url)

    Each sum is then emitted as "sum 1...", and a second wordcount pass produces
    sum count (how many urls share each sum)
    and from that frequency table the partition points are derived --->

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    class Go {
        int[] item = new int[10009];  // histogram: item[x] = how many records have value x
        int total = 0;

        public void add(int x, int y) {
            item[x] = y;
            total += y;
        }

        // Walk the histogram and emit one "value bucketIndex" line per decile
        // boundary, so each of the 10 buckets holds roughly total/10 records.
        public String print() {
            int pre = 0;
            int step = total / 10 + 1;
            int st = 1;
            StringBuilder out = new StringBuilder();
            for (int i = 1; i < 10000; i++) {
                pre += item[i];
                while (st * step <= pre) {
                    out.append(i).append(" ").append(st).append("\n");
                    st++;
                }
            }
            return out.toString();
        }
    }

    public class MakePartitionPoints {  // wrapper class added so the fragment compiles
        public static void main(String[] args) {
            Go getPartition = new Go();
            try {
                Configuration config = new Configuration();
                FileSystem hdfs = FileSystem.get(config);
                // "input" holds the second wordcount's output: lines of "sum count".
                FSDataInputStream hdfsInStream = hdfs.open(new Path("input"));
                BufferedReader br = new BufferedReader(new InputStreamReader(hdfsInStream, "utf-8"));
                FSDataOutputStream os = hdfs.create(new Path("output"));
                String line = br.readLine();
                while (line != null) {
                    String[] sp = line.split("\t", -1);
                    int x = Integer.parseInt(sp[0]);
                    int y = Integer.parseInt(sp[1]);
                    getPartition.add(x, y);
                    line = br.readLine();
                }
                br.close();
                hdfsInStream.close();
                // Write the split points where KeyPartitioner.setConf() will read them.
                String out = getPartition.print();
                os.write(out.getBytes("UTF-8"));
                os.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
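    As a sanity check, here is a toy run of Go (the values and counts are made up for illustration):

    public class GoDemo {  // hypothetical demo class
        public static void main(String[] args) {
            Go go = new Go();
            go.add(5, 40);   // value 5 occurs 40 times
            go.add(20, 30);  // value 20 occurs 30 times
            go.add(80, 30);  // value 80 occurs 30 times
            // total = 100, step = 11; a boundary is emitted each time the
            // running count passes a multiple of step, so this prints
            // "5 1" ... "5 3", "20 4" ... "20 6", "80 7" ... "80 9".
            System.out.print(go.print());
        }
    }

    Only split points 1 through 9 come out; KeyPartitioner.setConf() fills in partition[0] = 0 and partition[10] = 65535 itself, so the ten buckets are complete.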

    In short: url sum pairs -> partition by the precomputed split points -> reduce-side sort ->
    the concatenated reducer outputs give the urls ordered by their sums.

    (The full code is above.)
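    For comparison, stock Hadoop can also produce totally ordered output without a hand-rolled partitioner: TotalOrderPartitioner reads split points from a partition file, and InputSampler can generate that file by sampling the input. A minimal driver fragment, assuming Text map-output keys and an input whose keys can be sampled (the partition-file path is hypothetical):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
    import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

    Job job = Job.getInstance(new Configuration(), "total-order");
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("_partitions"));  // hypothetical path
    // Sample at 10% frequency, at most 10000 samples from at most 10 splits.
    InputSampler.Sampler<Text, Text> sampler =
            new InputSampler.RandomSampler<Text, Text>(0.1, 10000, 10);
    InputSampler.writePartitionFile(job, sampler);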

• Original post: https://www.cnblogs.com/gray035/p/3811346.html