• Exercise: use the utils to read from MySQL and write to Kafka, then query with Flink SQL


    bean

    package bean;

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;

    /**
     * Package: bean
     * Description: region dimension table base_province
     * Author ly
     * Date 2022/3/12 15:16
     * Created by dell on 2022/3/12
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    public class Base_province {
        private Long id;
        private String name;
        private String region_id;
        private String area_code;
        private String iso_code;
    }
    package bean;

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;

    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    public class Gmv {
        private Integer user_id;
        private String username;
        private String consignee;
        private Double final_total_amount;
        private Integer province_id;
        private String name;
        private Integer sku_id;
        private String sku_name;
        private Integer sku_num;
    }
    package bean;

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;

    /**
     * Package: bean
     * Description: order detail fact table order_detail
     * Author ly
     * Date 2022/3/13 16:01
     * Created by dell on 2022/3/13
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    public class Order_detail {
        private Long id;
        private Long order_id;
        private Long sku_id;
        private String sku_name;
        private String img_url;
        private Double order_price;
        private String sku_num;
    }
    package bean;

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;

    /**
     * Package: bean
     * Description: order fact table order_info
     * Author ly
     * Date 2022/3/12 15:33
     * Created by dell on 2022/3/12
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    public class Order_info {
        private Long id;
        private String consignee;
        private String consignee_tel;
        private Double final_total_amount;
        private String order_status;
        private Long user_id;
        private String delivery_address;
        private String order_comment;
        private String out_trade_no;
        private String trade_body;
        private String create_time;
        private String operate_time;
        private String expire_time;
        private String tracking_no;
        private Long parent_order_id;
        private String img_url;
        private Integer province_id;
        private Double benefit_reduce_amount;
        private Double original_total_amount;
        private Double feight_fee;
    }
    package bean;

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;

    /**
     * Package: bean
     * Description: product (SKU) dimension table sku_info
     * Author ly
     * Date 2022/3/12 15:23
     * Created by dell on 2022/3/12
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    public class Sku_info {
        private Long id;
        private Long spu_id;
        private Double price;
        private String sku_name;
        private String sku_desc;
        private Double weight;
        private Long tm_id;
        private Long category3_id;
        private String sku_default_img;
        private String create_time;
    }
    package bean;

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;

    /**
     * Package: bean
     * Description: simple bean used to parse the ods_db_temp test topic
     * Author ly
     * Date 2022/3/11 11:51
     * Created by dell on 2022/3/11
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    public class Student {
        private Integer sid;
        private String sname;
        private String sex;
    }
    package bean;

    import lombok.AllArgsConstructor;
    import lombok.Data;
    import lombok.NoArgsConstructor;

    /**
     * Package: bean
     * Description: user dimension table user_info
     * Author ly
     * Date 2022/3/11 17:50
     * Created by dell on 2022/3/11
     */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    public class User_info {
        private Long id;
        private String login_name;
        private String nick_name;
        private String passwd;
        private String name;
        private String phone_num;
        private String email;
        private String head_img;
        private String user_level;
        private String birthday;
        private String gender;
        private String create_time;
    }

    utils

    package util;

    import org.apache.hadoop.conf.Configuration;
    import java.util.HashMap;
    import java.util.Properties;

    public class Propss {
        public static Properties producer_Props = new Properties();
        public static Properties consumer_Props = new Properties();
        static HashMap<String, Object> kafka_Consumer = new HashMap<>();

        public static Configuration setConf(Configuration conf) {
            conf.set("hbase.zookeeper.quorum", "hadoop106,hadoop107,hadoop108");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            return conf;
        }

        static {
            kafka_Consumer.put("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
            kafka_Consumer.put("group.id", "com/test");
            // read from the beginning of the topic
            kafka_Consumer.put("auto.offset.reset", "earliest");
            kafka_Consumer.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            kafka_Consumer.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

            producer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
            producer_Props.setProperty("acks", "all");
            producer_Props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
            producer_Props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

            consumer_Props.setProperty("bootstrap.servers", "hadoop106:9092,hadoop107:9092,hadoop108:9092");
            consumer_Props.setProperty("group.id", "com/test");
            consumer_Props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            consumer_Props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            consumer_Props.put("auto.offset.reset", "earliest");
        }
    }
    package util;

    import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
    import org.apache.flink.api.java.typeutils.RowTypeInfo;
    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerRecord;

    import java.lang.reflect.Field;
    import java.sql.*;

    public class MySQL_Utils {
        // Approach 1: plain MySQL JDBC connection (shared by execute and sendKafka below)
        private static Connection conn;
        private static PreparedStatement pre;
        static {
            try {
                Class.forName("com.mysql.jdbc.Driver");
                conn = DriverManager.getConnection("jdbc:mysql://hadoop106:3306/test3", "root", "root");
                conn.setAutoCommit(true);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        // Approach 2: build a JDBCInputFormat (flink.api.java.io.jdbc) for env.createInput
        public static JDBCInputFormat getFormat(String database, String sql, RowTypeInfo rowTypeInfo) {
            String driver = "com.mysql.jdbc.Driver";
            String url = "jdbc:mysql://hadoop106:3306/" + database + "?useSSL=false&useUnicode=true&characterEncoding=UTF-8";
            String user = "root";
            String passwd = "root";
            return JDBCInputFormat.buildJDBCInputFormat()
                    .setDrivername(driver)
                    .setDBUrl(url)
                    .setUsername(user)
                    .setPassword(passwd)
                    .setQuery(sql)
                    .setRowTypeInfo(rowTypeInfo)
                    .finish();
        }

        // Execute a SQL statement against MySQL (used to build the wide table)
        public static void execute(String database, String sql) {
            String driver = "com.mysql.jdbc.Driver";
            String url = "jdbc:mysql://hadoop106:3306/" + database + "?useSSL=false&useUnicode=true&characterEncoding=UTF-8";
            String user = "root";
            String passwd = "root";
            try {
                Class.forName(driver);
                conn = DriverManager.getConnection(url, user, passwd);
                conn.setAutoCommit(true);
                pre = conn.prepareStatement(sql);
                pre.execute();
            } catch (Exception throwables) {
                throwables.printStackTrace();
            }
        }

        // Query a table and send every row to Kafka as a comma-separated line
        public static void sendKafka(String sql, String topic, Class T) throws Exception {
            // T is already a Class object, so take its declared fields directly
            Field[] fields = T.getDeclaredFields();
            String[] fieldName = new String[fields.length];
            for (int i = 0; i < fieldName.length; i++) {
                fieldName[i] = fields[i].getName();
            }

            PreparedStatement ps = conn.prepareStatement(sql);
            ResultSet rs = ps.executeQuery();
            // one producer for the whole result set instead of one per row
            KafkaProducer kafkaProducer = new KafkaProducer(Propss.producer_Props);
            while (rs.next()) {
                String line = "";
                for (int i = 0; i < fieldName.length; i++) {
                    line += ",";
                    line += rs.getString(fieldName[i]);
                }
                line = line.substring(1);
                System.out.println(line);
                kafkaProducer.send(new ProducerRecord(topic, line));
            }
            kafkaProducer.close();
        }
    }
    package util;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;

    import java.io.IOException;

    public class HBase_Util {
        public static Connection getConf() {
            // create the HBase configuration object
            Configuration conf = HBaseConfiguration.create();
            // set the HBase connection properties
            conf.set("hbase.zookeeper.quorum", "hadoop106,hadoop107,hadoop108");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            Connection connection = null;
            // create the connection through the connection factory
            try {
                connection = ConnectionFactory.createConnection(conf);
            } catch (IOException e) {
                e.printStackTrace();
            }
            return connection;
        }
    }
    package util;

    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerRecord;

    import java.lang.reflect.Field;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;

    public class Send_Kafka_Urils {
        private static Connection conn;
        static {
            try {
                Class.forName("com.mysql.jdbc.Driver");
                conn = DriverManager.getConnection("jdbc:mysql://hadoop106:3306/test3", "root", "root");
                conn.setAutoCommit(true);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        // Query a MySQL table and send every row to the given Kafka topic as a comma-separated line
        public static void selectAndSendKafka(String sql, String topic, Class T) throws Exception {
            Field[] fields = T.getDeclaredFields();
            String[] fieldName = new String[fields.length];
            for (int i = 0; i < fieldName.length; i++) {
                fieldName[i] = fields[i].getName();
            }

            PreparedStatement ps = conn.prepareStatement(sql);
            ResultSet rs = ps.executeQuery();
            // one producer for the whole result set instead of one per row
            KafkaProducer kafkaProducer = new KafkaProducer(Propss.producer_Props);
            while (rs.next()) {
                String line = "";
                for (int i = 0; i < fieldName.length; i++) {
                    line += ",";
                    line += rs.getString(fieldName[i]);
                }
                line = line.substring(1);
                System.out.println(line);
                kafkaProducer.send(new ProducerRecord(topic, line));
            }
            kafkaProducer.close();
        }

        // Send a single line to the given Kafka topic
        public static void sendKafka(String line, String topic) throws Exception {
            if (line != null) {
                KafkaProducer kafkaProducer = new KafkaProducer(Propss.producer_Props);
                kafkaProducer.send(new ProducerRecord(topic, line));
                kafkaProducer.close();
            }
        }
    }

    sink

    package sink;

    import util.HBase_Util;
    import org.apache.flink.configuration.Configuration;
    import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Table;
    import org.apache.hadoop.hbase.util.Bytes;

    import java.lang.reflect.Field;

    public class HBase_Sink extends RichSinkFunction<String> {
        private Connection connection;
        private Class T;
        private String tableName;
        private String[] fieldsName;

        public static String[] getFiledName(Class T) {
            Field[] fields = T.getDeclaredFields();
            String[] fieldName = new String[fields.length];
            for (int i = 0; i < fieldName.length; i++) {
                fieldName[i] = fields[i].getName();
            }
            return fieldName;
        }

        public HBase_Sink(Class T, String tableName) {
            this.T = T;
            this.tableName = tableName;
            this.fieldsName = getFiledName(T);
        }

        @Override
        public void open(Configuration parameters) throws Exception {
            connection = HBase_Util.getConf();
        }

        @Override
        public void invoke(String value, Context context) throws Exception {
            String[] s1 = value.split(",");
            Table table = connection.getTable(TableName.valueOf(tableName));
            // the first field (the id) is used as the rowkey
            // String rowkey = UUID.randomUUID().toString().replaceAll("-", "");
            Put put = new Put(Bytes.toBytes(s1[0]));
            for (int i = 0; i < fieldsName.length; i++) {
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes(fieldsName[i]), Bytes.toBytes(s1[i]));
            }
            // write this row's Put to the table
            table.put(put);
            table.close();
        }

        @Override
        public void close() throws Exception {
            connection.close();
        }
    }
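
    HBase_Sink writes every field into a column family named info, so each target table (user_info, base_province, sku_info) must already exist with that family. Below is a minimal one-off helper for creating them, using the older HTableDescriptor API to match the client code above; the class HBase_Table_Init is an addition for illustration, not part of the original post.

    package util;

    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Admin;
    import org.apache.hadoop.hbase.client.Connection;

    public class HBase_Table_Init {
        // create the table with the "info" column family if it does not exist yet
        public static void createIfMissing(String tableName) throws Exception {
            Connection connection = HBase_Util.getConf();
            Admin admin = connection.getAdmin();
            TableName name = TableName.valueOf(tableName);
            if (!admin.tableExists(name)) {
                HTableDescriptor desc = new HTableDescriptor(name);
                desc.addFamily(new HColumnDescriptor("info"));
                admin.createTable(desc);
            }
            admin.close();
            connection.close();
        }

        public static void main(String[] args) throws Exception {
            createIfMissing("user_info");
            createIfMissing("base_province");
            createIfMissing("sku_info");
        }
    }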

    test

    package test;

    import bean.*;
    import org.apache.flink.api.common.functions.MapFunction;
    import org.apache.flink.api.common.serialization.SimpleStringSchema;
    import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
    import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
    import org.apache.flink.api.java.typeutils.RowTypeInfo;
    import org.apache.flink.streaming.api.CheckpointingMode;
    import org.apache.flink.streaming.api.datastream.DataStreamSource;
    import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
    import org.apache.flink.streaming.api.environment.LocalStreamEnvironment;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
    import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
    import org.apache.flink.table.api.EnvironmentSettings;
    import org.apache.flink.table.api.Table;
    import org.apache.flink.table.api.TableResult;
    import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
    import org.apache.flink.types.Row;
    import sink.HBase_Sink;
    import util.MySQL_Utils;
    import util.Propss;
    import util.Send_Kafka_Urils;


    public class Flink_Test {
        public static void main(String[] args) throws Exception {
            // 3) Create the Flink environment with parallelism 1 and exactly-once checkpointing
            LocalStreamEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
            env.setParallelism(1);
            env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);

            // 4) Read the Kafka source and parse each line
            DataStreamSource<String> ods_db_temp = env.addSource(new FlinkKafkaConsumer<String>("ods_db_temp", new SimpleStringSchema(), Propss.consumer_Props));
            SingleOutputStreamOperator<Student> map = ods_db_temp.map(new MapFunction<String, Student>() {
                @Override
                public Student map(String s) throws Exception {
                    String[] split = s.split(",");
                    return new Student(Integer.valueOf(split[0]), split[1], split[2]);
                }
            });
            map.print();

            // 5) Read the dimension data from MySQL and send it to the corresponding Kafka topic
            RowTypeInfo rowTypeInfo = new RowTypeInfo(
                    BasicTypeInfo.LONG_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    BasicTypeInfo.DATE_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO,
                    BasicTypeInfo.DATE_TYPE_INFO
            );
            JDBCInputFormat user_format = MySQL_Utils.getFormat("test3", "select * from user_info", rowTypeInfo);
            DataStreamSource<Row> input = env.createInput(user_format);
            input.print();
            SingleOutputStreamOperator<String> line_ = input.map(new MapFunction<Row, String>() {
                @Override
                public String map(Row row) throws Exception {
                    String[] fields = rowTypeInfo.getFieldNames();
                    String line = "";
                    for (int i = 0; i < fields.length; i++) {
                        line += ",";
                        line += row.getField(i);
                    }
                    return line.substring(1);
                }
            });
            line_.addSink(new FlinkKafkaProducer("hadoop106:9092", "user_", new SimpleStringSchema()));

            // 6) Dynamic split: send the user, region and product dimension tables to their Kafka topics
            Send_Kafka_Urils.selectAndSendKafka("select * from user_info", "user_info", User_info.class);
            Send_Kafka_Urils.selectAndSendKafka("select * from base_province", "base_province", Base_province.class);
            Send_Kafka_Urils.selectAndSendKafka("select * from sku_info", "sku_info", Sku_info.class);

            // 7) Persist the dimension data to HBase
            FlinkKafkaConsumer<String> sss = new FlinkKafkaConsumer<>("user_info", new SimpleStringSchema(), Propss.consumer_Props);
            sss.setStartFromEarliest();
            DataStreamSource<String> user = env.addSource(sss);
            user.print();
            user.addSink(new HBase_Sink(User_info.class, "user_info"));

            FlinkKafkaConsumer<String> sst = new FlinkKafkaConsumer<>("base_province", new SimpleStringSchema(), Propss.consumer_Props);
            sst.setStartFromEarliest();
            DataStreamSource<String> province = env.addSource(sst);
            province.print();
            province.addSink(new HBase_Sink(Base_province.class, "base_province"));

            FlinkKafkaConsumer<String> ssw = new FlinkKafkaConsumer<>("sku_info", new SimpleStringSchema(), Propss.consumer_Props);
            ssw.setStartFromEarliest();
            DataStreamSource<String> sku = env.addSource(ssw);
            sku.print();
            sku.addSink(new HBase_Sink(Sku_info.class, "sku_info"));

            // 8) Dynamic split: write the order fact table to the Kafka topic ods_db
            Send_Kafka_Urils.selectAndSendKafka("select * from order_info", "ods_db", Order_info.class);

            // 9) Read the ods_db data back from Kafka
            DataStreamSource<String> ods_db = env.addSource(new FlinkKafkaConsumer<String>("ods_db", new SimpleStringSchema(), Propss.consumer_Props));
            ods_db.print();

            // 10) Widen the order table against the user, region and product dimensions and send the result to the Kafka DWD topic (gmv)
            String sql = "create table gmv as (select user_id,user_info.`name` username,consignee,final_total_amount,province_id,base_province.`name`,sku_id,sku_info.sku_name,sku_num from user_info join order_info on user_info.id = order_info.user_id " +
                    " join order_detail on order_info.id = order_detail.order_id " +
                    " join sku_info on order_detail.sku_id = sku_info.id " +
                    " join base_province on base_province.id = province_id)";
            MySQL_Utils.execute("test3", sql);
            Send_Kafka_Urils.selectAndSendKafka("select * from gmv", "gmv", Gmv.class);

            // 11) Read the DWD data from Kafka and compute GMV
            FlinkKafkaConsumer<String> gsource = new FlinkKafkaConsumer<>("gmv", new SimpleStringSchema(), Propss.consumer_Props);
            gsource.setStartFromEarliest();
            DataStreamSource<String> gsources = env.addSource(gsource);
            SingleOutputStreamOperator<Gmv> gmv = gsources.map(new MapFunction<String, Gmv>() {
                @Override
                public Gmv map(String s) throws Exception {
                    String[] split = s.split(",");
                    return new Gmv(Integer.valueOf(split[0]), split[1], split[2], Double.valueOf(split[3]), Integer.valueOf(split[4]), split[5],
                            Integer.valueOf(split[6]), split[7], Integer.valueOf(split[8]));
                }
            });

            EnvironmentSettings build = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
            StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, build);
            tableEnv.createTemporaryView("gmv", gmv);
            Table table = tableEnv.sqlQuery("select sum(final_total_amount) from gmv");
            TableResult execute = table.execute();
            execute.print();

            // 12) Save the result to ClickHouse (not implemented here; see the sketch after this class)
            // 13) Verify the result by querying ClickHouse

            env.execute();
        }
    }
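
    Steps 12 and 13 are left as TODOs in the post. One way to finish them is a JDBC sink in the same RichSinkFunction style as HBase_Sink. The sketch below is an assumption rather than code from the original: it presumes the legacy ru.yandex.clickhouse JDBC driver on the classpath, ClickHouse's HTTP port 8123 on hadoop106, and a pre-created table gmv_result(total Float64).

    package sink;

    import org.apache.flink.configuration.Configuration;
    import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;

    public class ClickHouse_Sink extends RichSinkFunction<Double> {
        private Connection conn;
        private PreparedStatement ps;

        @Override
        public void open(Configuration parameters) throws Exception {
            // driver class, URL and target table are assumptions; adjust them to the actual cluster
            Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
            conn = DriverManager.getConnection("jdbc:clickhouse://hadoop106:8123/default");
            ps = conn.prepareStatement("insert into gmv_result (total) values (?)");
        }

        @Override
        public void invoke(Double value, Context context) throws Exception {
            // one insert per incoming GMV value
            ps.setDouble(1, value);
            ps.execute();
        }

        @Override
        public void close() throws Exception {
            if (ps != null) ps.close();
            if (conn != null) conn.close();
        }
    }

    To wire it in, the aggregated table would be converted back to a stream (for example with tableEnv.toRetractStream) and the accepted sums routed into this sink; step 13 then reduces to running select * from gmv_result in clickhouse-client.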
     
     
  • Original post: https://www.cnblogs.com/chang09/p/16004639.html