• Storm-jdbc Part 2: Advanced API and Trident


    I previously wrote a simple demo of Storm's JDBC integration; having recently dug into it more deeply, here is the code.

    First, an abstract class to cut down on duplicated code:

    
    import com.google.common.collect.Lists;
    import com.google.common.collect.Maps;
    import org.apache.storm.Config;
    import org.apache.storm.LocalCluster;
    import org.apache.storm.StormSubmitter;
    import org.apache.storm.generated.StormTopology;
    import org.apache.storm.jdbc.common.Column;
    import org.apache.storm.jdbc.common.ConnectionProvider;
    import org.apache.storm.jdbc.common.HikariCPConnectionProvider;
    import org.apache.storm.jdbc.common.JdbcClient;
    import org.apache.storm.jdbc.mapper.JdbcLookupMapper;
    import org.apache.storm.jdbc.mapper.JdbcMapper;
    import org.apache.storm.jdbc.mapper.SimpleJdbcLookupMapper;
    import org.apache.storm.jdbc.mapper.SimpleJdbcMapper;
    import org.apache.storm.tuple.Fields;
    
    import java.sql.Types;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    
    /**
     * @author cwc
     * @date 2018-06-30
     * @version 1.0.0
     * Abstract base class shared by the topologies below, to improve code reuse.
     */
    public abstract class AbstractUserTopology {
        // Setup SQL: drop and recreate the tables, then seed the lookup data; adapt freely
        private static final List<String> setupSqls = Lists.newArrayList(
                "drop table if exists user",
                "drop table if exists department",
                "drop table if exists user_department",
                "create table if not exists user (user_id integer, user_name varchar(100), dept_name varchar(100), create_date date)",
                "create table if not exists department (dept_id integer, dept_name varchar(100))",
                "create table if not exists user_department (user_id integer, dept_id integer)",
                "insert into department values (1, 'R&D')",
                "insert into department values (2, 'Finance')",
                "insert into department values (3, 'HR')",
                "insert into department values (4, 'Sales')",
                "insert into user_department values (1, 1)",
                "insert into user_department values (2, 2)",
                "insert into user_department values (3, 3)",
                "insert into user_department values (4, 4)"
        );
        
        protected JdbcSpout jdbcSpout; // spout that generates the test data
        protected JdbcMapper jdbcMapper; // maps tuple fields to database columns
        protected JdbcLookupMapper jdbcLookupMapper;
        
        // Thread-safe implementation of the ConnectionProvider interface, which has three
        // methods: prepare() initializes the pool, getConnection() returns a connection,
        // and cleanup() releases the pool's resources.
        protected ConnectionProvider connectionProvider;
        
        protected static final String TABLE_NAME = "user"; // target table; must match the table created by setupSqls
        protected static final String JDBC_CONF = "jdbc.conf"; // config key for the JDBC settings map
        protected static final String SELECT_QUERY = "select dept_name from department, user_department where department.dept_id = user_department.dept_id" +
                " and user_department.user_id = ?"; // lookup query
        
        public void execute(String[] args) throws Exception {
            // Put the HikariCP data-source settings into a map
            Map<String, Object> map = Maps.newHashMap();
            map.put("dataSourceClassName", "com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
            map.put("dataSource.url", "jdbc:mysql://localhost:3306/mytest?useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai");
            map.put("dataSource.user", "root");
            map.put("dataSource.password", "your-password");
            
            Config config = new Config();
            config.put(JDBC_CONF, map); // make the JDBC settings available to the topology components
    
            ConnectionProvider connectionProvider = new HikariCPConnectionProvider(map);
            // Initialize the connection pool
            connectionProvider.prepare();
            // Query timeout in seconds
            int queryTimeoutSecs = 60;
            // Client that executes SQL against the pool
            JdbcClient jdbcClient = new JdbcClient(connectionProvider, queryTimeoutSecs);
            // Create the tables and seed the data
            for (String sql : setupSqls) {
                System.err.println("sql:" + sql);
                // Execute each setup statement
                jdbcClient.executeSql(sql);
            }
    
            this.jdbcSpout = new JdbcSpout();
            // SimpleJdbcMapper reads the table's metadata (column names and types) through the
            // connectionProvider to initialize its schema columns; inserting by table name
            // requires supplying values for every column of the table.
            this.jdbcMapper = new SimpleJdbcMapper(TABLE_NAME, connectionProvider);
            // Close the connection pool; the setup code above can run on its own
            connectionProvider.cleanup();
            Fields outputFields = new Fields("user_id", "user_name", "dept_name", "create_date");
            // Query parameter columns; user_id comes from the tuple the spout emits
            List<Column> queryParamColumns = Lists.newArrayList(new Column("user_id", Types.INTEGER));
            // The lookup fills in dept_name from the query result; the other three output
            // fields are copied from the original spout tuple.
            this.jdbcLookupMapper = new SimpleJdbcLookupMapper(outputFields, queryParamColumns);
            // A fresh, not-yet-prepared provider for the topology's bolts and state to use
            this.connectionProvider = new HikariCPConnectionProvider(map);
            String topoName = "test";
            if (args == null || args.length == 0) {
                // No arguments: run on a local in-process cluster
                LocalCluster cluster = new LocalCluster();
                cluster.submitTopology(topoName, config, getTopology());
                // Keep the local cluster alive long enough for the topology to do its work;
                // too small a sleep shuts it down before it finishes.
                Thread.sleep(1000000);
                cluster.shutdown();
            } else {
                // Otherwise submit to a real cluster, taking the topology name from the first argument
                StormSubmitter.submitTopology(args[0], config, getTopology());
            }
        }
    
        public abstract StormTopology getTopology();
    }
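
    As an aside, ConnectionProvider is just a small serializable interface, so you can supply your own. Below is a minimal sketch of a custom implementation; it is my own illustration (this post only uses HikariCPConnectionProvider) and assumes plain DriverManager connections with no pooling:

    import org.apache.storm.jdbc.common.ConnectionProvider;

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;

    /**
     * Hypothetical unpooled ConnectionProvider, for illustration only.
     * The interface's three methods are prepare(), getConnection(), and cleanup().
     */
    public class SimpleConnectionProvider implements ConnectionProvider {
        private final String url;
        private final String user;
        private final String password;

        public SimpleConnectionProvider(String url, String user, String password) {
            this.url = url;
            this.user = user;
            this.password = password;
        }

        @Override
        public void prepare() {
            // Nothing to set up here; HikariCPConnectionProvider builds its pool in this method.
        }

        @Override
        public Connection getConnection() {
            try {
                // Opens a new connection per call; a real provider would hand out pooled ones
                return DriverManager.getConnection(url, user, password);
            } catch (SQLException e) {
                throw new RuntimeException("failed to open connection", e);
            }
        }

        @Override
        public void cleanup() {
            // No pool to tear down in this sketch.
        }
    }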

    Next, a plain Storm topology that writes the data:

    
    import com.google.common.collect.Lists;
    import org.apache.storm.generated.StormTopology;
    import org.apache.storm.jdbc.bolt.JdbcInsertBolt;
    import org.apache.storm.jdbc.bolt.JdbcLookupBolt;
    import org.apache.storm.jdbc.common.Column;
    import org.apache.storm.jdbc.mapper.JdbcMapper;
    import org.apache.storm.jdbc.mapper.SimpleJdbcMapper;
    import org.apache.storm.topology.TopologyBuilder;
    
    import java.sql.Types;
    import java.util.List;
    /**
     * @author cwc
     * @date 2018-07-04
     * @version 2.0.0
     * Writes the data into the table via plain Storm bolts.
     */
    public class PersistanceTopology extends AbstractUserTopology {
        private static final String USER_SPOUT = "USER_SPOUT";
        private static final String LOOKUP_BOLT = "LOOKUP_BOLT";
        private static final String PERSISTANCE_BOLT = "PERSISTANCE_BOLT";
    
        public static void main(String[] args) throws Exception {
            new PersistanceTopology().execute(args); // inherited method that sets up the connection provider and mappers
        }
    
        @Override
        public StormTopology getTopology() {
            JdbcLookupBolt departmentLookupBolt = new JdbcLookupBolt(connectionProvider, SELECT_QUERY, this.jdbcLookupMapper);
    
            // Schema columns, in the same order as the placeholders in the insert query below
            List<Column> schemaColumns = Lists.newArrayList(new Column("create_date", Types.DATE),
                    new Column("dept_name", Types.VARCHAR), new Column("user_id", Types.INTEGER), new Column("user_name", Types.VARCHAR));
            JdbcMapper mapper = new SimpleJdbcMapper(schemaColumns);
            // Bolt that executes the insert for each incoming tuple
            JdbcInsertBolt userPersistanceBolt = new JdbcInsertBolt(connectionProvider, mapper)
                    .withInsertQuery("insert into user (create_date, dept_name, user_id, user_name) values (?,?,?,?)");
    
            TopologyBuilder builder = new TopologyBuilder();
    
            builder.setSpout(USER_SPOUT, this.jdbcSpout, 1);
            builder.setBolt(LOOKUP_BOLT, departmentLookupBolt, 1).shuffleGrouping(USER_SPOUT);
            builder.setBolt(PERSISTANCE_BOLT, userPersistanceBolt, 1).shuffleGrouping(LOOKUP_BOLT);
            return builder.createTopology();
        }
    }
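
    After this topology has run for a while, you can verify the inserts with the same JdbcClient used in the abstract class. A small sketch, assuming the same HikariCP settings map built in AbstractUserTopology.execute():

    ConnectionProvider cp = new HikariCPConnectionProvider(map); // same settings map as in execute()
    cp.prepare();
    JdbcClient client = new JdbcClient(cp, 60);
    // select() takes the query plus a (possibly empty) list of parameter columns
    List<List<Column>> rows = client.select(
            "select user_id, user_name, dept_name, create_date from user",
            Lists.<Column>newArrayList());
    for (List<Column> row : rows) {
        for (Column column : row) {
            System.out.println(column.getColumnName() + " = " + column.getVal());
        }
    }
    cp.cleanup();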

    Writing to the database with Trident:

    
    
    import org.apache.storm.generated.StormTopology;
    import org.apache.storm.tuple.Fields;
    import com.google.common.collect.Lists;
    import com.sunsheen.jfids.bigdata.storm.demo.jdbc.AbstractUserTopology;
    import com.sunsheen.jfids.bigdata.storm.demo.jdbc.JdbcSpout;
    
    import org.apache.storm.jdbc.common.Column;
    import org.apache.storm.jdbc.mapper.SimpleJdbcLookupMapper;
    import org.apache.storm.jdbc.trident.state.JdbcQuery;
    import org.apache.storm.jdbc.trident.state.JdbcState;
    import org.apache.storm.jdbc.trident.state.JdbcStateFactory;
    import org.apache.storm.jdbc.trident.state.JdbcUpdater;
    import org.apache.storm.trident.Stream;
    import org.apache.storm.trident.TridentState;
    import org.apache.storm.trident.TridentTopology;
    
    import java.sql.Types;
    /**
     * @author cwc
     * @date 2018-07-04
     * @version 1.0.0
     * Writes the data into the database table via Trident state instead of bolts.
     */
    public class UserPersistanceTridentTopology extends AbstractUserTopology {
    
        public static void main(String[] args) throws Exception {
            new UserPersistanceTridentTopology().execute(args);
        }
    
        @Override
        public StormTopology getTopology() {
            TridentTopology topology = new TridentTopology();
            // Trident state options replace the lookup and insert bolts used in the plain topology
            JdbcState.Options options = new JdbcState.Options()
                    .withConnectionProvider(connectionProvider)
                    .withMapper(this.jdbcMapper)
                    .withJdbcLookupMapper(new SimpleJdbcLookupMapper(new Fields("dept_name"), Lists.newArrayList(new Column("user_id", Types.INTEGER))))
                    .withTableName(TABLE_NAME)
                    .withSelectQuery(SELECT_QUERY);
    
            JdbcStateFactory jdbcStateFactory = new JdbcStateFactory(options);
    
            Stream stream = topology.newStream("userSpout", new JdbcSpout());
            TridentState state = topology.newStaticState(jdbcStateFactory);
            stream = stream.stateQuery(state, new Fields("user_id","user_name","create_date"), new JdbcQuery(), new Fields("dept_name"));
            stream.partitionPersist(jdbcStateFactory, new Fields("user_id","user_name","dept_name","create_date"),  new JdbcUpdater(), new Fields());
            return topology.build();
        }
    }
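
    If you want to see the looked-up dept_name before it is persisted, Trident's built-in Debug filter can be chained in between the state query and the persist. A sketch of the extra line (my addition, not part of the original topology):

    import org.apache.storm.trident.operation.builtin.Debug;

    // ...inside getTopology(), between stateQuery and partitionPersist:
    stream = stream.stateQuery(state, new Fields("user_id","user_name","create_date"), new JdbcQuery(), new Fields("dept_name"));
    stream = stream.each(new Fields("user_id","user_name","dept_name","create_date"), new Debug()); // Debug keeps every tuple and prints it to stdout
    stream.partitionPersist(jdbcStateFactory, new Fields("user_id","user_name","dept_name","create_date"), new JdbcUpdater(), new Fields());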

    The spout class:

    
    
    import org.apache.storm.spout.SpoutOutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichSpout;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Values;
    
    import com.google.common.collect.Lists;
    
    import java.util.List;
    import java.util.Map;
    import java.util.Random;
    /**
     * @author cwc
     * @date 2018-05-31
     * @version 1.0.0
     * Spout that emits the test data; the read and write demos share this one spout.
     */
    public class JdbcSpout extends BaseRichSpout {
    	private static final long serialVersionUID = 1L;
    	private SpoutOutputCollector collector;
    	// Simulated test data
    	public static final List<Values> rows = Lists.newArrayList(
    	            new Values(1,"peter",System.currentTimeMillis()),
    	            new Values(2,"bob",System.currentTimeMillis()),
    	            new Values(3,"alice",System.currentTimeMillis()));
    
    	@Override
    	public void nextTuple() {
    		Random rand = new Random();
    		// nextInt(rows.size()) can pick any row, including the last
    		Values row = rows.get(rand.nextInt(rows.size()));
    		this.collector.emit(row);
    		Thread.yield();
    		System.out.println("emitting a test row---------------------");
    	}
    
    	@Override
    	public void open(Map arg0, TopologyContext arg1, SpoutOutputCollector collector) {
    		this.collector = collector;
    	}
    
    	@Override
    	public void declareOutputFields(OutputFieldsDeclarer declarer) {
    		declarer.declare(new Fields("user_id","user_name","create_date"));
    	}
    
    }
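
    One caveat: this spout emits unanchored tuples, so Storm will never replay a tuple that fails downstream. Below is a minimal sketch of a reliable variant that reuses the rows list above; the message-ID bookkeeping is my own illustration, not part of the original post:

    import java.util.Map;
    import java.util.Random;
    import java.util.UUID;
    import java.util.concurrent.ConcurrentHashMap;

    import org.apache.storm.spout.SpoutOutputCollector;
    import org.apache.storm.task.TopologyContext;
    import org.apache.storm.topology.OutputFieldsDeclarer;
    import org.apache.storm.topology.base.BaseRichSpout;
    import org.apache.storm.tuple.Fields;
    import org.apache.storm.tuple.Values;

    public class ReliableJdbcSpout extends BaseRichSpout {
        private static final long serialVersionUID = 1L;
        private SpoutOutputCollector collector;
        private Map<String, Values> pending; // in-flight tuples keyed by message ID
        private Random rand;

        @Override
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.collector = collector;
            this.pending = new ConcurrentHashMap<>();
            this.rand = new Random();
        }

        @Override
        public void nextTuple() {
            Values row = JdbcSpout.rows.get(rand.nextInt(JdbcSpout.rows.size()));
            String msgId = UUID.randomUUID().toString();
            pending.put(msgId, row);
            collector.emit(row, msgId); // anchored: Storm will call ack() or fail() with this ID
        }

        @Override
        public void ack(Object msgId) {
            pending.remove(msgId); // fully processed; safe to forget
        }

        @Override
        public void fail(Object msgId) {
            Values row = pending.get(msgId);
            if (row != null) {
                collector.emit(row, msgId); // replay the failed tuple under the same ID
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("user_id", "user_name", "create_date"));
        }
    }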

    That's all the code for today. Keep at it, everyone.

    Writing this up late at night isn't easy; if it helped you, consider leaving a tip on the right.