• Flink MongoDB CDC 自定义解析器


     Flink 1.13.1(原文写作 "flink1.3.1",疑为笔误;flink-cdc 2.2 需要 Flink 1.13 及以上版本)

    官方文档:

           https://ververica.github.io/flink-cdc-connectors/master/content/connectors/mongodb-cdc.html

    依赖:

            <!-- https://mvnrepository.com/artifact/com.ververica/flink-connector-mongodb-cdc -->
            <!-- doc: https://ververica.github.io/flink-cdc-connectors/master/content/connectors/mongodb-cdc.html -->
            <dependency>
                <groupId>com.ververica</groupId>
                <artifactId>flink-connector-mongodb-cdc</artifactId>
                <version>2.2.0</version>
            </dependency>
    
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.79</version>
            </dependency>

    代码:

    package com.sea.flink.sea.cdc;
    
    import com.alibaba.fastjson.JSON;
    import com.alibaba.fastjson.JSONObject;
    import com.ververica.cdc.connectors.mongodb.MongoDBSource;
    import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
    import com.ververica.cdc.debezium.DebeziumSourceFunction;
    import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
    import com.ververica.cdc.debezium.StringDebeziumDeserializationSchema;
    import lombok.Data;
    import org.apache.flink.api.common.functions.MapFunction;
    import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
    import org.apache.flink.api.common.typeinfo.TypeInformation;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.streaming.api.functions.source.SourceFunction;
    import org.apache.flink.util.Collector;
    import org.apache.kafka.connect.data.Struct;
    import org.apache.kafka.connect.source.SourceRecord;
    
    import java.util.Date;
    
    /**
     * Demo job that reads MongoDB change events through the Flink MongoDB CDC connector,
     * converts each event to a compact JSON envelope with a custom deserializer, flattens
     * the document's {@code _id} to a plain string, and prints the result.
     *
     * <pre>
     * Package : com.sea.flink.sea.cdc
     * Date    : 6/13/22 5:40 PM
     * </pre>
     *
     * @author Sea
     */
    public class MongoCDCTest {

        /**
         * Builds the MongoDB CDC source, wires it into a streaming job and runs it.
         *
         * @param args unused
         * @throws Exception if the job fails to start or terminates with an error
         */
        public static void main(String[] args) throws Exception {
            DebeziumSourceFunction<String> sourceFunction = MongoDBSource.<String>builder()
                    .hosts("192.160.3.160:27017")
                    .username("root")
                    .password("root")
                    .databaseList("shippool") // captured database(s); regex is supported
                    .collectionList("shippool.test") // captured collection(s); regex is supported
                    // Alternative: .deserializer(new JsonDebeziumDeserializationSchema())
                    // emits the connector's own JSON instead of the envelope built below.
                    .deserializer(new MyMongoCDCDeserialazation()) // custom deserializer
                    .copyExisting(true) // also copy the data that already exists (default is true)
                    .build();

            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            env.addSource(sourceFunction)
                    // post-process the envelope produced by the custom deserializer
                    .map(new ObjectIdFlattener())
                    .print().setParallelism(1); // use parallelism 1 for sink to keep message ordering
            env.execute();
        }


        /**
         * Parses the envelope emitted by {@link MyMongoCDCDeserialazation} and replaces an
         * extended-JSON {@code _id} object ({@code {"$oid": "..."}}) by its string value.
         *
         * <p>Envelopes without a {@code "data"} document are returned unchanged. This happens
         * when the change event carried no {@code fullDocument} (for example delete events),
         * and previously caused a {@link NullPointerException}.
         */
        private static class ObjectIdFlattener implements MapFunction<String, JSONObject> {

            private static final long serialVersionUID = 1L;

            @Override
            public JSONObject map(String source) throws Exception {
                JSONObject envelope = JSONObject.parseObject(source);
                // "data" may arrive as a JSON string; getJSONObject parses it into an object.
                JSONObject document = envelope.getJSONObject("data");
                if (document == null) {
                    return envelope;
                }
                Object id = document.get("_id");
                if (id instanceof JSONObject) {
                    document.put("_id", ((JSONObject) id).getString("$oid"));
                }
                // Store the parsed object back so downstream sees an object, not a string.
                envelope.put("data", document);
                // The document can be bound to a bean if needed:
                // SeaBean seaBean = document.toJavaObject(SeaBean.class);
                return envelope;
            }
        }


        /**
         * Example bean a captured document can be bound to (see the note in
         * {@link ObjectIdFlattener}); not used by the job itself.
         */
        @Data
        public static  class  SeaBean{
            private String _id;
            private String ss;
            private Date time;
        }



        /**
         * Custom deserializer that turns a change record into a JSON string of the form:
         * <pre>
         * {
         *   "database":  "database name, taken from the record topic",
         *   "tableName": "collection name, taken from the record topic",
         *   "type":      "the record's operationType; 'replace' is reported as 'update'",
         *   "data":      the record's fullDocument (left out by fastjson when it is null),
         *   "copy":      the "copy" entry of the source offset, or "false" when absent
         * }
         * </pre>
         */
        public static class MyMongoCDCDeserialazation implements DebeziumDeserializationSchema<String> {

            private static final long serialVersionUID = 1L;

            /**
             * Converts one source record into the JSON envelope described on the class.
             *
             * @param record the change record; its topic is expected to be
             *               {@code <database>.<collection>}
             * @param out    collector receiving the envelope as a JSON string
             * @throws IllegalStateException if the topic contains no {@code '.'} separator
             */
            @Override
            public void deserialize(SourceRecord record, Collector<String> out) throws Exception {
                // NOTE(review): "copy" presumably marks records produced while copying
                // existing data - confirm against the connector documentation.
                Object copy = record.sourceOffset().get("copy");
                if (copy == null) {
                    copy = "false";
                }

                // 1. Database and collection name. Split on the FIRST dot only, so that
                //    collection names which themselves contain dots are not truncated.
                String topic = record.topic();
                int separator = topic.indexOf('.');
                if (separator < 0) {
                    throw new IllegalStateException(
                            "Unexpected topic, expected <database>.<collection>: " + topic);
                }
                String database = topic.substring(0, separator);
                String tableName = topic.substring(separator + 1);

                // 2. The "fullDocument" payload.
                Struct value = (Struct) record.value();
                Object data = value.get("fullDocument");

                // 3. Operation type; a full-document replace is reported as an update.
                String type = value.getString("operationType");
                if ("replace".equalsIgnoreCase(type)) {
                    type = "update";
                }

                JSONObject result = new JSONObject();
                result.put("database", database);
                result.put("tableName", tableName);
                result.put("type", type);
                result.put("data", data);
                result.put("copy", copy);
                out.collect(result.toJSONString());
            }

            /** @return the type information of the emitted records, i.e. {@code String}. */
            @Override
            public TypeInformation<String> getProducedType() {
                return BasicTypeInfo.STRING_TYPE_INFO;
            }
        }

    }
  • 相关阅读:
    JS定时器相关用法
    asp.net 使用DroDownList来实现二级联动
    移动端页面开发流程
    使用DataList实现数据分页的技术
    使用CSS3制作三角形小图标
    JS面向对象
    JS封闭函数、闭包、内置对象
    封装自己的DB类(PHP)
    github之文件的三种状态关系
    Windows Socket编程笔记之最简单的小Demo
  • 原文地址:https://www.cnblogs.com/lshan/p/16373508.html
Copyright © 2020-2023  润新知