• Flink连接器-批处理-读写HBase





    import org.apache.flink.api.common.io.LocatableInputSplitAssigner;
    import org.apache.flink.api.common.io.RichInputFormat;
    import org.apache.flink.api.common.io.statistics.BaseStatistics;
    import org.apache.flink.api.java.utils.ParameterTool;
    import org.apache.flink.configuration.Configuration;
    import org.apache.flink.core.io.InputSplitAssigner;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.*;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.util.Pair;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    /**
     * Base Flink batch input format that reads rows from an HBase table.
     *
     * @author WeiJiQian
     */
    public abstract class SourceHBaseInputBase<T> extends RichInputFormat<T, MyTableInputSplit>{
        protected static final Logger LOG = LoggerFactory.getLogger(SourceHBaseInputBase.class);
        // helper variable to decide whether the input is exhausted or not
        protected boolean endReached = false;
        protected transient HTable table = null;
        protected transient Scan scan = null;
        protected transient Connection connection = null;
        /** HBase iterator wrapper. */
        protected ResultScanner resultScanner = null;
        protected byte[] currentRow;
        protected long scannedRows;
        protected ParameterTool parameterTool;
        protected abstract T mapResultToOutType(Result r);
        protected abstract void getScan();
        protected abstract TableName getTableName();
        protected void getTable() throws IOException {
            org.apache.hadoop.conf.Configuration configuration;
            parameterTool = PropertiesUtil.PARAMETER_TOOL;
            configuration = HBaseConfiguration.create();
            configuration.set(HBASE_ZOOKEEPER_QUORUM, parameterTool.get(HBASE_ZOOKEEPER_QUORUM));
            configuration.set(HBASE_RPC_TIMEOUT, parameterTool.get(HBASE_RPC_TIMEOUT));
            configuration.set(HBASE_CLIENT_OPERATION_TIMEOUT, parameterTool.get(HBASE_CLIENT_OPERATION_TIMEOUT));
            connection = ConnectionFactory.createConnection(configuration);
            table = (HTable) connection.getTable(getTableName());
        public void configure(Configuration parameters) {
        public void open(MyTableInputSplit split) throws IOException {
            System.out.println("open:" + table == null);
            if (table == null) {
                System.out.println("open:table is null ---------");
                throw new IOException("The HBase table has not been opened! " +
                        "This needs to be done in configure().");
            if (scan == null) {
                throw new IOException("Scan has not been initialized! " +
                        "This needs to be done in configure().");
            if (split == null) {
                throw new IOException("Input split is null!");
            logSplitInfo("opening", split);
            // set scan range
            currentRow = split.getStartRow();
            resultScanner = table.getScanner(scan);
            endReached = false;
            scannedRows = 0;
        public T nextRecord(T reuse) throws IOException {
            if (resultScanner == null) {
                throw new IOException("No table result scanner provided!");
            Result res;
            try {
                res = resultScanner.next();
            } catch (Exception e) {
                //workaround for timeout on scan
                LOG.warn("Error after scan of " + scannedRows + " rows. Retry with a new scanner...", e);
                scan.withStartRow(currentRow, false);
                resultScanner = table.getScanner(scan);
                res = resultScanner.next();
            if (res != null) {
                currentRow = res.getRow();
                return mapResultToOutType(res);
            endReached = true;
            return null;
        private void logSplitInfo(String action, MyTableInputSplit split) {
            int splitId = split.getSplitNumber();
            String splitStart = Bytes.toString(split.getStartRow());
            String splitEnd = Bytes.toString(split.getEndRow());
            String splitStartKey = splitStart.isEmpty() ? "-" : splitStart;
            String splitStopKey = splitEnd.isEmpty() ? "-" : splitEnd;
            String[] hostnames = split.getHostnames();
            LOG.info("{} split (this={})[{}|{}|{}|{}]", action, this, splitId, hostnames, splitStartKey, splitStopKey);
        public boolean reachedEnd() throws IOException {
            return endReached;
        public void close() throws IOException {
            LOG.info("Closing split (scanned {} rows)", scannedRows);
            currentRow = null;
            try {
                if (resultScanner != null) {
            } finally {
                resultScanner = null;
        public void closeInputFormat() throws IOException {
            try {
                if (connection != null) {
            } finally {
                connection = null;
            try {
                if (table != null) {
            } finally {
                table = null;
        public MyTableInputSplit[] createInputSplits(final int minNumSplits) throws IOException {
            if (table == null) {
                throw new IOException("The HBase table has not been opened! " +
                        "This needs to be done in configure().");
            if (scan == null) {
                throw new IOException("Scan has not been initialized! " +
                        "This needs to be done in configure().");
            // Get the starting and ending row keys for every region in the currently open table
            final Pair<byte[][], byte[][]> keys = table.getRegionLocator().getStartEndKeys();
            if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
                throw new IOException("Expecting at least one region.");
            final byte[] startRow = scan.getStartRow();
            final byte[] stopRow = scan.getStopRow();
            final boolean scanWithNoLowerBound = startRow.length == 0;
            final boolean scanWithNoUpperBound = stopRow.length == 0;
            final List<MyTableInputSplit> splits = new ArrayList<MyTableInputSplit>(minNumSplits);
            for (int i = 0; i < keys.getFirst().length; i++) {
                final byte[] startKey = keys.getFirst()[i];
                final byte[] endKey = keys.getSecond()[i];
                final String regionLocation = table.getRegionLocator().getRegionLocation(startKey, false).getHostnamePort();
                // Test if the given region is to be included in the InputSplit while splitting the regions of a table
                if (!includeRegionInScan(startKey, endKey)) {
                // Find the region on which the given row is being served
                final String[] hosts = new String[]{regionLocation};
                // Determine if regions contains keys used by the scan
                boolean isLastRegion = endKey.length == 0;
                if ((scanWithNoLowerBound || isLastRegion || Bytes.compareTo(startRow, endKey) < 0) &&
                        (scanWithNoUpperBound || Bytes.compareTo(stopRow, startKey) > 0)) {
                    final byte[] splitStart = scanWithNoLowerBound || Bytes.compareTo(startKey, startRow) >= 0 ? startKey : startRow;
                    final byte[] splitStop = (scanWithNoUpperBound || Bytes.compareTo(endKey, stopRow) <= 0)
                            && !isLastRegion ? endKey : stopRow;
                    int id = splits.size();
                    final MyTableInputSplit split = new MyTableInputSplit(id, hosts, table.getName().getName(), splitStart, splitStop);
            LOG.info("Created " + splits.size() + " splits");
            for (MyTableInputSplit split : splits) {
                logSplitInfo("created", split);
            return splits.toArray(new MyTableInputSplit[splits.size()]);
         * Test if the given region is to be included in the scan while splitting the regions of a table.
         * @param startKey Start key of the region
         * @param endKey   End key of the region
         * @return true, if this region needs to be included as part of the input (default).
        protected boolean includeRegionInScan(final byte[] startKey, final byte[] endKey) {
            return true;
        public InputSplitAssigner getInputSplitAssigner(MyTableInputSplit[] inputSplits) {
            return new LocatableInputSplitAssigner(inputSplits);
        public BaseStatistics getStatistics(BaseStatistics cachedStatistics) {
            return null;


    import org.apache.flink.configuration.Configuration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.filter.BinaryPrefixComparator;
    import org.apache.hadoop.hbase.filter.CompareFilter;
    import org.apache.hadoop.hbase.filter.FilterList;
    import org.apache.hadoop.hbase.filter.RowFilter;
    import org.apache.hadoop.hbase.util.Bytes;
    import javax.swing.*;
    import java.util.List;
    import static org.apache.hadoop.hbase.filter.FilterList.Operator.MUST_PASS_ONE;
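    /**
     * Input format that reads {@code UsersBean} records from the HBase table configured under
     * {@code HBaseConstant.HBASE_TABLE_NAME_PERSONA_DATA}.
     *
     * @author WeiJiQian
     */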
    public class SourceDaysHbase extends SourceHBaseInputBase<UsersBean> {
        public SourceDaysHbase(List<String> dates){
            this.dates = dates;
        private List<String> dates;
        private UsersBean usersBean = new UsersBean();
        public void configure(Configuration parameters) {
        protected UsersBean mapResultToOutType(Result r) {
             return usersBean;
        protected void getScan() {
            scan = new Scan();
            scan.addColumn(HBaseConstant.HBASE_PERSONA_FAMILY_MONTH_DAY, HBaseConstant.HBASE_PERSONA_ACTIVITE_DATE);
        protected TableName getTableName() {
            return TableName.valueOf(parameterTool.get(HBaseConstant.HBASE_TABLE_NAME_PERSONA_DATA));


    import lombok.extern.slf4j.Slf4j;
    import org.apache.flink.api.common.io.OutputFormat;
    import org.apache.flink.api.java.utils.ParameterTool;
    import org.apache.flink.configuration.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.*;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.mortbay.util.MultiPartWriter;
    import java.io.IOException;
    import static com.hecaiyun.common.bean.HBaseConstant.*;
    /**
     * Base Flink output format that writes records to an HBase table through a buffered mutator.
     *
     * @author WeiJiQian
     */
    public abstract class HBaseOutputFormatBase<T> implements OutputFormat<T> {
        protected final String valueString = "1";
        protected String date ;
        protected Table table ;
        protected Connection connection;
        protected BufferedMutatorParams params;
        protected BufferedMutator mutator;
        protected org.apache.hadoop.conf.Configuration configuration;
        protected ParameterTool parameterTool;
        public abstract TableName getTableName();
        public void configure(Configuration parameters) {
            parameterTool = PropertiesUtil.PARAMETER_TOOL;
            configuration = HBaseConfiguration.create();
            configuration.set(HBASE_ZOOKEEPER_QUORUM, parameterTool.get(HBASE_ZOOKEEPER_QUORUM));
            configuration.set(HBASE_RPC_TIMEOUT, parameterTool.get(HBASE_RPC_TIMEOUT));
            configuration.set(HBASE_CLIENT_OPERATION_TIMEOUT, parameterTool.get(HBASE_CLIENT_OPERATION_TIMEOUT));
        public void open(int taskNumber, int numTasks) throws IOException {
            connection =  ConnectionFactory.createConnection(configuration);
            table = connection.getTable(getTableName());
            params = new BufferedMutatorParams(table.getName());
            //设置缓存的大小 100M
            mutator = connection.getBufferedMutator(params);
         * @author WeiJiQian
         * @param rowKey
         * @param family
         * @param colum
         * @param value
         * @return org.apache.hadoop.hbase.client.Put
         * 描述  覆盖数据
        public void putData(String rowKey,byte[] family, byte[] colum,String value ) throws IOException {
            Put put = new Put(Bytes.toBytes(rowKey));
        public void close() throws IOException {
            if (mutator != null){
            if (table != null){
           if (connection != null){
Copyright © 2020-2023  润新知