• 为Vertica数据库增加自定义函数to_base64和from_base64


    Vertica提供了UDx的机制,用来扩展自定义函数,本文演示了在公司项目中的真实案例。

    ------------1、编写自定义函数类---------------

    自定义函数的Java类,依赖VerticaSDK.jar包,位于服务器上/opt/vertica/bin/VerticaSDK.jar,下载到本地,让项目依赖这个jar包。

    然后就可以实现to_base64和from_base64这两个函数了。

    package tebon.vertica;
    
    import com.vertica.sdk.*;
    
    import java.util.TimeZone;
    
    /**
     * 自定义vertica函数,tebon_to_base64,把指定的字符串转换为base64格式。
     *
     * @author zhanglei
     */
    public class ToBase64Function extends ScalarFunctionFactory {
        public ScalarFunction createScalarFunction(ServerInterface serverInterface) {
            return new InternToBase64Function();
        }
    
        public void getPrototype(ServerInterface serverInterface, ColumnTypes inputTypes, ColumnTypes returnType) {
            inputTypes.addVarchar();
            returnType.addVarchar();
        }
        
        @Override
        public void getReturnType(ServerInterface srvInterface, final SizedColumnTypes argTypes, SizedColumnTypes returnType){
            returnType.addVarchar(argTypes.getColumnType(0).getStringLength());
        }
    
        public class InternToBase64Function extends ScalarFunction {
            public void processBlock(ServerInterface serverInterface, BlockReader argReader, BlockWriter resWriter) throws UdfException, DestroyInvocation {
                TimeZone.setDefault(TimeZone.getTimeZone("Asia/Shanghai"));
                do {
                    String inputString = argReader.getString(0);
                    String returnString = null;
                    if (inputString != null) {
                        try {
                            returnString = encodeToBase64(inputString.getBytes("utf-8"));
                        } catch (Exception ex) {
                            returnString = inputString;
                        }
                    }
                    resWriter.setString(returnString);
                    resWriter.next();
                } while (argReader.next());
            }
    
            //Base64编码表
            private final char[] BASE64CODE
                    = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
                    'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
                    'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/',};
    
            private final int HEX_255 = 0x0000ff;
            private final int HEX_16515072 = 0xfc0000;
            private final int HEX_258048 = 0x3f000;
            private final int HEX_4032 = 0xfc0;
            private final int HEX_63 = 0x3f;
            private final int NUMBER_TWO = 2;
            private final int NUMBER_THREE = 3;
            private final int NUMBER_FOUR = 4;
            private final int NUMBER_SIX = 6;
            private final int NUMBER_EIGHT = 8;
            private final int NUMBER_TWELVE = 12;
            private final int NUMBER_SIXTEEN = 16;
            private final int NUMBER_EIGHTEEN = 18;
    
            private String encodeToBase64(byte[] b) {
                if (b == null || b.length == 0) {
                    return "";
                }
    
                // 按实际编码后长度开辟内存,加快速度
                StringBuilder sb = new StringBuilder(((b.length - 1) / NUMBER_THREE) << NUMBER_TWO + NUMBER_FOUR);
    
                // 进行编码
                int code = 0;
                for (int i = 0; i < b.length; i++) {
                    code |= (b[i] << (NUMBER_SIXTEEN - i % NUMBER_THREE * NUMBER_EIGHT))
                            & (HEX_255 << (NUMBER_SIXTEEN - i % NUMBER_THREE * NUMBER_EIGHT));
                    if (i % NUMBER_THREE == NUMBER_TWO || i == b.length - 1) {
                        sb.append(BASE64CODE[(code & HEX_16515072) >>> NUMBER_EIGHTEEN]);
                        sb.append(BASE64CODE[(code & HEX_258048) >>> NUMBER_TWELVE]);
                        sb.append(BASE64CODE[(code & HEX_4032) >>> NUMBER_SIX]);
                        sb.append(BASE64CODE[code & HEX_63]);
                        code = 0;
                    }
                }
    
                // 对于长度非3的整数倍的字节数组,编码前先补0,编码后结尾处编码用=代替,
                // =的个数和短缺的长度一致,以此来标识出数据实际长度
                if (b.length % NUMBER_THREE > 0) {
                    sb.setCharAt(sb.length() - 1, '=');
                }
                if (b.length % NUMBER_THREE == 1) {
                    sb.setCharAt(sb.length() - NUMBER_TWO, '=');
                }
                return sb.toString();
            }
        }
    }
    package tebon.vertica;
    
    import com.vertica.sdk.*;
    
    import java.util.TimeZone;
    
    /**
     * 自定义vertica函数,tebon_from_base64,把指定的base64字符串转换为普通格式。
     *
     * @author zhanglei
     */
    public class FromBase64Function extends ScalarFunctionFactory {
        public ScalarFunction createScalarFunction(ServerInterface serverInterface) {
            return new InternFromBase64Function();
        }
    
        public void getPrototype(ServerInterface serverInterface, ColumnTypes inputTypes, ColumnTypes returnType) {
            inputTypes.addVarchar();
            returnType.addVarchar();
        }
    
        @Override
        public void getReturnType(ServerInterface srvInterface, final SizedColumnTypes argTypes, SizedColumnTypes returnType){
            returnType.addVarchar(argTypes.getColumnType(0).getStringLength());
        }
    
        public class InternFromBase64Function extends ScalarFunction {
            public void processBlock(ServerInterface serverInterface, BlockReader argReader, BlockWriter resWriter) throws UdfException, DestroyInvocation {
                TimeZone.setDefault(TimeZone.getTimeZone("Asia/Shanghai"));
                do {
                    String inputString = argReader.getString(0);
                    String returnString = null;
                    if (inputString != null) {
                        try {
                            returnString = new String(decodeFromBase64(inputString), "utf-8");
                        } catch (Exception ex) {
                            returnString = inputString;
                        }
                    }
                    resWriter.setString(returnString);
                    resWriter.next();
                } while (argReader.next());
            }
    
            //Base64解码表
            private final byte[] BASE64DECODE
                    = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                    -1, -1, -1,
                    -1,
                    -1, // 注意两个63,为兼容SMP,
                    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 63,
                    -1,
                    63, // “/”和“-”都翻译成63。
                    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, 0, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
                    12, 13,
                    14, // 注意两个0:
                    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1,
                    -1, // “A”和“=”都翻译成0。
                    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
                    -1, -1, -1, -1, -1,};
    
            private final int HEX_255 = 0x0000ff;
            private final int HEX_16711680 = 0xff0000;
            private final int HEX_65280 = 0x00ff00;
            private final int NUMBER_TWO = 2;
            private final int NUMBER_THREE = 3;
            private final int NUMBER_FOUR = 4;
            private final int NUMBER_SIX = 6;
            private final int NUMBER_EIGHT = 8;
            private final int NUMBER_TWELVE = 12;
            private final int NUMBER_SIXTEEN = 16;
            private final int NUMBER_EIGHTEEN = 18;
    
            private byte[] decodeFromBase64(String code) {
                if (code == null || code.length() <= 0) {
                    return null;
                }
    
                code = code.replace("
    ", "").replace("
    ", "").replace(" ", "");
    
                int len = code.length();
                if (len % NUMBER_FOUR != 0) {
                    throw new IllegalArgumentException("Base64 string length must be 4*n");
                }
    
                // 统计填充的等号个数
                int pad = 0;
                if (code.charAt(len - 1) == '=') {
                    pad++;
                }
                if (code.charAt(len - NUMBER_TWO) == '=') {
                    pad++;
                }
    
                // 根据填充等号的个数来计算实际数据长度
                int retLen = len / NUMBER_FOUR * NUMBER_THREE - pad;
    
                // 分配字节数组空间
                byte[] ret = new byte[retLen];
    
                // 查表解码
                char ch1, ch2, ch3, ch4;
                int i;
                for (i = 0; i < len; i += NUMBER_FOUR) {
                    int j = i / NUMBER_FOUR * NUMBER_THREE;
                    ch1 = code.charAt(i);
                    ch2 = code.charAt(i + 1);
                    ch3 = code.charAt(i + NUMBER_TWO);
                    ch4 = code.charAt(i + NUMBER_THREE);
                    int tmp = (BASE64DECODE[ch1] << NUMBER_EIGHTEEN) | (BASE64DECODE[ch2] << NUMBER_TWELVE)
                            | (BASE64DECODE[ch3] << NUMBER_SIX) | (BASE64DECODE[ch4]);
                    ret[j] = (byte) ((tmp & HEX_16711680) >> NUMBER_SIXTEEN);
                    if (i < len - NUMBER_FOUR) {
                        ret[j + 1] = (byte) ((tmp & HEX_65280) >> NUMBER_EIGHT);
                        ret[j + NUMBER_TWO] = (byte) ((tmp & HEX_255));
                    } else {
                        if (j + 1 < retLen) {
                            ret[j + 1] = (byte) ((tmp & HEX_65280) >> NUMBER_EIGHT);
                        }
                        if (j + NUMBER_TWO < retLen) {
                            ret[j + NUMBER_TWO] = (byte) ((tmp & HEX_255));
                        }
                    }
                }
                return ret;
            }
        }
    }

    ------------2、把函数类上传到服务器上---------------

    确保java代码在本地是编译通过的,然后上传到服务器上。

    建议上传到这个位置:/opt/vertica/sdk/   ,后面以此路径为准。

    ------------3、配置java编译器环境---------------

    安装Jave-devel。
    
    通过如下命令查看已经安装的jdk版本:
    rpm -qa|grep java
    
    如果是Centos,一般情况下会列出两个,例如:
    java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64
    java-1.8.0-openjdk-headless-1.8.0.282.b08-1.el7_9.x86_64
    
    通过如下命令卸载掉:
    rpm -e --nodeps java-1.8.0-openjdk-1.8.0.282.b08-1.el7_9.x86_64
    rpm -e --nodeps java-1.8.0-openjdk-headless-1.8.0.282.b08-1.el7_9.x86_64
    
    然后安装新的java-devel:
    yum -y install java-1.8.0-openjdk-devel.x86_64
    
    测试一下:
    java -version
    javac -version

    ------------4、编译源代码---------------

    cd /opt/vertica/sdk
    
    javac -g -cp /opt/vertica/bin/VerticaSDK.jar /opt/vertica/sdk/BuildInfo.java -d .
    
    javac -g -cp /opt/vertica/bin/VerticaSDK.jar FromBase64Function.java -d .
    
    javac -g -cp /opt/vertica/bin/VerticaSDK.jar ToBase64Function.java -d .
    
    jar cf TebonVerticaFunctions.jar ./tebon/vertica/*.class ./com/vertica/sdk/*.class

    ------------5、注册类库和函数---------------

    用vsql登录vertica,执行如下SQL:

    SELECT SET_CONFIG_PARAMETER('JavaBinaryForUDx','/usr/bin/java');

    DROP
    LIBRARY TebonVerticaFunctions CASCADE; CREATE LIBRARY TebonVerticaFunctions AS '/opt/vertica/sdk/TebonVerticaFunctions.jar' LANGUAGE 'JAVA'; CREATE FUNCTION tebon_to_base64 AS language 'java' NAME 'tebon.vertica.ToBase64Function' LIBRARY TebonVerticaFunctions; CREATE FUNCTION tebon_from_base64 AS language 'java' NAME 'tebon.vertica.FromBase64Function' LIBRARY TebonVerticaFunctions;

    ------------6、测试---------------

    dbadmin=> select tebon_to_base64('hello123你好'), tebon_from_base64('aGVsbG8xMjPkvaDlpb0=');
    tebon_to_base64 | tebon_from_base64
    ----------------------+-------------------
    aGVsbG8xMjPkvaDlpb0= | hello123你好
    (1 row)

    到这里就结束了。

    如果java代码执行有问题,可以看错误日志,位于这里(记得把/home/dbadmin/bigdata换成你自己的数据位置):

    /home/dbadmin/bigdata/v_bigdata_node0001_catalog/UDxLogs/UDxFencedProcessesJava.log

    作者:Lave Zhang
    出处:http://www.cnblogs.com/lavezhang/
    本文版权归作者所有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接,否则保留追究法律责任的权利。

  • 相关阅读:
    Linux下查看CPU型号,内存大小,硬盘空间的命令
    java_opts 参数与JVM内存调优
    less 查看日志
    如何实时查看Linux下日志
    mysql的sql语句的性能诊断分析
    使用zabbix-java-gateway可以通过该网关来监听多个JVM
    性能瓶颈分析
    Git客户端的安装与配置入门
    渗透测试的8个步骤 展现一次完整的渗透测试过程及思路
    Python&selenium&tesseract自动化测试随机码、验证码(Captcha)的OCR识别解决方案参考
  • 原文地址:https://www.cnblogs.com/lavezhang/p/14634469.html
Copyright © 2020-2023  润新知