• kettle系列-6.kettle实现多字段字典快速翻译


      在数据清洗转换中,常见的字典翻译,如性别在原表中是1(男)、2(女)等,类似还有很多较大的字典需要翻译,若同一个表中有很多个字典需要翻译,采用【数据库查询】方式翻译的话效率就会相当低下。

      这里采用java代码来翻译,初始化时将相关字典加载到内存中,此后就不需要再查询数据库了,然后每条记录进来就翻译各个字典,其实很简单,只是【java代码】这个控件限制较多,不支持泛型、this并不是步骤本身、能使用的方法都列在了左侧,使用起来不是很方便。关于字典翻译这个事,其实写一个专门的控件也不难,也是很不错的一个主意,只是没有真正完整的写个一个控件(后台实现和ui部分等),要写的话比较耗时,暂时就采用java代码实现,有时间可以考虑写这么个控件。

      算了废话太多,测试转换如下图

      自定义常量就是模拟了几条数据,你可以直接传递要翻译的数据,写日志就是看看翻译结果,【java代码】中的代码如下:

    import java.util.Arrays;
    import java.util.List;
    import java.util.HashMap;
    import java.util.Map;
    import org.pentaho.di.core.database.Database;
    import org.pentaho.di.core.database.DatabaseMeta;
    import org.pentaho.di.repository.Repository;
    import org.pentaho.di.core.Const;
    
    public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException
    {
        if (first){
          first = false;
    
          /* TODO: Your code here. (Using info fields)
    
          FieldHelper infoField = get(Fields.Info, "info_field_name");
    
          RowSet infoStream = findInfoRowSet("info_stream_tag");
    
          Object[] infoRow = null;
    
          int infoRowCount = 0;
    
          // Read all rows from info step before calling getRow() method, which returns first row from any
          // input rowset. As rowMeta for info and input steps varies getRow() can lead to errors.
          while((infoRow = getRowFrom(infoStream)) != null){
    
            // do something with info data
            infoRowCount++;
          }
          */
        }
    
        Object[] r = getRow();
    
        if (r == null) {
          setOutputDone();
          return false;
        }
        //获取要翻译字典的代码
        String kkdm = get(Fields.In, "kkdm").getString(r);
        String cllx = get(Fields.In, "cllx").getString(r);
    
        // It is always safest to call createOutputRow() to ensure that your output row's Object[] is large
        // enough to handle any new fields you are creating in this step.
        r = createOutputRow(r, data.outputRowMeta.size());
        //翻译字典并设置到输出字段中
        get(Fields.Out, "kkmc").setValue(r, kkzdMap.get(kkdm));
        get(Fields.Out, "cxmc").setValue(r, cxzdMap.get(cllx));
        
        
        /* TODO: Your code here. (See Sample)
        
        // Get the value from an input field
        String foobar = get(Fields.In, "a_fieldname").getString(r);
    
        foobar += "bar";
        
        // Set a value in a new output field
        get(Fields.Out, "output_fieldname").setValue(r, foobar);
    
        */
        // Send the row on to the next step.
        putRow(data.outputRowMeta, r);
    
        return true;
    }
    
    //定义字典缓存Map
    public static Map cxzdMap = new HashMap();
    public static Map kkzdMap = new HashMap();
    public boolean init(StepMetaInterface stepMetaInterface, StepDataInterface stepDataInterface)
    {
          try {
            //连接数据库,pg_test是数据库名称,在左侧db连接处创建
            DatabaseMeta dbmeta = DatabaseMeta.findDatabase(this.getTrans().getRepository().readDatabases(), "pg_test");
            Database zddb = new Database(this.getTrans(),dbmeta);
            logBasic(zddb.getObjectName());
            zddb.shareVariablesWith( this.getTrans() );
            zddb.setQueryLimit( Const.toInt( this.getTrans().environmentSubstitute( "100" ), 0 ) );
    
              try {
    
                  if ( getTransMeta().isUsingUniqueConnections() ) {
                      synchronized ( getTrans() ) { 
                          zddb.connect( getTrans().getTransactionId(), "zdfy" );
                        logBasic(zddb.getObjectId().toString());
                      }
                    } else {
                        zddb.connect( getTrans().getTransactionId(), null );
                    }
                }catch ( KettleException e ) {
                    logError( "An error occurred, processing will be stopped: " + e.getMessage() );
                    setErrors( 1 );
                    stopAll();
                 }
              if ( dbmeta.isRequiringTransactionsOnQueries() ) {
                  zddb.setCommit( 100 ); 
              }
              logBasic(Arrays.asList(zddb.getTablenames()).toString());
              //查询字典表,获取字典数据本缓存到对应Map中
              List list = zddb.getRows("SELECT * from t_cxzd", 1000);
              for(int i=0;i<list.size();i++){
                  Object[] objs = (Object[]) list.get(i);
                  cxzdMap.put(objs[0].toString(), objs[1].toString());
              }
              logBasic(cxzdMap.entrySet().toString());
              list = zddb.getRows("SELECT * from t_kkzd", 1000);
              for(int i=0;i<list.size();i++){
                  Object[] objs = (Object[]) list.get(i);
                  kkzdMap.put(objs[0].toString(), objs[1].toString());
              }
              logBasic(kkzdMap.entrySet().toString());
              zddb.disconnect();
        } catch (KettleException e1) {
            logError("获取数据库失败", e1);
        }
        return parent.initImpl(stepMetaInterface, stepDataInterface);
        
    }
  • 相关阅读:
    java学习网址大全
    Js 提示框
    api帮助文档及常见IT学习网站
    传参给 jsp
    jsp>action
    二级横菜单显示+sitemesh母板应用
    request,session
    map>json
    清空image画布并改变大小填充背景色
    取汉字首字母方法
  • 原文地址:https://www.cnblogs.com/majinju/p/5002626.html
Copyright © 2020-2023  润新知