• solr4.0.0学习(二) 数据库导入clob与blob为索引


    导入clob很简单。但是blob好像没有提供方法,所以改了一下源码,重新编译替换class文件,竟然成功了。

    先把配置文件贴上

    SCHEMA.XML

    <?xml version="1.0" ?>
    <schema name="test" version="1.1">
      <types>
       <fieldtype name="string"  class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
    
       <fieldType name="standard" class="solr.TextField" positionIncrementGap="100">
    	  <analyzer type="index">
    		<tokenizer class="solr.StandardTokenizerFactory"/>
    		<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
    		<filter class="solr.LowerCaseFilterFactory"/>
    	  </analyzer>
    	  <analyzer type="query">
    		<tokenizer class="solr.StandardTokenizerFactory"/>
    		<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
    		<filter class="solr.LowerCaseFilterFactory"/>
    	  </analyzer>
    	</fieldType>
    
    	<fieldType name="ik" class="solr.TextField">   
           <analyzer class="org.wltea.analyzer.lucene.IKAnalyzer"/>   
    	</fieldType>
    
      </types>
    
     <fields>   
      <field name="blogId"      type="string"   indexed="false"  stored="true"  multiValued="false"/>
      <field name="blogTitle"   type="ik"   indexed="true"  stored="true"  multiValued="false" /> 
      <field name="blogAuthorName"   type="ik"   indexed="true"  stored="true"  multiValued="false" /> 
      <field name="blogContent"   type="ik"   indexed="true"  stored="true"  multiValued="false" /> 
      <field name="TITLE"   type="ik"   indexed="true"  stored="true"  /> 
      <field name="TEXT"   type="ik"   indexed="true"  stored="true"  /> 
     </fields>
     <defaultSearchField>blogTitle</defaultSearchField>
     <solrQueryParser defaultOperator="OR"/>
    
    </schema>
    
    


    这里的field只用到了blogContent一个。

    SOLRCONFIG.XML

    <?xml version="1.0" encoding="UTF-8" ?>
    <config>
      <luceneMatchVersion>LUCENE_34</luceneMatchVersion>
      <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
      <updateHandler class="solr.DirectUpdateHandler2" />
    
      <requestDispatcher handleSelect="true" >
        <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
      </requestDispatcher>
      
      <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />
      <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
      <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
          
     <!-- the dataimport requestHandler --> 
           <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler"> 
                   <lst name="defaults"> 
                  <str name="config">db-data-config.xml</str> 
                 </lst> 
           </requestHandler> 
    
      <admin>
        <defaultQuery>solr</defaultQuery>
      </admin>
      <unlockOnStartup>true</unlockOnStartup>
    	<lockType>simple</lockType>
      <requestHandler name="/analysis/field" 
                      startup="lazy"
                      class="solr.FieldAnalysisRequestHandler" />
    
    </config>
    
    


    db-data-config.xml

    <dataConfig> 
    <dataSource name="f1" type="FieldStreamDataSource"/>
     <dataSource driver="oracle.jdbc.driver.OracleDriver"  
     url="jdbc:oracle:thin:@127.0.0.1:1521:orcl" user="HT" password="HT"/> 
     <document> 
    		<entity name="blog" query="SELECT BLOG_CONTENT from  TB_ENT_BLOG" transformer="ClobTransformer"> 
    				 <field column="BLOG_CONTENT" name="blogContent" clob="true"/> 
    		</entity>
     </document> 
    </dataConfig> 


    然后修改了ClobTransformer.java,使其同时支持BLOB格式。


    package org.apache.solr.handler.dataimport;
    
    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.io.Reader;
    import java.sql.Blob;
    import java.sql.Clob;
    import java.sql.SQLException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    
    /**
     * Data-import transformer that replaces CLOB/BLOB column values with their text content.
     *
     * <p>For every entity field configured with {@code clob="true"}, the value found in the row
     * under the field's source column (a {@link Clob}, a {@link Blob}, or a {@link List} holding
     * them) is read completely into a {@link String} and stored back into the row under the
     * field's {@code column} name. Values of any other type are left untouched.
     */
    public class ClobTransformer extends Transformer
    {
      /** Name of the field attribute that switches this transformer on for a column. */
      public static final String CLOB = "clob";

      /** Error code passed to {@code DataImportHandlerException.wrapAndThrow} when a read fails. */
      private static final int READ_ERROR_CODE = 500;

      /** Size of the char buffer used while draining a LOB stream. */
      private static final int BUFFER_SIZE = 1024;

      /**
       * Converts the LOB values of all {@code clob="true"} fields of one row to strings.
       *
       * @param aRow    the row being imported; modified in place
       * @param context import context supplying the entity's field definitions
       * @return the same {@code aRow} instance, after conversion
       */
      public Object transformRow(Map<String, Object> aRow, Context context)
      {
        for (Map<?, ?> field : context.getAllEntityFields()) {
          if (!"true".equals(field.get(CLOB))) {
            continue;
          }
          String column = (String) field.get("column");
          String srcCol = (String) field.get("sourceColName");
          if (srcCol == null) {
            srcCol = column;
          }

          Object value = aRow.get(srcCol);
          if (value instanceof List) {
            // Multi-valued column: convert each LOB; entries of any other type are skipped.
            List<String> results = new ArrayList<String>();
            for (Object input : (List<?>) value) {
              if (input instanceof Clob) {
                results.add(readFromClob((Clob) input));
              } else if (input instanceof Blob) {
                results.add(readFromBlob((Blob) input));
              }
            }
            aRow.put(column, results);
          } else if (value instanceof Clob) {
            aRow.put(column, readFromClob((Clob) value));
          } else if (value instanceof Blob) {
            aRow.put(column, readFromBlob((Blob) value));
          }
        }
        return aRow;
      }

      /**
       * Reads the whole content of a BLOB as text.
       *
       * <p>The bytes are decoded with the platform default charset and line terminators are
       * preserved, so words on adjacent lines are not glued together.
       * NOTE(review): the real encoding of the stored data is not known here; confirm that the
       * JVM default charset matches it, or make the charset configurable.
       *
       * @param blob the BLOB to read
       * @return the decoded text; empty if the BLOB has no content
       */
      private String readFromBlob(Blob blob) {
        InputStream is = null;
        try {
          is = blob.getBinaryStream();
        } catch (SQLException e) {
          // Report the failure instead of silently indexing an empty value.
          DataImportHandlerException.wrapAndThrow(READ_ERROR_CODE, e);
        }
        return readFully(new BufferedReader(new InputStreamReader(is)));
      }

      /**
       * Reads the whole content of a CLOB.
       *
       * @param clob the CLOB to read
       * @return the CLOB's text; empty if the CLOB has no content
       */
      private String readFromClob(Clob clob) {
        Reader reader = null;
        try {
          reader = clob.getCharacterStream();
        } catch (SQLException e) {
          // Previously only printed, which led to a NullPointerException on the first read.
          DataImportHandlerException.wrapAndThrow(READ_ERROR_CODE, e);
        }
        return readFully(reader);
      }

      /**
       * Drains {@code reader} into a string and closes it.
       *
       * <p>An {@link IOException} while reading is handed to
       * {@code DataImportHandlerException.wrapAndThrow}, which is relied on to rethrow it.
       *
       * @param reader the character source; closed before this method returns
       * @return everything that could be read from {@code reader}
       */
      private String readFully(Reader reader) {
        StringBuilder sb = new StringBuilder();
        char[] buf = new char[BUFFER_SIZE];
        try {
          int len;
          while ((len = reader.read(buf)) != -1) {
            sb.append(buf, 0, len);
          }
        } catch (IOException e) {
          DataImportHandlerException.wrapAndThrow(READ_ERROR_CODE, e);
        } finally {
          try {
            reader.close();
          } catch (IOException ignored) {
            // Nothing useful can be done if closing fails; the content was already read.
          }
        }
        return sb.toString();
      }
    }

    这里加了一个readFromBlob方法,加了两个else if。异常的处理很粗糙。

    这样替换class文件后,导入索引就正常了。在查询页面用 "*:*" 查询,response中会出现所有blob内容。

    如果response中没有blob字段,或者该字段显示为对象地址,都说明导入出错了。



  • 相关阅读:
    Linux基础文件打包
    Linux基础文件查找
    Apache的三种工作模式及相关配置
    elasticsearch启动错误整理
    Zabbix-agentd错误整理
    Nginx编译安装
    PHP编译安装
    Zabbix编译安装(全)
    Chetsheet: 2017 01.01 ~ 01.31
    Cheatsheet: 2016 12.01 ~ 12.31
  • 原文地址:https://www.cnblogs.com/riskyer/p/3358177.html
Copyright © 2020-2023  润新知