• Java 如何请求一个网页的数据


    我们可以在 Java 程序中模拟浏览器,把网页数据抓取下来。具体方法是在 Java 程序中设置请求头(header)和 cookie,以下是一个示例:

    /**
     * Minimal demo that downloads a web page while sending browser-like request
     * headers and a cookie, then prints the decoded page text to standard output.
     */
    public class NetConnection {

        /** Upper bound, in bytes, on how much of a response body is read. */
        public static final int MAX_HOTWORDS_FILE_SIZE = 256 * 1024;

        /** Value, in milliseconds, used for both the connection timeout and SO_TIMEOUT. */
        private static final int TIMEOUT_MILLIS = 100000;

        public static void main(String[] args) {
            send("http://tuan.aibang.com/shenzhen/new_2033549.html");
        }

        /**
         * Issues a GET request for {@code _url} with browser-like headers and, when
         * the status code is 200, prints the response body via
         * {@link #ReadFile2(InputStream, Charset)}.
         *
         * <p>Failures are reported on the console and never propagated. The
         * connection is always released, whether or not the request succeeded.
         *
         * @param _url absolute URL of the page to fetch
         */
        static void send(String _url) {
            HttpClient http = new HttpClient();
            http.getHttpConnectionManager().getParams().setConnectionTimeout(TIMEOUT_MILLIS);
            GetMethod get = new GetMethod(_url);
            get.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, TIMEOUT_MILLIS);
            get.setFollowRedirects(false);
            // NOTE(review): this host differs from the host of the URL requested in
            // main (tuan.aibang.com) - confirm whether that is intentional.
            String host = "www.aibang.com";
            try {
                applyBrowserHeaders(get, host);
                int er = http.executeMethod(get);
                System.out.println("er=" + er);
                if (er == 200) {
                    try {
                        InputStream is = get.getResponseBodyAsStream();
                        if (is == null) {
                            System.out.println("download error=empty response body");
                        } else {
                            ReadFile2(is, responseCharset(get));
                        }
                    } catch (Exception e) {
                        System.out.println("download error=" + e);
                    }
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            } finally {
                // Without this the underlying connection (and the response stream)
                // stays checked out of the connection manager.
                get.releaseConnection();
            }
        }

        /** Sets the request headers and cookie that make the request look like a browser's. */
        private static void applyBrowserHeaders(GetMethod get, String host) {
            get.setRequestHeader("Host", host);
            get.setRequestHeader(
                    "user-agent",
                    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1.8) Gecko/20100202 Firefox/3.5.8");
            get.setRequestHeader(
                    "Accept",
                    "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            get.setRequestHeader("Accept-Language", "zh-cn,zh;q=0.5");
            get.setRequestHeader("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7");
            get.setRequestHeader("Connection", "Keep-Alive");
            get.setRequestHeader("Cookie", "582081171805; cy=1; __utma=205923334.3209590505032285000.1256126987.1269858466.1270605495.11; __utmz=205923334.1256126987.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); ano=0snUeoHWygEkAAAAOTAyZjM3ZjQtNjA2NC00NWYzLWIxNWYtMjRlMDliZjAzYTM3jnk83_pKoSEk-9gwcIFx8jFOXVM1; sid=no4tpkmvedoj3gycvfz1s055; lb.dp=100729098.20480.0000; __utmb=205923334.1.10.1270605495; __utmc=205923334");
        }

        /**
         * Resolves the charset reported for the response, falling back to the
         * platform default when the reported name is missing or unsupported.
         */
        private static Charset responseCharset(GetMethod get) {
            try {
                return Charset.forName(get.getResponseCharSet());
            } catch (IllegalArgumentException unsupported) {
                // Covers null, illegal and unsupported charset names.
                return Charset.defaultCharset();
            }
        }

        /**
         * Reads the stream and prints its content, decoded with the platform
         * default charset.
         *
         * @param inputStream stream to read; it is not closed by this method
         */
        static void ReadFile2(InputStream inputStream) {
            ReadFile2(inputStream, Charset.defaultCharset());
        }

        /**
         * Reads at most {@link #MAX_HOTWORDS_FILE_SIZE} bytes from the stream,
         * decodes them with {@code charset} and prints the text to standard output.
         *
         * <p>An {@link IOException} while reading is printed and nothing is decoded.
         *
         * @param inputStream stream to read; it is not closed by this method
         * @param charset     charset used to decode the collected bytes
         */
        static void ReadFile2(InputStream inputStream, Charset charset) {
            byte[] buffer = new byte[1024];
            ByteArrayOutputStream body = new ByteArrayOutputStream();

            try {
                while (body.size() < MAX_HOTWORDS_FILE_SIZE) {
                    // Never request more than what is left of the size cap.
                    int wanted = Math.min(buffer.length, MAX_HOTWORDS_FILE_SIZE - body.size());
                    int nBytesRead = inputStream.read(buffer, 0, wanted);
                    System.out.println("nBytesRead=" + nBytesRead);
                    if (nBytesRead <= 0) {
                        System.out.println(String.format("download end. file size=%d", body.size()));
                        break;
                    }
                    body.write(buffer, 0, nBytesRead);
                }
                // Decode once, after all bytes are collected: decoding chunk by chunk
                // corrupts multi-byte characters that straddle a chunk boundary.
                System.out.println("bab = " + new String(body.toByteArray(), charset));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }
    

    这里需要导入以下几个类(来自 Apache Commons HttpClient 库):

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.charset.Charset;

    import org.apache.commons.httpclient.HttpClient;
    import org.apache.commons.httpclient.methods.GetMethod;
    import org.apache.commons.httpclient.params.HttpMethodParams;



  • 相关阅读:
    Linux 任务计划
    Linux 进程及作业管理
    算法-动规
    算法-递归
    继承自string 的MyString
    魔兽2-装备
    [小甲鱼]入门学习python笔记 【魔法方法】
    [小甲鱼]入门学习python笔记 【类与对象】
    魔兽1 -备战
    讨厌的大整数加法
  • 原文地址:https://www.cnblogs.com/zfyouxi/p/4386860.html
Copyright © 2020-2023  润新知