• 【java】抓取页面内容,提取链接(此方法可用于无需账号密码的 HTTP GET 请求)


     1 package 网络编程;
     2 
     3 import java.io.BufferedReader;
     4 import java.io.BufferedWriter;
     5 import java.io.FileOutputStream;
     6 import java.io.IOException;
     7 import java.io.InputStreamReader;
     8 import java.io.OutputStreamWriter;
     9 import java.net.URL;
    10 
    /**
     * Downloads the Baidu home page with a plain HTTP GET and saves it as {@code baidu.html}.
     */
    public class TestBaidu {
        /**
         * Reads {@code http://www.baidu.com} as UTF-8 text and writes it, line by line, to the
         * relative path {@code baidu.html}, also encoded as UTF-8.
         *
         * @param args unused
         * @throws IOException if the page cannot be read or the file cannot be written
         */
        public static void main(String[] args) throws IOException {
            URL url = new URL("http://www.baidu.com");
            // Decode through a Reader with an explicit charset. Converting each raw 1024-byte
            // chunk with new String(bytes, 0, len) uses the platform default charset and can cut
            // a multi-byte character in half at a chunk boundary, which produces garbled output.
            //
            // try-with-resources closes both streams even when reading or writing fails;
            // closing the BufferedWriter also flushes it.
            try (BufferedReader br = new BufferedReader(
                         new InputStreamReader(url.openStream(), "utf-8"));
                 BufferedWriter bw = new BufferedWriter(
                         new OutputStreamWriter(new FileOutputStream("baidu.html"), "utf-8"))) {
                String line;
                while ((line = br.readLine()) != null) {
                    bw.append(line);
                    bw.newLine();
                }
            }
        }
    }
    抓取页面内容
     1 package 网络编程;
     2 
     3 import java.io.BufferedReader;
     4 import java.io.IOException;
     5 import java.io.InputStreamReader;
     6 import java.net.URL;
     7 import java.nio.charset.Charset;
     8 import java.util.regex.Matcher;
     9 import java.util.regex.Pattern;
    10 
    /**
     * Fetches the 163.com home page and prints every double-quoted {@code http://} link in it.
     */
    public class Get163URL {
        /**
         * Matches a double-quoted string that starts with {@code http://}; group 1 is the URL
         * without the surrounding quotes. The reluctant {@code .+?} stops at the first closing
         * quote.
         */
        private static final Pattern QUOTED_HTTP_URL = Pattern.compile("\"(http://.+?)\"");

        /**
         * Downloads {@code http://www.163.com}, decodes it as GBK and prints each matched URL on
         * its own line to standard output.
         *
         * @param args unused
         * @throws IOException if the page cannot be read
         */
        public static void main(String[] args) throws IOException {
            URL url = new URL("http://www.163.com");
            StringBuilder page = new StringBuilder();
            // try-with-resources closes the network stream even if reading fails.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(url.openStream(), Charset.forName("gbk")))) {
                String line;
                while ((line = br.readLine()) != null) {
                    // readLine() strips the line terminator, so the page is accumulated as one
                    // long line and the pattern can match across original line breaks.
                    page.append(line);
                }
            }
            Matcher m = QUOTED_HTTP_URL.matcher(page);
            while (m.find()) {
                System.out.println(m.group(1));
            }
        }
    }
    提取链接

     /**
      * Logs in to a wiki with a form POST and prints the HTML of the page the login leads to.
      */
     public class WikiDownload {
         static final String name = "username";
         static final String pwd = "password";

         /**
          * Posts the credentials in {@link #name} and {@link #pwd} to the wiki login action and
          * prints the response body to standard output. Any failure is reported with a stack
          * trace instead of being propagated.
          *
          * @param args unused
          */
         public static void main(String[] args) {
             // Install a default cookie handler so cookies set by the login response are stored.
             CookieManager manager = new CookieManager();
             CookieHandler.setDefault(manager);
             String wikiUrl = "http://wiki.xxxxx.org/pages/viewpage.action?pageId=71709153";
             String loginUrl = "http://wiki.xxxxx.org/login.action?os_destination=%2Fpages%2Fviewpage.action%3FpageId%3D71709153";
             try {
                 URL url = new URL(loginUrl);
                 HttpURLConnection connection = (HttpURLConnection) url.openConnection();
                 connection.setRequestProperty("accept", "*/*");
                 connection.setRequestProperty("connection", "Keep-Alive");
                 connection.setRequestProperty("user-agent",
                         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36");
                 connection.setDoInput(true);
                 connection.setDoOutput(true);
                 connection.setUseCaches(false);
                 connection.setRequestMethod("POST");

                 // Path plus query of the target page, e.g. "/pages/viewpage.action?pageId=...".
                 // Splitting wikiUrl on the host prefix and taking element [0] yields the empty
                 // string in front of the delimiter, so the destination would be sent empty.
                 String destination = new URL(wikiUrl).getFile();
                 // Every value is URL-encoded so characters such as '&', '=' or '+' in the
                 // credentials cannot corrupt the form body.
                 String form = "os_username=" + URLEncoder.encode(name, "utf-8")
                         + "&os_password=" + URLEncoder.encode(pwd, "utf-8")
                         + "&login=%E7%99%BB%E5%BD%95&os_destination="
                         + URLEncoder.encode(destination, "utf-8");
                 try (OutputStreamWriter writer =
                              new OutputStreamWriter(connection.getOutputStream(), "utf-8")) {
                     writer.write(form);
                 }

                 // NOTE(review): assumes the wiki answers in UTF-8 - confirm against the
                 // response Content-Type header.
                 try (BufferedReader in = new BufferedReader(
                         new InputStreamReader(connection.getInputStream(), "utf-8"))) {
                     StringBuilder result = new StringBuilder();
                     String line;
                     while ((line = in.readLine()) != null) {
                         // Each line is preceded by a newline, so the output starts with a
                         // blank line.
                         result.append("\n");
                         result.append(line);
                     }
                     System.out.println(result);
                 }
             } catch (Exception e) {
                 e.printStackTrace();
             }
         }
     }
    获取需要登录的网页
  • 相关阅读:
    day06 tar命令使用,vim简单操作以及linux开机过程
    day05 创建用户过程、文件夹,文件等权限修改等
    简单算法
    day04
    day03
    Vim常用快捷键
    day02
    ssh注解开发
    spring07 JDBC
    spring06Aop
  • 原文地址:https://www.cnblogs.com/xiongjiawei/p/6714398.html
Copyright © 2020-2023  润新知