• 通过自己技能把某个网站的ppt全部下载下来的过程


    1、该网站的ppt链接全部都在页面上,用正则手动提取所有链接,放在指定位置的,以txt形式保存,格式如下

    2、写个java文件处理一下,如下:

      1 package platform;
      2 
      3 import java.io.BufferedInputStream;
      4 import java.io.BufferedOutputStream;
      5 import java.io.BufferedReader;
      6 import java.io.File;
      7 import java.io.FileInputStream;
      8 import java.io.FileNotFoundException;
      9 import java.io.FileOutputStream;
     10 import java.io.IOException;
     11 import java.io.InputStreamReader;
     12 import java.io.UnsupportedEncodingException;
     13 import java.net.HttpURLConnection;
     14 import java.net.URL;
     15 import java.util.HashMap;
     16 import java.util.Map;
     17 
     18 import org.apache.http.HttpResponse;
     19 import org.apache.http.client.ClientProtocolException;
     20 import org.apache.http.client.methods.HttpPost;
     21 import org.apache.http.impl.client.DefaultHttpClient;
     22 
     23 public class TestQConDownload {
     24 
     25     public static void main(String[] args) {
     26         BufferedReader bufferedReader;
     27         String lineTxt = null;
     28         String title="1";
     29         String url="";
     30         try {
     31             //读文件
     32             bufferedReader = readTxtFile("E:\test\downinfo.txt");
     33             //循环遍历每行
     34             while((lineTxt = bufferedReader.readLine()) != null){
     35                 if(lineTxt.startsWith("【标题】")){
     36                     title = lineTxt.substring(4).replaceAll(":", "");
     37                     System.out.println(title);
     38                 }
     39                 if(lineTxt.startsWith("【下载地址】")){
     40                     url= lineTxt.substring(6);
     41                     //获取跳转后的地址
     42                     url = getRedirectLocation(url);
     43                     System.out.println(url);
     44                     //下载到指定位置
     45                     downloadFile(url, "E:\test\download\"+title+".pdf");
     46                 }
     47             }
     48             bufferedReader.close();
     49         } catch (UnsupportedEncodingException e) {
     50             // TODO Auto-generated catch block
     51             e.printStackTrace();
     52         } catch (FileNotFoundException e) {
     53             // TODO Auto-generated catch block
     54             e.printStackTrace();
     55         } catch (IOException e) {
     56             // TODO Auto-generated catch block
     57             e.printStackTrace();
     58         }
     59         
     60     }
     61     
     62     public static String getRedirectLocation(String url) throws ClientProtocolException, IOException {
     63         String SEND_MESSAGE_URL = url;
     64         Map<String, Object> params = new HashMap<String, Object>();
     65         HttpPost get = new HttpPost(SEND_MESSAGE_URL);
     66         get.setHeader("Cookie", "dx_un=%E5%B9%B4%E8%BD%BB%E7%9A%84%E7%96%AF%E5%AD%90; dx_avatar=http%3A%2F%2F7xil0e.com1.z0.glb.clouddn.com%2Fuser_580d84f25ea61.png; dx_token=0c6b719ffff50f3746b64f058cb4e719");
     67         get.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
     68         get.setHeader("Accept-Encoding", "zh-CN,zh;q=0.8");
     69         get.setHeader("Connection", "keep-alive");
     70         get.setHeader("Host", "ppt.geekbang.org");
     71         get.setHeader("Referer", "http://2016.qconshanghai.com/schedule");
     72         get.setHeader("Upgrade-Insecure-Requests", "1");
     73         get.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36");
     74 
     75         // 设置编码
     76         HttpResponse re = new DefaultHttpClient().execute(get);
     77         /*if (re.getStatusLine().getStatusCode() == 200) {// 如果状态码为200,就是正常返回
     78             String result = EntityUtils.toString(re.getEntity());
     79             System.out.println(result);
     80         }*/
     81         String location = re.getFirstHeader("Location").getValue();
     82         get.releaseConnection();
     83         return location;
     84     }
     85     
     86     /**  
     87      * 下载远程文件并保存到本地  
     88      * @param remoteFilePath 远程文件路径   
     89      * @param localFilePath 本地文件路径  
     90      */
     91     public static void downloadFile(String remoteFilePath, String localFilePath)
     92     {
     93         URL urlfile = null;
     94         HttpURLConnection httpUrl = null;
     95         BufferedInputStream bis = null;
     96         BufferedOutputStream bos = null;
     97         File f = new File(localFilePath);
     98         try
     99         {
    100             urlfile = new URL(remoteFilePath);
    101             httpUrl = (HttpURLConnection)urlfile.openConnection();
    102             httpUrl.connect();
    103             bis = new BufferedInputStream(httpUrl.getInputStream());
    104             bos = new BufferedOutputStream(new FileOutputStream(f));
    105             int len = 2048;
    106             byte[] b = new byte[len];
    107             while ((len = bis.read(b)) != -1)
    108             {
    109                 bos.write(b, 0, len);
    110             }
    111             bos.flush();
    112             bis.close();
    113             httpUrl.disconnect();
    114         }
    115         catch (Exception e)
    116         {
    117             e.printStackTrace();
    118         }
    119         finally
    120         {
    121             try
    122             {
    123                 bis.close();
    124                 bos.close();
    125             }
    126             catch (IOException e)
    127             {
    128                 e.printStackTrace();
    129             }
    130         }
    131     }
    132     
    133     public static BufferedReader readTxtFile(String filePath) throws UnsupportedEncodingException, FileNotFoundException{
    134                 String encoding="UTF-8";
    135                 File file=new File(filePath);
    136                     InputStreamReader read = new InputStreamReader(
    137                     new FileInputStream(file),encoding);//考虑到编码格式
    138                     BufferedReader bufferedReader = new BufferedReader(read);
    139                     return bufferedReader;
    140     }
    141 }
  • 相关阅读:
    智能指针
    C++学习之对类中的成员函数的定义和声明最后添加一个const作用
    动态链接,静态链接库
    Java 位运算
    Java 工具类
    Java 枚举
    Java 内部类
    Java 异常机制
    Java hashCode 和 equals
    Java 字节流和字符流
  • 原文地址:https://www.cnblogs.com/flying607/p/5993409.html
Copyright © 2020-2023  润新知