• 爬取中国信用黑名单网站图片和数据到本地


     2 
     3 import java.io.File;
     4 import java.io.IOException;
     5 import java.io.InputStream;
     6 import java.net.URL;
     7 import java.net.URLConnection;
     8 
     9 import org.apache.commons.io.FileUtils;
    10 
    11 
    12 
    13 public class SpiderDemo {
    14     public static void main(String[] args) throws IOException {
    15 //        URL url = new URL("http://www.zhongguoxinyongheimingdan.com");
    16 //        URLConnection connection = url.openConnection();
    17 //        InputStream in = connection.getInputStream();
    18 //        File file = new File("F://a.txt");
    19 //        FileUtils.copyInputStreamToFile(in, file);
    20         File srcDir = new File("F://a.txt");
    21         String str = FileUtils.readFileToString(srcDir, "UTF-8");
    22         String[] str1 = str.split("href=");
    23         for (int i = 3; i < str1.length-1; i++) {
    24             URL url = new URL("http://www.zhongguoxinyongheimingdan.com"+str1[i].substring(1, 27));
    25             File f = new File("F://abc//"+str1[i].substring(2, 22));
    26             if(!f.exists()){
    27             f.mkdir();    
    28             File desc1 = new File(f,str1[i].substring(1, 22)+".txt");
    29             URLConnection connection = url.openConnection();
    30             InputStream in = connection.getInputStream();
    31             FileUtils.copyInputStreamToFile(in, desc1);
    32             String str2 = FileUtils.readFileToString(desc1, "UTF-8");
    33             String[] str3 = str2.split("" src="");
    34             for(int j = 1;j<str3.length-2;j++){
    35                 URL url1 = new URL(str3[j].substring(0, 81));
    36                 URLConnection connection1 = url1.openConnection();
    37                 connection1.setDoInput(true);
    38                 InputStream in1 = connection1.getInputStream();
    39                 File desc2 = new File(f,str3[j].substring(44,76)+".jpg");
    40                 FileUtils.copyInputStreamToFile(in1, desc2);
    41             }
    42             }
    43             }
    44         }
    45     
    46 }
  • 相关阅读:
    [转]快速矩阵快速幂
    继续学习C:数字进制表示
    pthread_cond_wait()用法分析
    [原]NYOJ-光棍的yy-655
    [原]NYOJ-组合数-32
    [转]_int64、long long 的区别
    [原]NYOJ-6174问题-57
    [转]sscanf函数具体用法
    [原]NYOJ-A*B Problem II-623
    集存款(复利单利)贷款为一体的计算器(最新版)
  • 原文地址:https://www.cnblogs.com/bianqi/p/6404066.html
Copyright © 2020-2023  润新知