• Java解析采集模块


      1 package step3;
      2 
      3 import java.io.BufferedReader;
      4 import java.io.BufferedWriter;
      5 import java.io.File;
      6 import java.io.FileReader;
      7 import java.io.FileWriter;
      8 import java.io.IOException;
      9 import java.io.InputStream;
     10 import java.io.InputStreamReader;
     11 import java.io.PrintWriter;
     12 import java.sql.ResultSet;
     13 import java.sql.SQLException;
     14 import java.sql.Statement;
     15 import java.util.ArrayList;
     16 import java.util.Calendar;
     17 import java.util.List;
     18 
     19 import org.apache.commons.httpclient.HttpClient;
     20 import org.apache.commons.httpclient.methods.GetMethod;
     21 import org.apache.commons.httpclient.methods.PostMethod;
     22 import org.json.JSONObject;
     23 import org.jsoup.Jsoup;
     24 import org.jsoup.nodes.Document;
     25 import org.jsoup.select.Elements;
     26 
     27 import bean.Porn;
     28 import util.DBConnection;
     29 
     30 /**
     31  * 
     32  * @ClassName: quhao
     33  * @Description: 91porn地址解析
     34  * @author zeze
     35  * @date 2016年06月30日 下午7:55:31
     36  *
     37  */
     38 public class porn91 {
     39 
     40     private static String cookie = "incap_ses_401_649914=31EbXVOgx0r6Ql5TmqOQBdjxdFcAAAAAu7MrrqICFZvpjsIw5VriGQ==; incap_ses_434_649914=wx2HcnWH7GDQCChRweAFBt/xdFcAAAAAczn9Ohl2VBPqxEd8kRi2GA==; incap_ses_407_649914=U4VYNM5iO1l1H0VP7/SlBWXydFcAAAAAifL73Yq/OnIgRqKWiWPqUg==; incap_ses_406_649914=8Ub/DfvqEGs9L9gFemeiBWEKdVcAAAAA+aBeDqKyWw37Sv+KZ4cdlA==; incap_ses_432_649914=bLzAYBXvVG0kSU6wyMX+BWUKdVcAAAAAZW+uykXgylzu/dZOu7IDWw==; _ga=GA1.2.1738858661.1466764840; _gat=1; visid_incap_649914=2hb3ym0OQ9C7sr1krqKCQTUObVcAAAAAQUIPAAAAAADQQCM/QP5jhCXO3+mlIKmg; incap_ses_199_649914=RkWbbfybyCoL2fxKs/3CAqIbdVcAAAAAOa+RJFdt35NV8xtM8MbP8Q==; session=eyJfZnJlc2giOmZhbHNlLCJjc3JmX3Rva2VuIjp7IiBiIjoiTkdFek9HRmtNakkxTldVM05EVXpZMkZoTldKaE5tWXpOV014TlRBNU1UZ3dPVGcyTkRNMU5BPT0ifX0.ClatMQ.INJmWYMZ8T220CgsSTcfpHhTxXI";
     41     private static String cookie2 = "incap_ses_401_649914=31EbXVOgx0r6Ql5TmqOQBdjxdFcAAAAAu7MrrqICFZvpjsIw5VriGQ==; incap_ses_434_649914=wx2HcnWH7GDQCChRweAFBt/xdFcAAAAAczn9Ohl2VBPqxEd8kRi2GA==; incap_ses_407_649914=U4VYNM5iO1l1H0VP7/SlBWXydFcAAAAAifL73Yq/OnIgRqKWiWPqUg==; incap_ses_406_649914=8Ub/DfvqEGs9L9gFemeiBWEKdVcAAAAA+aBeDqKyWw37Sv+KZ4cdlA==; incap_ses_432_649914=bLzAYBXvVG0kSU6wyMX+BWUKdVcAAAAAZW+uykXgylzu/dZOu7IDWw==; _ga=GA1.2.1738858661.1466764840; _gat=1; visid_incap_649914=2hb3ym0OQ9C7sr1krqKCQTUObVcAAAAAQUIPAAAAAADQQCM/QP5jhCXO3+mlIKmg; incap_ses_199_649914=RkWbbfybyCoL2fxKs/3CAqIbdVcAAAAAOa+RJFdt35NV8xtM8MbP8Q==; session=eyJfZnJlc2giOmZhbHNlLCJjc3JmX3Rva2VuIjp7IiBiIjoiTkdFek9HRmtNakkxTldVM05EVXpZMkZoTldKaE5tWXpOV014TlRBNU1UZ3dPVGcyTkRNMU5BPT0ifX0.ClatMw.6MGC1jX7mgjsChpGFBd-xHTv9ZU";
     42 
     43     private static String Token = "1467296187##60ecf40d9328862cc6cd6a478adfc72ee0554050";
     44 
     45     private static String Url = "http://freeget.co/video/extraction";
     46     private static String url001 = null;
     47     private static String dirfile = "F:/91porn/91url.csv";
     48     private static String destfile = "F:/91porn/data.txt";
     49 
     50     private static int cnt0 = 0;
     51 
     52     private static String num = null;
     53     private static String title = null;
     54     private static String time = null;
     55     private static String longtime = null;
     56     private static String viewnum = null;
     57     private static String Parurl = null;// "http://www.91porn.com/view_video.php?viewkey=c5ec60d0da8c8fbdb180&page=4&viewtype=basic&category=mr";
     58 
     59     public static void main(String[] args) throws InterruptedException {
     60 
     61         File file = new File(dirfile);
     62         FileReader reader = null;
     63         BufferedReader br = null;
     64         try {
     65             reader = new FileReader(file);
     66             br = new BufferedReader(reader);
     67             String str = null;
     68             String[] strArr = null;
     69             int cnt = 0;
     70             while ((str = br.readLine()) != null) {
     71                 // System.out.println(str);
     72                 strArr = str.split(",");
     73                 if (strArr.length != 7)
     74                     continue;
     75                 num = strArr[0];
     76                 title = strArr[1];
     77                 time = strArr[2];
     78                 longtime = strArr[4];
     79                 viewnum = strArr[5];
     80                 Parurl = strArr[6];
     81                 cnt++;
     82                 System.out.println(num + "," + title + "," + time);
     83                 func_step1();
     84             }
     85             System.out.println("采集结束,总共:" + cnt + "条,成功写入" + cnt0 + "条");
     86 
     87         } catch (Exception e) {
     88             // TODO: handle exception
     89             e.printStackTrace();
     90         } finally {
     91             if (br != null) {
     92                 try {
     93                     br.close();
     94                 } catch (Exception e2) {
     95                     // TODO: handle exception
     96                     e2.printStackTrace();
     97                 }
     98             }
     99             if (reader != null) {
    100                 try {
    101                     reader.close();
    102                 } catch (Exception e2) {
    103                     // TODO: handle exception
    104                     e2.printStackTrace();
    105                 }
    106             }
    107         }
    108 
    109     }
    110 
    111     private static void func_step1() {
    112         HttpClient httpClient = new HttpClient();
    113         try {
    114             PostMethod postMethod = new PostMethod(Url);
    115             postMethod.getParams().setContentCharset("utf-8");
    116             // 每次访问需授权的网址时需 cookie 作为通行证
    117             postMethod.setRequestHeader("cookie", cookie);
    118             postMethod.setRequestHeader("X-CSRFToken", Token);
    119             postMethod.setRequestHeader("Accept-Language", "zh-CN,zh;q=0.8");
    120             postMethod.setRequestHeader("Host", "freeget.co");
    121             postMethod.setRequestHeader("Referer", "http://freeget.co/");
    122             postMethod.setRequestHeader("User-Agent",
    123                     "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
    124             postMethod.setParameter("url", Parurl);
    125             int statusCode = httpClient.executeMethod(postMethod);// 返回状态码200为成功,500为服务器端发生运行错误
    126             System.out.println("返回状态码:" + statusCode);
    127             // 打印出返回数据,检验一下是否成功
    128             String result = postMethod.getResponseBodyAsString();
    129             if (statusCode == 200) {
    130                 // 解析成功,取得token和view_key
    131                 JSONObject a = new JSONObject(result);
    132                 url001 = "http://freeget.co/video/" + a.get("view_key") + "/" + a.get("token");
    133                 System.out.println("视频解析地址:" + url001);
    134                 func_step2(url001);
    135             }
    136         } catch (Exception e) {
    137             e.printStackTrace();
    138         }
    139     }
    140 
    141     private static void func_step2(String url) {
    142         HttpClient httpClient = new HttpClient();
    143         try {
    144             GetMethod getMethod = new GetMethod(url);
    145             getMethod.getParams().setContentCharset("utf-8");
    146             getMethod.setRequestHeader("cookie", cookie2);
    147             getMethod.setRequestHeader("Accept-Language", "zh-cn");
    148             getMethod.setRequestHeader("User-Agent",
    149                     "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
    150             int statusCode = httpClient.executeMethod(getMethod);// 返回状态码200为成功,500为服务器端发生运行错误
    151             // System.out.println("返回状态码:" + statusCode);
    152             // 打印出返回数据,检验一下是否成功
    153             InputStream inputStream = getMethod.getResponseBodyAsStream();
    154             BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
    155             StringBuffer stringBuffer = new StringBuffer();
    156             String str = "";
    157             while ((str = br.readLine()) != null) {
    158                 stringBuffer.append(str);
    159             }
    160             if (statusCode == 200) {
    161                 Document doc = Jsoup.parse(stringBuffer.toString());
    162                 Elements name = doc.select("a");
    163                 String playurl = name.get(4).text();
    164                 System.out.println("在线播放地址:" + playurl);
    165                 writefile(playurl);
    166                 cnt0++;
    167             }
    168         } catch (Exception e) {
    169             e.printStackTrace();
    170         }
    171     }
    172 
    173     private static void writefile(String url) {
    174         FileWriter fw = null;
    175         BufferedWriter bw = null;
    176         PrintWriter pw = null;
    177         try {
    178             fw = new FileWriter(new File(destfile), true);
    179             bw = new BufferedWriter(fw);
    180             pw = new PrintWriter(bw);
    181             pw.write(num + ',' + title + ',' + time + ',' + longtime + ',' + viewnum + ',' + url + "
    ");
    182         } catch (IOException e) {
    183             // TODO Auto-generated catch block
    184             e.printStackTrace();
    185         } finally {
    186             if (pw != null) {
    187                 pw.close();
    188             }
    189             if (bw != null) {
    190                 try {
    191                     bw.close();
    192                 } catch (IOException e) {
    193                     // TODO Auto-generated catch block
    194                     e.printStackTrace();
    195                 }
    196             }
    197             if (fw != null) {
    198                 try {
    199                     fw.close();
    200                 } catch (IOException e) {
    201                     // TODO Auto-generated catch block
    202                     e.printStackTrace();
    203                 }
    204             }
    205         }
    206     }
    207 
    208     public List<Porn> QueryAllBook() {
    209         java.sql.Connection connection = DBConnection.getConnection();
    210         String sql = "select * from porn where status=0";
    211         java.sql.PreparedStatement pstmt = DBConnection.getPreparedStatement(connection, sql);
    212         List<Porn> pornlist = new ArrayList<Porn>();
    213         System.out.println(sql);
    214         try {
    215             Statement stmt = connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_READ_ONLY);
    216             java.sql.ResultSet rs = stmt.executeQuery(sql);
    217             while (rs.next()) {
    218                 Porn porn = new Porn();
    219                 porn.setNum(rs.getString(1));
    220                 porn.setTitle(rs.getString(2));
    221                 porn.setTime(rs.getString(3));
    222                 porn.setViewkey(rs.getString(4));
    223                 porn.setLongtime(rs.getString(5));
    224                 porn.setViewnum(rs.getString(6));
    225                 porn.setParurl(rs.getString(7));
    226                 pornlist.add(porn);
    227             }
    228             rs.last();
    229         } catch (SQLException e) {
    230             e.printStackTrace();
    231         } finally {
    232             DBConnection.close(connection, pstmt, null);
    233         }
    234         return pornlist;
    235     }
    236 }
    View Code
  • 相关阅读:
    转基因(转载)
    Diwali
    使用Matplotlib画图
    项目格式规范
    关于Dapper
    JQuery
    javascript封装
    2015年2月16日——作者观点
    2015年2月12日——不懂点
    在VS2013上使用git
  • 原文地址:https://www.cnblogs.com/zeze/p/5631448.html
Copyright © 2020-2023  润新知