/// <summary>
/// Downloads one page of the novel, extracts the link to the following page,
/// and writes the page's body text to the console.
/// </summary>
/// <param name="firstUrl">URL of the page to download.</param>
/// <param name="nexturl">
/// Receives the path (from the first "/" through the last "html") found inside the
/// anchor labelled "下一页" ("next page"). Set to an empty string when the page could
/// not be downloaded or contains no such anchor.
/// </param>
private static void GetContent(string firstUrl, out string nexturl)
{
    var html = GetUrlContent(firstUrl);
    if (string.IsNullOrEmpty(html))
    {
        // GetUrlContent reports failure as null; there is nothing to parse or print,
        // and the out parameter still has to be assigned before returning.
        nexturl = "";
        return;
    }

    // First isolate the whole "next page" anchor, then cut the path out of it.
    var nextAnchorPattern = @"<a href=([^>]+?)>下一页</a>";
    var pathPattern = "/.+html";
    nexturl = MatchReg(pathPattern, MatchReg(nextAnchorPattern, html));

    // [\s\S] matches any character including newlines, so the capture can span
    // the multi-line body of the BookText div.
    var bookTextPattern = @"(?m)<div id=""BookText""[^>]*>(?<div>[\s\S]*?)</div[^>]*>";
    var text = MatchReg(bookTextPattern, html, "div").Trim().Replace("<br />", "");

    // Strip the first <h4>...</h4> heading, if any. String.Replace throws on an
    // empty search string, so only call it when a heading was actually found.
    var headingPattern = @"<h4>([\s\S]*?)</h4>";
    var heading = MatchReg(headingPattern, text);
    if (heading.Length > 0)
    {
        text = text.Replace(heading, "");
    }

    Console.WriteLine(text);
}
19
/// <summary>
/// Runs a regular expression against a piece of text and returns one group
/// of the first match.
/// </summary>
/// <param name="regStr">Regular-expression pattern; evaluated with the Multiline and IgnoreCase options.</param>
/// <param name="html">Text to search. A null value is treated as "no match".</param>
/// <param name="input">Name or number of the group to return; the default "0" is the whole match.</param>
/// <returns>The value of the requested group, or an empty string when there is no match.</returns>
public static string MatchReg(string regStr, string html, string input = "0")
{
    // Regex.Match throws ArgumentNullException on null input; callers pass the
    // result of a download that may have failed, so treat null as "nothing found".
    if (html == null)
    {
        return "";
    }

    // The static Regex.Match overload reuses the framework's internal pattern cache
    // instead of constructing a new Regex object on every call.
    var match = Regex.Match(html, regStr, RegexOptions.Multiline | RegexOptions.IgnoreCase);
    return match.Success ? match.Groups[input].Value : "";
}
37
/// <summary>
/// Base address of the site being crawled.
/// </summary>
const string qsbkMainUrl = "http://www.dawangraoming.com";

/// <summary>
/// Builds the address of a page by appending a path to the site's base address.
/// </summary>
/// <param name="firsturl">Path to append to the base address; null is treated as an empty path.</param>
/// <returns>The base address immediately followed by <paramref name="firsturl"/>.</returns>
private static string GetWBJokeUrl(string firsturl)
{
    // string.Concat treats a null argument as empty, so a null path yields the base address.
    return string.Concat(qsbkMainUrl, firsturl);
}
54
/// <summary>
/// Downloads a page with an HTTP GET request that carries a desktop-browser
/// User-Agent header, and returns the response body decoded as UTF-8.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The response body as text, or null when the request could not be created or completed.</returns>
private static string GetUrlContent(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.8.1000 Chrome/30.0.1599.101 Safari/537.36";
        request.Method = "GET";
        request.ContentType = "text/html;charset=UTF-8";

        // The using blocks release the response, its stream and the reader even when
        // reading fails part-way; an unclosed response keeps its connection occupied.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Best-effort contract: every failure (bad address, network error,
        // non-success status) is reported to the caller as null.
        return null;
    }
}