• 简简单单C#爬虫小记


    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Threading.Tasks;
    
    namespace 正则
    {
        /// <summary>
        /// Minimal console crawler: downloads one listing page, follows every link whose
        /// href contains "article", extracts the page title and the first
        /// <c>&lt;div class="content"&gt;</c> block with regular expressions, and writes
        /// them to numbered text files in a "data" folder under the current directory.
        /// </summary>
        class Program
        {
            // The patterns never change, so they are built once instead of once per article.
            // Captures the text between href=" and the next double quote.
            private static readonly Regex HrefPattern = new Regex("(?<=href=\").*?(?=\")");
            // Captures the text between "title>" and "</title>".
            private static readonly Regex TitlePattern = new Regex("(?<=title>).*?(?=</title>)");
            // Captures the content div up to the first closing </div> (lazy, spans newlines).
            private static readonly Regex ContentPattern = new Regex("<div class=\"content\">[\\s\\S]*?</div>");

            /// <summary>
            /// Entry point. Crawls the listing page and saves every article that could be
            /// downloaded and parsed; articles that fail are reported and skipped.
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                string url = "http://www.admin5.com/browse/177/";
                Uri listingUri = new Uri(url);
                string html = GetHtml(url, Encoding.UTF8);
                string dataDirectory = Path.Combine(Directory.GetCurrentDirectory(), "data");
                int a = 1;

                foreach (Match m in HrefPattern.Matches(html))
                {
                    if (!m.Value.Contains("article"))
                    {
                        continue;
                    }

                    // Resolve the href against the listing page so that relative and absolute
                    // links are handled alike, and so the printed URL is the one downloaded.
                    Uri articleUri;
                    if (!Uri.TryCreate(listingUri, m.Value, out articleUri))
                    {
                        continue;
                    }

                    // Skip non-web links (javascript:, mailto:, ...) that GetHtml cannot fetch.
                    if (articleUri.Scheme != Uri.UriSchemeHttp && articleUri.Scheme != Uri.UriSchemeHttps)
                    {
                        continue;
                    }

                    Console.WriteLine(articleUri.AbsoluteUri);
                    Console.WriteLine("抓取内容");

                    string content;
                    try
                    {
                        content = GetHtml(articleUri.AbsoluteUri, Encoding.UTF8);
                    }
                    catch (WebException ex)
                    {
                        // One unreachable article should not abort the whole crawl.
                        Console.WriteLine("抓取失败: " + ex.Message);
                        continue;
                    }

                    Match titleMatch = TitlePattern.Match(content);
                    Match bodyMatch = ContentPattern.Match(content);
                    if (!titleMatch.Success || !bodyMatch.Success)
                    {
                        // The page does not have the expected layout; nothing to save.
                        Console.WriteLine("未找到标题或正文,跳过");
                        continue;
                    }

                    Console.WriteLine("保存数据");
                    // CreateDirectory is a no-op when the folder already exists.
                    Directory.CreateDirectory(dataDirectory);
                    File.WriteAllText(
                        Path.Combine(dataDirectory, a + ".txt"),
                        titleMatch.Value + "\r\n" + bodyMatch.Value);
                    a++;
                    Console.WriteLine("保存成功");
                }

                Console.WriteLine("ok");
                Console.ReadKey();
            }

            /// <summary>
            /// Downloads the resource at <paramref name="url"/> with a blocking HTTP GET and
            /// returns the response body as text.
            /// </summary>
            /// <param name="url">Absolute http/https URL to download.</param>
            /// <param name="encoding">
            /// Encoding used to decode the body; UTF-8 is used when this is null.
            /// </param>
            /// <returns>The complete response body.</returns>
            /// <exception cref="WebException">The request failed or returned an error status.</exception>
            private static string GetHtml(string url, Encoding encoding)
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

                // Dispose response, stream and reader so the connection is released promptly.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, encoding ?? Encoding.UTF8))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }
    

      

    谢谢你长得这么好看还来看我的博客!
  • 相关阅读:
    比特币的加密算法
    区块链项目-Lisk
    以太坊(二)
    以太坊的货币发行模式
    以太坊(一)
    前端er们如何最快开发h5移动端页面?
    jQuery Ajax常用总结
    js中变量作用域
    网页引入特殊字体的几种方案
    几个有趣的WEB设备API(二)
  • 原文地址:https://www.cnblogs.com/hexd1230/p/4781526.html
Copyright © 2020-2023  润新知