using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace 正则
{
    /// <summary>
    /// Console scraper: downloads a listing page, follows every link whose href
    /// contains "article", extracts the page title and the first
    /// <c>&lt;div class="content"&gt;</c> block with regular expressions, and
    /// writes each result to a numbered text file under a "data" directory in
    /// the current working directory.
    /// </summary>
    class Program
    {
        /// <summary>Listing page that is scanned for article links.</summary>
        private const string ListingUrl = "http://www.admin5.com/browse/177/";

        /// <summary>Base address used to resolve relative hrefs found on the listing page.</summary>
        private const string SiteRoot = "http://www.admin5.com/";

        // The patterns are loop-invariant, so they are built once instead of per article.
        private static readonly Regex HrefPattern = new Regex(@"(?<=href="").*?(?="")");
        private static readonly Regex TitlePattern = new Regex("(?<=title>).*?(?=</title>)");
        private static readonly Regex ContentPattern = new Regex(@"<div class=""content"">[\s\S]*?</div>");

        /// <summary>
        /// Entry point. Fetches the listing page, then downloads and saves every
        /// linked article. Waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string html = GetHtml(ListingUrl, Encoding.UTF8);
            Uri siteRoot = new Uri(SiteRoot);
            string dataDir = Path.Combine(Directory.GetCurrentDirectory(), "data");

            // Output files are numbered 1, 2, 3, ... in the order articles are saved.
            int a = 1;
            foreach (Match m in HrefPattern.Matches(html))
            {
                if (!m.Value.Contains("article"))
                {
                    continue;
                }

                // Resolve the href once so the URL that is printed is the URL that is
                // fetched; this handles both relative and absolute hrefs. Non-HTTP
                // schemes are skipped because GetHtml only supports HTTP(S) requests.
                Uri articleUri;
                if (!Uri.TryCreate(siteRoot, m.Value, out articleUri)
                    || (articleUri.Scheme != Uri.UriSchemeHttp && articleUri.Scheme != Uri.UriSchemeHttps))
                {
                    continue;
                }

                Console.WriteLine(articleUri.AbsoluteUri);
                Console.WriteLine("抓取内容");
                string content = GetHtml(articleUri.AbsoluteUri, Encoding.UTF8);

                Match titleMatch = TitlePattern.Match(content);
                Match bodyMatch = ContentPattern.Match(content);
                if (!titleMatch.Success || !bodyMatch.Success)
                {
                    // Pages without a title or a content div cannot be saved; skip
                    // them instead of failing on an out-of-range index.
                    Console.WriteLine("未找到标题或内容，跳过");
                    continue;
                }

                string title = titleMatch.Value;
                string neirong = bodyMatch.Value;

                Console.WriteLine("保存数据");
                // CreateDirectory is a no-op when the directory already exists.
                Directory.CreateDirectory(dataDir);
                File.WriteAllText(Path.Combine(dataDir, a + ".txt"), title + " " + neirong);
                a++;
                Console.WriteLine("保存成功");
            }

            Console.WriteLine("ok");
            Console.ReadKey();
        }

        /// <summary>
        /// Downloads the body of <paramref name="url"/> with an HTTP GET and
        /// decodes it as text.
        /// </summary>
        /// <param name="url">Absolute HTTP or HTTPS URL to fetch.</param>
        /// <param name="encoding">Character encoding used to decode the response body.</param>
        /// <returns>The decoded response body, or an empty string if the response has no stream.</returns>
        /// <exception cref="WebException">The request failed or the server returned an error status.</exception>
        /// <exception cref="UriFormatException"><paramref name="url"/> is not a valid URI.</exception>
        private static string GetHtml(string url, Encoding encoding)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            // Dispose the response, stream, and reader so the connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream s = response.GetResponseStream())
            {
                if (s == null)
                {
                    return string.Empty;
                }

                using (StreamReader sr = new StreamReader(s, encoding))
                {
                    return sr.ReadToEnd();
                }
            }
        }
    }
}