• C#程序


    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Threading.Tasks;
    
    namespace MeiZi
    {
        /// <summary>
        /// Console entry point of the image crawler.
        /// </summary>
        public class Program
        {
            /// <summary>
            /// Starts the crawl. Constructing <see cref="GetMeiziPic"/> is what runs it,
            /// so the created instance is not used afterwards.
            /// </summary>
            /// <param name="args">Command-line arguments; not read.</param>
            static void Main(string[] args)
            {
                var crawler = new GetMeiziPic();
            }
        }
        /// <summary>
        /// Crawls the listing pages of www.sepaidui.com, follows every post link found on them and
        /// saves the images whose URL contains "sinaimg" into an "Images" folder under the current
        /// directory. All of the work is performed by the constructor.
        /// </summary>
        public class GetMeiziPic
        {
            /// <summary>Listing page URL; the page number is appended to it.</summary>
            private const string ListPageUrl = "http://www.sepaidui.com/?sort=4&page=";

            /// <summary>First listing page that is fetched.</summary>
            private const int FirstPage = 1;

            /// <summary>Exclusive upper bound of the listing pages that are fetched.</summary>
            private const int PageLimit = 10;

            /// <summary>Images whose URL contains this token are never downloaded.</summary>
            private const string ExcludedImageToken = "60d02b59tw1eq6g7srmiwj20pv03mdg8";

            private const RegexOptions HtmlOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

            /// <summary>Matches an img tag; group 1 is the value of its src attribute.</summary>
            private static readonly Regex ImgRegex =
                new Regex(@"<img[^>]*?src\s*=\s*[""']?([^'"" >]+?)[ '""][^>]*?>", HtmlOptions);

            /// <summary>Matches a post title of the form &lt;h2&gt;&lt;a ...&gt;text&lt;/a&gt;&lt;/h2&gt;.</summary>
            private static readonly Regex LinkRegex =
                new Regex(@"<h2><a\s+[^>]*?>[^<>]*?</a></h2>", HtmlOptions);

            /// <summary>Matches an href attribute (any quoting style); group 1 is its value.</summary>
            private static readonly Regex HrefRegex =
                new Regex(@"href\s*=\s*[""']?([^'"" >]+)", HtmlOptions);

            /// <summary>Loose check that a string contains something shaped like an http URL.</summary>
            private static readonly Regex IsUrlFormat =
                new Regex(@"http://?([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?");

            /// <summary>Directory the downloaded images are written to.</summary>
            private readonly string _path;

            /// <summary>
            /// Ensures the output directory exists and then crawls the listing pages
            /// from <see cref="FirstPage"/> up to, but not including, <see cref="PageLimit"/>.
            /// </summary>
            public GetMeiziPic()
            {
                _path = DealDir(Path.Combine(Environment.CurrentDirectory, "Images"));
                Console.WriteLine("===============    开始采集   ===============");
                for (var i = FirstPage; i < PageLimit; i++)
                {
                    Console.WriteLine("===============正在下载第{0}页数据===============", i);
                    DoFetchStep1(i);
                }

                Console.WriteLine("===============   采集完成   ===============");
            }

            /// <summary>
            /// Makes sure <paramref name="path"/> exists as a directory.
            /// </summary>
            /// <param name="path">Directory to create if it is missing.</param>
            /// <returns>The same <paramref name="path"/>.</returns>
            private static string DealDir(string path)
            {
                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(path);
                return path;
            }

            /// <summary>
            /// Step 1: downloads one listing page and processes the post links on it.
            /// </summary>
            /// <param name="pageNum">Number of the listing page to fetch.</param>
            private void DoFetchStep1(int pageNum)
            {
                var html = DownloadHtml(ListPageUrl + pageNum);
                if (html != null)
                {
                    FetchLinksFromSource1(html);
                }
            }

            /// <summary>
            /// Finds every post link in a listing page and hands its target to step 2.
            /// </summary>
            /// <param name="htmlSource">HTML of a listing page.</param>
            private void FetchLinksFromSource1(string htmlSource)
            {
                foreach (Match link in LinkRegex.Matches(htmlSource))
                {
                    // Read the href attribute explicitly instead of splitting on '"', which
                    // throws for single-quoted/unquoted attributes and picks the wrong
                    // attribute when href is not the first one in the tag.
                    var href = HrefRegex.Match(link.Value);
                    if (href.Success)
                    {
                        DoFetchStep2(href.Groups[1].Value);
                    }
                }
            }

            /// <summary>
            /// Step 2: downloads one post page and saves the images found on it.
            /// </summary>
            /// <param name="href">URL of the post page.</param>
            private void DoFetchStep2(string href)
            {
                var html = DownloadHtml(href);
                if (html != null)
                {
                    FetchLinksFromSource2(html);
                }
            }

            /// <summary>
            /// Downloads the images referenced by a post page. Only URLs that contain "sinaimg",
            /// pass <see cref="CheckIsUrlFormat"/> and do not contain the excluded token are fetched.
            /// </summary>
            /// <param name="htmlSource">HTML of a post page.</param>
            private void FetchLinksFromSource2(string htmlSource)
            {
                using (var client = new WebClient())
                {
                    foreach (Match img in ImgRegex.Matches(htmlSource))
                    {
                        var href = img.Groups[1].Value;

                        // Only pick images hosted on the Sina album service.
                        if (!href.Contains("sinaimg") || !CheckIsUrlFormat(href) || href.Contains(ExcludedImageToken))
                        {
                            continue;
                        }

                        Console.WriteLine(href);
                        try
                        {
                            var uri = new Uri(href);

                            // Take the extension from the URL path only, so a query string
                            // cannot end up in the file name.
                            var fileName = Path.GetRandomFileName() + Path.GetExtension(uri.AbsolutePath);
                            client.DownloadFile(uri, Path.Combine(_path, fileName));
                        }
                        catch (Exception ex)
                        {
                            // Deliberately best-effort: report the failure and carry on with
                            // the next image.
                            Console.WriteLine(ex.Message);
                        }
                    }
                }
            }

            /// <summary>
            /// Performs an HTTP GET and returns the response body as text.
            /// </summary>
            /// <param name="url">Absolute http/https URL to fetch.</param>
            /// <returns>
            /// The response body, or <c>null</c> when the URL is not an absolute http/https URL,
            /// the status is not 200 OK, there is no response stream, or the request fails.
            /// </returns>
            private static string DownloadHtml(string url)
            {
                Uri uri;
                if (!Uri.TryCreate(url, UriKind.Absolute, out uri)
                    || (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps))
                {
                    // Relative or non-HTTP links cannot be requested through HttpWebRequest.
                    Console.WriteLine("Skipped unsupported link: " + url);
                    return null;
                }

                try
                {
                    var request = (HttpWebRequest)WebRequest.Create(uri);

                    // NOTE(review): this offers the current user's default credentials to the
                    // remote site; kept from the original code - confirm it is really needed.
                    request.Credentials = CredentialCache.DefaultCredentials;

                    // Dispose the response so the underlying connection is released.
                    using (var response = (HttpWebResponse)request.GetResponse())
                    {
                        if (response.StatusCode != HttpStatusCode.OK)
                        {
                            return null;
                        }

                        var stream = response.GetResponseStream();
                        if (stream == null)
                        {
                            return null;
                        }

                        using (var reader = new StreamReader(stream))
                        {
                            return reader.ReadToEnd();
                        }
                    }
                }
                catch (WebException ex)
                {
                    // One unreachable page must not abort the whole crawl.
                    Console.WriteLine(ex.Message);
                    return null;
                }
                catch (IOException ex)
                {
                    // The connection dropped while the body was being read.
                    Console.WriteLine(ex.Message);
                    return null;
                }
            }

            /// <summary>
            /// Checks whether <paramref name="value"/> contains something shaped like an http URL.
            /// </summary>
            /// <param name="value">Text to test.</param>
            /// <returns><c>true</c> when the URL pattern is found anywhere in the text.</returns>
            private static bool CheckIsUrlFormat(string value)
            {
                return IsUrlFormat.IsMatch(value);
            }
        }
    }
  • 相关阅读:
    while循环学习之统计流量
    MySQL的启动脚本
    UVA 725 Division
    UVA 712 S-tree
    UVA 514
    字典树
    UVA 1595 multimap 的应用
    C++ map 和 multimap
    浮点数
    UVA 227
  • 原文地址:https://www.cnblogs.com/talentzemin/p/4355035.html
Copyright © 2020-2023  润新知