• 通过WebClient类来发起请求并下载html 抓取邮箱 图片


     using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Net;
    using System.Text.RegularExpressions;
    using System.IO;
    
    // Spaces are not legal inside a C# identifier, so the original namespace name
    // could not compile; the spaces are replaced with underscores.
    namespace 通过WebClient类来发起请求并下载html_抓取邮箱_图片
    {
        /// <summary>
        /// Console sample that downloads a job-listing page with <see cref="WebClient"/>
        /// and prints every job title found in it.
        /// </summary>
        class Program
        {
            /// <summary>Address of the job-listing page that <see cref="Main"/> downloads.</summary>
            private const string JobListUrl = "http://192.168.1.100:8080/【上海,IT-管理,计算机软件招聘,求职】-前程无忧.htm";

            /// <summary>
            /// Matches a job link and captures its text in group 1. Example of a matching tag:
            /// &lt;a href="http://search.51job.com/job/46621778,c.html" onclick="zzSearch.acStatRecJob( 1 );" class="jobname" target="_blank"&gt;ERP项目经理&lt;/a&gt;
            /// </summary>
            /// <remarks>
            /// Fixes over the original pattern: <c>\s+</c> instead of the literal text "s+",
            /// escaped dots in the host and file name, <c>[^&gt;]*</c> so the remaining attributes
            /// cannot run past the end of the opening tag, and a lazy <c>(.+?)</c> so that several
            /// links on one line produce one match each.
            /// </remarks>
            private static readonly Regex JobLinkPattern = new Regex(
                @"<a\s+href=""http://search\.51job\.com/job/[0-9]{8},c\.html""[^>]*>(.+?)</a>");

            /// <summary>
            /// Entry point: downloads the job-listing page, prints each job title on its own
            /// line, prints the total count, then waits for a key press.
            /// </summary>
            /// <param name="args">Command-line arguments (not used).</param>
            static void Main(string[] args)
            {
                // NOTE(review): the backslashes in the two disabled samples below (\s, \. and
                // the c:\mv\ folder) were restored by inference; verify before re-enabling them.

                #region Scrape e-mail addresses from a web page (disabled sample)
                //string url = "http://192.168.1.100:8080/提取Email.htm";
                //// 1. Download the HTML text for the address.
                //WebClient wc = new WebClient();
                //wc.Encoding = Encoding.UTF8;
                //string html = wc.DownloadString("http://192.168.1.100:8080/提取Email.htm");
                //// 2. Extract the e-mail addresses from the downloaded text and write them to a text file.
                //MatchCollection matches = Regex.Matches(html, @"[-a-zA-Z0-9_.]+@[-a-zA-Z0-9]+(\.[a-zA-Z0-9]+){1,}");

                //using (StreamWriter writer = new StreamWriter("email.txt"))
                //{
                //    // Walk through the extracted e-mail addresses.
                //    foreach (Match item in matches)
                //    {
                //        //Console.WriteLine(item.Value);
                //        writer.WriteLine(item.Value);
                //    }
                //}

                //Console.ReadKey();
                #endregion

                #region Download the images of a web page (disabled sample)
                //WebClient wc = new WebClient();

                //// 1. Download the page source.
                //string html = wc.DownloadString("http://192.168.1.100:8080/美女图片/美女们.htm");
                //// 2. Extract the images of the page, i.e. its <img> tags, for example:
                ////    <img alt="" src="hotgirls/00_00.jpg" />
                //MatchCollection matches = Regex.Matches(html, @"<img\s+alt="""" src=""(.+)""\s*/>");
                //foreach (Match item in matches)
                //{
                //    string imgPath = "http://192.168.1.100:8080/美女图片/" + item.Groups[1].Value;
                //    // Download the image.
                //    wc.DownloadFile(imgPath, @"c:\mv\" + Path.GetFileName(imgPath));
                //}
                //Console.WriteLine("ok");
                //Console.ReadKey();
                #endregion

                #region Scrape job titles
                string html;

                // WebClient is IDisposable; release it as soon as the download has finished.
                // NOTE(review): unlike the e-mail sample, no Encoding is set here - confirm the
                // charset of the page if the titles come out garbled.
                using (WebClient webClient = new WebClient())
                {
                    html = webClient.DownloadString(JobListUrl);
                }

                List<string> titles = ExtractJobTitles(html);
                foreach (string title in titles)
                {
                    Console.WriteLine(title);
                }
                Console.WriteLine("共{0}个职位信息。", titles.Count);
                Console.ReadKey();
                #endregion
            }

            /// <summary>
            /// Extracts the link text of every job link found in <paramref name="html"/>.
            /// </summary>
            /// <param name="html">HTML text to search; may be null or empty.</param>
            /// <returns>
            /// The captured link texts in document order; an empty list when
            /// <paramref name="html"/> is null, empty, or contains no job link.
            /// </returns>
            public static List<string> ExtractJobTitles(string html)
            {
                List<string> titles = new List<string>();
                if (string.IsNullOrEmpty(html))
                {
                    return titles;
                }

                foreach (Match link in JobLinkPattern.Matches(html))
                {
                    titles.Add(link.Groups[1].Value);
                }
                return titles;
            }
        }
    }
  • 相关阅读:
    一文读懂网管协议
    Python 3.8 离线安装
    Redis 6.X 离线安装
    ES 日期格式影响聚合效果
    (1)Canal入门
    (10)MySQL进阶篇SQL优化(InnoDB锁-间隙锁)
    (9)MySQL进阶篇SQL优化(InnoDB锁-记录锁)
    (8)MySQL进阶篇SQL优化(InnoDB锁-共享锁、排他锁与意向锁)
    (7)MySQL进阶篇SQL优化(InnoDB锁-事务隔离级别 )
    (6)MySQL进阶篇SQL优化(MyISAM锁)
  • 原文地址:https://www.cnblogs.com/blacop/p/6021345.html
Copyright © 2020-2023  润新知