• 抓取网页并用正则表达式匹配邮箱地址


    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Net;
    using System.IO;
    
    namespace _07正则_匹配邮箱
    {
        /// <summary>
        /// Console sample that downloads a fixed list of web pages and collects the
        /// e-mail addresses found in them with a regular expression.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Matches an address written as <c>name@host.tld</c>, or with the separator
            /// obfuscated as <c>#</c> or as the literal text <c>(at)</c>.
            /// The dot between domain labels and the parentheses around "at" are escaped
            /// so that they only match those literal characters. The two named groups
            /// carry the parts needed to rebuild the address in its canonical form.
            /// </summary>
            private static readonly Regex MailRegex = new Regex(
                @"(?<local>[a-zA-Z0-9_]+)(?:@|#|\(at\))(?<domain>[a-zA-Z0-9]+(?:\.[a-zA-Z0-9]+)+)",
                RegexOptions.Compiled);

            /// <summary>
            /// Entry point: scans every page in the built-in list, prints the distinct
            /// addresses that were found and waits for a key press.
            /// </summary>
            /// <param name="args">Command-line arguments (not used).</param>
            static void Main(string[] args)
            {
                List<Uri> listUrl = new List<Uri>() {
                    new Uri("http://gb.corp.163.com/gb/contactus.html"),
                    new Uri("https://passport.csdn.net/help/faq"),
                    new Uri("http://www.kuaipan.cn/"),
                    new Uri("http://www.ksyun.com/home/joinUs/campus"),
                    new Uri("http://www.cnblogs.com/about/ad.aspx"),
                    new Uri("http://www.cnblogs.com/about/contactus.aspx"),
                    new Uri("http://www.csdn.net/company/statement.html"),
                    new Uri("http://hb.qq.com/job/dczp/index.htm")
                };
                List<string> listMail = new List<string>();
                foreach (Uri uri in listUrl)
                {
                    GetMails(uri, listMail);
                }

                cw(listMail);

                // Keep the console window open until a key is pressed.
                Console.ReadKey();
            }

            /// <summary>
            /// Downloads the page at <paramref name="uri"/> and appends every address
            /// found in it to <paramref name="list"/>, skipping addresses already present.
            /// Also prints whether the page contained at least one address.
            /// </summary>
            /// <param name="uri">Address of the page to download.</param>
            /// <param name="list">Accumulator shared across pages; modified in place.</param>
            private static void GetMails(Uri uri, List<string> list)
            {
                try
                {
                    Console.WriteLine("创建WebClient - [{0}]", uri.ToString());

                    string input;
                    // Dispose the client, the response stream and the reader even when
                    // the download fails half-way, so no connection is left open.
                    using (WebClient wc = new WebClient())
                    using (Stream stream = wc.OpenRead(uri))
                    using (StreamReader reader = new StreamReader(stream, Encoding.Default))
                    {
                        // Encoding.Default is kept from the original code; the pattern
                        // itself only matches ASCII characters.
                        input = reader.ReadToEnd();
                    }

                    int matchCount = ExtractMails(input, list);
                    // Same True/False line the original printed, without a second scan.
                    Console.WriteLine(matchCount > 0);
                }
                catch (Exception e)
                {
                    // Deliberately best-effort: one unreachable page must not stop the
                    // remaining pages from being scanned, so report and carry on.
                    Console.WriteLine(e.Message);
                }
            }

            /// <summary>
            /// Finds every address in <paramref name="input"/>, rewrites the obfuscated
            /// separators (<c>#</c>, <c>(at)</c>) to <c>@</c> and adds each address that
            /// is not yet in <paramref name="list"/>.
            /// </summary>
            /// <param name="input">Text to scan; null or empty yields no matches.</param>
            /// <param name="list">Collection receiving the distinct addresses.</param>
            /// <returns>Number of matches found in <paramref name="input"/>, duplicates included.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
            internal static int ExtractMails(string input, ICollection<string> list)
            {
                if (list == null)
                {
                    throw new ArgumentNullException("list");
                }
                if (string.IsNullOrEmpty(input))
                {
                    return 0;
                }

                MatchCollection matches = MailRegex.Matches(input);
                foreach (Match match in matches)
                {
                    // Rebuild from the named parts instead of running replacements over
                    // the whole hit: an address whose own text contains "at" (such as
                    // cat@163.com) must not be rewritten.
                    string mail = match.Groups["local"].Value + "@" + match.Groups["domain"].Value;
                    if (!list.Contains(mail))
                    {
                        list.Add(mail);
                    }
                }
                return matches.Count;
            }

            /// <summary>
            /// Prints the number of entries in <paramref name="list"/>, then each entry
            /// prefixed with its 1-based position, then a separator line.
            /// </summary>
            /// <param name="list">Addresses to print.</param>
            static void cw(List<string> list)
            {
                Console.WriteLine("长度为{0}", list.Count);
                for (int index = 0; index < list.Count; index++)
                {
                    Console.WriteLine("{0} - [{1}]", index + 1, list[index]);
                }
                Console.WriteLine("______________________");
            }
        }
    }
    

      

  • 相关阅读:
    C++中的命名空间
    [3D数学基础:图形与游戏开发]专栏前言
    Step01-题目申报
    《通用型云端物联网网关系统的设计与实现》
    博弈论题目总结(一)——简单组合游戏
    单纯形模板
    BZOJ 3434 [WC2014]时空穿梭 (莫比乌斯反演)
    BZOJ 3533 [SDOI2014]向量集 (线段树维护凸包)
    BZOJ 2161 布娃娃 (主席树)
    UOJ #86 mx的组合数 (数位DP+NTT+原根优化)
  • 原文地址:https://www.cnblogs.com/liqipeng/p/4576160.html
Copyright © 2020-2023  润新知