• C#收集网页中的EMail实现源码


    C#收集网页中的EMail实现源码:

         //CAll
            /// <summary>
            /// Starts a new thread that runs <c>GetEmailAddress</c> for the given URL.
            /// The remainder of the method (link handling) is elided in this listing.
            /// </summary>
            /// <param name="urlStr">URL handed to the worker thread as its start argument.</param>
            private void GetAllURL(string urlStr)
            {
                // ParameterizedThreadStart passes urlStr to GetEmailAddress as an object.
                new Thread(new ParameterizedThreadStart(GetEmailAddress)).Start(urlStr); 
                   ...    // process the links in the page (omitted by the original author)
                  }
            /**//// <summary>
            /// 提取网页中的Eamil
            /// </summary>
            /// <param name="urlStr">网页地址</param>
            private void GetEmailAddress(object urlStr)
            {
                ArrayList EmailStrs = GetWebInfo((string)urlStr, @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)");   //得到Email
                foreach (object tmp in EmailStrs)
                {
                    Invoke(new AppendTextDelegate(AppendText), new object[] { tmp + "\r\n" });
                }
            }

            private ArrayList GetWebInfo(string URlStr,string RegExpress)
            {
                //打开指定页
                HttpWebRequest webRequest1 = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
                webRequest1.Method = "GET";
                HttpWebResponse response = (HttpWebResponse)webRequest1.GetResponse();
                String textData = new StreamReader(response.GetResponseStream(), Encoding.Default).ReadToEnd();


                //用正则表达式,提取指定内容,带一个变量
                Regex r;
                Match m;
                r = new Regex(RegExpress,   //@"copyTitle.\'(?<AdInfo>.*)\'",
                    RegexOptions.IgnoreCase | RegexOptions.Compiled);
                int pos1=RegExpress.IndexOf("(?<");
                int pos2=RegExpress.IndexOf(">",pos1);
                string DestionKey = RegExpress.Substring(pos1 + 3, pos2 - pos1 - 3);
                string  AdStr = "";
                ArrayList Result = new ArrayList();
                for (m = r.Match(textData); m.Success; m = m.NextMatch())
                {
                    AdStr = m.Result("${" + DestionKey + "}").Trim();   //地址
                    Result.Add(AdStr);
                }
                return Result;
            }

    上述代码中的关键是书写提取EMail的表达式:
                   @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)"

  • 相关阅读:
    HDU 2563 统计问题 (DFS + 打表)
    KendoUi中KendoDropDownList控件的使用——三级级联模块的实现
    POJ 1325 && ZOJ 1364--Machine Schedule【二分图 && 最小点覆盖数】
    crontab FAQ
    思科2960trunk vlan配置及路由IP配置
    hdoj-1593-find a way to escape【数学题】
    Java编程思想(四) —— 复用类
    在 Android 应用程序中使用 SQLite 数据库以及怎么用
    Swift Standard Library Reference.pdf
    VC、IE、ASP环境下打印、预备的完美解决方式
  • 原文地址:https://www.cnblogs.com/hfzsjz/p/1656714.html
Copyright © 2020-2023  润新知