• 正则抓取SINA天气预报数据!!!


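            /// <summary>
            /// Scrapes the Sina one-day weather forecast (six regional map pages plus the
            /// national map page) with regular expressions.
            /// </summary>
            /// <returns>
            /// An XML document whose root element contains a single "citylist" element
            /// (attribute "vdatetime" = today's short date string) holding one "city" element
            /// per matched forecast, with the attributes orders, city, weather, temperature,
            /// windway and wind.
            /// </returns>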
            public static XmlDataDocument GetSinaWeather()
            {
                //http://weather.news.sina.com.cn/images/figureWeather/map/northEast.html
                //http://weather.news.sina.com.cn/images/figureWeather/map/eastOfChina.html
                //http://weather.news.sina.com.cn/images/figureWeather/map/northOfChina.html
                //http://weather.news.sina.com.cn/images/figureWeather/map/southOfChina.html
                //http://weather.news.sina.com.cn/images/figureWeather/map/southWest.html
                //http://weather.news.sina.com.cn/images/figureWeather/map/northWest.html

                XmlDataDocument objXml = new XmlDataDocument();
                objXml.LoadXml("<root />");
                //东北
                string content = GetContent("http://weather.news.sina.com.cn/images/figureWeather/map/northEast.html","gb2312");

                //抓到内容后,开始分析数据
                Regex regex;
                Match mc;
                XmlElement objXmlCityList = objXml.CreateElement("citylist");

                string partten = "城市:(?<city>[^<]+) <br> 天气:(?<weather>[^<]+)<br> 温度:(?<temperature>[^<]+)<br>风向:(?<windway>[^<]+)<br>风力:(?<wind>[^<]+) <br>";
                regex = new Regex(partten, RegexOptions.Compiled | RegexOptions.IgnoreCase);
                objXmlCityList.SetAttribute("vdatetime", DateTime.Now.ToShortDateString());
                //城市:哈尔滨市 <br> 天气:多云转晴<br> 温度:-10 ℃~-1 ℃<br>风向: 西风<br>风力:小于3级<br>
       int i= 1;
                for (mc = regex.Match(content), i = 1; mc.Success; mc = mc.NextMatch(), i++)
                {
                    try
                    {
                        Yesun.Edzh.BLL.Log.LogHelper.WriteError(i + "、" + mc.Groups["city"].Value.Trim());
                        XmlElement objXmlElementCity = objXml.CreateElement("city");
                        objXmlElementCity.SetAttribute("orders", "1");
                        objXmlElementCity.SetAttribute("city", mc.Groups["city"].Value.Trim());
                        objXmlElementCity.SetAttribute("weather", mc.Groups["weather"].Value.Trim());
                        objXmlElementCity.SetAttribute("temperature", mc.Groups["temperature"].Value.Trim());
                        objXmlElementCity.SetAttribute("windway", mc.Groups["windway"].Value.Trim());
                        objXmlElementCity.SetAttribute("wind", mc.Groups["wind"].Value.Trim());
                        objXmlCityList.AppendChild(objXmlElementCity);
                    }
                    catch(Exception ex)
                    {
                        Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
                    }
                }

                //东
                content = GetContent("http://weather.news.sina.com.cn/images/figureWeather/map/eastOfChina.html", "gb2312");
                for (mc = regex.Match(content), i = 1; mc.Success; mc = mc.NextMatch(), i++)
                {
                    try
                    {
                        XmlElement objXmlElementCity = objXml.CreateElement("city");
                        objXmlElementCity.SetAttribute("orders", "2");
                        objXmlElementCity.SetAttribute("city", mc.Groups["city"].Value.Trim());
                        objXmlElementCity.SetAttribute("weather", mc.Groups["weather"].Value.Trim());
                        objXmlElementCity.SetAttribute("temperature", mc.Groups["temperature"].Value.Trim());
                        objXmlElementCity.SetAttribute("windway", mc.Groups["windway"].Value.Trim());
                        objXmlElementCity.SetAttribute("wind", mc.Groups["wind"].Value.Trim());
                        objXmlCityList.AppendChild(objXmlElementCity);
                    }
                    catch (Exception ex)
                    {
                        Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
                    }
                }
                //北
                content = GetContent("http://weather.news.sina.com.cn/images/figureWeather/map/northOfChina.html", "gb2312");
                for (mc = regex.Match(content), i = 1; mc.Success; mc = mc.NextMatch(), i++)
                {
                    try
                    {
                        Yesun.Edzh.BLL.Log.LogHelper.WriteError(i + "、" + mc.Groups["city"].Value.Trim());
                        XmlElement objXmlElementCity = objXml.CreateElement("city");
                        objXmlElementCity.SetAttribute("orders", "3");
                        objXmlElementCity.SetAttribute("city", mc.Groups["city"].Value.Trim());
                        objXmlElementCity.SetAttribute("weather", mc.Groups["weather"].Value.Trim());
                        objXmlElementCity.SetAttribute("temperature", mc.Groups["temperature"].Value.Trim());
                        objXmlElementCity.SetAttribute("windway", mc.Groups["windway"].Value.Trim());
                        objXmlElementCity.SetAttribute("wind", mc.Groups["wind"].Value.Trim());
                        objXmlCityList.AppendChild(objXmlElementCity);
                    }
                    catch (Exception ex)
                    {
                        Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
                    }
                }

                //南
                content = GetContent("http://weather.news.sina.com.cn/images/figureWeather/map/southOfChina.html", "gb2312");
                for (mc = regex.Match(content), i = 1; mc.Success; mc = mc.NextMatch(), i++)
                {
                    try
                    {
                        XmlElement objXmlElementCity = objXml.CreateElement("city");
                        objXmlElementCity.SetAttribute("orders", "4");
                        objXmlElementCity.SetAttribute("city", mc.Groups["city"].Value.Trim());
                        objXmlElementCity.SetAttribute("weather", mc.Groups["weather"].Value.Trim());
                        objXmlElementCity.SetAttribute("temperature", mc.Groups["temperature"].Value.Trim());
                        objXmlElementCity.SetAttribute("windway", mc.Groups["windway"].Value.Trim());
                        objXmlElementCity.SetAttribute("wind", mc.Groups["wind"].Value.Trim());
                        objXmlCityList.AppendChild(objXmlElementCity);
                    }
                    catch (Exception ex)
                    {
                        Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
                    }
                }

                //南西
                content = GetContent("http://weather.news.sina.com.cn/images/figureWeather/map/southWest.html", "gb2312");
                for (mc = regex.Match(content), i = 1; mc.Success; mc = mc.NextMatch(), i++)
                {
                    try
                    {
                        XmlElement objXmlElementCity = objXml.CreateElement("city");
                        objXmlElementCity.SetAttribute("orders", "5");
                        objXmlElementCity.SetAttribute("city", mc.Groups["city"].Value.Trim());
                        objXmlElementCity.SetAttribute("weather", mc.Groups["weather"].Value.Trim());
                        objXmlElementCity.SetAttribute("temperature", mc.Groups["temperature"].Value.Trim());
                        objXmlElementCity.SetAttribute("windway", mc.Groups["windway"].Value.Trim());
                        objXmlElementCity.SetAttribute("wind", mc.Groups["wind"].Value.Trim());
                        objXmlCityList.AppendChild(objXmlElementCity);
                    }
                    catch (Exception ex)
                    {
                        Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
                    }
                }

                //北西
                content = GetContent("http://weather.news.sina.com.cn/images/figureWeather/map/northWest.html", "gb2312");
                for (mc = regex.Match(content), i = 1; mc.Success; mc = mc.NextMatch(), i++)
                {
                    try
                    {
                        XmlElement objXmlElementCity = objXml.CreateElement("city");
                        objXmlElementCity.SetAttribute("orders", "6");
                        objXmlElementCity.SetAttribute("city", mc.Groups["city"].Value.Trim());
                        objXmlElementCity.SetAttribute("weather", mc.Groups["weather"].Value.Trim());
                        objXmlElementCity.SetAttribute("temperature", mc.Groups["temperature"].Value.Trim());
                        objXmlElementCity.SetAttribute("windway", mc.Groups["windway"].Value.Trim());
                        objXmlElementCity.SetAttribute("wind", mc.Groups["wind"].Value.Trim());
                        objXmlCityList.AppendChild(objXmlElementCity);
                    }
                    catch (Exception ex)
                    {
                        Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
                    }
                }
                partten = "城市:(?<city>[^<]+) <br> 天气:(?<weather>[^<]+)<br> 温度:(?<temperature>[^<]+)<br>风力:(?<wind>[^<]+)<br>";
                regex = new Regex(partten, RegexOptions.Compiled | RegexOptions.IgnoreCase);
                //北西 城市:武汉 <br> 天气:小雨转阴<br> 温度:10 ℃~16 ℃<br>风力:小于3级<br>
                content = GetContent("http://weather.news.sina.com.cn/images/figureWeather/map/wholeNation.html", "gb2312");
                for (mc = regex.Match(content), i = 1; mc.Success; mc = mc.NextMatch(), i++)
                {
                    try
                    {
                        XmlElement objXmlElementCity = objXml.CreateElement("city");
                        objXmlElementCity.SetAttribute("orders", "7");
                        objXmlElementCity.SetAttribute("city", mc.Groups["city"].Value.Trim());
                        objXmlElementCity.SetAttribute("weather", mc.Groups["weather"].Value.Trim());
                        objXmlElementCity.SetAttribute("temperature", mc.Groups["temperature"].Value.Trim());
                        objXmlElementCity.SetAttribute("windway", "");
                        objXmlElementCity.SetAttribute("wind", mc.Groups["wind"].Value.Trim());
                        objXmlCityList.AppendChild(objXmlElementCity);
                    }
                    catch (Exception ex)
                    {
                        Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
                    }
                }

                objXml.DocumentElement.AppendChild(objXmlCityList);

                return objXml;
            }

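            /// <summary>
            /// Downloads a page and returns its body decoded as text.
            /// </summary>
            /// <param name="url">Address of the page to download.</param>
            /// <param name="encoding">Pass "utf-8" to decode the response as UTF-8; otherwise it is decoded as gb2312.</param>
            /// <returns>The decoded page text, or an empty string when the download or decoding fails (the error is logged).</returns>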
            private static string GetContent(string url, string encoding)
            {
                string str = "";
                WebClient client = new WebClient();
                client.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
                client.Headers.Add("Accept-Language", "zh-cn");
                client.Headers.Add("UA-CPU", "x86");
                client.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
                try
                {
                    byte[] buffer = client.DownloadData(url);
                    if (encoding == "utf-8")
                    {
                        str = System.Text.Encoding.GetEncoding("utf-8").GetString(buffer, 0, buffer.Length);
                    }
                    else
                    {
                        str = System.Text.Encoding.GetEncoding("gb2312").GetString(buffer, 0, buffer.Length);
                    }
                }
                catch (Exception ex)
                {
                    Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
                }
                return str;
            }

  • 相关阅读:
    python 基于gevent协程实现socket并发
    python asyncio
    python 池 协程
    python
    python 守护进程
    python 线程 threading模块
    python 安装Django失败处理
    python 队列
    python 锁
    继承,抽象类,多态,封装
  • 原文地址:https://www.cnblogs.com/Fooo/p/617662.html
Copyright © 2020-2023  润新知