• C# 按地址获取网页数据并解析


    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using System.Net;
    using System.IO;
    
    namespace OneHand
    {
        /// <summary>
        /// Helper for downloading the raw HTML source of a web page.
        /// </summary>
        class googleMap
        {
            /// <summary>
            /// Downloads the page at <paramref name="Url"/> and returns its body decoded as GB2312.
            /// </summary>
            /// <param name="Url">Address of the page to fetch.</param>
            /// <returns>
            /// The decoded response body, or an empty string if anything went wrong
            /// (in which case a message box is shown).
            /// </returns>
            public static string GetWebContent(string Url)
            {
                // null selects the historical default encoding (GB2312) in the overload below.
                return GetWebContent(Url, null);
            }

            /// <summary>
            /// Downloads the page at <paramref name="Url"/> and returns its body decoded
            /// with the supplied encoding.
            /// </summary>
            /// <param name="Url">Address of the page to fetch.</param>
            /// <param name="encoding">
            /// Encoding used to decode the response body; when null, GB2312 is used.
            /// </param>
            /// <returns>
            /// The decoded response body, or an empty string if anything went wrong
            /// (in which case a message box is shown).
            /// </returns>
            public static string GetWebContent(string Url, Encoding encoding)
            {
                string strResult = "";
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);
                    // Request timeout in milliseconds (30 seconds).
                    request.Timeout = 30000;
                    // Ask intermediaries not to serve a cached copy.
                    request.Headers.Set("Pragma", "no-cache");

                    // Dispose the response, its stream and the reader so the underlying
                    // connection is released even when reading fails part-way through.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream streamReceive = response.GetResponseStream())
                    using (StreamReader streamReader = new StreamReader(
                        streamReceive, encoding ?? Encoding.GetEncoding("GB2312")))
                    {
                        strResult = streamReader.ReadToEnd();
                    }
                }
                catch (Exception)
                {
                    // Deliberate best-effort: any failure (bad URL, network error, timeout,
                    // unsupported encoding) is reported to the user and an empty string is returned.
                    MessageBox.Show("出错");
                }
                return strResult;
            }
        }
    }

    调用示例：获取网页源码并从中解析出地址

      /// <summary>
      /// Queries the ditu.google.cn maps search page for <paramref name="argAddress"/> and
      /// extracts the second line of text of the element with id "panel_A_2".
      /// </summary>
      /// <param name="argAddress">
      /// Free-form address text to search for. It is URL-escaped before being placed in the
      /// query string; null is treated as an empty query.
      /// </param>
      /// <returns>
      /// The extracted line, or an empty string when the page does not contain the expected
      /// elements or the panel text has fewer than two lines.
      /// </returns>
      private string Search(string argAddress)
      {
          // Build the URL to fetch.
          StringBuilder sb = new StringBuilder();
          sb.Append("http://ditu.google.cn/maps");
          sb.Append("?");
          sb.Append("hl=zh-CN&newwindow=1&safe=strict");
          sb.Append("&q=");
          // Escape the user-supplied text so spaces, '&', '#', and non-ASCII characters
          // cannot break or truncate the query string.
          sb.Append(Uri.EscapeDataString(argAddress ?? string.Empty));
          sb.Append("&bav=on.2,or.&bvm=bv.44158598,d.dGI&biw=1440&bih=775&um=1&ie=UTF-8&sa=N&tab=wl");

          // Download the HTML source of that URL.
          string strWebContent = googleMap.GetWebContent(sb.ToString());

          // Load the HTML into an off-screen WebBrowser so it can be queried as an HtmlDocument.
          // The control is disposed as soon as the text has been extracted.
          using (WebBrowser webb = new WebBrowser())
          {
              webb.Navigate("about:blank");
              HtmlDocument htmldoc = webb.Document.OpenNew(true);
              htmldoc.Write(strWebContent);

              // The lookup below used to run once per <tr> element, so a page without any
              // table rows produced no result; that behavior is kept.
              if (htmldoc.GetElementsByTagName("TR").Count == 0)
              {
                  return "";
              }

              // Both the results container and the first result panel must be present.
              if (htmldoc.GetElementById("resultspanel") == null)
              {
                  return "";
              }

              HtmlElement panel = htmldoc.GetElementById("panel_A_2");
              if (panel == null || panel.InnerText == null)
              {
                  return "";
              }

              // Empty lines are kept so that the line index stays stable.
              string[] contentLines = panel.InnerText.Split(new string[] { "\r\n" }, StringSplitOptions.None);
              if (contentLines.Length < 2)
              {
                  return "";
              }

              // The address is the second line of the panel text.
              return contentLines[1];
          }
      }
  • 相关阅读:
    SecureCRT
    Jsoup 标签选择器 选择img标签中src的值
    使用Jsoup 爬取网易首页所有的图片
    java自定义类型 比较排序 Comparator接口
    eclipse下导入jdk源码
    java爬虫--使用正则表达式获取网页中的email
    Java正则表达式--Matcher.group函数的用法
    使用org.jsoup.Jsoup下载网络中的图片
    Tomcat中的Session小结
    关于JAVA中的static方法、并发问题以及JAVA运行时内存模型
  • 原文地址:https://www.cnblogs.com/Kakasi/p/2998259.html
Copyright © 2020-2023  润新知