• 实现百度搜索页面网页遍历


    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Text;
    using System.Windows.Forms;
    using mshtml;
    using System.Threading;
    using mshtml;
    
    namespace WindowsFormsApplication3
    {
        /// <summary>
        /// Form that loads a Baidu search-result page into the embedded
        /// <c>webBrowser1</c> control, shows the result URLs found on the page one by
        /// one, and then simulates a click on the "next page" link.
        /// </summary>
        public partial class Form1 : Form
        {
            /// <summary>Base address of the Baidu search query; the keyword is appended to it.</summary>
            private const string SearchUrlPrefix = "http://www.baidu.com/s?wd=";

            /// <summary>Exact inner text of the element that is clicked to go to the next page.</summary>
            private const string NextPageText = "下一页>";

            /// <summary>Marker that follows the displayed URL inside a result element's inner HTML.</summary>
            private const string UrlTerminator = "&nbsp";

            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>
            /// Opens the Baidu search page for the keyword typed into <c>textBox1</c>
            /// inside the embedded browser control.
            /// </summary>
            public void baidu()
            {
                // Escape the keyword so that characters such as '&', '#', '+' or spaces
                // stay part of the wd= value instead of altering the query string.
                string keyword = Uri.EscapeDataString(textBox1.Text.Trim());
                webBrowser1.Navigate(SearchUrlPrefix + keyword);
            }

            /// <summary>Starts the search when button1 is clicked.</summary>
            /// <param name="sender">Event source (unused).</param>
            /// <param name="e">Event data (unused).</param>
            private void button1_Click(object sender, EventArgs e)
            {
                baidu();
            }

            /// <summary>
            /// Cancels the new-window request so the link does not open in an external
            /// browser, and navigates the embedded control to the text currently shown
            /// in its status line instead.
            /// </summary>
            /// <param name="sender">Event source (unused).</param>
            /// <param name="e">Event data; <c>Cancel</c> is always set to true.</param>
            private void webBrowser1_NewWindow(object sender, CancelEventArgs e)
            {
                e.Cancel = true;

                // NOTE(review): this relies on the status text holding the hovered link's
                // address - confirm. When it is empty there is nothing to navigate to.
                string target = webBrowser1.StatusText;
                if (!string.IsNullOrEmpty(target))
                {
                    webBrowser1.Navigate(target);
                }
            }

            /// <summary>
            /// Walks every element of the loaded page and simulates a click on the first
            /// one whose inner text is exactly "下一页>" (next page). Does nothing when
            /// no document is loaded or no such element exists.
            /// </summary>
            public void bianli()
            {
                IHTMLDocument2 doc = GetHtmlDocument();
                if (doc == null)
                {
                    return;
                }

                foreach (IHTMLElement ele in doc.all)
                {
                    if (ele.innerText == NextPageText)
                    {
                        ele.click();
                        break;
                    }
                }
            }

            /// <summary>
            /// Runs the page scan once the completed URL equals the URL of the control's
            /// current document.
            /// </summary>
            /// <param name="sender">Event source (unused).</param>
            /// <param name="e">Event data carrying the URL that finished loading.</param>
            private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
            {
                HtmlDocument current = webBrowser1.Document;
                if (current != null && e.Url == current.Url)
                {
                    bianliwangye();
                }
            }

            /// <summary>Scans the currently loaded page when button2 is clicked.</summary>
            /// <param name="sender">Event source (unused).</param>
            /// <param name="e">Event data (unused).</param>
            private void button2_Click(object sender, EventArgs e)
            {
                bianliwangye();
            }

            /// <summary>
            /// Enumerates the elements of the loaded page whose class name is "g" or
            /// "c-showurl", shows the part of their inner HTML that precedes the first
            /// "&amp;nbsp" in a message box, and finally calls <see cref="bianli"/> to
            /// move on to the next page. Does nothing when no document is loaded.
            /// </summary>
            public void bianliwangye()
            {
                IHTMLDocument2 document = GetHtmlDocument();
                if (document == null)
                {
                    return;
                }

                IHTMLElementCollection elements = (IHTMLElementCollection)document.all;
                foreach (IHTMLElement element in elements)
                {
                    if (element.className == "g" || element.className == "c-showurl")
                    {
                        string url = ExtractLeadingUrl(element.innerHTML);
                        if (url != null)
                        {
                            MessageBox.Show(url);
                        }
                    }
                }

                // All matching elements of this page have been shown; go to the next page.
                bianli();
            }

            /// <summary>
            /// Returns the COM document behind <c>webBrowser1</c>, or null when no page
            /// has been loaded yet or the document does not expose IHTMLDocument2.
            /// </summary>
            private IHTMLDocument2 GetHtmlDocument()
            {
                HtmlDocument managed = webBrowser1.Document;
                if (managed == null)
                {
                    return null;
                }

                return managed.DomDocument as IHTMLDocument2;
            }

            /// <summary>
            /// Returns the text that precedes the first "&amp;nbsp" in
            /// <paramref name="html"/>, or null when <paramref name="html"/> is null or
            /// does not contain that marker.
            /// </summary>
            /// <param name="html">Inner HTML of a result element; may be null.</param>
            private static string ExtractLeadingUrl(string html)
            {
                if (html == null)
                {
                    return null;
                }

                // A single ordinal search replaces the former Contains + IndexOf pair.
                int end = html.IndexOf(UrlTerminator, StringComparison.Ordinal);
                return end >= 0 ? html.Substring(0, end) : null;
            }
        }
    }
  • 相关阅读:
    xml中DTD关键字说明
    xml学习笔记
    HTTP请求方法:GET和POST区别
    三种方法从键盘输入
    crontab定时器
    收藏一篇关于Asp.net Response.Filter的文章
    MethodImplOptions.Synchronized的一点讨论
    需要知道关于struct的一些事情
    Excel使用技巧总结
    HTTP协议中POST、GET、HEAD、PUT等请求方法以及一些常见错误
  • 原文地址:https://www.cnblogs.com/happinesshappy/p/4596297.html
Copyright © 2020-2023  润新知