• 最近做采集时写的一个超简单实用的 HTML 解析类


    1. [文件] HtmlDom.php 
    <?php
    // Make libxml collect parse errors internally instead of emitting a PHP warning for
    // every piece of malformed markup. The previous setting is kept in $oldSetting; nothing
    // in the visible code restores it.
    $oldSetting = libxml_use_internal_errors( true ); 
    // Drop any libxml errors that were already buffered before this file was loaded.
    libxml_clear_errors();
    /**
     * 
     * -+-----------------------------------
     * |PHP5 Framework - 2011
     * |Web Site: www.iblue.cc
     * |E-mail: mejinke@gmail.com
     * |Date: 2012-10-12
     * -+-----------------------------------
     * 
     * @desc HTML parser. Wraps a DOMXPath and an absolute node path; every
     *       instance stands for either the whole document (empty node path)
     *       or one node inside it. An instance without a DOMXPath is a
     *       detached "nothing matched" placeholder: find() on it yields no
     *       results and text()/getAttribute() return false.
     * @author jingke
     */
    class XF_HtmlDom
    {
        /** DOMXPath bound to the loaded document, or null when detached. */
        private $_xpath = null;

        /** Absolute XPath of the node this object stands for; '' means the document. */
        private $_nodePath = '';

        /**
         * @param DOMXPath|null $xpath    XPath evaluator of an already loaded document.
         * @param string        $nodePath Absolute path of the wrapped node ('' = document).
         */
        public function __construct($xpath = null, $nodePath = '')
        {
            $this->_xpath = $xpath;
            $this->_nodePath = $nodePath;
        }

        /**
         * Loads an HTML document and binds this object to it.
         *
         * Sources starting with http:// or https:// are fetched with cURL
         * (redirects followed, the URL itself sent as referer); anything else
         * is read with file_get_contents(). When nothing could be fetched the
         * object is bound to an empty document, so later find() calls simply
         * return no nodes.
         *
         * @param string $url Remote URL or local path / stream wrapper.
         * @return XF_HtmlDom $this, for chaining.
         */
        public function loadHtml($url)
        {
            // Applies to PHP's own HTTP stream wrapper (file_get_contents); cURL sets its own below.
            ini_set('user_agent', 'Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17 –Nexus');

            // Only a real scheme prefix selects cURL; a local path that merely
            // contains "http" somewhere must still be read from disk.
            if (preg_match('#^\s*https?://#i', $url))
            {
                $ch = curl_init();
                curl_setopt($ch, CURLOPT_URL, $url);
                curl_setopt($ch, CURLOPT_HEADER, false);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
                curl_setopt($ch, CURLOPT_REFERER, $url);
                curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:6.0) Gecko/20100101 Firefox/6.0');
                curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
                $content = curl_exec($ch);
                curl_close($ch);
            }
            else
            {
                $content = file_get_contents($url);
            }

            $html = new DOMDocument();
            // Both fetchers return false on failure, and DOMDocument::loadHTML()
            // rejects an empty string, so only parse when there is real content.
            if (is_string($content) && $content !== '')
                $html->loadHTML($content);

            $this->_xpath = new DOMXPath($html);
            return $this;
        }

        /**
         * Finds nodes below the current node.
         *
         * On the document object the query is evaluated as '//' . $query (any
         * depth); on a node object as '<node path>/' . $query (relative to that
         * node). The object itself is not modified, so find() may be called
         * any number of times.
         *
         * @param string   $query XPath fragment, e.g. 'div[@class="x"]/a'.
         * @param int|null $index When numeric, return only the match at this
         *                        zero-based position; otherwise return all matches.
         * @return XF_HtmlDom[]|XF_HtmlDom Array of matches (possibly empty), or a
         *         single object when $index is numeric. A missing match is returned
         *         as a detached object whose text()/getAttribute() yield false.
         */
        public function find($query, $index = null)
        {
            $wantList = !is_numeric($index);

            if ($this->_xpath === null)
                return $wantList ? array() : new XF_HtmlDom();

            // Build the expression locally: appending to $this->_nodePath would
            // corrupt the stored path for every later call on this object.
            $prefix = ($this->_nodePath == '') ? '//' : $this->_nodePath . '/';
            $nodes = $this->_xpath->query($prefix . $query);

            if ($wantList)
            {
                $tmp = array();
                // query() returns false for a malformed expression.
                if ($nodes !== false)
                {
                    foreach ($nodes as $node)
                    {
                        $tmp[] = new XF_HtmlDom($this->_xpath, $node->getNodePath());
                    }
                }
                return $tmp;
            }

            $node = ($nodes !== false) ? $nodes->item((int) $index) : null;
            if ($node === null)
                return new XF_HtmlDom();

            return new XF_HtmlDom($this->_xpath, $node->getNodePath());
        }

        /**
         * Returns the text content of the current node.
         *
         * @return string|false The node's textContent, or false when this object
         *         is not bound to a node.
         */
        public function text()
        {
            $node = $this->_node();
            if ($node === null)
                return false;

            return $node->textContent;
        }

        /**
         * Returns the value of an attribute of the current node.
         *
         * @param string $name Attribute name.
         * @return string|false Attribute value ('' when the element lacks the
         *         attribute), or false when this object is not bound to an element.
         */
        public function getAttribute($name)
        {
            $node = $this->_node();
            // Only elements carry attributes; text/attribute nodes have no getAttribute().
            if (!($node instanceof DOMElement))
                return false;

            return $node->getAttribute($name);
        }

        /**
         * Property-style access: ->innertext maps to text(), any other name to
         * getAttribute($name).
         *
         * @param string $name
         * @return string|false
         */
        public function __get($name)
        {
            if ($name == 'innertext')
                return $this->text();
            else
                return $this->getAttribute($name);
        }

        /**
         * Resolves the stored node path to its DOM node.
         *
         * @return DOMNode|null Null when detached, when the object stands for the
         *         whole document, or when the path no longer matches anything.
         */
        private function _node()
        {
            if ($this->_nodePath == '' || $this->_xpath === null)
                return null;

            $nodes = $this->_xpath->query($this->_nodePath);
            if ($nodes === false)
                return null;

            return $nodes->item(0);
        }

    }

  • 相关阅读:
    Kubernetes+Federation打造跨多云管理服务
    idou老师教你学istio 31:Istio-proxy的report流程
    《软件工程》课程总结(补)
    《软件工程》课程总结
    十二周# 学习进度总结
    十一周# 学习进度总结
    十周# 学习进度总结
    九周# 学习进度总结
    八周# 学习进度总结
    团队项目——编写项目的Spec
  • 原文地址:https://www.cnblogs.com/xkzy/p/3946588.html
Copyright © 2020-2023  润新知