• perl 爬取html findvalues 方法


    node2:/root/pachong/yylc#cat t500.html 
        <p id="p-page"><input type='submit' style='display:none' name='turnPage' id='turnPage'><input type='hidden' id='pageNum' name='pageNum' value='1'/>
    
    <span οnmοuseοver="this.className='cur-s-page'" οnmοuseοut="this.className=''"><</span>
    
    <span class='cur-s-page'>1</span>
    
    <span οnclick="document.getElementById('pageNum').value=2;document.getElementById('turnPage').click();" οnmοuseοver="this.className='cur-s-page'" οnmοuseοut="this.className=''">
    2</span>
    
    <span οnclick="document.getElementById('pageNum').value=3;document.getElementById('turnPage').click();" οnmοuseοver="this.className='cur-s-page'" οnmοuseοut="this.className=''">
    3</span>...   
    
    <span οnclick="document.getElementById('pageNum').value=1749;document.getElementById('turnPage').click();" οnmοuseοver="this.className='cur-s-page'" οnmοuseοut="this.className=''">
    1749</span>
    
    <span οnclick="document.getElementById('pageNum').value=2;document.getElementById('turnPage').click();" οnmοuseοver="this.className='cur-s-page'" οnmοuseοut="this.className=''">
    ></span></p>
        </form>
    
    
    node2:/root/pachong/yylc#perl t400.pl 
    <html> @0 (IMPLICIT)
      <head> @0.0 (IMPLICIT)
      <body> @0.1 (IMPLICIT)
        <p id="p-page"> @0.1.0
          <input id="turnPage" name="turnPage" style="display:none" type="submit" /> @0.1.0.0
          <input id="pageNum" name="pageNum" type="hidden" value="1" /> @0.1.0.1
          <span οnmοuseοut="this.className=''" οnmοuseοver="this.className='cur-s-page'"> @0.1.0.2
            "<"
          <span class="cur-s-page"> @0.1.0.3
            "1"
          <span οnclick="document.getElementById('pageNum').value=2;document.getElementById('turnPage').click();" οnmοuseοut="this.className=''" οnmοuseοver="this.className='cur-s-page'"> @0.1.0.4
            "2"
          <span οnclick="document.getElementById('pageNum').value=3;document.getElementById('turnPage').click();" οnmοuseοut="this.className=''" οnmοuseοver="this.className='cur-s-page'"> @0.1.0.5
            "3"
          "...???"
          <span οnclick="document.getElementById('pageNum').value=1749;document.getElementById('turnPage').click();" οnmοuseοut="this.className=''" οnmοuseοver="this.className='cur-s-page'"> @0.1.0.7
            "1749"
          <span οnclick="document.getElementById('pageNum').value=2;document.getElementById('turnPage').click();" οnmοuseοut="this.className=''" οnmοuseοver="this.className='cur-s-page'"> @0.1.0.8
            ">"
    @pageString is < 1 2 3 1749 >
    
    
    node2:/root/pachong/yylc#cat t500.pl 
    use LWP::UserAgent;  
    use POSIX;  
    use HTML::TreeBuilder::XPath;   
    use Encode;   
    use HTML::TreeBuilder;    
    use Data::Dumper;
    use HTML::TreeBuilder::XPath;  
    use DBI;  
    use Encode;
        # Strictures are lexically scoped; enabling them here covers every
        # statement of this short script.
        use strict;
        use warnings;

        # Build an XPath-capable tree from the saved pagination fragment.
        my $tree = HTML::TreeBuilder::XPath->new;

        # parse_file returns undef (with $! set) when the file cannot be
        # opened; fail loudly instead of silently querying an empty tree.
        $tree->parse_file("t500.html")
            or die "Cannot parse t500.html: $!\n";

        # findvalues returns one value per matching node; each value is an
        # object with overloaded stringification, so it interpolates as text.
        my @pageString = $tree->findvalues('/html/body//p[@id="p-page"]/span');

        # The sigil in the label is escaped so that "@pageString" is printed
        # literally; only the second occurrence interpolates the array.
        print "\@pageString is @pageString\n";

        # Explicitly release the tree once we are done with it.
        $tree->delete;
    node2:/root/pachong/yylc#perl t500.pl 
    @pageString is < 1 2 3 1749 >
    
    
    
    findvalues ($path)
    
    Returns the values of the matching nodes as a list. 
    This is mostly the same as findnodes_as_strings,
     except that the elements of the list are objects (with overloaded stringification) instead of plain strings.
    
    返回匹配节点的值组成的列表。它与 findnodes_as_strings 基本相同,区别在于列表中的元素是对象(重载了字符串化操作),而不是普通字符串。
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    

  • 相关阅读:
    Bat脚本处理ftp超强案例解说
    struts2中的输入校验
    struts国际化
    Spring2.5+Hibernate3.3的集成
    SQL Server如果视图存在就删除
    struts2自定义拦截器
    struts2标签
    spring2.5的第一个简单应用的学习
    基于XML配置方式实现对action的所有方法进行校验
    DataGridViewCell 类
  • 原文地址:https://www.cnblogs.com/hzcya1995/p/13349801.html
Copyright © 2020-2023  润新知