• Perl XPath:根据 a 标签查找 href 属性的值


    [root@yyjk sbin]# 
    [root@yyjk sbin]# cat a2.pl 
    use LWP::UserAgent;
    use HTTP::Cookies;
    use HTTP::Headers;
    use HTTP::Response;
    use Encode;
    use JSON;
    use File::Temp qw/tempfile/;
    use HTML::TreeBuilder::XPath;
    use Encode;
    use HTML::TreeBuilder;
    use Data::Dumper;
    use HTML::TreeBuilder::XPath;
    use DBI;
    use Encode;
    use strict;
    use warnings;

    # Purpose: parse the local HTML files into one tree, then print the href
    # value of every <a> element, each followed by the number of values that
    # attr('href') returned (one value per line, count on the next line).

    # NOTE(review): $ua, $now and $cookie_jar are not used by the link
    # extraction below. They are kept because constructing the cookie jar with
    # autosave may create/update lwp_cookies.txt -- confirm before removing.
    # NOTE(review): verify_hostname => 0 turns off TLS hostname verification;
    # do not reuse this agent for real HTTPS requests without reviewing that.
    my $ua = LWP::UserAgent->new( ssl_opts => { verify_hostname => 0 } );
    $ua->timeout(10);
    $ua->env_proxy;
    my $now = time();
    $ua->agent("Mozilla/8.0");
    my $cookie_jar = HTTP::Cookies->new(
        file           => 'lwp_cookies.txt',
        autosave       => 1,
        ignore_discard => 1,
    );

    my $tree = HTML::TreeBuilder::XPath->new;

    # Get the URLs: both files are fed into the same tree, in this order, so
    # anchors from test.html are listed before those from fh2.html.
    for my $html_file ( 'test.html', 'fh2.html' ) {
        # Report unreadable input instead of silently producing no output.
        unless ( -r $html_file ) {
            warn "cannot read $html_file, skipping\n";
            next;
        }
        $tree->parse_file($html_file);
    }

    # Get the category URLs: find every <a> tag and read its href attribute.
    for my $anchor ( $tree->find_by_tag_name('a') ) {
        # attr() is still collected into an array so the printed element
        # count keeps the same meaning as before.
        my @href = $anchor->attr('href');

        # An <a> without href yields undef; skip it rather than printing an
        # empty line and an "uninitialized value" warning.
        next unless defined $href[0];

        print @href, "\n";
        print scalar(@href), "\n";
    }

    # Release the element tree explicitly once we are done with it.
    $tree->delete;
    
    
    [root@yyjk sbin]# cat fh2.html 
    <div class="daohang-kuai">
            <div class="daohang-org"><span>风险管理部</span></div>
            <div class="daohang-links"><a href="http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信贷系统</a> <span >|</span><a href="http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信贷系统(授权码)</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">外部数据管理平</a> <span >|</span><a href="/tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">非现场监测系统</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="/tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">风险事件报送系统</a> <span >|</span><div class="clear"></div></div>
            </div>
                
                
                
                <div class="daohang-kuai">
                    <div class="daohang-org"><span>国际业务部</span></div>
                    <div class="daohang-links"><a href="http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">国际结算系统</a> <span >|</span><div class="clear"></div></div>
                </div>
    [root@yyjk sbin]# perl a2.pl 
    http://999.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://999.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://999.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://999.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://999.3.246.2:7001/newaml?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
    1
    /tailong/syslink/goAml.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=crmis&tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
    1
    http://999.3.200.16:7001/UtanWeb/index.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
    1
    http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    /tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
    http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
    1
  • 相关阅读:
    VMWare的Cloud Foundry实践(二):和MongoDB对接成功~
    MongoDB实践
    MongoDB 可读性比较差的语句
    C#异步编程之:(一)Task对象和lamda表达式探究
    创建自己的awaitable类型
    C#异步编程之(三):深入 Async 和 Await 的实现及其成本
    Node.js实践
    MVC+EasyUI+三层新闻网站建立(六 tabs的完成)
    MVC+EasyUI+三层新闻网站建立(七:分页查询出数据)
    MVC+EasyUI+三层新闻网站建立(八,详情页面完成)
  • 原文地址:https://www.cnblogs.com/hzcya1995/p/13349026.html
Copyright © 2020-2023  润新知