• perl HTML::LinkExtor模块(2)


     use strict;
     use warnings;

     use LWP::Simple;
     use HTML::LinkExtor;

     # Page whose <img> links are listed.
     my $url = "https://tieba.baidu.com/p/4929234512";

     # LWP::Simple::get returns undef when the fetch fails, so stop early
     # instead of handing undef to the parser.
     my $html_code = get($url);
     die "Failed to fetch $url\n" unless defined $html_code;

     # Pass a code reference (\&IMG). A bare &IMG would call the sub right
     # here with the current @_ and hand its return value to new(), so no
     # callback would ever be registered.
     my $img_link = HTML::LinkExtor->new(\&IMG);
     $img_link->parse($html_code);
     $img_link->eof;    # signal end of document so buffered text is flushed

     # Callback invoked by HTML::LinkExtor for each tag that carries links.
     #   $tag   - lower-case tag name
     #   %links - link attribute name => attribute value pairs for that tag
     # Prints one "attribute -> value" line per link attribute of <img> tags
     # and ignores every other tag.
     sub IMG {
         my ($tag, %links) = @_;

         # Only image tags are of interest.
         return unless $tag eq 'img';

         for my $key (keys %links) {
             print "$key -> $links{$key}\n";
         }
         return;
     }
    18 
    19 
    20 
    21 # src -> https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/343a66656e6768756f7069616e323031af7c
    22 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    23 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    24 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    25 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    26 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    27 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    28 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    29 # src -> https://ss0.bdstatic.com/9r-1bjml2gcT8tyhnq/fc-feed/0/pic/51d89e69dd318a8c2bcb07341879ac64.jpg
    30 # src -> https://ss0.bdstatic.com/9r-1bjml2gcT8tyhnq/fc-feed/0/pic/223a419756a2209b84f8f306d021a4a5.jpg
    31 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    32 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    33 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    34 # src -> https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon25.png
    35 # src -> https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon25.png
    36 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    37 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    38 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    39 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    40 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    41 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    42 # src -> //tb2.bdstatic.com/tb/static-pb/img/head_80.jpg
    43 # src -> https://imgsa.baidu.com/forum/pic/item/d933c895d143ad4bcf1ab5478b025aafa40f0604.jpg
    44 # src -> https://imgsa.baidu.com/forum/pic/item/78f0f736afc379319921ed85e2c4b74542a911d4.jpg
    45 # src -> https://imgsa.baidu.com/forum/pic/item/2f2eb9389b504fc23bf50aaaecdde71191ef6df3.jpg
    46 # src -> https://imgsa.baidu.com/forum/pic/item/d100baa1cd11728ba5c4656bc1fcc3cec2fd2c8a.jpg
    47 # src -> https://imgsa.baidu.com/forum/pic/item/2df5e0fe9925bc31b71993f157df8db1cb137017.jpg

    当然，你还可以加一个正则，把不是以 http:// 或 https:// 开头的链接（例如上面以 // 开头的相对协议链接）过滤掉。

  • 相关阅读:
    bootstrap 的页码显示问题-------------德州
    大神的---解决tomcat内存溢出问题----tomcat报错:This is very likely to create a memory leak问题解决
    如何设置tomcat,直接通过IP 访问
    如何把MyEclipse中的web项目导入到Eclipse中运行
    易捷框架之EChart 的使用
    打包jar文件并自动运行
    『PLSQL』在oracle表中怎样创建自增长字段?
    解决MySql 数据库 提示:1045 access denied for user 'root'@'localhost' using password yes
    Oracle 与 MySQL 批量添加
    SPR, subpixel rendering
  • 原文地址:https://www.cnblogs.com/perl6/p/6536882.html
Copyright © 2020-2023  润新知