[root@yyjk sbin]#
[root@yyjk sbin]# cat a2.pl
use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Headers;
use HTTP::Response;
use Encode;
use JSON;
use File::Temp qw/tempfile/;
use HTML::TreeBuilder::XPath;
use Encode;
use HTML::TreeBuilder;
use Data::Dumper;
use HTML::TreeBuilder::XPath;
use DBI;
use Encode;
use strict;
use warnings;

# Purpose: parse the local HTML files and print, for every <a> element found,
# its href value on one line followed by the number of href values obtained
# for that element (1 when the attribute is present, 0 when it is absent).

# HTTP client setup. No request is issued anywhere in this script; the client
# is only configured here.
# NOTE(review): verify_hostname => 0 switches off TLS hostname verification.
# Confirm this is really wanted before the client is used for HTTPS requests.
my $ua = LWP::UserAgent->new( ssl_opts => { verify_hostname => 0 } );
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

# NOTE(review): the cookie jar is constructed but never attached to $ua in
# this script (no $ua->cookie_jar(...) call) -- confirm whether it should be.
my $cookie_jar = HTTP::Cookies->new(
    file           => 'lwp_cookies.txt',
    autosave       => 1,
    ignore_discard => 1,
);

# Both documents are fed into the same tree, in this order, so the link
# listing below covers the anchors of test.html followed by those of fh2.html.
my $tree = HTML::TreeBuilder::XPath->new;
for my $html_file ( 'test.html', 'fh2.html' ) {
    # parse_file returns undef (with $! set) when the file cannot be opened.
    # Report it instead of failing silently, but keep going so the other
    # file is still processed.
    $tree->parse_file($html_file)
        or warn "Cannot parse $html_file: $!\n";
}

# Get the URLs: look up every <a> tag and read its href attribute.
for my $anchor ( $tree->find_by_tag_name('a') ) {
    # attr() yields undef for an anchor without href; drop it so the count
    # printed below is 0 in that case and no undef is handed to print.
    my @hrefs = grep { defined } $anchor->attr('href');
    print @hrefs, "\n", scalar(@hrefs), "\n";
}

# Explicitly release the parse tree once it is no longer needed.
$tree->delete;
[root@yyjk sbin]# cat fh2.html
<div class="daohang-kuai">
<div class="daohang-org"><span>风险管理部</span></div>
<div class="daohang-links"><a href="http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信贷系统</a> <span >|</span><a href="http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信贷系统(授权码)</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">外部数据管理平</a> <span >|</span><a href="/tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">非现场监测系统</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="/tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">风险事件报送系统</a> <span >|</span><div class="clear"></div></div>
</div>
<div class="daohang-kuai">
<div class="daohang-org"><span>国际业务部</span></div>
<div class="daohang-links"><a href="http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">国际结算系统</a> <span >|</span><div class="clear"></div></div>
</div>
[root@yyjk sbin]# perl a2.pl
http://999.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
http://999.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
http://999.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
/tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
/tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
http://999.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
http://999.3.246.2:7001/newaml?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
1
/tailong/syslink/goAml.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
1
/tailong/syslink/oaforward.jsp?idName=crmis&tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
1
http://999.3.200.16:7001/UtanWeb/index.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org
1
http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
/tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
/tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1
http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org
1