node2:/root/pachong#cat test.html
<div id="xxoo" class="pagelist">
<span> 159条 共8页</span><strong>1</strong> <a href="/zhaoyangjian724/article/category/1756685/2">2</a> <a href="/zhaoyangjian724/article/category/1756685/3">3</a> <a href="/zhaoyangjian724/article/category/1756685/4">4</a> <a href="/zhaoyangjian724/article/category/1756685/5">5</a> <a href="/zhaoyangjian724/article/category/1756685/6">...</a> <a href="/zhaoyangjian724/article/category/1756685/2">下一页</a> <a href="/zhaoyangjian724/article/category/1756685/8">尾页</a>
</div>
</div>
node2:/root/pachong#cat a5.pl
use strict;
use warnings;
use HTML::TreeBuilder::XPath;

# Parse test.html and print the id attribute of the first <div> element.
my $tree = HTML::TreeBuilder::XPath->new;

# parse_file yields a false value when the file cannot be opened, so stop
# here instead of silently continuing with an empty tree.
$tree->parse_file("test.html")
    or die "cannot parse test.html: $!\n";

# In list context find_by_tag_name returns every matching element;
# only the first <div> is used below.
my @links = $tree->find_by_tag_name('div');
@links or die "no <div> element found in test.html\n";

print "--------------------\n";

# attr() returns undef when the attribute is absent; print nothing in that
# case rather than triggering an "uninitialized value" warning.
my $id = $links[0]->attr('id');
print defined $id ? $id : '';
print "\n";

# Release the parse tree explicitly once it is no longer needed.
$tree->delete;
node2:/root/pachong#perl a5.pl
--------------------
xxoo
@Links = $tree->find_by_tag_name('div');
根据 div 标签查找元素，再取第一个 div 的 id 属性值：
print $Links[0]->attr('id');
node2:/root/pachong#cat a5.pl
use strict;
use warnings;
use HTML::TreeBuilder::XPath;

# Parse test.html and print the id attribute of the first <div> element.
my $tree = HTML::TreeBuilder::XPath->new;

# parse_file yields a false value when the file cannot be opened, so stop
# here instead of silently continuing with an empty tree.
$tree->parse_file("test.html")
    or die "cannot parse test.html: $!\n";

# In list context find_by_tag_name returns every matching element;
# only the first <div> is used below.
my @links = $tree->find_by_tag_name('div');
@links or die "no <div> element found in test.html\n";

print "--------------------\n";

# attr() returns undef when the attribute is absent; print nothing in that
# case rather than triggering an "uninitialized value" warning.
my $id = $links[0]->attr('id');
print defined $id ? $id : '';
print "\n";

# Release the parse tree explicitly once it is no longer needed.
$tree->delete;
node2:/root/pachong#perl a5.pl
--------------------
xxoo