从网页中提取链接 |
#!/usr/bin/perl -w
# Example adapted from the HTML::LinkExtor documentation: fetch a page and
# print the absolute URL of every image link found in it.
use strict;
use warnings;

use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;

my $url = "http://www.163.com/";    # for instance
my $ua  = LWP::UserAgent->new;

# Link values collected from <img> tags by callback().
my @imgs = ();

# Link callback handed to HTML::LinkExtor.
# Arguments: the tag name, followed by attribute name/value pairs.
# Tags other than 'img' are ignored; for 'img' every attribute value
# passed in is appended to @imgs.
sub callback {
    my ($tag, %attr) = @_;
    return if $tag ne 'img';
    push @imgs, values %attr;
    return;
}

# Make the parser. Unfortunately, we don't know the base yet
# (it might be different from $url).
my $p = HTML::LinkExtor->new(\&callback);

# Request the document and parse it as it arrives: the content callback
# hands each received chunk straight to the parser.
my $res = $ua->request(
    HTTP::Request->new(GET => $url),
    sub { $p->parse($_[0]) },
);
$p->eof;    # signal end of document so buffered input is flushed

# Stop with the HTTP status line instead of silently printing nothing
# useful when the fetch failed.
die "GET $url failed: ", $res->status_line, "\n" unless $res->is_success;

# Expand all image URLs to absolute ones, relative to the response's base.
my $base = $res->base;
@imgs = map { url($_, $base)->abs } @imgs;

# Print them out, one per line.
print join("\n", @imgs), "\n";