use LWP::Simple;
use Parallel::ForkManager;
...
# Download work list: each entry is an array ref holding a source URL followed
# by the local file name; the loop further down unpacks every entry into
# ($link, $fn) and hands the pair to getstore().
my @links=(
["http://www.foo.bar/rulez.data","rulez_data.txt"],
["http://new.host/more_data.doc","more_data.doc"],
...
);
...
# Fetch every entry of @links in its own child process, with the fork manager
# constructed for a limit of 30.
my $pm = Parallel::ForkManager->new(30);

LINKS:
for my $pair (@links) {
    # start() forks.  It yields a true value in the parent, which moves on to
    # the next entry, and a false value in the child, which falls through and
    # performs the download.
    next LINKS if $pm->start;

    my $link = $pair->[0];
    my $fn   = $pair->[1];

    # getstore() hands back the HTTP status code of the transfer.
    my $status = getstore($link, $fn);
    unless ($status == RC_OK) {
        warn "Cannot get $fn from $link";
    }

    $pm->finish;    # the child process exits here
}

# The parent waits here until all children are done.
$pm->wait_all_children;
use LWP::Simple;
use Parallel::ForkManager;
# Download work list: one [ URL, local file name ] pair per page to fetch.
my @links = (
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/2757485/1', '1.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/2757485/2', '2.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/2757485/3', '3.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/1', '5.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/2', '6.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/3', '7.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/4', '8.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/5', '9.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/6', '10.txt' ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/7', '11.txt' ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/8', '12.txt' ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/9', '13.txt' ],
);
# Fetch $link with a configured LWP::UserAgent and save the response body
# to the file $fn.
#
# Arguments:
#   $link - URL to fetch
#   $fn   - path of the file that receives the response body
# Returns:
#   1 when the body was fetched and written; 0 when the HTTP request did not
#   succeed (a warning is issued and $fn is not opened at all).
# Dies:
#   when $fn cannot be opened, written or closed.
#
# NOTE(review): this sub is spelled "getsrore".  A call written as
# getstore(...) resolves to the function imported from LWP::Simple, not to
# this sub -- confirm which one callers are meant to use.
sub getsrore {
    my ($link, $fn) = @_;

    # Load the class explicitly instead of relying on LWP::Simple having
    # pulled it in as a side effect.
    require LWP::UserAgent;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;
    $ua->agent("Mozilla/8.0");

    # Fetch first and only touch the output file once there is something
    # worth saving, so a failed request does not leave an error page behind.
    my $response = $ua->get($link);
    unless ($response->is_success) {
        warn "Cannot get $fn from $link: " . $response->status_line . "\n";
        return 0;
    }

    # Three-argument open with a lexical handle, and low-precedence "or" so
    # the die is attached to open() itself.  The previous `">$fn" || die ...`
    # tested the (always true) string and therefore could never fire.
    open my $fh, '>', $fn or die "open csdn file failed:$!";
    binmode $fh;    # content() is written as-is, without newline translation
    print {$fh} $response->content or die "write to $fn failed: $!";

    # Buffered write errors only surface at close, so check it as well.
    close $fh or die "close $fn failed: $!";

    return 1;
}
# Fetch every entry of @links in its own child process, with the fork manager
# constructed for a limit of 30.
my $pm = Parallel::ForkManager->new(30);

LINKS:
foreach my $linkarray (@links) {
    $pm->start and next LINKS;    # fork; the parent moves on to the next link

    # Escape the sigil so the literal label "$linkarray===" is printed, which
    # is what the script's recorded output shows; unescaped, the string would
    # interpolate the reference itself as ARRAY(0x...).
    print "\$linkarray=== @$linkarray\n";

    my ($link, $fn) = @$linkarray;

    # getstore() is the function exported by LWP::Simple and returns the HTTP
    # status code of the transfer.  Report failures instead of discarding the
    # status, so a bad download is not mistaken for a good one.
    warn "Cannot get $fn from $link"
        if getstore($link, $fn) != RC_OK;

    $pm->finish;    # the child process exits here
}

# The parent waits here until all children are done.
$pm->wait_all_children;
[root@node01 fork]# time perl wget.pl
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/2757485/1 1.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/4 8.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/5 9.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/2 6.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/2757485/2 2.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/1 5.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/6 10.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/9 13.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/8 12.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/7 11.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/3 7.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/2757485/3 3.txt
real 0m8.108s
user 0m0.752s
sys 0m0.350s
[root@node01 fork]# cat sget.pl
use LWP::Simple;
use Parallel::ForkManager;
# Download work list: one [ URL, local file name ] pair per page to fetch.
my @links = (
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/2757485/1', '1.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/2757485/2', '2.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/2757485/3', '3.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/1', '5.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/2', '6.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/3', '7.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/4', '8.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/5', '9.txt'  ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/6', '10.txt' ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/7', '11.txt' ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/8', '12.txt' ],
    [ 'http://blog.csdn.net/zhaoyangjian724/article/category/1756685/9', '13.txt' ],
);
# Fetch $link with a configured LWP::UserAgent and save the response body
# to the file $fn.
#
# Arguments:
#   $link - URL to fetch
#   $fn   - path of the file that receives the response body
# Returns:
#   1 when the body was fetched and written; 0 when the HTTP request did not
#   succeed (a warning is issued and $fn is not opened at all).
# Dies:
#   when $fn cannot be opened, written or closed.
#
# NOTE(review): this sub is spelled "getsrore".  A call written as
# getstore(...) resolves to the function imported from LWP::Simple, not to
# this sub -- confirm which one callers are meant to use.
sub getsrore {
    my ($link, $fn) = @_;

    # Load the class explicitly instead of relying on LWP::Simple having
    # pulled it in as a side effect.
    require LWP::UserAgent;

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;
    $ua->agent("Mozilla/8.0");

    # Fetch first and only touch the output file once there is something
    # worth saving, so a failed request does not leave an error page behind.
    my $response = $ua->get($link);
    unless ($response->is_success) {
        warn "Cannot get $fn from $link: " . $response->status_line . "\n";
        return 0;
    }

    # Three-argument open with a lexical handle, and low-precedence "or" so
    # the die is attached to open() itself.  The previous `">$fn" || die ...`
    # tested the (always true) string and therefore could never fire.
    open my $fh, '>', $fn or die "open csdn file failed:$!";
    binmode $fh;    # content() is written as-is, without newline translation
    print {$fh} $response->content or die "write to $fn failed: $!";

    # Buffered write errors only surface at close, so check it as well.
    close $fh or die "close $fn failed: $!";

    return 1;
}
# Sequential baseline: fetch the entries of @links one after another in this
# single process (no forking), for comparison with the parallel variant.
foreach my $linkarray (@links) {
    # Escape the sigil so the literal label "$linkarray===" is printed, which
    # is what the script's recorded output shows; unescaped, the string would
    # interpolate the reference itself as ARRAY(0x...).
    print "\$linkarray=== @$linkarray\n";

    my ($link, $fn) = @$linkarray;

    # getstore() is the function exported by LWP::Simple and returns the HTTP
    # status code of the transfer.  Report failures instead of discarding the
    # status, so a bad download is not mistaken for a good one.
    warn "Cannot get $fn from $link"
        if getstore($link, $fn) != RC_OK;
}
[root@node01 fork]# time perl sget.pl
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/2757485/1 1.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/2757485/2 2.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/2757485/3 3.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/1 5.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/2 6.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/3 7.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/4 8.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/5 9.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/6 10.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/7 11.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/8 12.txt
$linkarray=== http://blog.csdn.net/zhaoyangjian724/article/category/1756685/9 13.txt
real 1m5.067s
user 0m0.150s
sys 0m0.051s
[oracle@node01 ~]$ ps -ef | grep wget
root 4612 4159 4 14:01 pts/1 00:00:00 perl wget.pl
root 4614 4612 2 14:01 pts/1 00:00:00 perl wget.pl
root 4615 4612 1 14:01 pts/1 00:00:00 perl wget.pl
root 4616 4612 2 14:01 pts/1 00:00:00 perl wget.pl
root 4617 4612 1 14:01 pts/1 00:00:00 perl wget.pl
root 4618 4612 1 14:01 pts/1 00:00:00 perl wget.pl
root 4619 4612 4 14:01 pts/1 00:00:00 perl wget.pl
root 4620 4612 1 14:01 pts/1 00:00:00 perl wget.pl
root 4621 4612 0 14:01 pts/1 00:00:00 perl wget.pl
root 4622 4612 2 14:01 pts/1 00:00:00 perl wget.pl
root 4623 4612 1 14:01 pts/1 00:00:00 perl wget.pl
root 4624 4612 3 14:01 pts/1 00:00:00 perl wget.pl