Ok, I have put up with HTML::LinkExtor for long enough. The current code just seems to be too temperamental:
# Excerpt: fetch one page and print the link attributes of its <a> tags
# as absolute URLs.
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
my $ua = new LWP::UserAgent;
my $url = 'http://www.test.com';
# Set up a callback that collects the link attributes of <a> tags
my @urls = ();
my ($p, $res);
# callback2($tag, %attr): handed to HTML::LinkExtor->new below; appends the
# attribute values of every <a> tag to the @urls array declared above.
sub callback2 {
my($tag, %attr) = @_;
return if $tag ne 'a'; # skip every tag except <a>
push(@urls, values %attr);
}
# Make the parser. Unfortunately, we don't know the base yet
# (it might be different from $url)
$p = HTML::LinkExtor->new(\&callback2);
# Request document and parse it as it arrives
# NOTE(review): request() presumably returns a response object even when the
# fetch fails, in which case the "|| die" would never trigger - confirm
# against the LWP::UserAgent documentation.
$res = $ua->request(HTTP::Request->new(GET => $url),
sub {$p->parse($_[0])}) || die $!;
# Expand all collected URLs to absolute ones, relative to the response's base
my $base = $res->base;
@urls = map { $_ = url($_, $base)->abs; } @urls;
# $IN is not declared in this excerpt; it is used here only for its header().
print $IN->header();
print join("<BR>",@urls);
# NOTE(review): this closing brace has no matching opener in the excerpt.
}
Does anyone have a reasonably reliable method to grab URLs from a page?
TIA
Andy (mod)
andy@ultranerds.co.uk
Want to give me something back for my help? Please see my Amazon Wish List
GLinks ULTRA Package | GLinks ULTRA Package PRO
Links SQL Plugins | Website Design and SEO | UltraNerds | ULTRAGLobals Plugin | Pre-Made Template Sets | FREE GLinks Plugins!
Code:
# Test - fetch a page and print every <a> link it contains as an absolute URL.
#
# Takes no arguments. Downloads the hard-coded $url, feeds the body to an
# HTML::LinkExtor parser chunk by chunk as it arrives, resolves each collected
# link against the response's base URL, then prints the header obtained from
# the file-level $IN object followed by the URLs joined with "<BR>".
#
# Dies with the HTTP status line if the request does not succeed.
sub Test {
    use LWP::UserAgent;
    use HTML::LinkExtor;
    use URI::URL;

    my $ua  = LWP::UserAgent->new;
    my $url = 'http://www.test.com';

    # Links found by the parser callback, in document order.
    my @urls;

    # The collector has to be an anonymous sub. A *named* sub nested inside
    # Test() binds to the @urls of the first call only ("Variable will not
    # stay shared"), so every later call in the same interpreter would print
    # an empty list.
    my $collect_link = sub {
        my ($tag, %attr) = @_;
        return if $tag ne 'a';    # only anchor tags are of interest
        push @urls, values %attr;
        return;
    };

    # Make the parser. The base URL is not known yet (it might differ from
    # $url, e.g. after a redirect), so links are made absolute afterwards.
    my $parser = HTML::LinkExtor->new($collect_link);

    # Request the document and parse it as it arrives.
    my $res = $ua->request(
        HTTP::Request->new(GET => $url),
        sub { $parser->parse($_[0]) },
    );

    # Flush whatever the parser is still buffering at end of input.
    $parser->eof;

    # request() hands back a response object even on failure, so success has
    # to be tested explicitly; $! carries no information about HTTP errors.
    die "GET $url failed: " . $res->status_line . "\n"
        unless $res->is_success;

    # Expand all collected links to absolute ones.
    my $base     = $res->base;
    my @absolute = map { url($_, $base)->abs } @urls;

    print $IN->header();
    print join("<BR>", @absolute);
}
Does anyone have a reasonably reliable method to grab URLs from a page?
TIA
Andy (mod)
andy@ultranerds.co.uk
Want to give me something back for my help? Please see my Amazon Wish List
GLinks ULTRA Package | GLinks ULTRA Package PRO
Links SQL Plugins | Website Design and SEO | UltraNerds | ULTRAGLobals Plugin | Pre-Made Template Sets | FREE GLinks Plugins!