Hi there,
Im extracing links from two betting websites.
One of them i can do with the code i have written, however the other doesnt return any data when i parse the website.
two two websites which are the $url's are:
heres my code:
#!/usr/perl/site/lib -w
use Date::Calc qw(Today);
use LWP::UserAgent;
use HTML::LinkExtor;
use DBI;
use Switch;
use URI::URL;
use Strict;
use Warnings;
my $sql;
my $statement;
my $statement_main;
my $siteid;
my $url;
my $title;
my $dateage;
my $siteId;
$sql = "SELECT * FROM bookies";
$statement_main = AccessDatabase($sql);
$fields = $statement_main->{NUM_OF_FIELDS};
while (my $ref = $statement_main->fetchrow_arrayref) {
for (my $i=0; $i < $fields; $i++) {
switch ($i){
case 0 {$siteid = $$ref[$i]}
case 1 {$url = $$ref[$i]}
case 2 {$title = $$ref[$i]}
}
print "$i = $$ref[$i] ";
}
#$url = " # for instance
my $ua = LWP::UserAgent:roxyAny->new;
$ua->env_proxy;
# Set up a callback that collect image links
my @imgs = ();
sub callback {
my($tag, %attr) = @_;
return if $tag ne 'a'; # we only look closer at <img ...>
push(@imgs, values %attr);
}
# Make the parser. Unfortunately, we don't know the base yet
# (it might be diffent from $url)
my $p = HTML::LinkExtor->new(\&callback);
# Request document and parse it as it arrives
$res = $ua->request(HTTP::Request->new(GET => $url),
sub {$p->parse($_[0])});
# Expand all image URLs to absolute ones
#my $base = $res->base;
@imgs = map { $_ = url($_, $base)->abs; } @imgs;
# Print them out
@imgs = grep(/sID/, @imgs);
print join("\n", @imgs), "\n";
foreach (@imgs) {
$sql = "insert into Links(url, title) values ('$_', '$title')";
$statement = AccessDatabase($sql);
}
}
sub AccessDatabase{
($sql)= @_;
my $dbh = DBI->connect("dbi:mysql:database=mybetting;host=localhost;user=root;password=password")
or die "Couldn't connect to database: $DBI::errstr\n";
$DBI::result = $dbh->prepare($sql);
$DBI::result->execute() or die "Couldn't execute query '$sql': $DBI::errstr\n";
$dbh->disconnect();
return $DBI::result;
};
******************************
Ive been looking at this for about a month and cant sort it out!!
If anyone knows a way to do this let me know!!
Kind regards
Nash
Im extracing links from two betting websites.
One of them i can do with the code i have written, however the other doesnt return any data when i parse the website.
two two websites which are the $url's are:
heres my code:
#!/usr/perl/site/lib -w
use Date::Calc qw(Today);
use LWP::UserAgent;
use HTML::LinkExtor;
use DBI;
use Switch;
use URI::URL;
use Strict;
use Warnings;
my $sql;
my $statement;
my $statement_main;
my $siteid;
my $url;
my $title;
my $dateage;
my $siteId;
$sql = "SELECT * FROM bookies";
$statement_main = AccessDatabase($sql);
$fields = $statement_main->{NUM_OF_FIELDS};
while (my $ref = $statement_main->fetchrow_arrayref) {
for (my $i=0; $i < $fields; $i++) {
switch ($i){
case 0 {$siteid = $$ref[$i]}
case 1 {$url = $$ref[$i]}
case 2 {$title = $$ref[$i]}
}
print "$i = $$ref[$i] ";
}
#$url = " # for instance
my $ua = LWP::UserAgent:roxyAny->new;
$ua->env_proxy;
# Set up a callback that collect image links
my @imgs = ();
sub callback {
my($tag, %attr) = @_;
return if $tag ne 'a'; # we only look closer at <img ...>
push(@imgs, values %attr);
}
# Make the parser. Unfortunately, we don't know the base yet
# (it might be diffent from $url)
my $p = HTML::LinkExtor->new(\&callback);
# Request document and parse it as it arrives
$res = $ua->request(HTTP::Request->new(GET => $url),
sub {$p->parse($_[0])});
# Expand all image URLs to absolute ones
#my $base = $res->base;
@imgs = map { $_ = url($_, $base)->abs; } @imgs;
# Print them out
@imgs = grep(/sID/, @imgs);
print join("\n", @imgs), "\n";
foreach (@imgs) {
$sql = "insert into Links(url, title) values ('$_', '$title')";
$statement = AccessDatabase($sql);
}
}
sub AccessDatabase{
($sql)= @_;
my $dbh = DBI->connect("dbi:mysql:database=mybetting;host=localhost;user=root;password=password")
or die "Couldn't connect to database: $DBI::errstr\n";
$DBI::result = $dbh->prepare($sql);
$DBI::result->execute() or die "Couldn't execute query '$sql': $DBI::errstr\n";
$dbh->disconnect();
return $DBI::result;
};
******************************
Ive been looking at this for about a month and cant sort it out!!
If anyone knows a way to do this let me know!!
Kind regards
Nash