################################ # WWW Module # ################################ package BotModules::WWW; use vars qw(@ISA); # Need HTML::Entities for decode_entities() in wwwtitle use HTML::Entities; @ISA = qw(BotModules); 1; # RegisterConfig - Called when initialised, should call registerVariables sub RegisterConfig { my $self = shift; $self->SUPER::RegisterConfig(@_); # $self->registerVariables( # # [ name, save?, settable? ] # ['x', 1, 1, 0], # ); } sub Help { my $self = shift; my ($event) = @_; return { '' => 'The WWW module provides a web interface.', 'wwwsize' => 'Reports on the size of a webpage. Syntax: \'wwwsize http://...\'', 'wwwlint' => 'Reports on whether the webpage contains any obvious (I mean _really_ obvious) no-nos like or document.all. Syntax: \'wwwlint http://...\'', 'wwwdoctype' => 'Reports on the doctype of a webpage. (Warning: Does not check that the doctype is not commented out!) Syntax: \'wwwdoctype http://...\'', 'wwwtitle' => 'Tries to heuristically determine a web page\'s title. Syntax: \'wwwtitle http://...\'', }; } sub Told { my $self = shift; my ($event, $message) = @_; if ($message =~ /^\s*wwwsize\s+(.+?)\s*$/osi) { $self->Fetch($event, $1, 'size'); } elsif ($message =~ /^\s*wwwlint\s+(.+?)\s*$/osi) { $self->Fetch($event, $1, 'lint'); } elsif ($message =~ /^\s*wwwdoctype\s+(.+?)\s*$/osi) { $self->Fetch($event, $1, 'doctype'); } elsif ($message =~ /^\s*wwwtitle\s+(.+?)\s*$/osi) { $self->Fetch($event, $1, 'title'); } else { return $self->SUPER::Told(@_); } return 0; # dealt with it... } sub Fetch { my $self = shift; my ($event, $uri, $type) = @_; $self->getURI($event, $uri, $type); } sub GotURI { my $self = shift; my ($event, $uri, $output, $type) = @_; my $chars = length($output); if ($type eq 'size') { if ($chars) { $self->say($event, "$uri is $chars bytes long."); } else { $self->say($event, "$uri is either empty, or I could not download it."); } } elsif ($type eq 'lint') { # ignore whether things are commented out or not. unless ($chars) { $self->say($event, "$uri is either empty, or I could not download it."); } else { my @status; if ($output =~ /document\.all/os) { push(@status, 'document.all'); } if ($output =~ /document\.layers/os) { push(@status, 'document.layers'); } if ($output =~ / tag'); } if (@status) { my $status = shift(@status); if (@status) { while (scalar(@status) > 1) { $status .= ', '.shift(@status); } $status .= ' and '.shift(@status); } $self->say($event, "$uri contains $status."); } else { $self->say($event, "$uri doesn't have any _obvious_ flaws..."); # XXX doesn't work! try php.net } } } elsif ($type eq 'doctype') { # assume doctype is not commented. unless ($chars) { $self->say($event, "$uri is either empty, or I could not download it."); } elsif ($output =~ /(]*>)/osi) { my $doctype = $1; $doctype =~ s/[\n\r]+/ /gosi; # -- #mozilla was here -- # it would break 99% of the web if we didn't do it that way. # including most of my test cases ;-) # test cases don't matter... # you'll fix them if we decide they're wrong # but the web is a problem if (length($doctype) > 220) { # arbitrary length greater than two 80 character lines $self->say($event, "$uri has a very long and possibly corrupted doctype (maybe it has an internal subset)."); } else { $self->say($event, "$uri has the following doctype: $doctype"); } } else { $self->say($event, "$uri has no specified doctype."); } } elsif ($type eq 'title') { # assume doctype is not commented. unless ($chars) { $self->say($event, "$uri is either empty, or I could not download it."); } elsif ($output =~ /(.*?)<\/title\s*>/osi or $output =~ /(.*?)<\/h1\s*>/osi) { my $title = $1; $title =~ s/\s+/ /gosi; if (length($title) > 100) { # arbitrary length $title = substr($title, 0, 100) . '...'; } $self->say($event, "$uri has the following title: " . decode_entities($title)); } else { $self->say($event, "$uri has no specified title."); } } else { return $self->SUPER::GotURI(@_); } }