1 ################################
3 ################################
5 package BotModules::WWW;
7 # Need HTML::Entities for decode_entities() in wwwtitle
12 # RegisterConfig - Called when initialised, should call registerVariables
15 $self->SUPER::RegisterConfig(@_);
16 # $self->registerVariables(
17 # # [ name, save?, settable? ]
26 '' => 'The WWW module provides a web interface.',
27 'wwwsize' => 'Reports on the size of a webpage. Syntax: \'wwwsize http://...\'',
28 'wwwlint' => 'Reports on whether the webpage contains any obvious (I mean _really_ obvious) no-nos like <layer> or document.all. Syntax: \'wwwlint http://...\'',
29 'wwwdoctype' => 'Reports on the doctype of a webpage. (Warning: Does not check that the doctype is not commented out!) Syntax: \'wwwdoctype http://...\'',
30 'wwwtitle' => 'Tries to heuristically determine a web page\'s title. Syntax: \'wwwtitle http://...\'',
36 my ($event, $message) = @_;
37 if ($message =~ /^\s*wwwsize\s+(.+?)\s*$/osi) {
38 $self->Fetch($event, $1, 'size');
39 } elsif ($message =~ /^\s*wwwlint\s+(.+?)\s*$/osi) {
40 $self->Fetch($event, $1, 'lint');
41 } elsif ($message =~ /^\s*wwwdoctype\s+(.+?)\s*$/osi) {
42 $self->Fetch($event, $1, 'doctype');
43 } elsif ($message =~ /^\s*wwwtitle\s+(.+?)\s*$/osi) {
44 $self->Fetch($event, $1, 'title');
46 return $self->SUPER::Told(@_);
48 return 0; # dealt with it...
53 my ($event, $uri, $type) = @_;
54 $self->getURI($event, $uri, $type);
59 my ($event, $uri, $output, $type) = @_;
60 my $chars = length($output);
61 if ($type eq 'size') {
63 $self->say($event, "$uri is $chars bytes long.");
65 $self->say($event, "$uri is either empty, or I could not download it.");
67 } elsif ($type eq 'lint') {
68 # ignore whether things are commented out or not.
70 $self->say($event, "$uri is either empty, or I could not download it.");
73 if ($output =~ /document\.all/os) {
74 push(@status, 'document.all');
76 if ($output =~ /document\.layers/os) {
77 push(@status, 'document.layers');
79 if ($output =~ /<i?layer/osi) {
80 push(@status, 'the <layer> tag');
83 my $status = shift(@status);
85 while (scalar(@status) > 1) {
86 $status .= ', '.shift(@status);
88 $status .= ' and '.shift(@status);
90 $self->say($event, "$uri contains $status.");
92 $self->say($event, "$uri doesn't have any _obvious_ flaws..."); # XXX doesn't work! try php.net
95 } elsif ($type eq 'doctype') {
96 # assume doctype is not commented.
98 $self->say($event, "$uri is either empty, or I could not download it.");
99 } elsif ($output =~ /(<!DOCTYPE\s[^>]*>)/osi) {
101 $doctype =~ s/[\n\r]+/ /gosi;
103 # -- #mozilla was here --
104 # <Hixie> it would break 99% of the web if we didn't do it that way.
105 # <Hixie> including most of my test cases ;-)
106 # <dbaron> test cases don't matter...
107 # <dbaron> you'll fix them if we decide they're wrong
108 # <dbaron> but the web is a problem
110 if (length($doctype) > 220) { # arbitrary length greater than two 80 character lines
111 $self->say($event, "$uri has a very long and possibly corrupted doctype (maybe it has an internal subset).");
113 $self->say($event, "$uri has the following doctype: $doctype");
116 $self->say($event, "$uri has no specified doctype.");
118 } elsif ($type eq 'title') {
119 # assume doctype is not commented.
121 $self->say($event, "$uri is either empty, or I could not download it.");
122 } elsif ($output =~ /<title\s*>(.*?)<\/title\s*>/osi or
123 $output =~ /<h1\s*>(.*?)<\/h1\s*>/osi) {
125 $title =~ s/\s+/ /gosi;
126 if (length($title) > 100) { # arbitrary length
127 $title = substr($title, 0, 100) . '...';
129 $self->say($event, "$uri has the following title: " . decode_entities($title));
131 $self->say($event, "$uri has no specified title.");
134 return $self->SUPER::GotURI(@_);