wwwnews.pl
Jack Shirazi - BIU (js@bison.lif.icnet.uk)
Mon, 26 Sep 94 16:48:00 BST
# $Id: wwwnews.pl
# ---------------------------------------------------------------------------
# wwwnews: A package for sending NNTP requests and handling responses for the
# World-Wide Web. This package is designed for use by www.pl
# for handling URL's with the "news" scheme designator.
#
# This package has been developed by Jack Shirazi <js@biu.icnet.uk>
# as part of the Arcadia project at the University of California, Irvine.
# Each routine in this package has been derived from the work of multiple
# authors -- those that are known are listed above the respective routines.
# The routines have been changed substantially, so don't blame them for bugs.
# It is distributed under the Artistic License (included with your Perl
# distribution files).
#
# 23 Sep 1994 (JS) : Initial version
#
# If you have any suggestions, bug reports, fixes, or enhancements,
# send them to the libwww-perl mailing list at <libwww-perl@ics.uci.edu>.
# ---------------------------------------------------------------------------
# This script was developed using the wwwhttp.pl script in this library
# developed by Roy Fielding <fielding@ics.uci.edu> as a starting point.
#
# Some of these routines are enhanced versions of those distributed by
# Oscar Nierstrasz <oscar@iam.unibe.ch> from IAM, University of Berne.
# See <ftp://cui.unige.ch/PUBLIC/oscar/scripts/README.html> for more info.
# ===========================================================================
require "wwwerror.pl";
require "sys/socket.ph";
require "nntplib.pl";
package wwwnews;
$MAX_RANGE = 20;
%AllowedMethods = ( # Specify what NEWS request methods are supported
'GET', 1, # 1 = Allowed without content in request
'HEAD', 0,
'POST', 0, # 2 = Allowed and with content in request
);
# ===========================================================================
# request(): perform a news request for the $object at the NNTP server
# on the specified $host and $port, giving up after $timeout seconds.
# Return the NNTP response code along with (as named parameters)
# the parsed response %headers and document $content.
#
# This is a vastly modified version of Oscar's http'get() dated 28/3/94 in
# <ftp://cui.unige.ch/PUBLIC/oscar/scripts/http.pl>
# including contributions from Marc van Heyningen and Martijn Koster.
#
sub request
{
local($method, $host, $port, $object, *headers, *content, $timeout) = @_;
local($fqdn, $aliases, $addrtype, $len, $thataddr);
local($reqstr, $hd, $val);
local($response) = 0;
local($resphead) = '';
if (!$AllowedMethods{$method})
{
return &wwwerror'onrequest($wwwerror'RC_bad_request_client, $method,
'news', $host, $port, $object, *headers, *content,
"Library does not allow that method for NEWS");
}
if (!$object) { #I suppose i could do a "LIST" on the server
#If there is no object. Right now I'll die.
return &wwwerror'onrequest($wwwerror'RC_bad_request_client, $method,
'news', $host, $port, $object, *headers, *content,
"Library requires an object - a newsgroup or article");
}
#Three possibilities, a message id or a group name (or a *)
local($range_flag,$r_start,$r_end,$group);
if ($object =~ /^([^@]+@[^@]+)$/) {
$reqstr = "\$content = &wwwnews'get_article('$timeout','$object','$host','$port','$1')";
} elsif ($object =~ /\*/) {
$reqstr = "\$content = &wwwnews'get_list('$timeout','$object','$host','$port');";
} elsif ($object =~ m:^([^/]+)(/.*)?$:) {
$group = $1;
$range_flag = $2;
if ($range_flag) {
if ($range_flag =~ m:^/(\d+)-(\d+)$:) {$r_start = $1, $r_end = $2;}
elsif ($range_flag =~ m:^/(\d+)-?$:) {$r_start = $1;}
}
$reqstr = "\$content = &wwwnews'get_group_range('$timeout','$MAX_RANGE','$object','$host','$port','$group','$r_start','$r_end');";
} else {#oh dear, I don't recognize it
return &wwwerror'onrequest($wwwerror'RC_bad_request_client, $method,
'news', $host, $port, $object, *headers, *content,
"Library does not recognize the object - should be a newsgroup or article");
}
# local($/);
eval $reqstr;
if ($@)
{
$SIG{'ALRM'} = "IGNORE";
alarm(0);
&nntplib'close_connection;
if ($@ =~ /^Time/o) { $response = $wwwerror'RC_timed_out; }
elsif ($@ =~ /^Couldn't connect to/o) { $response = $wwwerror'RC_connection_failed;}
elsif ($@ =~ /^Group not vali/o) { $response = $wwwerror'RC_bad_response; }
elsif ($@ =~ /^Unable to retreive/o) { $response = $wwwerror'RC_bad_response; }
else { $response = $wwwerror'RC_connection_failed; }
return &wwwerror'onrequest($response, $method, 'news', $host, $port,
$object, *headers, *content, $@);
}
&parseRFC822head($resphead, *headers);
return $response;
}
sub timed_out { die "Timed Out\n"; }
sub get_list {
local($timeout,$object,$host,$port) = @_;
local($i,$html,@groups);
local($groupname,$first,$last,$moderated);
$SIG{'ALRM'} = "wwwnews'timed_out";
alarm($timeout);
(&nntplib'connect_to_nntp($host,$port) > 0) ||
die "Couldn't connect to news server\n";
$timeout <<= 3; # Multiply timeout by 8 after 1st response
alarm($timeout);
@groups = &nntplib'list_newsgroups;
alarm(0);
@groups || die "Unable to retreive list of groups\n";
$html = "<base href=\"news:$object\">\n<H1>Newsgroups</PRE>\n\n<MENU>\n\n";
for ($i=0;$i <= $#groups; $i++) {
($groupname,$first,$last,$moderated) = split(/[ \t\n\r]+/,$groups[$i]);
$html .= "<LI> <A HREF=\"news:$groupname\">$groupname ";
$html .= ($first + 0) . "-" . ($last + 0);
$html .= ($moderated =~ /y/) ? "\n</A>\n" : " moderated\n</A>\n";
}
$html .= "</MENU>\n";
$html;
}
sub get_group_range {
local($timeout,$max_range,$object,$host,$port,$group,$start,$end) = @_;
local($a,$first,$last,$diff,$real_start,$real_end);
local($one_before_start,$one_after_end,$html);
$diff = ($max_range > 2) ? $max_range - 1 : 19 ;
if ($end && $start) {
($end - $start > 0) || die "Wrong ranges\n";
($end - $start < $diff) && ($diff = $end - $start);
}
$SIG{'ALRM'} = "wwwnews'timed_out";
alarm($timeout);
(&nntplib'connect_to_nntp($host,$port) > 0) ||
die "Couldn't connect to news server\n";
$timeout <<= 2; # Quadruple timeout after 1st response
alarm($timeout);
(($a,$first,$last) = &nntplib'set_current_group($group)) ||
die "Group not valid\n";
if ($end) {
if ( ($start > $last) || ($end < $first) ) {
die "Out of range\n";
} else {
$real_start = $start; $real_end = $real_start + $diff
}
} elsif ($start) {
if ( ($start > $last) ) {
die "Out of range\n";
} else {
$real_start = $start; $real_end = $real_start + $diff}
} else {$real_end = $last; $real_start = $real_end - $diff}
alarm($timeout);
%my_headers = &nntplib'supports_command("XHDR") ?
&nntplib'headers_from_range1($real_start,$real_end,"Lines","Subject","From","Message-id"):
&nntplib'headers_from_range2($real_start,$real_end,"Lines","Subject","From","Message-id");
alarm(0);
&nntplib'close_connection;
%my_headers || die "Unable to retreive any headers\n";
$one_before_start = $real_start - 1;
$one_after_end = $real_end + 1;
$html = "<base href=\"news:$object\">\n";
$html .= "<TITLE>Newsgroup $group, Articles $real_start-$real_end</TITLE>\n";
if ($real_start != $first) {
$html .= "(<A HREF=\"news:$group/$first-$one_before_start\">Earlier articles</A>...)";
}
$html .= "\nArticles in comp.lang.perl\n<MENU>\n\n";
foreach $num ($real_start .. $real_end) {
($lines,$subj,$from,$mess_id) = split(/\0/,$my_headers{$num});
$lines || next;
$mess_id =~ s/^<//;
$mess_id =~ s/>$//;
$html .= "<LI> <A HREF=\"news:$mess_id\">$num [$lines] \"$subj\" - $from</A>\n";
}
$html .= "</MENU>\n<P>\n";
if ($real_end != $last) {
$html .= "(<A HREF=\"news:$group/$one_after_end-$last\">Later articles</A>...)\n";
}
$html;
}
sub get_article {
local($timeout,$object,$host,$port,$message_id) = @_;
local($head,$body,%headers,@body,$html);
$SIG{'ALRM'} = "wwwnews'timed_out";
alarm($timeout);
(&nntplib'connect_to_nntp($host,$port) > 0) ||
die "Couldn't connect to news server\n";
$timeout <<= 2; # Quadruple timeout after 1st response
alarm($timeout);
($head,$body) = &nntplib'get_article($message_id);
alarm(0);
&nntplib'close_connection;
$head || die "Unable to retreive the article\n";
&parseRFC822head($head,*headers);
#Using $from, $subject, $date, $organization, @newsgroups, @references, $body
$html = "<base href=\"news:$object\">\n<ADDRESS> ";
$html .= $headers{'from'};
$html .= "\n</ADDRESS>\n<TITLE> ";
$html .= $headers{'subject'};
$html .= "</TITLE>\n<ADDRESS><H1> ";
$html .= $headers{'subject'};
$html .= "</H1>\n</ADDRESS>\n<ADDRESS> ";
$html .= $headers{'date'} . "\n " . $headers{'organization'};
$html .= "\n</ADDRESS>\n\n<DL>\n\n<DT> Newsgroups:\n<DD> ";
#process newsgroups
foreach $newsgroup (split(/,/,$headers{'newsgroups'})) {
$html .= "<A HREF=\"news:$newsgroup\">$newsgroup</A>";
}
$html .= "\n<DT> References:\n<DD> ";
#process references
foreach $reference (split(/,/,$headers{'references'})) {
$reference =~ s/^<//;
$reference =~ s/>$//;
$html .= "<A HREF=\"news:$reference\"><$reference></A>";
}
$body =~ tr/\r//d;
# $body =~ s/<([^@>< \n\t]+@[^@>< \n\t]+)>/<A HREF="$1"><$1><\/A>/g;
$html .= "\n</DL>\n\n\n\n<PRE>$body\n</PRE>\n";
}
# ===========================================================================
# parseRFC822head(): Breaks out the headers passed in $head into a %headers
# indexed by a lower-cased keyword. Returns nothing.
#
# This routine is (mostly) from Gene Spafford's <spaf@cs.purdue.EDU>
# ParseMailHeader() routine in the MailStuff package.
#
sub parseRFC822head
{
local($head, *headers) = @_;
return if (!$head);
local($save1, $keyw, $val, @array);
$save1 = ($* || 0);
$* = 1;
$_ = $head;
s/\r//g;
s/\n\s+/ /g;
@array = split('\n');
foreach $_ (@array)
{
($keyw, $val) = m/^([^:]+):\s*(.*\S)\s*$/g;
$keyw =~ tr/A-Z/a-z/;
if (defined($headers{$keyw}))
{
$headers{$keyw} .= ", $val";
}
else
{
$headers{$keyw} = $val;
}
}
$* = $save1;
}
1;