wwwnews.pl

Jack Shirazi - BIU (js@bison.lif.icnet.uk)
Mon, 26 Sep 94 16:48:00 BST


# $Id: wwwnews.pl
# ---------------------------------------------------------------------------
# wwwnews: A package for sending NNTP requests and handling responses for the
#          World-Wide Web.  This package is designed for use by www.pl
#          for handling URL's with the "news" scheme designator.
#
# This package has been developed by Jack Shirazi <js@biu.icnet.uk>
# as part of the Arcadia project at the University of California, Irvine.
# Each routine in this package has been derived from the work of multiple
# authors -- those that are known are listed above the respective routines.
# The routines have been changed substantially, so don't blame them for bugs.
# It is distributed under the Artistic License (included with your Perl
# distribution files).
#
# 23 Sep 1994 (JS) : Initial version 
#
# If you have any suggestions, bug reports, fixes, or enhancements,
# send them to the libwww-perl mailing list at <libwww-perl@ics.uci.edu>.
# ---------------------------------------------------------------------------
# This script was developed using the wwwhttp.pl script in this library
# developed by Roy Fielding <fielding@ics.uci.edu> as a starting point.
# 
# Some of these routines are enhanced versions of those distributed by
# Oscar Nierstrasz <oscar@iam.unibe.ch> from IAM, University of Berne. 
# See <ftp://cui.unige.ch/PUBLIC/oscar/scripts/README.html> for more info.
# ===========================================================================
require "wwwerror.pl";
require "sys/socket.ph";
require "nntplib.pl";

package wwwnews;
$MAX_RANGE = 20;
%AllowedMethods = (     # Specify what NEWS request methods are supported
    'GET',        1,    # 1 = Allowed without content in request
    'HEAD',       0,
    'POST',       0,    # 2 = Allowed and with content in request
);


# ===========================================================================
# request(): perform a news request for the $object at the NNTP server
#            on the specified $host and $port, giving up after $timeout seconds.
#            Return the NNTP response code along with (as named parameters)
#            the parsed response %headers and document $content.
#
# This is a vastly modified version of Oscar's http'get() dated 28/3/94 in
#      <ftp://cui.unige.ch/PUBLIC/oscar/scripts/http.pl>
# including contributions from Marc van Heyningen and Martijn Koster.
#
sub request
{
    local($method, $host, $port, $object, *headers, *content, $timeout) = @_;
    local($fqdn, $aliases, $addrtype, $len, $thataddr);
    local($reqstr, $hd, $val);

    local($response) = 0;
    local($resphead) = '';

    if (!$AllowedMethods{$method})
    {
        return &wwwerror'onrequest($wwwerror'RC_bad_request_client, $method,
                          'news', $host, $port, $object, *headers, *content,
                          "Library does not allow that method for NEWS");
    }

    if (!$object) { #I suppose i could do a "LIST" on the server
                    #If there is no object. Right now I'll die.
        return &wwwerror'onrequest($wwwerror'RC_bad_request_client, $method,
                     'news', $host, $port, $object, *headers, *content,
                     "Library requires an object - a newsgroup or article");
    }

    
    #Three possibilities, a message id or a group name (or a *)
    local($range_flag,$r_start,$r_end,$group);
    if ($object =~ /^([^@]+@[^@]+)$/) {
        $reqstr = "\$content = &wwwnews'get_article('$timeout','$object','$host','$port','$1')";
    } elsif ($object =~ /\*/) {
        $reqstr = "\$content = &wwwnews'get_list('$timeout','$object','$host','$port');";
    } elsif ($object =~ m:^([^/]+)(/.*)?$:) {
        $group = $1;
        $range_flag = $2;
        if ($range_flag) {
            if ($range_flag =~ m:^/(\d+)-(\d+)$:) {$r_start = $1, $r_end = $2;}
            elsif ($range_flag =~ m:^/(\d+)-?$:) {$r_start = $1;}
        }
        $reqstr = "\$content = &wwwnews'get_group_range('$timeout','$MAX_RANGE','$object','$host','$port','$group','$r_start','$r_end');";
    } else {#oh dear, I don't recognize it
        return &wwwerror'onrequest($wwwerror'RC_bad_request_client, $method,
                     'news', $host, $port, $object, *headers, *content,
                     "Library does not recognize the object - should be a newsgroup or article");
    }

#    local($/);
    eval $reqstr;
    if ($@)
    {
        $SIG{'ALRM'} = "IGNORE";
        alarm(0);
        &nntplib'close_connection;
        if    ($@ =~ /^Time/o) { $response = $wwwerror'RC_timed_out;         }
        elsif ($@ =~ /^Couldn't connect to/o) { $response = $wwwerror'RC_connection_failed;}
        elsif ($@ =~ /^Group not vali/o) { $response = $wwwerror'RC_bad_response;      }
        elsif ($@ =~ /^Unable to retreive/o) { $response = $wwwerror'RC_bad_response;      }
        else                   { $response = $wwwerror'RC_connection_failed; }
        return &wwwerror'onrequest($response, $method, 'news', $host, $port,
                                   $object, *headers, *content, $@);
    }


    &parseRFC822head($resphead, *headers);

    return $response;
}

sub timed_out { die "Timed Out\n"; }

sub get_list {
    local($timeout,$object,$host,$port) = @_;
    local($i,$html,@groups);
    local($groupname,$first,$last,$moderated);
    
    $SIG{'ALRM'} = "wwwnews'timed_out";
    alarm($timeout);
    (&nntplib'connect_to_nntp($host,$port) > 0) || 
	die "Couldn't connect to news server\n";

    $timeout <<= 3;                  # Multiply timeout by 8 after 1st response
    alarm($timeout);
    @groups = &nntplib'list_newsgroups;
    alarm(0);

    @groups || die "Unable to retreive list of groups\n";

    $html = "<base href=\"news:$object\">\n<H1>Newsgroups</PRE>\n\n<MENU>\n\n";
    for ($i=0;$i <= $#groups; $i++) {
      ($groupname,$first,$last,$moderated) = split(/[ \t\n\r]+/,$groups[$i]);
      $html .= "<LI> <A HREF=\"news:$groupname\">$groupname ";
      $html .= ($first + 0) . "-" . ($last + 0);
      $html .= ($moderated =~ /y/) ? "\n</A>\n" : " moderated\n</A>\n";
    }
    $html .= "</MENU>\n";
    $html;
}

sub get_group_range {
    local($timeout,$max_range,$object,$host,$port,$group,$start,$end) = @_;
    local($a,$first,$last,$diff,$real_start,$real_end);
    local($one_before_start,$one_after_end,$html);
    $diff = ($max_range > 2) ? $max_range - 1 : 19 ;
    if ($end && $start) {
       ($end - $start > 0) || die "Wrong ranges\n";
       ($end - $start < $diff) && ($diff = $end - $start);
    }

    $SIG{'ALRM'} = "wwwnews'timed_out";
    alarm($timeout);
    (&nntplib'connect_to_nntp($host,$port) > 0) || 
	die "Couldn't connect to news server\n";

    $timeout <<= 2;                  # Quadruple timeout after 1st response
    alarm($timeout);
    (($a,$first,$last) = &nntplib'set_current_group($group)) || 
	die "Group not valid\n";

    if ($end) {
        if ( ($start > $last) ||  ($end < $first) ) { 
	    die "Out of range\n";
	} else {
	    $real_start = $start; $real_end = $real_start + $diff
        }
    } elsif ($start) {
        if ( ($start > $last) ) { 
	    die "Out of range\n";
        } else {
	    $real_start = $start; $real_end = $real_start + $diff}
    } else {$real_end = $last; $real_start = $real_end - $diff}

    alarm($timeout);
    %my_headers = &nntplib'supports_command("XHDR") ? 
	&nntplib'headers_from_range1($real_start,$real_end,"Lines","Subject","From","Message-id"):
	&nntplib'headers_from_range2($real_start,$real_end,"Lines","Subject","From","Message-id");
    alarm(0);

    &nntplib'close_connection;

    %my_headers || die "Unable to retreive any headers\n";

    $one_before_start = $real_start - 1;
    $one_after_end = $real_end + 1;


    $html = "<base href=\"news:$object\">\n";
    $html .= "<TITLE>Newsgroup $group,  Articles $real_start-$real_end</TITLE>\n";

    if ($real_start != $first) {
        $html .= "(<A HREF=\"news:$group/$first-$one_before_start\">Earlier articles</A>...)";
    }

    $html .= "\nArticles in comp.lang.perl\n<MENU>\n\n";

    foreach $num ($real_start .. $real_end) {
        ($lines,$subj,$from,$mess_id) = split(/\0/,$my_headers{$num});
        $lines || next;
        $mess_id =~ s/^<//;
        $mess_id =~ s/>$//;
        $html .= "<LI> <A HREF=\"news:$mess_id\">$num [$lines] \"$subj\" - $from</A>\n";
    }

    $html .= "</MENU>\n<P>\n";

    if ($real_end != $last) {
        $html .= "(<A HREF=\"news:$group/$one_after_end-$last\">Later articles</A>...)\n";
    }
    $html;
}

sub get_article {
    local($timeout,$object,$host,$port,$message_id) = @_;
    local($head,$body,%headers,@body,$html);

    $SIG{'ALRM'} = "wwwnews'timed_out";
    alarm($timeout);
    (&nntplib'connect_to_nntp($host,$port) > 0) || 
	die "Couldn't connect to news server\n";

    $timeout <<= 2;                  # Quadruple timeout after 1st response
    alarm($timeout);
    ($head,$body) = &nntplib'get_article($message_id);

    alarm(0);
    &nntplib'close_connection;

    $head || die "Unable to retreive the article\n";

    &parseRFC822head($head,*headers);

#Using $from, $subject, $date, $organization, @newsgroups, @references, $body

    $html = "<base href=\"news:$object\">\n<ADDRESS> ";
    $html .= $headers{'from'};
    $html .= "\n</ADDRESS>\n<TITLE> ";
    $html .= $headers{'subject'};
    $html .= "</TITLE>\n<ADDRESS><H1> ";
    $html .= $headers{'subject'};
    $html .= "</H1>\n</ADDRESS>\n<ADDRESS> ";
    $html .= $headers{'date'} . "\n " . $headers{'organization'};
    $html .= "\n</ADDRESS>\n\n<DL>\n\n<DT> Newsgroups:\n<DD> ";

    #process newsgroups
    foreach $newsgroup (split(/,/,$headers{'newsgroups'})) {
        $html .= "<A HREF=\"news:$newsgroup\">$newsgroup</A>";
    }
    $html .= "\n<DT> References:\n<DD> ";

    #process references
    foreach $reference (split(/,/,$headers{'references'})) {
        $reference =~ s/^<//;
        $reference =~ s/>$//;
        $html .= "<A HREF=\"news:$reference\"><$reference></A>";
    }
    $body =~ tr/\r//d;
#    $body =~ s/<([^@>< \n\t]+@[^@>< \n\t]+)>/<A HREF="$1"><$1><\/A>/g;
    $html .= "\n</DL>\n\n\n\n<PRE>$body\n</PRE>\n";
}




# ===========================================================================
# parseRFC822head(): Breaks out the headers passed in $head into a %headers
#                    indexed by a lower-cased keyword.  Returns nothing.
#
# This routine is (mostly) from Gene Spafford's <spaf@cs.purdue.EDU>
# ParseMailHeader() routine in the MailStuff package.
#
sub parseRFC822head
{
    local($head, *headers) =  @_;
    return if (!$head);

    local($save1, $keyw, $val, @array);

    $save1 = ($* || 0);
    $* = 1;
    $_ = $head;
    s/\r//g;
    s/\n\s+/ /g;
       
    @array = split('\n');
    foreach $_ (@array)
    {
        ($keyw, $val) = m/^([^:]+):\s*(.*\S)\s*$/g;
        $keyw =~ tr/A-Z/a-z/;
        if (defined($headers{$keyw}))
        {
            $headers{$keyw} .= ", $val";
        }
        else
        {
            $headers{$keyw} = $val;
        }
    }
    $* = $save1;
}


1;