nntplib.pl

Jack Shirazi - BIU (js@bison.lif.icnet.uk)
Mon, 26 Sep 94 16:47:46 BST


# $Id: nntplib.pl
# ---------------------------------------------------------------------------
# nntplib: A package for handling NNTP interactions.
#
# This package has been developed by Jack Shirazi <js@biu.icnet.uk>
#
# 23 Sep 1994 (JS) : Initial version 
#
# ===========================================================================
# 
# NOTE all public routines (except connect_to_nntp) are checked to see 
# that there is a current connection open. If there is not, then the current
# process is terminated with an error message. The moral is that you should
# always start with connect_to_nntp, and test for its success (and you
# can close the connection and reopen it or a new one, but don't call
# any other routine in between closing the last and opening the next).
# Only one connection can be open at a time.
# 
# 
# 
# Public subroutines:
# 
# nntplib'connect_to_nntp([HOST [,PORT]])
#    All args optional. Sets up a connection to the news server. If HOST (and PORT)
#    are given, uses those, else PORT defaults to 119, and HOST is tried to be
#    obtained from environment variable NNTPSERVER, or if not present, then as
#    a last hope, sees if there is a host called "news" available.
#    Returns 1 on success, negative as follows on failure:
#         -1   No host specified or obtainable from environment
#         -2   Host unresolvable
#         -3   Unable to create a socket
#         -4   Unable to make connection to host:port
#         -5   Inappropriate response code received from server
# 
# 
# nntplib'get_article(MESSAGE_ID)
# nntplib'get_head(MESSAGE_ID)
# nntplib'get_body(MESSAGE_ID)
#    Given the message id of an article, these routines retrieve the head,
#    body or all of the article. get_article returns the head and body
#    as a two string array, the other two return the head/body as a string
#    respectively. Failure returns empty array or false appropriately.
#    These do not require a current newsgroup. The message ids are article
#    specific.
# 
# nntplib'list_newsgroups
#    Returns a list or string (depending on context) of all the newsgroups
#    available on this server. Returns undef or empty array on failure.
#    This could be big, and take a while. Format of each line is
#      groupname first_article_num last_article_num moderated_status_bool(y/m)
# 
# nntplib'set_current_group(GROUP_NAME)
#    Sets the current group to GROUP_NAME. Returns empty array on failure, on
#    success returns an array of three elements consisting of: 
#            1. The estimated number of articles in group;
#            2. First article number in the group;
#            3. Last article number in the group.
# 
# nntplib'get_header_on_range(HEADER_STRING,START_RANGE,END_RANGE)
#    Gets the lines from headers corresponding to HEADER from all articles
#    in the range START_RANGE to END_RANGE inclusive. Returns as array
#    or string depending on context. Returns undef or empty array on failure.
#    You need to have set a current newsgroup for this to work. If it is not,
#    then the current process is terminated with an error message.
# 
# nntplib'headers_from_range1(START_RANGE,END_RANGE,LIST_OF_HEADERS)
# nntplib'headers_from_range2(START_RANGE,END_RANGE,LIST_OF_HEADERS)
#    These two subroutines (should) do the same thing: return a list
#    of headers for each article in the range. They are optimized to
#    use different amounts of message sends to the server. The first
#    can only be used if the command XHDR is available from the server
#    - test this with &nntplib'supports_command("XHDR"). It gets the
#    server to do the grepping for the headers, and only transfers the
#    results back. For many headers on just a few files, or where XHDR
#    is not available, the second option should be used - this transfers
#    every header across one by one, and they are processed at this end
#    You need to have set a current newsgroup for this to work. If it is not,
#    then the current process is terminated with an error message.
# 
# 
# nntplib'close_connection([BOOL])
#    Closes the connection. All state information is dumped. Takes an optional
#    boolean - give it a true argument if you want to be polite and wait for
#    the closing response from the server.
# 
# 
# 
# 
# Private subroutines:
# 
# nntplib'initialize
#    Initializes various globals. No useable return value.
# 
# nntplib'get_netaddress(STRING)
#    Assumes STRING is some sort of Internet hostname (dibble.dob, or 3.2.3.4)
#    Returns the 4 byte ip address or undef
# 
# nntplib'get_article_part(MESSAGE_ID,RETREIVAL_TYPE,BOOL)
#    Used by get_article, get_head, get_body. RETREIVAL_TYPE is one of
#    "ARTICLE", "BODY" or "HEAD". If BOOL is true, then this assumes
#    the MESSAGE_ID is a number, and that a current group has been set.
#    If BOOL is true, you need to have set a current newsgroup for this
#    to work. If it is not, then the current process is terminated with
#    an error message. Requests an article from the server, but doesn't
#    read anything except the initial status line. Returns true if
#    there is lines to be read, false if the status returns no good.
# 
# nntplib'read_to_dot
#    Reads all the lines from the server until there is a dot on its
#    own in one line. Folds doubled dots back to single ones. Returns
#    a string or array of lines according to context.
# 
# nntplib'read_to_blank_line
#    Reads all the lines from the server until there is a blank line on
#    its own in one line. Folds doubled dots back to single ones. Returns
#    a string or array of lines according to context.
# 
# nntplib'read_to_pattern(PATTERN)
#    Reads all the lines from the server until there is a line
#    matching PATTERN. Folds doubled dots back to single ones. Returns
#    a string or array of lines according to context. NOTE that
#    it may be problematical to include a "/" character in pattern.
#    (try replacing it with "\/", and use single quotes).
#
# nntplib'get_help
#    Gets the help message from the server, or from the help cache
#    if its been obtained before
# 
# nntplib'supports_command(COMMAND)
#    Checks in the help message whether COMMAND is supported. Caches
#    the results of the check, so after the first check its further
#    checks are efficient.
# 
# nntplib'check_connected
#    Checks that there is currently a connection - if there isn't, the current
#    process is terminated with an error message.
# 
# nntplib'nil
#    Returns undef or empty array depending on context.
# 

#Need the sys/socket.ph file. I assume that the file "www_sys_socket.ph"
#references all the necessary items from there.
require "www_sys_socket.ph";

package nntplib;

&initialize;

sub initialize {
    close NNTP_SERVER;
    $Connected = 0;
    $Posting_allowed = 0;
    $Help = "";
    undef %Supports_Command;
    $Host = "";
    $Port = 0;
    $Debug = 0;
    $Last_Server_Status = "";
    $Current_group = "";
}

sub get_netaddress {
    local($str) = @_;
    local(@temp) = gethostbyname($str);
    local($return);
    if (@temp) {
        $return = $temp[4];
    } elsif ($str =~ /^(\d+).(\d+).(\d+).(\d+)$/ && 
                $1 < 256 && $2 < 256 && $3 < 256 && $4 < 256) {
        $return = pack("C4",$1,$2,$3,$4);
    } else {
        $return = undef;
    }
    $return;
}

#Returns 1 on success.
#Falure returns:
# -1   No host specified or obtainable from environment
# -2   Host unresolvable
# -3   Unable to create a socket
# -4   Unable to make connection to host:port
# -5   Inappropriate response code received from server
sub connect_to_nntp {
    ($Host,$Port) = @_;
    local($ip_addr,$sock_addr,$line);

    if ( $Port <= 0 ) { $Port = 119; };   # The default NNTP port is 119

    $Host || ($Host = $ENV{'NNTPSERVER'}) || 
       (($Host) = gethostbyname('newshost')) ||
       (($Host) = gethostbyname('news')) || return -1;

    ($ip_addr = &get_netaddress($Host)) || return -2;

    $sock_addr = pack('S n a4 x8', &main'AF_INET, $Port, $ip_addr);
    socket(NNTP_SERVER, &main'PF_INET, &main'SOCK_STREAM, 0) || return -3;

    connect(NNTP_SERVER,$sock_addr) || return -4;

    select((select(NNTP_SERVER),$|=1)[0]);

    $line = <NNTP_SERVER>;
    $Last_Server_Status = $line;
    if ( $line !~ /^20(\d)/ ) {
         close NNTP_SERVER;
         return -5;
    }
    $Posting_allowed = $1 ? 0 : 1;
    $Connected = 1;

    1;
}


sub get_article {
    local($head,$body);
    &get_article_part($_[0],"ARTICLE") || return ();
    $head = &read_to_blank_line;
    $body = &read_to_dot;
    ($head,$body);
}

sub get_head    {
    &get_article_part($_[0],"HEAD") ? &read_to_dot : "";
}

sub get_body    {
    &get_article_part($_[0],"BODY") ? &read_to_dot : "";
}

#$type should be ARTICLE,BODY or HEAD
sub get_article_part {
    local($message_id,$type,$group_specific) = @_;
    &check_connected('get_article_part');
    if ($group_specific) {
        &check_in_group("get_article_part");
    } else { 
        ($message_id =~ /^<.*>$/) || ($message_id = "<" . $message_id . ">");
    }
    local($line);
    print NNTP_SERVER "$type $message_id\r\n";
    $line = <NNTP_SERVER>;
    $Last_Server_Status = $line;
    $line =~ /^22[0-2]/
}

sub read_to_dot {&read_to_pattern('^\.\r\n$')}
sub read_to_blank_line {&read_to_pattern('^\s*\r\n$')}

#Assumes pattern does not have a "/" character in it.
#If you want to include a "/" character, replace it with
#"\\/" or '\/'
sub read_to_pattern {
    local($pattern) = @_;
    local($read,@read);
    &check_connected('read_to_pattern');
    if (wantarray) {
        while (<NNTP_SERVER>) {
            print $_ if $Debug;
#            m/^\.\r\n$/ && last;  #to dot
#            m/^\s*\r\n$/ && last;  #to blank line
            m/$pattern/ && last;
            s/^\.\./\./;
            push(@read,$_);
        }
        return @read;
    } else {
        while (<NNTP_SERVER>) {
            print $_ if $Debug;
            m/$pattern/ && last;
            s/^\.\./\./;
            $read .= $_;
        }
    }
    $read;
}

#On success returns an array consisting of 
#1. The estimated number of articles in group;
#2. First article number in the group;
#3. Last article number in the group.
#On failure, returns an empty array.
sub set_current_group {
    local($group) = @_;
    &check_connected('set_current_group');
    print NNTP_SERVER "GROUP $group\r\n";
    local($line);
    $line = <NNTP_SERVER>;
    $Last_Server_Status = $line;
    ($line =~ /^211 (\d+) (\d+) (\d+)/) || return ();
    $Current_group = $group;
    ($1,$2,$3);
}


sub list_newsgroups {
    local($line);
    &check_connected('list_newsgroups');
    print NNTP_SERVER "LIST\r\n";
    $line = <NNTP_SERVER>;
    $Last_Server_Status = $line;
    $line =~ /^215/ || return &nil;
    &read_to_dot;
}

#takes one optional arg - if true, then will wait to read the
#terminating response from the server (being polite).
sub close_connection {
    &check_connected('close_connection');
    print NNTP_SERVER "QUIT\r\n";
    $_[0] && ($_ = <NNTP_SERVER>); #I don't really care what the response is.
    &initialize;
}

sub get_help {
    &check_connected('get_help');
    print NNTP_SERVER "HELP\r\n";
    $_ = <NNTP_SERVER>;
    $Last_Server_Status = $_;
    /^100/ || return &nil;
    $Help = &read_to_dot;
}

sub supports_command {
    local($command) = @_;
    $Supports_Command{$command} && return 1;
    $Help || &get_help;
    ($Help =~ /$command/i) ? ($Supports_Command{$command} = 1) : 0;
}

sub get_header_on_range {
    local($header,$start,$end) = @_;
    &check_connected('get_header_on_range');
    &check_in_group("get_header_on_range");
    &supports_command("XHDR") || return &nil;
    print NNTP_SERVER "XHDR $header $start-$end\r\n";
    $_ = <NNTP_SERVER>;
    $Last_Server_Status = $_;
    /^221/ || return &nil;
    &read_to_dot;
}

#use this for bigger range and smaller number of headers
sub headers_from_range1 {
    local($start,$end,@headers) = @_;
    local($num,$read_headers,@read_headers,%read_headers);
    &supports_command("XHDR") || return ();
    &check_in_group("headers_from_range1");
    foreach $header (@headers) {
        @read_headers = &get_header_on_range($header,$start,$end);
        foreach (@read_headers) {
	    chop; chop;
            ($num,$read_headers) = split(/ /,$_,2);
	    $read_headers{$num} .= $read_headers . "\0";
        }
    }
    %read_headers;
}

#use this for smaller range and bigger number of headers
sub headers_from_range2 {
    local($start,$end,@headers) = @_;
    local($read_headers,%read_headers);
    &check_in_group("headers_from_range2");
    $* = 1;
    foreach $num ($start .. $end) {
        $read_headers = &get_article_part($num,"HEAD",1);
        $read_headers =~ tr/\r//d;
        $read_headers =~ s/\n[\t ]+/ /;
        foreach $header (@headers) {
            ($read_headers =~ /^$header: (.*)$/) && ($read_headers{$num} .= $1 . "\0");
        }
    }
    $* = 0;
    %read_headers;
}



sub check_connected {
    $Connected || (close NNTP_SERVER,
        die "nntplib'",$_[0],": Error - not connected to server\n");
}

sub check_in_group {
    $Current_group || (close NNTP_SERVER,
        die "nntplib'",$_[0],": Error - group specific function used with no current group set\n");
}

sub nil {wantarray ? () : undef}

1;

__END__
To do:
POST
NEWNEWS
NEWGROUPS
LIST parsing
Moderation?
Internal logic about what has been read, etc.