nntplib.pl
Jack Shirazi - BIU (js@bison.lif.icnet.uk)
Mon, 26 Sep 94 16:47:46 BST
# $Id: nntplib.pl
# ---------------------------------------------------------------------------
# nntplib: A package for handling NNTP interactions.
#
# This package has been developed by Jack Shirazi <js@biu.icnet.uk>
#
# 23 Sep 1994 (JS) : Initial version
#
# ===========================================================================
#
# NOTE all public routines (except connect_to_nntp) are checked to see
# that there is a current connection open. If there is not, then the current
# process is terminated with an error message. The moral is that you should
# always start with connect_to_nntp, and test for its success (and you
# can close the connection and reopen it or a new one, but don't call
# any other routine in between closing the last and opening the next).
# Only one connection can be open at a time.
#
#
#
# Public subroutines:
#
# nntplib'connect_to_nntp([HOST [,PORT]])
# All args optional. Sets up a connection to the news server. If HOST (and PORT)
# are given, uses those, else PORT defaults to 119, and HOST is tried to be
# obtained from environment variable NNTPSERVER, or if not present, then as
# a last hope, sees if there is a host called "news" available.
# Returns 1 on success, negative as follows on failure:
# -1 No host specified or obtainable from environment
# -2 Host unresolvable
# -3 Unable to create a socket
# -4 Unable to make connection to host:port
# -5 Inappropriate response code received from server
#
#
# nntplib'get_article(MESSAGE_ID)
# nntplib'get_head(MESSAGE_ID)
# nntplib'get_body(MESSAGE_ID)
# Given the message id of an article, these routines retrieve the head,
# body or all of the article. get_article returns the head and body
# as a two string array, the other two return the head/body as a string
# respectively. Failure returns empty array or false appropriately.
# These do not require a current newsgroup. The message ids are article
# specific.
#
# nntplib'list_newsgroups
# Returns a list or string (depending on context) of all the newsgroups
# available on this server. Returns undef or empty array on failure.
# This could be big, and take a while. Format of each line is
# groupname first_article_num last_article_num moderated_status_bool(y/m)
#
# nntplib'set_current_group(GROUP_NAME)
# Sets the current group to GROUP_NAME. Returns empty array on failure, on
# success returns an array of three elements consisting of:
# 1. The estimated number of articles in group;
# 2. First article number in the group;
# 3. Last article number in the group.
#
# nntplib'get_header_on_range(HEADER_STRING,START_RANGE,END_RANGE)
# Gets the lines from headers corresponding to HEADER from all articles
# in the range START_RANGE to END_RANGE inclusive. Returns as array
# or string depending on context. Returns undef or empty array on failure.
# You need to have set a current newsgroup for this to work. If it is not,
# then the current process is terminated with an error message.
#
# nntplib'headers_from_range1(START_RANGE,END_RANGE,LIST_OF_HEADERS)
# nntplib'headers_from_range2(START_RANGE,END_RANGE,LIST_OF_HEADERS)
# These two subroutines (should) do the same thing: return a list
# of headers for each article in the range. They are optimized to
# use different amounts of message sends to the server. The first
# can only be used if the command XHDR is available from the server
# - test this with &nntplib'supports_command("XHDR"). It gets the
# server to do the grepping for the headers, and only transfers the
# results back. For many headers on just a few files, or where XHDR
# is not available, the second option should be used - this transfers
# every header across one by one, and they are processed at this end
# You need to have set a current newsgroup for this to work. If it is not,
# then the current process is terminated with an error message.
#
#
# nntplib'close_connection([BOOL])
# Closes the connection. All state information is dumped. Takes an optional
# boolean - give it a true argument if you want to be polite and wait for
# the closing response from the server.
#
#
#
#
# Private subroutines:
#
# nntplib'initialize
# Initializes various globals. No useable return value.
#
# nntplib'get_netaddress(STRING)
# Assumes STRING is some sort of Internet hostname (dibble.dob, or 3.2.3.4)
# Returns the 4 byte ip address or undef
#
# nntplib'get_article_part(MESSAGE_ID,RETREIVAL_TYPE,BOOL)
# Used by get_article, get_head, get_body. RETREIVAL_TYPE is one of
# "ARTICLE", "BODY" or "HEAD". If BOOL is true, then this assumes
# the MESSAGE_ID is a number, and that a current group has been set.
# If BOOL is true, you need to have set a current newsgroup for this
# to work. If it is not, then the current process is terminated with
# an error message. Requests an article from the server, but doesn't
# read anything except the initial status line. Returns true if
# there is lines to be read, false if the status returns no good.
#
# nntplib'read_to_dot
# Reads all the lines from the server until there is a dot on its
# own in one line. Folds doubled dots back to single ones. Returns
# a string or array of lines according to context.
#
# nntplib'read_to_blank_line
# Reads all the lines from the server until there is a blank line on
# its own in one line. Folds doubled dots back to single ones. Returns
# a string or array of lines according to context.
#
# nntplib'read_to_pattern(PATTERN)
# Reads all the lines from the server until there is a line
# matching PATTERN. Folds doubled dots back to single ones. Returns
# a string or array of lines according to context. NOTE that
# it may be problematical to include a "/" character in pattern.
# (try replacing it with "\/", and use single quotes).
#
# nntplib'get_help
# Gets the help message from the server, or from the help cache
# if its been obtained before
#
# nntplib'supports_command(COMMAND)
# Checks in the help message whether COMMAND is supported. Caches
# the results of the check, so after the first check its further
# checks are efficient.
#
# nntplib'check_connected
# Checks that there is currently a connection - if there isn't, the current
# process is terminated with an error message.
#
# nntplib'nil
# Returns undef or empty array depending on context.
#
#Need the sys/socket.ph file. I assume that the file "www_sys_socket.ph"
#references all the necessary items from there.
require "www_sys_socket.ph";
package nntplib;
&initialize;
sub initialize {
close NNTP_SERVER;
$Connected = 0;
$Posting_allowed = 0;
$Help = "";
undef %Supports_Command;
$Host = "";
$Port = 0;
$Debug = 0;
$Last_Server_Status = "";
$Current_group = "";
}
sub get_netaddress {
local($str) = @_;
local(@temp) = gethostbyname($str);
local($return);
if (@temp) {
$return = $temp[4];
} elsif ($str =~ /^(\d+).(\d+).(\d+).(\d+)$/ &&
$1 < 256 && $2 < 256 && $3 < 256 && $4 < 256) {
$return = pack("C4",$1,$2,$3,$4);
} else {
$return = undef;
}
$return;
}
#Returns 1 on success.
#Falure returns:
# -1 No host specified or obtainable from environment
# -2 Host unresolvable
# -3 Unable to create a socket
# -4 Unable to make connection to host:port
# -5 Inappropriate response code received from server
sub connect_to_nntp {
($Host,$Port) = @_;
local($ip_addr,$sock_addr,$line);
if ( $Port <= 0 ) { $Port = 119; }; # The default NNTP port is 119
$Host || ($Host = $ENV{'NNTPSERVER'}) ||
(($Host) = gethostbyname('newshost')) ||
(($Host) = gethostbyname('news')) || return -1;
($ip_addr = &get_netaddress($Host)) || return -2;
$sock_addr = pack('S n a4 x8', &main'AF_INET, $Port, $ip_addr);
socket(NNTP_SERVER, &main'PF_INET, &main'SOCK_STREAM, 0) || return -3;
connect(NNTP_SERVER,$sock_addr) || return -4;
select((select(NNTP_SERVER),$|=1)[0]);
$line = <NNTP_SERVER>;
$Last_Server_Status = $line;
if ( $line !~ /^20(\d)/ ) {
close NNTP_SERVER;
return -5;
}
$Posting_allowed = $1 ? 0 : 1;
$Connected = 1;
1;
}
sub get_article {
local($head,$body);
&get_article_part($_[0],"ARTICLE") || return ();
$head = &read_to_blank_line;
$body = &read_to_dot;
($head,$body);
}
sub get_head {
&get_article_part($_[0],"HEAD") ? &read_to_dot : "";
}
sub get_body {
&get_article_part($_[0],"BODY") ? &read_to_dot : "";
}
#$type should be ARTICLE,BODY or HEAD
sub get_article_part {
local($message_id,$type,$group_specific) = @_;
&check_connected('get_article_part');
if ($group_specific) {
&check_in_group("get_article_part");
} else {
($message_id =~ /^<.*>$/) || ($message_id = "<" . $message_id . ">");
}
local($line);
print NNTP_SERVER "$type $message_id\r\n";
$line = <NNTP_SERVER>;
$Last_Server_Status = $line;
$line =~ /^22[0-2]/
}
sub read_to_dot {&read_to_pattern('^\.\r\n$')}
sub read_to_blank_line {&read_to_pattern('^\s*\r\n$')}
#Assumes pattern does not have a "/" character in it.
#If you want to include a "/" character, replace it with
#"\\/" or '\/'
sub read_to_pattern {
local($pattern) = @_;
local($read,@read);
&check_connected('read_to_pattern');
if (wantarray) {
while (<NNTP_SERVER>) {
print $_ if $Debug;
# m/^\.\r\n$/ && last; #to dot
# m/^\s*\r\n$/ && last; #to blank line
m/$pattern/ && last;
s/^\.\./\./;
push(@read,$_);
}
return @read;
} else {
while (<NNTP_SERVER>) {
print $_ if $Debug;
m/$pattern/ && last;
s/^\.\./\./;
$read .= $_;
}
}
$read;
}
#On success returns an array consisting of
#1. The estimated number of articles in group;
#2. First article number in the group;
#3. Last article number in the group.
#On failure, returns an empty array.
sub set_current_group {
local($group) = @_;
&check_connected('set_current_group');
print NNTP_SERVER "GROUP $group\r\n";
local($line);
$line = <NNTP_SERVER>;
$Last_Server_Status = $line;
($line =~ /^211 (\d+) (\d+) (\d+)/) || return ();
$Current_group = $group;
($1,$2,$3);
}
sub list_newsgroups {
local($line);
&check_connected('list_newsgroups');
print NNTP_SERVER "LIST\r\n";
$line = <NNTP_SERVER>;
$Last_Server_Status = $line;
$line =~ /^215/ || return &nil;
&read_to_dot;
}
#takes one optional arg - if true, then will wait to read the
#terminating response from the server (being polite).
sub close_connection {
&check_connected('close_connection');
print NNTP_SERVER "QUIT\r\n";
$_[0] && ($_ = <NNTP_SERVER>); #I don't really care what the response is.
&initialize;
}
sub get_help {
&check_connected('get_help');
print NNTP_SERVER "HELP\r\n";
$_ = <NNTP_SERVER>;
$Last_Server_Status = $_;
/^100/ || return &nil;
$Help = &read_to_dot;
}
sub supports_command {
local($command) = @_;
$Supports_Command{$command} && return 1;
$Help || &get_help;
($Help =~ /$command/i) ? ($Supports_Command{$command} = 1) : 0;
}
sub get_header_on_range {
local($header,$start,$end) = @_;
&check_connected('get_header_on_range');
&check_in_group("get_header_on_range");
&supports_command("XHDR") || return &nil;
print NNTP_SERVER "XHDR $header $start-$end\r\n";
$_ = <NNTP_SERVER>;
$Last_Server_Status = $_;
/^221/ || return &nil;
&read_to_dot;
}
#use this for bigger range and smaller number of headers
sub headers_from_range1 {
local($start,$end,@headers) = @_;
local($num,$read_headers,@read_headers,%read_headers);
&supports_command("XHDR") || return ();
&check_in_group("headers_from_range1");
foreach $header (@headers) {
@read_headers = &get_header_on_range($header,$start,$end);
foreach (@read_headers) {
chop; chop;
($num,$read_headers) = split(/ /,$_,2);
$read_headers{$num} .= $read_headers . "\0";
}
}
%read_headers;
}
#use this for smaller range and bigger number of headers
sub headers_from_range2 {
local($start,$end,@headers) = @_;
local($read_headers,%read_headers);
&check_in_group("headers_from_range2");
$* = 1;
foreach $num ($start .. $end) {
$read_headers = &get_article_part($num,"HEAD",1);
$read_headers =~ tr/\r//d;
$read_headers =~ s/\n[\t ]+/ /;
foreach $header (@headers) {
($read_headers =~ /^$header: (.*)$/) && ($read_headers{$num} .= $1 . "\0");
}
}
$* = 0;
%read_headers;
}
sub check_connected {
$Connected || (close NNTP_SERVER,
die "nntplib'",$_[0],": Error - not connected to server\n");
}
sub check_in_group {
$Current_group || (close NNTP_SERVER,
die "nntplib'",$_[0],": Error - group specific function used with no current group set\n");
}
sub nil {wantarray ? () : undef}
1;
__END__
To do:
POST
NEWNEWS
NEWGROUPS
LIST parsing
Moderation?
Internal logic about what has been read, etc.