# $Id: nntplib.pl $ # --------------------------------------------------------------------------- # nntplib: A package for handling NNTP interactions. # # This package has been developed by Jack Shirazi # # 23 Sep 1994 (JS) : Initial version # # =========================================================================== # # NOTE all public routines (except connect_to_nntp) are checked to see # that there is a current connection open. If there is not, then the current # process is terminated with an error message. The moral is that you should # always start with connect_to_nntp, and test for its success (and you # can close the connection and reopen it or a new one, but don't call # any other routine in between closing the last and opening the next). # Only one connection can be open at a time. # # # # Public subroutines: # # nntplib'connect_to_nntp([HOST [,PORT]]) # All args optional. Sets up a connection to the news server. If HOST (and PORT) # are given, uses those, else PORT defaults to 119, and HOST is tried to be # obtained from environment variable NNTPSERVER, or if not present, then as # a last hope, sees if there is a host called "news" available. # Returns 1 on success, negative as follows on failure: # -1 No host specified or obtainable from environment # -2 Host unresolvable # -3 Unable to create a socket # -4 Unable to make connection to host:port # -5 Inappropriate response code received from server # # # nntplib'get_article(MESSAGE_ID) # nntplib'get_head(MESSAGE_ID) # nntplib'get_body(MESSAGE_ID) # Given the message id of an article, these routines retrieve the head, # body or all of the article. get_article returns the head and body # as a two string array, the other two return the head/body as a string # respectively. Failure returns empty array or false appropriately. # These do not require a current newsgroup. The message ids are article # specific. # # nntplib'list_newsgroups # Returns a list or string (depending on context) of all the newsgroups # available on this server. Returns undef or empty array on failure. # This could be big, and take a while. Format of each line is # groupname first_article_num last_article_num moderated_status_bool(y/m) # # nntplib'set_current_group(GROUP_NAME) # Sets the current group to GROUP_NAME. Returns empty array on failure, on # success returns an array of three elements consisting of: # 1. The estimated number of articles in group; # 2. First article number in the group; # 3. Last article number in the group. # # nntplib'get_header_on_range(HEADER_STRING,START_RANGE,END_RANGE) # Gets the lines from headers corresponding to HEADER from all articles # in the range START_RANGE to END_RANGE inclusive. Returns as array # or string depending on context. Returns undef or empty array on failure. # You need to have set a current newsgroup for this to work. If it is not, # then the current process is terminated with an error message. # # nntplib'headers_from_range1(START_RANGE,END_RANGE,LIST_OF_HEADERS) # nntplib'headers_from_range2(START_RANGE,END_RANGE,LIST_OF_HEADERS) # These two subroutines (should) do the same thing: return a list # of headers for each article in the range. They are optimized to # use different amounts of message sends to the server. The first # can only be used if the command XHDR is available from the server # - test this with &nntplib'supports_command("XHDR"). It gets the # server to do the grepping for the headers, and only transfers the # results back. For many headers on just a few files, or where XHDR # is not available, the second option should be used - this transfers # every header across one by one, and they are processed at this end # You need to have set a current newsgroup for this to work. If it is not, # then the current process is terminated with an error message. # # # nntplib'close_connection([BOOL]) # Closes the connection. All state information is dumped. Takes an optional # boolean - give it a true argument if you want to be polite and wait for # the closing response from the server. # # # # # Private subroutines: # # nntplib'initialize # Initializes various globals. No useable return value. # # nntplib'get_netaddress(STRING) # Assumes STRING is some sort of Internet hostname (dibble.dob, or 3.2.3.4) # Returns the 4 byte ip address or undef # # nntplib'get_article_part(MESSAGE_ID,RETREIVAL_TYPE,BOOL) # Used by get_article, get_head, get_body. RETREIVAL_TYPE is one of # "ARTICLE", "BODY" or "HEAD". If BOOL is true, then this assumes # the MESSAGE_ID is a number, and that a current group has been set. # If BOOL is true, you need to have set a current newsgroup for this # to work. If it is not, then the current process is terminated with # an error message. Requests an article from the server, but doesn't # read anything except the initial status line. Returns true if # there is lines to be read, false if the status returns no good. # # nntplib'read_to_dot # Reads all the lines from the server until there is a dot on its # own in one line. Folds doubled dots back to single ones. Returns # a string or array of lines according to context. # # nntplib'read_to_blank_line # Reads all the lines from the server until there is a blank line on # its own in one line. Folds doubled dots back to single ones. Returns # a string or array of lines according to context. # # nntplib'read_to_pattern(PATTERN) # Reads all the lines from the server until there is a line # matching PATTERN. Folds doubled dots back to single ones. Returns # a string or array of lines according to context. NOTE that # it may be problematical to include a "/" character in pattern. # (try replacing it with "\/", and use single quotes). # # nntplib'get_help # Gets the help message from the server, or from the help cache # if its been obtained before # # nntplib'supports_command(COMMAND) # Checks in the help message whether COMMAND is supported. Caches # the results of the check, so after the first check its further # checks are efficient. # # nntplib'check_connected # Checks that there is currently a connection - if there isn't, the current # process is terminated with an error message. # # nntplib'nil # Returns undef or empty array depending on context. # #Need the sys/socket.ph file. I assume that the file "www_sys_socket.ph" #references all the necessary items from there. require "www_sys_socket.ph"; package nntplib; &initialize; sub initialize { close NNTP_SERVER; $Connected = 0; $Posting_allowed = 0; $Help = ""; undef %Supports_Command; $Host = ""; $Port = 0; $Debug = 0; $Last_Server_Status = ""; $Current_group = ""; } sub get_netaddress { local($str) = @_; local(@temp) = gethostbyname($str); local($return); if (@temp) { $return = $temp[4]; } elsif ($str =~ /^(\d+).(\d+).(\d+).(\d+)$/ && $1 < 256 && $2 < 256 && $3 < 256 && $4 < 256) { $return = pack("C4",$1,$2,$3,$4); } else { $return = undef; } $return; } #Returns 1 on success. #Falure returns: # -1 No host specified or obtainable from environment # -2 Host unresolvable # -3 Unable to create a socket # -4 Unable to make connection to host:port # -5 Inappropriate response code received from server sub connect_to_nntp { ($Host,$Port) = @_; local($ip_addr,$sock_addr,$line); if ( $Port <= 0 ) { $Port = 119; }; # The default NNTP port is 119 $Host || ($Host = $ENV{'NNTPSERVER'}) || (($Host) = gethostbyname('newshost')) || (($Host) = gethostbyname('news')) || return -1; ($ip_addr = &get_netaddress($Host)) || return -2; $sock_addr = pack('S n a4 x8', &main'AF_INET, $Port, $ip_addr); socket(NNTP_SERVER, &main'PF_INET, &main'SOCK_STREAM, 0) || return -3; connect(NNTP_SERVER,$sock_addr) || return -4; select((select(NNTP_SERVER),$|=1)[0]); $line = ; $Last_Server_Status = $line; if ( $line !~ /^20(\d)/ ) { close NNTP_SERVER; return -5; } $Posting_allowed = $1 ? 0 : 1; $Connected = 1; 1; } sub get_article { local($head,$body); &get_article_part($_[0],"ARTICLE") || return (); $head = &read_to_blank_line; $body = &read_to_dot; ($head,$body); } sub get_head { &get_article_part($_[0],"HEAD") ? &read_to_dot : ""; } sub get_body { &get_article_part($_[0],"BODY") ? &read_to_dot : ""; } #$type should be ARTICLE,BODY or HEAD sub get_article_part { local($message_id,$type,$group_specific) = @_; &check_connected('get_article_part'); if ($group_specific) { &check_in_group("get_article_part"); } else { ($message_id =~ /^<.*>$/) || ($message_id = "<" . $message_id . ">"); } local($line); print NNTP_SERVER "$type $message_id\r\n"; $line = ; $Last_Server_Status = $line; $line =~ /^22[0-2]/ } sub read_to_dot {&read_to_pattern('^\.\r\n$')} sub read_to_blank_line {&read_to_pattern('^\s*\r\n$')} #Assumes pattern does not have a "/" character in it. #If you want to include a "/" character, replace it with #"\\/" or '\/' sub read_to_pattern { local($pattern) = @_; local($read,@read); &check_connected('read_to_pattern'); if (wantarray) { while () { print $_ if $Debug; # m/^\.\r\n$/ && last; #to dot # m/^\s*\r\n$/ && last; #to blank line m/$pattern/ && last; s/^\.\./\./; push(@read,$_); } return @read; } else { while () { print $_ if $Debug; m/$pattern/ && last; s/^\.\./\./; $read .= $_; } } $read; } #On success returns an array consisting of #1. The estimated number of articles in group; #2. First article number in the group; #3. Last article number in the group. #On failure, returns an empty array. sub set_current_group { local($group) = @_; &check_connected('set_current_group'); print NNTP_SERVER "GROUP $group\r\n"; local($line); $line = ; $Last_Server_Status = $line; ($line =~ /^211 (\d+) (\d+) (\d+)/) || return (); $Current_group = $group; ($1,$2,$3); } sub list_newsgroups { local($line); &check_connected('list_newsgroups'); print NNTP_SERVER "LIST\r\n"; $line = ; $Last_Server_Status = $line; $line =~ /^215/ || return &nil; &read_to_dot; } #takes one optional arg - if true, then will wait to read the #terminating response from the server (being polite). sub close_connection { &check_connected('close_connection'); print NNTP_SERVER "QUIT\r\n"; $_[0] && ($_ = ); #I don't really care what the response is. &initialize; } sub get_help { &check_connected('get_help'); print NNTP_SERVER "HELP\r\n"; $_ = ; $Last_Server_Status = $_; /^100/ || return &nil; $Help = &read_to_dot; } sub supports_command { local($command) = @_; $Supports_Command{$command} && return 1; $Help || &get_help; ($Help =~ /$command/i) ? ($Supports_Command{$command} = 1) : 0; } sub get_header_on_range { local($header,$start,$end) = @_; &check_connected('get_header_on_range'); &check_in_group("get_header_on_range"); &supports_command("XHDR") || return &nil; print NNTP_SERVER "XHDR $header $start-$end\r\n"; $_ = ; $Last_Server_Status = $_; /^221/ || return &nil; &read_to_dot; } #use this for bigger range and smaller number of headers sub headers_from_range1 { local($start,$end,@headers) = @_; local($num,$read_headers,@read_headers,%read_headers); &supports_command("XHDR") || return (); &check_in_group("headers_from_range1"); foreach $header (@headers) { @read_headers = &get_header_on_range($header,$start,$end); foreach (@read_headers) { chop; chop; ($num,$read_headers) = split(/ /,$_,2); $read_headers{$num} .= $read_headers . "\0"; } } %read_headers; } #use this for smaller range and bigger number of headers sub headers_from_range2 { local($start,$end,@headers) = @_; local($read_headers,%read_headers); &check_in_group("headers_from_range2"); $* = 1; foreach $num ($start .. $end) { $read_headers = &get_article_part($num,"HEAD",1); $read_headers =~ tr/\r//d; $read_headers =~ s/\n[\t ]+/ /; foreach $header (@headers) { ($read_headers =~ /^$header: (.*)$/) && ($read_headers{$num} .= $1 . "\0"); } } $* = 0; %read_headers; } sub check_connected { $Connected || (close NNTP_SERVER, die "nntplib'",$_[0],": Error - not connected to server\n"); } sub check_in_group { $Current_group || (close NNTP_SERVER, die "nntplib'",$_[0],": Error - group specific function used with no current group set\n"); } sub nil {wantarray ? () : undef} 1; __END__ To do: POST NEWNEWS NEWGROUPS LIST parsing Moderation? Internal logic about what has been read, etc.