wwwgopher.pl

Brian T. Shelden (shelden@spoke.law.cornell.edu)
Mon, 17 Oct 1994 14:24:52 -0400


 
# $Id: wwwgopher.pl,v 0.10 1994/10/17 18:12:34 shelden Exp $
# ---------------------------------------------------------------------------
# wwwgopher: A package for sending gopher requests and handling responses 
#	   as if in http.  This package is designed for use by www.pl
#          for handling URL's with the "gopher" scheme designator.
#
# This package is heavily based on the wwwhttp package in libwww-perl.
#
#					--Brian T. Shelden
#					  Legal Information Institute
#					  Cornell Law School
#
#					  bts1@cornell.edu
#					  http://www.law.cornell.edu/~shelden/
#					  14 October 1994
#
# ---------------------------------------------------------------------------
require "wwwerror.pl";
require "sys/socket.ph";

package wwwgopher;

%AllowedMethods = (     # Specify what GOPHER request methods are supported
    'GET',        1,    # 1 = Allowed without content in request
    'HEAD',       1,
#    'POST',       2,    # 2 = Allowed and with content in request
#    'PUT',        2,
#    'DELETE',     1,    # 0 = Not allowed (same as undefined)
#    'LINK',       1,
#    'UNLINK',     1,
#    'CHECKIN',    2,
#    'CHECKOUT',   1,
#    'SHOWMETHOD', 1,
);

#
# Setup the socket parameters for this process
#

$SockAddr = 'S n a4 x8';
chop($ThisHost = `hostname`);
if (!$ThisHost) { die "Can't get hostname of this host, stopped"; }
($name, $aliases, $Proto) = getprotobyname("tcp");
($name, $aliases, $addrtype, $len, $ThisAddr) = gethostbyname($ThisHost);
$ThisSock = pack($SockAddr, &main'AF_INET, 0, $ThisAddr);



# ===========================================================================
# request(): perform an gopher request for the $object at the GOPHER server
#            on the specified $host and $port, giving up after $timeout secs.
#            Return the HTTP response code along with (as named parameters)
#            the parsed response %headers and document $content.
#
# This is a vastly modified version of Oscar's http'get() dated 28/3/94 in
#      <ftp://cui.unige.ch/PUBLIC/oscar/scripts/http.pl>
# including contributions from Marc van Heyningen and Martijn Koster.
#


# It should be easy to add stuff delivered via gopher-protocol, like
# 'I'.  Need wwwcso.pl for cso indexes, really.  I wonder why
# we don't have cso://host/ ?

%supported = (
	'1',	'menu',
	'0',	'file',
	'h',	'html file',
	'9',	'binary file',
);

%gopher2mime = (
	'0',	'text/plain',
	'h',	'text/html',
	'1',	'text/html',
	'9',	'application/octet-stream', 
	'I',	'image/gif',		# ???  Add more if you want to
					# Support them.
);

sub request
{
    local($method, $host, $port, $object, *headers, *content, $timeout) = @_;
    local($fqdn, $aliases, $addrtype, $len, $thataddr);
    local($reqstr, $hd, $val);
    local($gophertype);

    local($response) = 0;
    local($resphead) = '';

    if (!$AllowedMethods{$method})
    {
        return &wwwerror'onrequest($wwwerror'RC_bad_request_client, $method,
                          'gopher', $host, $port, $object, *headers, *content,
                          "GET is the only method that makes sense for gopher");
    }

    local($gophertype) = substr($object, $[, 1);
#warn "Object: \"$object\"...\n";
    if ($object eq "/") {
	# 'Tis a request for the top of the gopher tree: gopher://host/
	$gophertype = 1;
	$reqstr = '';
    }
    else {
	$gophertype = substr($object, $[ + 1, 1);
	$reqstr = $object;
	substr($reqstr, $[, 2) = '';
    }

#warn "$0: Is gophertype $gophertype supported?\n";
    if (! $supported{$gophertype}) {
	$response = $wwwerror'RC_not_implemented_client;
            return &wwwerror'onrequest($response, $method,
                         'gopher', $host, $port, $object, *headers, *content,
                         "Unsupported Gopher Type: $gophertype");
    }

    $reqstr = &wwwurl'unescape($reqstr);
    
#warn "Request string: $reqstr\n";

#    foreach $hd (keys(%headers))
    while(0)			# No headers in a gopher request...
    {
        if ($val = $headers{$hd})
        {
            $reqstr .= "$hd: $val\r\n";
        }
    }
    $reqstr .= "\r\n";

    if (!$port) { $port = 70; };   # The default gopher port is always 70

    if ($host =~ /^\d+\.\d+\.\d+\.\d+$/)
    {
        $thataddr = pack('c4', split(/\./, $host));
    }
    else
    {
        ($fqdn, $aliases, $addrtype, $len, $thataddr) = gethostbyname($host);
        if (!$fqdn)
        {
            return &wwwerror'onrequest($wwwerror'RC_connection_failed, $method,
                         'gopher', $host, $port, $object, *headers, *content,
                         "Cannot find hostname $host");
        }
    }

    $that = pack($SockAddr, &main'AF_INET, $port, $thataddr);
    if (!( socket(FS, &main'AF_INET, &main'SOCK_STREAM, $Proto) &&
           bind(FS, $ThisSock) ))
    {
        return &wwwerror'onrequest($wwwerror'RC_connection_failed, $method,
                         'gopher', $host, $port, $object, *headers, *content,
                         "Failed bind to our local socket: $!");
    }

    local($/);
    $run_it = <<'EOF';
        $SIG{'ALRM'} = "wwwgopher'timeout";
        $response = $wwwerror'RC_bad_request;
#        $headers{"x-gopher-error"} = "Nothing Available";
        alarm($timeout);
        connect(FS, $that) || die "Cannot connect to $host:$port, $! \n";
        select((select(FS), $| = 1)[0]); # Make FS unbuffered
        print FS $reqstr; 
        if ($AllowedMethods{$method} == 2) { print FS $content; }

        $/ = "\n";
        $_ = <FS>;

	local($msg, $errorpath, $errorhost, $errorport) = split("\t");
	substr($msg, $[, 1) = '';
        if (m:^3: || ($errorhost eq 'error.host')) 
        {
            $response = $wwwerror'RC_bad_request; # Probably could be more informative...
	    $headers{"x-gopher-error"} = $msg;
            undef($/);
            $content = <FS>;
        }
        else			        # Not an error?  
        {
            $response = $wwwerror'RC_ok; # I have no idea if it's good or not
	    $headers{'x-gopher-error'} = '';
            undef($/);
            $content = $_;
            $_ = <FS>;            
            $content .= $_;
        }
        $SIG{'ALRM'} = "IGNORE";
EOF

    eval $run_it;

    if ($@)
    {
        $SIG{'ALRM'} = "IGNORE";
        close(FS);
        if ($@ =~ /^Time/o) { $response = $wwwerror'RC_timed_out;         }
        else                { $response = $wwwerror'RC_connection_failed; }

        return &wwwerror'onrequest($response, $method, 'gopher', $host, $port,
                                   $object, *headers, *content, $@);
    }
    close(FS);

#warn "That dude was gopher type of $gophertype\n";
    if ($gophertype eq '1') {
	# Gopher Menu.  Convert to HTML
	$content = &menu2html($content);
    }

    # I couldn't get &wwwmime'fakehead to work for some reason, so 
    # here's my cheap hack...
    $headers{'date'} = &wwwdates'wtime(time, 'GMT');
    $headers{'mime-version'} = '1.0';
    $headers{'content-length'} = length($content);
    $headers{'content-type'} = $gopher2mime{$gophertype} || "text/plain";

    return $response;
}

sub timeout { die "Timed Out\n"; }


sub gopher2url {
    local($gophertype, $path, $host, $port) = @_;
    local($tmp, @tmp, $_);

    if ($gophertype eq '8') {
	# telnet session
	$tmp = join('', 'telnet://', $path ? "$path@" : '', $host, ':', $port);
    }
    elsif ($gophertype eq 'T') {
	# telnet session
	$tmp = join('', 'tn3270://', $path ? "$path@" : '', $host, ':', $port);
    }
    else {
#	if ($gophertype eq '1' ||
#	$gophertype eq '0' ||
#	$gophertype eq '2' ||
#	$gophertype eq 'h' 
#	) {
	# Notice that / is not escaped below...
	$path = &wwwurl'escape($path, '[\x00-\x20"#%;<>?\x7F-\xFF]');
	$tmp = join('', 'gopher://', $host, ($port == 70) ? ":$port" : ''
		, '/', $gophertype, $path);
   }
   return $tmp;
}

sub menu2html {
	local($menu) = @_;
	local($_, $type, $pretty, $path, $host, $port, $tmp, $url);

	$menu =~ s/\r//g;
	$tmp = <<"EOF";
gopher menu generated by menu2html() EOF for (split("\n", $menu)) { next if /^\./; $type = substr($_, $[, 1); ($pretty, $path, $host, $port) = split("\t"); substr($pretty, $[, 1) = ''; $url = &gopher2url($type, $path, $host, $port); $tmp .= <<"EOF"; $pretty
EOF } $tmp .= <<"EOF"; EOF return $tmp } 1;