[PATCH] LWP 5.47, Keep-Alive, and HTTPS through a proxy server

WORENKLEIN, David, GCM (David.Worenklein@gcm.com)
Wed, 5 Apr 2000 16:12:22 -0400


------_=_NextPart_000_01BF9F3B.41E55A60
Content-Type: text/plain;
	charset="iso-8859-1"

I've merged Scott Gifford's "Keep-Alive" patch with my own "HTTPS through a
proxy server", and applied them to libwww-perl-5.47.  The resulting diff is
attached.

I've included Scott's original e-mail that describes his changes.  My
change is simple:  saying 
GET 'https://some.site'
will now cause LWP to connect to the proxy server, issue a "CONNECT
some.site::443 HTTP/1.0" request, and then pass SSL messages through the
resulting connection.

 <<libwww-perl-5.47.patch>>  <<Persistent connections patch for
libwww-perl>> 

**********************************************************************
This e-mail is intended only for the addressee named above.
As this e-mail may contain confidential or privileged information,
if you are not the named addressee, you are not authorised to
retain, read, copy or disseminate this message or any part of it.
************************************************************************
------_=_NextPart_000_01BF9F3B.41E55A60
Content-Type: application/octet-stream;
	name="libwww-perl-5.47.patch"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="libwww-perl-5.47.patch"

diff -c5 -r -w libwww-perl-5.47/lib/LWP/Protocol/http.pm =
libwww-perl-5.47-patch/lib/LWP/Protocol/http.pm
*** libwww-perl-5.47/lib/LWP/Protocol/http.pm	Thu Nov  4 15:31:21 1999
--- libwww-perl-5.47-patch/lib/LWP/Protocol/http.pm	Wed Apr  5 15:55:09 =
2000
***************
*** 18,39 ****
  				   # instance MacPerl defines it to "\012\015"
 =20
  sub _new_socket
  {
      my($self, $host, $port, $timeout) =3D @_;
 =20
      local($^W) =3D 0;  # IO::Socket::INET can be noisy
!     my $sock =3D IO::Socket::INET->new(PeerAddr =3D> $host,
  				     PeerPort =3D> $port,
  				     Proto    =3D> 'tcp',
  				     Timeout  =3D> $timeout,
  				    );
      unless ($sock) {
  	# IO::Socket::INET leaves additional error messages in $@
  	$@ =3D~ s/^.*?: //;
  	die "Can't connect to $host:$port ($@)";
      }
      $sock;
  }
 =20
 =20
  sub _check_sock
--- 18,60 ----
  				   # instance MacPerl defines it to "\012\015"
 =20
  sub _new_socket
  {
      my($self, $host, $port, $timeout) =3D @_;
+     my($sock);
 =20
+     if ($self->connection_cache_active())
+     {
+         if =
(defined($sock=3D$self->connection_cache_get($host,$port)))
+         {
+ 	    # Make an effort to make sure the connection is still there.
+             unless (IO::Select->new($sock)->has_exception(0))
+ 	    {
+                 LWP::Debug::debug("Using kept-alive connection to =
$host:$port");
+                 $self->{socket_from_cache}=3D1;
+                 return $sock;
+             }
+ 	    &LWP::Debug::debug("Exception occured on kept-alive connection =
to $host:$port; closing cached socket.");
+         }
+     }
+     $self->{socket_from_cache}=3Dundef;
      local($^W) =3D 0;  # IO::Socket::INET can be noisy
!     $sock =3D IO::Socket::INET->new(PeerAddr =3D> $host,
  				  PeerPort =3D> $port,
  				  Proto    =3D> 'tcp',
  				  Timeout  =3D> $timeout,
  				  );
      unless ($sock) {
  	# IO::Socket::INET leaves additional error messages in $@
  	$@ =3D~ s/^.*?: //;
  	die "Can't connect to $host:$port ($@)";
      }
+     if ($self->connection_cache_active())
+     {
+         $self->connection_cache_add($host,$port,$sock);
+         LWP::Debug::debug("Caching socket at $host:$port");
+     }
      $sock;
  }
 =20
 =20
  sub _check_sock
***************
*** 46,55 ****
--- 67,113 ----
      my($self, $res, $sock) =3D @_;
      $res->header("Client-Peer" =3D>
  		 $sock->peerhost . ":" . $sock->peerport);
  }
 =20
+ sub connection_cache
+ {
+     my($self,$pCache)=3D@_;
+=20
+     LWP::Debug::debug("Activating connection cache for http");
+     $self->{"connection_cache"}=3D$pCache;
+ }
+=20
+ sub connection_cache_add
+ {
+   my($self,$host,$port,$sock)=3D@_;
+   my($lcHost,$lcPort)=3D(lc $host, lc $port);
+  =20
+   $self->{"connection_cache"}{"$lcHost:$lcPort"}=3D$sock;
+ }
+=20
+ sub connection_cache_del
+ {
+   my($self,$host,$port)=3D@_;
+   my($lcHost,$lcPort)=3D(lc $host, lc $port);
+=20
+   delete $self->{"connection_cache"}{"$lcHost:$lcPort"};
+ }
+=20
+ sub connection_cache_get
+ {
+   my($self,$host,$port)=3D@_;
+   my($lcHost,$lcPort)=3D(lc $host, lc $port);
+=20
+   return $self->{"connection_cache"}{"$lcHost:$lcPort"};
+ }
+=20
+ sub connection_cache_active
+ {
+   my($self)=3D@_;
+   return (defined($self->{"connection_cache"}));
+ }
 =20
  sub request
  {
      my($self, $request, $proxy, $arg, $size, $timeout) =3D @_;
      LWP::Debug::trace('()');
***************
*** 79,95 ****
  	$port =3D $url->port;
  	$fullpath =3D $url->path_query;
  	$fullpath =3D "/" unless length $fullpath;
      }
 =20
!     # connect to remote site
!     my $socket =3D $self->_new_socket($host, $port, $timeout);
!     $self->_check_sock($request, $socket);
!=20
!     my $sel =3D IO::Select->new($socket) if $timeout;
!=20
!     my $request_line =3D "$method $fullpath HTTP/1.0$CRLF";
 =20
      my $h =3D $request->headers->clone;
      my $cont_ref =3D $request->content_ref;
      $cont_ref =3D $$cont_ref if ref($$cont_ref);
      my $ctype =3D ref($cont_ref);
--- 137,155 ----
  	$port =3D $url->port;
  	$fullpath =3D $url->path_query;
  	$fullpath =3D "/" unless length $fullpath;
      }
 =20
! #     # connect to remote site
! #     my $socket =3D $self->_new_socket($host, $port, $timeout);
! #     $self->_check_sock($request, $socket);
! #
! #     my $sel =3D IO::Select->new($socket) if $timeout;
!=20
!     my $request_line =3D ($method eq "CONNECT") ?
!       "$method $host:$port HTTP/1.0$CRLF" :
! 	"$method $fullpath HTTP/1.0$CRLF";
 =20
      my $h =3D $request->headers->clone;
      my $cont_ref =3D $request->content_ref;
      $cont_ref =3D $$cont_ref if ref($$cont_ref);
      my $ctype =3D ref($cont_ref);
***************
*** 112,132 ****
      $hhost =3D~ s/^([^\@]*)\@//;  # get rid of potential "user:pass@"
      $h->header('Host' =3D> $hhost) unless defined $h->header('Host');
 =20
      $h->remove_header('Connection');  # need support here to be =
useful
 =20
      # add authorization header if we need them.  HTTP URLs do
      # not really support specification of user and password, but
      # we allow it.
      if (defined($1) && not $h->header('Authorization')) {
  	$h->authorization_basic(split(":", $1));
      }
 =20
!     my $buf =3D $request_line . $h->as_string($CRLF) . $CRLF;
      my $n;  # used for return value from syswrite/sysread
 =20
      die "write timeout" if $timeout && !$sel->can_write($timeout);
      $n =3D $socket->syswrite($buf, length($buf));
      die $! unless defined($n);
      die "short write" unless $n =3D=3D length($buf);
      LWP::Debug::conns($buf);
 =20
--- 172,210 ----
      $hhost =3D~ s/^([^\@]*)\@//;  # get rid of potential "user:pass@"
      $h->header('Host' =3D> $hhost) unless defined $h->header('Host');
 =20
      $h->remove_header('Connection');  # need support here to be =
useful
 =20
+     # If we're caching connections, ask the server not to shut
+     # down the connection.
+     if ($self->connection_cache_active())
+     {
+         $h->header('Connection','Keep-Alive');
+     }
+=20
      # add authorization header if we need them.  HTTP URLs do
      # not really support specification of user and password, but
      # we allow it.
      if (defined($1) && not $h->header('Authorization')) {
  	$h->authorization_basic(split(":", $1));
      }
 =20
!     # connect to remote site
!     my($socket,$sel,$buf);
      my $n;  # used for return value from syswrite/sysread
+     my $response;
+   SENDREQ: {
+       eval {
+ 	$socket =3D $self->_new_socket($host, $port, $timeout);
+ 	$self->_check_sock($request, $socket);
+=20
+ 	$sel =3D IO::Select->new($socket) if $timeout;
+ =09
+ 	$buf =3D $request_line . $h->as_string($CRLF) . $CRLF;
 =20
  	die "write timeout" if $timeout && !$sel->can_write($timeout);
+=20
  	$n =3D $socket->syswrite($buf, length($buf));
  	die $! unless defined($n);
  	die "short write" unless $n =3D=3D length($buf);
  	LWP::Debug::conns($buf);
 =20
***************
*** 147,159 ****
      }
 =20
      # read response line from server
      LWP::Debug::debug('reading response');
 =20
-     my $response;
      $buf =3D '';
-=20
      # Inside this loop we will read the response line and all headers
      # found in the response.
      while (1) {
  	{
  	    die "read timeout" if $timeout && !$sel->can_read($timeout);
--- 225,235 ----
***************
*** 170,179 ****
--- 246,256 ----
  	    $response =3D HTTP::Response->new($code, $msg);
  	    $response->protocol($ver);
 =20
  	    # ensure that we have read all headers.  The headers will be
  	    # terminated by two blank lines
+=20
  	    until ($buf =3D~ /^\015?\012/ || $buf =3D~ /\015?\012\015?\012/) =
{
  	      # must read more if we can...
  	      LWP::Debug::debug("need more header data");
  	      die "read timeout" if $timeout && !$sel->can_read($timeout);
  	      $n =3D $socket->sysread($buf, $size, length($buf));
***************
*** 183,192 ****
--- 260,279 ----
  	    }
 =20
  	    # now we start parsing the headers.  The strategy is to
  	    # remove one line at a time from the beginning of the header
  	    # buffer ($res).
+=20
+ 	    # Now we've written a line, and read a line;
+ 	    # we're pretty sure the connection is up.
+ 	    # Disable the special-case for cached connections.
+ 	    #   --sg
+ 	    if (defined($self->{socket_from_cache}))
+ 	      {
+ 		delete $self->{socket_from_cache};
+ 	      }
+=20
  	    my($key, $val);
  	    while ($buf =3D~ s/([^\012]*)\012//) {
  	      my $line =3D $1;
 =20
  	      # if we need to restore as content when illegal headers
***************
*** 228,255 ****
  	} else {
  	    # need more data
  	    LWP::Debug::debug("need more status line data");
  	}
      };
      $response->request($request);
      $self->_get_sock_info($response, $socket);
 =20
 =20
      my $usebuf =3D length($buf) > 0;
      $response =3D $self->collect($arg, $response, sub {
          if ($usebuf) {
  	    $usebuf =3D 0;
  	    return \$buf;
  	}
  	die "read timeout" if $timeout && !$sel->can_read($timeout);
  	my $n =3D $socket->sysread($buf, $size);
  	die $! unless defined($n);
  	#LWP::Debug::conns($buf);
  	return \$buf;
  	} );
 =20
!     $socket->close;
 =20
      $response;
  }
 =20
  1;
--- 315,398 ----
  	  } else {
  	    # need more data
  	    LWP::Debug::debug("need more status line data");
  	  }
  	};
+       };  # end eval
+=20
+       if ($@)
+ 	{
+ 	  if (defined($self->{socket_from_cache}))
+ 	    {
+ 	      LWP::Debug::debug("Error on cached connection: '$@'.  Trying =
again with new connection.");
+ 	      undef $socket;
+ 	      $self->connection_cache_del($host,$port);
+ 	      redo SENDREQ;
+ 	    }
+ 	  else
+ 	    {
+ 	      die $@;
+ 	    }
+ 	}
+     } # end SENDREQ block
+=20
      $response->request($request);
      $self->_get_sock_info($response, $socket);
 =20
 =20
      my $usebuf =3D length($buf) > 0;
+     ### DCW ### -- don't collect on CONNECT
+     my $clen=3D0;
+     if ($self->connection_cache_active())
+       {
+ 	$clen=3D$response->header("Content-Length");
+       }
+     my $curlen=3D0;
+ =09
+     if (defined($response->header("Connection")) &&
+ 	$response->header("Connection") !~ /\bkeep-alive\b/i)
+       {
+ 	LWP::Debug::debug("Server requested connection be closed");
+ 	if ($self->connection_cache_get($host,$port))
+ 	  {
+ 	    $self->connection_cache_delete($host,$port);
+ 	  }
+ 	$clen=3D0;
+       }
+=20
+     if ($clen)
+       {
+ 	LWP::Debug::debug("Content-Length is $clen bytes");
+       }
+=20
+     if ($method ne "CONNECT") {
        $response =3D $self->collect($arg, $response, sub {
  				   if ($usebuf) {
  				     $usebuf =3D 0;
  				     return \$buf;
  				   }
+ 				   if ($clen)
+ 				     {
+ 				       if (($curlen =3D=3D $clen))
+ 					 {
+ 					   return undef;
+ 					 }
+ 				     }
  				   die "read timeout" if $timeout && !$sel->can_read($timeout);
  				   my $n =3D $socket->sysread($buf, $size);
  				   die $! unless defined($n);
+ 				   $curlen +=3D $n;
  				   #LWP::Debug::conns($buf);
  				   return \$buf;
  				 } );
+     }
+=20
+     # Don't close it -- just let it go out of scope.  If it's being
+     # cached, it will be referenced elsewhere, so won't be destroyed.
 =20
!     #   --sg
!     #    $socket->close;
 =20
      $response;
  }
 =20
  1;
Only in libwww-perl-5.47-patch/lib/LWP/Protocol: http.pm.orig
Only in libwww-perl-5.47-patch/lib/LWP/Protocol: http.pm.rej
diff -c5 -r -w libwww-perl-5.47/lib/LWP/Protocol/https.pm =
libwww-perl-5.47-patch/lib/LWP/Protocol/https.pm
*** libwww-perl-5.47/lib/LWP/Protocol/https.pm	Mon Sep 20 08:48:37 1999
--- libwww-perl-5.47-patch/lib/LWP/Protocol/https.pm	Wed Apr  5 =
15:58:18 2000
***************
*** 26,37 ****
  @ISA=3Dqw(LWP::Protocol::http);
 =20
  sub _new_socket
  {
      my($self, $host, $port, $timeout) =3D @_;
      local($^W) =3D 0;  # IO::Socket::INET can be noisy
!     my $sock =3D $SSL_CLASS->new(PeerAddr =3D> $host,
  			       PeerPort =3D> $port,
  			       Proto    =3D> 'tcp',
  			       Timeout  =3D> $timeout,
  			      );
      unless ($sock) {
--- 26,53 ----
  @ISA=3Dqw(LWP::Protocol::http);
 =20
  sub _new_socket
  {
      my($self, $host, $port, $timeout) =3D @_;
+ 	my($sock);
+     if ($self->connection_cache_active())
+     {
+         if =
(defined($sock=3D$self->connection_cache_get($host,$port)))
+         {
+ 	    # Make an effort to make sure the connection is still there.
+             unless (IO::Select->new($sock)->has_exception(0))
+ 	    {
+                 LWP::Debug::debug("Using kept-alive connection to =
$host:$port");
+                 $self->{socket_from_cache}=3D1;
+                 return $sock;
+             }
+ 	    &LWP::Debug::debug("Exception occured on kept-alive connection =
to $host:$port; closing cached socket.");
+         }
+     }
+     $self->{socket_from_cache}=3Dundef;
      local($^W) =3D 0;  # IO::Socket::INET can be noisy
!     $sock =3D $SSL_CLASS->new(PeerAddr =3D> $host,
  							PeerPort =3D> $port,
  							Proto    =3D> 'tcp',
  							Timeout  =3D> $timeout,
  			      );
      unless ($sock) {
***************
*** 67,73 ****
--- 83,163 ----
  	$res->header("Client-SSL-Cert-Subject" =3D> $cert->subject_name);
  	$res->header("Client-SSL-Cert-Issuer" =3D> $cert->issuer_name);
      }
      $res->header("Client-SSL-Warning" =3D> "Peer certificate not =
verified");
  }
+=20
+=20
+ sub request
+ {
+   my $self =3D shift;
+   my($request, $proxy, $arg, $size, $timeout) =3D @_;
+   LWP::Debug::trace('()');
+=20
+   return $self->SUPER::request(@_)
+ 	unless ($proxy);
+=20
+   # We need to get through the proxy server with a regular
+   # http connection, then we transmogrify the connection into
+   # a secure socket.
+=20
+   my $url  =3D $request->url;
+   my $host =3D $url->host;
+   my $port =3D $url->port;
+=20
+   my $socket;
+=20
+   if (!$self->connection_cache_get($host, $port)) {
+ 	my $remember_cache =3D $self->{"connection_cache"};
+ 	$self->{"connection_cache"} =3D undef;
+ 	$socket=3D $self->SUPER::_new_socket($proxy->host, $proxy->port, =
$timeout);
+ 	LWP::Debug::trace("Creating new " . ref($socket) . " to get through =
firewall");
+ 	$self->SUPER::_check_sock($request, $socket);
+ 	$self->{"connection_cache"} =3D $remember_cache;
+=20
+ 	my $proxy_request  =3D $request->clone;
+ 	$proxy_request->method("CONNECT");
+ #   $proxy_request->uri(new URI("$host:$port"));
+=20
+ 	my $proxy_protocol =3D LWP::Protocol::create('http');
+ 	$proxy_protocol->connection_cache_add( $host, $port, $socket );
+=20
+ 	LWP::Debug::trace("Trying to CONNECT through the proxy server");
+=20
+=20
+ 	my $remove_cache=3D0;
+ 	if (!$self->connection_cache_active()) {
+ 	  $remove_cache=3D1;
+ 	  $self->{"connection_cache"} =3D {};
+ 	}
+ 	$self->connection_cache_add( $host, $port, $socket );
+=20
+ 	my $proxy_response =3D $proxy_protocol->request($proxy_request, =
undef, $arg, $size, $timeout);
+=20
+ 	if ($remove_cache) {
+ 	  $self->{"connection_cache"} =3D undef;
+ 	}
+=20
+ 	# Did we get through the proxy server?
+ 	return $proxy_response
+ 	  unless($proxy_response->code =3D=3D 200);
+=20
+ 	# Turn the socket into an SSL socket
+ 	LWP::Debug::trace("Transmogrifying our socket");
+ 	bless $socket, "Net::SSL";
+ 	*$socket->{'ssl_ctx'} =3D Net::SSL::_default_context();
+ 	my $ssl =3D Crypt::SSLeay::Conn->new(*$socket->{'ssl_ctx'}, =
$socket);
+ 	if ($ssl->connect <=3D 0) {
+ 	  # XXX should obtain the real SSLeay error message
+ 	  $socket->_error("SSL negotiation failed");
+ 	  return new HTTP::Response &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ 		'SSL negotiation failed.';
+ 	}
+ 	*$socket->{'ssl_ssl'} =3D $ssl;
+   }
+=20
+   return $self->SUPER::request($request, undef, $arg, $size, =
$timeout);
+ }
+=20
+=20
 =20
  1;
diff -c5 -r -w libwww-perl-5.47/lib/LWP/UserAgent.pm =
libwww-perl-5.47-patch/lib/LWP/UserAgent.pm
*** libwww-perl-5.47/lib/LWP/UserAgent.pm	Thu Nov  4 15:21:01 1999
--- libwww-perl-5.47-patch/lib/LWP/UserAgent.pm	Wed Apr  5 11:49:40 =
2000
***************
*** 129,138 ****
--- 129,139 ----
  		'cookie_jar'  =3D> undef,
  		'use_eval'    =3D> 1,
                  'parse_head'  =3D> 1,
                  'max_size'    =3D> undef,
  		'no_proxy'    =3D> [],
+                 'connection_cache' =3D> undef,
  	}, $class;
      }
  }
 =20
 =20
***************
*** 188,197 ****
--- 189,200 ----
      if ($@) {
  	$@ =3D~ s/\s+at\s+\S+\s+line\s+\d+\.?\s*//;  # remove file/line =
number
  	return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED, $@)
      }
     =20
+     LWP::Debug::debug("Got protocol object at $protocol");
+=20
      # Extract fields that will be used below
      my ($agent, $from, $timeout, $cookie_jar,
          $use_eval, $parse_head, $max_size) =3D
        @{$self}{qw(agent from timeout cookie_jar
                    use_eval parse_head max_size)};
***************
*** 204,213 ****
--- 207,228 ----
 =20
      # Transfer some attributes to the protocol object
      $protocol->parse_head($parse_head);
      $protocol->max_size($max_size);
     =20
+     if (defined($self->{connection_cache}))
+     {
+         eval {
+             $protocol->connection_cache($self->{connection_cache});
+         };
+         if ($@)
+         {
+             warn("Tried to use connection caching with protocol that =
does not support it.\n");
+         }
+ 	LWP::Debug::debug("Configured connection_cache for protocol");
+     }
+=20
      my $response;
      if ($use_eval) {
  	# we eval, and turn dies into responses below
  	eval {
  	    $response =3D $protocol->request($request, $proxy,
***************
*** 229,238 ****
--- 244,266 ----
      $cookie_jar->extract_cookies($response) if $cookie_jar;
      $response->header("Client-Date" =3D> HTTP::Date::time2str(time));
      return $response;
  }
 =20
+ # Debugging function.
+ sub dump_connection_cache
+ {
+     my ($self)=3D@_;
+=20
+     my $i;
+=20
+     print "Dumping connection cache =
(=3D$self->{connection_cache})\n";
+     foreach $i (keys %{$self->{connection_cache}})
+     {
+         print "Have connection to $i\n";
+     }
+ }
 =20
  =3Ditem $ua->request($request, $arg [, $size])
 =20
  Process a request, including redirects and security.  This method may
  actually send several different simple requests.
***************
*** 466,475 ****
--- 494,533 ----
  sub from       { shift->_elem('from',      @_); }
  sub cookie_jar { shift->_elem('cookie_jar',@_); }
  sub parse_head { shift->_elem('parse_head',@_); }
  sub max_size   { shift->_elem('max_size',  @_); }
 =20
+ =3Ditem $ua->connection_cache([$boolean])
+=20
+ Get/set a value indicating whether we should try to cache connections
+ between requests.  If this is activated, HTTP sockets won't be closed
+ after a request is completed, but will be stored as part of the
+ UserAgent.  If, later on, we try to connect to the same host and =
port,
+ this connection will be reused.
+=20
+ If you pass a value to this function, all open connections will be
+ closed.
+=20
+ =3Dcut
+=20
+ sub connection_cache {
+   my($self,$setting)=3D@_;
+=20
+   if (defined($setting))
+   {
+     if ($setting)
+     {
+       $self->{connection_cache} =3D {};
+     }
+     else
+     {
+       $self->{connection_cache} =3D undef;
+     }
+   }
+   return defined($self->{connection_cache});
+ }
+    =20
  # depreciated
  sub use_eval   { shift->_elem('use_eval',  @_); }
  sub use_alarm
  {
      Carp::carp("LWP::UserAgent->use_alarm(BOOL) is a no-op")

------_=_NextPart_000_01BF9F3B.41E55A60
Content-Type: message/rfc822
Content-Description: Persistent connections patch for libwww-perl

Message-ID: <72EB16749642D3118C4D0008C7B18A000109DECF@gpmal004.gcm.com>
From: Scott Gifford <sgifford@tir.com>
To: libwww@perl.org
Subject: Persistent connections patch for libwww-perl
Date: Tue, 2 Nov 1999 03:27:10 -0400 
MIME-Version: 1.0
X-Mailer: Internet Mail Service (5.5.2651.18)
List-Help: <mailto:libwww-help@perl.org>
List-Unsubscribe: <mailto:libwww-unsubscribe@perl.org>
Content-Type: multipart/mixed;
	boundary="----_=_NextPart_002_01BF9F3B.41E55A60"


------_=_NextPart_002_01BF9F3B.41E55A60
Content-Type: text/plain;
	charset="iso-8859-1"

Hello,

  I just finished patching up libwww-perl to handle persistent
connections, and thought I'd pop onto this list to post my patch and
make sure it works.  Apologies if this duplicates somebody else's
efforts; I just threw it together for a project, without checking if
anybody else was working on it.

  The patch adds a 'connection_cache' method to LWP::UserAgent, which
takes a boolean argument to turn connection caching on or off.  When
it is on, a connection cache is maintained as part of the UserAgent
object.  It is passed in to protocol handlers that support it (right
now just http), and they can access and modify the cache.  When it is
off, LWP behaves identically to before the patch.

  The bulk of the work is done in LWP::Protocol::http.  The
_new_socket method has been changed to check the cache first, and
store opened connections in the cache.  The request method has been
rearranged a little bit, to build all the headers and the request line
before connecting the socket.  This made it easier to wrap the whole
connect/send request/read response code inside a block and an eval, so
we can detect a cached connection we thought was open that wasn't, and
retry the whole thing if we need to.  If connection caching is
activated, we add a "Connection: Keep-Alive" header to our request,
and if we get the same back in our response, we look at the
Content-Length, and only read that may bytes before returning, leaving
the connection open.  We never explicitly close the socket now; we
rely on Perl to close it correctly when all references to it have
disappeared.  This makes managing the cached connections much simpler.

  This patch implements HTTP/1.0 style connection caching.  It works
correctly with or without proxies.  It never shuts connections down
for the lifetime of the UserAgent unless there is an error or the
server requests it; this could be considered a bug.

  Let me know if you have any problems with this patch.  I have tried
to make the changes as straightforward as possible, and as transparent
as possible if you don't turn on the connection_cache option.

-----ScottG.



------_=_NextPart_002_01BF9F3B.41E55A60
Content-Type: application/octet-stream;
	name="libwww-perl-keepalive.patch"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="libwww-perl-keepalive.patch"

*** /usr/lib/perl5/site_perl/5.005/LWP/Protocol/http.pm	Fri Mar 19 =
17:03:10 1999
--- LWP/Protocol/http.pm	Tue Nov  2 02:09:20 1999
***************
*** 1,5 ****
  #
! # $Id: http.pm,v 1.46 1999/03/19 22:03:10 gisle Exp $
 =20
  package LWP::Protocol::http;
 =20
--- 1,5 ----
  #
! # $Id: http.pm,v 1.2 1999/11/02 07:09:20 sgifford Exp $
 =20
  package LWP::Protocol::http;
 =20
***************
*** 20,37 ****
  sub _new_socket
  {
      my($self, $host, $port, $timeout) =3D @_;
!=20
      local($^W) =3D 0;  # IO::Socket::INET can be noisy
!     my $sock =3D IO::Socket::INET->new(PeerAddr =3D> $host,
! 				     PeerPort =3D> $port,
! 				     Proto    =3D> 'tcp',
! 				     Timeout  =3D> $timeout,
! 				    );
      unless ($sock) {
  	# IO::Socket::INET leaves additional error messages in $@
  	$@ =3D~ s/^.*?: //;
  	die "Can't connect to $host:$port ($@)";
      }
      $sock;
  }
 =20
--- 20,58 ----
  sub _new_socket
  {
      my($self, $host, $port, $timeout) =3D @_;
!     my($sock);
!    =20
!     if ($self->connection_cache_active())
!     {
!         if =
(defined($sock=3D$self->connection_cache_get($host,$port)))
!         {
! 	    # Make an effort to make sure the connection is still there.
!             unless (IO::Select->new($sock)->has_exception(0))
! 	    {
!                 LWP::Debug::debug("Using kept-alive connection to =
$host:$port");
!                 $self->{socket_from_cache}=3D1;
!                 return $sock;
!             }
! 	    &LWP::Debug::debug("Exception occured on kept-alive connection =
to $host:$port; closing cached socket.");
!         }
!     }
!     $self->{socket_from_cache}=3Dundef;
      local($^W) =3D 0;  # IO::Socket::INET can be noisy
!     $sock =3D IO::Socket::INET->new(PeerAddr =3D> $host,
! 				  PeerPort =3D> $port,
! 				  Proto    =3D> 'tcp',
! 				  Timeout  =3D> $timeout,
! 				  );
      unless ($sock) {
  	# IO::Socket::INET leaves additional error messages in $@
  	$@ =3D~ s/^.*?: //;
  	die "Can't connect to $host:$port ($@)";
      }
+     if ($self->connection_cache_active())
+     {
+         $self->connection_cache_add($host,$port,$sock);
+         LWP::Debug::debug("Caching socket at $host:$port");
+     }
      $sock;
  }
 =20
***************
*** 48,53 ****
--- 69,111 ----
  		 $sock->peerhost . ":" . $sock->peerport);
  }
 =20
+ sub connection_cache
+ {
+     my($self,$pCache)=3D@_;
+=20
+     LWP::Debug::debug("Activating connection cache for http");
+     $self->{connection_cache}=3D$pCache;
+ }
+=20
+ sub connection_cache_add
+ {
+   my($self,$host,$port,$sock)=3D@_;
+   my($lcHost,$lcPort)=3D(lc $host, lc $port);
+  =20
+   $self->{connection_cache}{"$lcHost:$lcPort"}=3D$sock;
+ }
+=20
+ sub connection_cache_del
+ {
+   my($self,$host,$port)=3D@_;
+   my($lcHost,$lcPort)=3D(lc $host, lc $port);
+=20
+   delete $self->{connection_cache}{"$lcHost:$lcPort"};
+ }
+=20
+ sub connection_cache_get
+ {
+   my($self,$host,$port)=3D@_;
+   my($lcHost,$lcPort)=3D(lc $host, lc $port);
+=20
+   return $self->{connection_cache}{"$lcHost:$lcPort"};
+ }
+=20
+ sub connection_cache_active
+ {
+   my($self)=3D@_;
+   return (defined($self->{connection_cache}));
+ }
 =20
  sub request
  {
***************
*** 81,92 ****
  	$fullpath =3D "/" unless length $fullpath;
      }
 =20
-     # connect to remote site
-     my $socket =3D $self->_new_socket($host, $port, $timeout);
-     $self->_check_sock($request, $socket);
- 	   =20
-     my $sel =3D IO::Select->new($socket) if $timeout;
-=20
      my $request_line =3D "$method $fullpath HTTP/1.0$CRLF";
 =20
      my $h =3D $request->headers->clone;
--- 139,144 ----
***************
*** 112,117 ****
--- 164,176 ----
      $hhost =3D~ s/^([^\@]*)\@//;  # get rid of potential "user:pass@"
      $h->header('Host' =3D> $hhost) unless defined $h->header('Host');
 =20
+     # If we're caching connections, ask the server not to shut
+     # down the connection.
+     if ($self->connection_cache_active())
+     {
+         $h->header('Connection','Keep-Alive');
+     }
+=20
      # add authorization header if we need them.  HTTP URLs do
      # not really support specification of user and password, but
      # we allow it.
***************
*** 119,251 ****
  	$h->authorization_basic(split(":", $1));
      }
 =20
!     my $buf =3D $request_line . $h->as_string($CRLF) . $CRLF;
      my $n;  # used for return value from syswrite/sysread
!=20
!     die "write timeout" if $timeout && !$sel->can_write($timeout);
!     $n =3D $socket->syswrite($buf, length($buf));
!     die $! unless defined($n);
!     die "short write" unless $n =3D=3D length($buf);
!     LWP::Debug::conns($buf);
!=20
!     if ($ctype eq 'CODE') {
! 	while ( ($buf =3D &$cont_ref()), defined($buf) && length($buf)) {
  	    die "write timeout" if $timeout && !$sel->can_write($timeout);
  	    $n =3D $socket->syswrite($buf, length($buf));
  	    die $! unless defined($n);
  	    die "short write" unless $n =3D=3D length($buf);
  	    LWP::Debug::conns($buf);
- 	}
-     } elsif (defined($$cont_ref) && length($$cont_ref)) {
- 	die "write timeout" if $timeout && !$sel->can_write($timeout);
- 	$n =3D $socket->syswrite($$cont_ref, length($$cont_ref));
- 	die $! unless defined($n);
- 	die "short write" unless $n =3D=3D length($$cont_ref);
- 	LWP::Debug::conns($buf);
-     }
-    =20
-     # read response line from server
-     LWP::Debug::debug('reading response');
-=20
-     my $response;
-     $buf =3D '';
 =20
!     # Inside this loop we will read the response line and all headers
!     # found in the response.
!     while (1) {
! 	{
! 	    die "read timeout" if $timeout && !$sel->can_read($timeout);
! 	    $n =3D $socket->sysread($buf, $size, length($buf));
! 	    die $! unless defined($n);
! 	    die "unexpected EOF before status line seen" unless $n;
! 	    LWP::Debug::conns($buf);
! 	}
! 	if ($buf =3D~ s/^(HTTP\/\d+\.\d+)[ \t]+(\d+)[ \t]*([^\012]*)\012//) =
{
! 	    # HTTP/1.0 response or better
! 	    my($ver,$code,$msg) =3D ($1, $2, $3);
! 	    $msg =3D~ s/\015$//;
! 	    LWP::Debug::debug("$ver $code $msg");
! 	    $response =3D HTTP::Response->new($code, $msg);
! 	    $response->protocol($ver);
!=20
! 	    # ensure that we have read all headers.  The headers will be
! 	    # terminated by two blank lines
! 	    until ($buf =3D~ /^\015?\012/ || $buf =3D~ /\015?\012\015?\012/) =
{
! 		# must read more if we can...
! 		LWP::Debug::debug("need more header data");
! 		die "read timeout" if $timeout && !$sel->can_read($timeout);
! 		$n =3D $socket->sysread($buf, $size, length($buf));
! 		die $! unless defined($n);
! 		die "unexpected EOF before all headers seen" unless $n;
! 		#LWP::Debug::conns($buf);
  	    }
 =20
! 	    # now we start parsing the headers.  The strategy is to
! 	    # remove one line at a time from the beginning of the header
! 	    # buffer ($res).
! 	    my($key, $val);
! 	    while ($buf =3D~ s/([^\012]*)\012//) {
! 		my $line =3D $1;
!=20
! 		# if we need to restore as content when illegal headers
! 		# are found.
! 		my $save =3D "$line\012";=20
!=20
! 		$line =3D~ s/\015$//;
! 		last unless length $line;
 =20
! 		if ($line =3D~ /^([a-zA-Z0-9_\-.]+)\s*:\s*(.*)/) {
! 		    $response->push_header($key, $val) if $key;
! 		    ($key, $val) =3D ($1, $2);
! 		} elsif ($line =3D~ /^\s+(.*)/) {
! 		    unless ($key) {
! 			$response->header("Client-Warning" =3D>
! 					 =3D> "Illegal continuation header");
! 			$buf =3D "$save$buf";
! 			last;
  		    }
! 		    $val .=3D " $1";
! 		} else {
! 		    $response->header("Client-Warning" =3D>
! 				      "Illegal header '$line'");
! 		    $buf =3D "$save$buf";
  		    last;
! 		}
  	    }
- 	    $response->push_header($key, $val) if $key;
- 	    last;
-=20
- 	} elsif ((length($buf) >=3D 5 and $buf !~ /^HTTP\//) or
- 		 $buf =3D~ /\012/ ) {
- 	    # HTTP/0.9 or worse
- 	    LWP::Debug::debug("HTTP/0.9 assume OK");
- 	    $response =3D HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
- 	    $response->protocol('HTTP/0.9');
- 	    last;
-=20
- 	} else {
- 	    # need more data
- 	    LWP::Debug::debug("need more status line data");
  	}
!     };
      $response->request($request);
      $self->_get_sock_info($response, $socket);
 =20
 =20
      my $usebuf =3D length($buf) > 0;
      $response =3D $self->collect($arg, $response, sub {
          if ($usebuf) {
  	    $usebuf =3D 0;
  	    return \$buf;
  	}
  	die "read timeout" if $timeout && !$sel->can_read($timeout);
  	my $n =3D $socket->sysread($buf, $size);
  	die $! unless defined($n);
  	#LWP::Debug::conns($buf);
  	return \$buf;
  	} );
 =20
!     $socket->close;
 =20
      $response;
  }
--- 178,382 ----
  	$h->authorization_basic(split(":", $1));
      }
 =20
!     # connect to remote site
!     my($socket,$sel,$buf);
      my $n;  # used for return value from syswrite/sysread
!     my $response;
!     SENDREQ: {
!         eval {
! 	    $socket =3D $self->_new_socket($host, $port, $timeout);
! 	    $self->_check_sock($request, $socket);
! 	   =20
! 	    $sel =3D IO::Select->new($socket) if $timeout;
! 	   =20
! 	    $buf =3D $request_line . $h->as_string($CRLF) . $CRLF;
! 	   =20
  	    die "write timeout" if $timeout && !$sel->can_write($timeout);
+=20
  	    $n =3D $socket->syswrite($buf, length($buf));
  	    die $! unless defined($n);
  	    die "short write" unless $n =3D=3D length($buf);
  	    LWP::Debug::conns($buf);
 =20
! 	    if ($ctype eq 'CODE') {
! 	        while ( ($buf =3D &$cont_ref()), defined($buf) && =
length($buf)) {
! 		    die "write timeout" if $timeout && !$sel->can_write($timeout);
! 		    $n =3D $socket->syswrite($buf, length($buf));
! 		    die $! unless defined($n);
! 		    die "short write" unless $n =3D=3D length($buf);
! 	            LWP::Debug::conns($buf);
! 	        }
! 	    } elsif (defined($$cont_ref) && length($$cont_ref)) {
! 	        die "write timeout" if $timeout && =
!$sel->can_write($timeout);
! 	        $n =3D $socket->syswrite($$cont_ref, length($$cont_ref));
! 	        die $! unless defined($n);
! 	        die "short write" unless $n =3D=3D length($$cont_ref);
! 	        LWP::Debug::conns($buf);
  	    }
 =20
!             # read response line from server
!   	    LWP::Debug::debug('reading response');
 =20
! 	    $buf =3D '';
! 	    # Inside this loop we will read the response line and all =
headers
! 	    # found in the response.
! 	    while (1) {
! 	        {
! 		    die "read timeout" if $timeout && !$sel->can_read($timeout);
! 		    $n =3D $socket->sysread($buf, $size, length($buf));
! 		    die $! unless defined($n);
! 		    die "unexpected EOF before status line seen" unless $n;
! 	            LWP::Debug::conns($buf);
! 	        }
! 	        if ($buf =3D~ s/^(HTTP\/\d+\.\d+)[ \t]+(\d+)[ =
\t]*([^\012]*)\012//) {
! 		    # HTTP/1.0 response or better
! 		    my($ver,$code,$msg) =3D ($1, $2, $3);
! 		    $msg =3D~ s/\015$//;
! 	            LWP::Debug::debug("$ver $code $msg");
! 		    $response =3D HTTP::Response->new($code, $msg);
! 		    $response->protocol($ver);
! 	=09
! 		    # ensure that we have read all headers.  The
! 		    # headers will be terminated by two blank lines
!=20
!   		    until ($buf =3D~ /^\015?\012/ || $buf =3D~ =
/\015?\012\015?\012/) {
! 		        # must read more if we can...
! 		        LWP::Debug::debug("need more header data");
! 		        die "read timeout" if $timeout && =
!$sel->can_read($timeout);
! 		        $n =3D $socket->sysread($buf, $size, length($buf));
! 		        die $! unless defined($n);
! 		        die "unexpected EOF before all headers seen" unless $n;
! 		        #LWP::Debug::conns($buf);
! 		    }
! 	=09
! 		    # now we start parsing the headers.  The strategy
! 		    # is to remove one line at a time from the
! 		    # beginning of the header buffer ($res).
!=20
! 		    # Now we've written a line, and read a line;
! 		    # we're pretty sure the connection is up.
! 		    # Disable the special-case for cached connections.
! 		    #   --sg
! 		    if (defined($self->{socket_from_cache}))
! 	 	    {
! 		        delete $self->{socket_from_cache};
  		    }
! 	=09
! 		    my($key, $val);
! 		    while ($buf =3D~ s/([^\012]*)\012//) {
! 		        my $line =3D $1;
! 		  
! 		        # if we need to restore as content when
! 		        # illegal headers are found.
! 		       =20
!                         my $save =3D "$line\012";=20
! 		 =20
! 		        $line =3D~ s/\015$//;
! 		        last unless length $line;
! 		 =20
! 		        if ($line =3D~ /^([a-zA-Z0-9_\-.]+)\s*:\s*(.*)/) {
! 		            $response->push_header($key, $val) if $key;
! 		            ($key, $val) =3D ($1, $2);
! 		        } elsif ($line =3D~ /^\s+(.*)/) {
! 		            unless ($key) {
! 		                $response->header("Client-Warning" =3D>
! 				  	          "Illegal continuation header");
! 		                $buf =3D "$save$buf";
! 		                last;
! 		            }
! 		            $val .=3D " $1";
! 		        } else {
! 		            $response->header("Client-Warning" =3D>
! 				              "Illegal header '$line'");
! 		            $buf =3D "$save$buf";
! 		            last;
! 		        }
! 		    }
! 		    $response->push_header($key, $val) if $key;
  		    last;
! 	=09
! 	            } elsif ((length($buf) >=3D 5 and $buf !~ /^HTTP\//) or
! 		       $buf =3D~ /\012/ ) {
! 		        # HTTP/0.9 or worse
! 	                LWP::Debug::debug("HTTP/0.9 assume OK");
! 		        $response =3D HTTP::Response->new(&HTTP::Status::RC_OK, =
"OK");
! 		        $response->protocol('HTTP/0.9');
! 		        last;
! 	=09
! 	            } else {
! 		        # need more data
! 	                LWP::Debug::debug("need more status line data");
! 	            }
! 	    };
! 	};  # end eval
! 	if ($@)
! 	{
! 	    if (defined($self->{socket_from_cache}))
! 	    {
!   	        LWP::Debug::debug("Error on cached connection: '$@'.  =
Trying again with new connection.");
! 	        undef $socket;
!                 $self->connection_cache_del($host,$port);
! 	        redo SENDREQ;
! 	    }
! 	    else
! 	    {
! 	        die $@;
  	    }
  	}
!     } # end SENDREQ block
!=20
      $response->request($request);
      $self->_get_sock_info($response, $socket);
 =20
 =20
      my $usebuf =3D length($buf) > 0;
+     my $clen=3D0;
+     if ($self->connection_cache_active())
+     {
+ 	$clen=3D$response->header("Content-Length");
+     }
+     my $curlen=3D0;
+=20
+     if ($response->header("Connection") !~ /\bkeep-alive\b/i)
+     {
+         LWP::Debug::debug("Server requested connection be closed");
+         if ($self->connection_cache_get($host,$port))
+         {
+ 	    $self->connection_cache_delete($host,$port);
+         }
+         $clen=3D0;
+     }
+=20
+     if ($clen)
+     {
+ 	LWP::Debug::debug("Content-Length is $clen bytes");
+     }
      $response =3D $self->collect($arg, $response, sub {
          if ($usebuf) {
  	    $usebuf =3D 0;
+ 	    $curlen +=3D length($buf);
  	    return \$buf;
  	}
+ 	if ($clen)
+ 	{
+ 	    if (($curlen =3D=3D $clen))
+ 	    {
+ 	      return undef;
+ 	    }
+ 	}
  	die "read timeout" if $timeout && !$sel->can_read($timeout);
  	my $n =3D $socket->sysread($buf, $size);
  	die $! unless defined($n);
+ 	$curlen +=3D $n;
  	#LWP::Debug::conns($buf);
  	return \$buf;
  	} );
 =20
! # Don't close it -- just let it go out of scope.  If it's being
! # cached, it will be referenced elsewhere, so won't be destroyed.
!=20
! #   --sg
! #    $socket->close;
 =20
      $response;
  }
*** /usr/lib/perl5/site_perl/5.005/LWP/UserAgent.pm	Mon Aug  2 18:57:09 =
1999
--- LWP/UserAgent.pm	Tue Nov  2 01:40:36 1999
***************
*** 131,136 ****
--- 131,137 ----
                  'parse_head'  =3D> 1,
                  'max_size'    =3D> undef,
  		'no_proxy'    =3D> [],
+                 'connection_cache' =3D> undef,
  	}, $class;
      }
  }
***************
*** 172,178 ****
  =09
 =20
      LWP::Debug::trace("$method $url");
!=20
      # Locate protocol to use
      my $scheme =3D '';
      my $proxy =3D $self->_need_proxy($url);
--- 173,179 ----
  =09
 =20
      LWP::Debug::trace("$method $url");
!    =20
      # Locate protocol to use
      my $scheme =3D '';
      my $proxy =3D $self->_need_proxy($url);
***************
*** 189,194 ****
--- 190,197 ----
  	$@ =3D~ s/\s+at\s+\S+\s+line\s+\d+\.?\s*//;  # remove file/line =
number
  	return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED, $@)
      }
+    =20
+     LWP::Debug::debug("Got protocol object at $protocol");
 =20
      # Extract fields that will be used below
      my ($agent, $from, $timeout, $cookie_jar,
***************
*** 206,211 ****
--- 209,226 ----
      $protocol->parse_head($parse_head);
      $protocol->max_size($max_size);
     =20
+     if (defined($self->{connection_cache}))
+     {
+         eval {
+             $protocol->connection_cache($self->{connection_cache});
+         };
+         if ($@)
+         {
+             warn("Tried to use connection caching with protocol that =
does not support it.\n");
+         }
+ 	LWP::Debug::debug("Configured connection_cache for protocol");
+     }
+=20
      my $response;
      if ($use_eval) {
  	# we eval, and turn dies into responses below
***************
*** 231,236 ****
--- 246,264 ----
      return $response;
  }
 =20
+ # Debugging function.
+ sub dump_connection_cache
+ {
+     my ($self)=3D@_;
+=20
+     my $i;
+=20
+     print "Dumping connection cache =
(=3D$self->{connection_cache})\n";
+     foreach $i (keys %{$self->{connection_cache}})
+     {
+         print "Have connection to $i\n";
+     }
+ }
 =20
  =3Ditem $ua->request($request, $arg [, $size])
 =20
***************
*** 468,473 ****
--- 496,531 ----
  sub parse_head { shift->_elem('parse_head',@_); }
  sub max_size   { shift->_elem('max_size',  @_); }
 =20
+ =3Ditem $ua->connection_cache([$boolean])
+=20
+ Get/set a value indicating whether we should try to cache connections
+ between requests.  If this is activated, HTTP sockets won't be closed
+ after a request is completed, but will be stored as part of the
+ UserAgent.  If, later on, we try to connect to the same host and =
port,
+ this connection will be reused.
+=20
+ If you pass a value to this function, all open connections will be
+ closed.
+=20
+ =3Dcut
+=20
+ sub connection_cache {
+   my($self,$setting)=3D@_;
+=20
+   if (defined($setting))
+   {
+     if ($setting)
+     {
+       $self->{connection_cache} =3D {};
+     }
+     else
+     {
+       $self->{connection_cache} =3D undef;
+     }
+   }
+   return defined($self->{connection_cache});
+ }
+    =20
  # depreciated
  sub use_eval   { shift->_elem('use_eval',  @_); }
  sub use_alarm

------_=_NextPart_002_01BF9F3B.41E55A60--

------_=_NextPart_000_01BF9F3B.41E55A60--