Re: CGI harness, anyone?

Sean M. Burke (sburke@spinn.net)
Tue, 02 Jan 2001 14:23:14 -0700


At 09:31 AM 2001-01-02 -0800, Randal Schwartz wrote:
>
>Has anyone done a CGI protocol module?  No, not CGI.pm.  The *server*
>side of the protocol! [...]

I wrote something that is sort of a CGI harness, but doesn't seem
really in the direction you have in mind.  But I'll include it in
case it's useful to someone.


This module is a harness for Perl CGI programs.  Its package name
prefix, SMB, does not refer to Samba, but is instead just my initials;
I'm in the habit of naming all my experimental/personal modules that
way, so that I can tell them from real CPAN-released modules.  (If
I had it to do over, I'd probably name them starting with _SMB or
something, since leading '_' explicitly conveys "this is private!".)

To use this harness on a CGI program, just have "use SMB::Harness" early
on in the program, altho possibly after any "use lib 'wherever'" lines.
That's it.

What the harness does is tie STDOUT and STDERR, and defines an END 
block to process (emitting to STDOUT with appropriate wrapping) what
it caught going to STDOUT and STDERR.  If decent headers and content
both went to STDOUT and nothing went to STDERR, the program just passes
it all thru.  But if there was ANYTHING wrong (things sent to STDERR,
and/or lack of content, and/or lack of headers), then the program
gives details.  See the code starting at "END {" for a full rundown
of all the cases it traps.

I think there are a few cases of program error that this program
doesn't trap -- basically coredumping Perl, or exec()ing, or anything
that keeps the END block from getting run.  But I think that's about
it.  I'd be pleased to hear if anyone finds this useful.  An existing
program that you want to have use this harness shouldn't require any
modification, beyond just adding "use SMB::Harness" to it.


package SMB::Harness;
BEGIN {
  if($] < 5.004) { # sane lower limit, I think
    print "Status: 500 Server Error\nContent-type: text/plain\n\n",
          "Perl version $] is too old for ", __PACKAGE__, ".\n",
    ;
    exit;
  }
}

use strict;
use vars qw(
  $VERSION $HEADERS_OVER $BODY_CONTENT $THROWN $REAL_STDOUT
  @HEADERS $buffer @ERROR_CONTENT
);

# This class is for tying handles to the real STDOUT and STDERR.
# When used, it goes and does exactly that.
#
#--------------------------------------------------------------------------

#TODO: bitch if there's neither a content-type nor a location nor status
# is present and 204?
#TODO: bitch if content-type is text/html or text/plain and the body
# is 0-length, or is all whitespace characters?

$VERSION = '1.01';
$THROWN = 0;
@HEADERS = ();
@ERROR_CONTENT = ();
$BODY_CONTENT = '';

#Make sure the FH class is loaded.
eval "require " . ref(*STDOUT{IO}) . ";";
if($@) {
  print "Status: 500 Server Error\nContent-type: text/plain\n\n",
    "Fatal error in ", __PACKAGE__, " v$VERSION: $@\n",
  ;
  exit;
}

$REAL_STDOUT = *STDOUT{IO};

#--------------------------------------------------------------------------

sub DEBUG () {0};

sub TIEHANDLE {
  shift @_;
  return bless [$REAL_STDOUT, @_];
}

$buffer = '';

sub PRINT {
  my $x = shift @_;
  
  if($x->[1]) { # sign that this is an error handle
    if(@_) {
      push @ERROR_CONTENT, @_;
    } else {
      push @ERROR_CONTENT, undef; # sanity?
    }
    DEBUG and print $REAL_STDOUT "Error Content [", @_, "]\n";
    return 1;
    # To be caught later.
  }
  
  return 1 unless @_;
  
  if($HEADERS_OVER) {
    DEBUG and print $REAL_STDOUT "Nonheader Content [", @_, "]\n";
    $BODY_CONTENT .= join '', grep defined $_, @_;
    return 1;
  }
  # Otherwise we're trapping headers:
  my $content;
  
  return 1 unless length($content = join '', grep defined $_, @_);
  
  unless($content =~ m<(?:(\cm\cj?)|\cj)>) { # does it contain a newline?
    $buffer .= $content;
    DEBUG and print $REAL_STDOUT "Headerly Content [$content] is nonlinear\n";
    return 1;
  }
  
  if(length $buffer) {
    $content = $buffer . $content;
    $buffer = '';
  }

  DEBUG and print $REAL_STDOUT "Content [$content]\n";
  
  # Else it contains one or more lines:
  my $line;
  while($content
       =~ m<\G
               ([^\cm\cj]*)
               ( (\cm\cj?)
                 |\cj
               )?
           >gsx
  ) {
    next unless $1 or $2; # ignore when both are empty
    
    if($HEADERS_OVER) {
      $BODY_CONTENT .= $1;
      $BODY_CONTENT .= $2 if $2;
      # Else keep processing headers...
    } elsif(! $2) {
      # terminal incomplete line-fragment in header segment.
      $buffer = $1;
      last;
    } elsif(not( defined $1 and length $1)) {
      # End-of-header marker.
      #print $REAL_STDOUT "EOH\n";
      $HEADERS_OVER = 1;
    } elsif(($line = $1 . $2) =~
        m<^[^\x00-\x1F\x7F\(\)\<\>\@,;:\\\"/\[\]\?\=\{\}\x20\x09]+\:>s
    ) {
      # It's a header line.  Normal case.
        # RFC 2068 says header lines start with a token and a ":", and that:
        #
        #          token          = 1*<any CHAR except CTLs or tspecials>
        #
        #          tspecials      = "(" | ")" | "<" | ">" | "@"
        #                         | "," | ";" | ":" | "\" | <">
        #                         | "/" | "[" | "]" | "?" | "="
        #                         | "{" | "}" | SP | HT
        #
        #          CTL            = <any US-ASCII control character
        #                           (octets 0 - 31) and DEL (127)>
        #          SP             = <US-ASCII SP, space (32)>
        #          HT             = <US-ASCII HT, horizontal-tab (9)>
      push @HEADERS, $line;
      DEBUG and print $REAL_STDOUT "Header line $1\n";
      
    } else {
      # It's not a header line.
      # No, we don't bother recognizing continuation lines, since
      #  Apache doesn't either.  Good enough excuse?
      $BODY_CONTENT .= $1;
      push @ERROR_CONTENT, "Nonheader line \"$1\" found while expecting headers.\n";
      $HEADERS_OVER = 1;
    }
  }
  1;
}

sub PRINTF {
  my($x, $f) = splice @_,0,2;
  PRINT($x, sprintf($f, @_));
}

#--------------------------------------------------------------------------
# And set up the end block:

END {
  if(!$REAL_STDOUT) {
    # My init code was never called, so I must be in -w mode, I guess.  No-op.
  } elsif(@HEADERS and not @ERROR_CONTENT and $HEADERS_OVER) {
    # Normal case:
    print $REAL_STDOUT @HEADERS, "\n", $BODY_CONTENT;
  } else {
    # Some sort of error:
    if(length $buffer) {
      if($HEADERS_OVER) {
        $BODY_CONTENT .= $buffer;
      } else {
        push @HEADERS, "$buffer\n", "[Last line is incomplete!]\n";
      }
    }
    
    print $REAL_STDOUT
      "Status: 500 Server Error (Perl ", __PACKAGE__, "-trapped)\n",
      "Content-type: text/plain\n\n",
      __PACKAGE__ , " v$VERSION traps an error: ",
    ;
    
    if(@ERROR_CONTENT) { #Any error
      print $REAL_STDOUT " [\n",
        grep(defined($_), @ERROR_CONTENT), "]\n\n",
        @HEADERS
          ? ("Headers: [\n", @HEADERS, "]\n\n" )
          : "(No headers.)\n\n",
        length($BODY_CONTENT)
          ? ("Body content (",
             length($BODY_CONTENT), " characters):\n", $BODY_CONTENT,
          ) : "(No body content.)\n",
      ;
    } elsif(length $BODY_CONTENT) {
      print $REAL_STDOUT "No header!\n\n",
        "Just ", length($BODY_CONTENT), " characters of body content:\n",
        $BODY_CONTENT,
      ;
    } elsif(@HEADERS) {
      print $REAL_STDOUT
         "Headers not terminated by a blank end-of-header line.\n\n",
         "Headers: [\n", @HEADERS, "]\n\n",
      ;
    } else {
      # No header or body!
      print $REAL_STDOUT "No header or body?\n\n",
        $HEADERS_OVER ? "(Except for the blank end-header line.)\n" : (),
      ;
    }
  }
}

#--------------------------------------------------------------------------
# Now, tie, and byebye!

tie *STDOUT, __PACKAGE__, 0;
close(STDERR); # to keep the real warn/die from blabbing
tie *STDERR, __PACKAGE__, 1;

$^M = 'a' x 10_000; # just in case that'll help.

$SIG{'__DIE__' } = sub { print STDERR "Fatal exception:\n", @_ };
$SIG{'__WARN__'} = sub { print STDERR @_ };
# not that any CGI should warn anyway.

1;

__END__

--
Sean M. Burke  sburke@cpan.org  http://www.spinn.net/~sburke/