Re: pretty HMTL

Brian Grossman (brian@softhome.net)
Thu, 13 Nov 1997 16:42:00 -0700


This is a multipart MIME message.

--==_Exmh_4913659450
Content-Type: text/plain; charset=us-ascii


> 1) Is there a way to produce "pretty" HTML that I have missed while
>   searching the docs on the source?

Here's a script I wrote awhile back as part of another application.

Brian


--==_Exmh_4913659450
Content-Type: text/plain ; name="PPhtml.pl"; charset=us-ascii
Content-Description: PPhtml.pl
Content-Disposition: attachment; filename="PPhtml.pl"

#!/usr/local/bin/perl -w

# Author:  Brian Grossman <brian@SoftHome.net>
# 25 Jan 1997	finished first pass
#  9 Nov 1997	fixed a few things, repackaged into standalone script

use HTML::TreeBuilder;
require HTML::Element;

package HTML::TreeBuilder;

# TreeBuilder doesn't define a comment method, so we have to.
sub comment {
	my $self = shift;
	my $pos = $self->{'_pos'};
	$pos = $self unless defined($pos);
	my $text = shift;
	return unless length $text;
	$pos->push_content('<!--' . $text . '-->');
	}

package HTML::Element;

my $sh_ws;
$sh_encode_text = 1;

sub sh_as_HTML {
	my $self = shift;
	my @html = ();
	$sh_ws = 0;
	$self->traverse(
		sub {
			my($node, $start, $depth) = @_;
			if (ref $node) {
				my $tag = $node->tag;
				push(@html, "\n") if $sh_ws;
				push(@html, "  " x $depth) if $sh_ws;
				if ($start) {
					push(@html, $node->starttag);
					}
				elsif (not $emptyElement{$tag} ) {
					push(@html, $node->endtag);
					}
				$sh_ws = 1;
				}
			else {
				my $in_pre =
					$self->is_inside(qw(pre xmp listing));
				if($node eq '' && !$in_pre) { $node = ' ' }
				# simple text content
				if($node =~ m/^\s/ ) {
						push(@html, "\n") if $node !~ m/^\s*$/;
						$node =~ s/^\s*// if $depth > 0;
						my $ws = "  " x $depth;
						$node =~ s/\A/$ws/;
						}
				if($node =~ m/\s$/) {
					$sh_ws = 1;
					$node =~ s/\s*\Z//;
					}
				else { $sh_ws = 0 }
				# HTML::Entities::encode_entities($node, "<>&") if $sh_encode_text;
				HTML::Entities::encode_entities($node) if $sh_encode_text;
				push(@html, $node);
				}
			}
	);
	join('', @html, "\n");
	}

# action in form is optional, but is still an implied link
# actually, action is NOT optional
undef &extract_links;
sub extract_links {
	my $self = shift;
	my %wantType; @wantType{map { lc $_ } @_} = (1) x @_;
	my $wantType = scalar(@_);
	my @links;
	$self->traverse(
		sub {
			my($self, $start, $depth) = @_;
			return 1 unless $start;
			my $tag = $self->{'_tag'};
			return 1 if $wantType && !$wantType{$tag};
			my $attr = $linkElements{$tag};
			return 1 unless defined $attr;
			$attr = [$attr] unless ref $attr;
			for (@$attr) {
				my $val = $self->attr($_);
				push(@links, [$val, $self]) if defined $val || $tag eq 'form';
				}
			1;
			}, 'ignoretext');
	\@links;
	}

package main;

# $HTML::Element::sh_encode_text = 0;

my $html = new HTML::TreeBuilder;
$html->ignore_unknown(0);
$html->netscape_buggy_comment(1);
$html->parse(join('',<>));
print $html->sh_as_HTML;

--==_Exmh_4913659450--