Re: pretty HMTL
Brian Grossman (brian@softhome.net)
Thu, 13 Nov 1997 16:42:00 -0700
This is a multipart MIME message.
--==_Exmh_4913659450
Content-Type: text/plain; charset=us-ascii
> 1) Is there a way to produce "pretty" HTML that I have missed while
> searching the docs on the source?
Here's a script I wrote awhile back as part of another application.
Brian
--==_Exmh_4913659450
Content-Type: text/plain ; name="PPhtml.pl"; charset=us-ascii
Content-Description: PPhtml.pl
Content-Disposition: attachment; filename="PPhtml.pl"
#!/usr/local/bin/perl -w
# Author: Brian Grossman <brian@SoftHome.net>
# 25 Jan 1997 finished first pass
# 9 Nov 1997 fixed a few things, repackaged into standalone script
use HTML::TreeBuilder;
require HTML::Element;
package HTML::TreeBuilder;
# TreeBuilder doesn't define a comment method, so we have to.
sub comment {
my $self = shift;
my $pos = $self->{'_pos'};
$pos = $self unless defined($pos);
my $text = shift;
return unless length $text;
$pos->push_content('<!--' . $text . '-->');
}
package HTML::Element;
my $sh_ws;
$sh_encode_text = 1;
sub sh_as_HTML {
my $self = shift;
my @html = ();
$sh_ws = 0;
$self->traverse(
sub {
my($node, $start, $depth) = @_;
if (ref $node) {
my $tag = $node->tag;
push(@html, "\n") if $sh_ws;
push(@html, " " x $depth) if $sh_ws;
if ($start) {
push(@html, $node->starttag);
}
elsif (not $emptyElement{$tag} ) {
push(@html, $node->endtag);
}
$sh_ws = 1;
}
else {
my $in_pre =
$self->is_inside(qw(pre xmp listing));
if($node eq '' && !$in_pre) { $node = ' ' }
# simple text content
if($node =~ m/^\s/ ) {
push(@html, "\n") if $node !~ m/^\s*$/;
$node =~ s/^\s*// if $depth > 0;
my $ws = " " x $depth;
$node =~ s/\A/$ws/;
}
if($node =~ m/\s$/) {
$sh_ws = 1;
$node =~ s/\s*\Z//;
}
else { $sh_ws = 0 }
# HTML::Entities::encode_entities($node, "<>&") if $sh_encode_text;
HTML::Entities::encode_entities($node) if $sh_encode_text;
push(@html, $node);
}
}
);
join('', @html, "\n");
}
# action in form is optional, but is still an implied link
# actually, action is NOT optional
undef &extract_links;
sub extract_links {
my $self = shift;
my %wantType; @wantType{map { lc $_ } @_} = (1) x @_;
my $wantType = scalar(@_);
my @links;
$self->traverse(
sub {
my($self, $start, $depth) = @_;
return 1 unless $start;
my $tag = $self->{'_tag'};
return 1 if $wantType && !$wantType{$tag};
my $attr = $linkElements{$tag};
return 1 unless defined $attr;
$attr = [$attr] unless ref $attr;
for (@$attr) {
my $val = $self->attr($_);
push(@links, [$val, $self]) if defined $val || $tag eq 'form';
}
1;
}, 'ignoretext');
\@links;
}
package main;
# $HTML::Element::sh_encode_text = 0;
my $html = new HTML::TreeBuilder;
$html->ignore_unknown(0);
$html->netscape_buggy_comment(1);
$html->parse(join('',<>));
print $html->sh_as_HTML;
--==_Exmh_4913659450--