Re: HTML::Parser line numbers

Gisle Aas (gisle@aas.no)
23 Jan 1998 10:54:55 +0100


Using Randal's idea we could make a subclass like the one below.  You
could extend it to count characters and offsets by calling
SUPER::parse for each character.  It would not be very efficient
though.  Perhaps splitting on /([<>\n])/ would do?

I tested your $parser->count callback patch, and noticed that it
slowed down the generic parser about 6% for parsing of some random
HTML code I had lying around.  I don't want this if I can avoid it.

Regards,
Gisle


-----------------------------------------------------------
package HTML::LineParser;

# Subclass of HTML::Parser that keeps a running count of the newlines fed
# to parse(); the current count is available through lineno().

use strict;
use warnings;

require HTML::Parser;
our @ISA = qw(HTML::Parser);    # declared with "our" so it is legal under strict

# Constructor.  Every argument is handed on unchanged to the parent class
# constructor; the freshly built object then has its line counter set to 1
# through lineno().  Returns the new object.
sub new
{
    my $class  = shift;
    my $parser = $class->SUPER::new(@_);

    $parser->lineno(1);

    return $parser;
}

# Feed a chunk of text to the parent parser while counting newlines.
#
# The chunk is cut up so that every "\n" is passed to SUPER::parse as a
# piece of its own, and $self->{_lineno} is incremented right after each
# newline has been handed on.  All other pieces are forwarded as they are.
#
# Arguments: the text chunk, or undef.
# Returns:   $self for a defined chunk.  An undefined chunk is forwarded
#            untouched and the parent's return value is passed back
#            (presumably undef is the parent's end-of-input signal --
#            confirm against the HTML::Parser version in use).
#
# NOTE(review): if the parent buffers incomplete tokens, a callback for a
# construct spanning several lines would see the line number of the piece
# that completed it -- confirm against HTML::Parser.
sub parse
{
    my $self = shift;
    return $self->SUPER::parse($_[0]) unless defined $_[0];

    # The capturing group makes split() keep each "\n" as a separate element.
    # A lexical loop variable is used on purpose: code run during
    # SUPER::parse may assign to the global $_, which would otherwise break
    # the newline test below and stop the counter from advancing.
    for my $piece (split /(\n)/, $_[0]) {
        $self->SUPER::parse($piece);
        $self->{_lineno}++ if $piece eq "\n";
    }

    return $self;
}

# Combined getter/setter for the line counter stored in $self->{_lineno}.
#
# Called without an argument it only reads the counter.  Called with an
# argument it stores that argument as the new counter value.  In both cases
# the value the counter held before the call is returned.
sub lineno
{
    my ($self, @new_value) = @_;

    my $previous = $self->{_lineno};
    if (@new_value) {
        $self->{_lineno} = $new_value[0];
    }

    return $previous;
}

1;


})) {
> 			push(@html, $node->endtag);
> 		    }
> 		}
> 	    } else {
> 		# simple text content
> 		if ($enc) { 
> 		  HTML::Entities::encode_entities($node, "<>&");
> 		}
> 		push(@html, $node);
> 	    }
>         }
>     );
>     # join('', @html, "\n");		--  AJ (Just for ease of reading)
>     join("\n", @html, "\n");
> }