Bug in Parser (about comments)

Calle Aasman (md4calle@mdstud.chalmers.se)
Thu, 26 Jun 1997 12:24:04 +0200 (MET DST)


Hi, I don't know whether I'm doing something wrong or whether this is a bug:

I have written a parser that performs some actions whenever it hits specific
tags, but it misbehaves if an HTML document looks like this:


...
this text will be parsed out fine
<!---------------------->
this will be parsed as if it were inside a comment
<!--------------------->
this will work fine
....


Has anyone seen this problem before?

If anyone has any idea I would really appreciate it, because I'm in panic
mode right now, since I thought the parser worked fine...





I've included the two files I have:




#!/usr/local/bin/perl
# Driver script: parses tst.html with a ParseXX parser object, then prints
# the contents of @links and of the %allinfo hash that lagra() fills in.
#
# NOTE(review): `use strict` is deliberately not enabled here; %allinfo,
# @links and $uselessdict are package globals, and enabling strict would
# require declaring them first.
require "tagfunctions.pm";    # presumably the file that defines ParseXX

##################### main #####################################
package main;
%allinfo     = ();            # collected strings per key, appended by lagra()
$uselessdict = "ORD/uselesswords.lst";

# Direct method call instead of the ambiguous indirect-object `new ParseXX`.
my $form = ParseXX->new;
$form->parse_file("tst.html")
    or die "Cannot parse tst.html: $!\n";

# Print the links.
# NOTE(review): nothing in the visible code ever fills @links -- confirm
# where it is supposed to be populated.
foreach my $link (@links) {
    print "$link\n";
}

# Dump everything that was collected, one "key = value" paragraph per key.
while (my ($key, $value) = each %allinfo) {
    print "$key = $value\n\n";
}



#################### subroutines ######################################
# Store data in the %allinfo hash.
#
# Appends $info to whatever is already stored under key $tag.
#   $tag  - hash key; an undefined key is stored under the empty string
#   $info - text to append; an undefined value appends nothing
# Returns the complete string now stored under $tag.
sub lagra {
    my ($tag, $info) = @_;
    $tag  = '' unless defined $tag;
    $info = '' unless defined $info;
    return $allinfo{$tag} .= $info;
}




This is the file that defines what the parser should do when it encounters a tag:
# ParseXX: an HTML::Parser subclass whose start/end/text callbacks hand each
# tag to a start_<tag> / end_<tag> subroutine defined in this package.
package ParseXX;
require HTML::Parser;
use HTML::Entities ();    # empty import list: functions are called fully qualified

@ISA = qw(HTML::Parser);
# Start-tag callback. Dispatches to a subroutine named start_<tag> in this
# package if one exists; tags without a handler are ignored.
#   $self     - the parser object
#   $tag      - tag name as reported by the parser
#   $attr     - hash reference of the tag's attributes
#   $attrseq  - array reference of attribute names in their original order
#   $origtext - the original text of the tag
#
# The handler is looked up in the symbol table rather than by string-eval,
# so text taken from the parsed document is never compiled as Perl code.
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;

    # Only plain identifiers can name a handler subroutine.
    return unless defined $tag && $tag =~ /\A\w+\z/;

    my $name    = __PACKAGE__ . "::start_$tag";
    my $handler = do {
        no strict 'refs';
        defined &{$name} ? \&{$name} : undef;
    };
    return unless $handler;

    # A failing handler must not abort the parse, but it should be reported.
    eval { $handler->($self, $attr, $attrseq, $origtext); 1 }
        or warn "start_$tag handler failed: $@";
    return;
}

# Text callback. Decodes HTML entities in $text, collapses every run of
# whitespace to a single space, and appends the result under the "text" key
# via main::lagra(). While the $takecare flag is set (between start_header
# and end_header) the same text is also collected in $tempcare.
#   $self - the parser object
#   $text - the raw text chunk
sub text {
    my ($self, $text) = @_;
    my $decoded = HTML::Entities::decode($text);

    $decoded =~ s/\s+/ /g;    # remove newlines and surplus whitespace

    # Append instead of overwrite: a heading that contains inline markup
    # arrives as several text events, and all of them belong to the header.
    $tempcare .= $decoded if $takecare;

    main::lagra("text", $decoded);
}


# End-tag callback. Dispatches to a subroutine named end_<tag> in this
# package if one exists; tags without a handler are ignored.
#   $self - the parser object
#   $tag  - tag name as reported by the parser
#
# The handler is looked up in the symbol table rather than by string-eval,
# so text taken from the parsed document is never compiled as Perl code.
# The handler receives ($self, $tag), matching what the end_* subs unpack.
sub end {
    my ($self, $tag) = @_;

    # Only plain identifiers can name a handler subroutine.
    return unless defined $tag && $tag =~ /\A\w+\z/;

    my $name    = __PACKAGE__ . "::end_$tag";
    my $handler = do {
        no strict 'refs';
        defined &{$name} ? \&{$name} : undef;
    };
    return unless $handler;

    # A failing handler must not abort the parse, but it should be reported.
    eval { $handler->($self, $tag); 1 }
        or warn "end_$tag handler failed: $@";
    return;
}

# Shared handler for the opening tag of a heading. Sets the $takecare flag
# so that text() starts collecting the heading text in $tempcare.
# The buffer is cleared first so that text left over from a heading that
# was never closed cannot end up in this one.
sub start_header {
    my ($self, $attr, $attrseq, $origtext) = @_;
    $tempcare = "";
    $takecare = 1;
}

# Shared handler for the closing tag of a heading. Stores the text
# collected in $tempcare (followed by one space) under the "header" key via
# main::lagra(), then clears the $takecare flag and the buffer.
sub end_header {
    my ($self, $tag) = @_;
    if (defined $tempcare && $tempcare ne "") {
        main::lagra("header", "$tempcare ");
    }
    $takecare = 0;
    $tempcare = "";    # ugly to have to reset it like this
}

# Every heading level shares the generic header handlers, so start_hN and
# end_hN are installed as aliases of start_header and end_header instead of
# being written out one by one. HTML defines six heading levels, so <h6>
# is covered as well.
for my $level (1 .. 6) {
    no strict 'refs';
    *{ __PACKAGE__ . "::start_h$level" } = \&start_header;
    *{ __PACKAGE__ . "::end_h$level" }   = \&end_header;
}

# Handler for <hr>: appends a newline to the collected "text" entry.
sub start_hr {
    my @entry = ("text", "\n");
    return main::lagra(@entry);
}

# Handler for <p>: a new paragraph adds a newline to the collected "text"
# entry.
sub start_p {
    my ($key, $separator) = ("text", "\n");
    return main::lagra($key, $separator);
}

# Handler for <meta>. Stores the tag's value under the key given by its
# "name" attribute via main::lagra().
#   $attr    - hash reference of the tag's attributes
#   $attrseq - array reference of attribute names in their original order
#
# The value is the "content" attribute when there is one. Otherwise it is
# the last attribute, in document order, whose name does not contain
# "name"; this replaces a pick that depended on hash iteration order.
sub start_meta {
    my ($self, $attr, $attrseq, $origtext) = @_;
    $attr ||= {};

    my $value = $attr->{'content'};
    unless (defined $value) {
        my @names = ref $attrseq eq 'ARRAY' ? @$attrseq : sort keys %$attr;
        for my $name (@names) {
            next if $name =~ /name/i;
            $value = $attr->{$name};
        }
    }

    #print $attr->{'name'};
    main::lagra($attr->{'name'}, $value);
}

# Handler for <script>: records each script tag by appending a "+" to the
# "script" entry via main::lagra(). The handler's arguments are not needed.
sub start_script {
    main::lagra("script", "+");
}