Routines for processing anchors

James Casey (casey@ptsun00.cern.ch)
Mon, 01 Aug 1994 17:25:54 +0200


Here is a set of routines for splitting out anchors from an HTML document and 
processing them one by one. 

james.

----- CUT HERE ----
;#
;# PACKAGE:
;#	wwwhtml
;#
;# AUTHOR: 
;#	James Casey <casey@ptsun00.cern.ch>
;#
;# DESCRIPTION:
;#	A set of routines for dealing with anchors...
;#	next_anchor : returns next anchor in a section of HTML
;#	parse_anchor : break up an anchor into tags
;#	create_anchor : recompose a set of tags into an anchor
;#
;# EXAMPLE:
;#
;#	do{     
;#		local(*Tags);
;# 
;#		($Tstart,$Tanchor,$Trest)=&wwwhtml'next_anchor($Trest);
;# 
;#		&output_html($Tstart);
;# 
;#		&wwwhtml'parse_anchor($Tanchor,*Tags);   
;# 
;#		;# ok, got the tags, now process them...
;#
;#		$Tanchor=&wwwhtml'create_anchor(*Tags) if $Tanchor ne "";       
;#		&output_html($Tanchor);
;#	} while ($Tanchor ne "");
;#	&output_html($Trest);
;#
;#

sub next_anchor{
;#
;# sub:	($start,$anchor,$rest)=&next_anchor($content);
;# 
;# Given a section of HTML in $content, split it at the first opening
;# anchor tag (<A ...>, matched case-insensitively; the tag must carry at
;# least one attribute).
;#
;# Returns a three element list:
;#	$start  : all text before the anchor tag, unmodified
;#	$anchor : the opening anchor tag itself, from '<' to '>'
;#	$rest   : everything following the anchor tag
;#
;# If no anchor exists, returns ($content,"","") so that the caller can
;# still output the whole section of HTML.
;#

	local($_)=@_;
	local($Start,$Anchor,$Rest);

	$_="" unless defined($_);# treat a missing argument as empty HTML

	if (! /<\s*a\s+[^>]*>/i){;# no anchor found
		return ($_,"","");
	}

	;# anchor found: the pre-match, match and post-match variables already
	;# hold exactly the three pieces we want.  They are copied straight
	;# away, before any other pattern match can overwrite them.  Nothing is
	;# escaped or stripped afterwards, so $Start is handed back verbatim.
	($Start,$Anchor,$Rest)=($`,$&,$');

	return ($Start,$Anchor,$Rest);
}

sub parse_anchor{
;#
;# USAGE:
;#		$Error=&parse_anchor($anchor,*tags)
;# where $anchor is of form <A .... ID="val"... >
;# and tag-value pairs are returned in %tags
;#
;# Details:
;#	- attribute names are converted to uppercase before being used as
;#	  keys of %tags
;#	- double quotes are removed from values; a quoted value may contain
;#	  whitespace and '=' characters (e.g. HREF="x.cgi?a=b")
;#	- whitespace (including newlines) around '=' and between attributes
;#	  is ignored
;#	- an attribute given without a value is stored with the value ""
;#	- existing entries in %tags are kept unless overwritten
;#
;# Only %tags is modified: the caller's $_ and @tags are left untouched.
;# Always returns 0 (no error).
;#

	local($Anchor,*Tags)=@_;
	local($Id,$Value);
	local($_)=$Anchor;# work on a private copy, never on the caller's $_

	$_="" unless defined($_);

	s/\n/ /g;# a newline separates attributes just as a space does
	s/<\s*A(.*)>/$1/i;# strip off <A ...  >

	;# Peel one attribute at a time off the front of the string.  Each pass
	;# removes at least one character, so the loop always terminates.
	;#   $1 = attribute name
	;#   $3 = value: a complete "..." string, or else a run of non-blanks
	;# Leading blanks and stray '=' characters are skipped.
	while (s/^[\s=]*([^\s=]+)(\s*=\s*("[^"]*"|\S*))?//){
		$Id=$1;
		$Value=defined($3) ? $3 : "";# attribute without a value
		$Value=~ s/"//g;# strip "'s from $Value
		$Id=~ tr/a-z/A-Z/;# convert $Id's to uppercase
		$Tags{$Id}=$Value;
	}

	return 0;
}

sub create_anchor{
;# USAGE:
;#	$anchor=&create_anchor(*tags);
;#
;# Builds an opening anchor tag from the associative array %tags.
;#
;# Every key of %tags is written out as KEY="value" followed by a single
;# space, with the keys in sorted order; the result starts with "<A " and
;# ends with ">".
;#
;# Returns the assembled anchor as a string.

	local(*Tags)=@_;
	local($Name,@Pieces);

	;# collect the fragments first, glue them together at the very end
	@Pieces=("<A ");
	foreach $Name (sort keys %Tags){
		push(@Pieces,$Name,"=\"",$Tags{$Name},"\" ");
	}

	return join('',@Pieces,">");
}