# Routines for HTML to ASCII.
# (fixed width font, no font changes for size, bold, etc) with a little
#
# BUGS AND MISSING FEATURES
#   font tags (e.g. CODE, EM) cause an extra whitespace
#   e.g. foo, -> foo ,
#
# Jim Davis July 15 1994
# modified 3 Aug 94 to support MENU and DIR
#
# Handlers are registered by name in %Begin and %End, keyed by the
# upper-case element name.  The margin variables belong to tformat (see
# "Set variables in tformat" below), so everything here deliberately
# stays a package global and subs use `local` rather than lexicals.

require "tformat.pl" || die "Could not load tformat.pl: $@\nStopped";

# Can be set by command line arg
if (! defined($columns_per_line)) { $columns_per_line = 72;}
if (! defined($flush_last_page))  { $flush_last_page = 1;}

# amount to add to indentation
$indent_left  = 5;
$indent_right = 5;

# ignore contents inside HEAD.
$ignore_text = 0;

# Set variables in tformat
$left_margin   = 1;
$right_margin  = $columns_per_line;
$bottom_margin = 0;

## Routines called by html.pl

$Begin{"HEAD"} = "begin_head";
$End{"HEAD"}   = "end_head";

# Entering HEAD: suppress text output (honoured by html_content).
sub begin_head {
    local ($element, $tag) = @_;
    $ignore_text = 1;
}

# Leaving HEAD: resume text output.
sub end_head {
    local ($element) = @_;
    $ignore_text = 0;
}

$Begin{"BODY"} = "begin_document";

# Entering BODY: start the first output page.
sub begin_document {
    local ($element, $tag) = @_;
    &start_page();
}

$End{"BODY"} = "end_document";

# Leaving BODY: terminate any partially written line.
sub end_document {
    local ($element) = @_;
    &fresh_line();
}

## Headers

$Begin{"H1"} = "begin_header";
$End{"H1"}   = "end_header";

$Begin{"H2"} = "begin_header";
$End{"H2"}   = "end_header";

$Begin{"H3"} = "begin_header";
$End{"H3"}   = "end_header";

$Begin{"H4"} = "begin_header";
$End{"H4"}   = "end_header";

# Blank lines to emit before/after each header level.
# NOTE(review): there are no entries for H4, so skip_n_lines receives an
# undefined count for that level -- confirm that this is intended.
$Skip_Before{"H1"} = 1;
$Skip_After{"H1"}  = 1;

$Skip_Before{"H2"} = 1;
$Skip_After{"H2"}  = 1;

$Skip_Before{"H3"} = 1;
$Skip_After{"H3"}  = 0;

# Start of a header: skip the configured number of lines for this level.
# The second argument (5) is presumably the same kind of widow cutoff
# that begin_paragraph passes -- verify against tformat's skip_n_lines.
sub begin_header {
    local ($element, $tag) = @_;
    &skip_n_lines ($Skip_Before{$element}, 5);
}

# End of a header: skip the configured number of lines for this level.
sub end_header {
    local ($element) = @_;
    &skip_n_lines ($Skip_After{$element});
}

$Begin{"BR"} = "line_break";

# BR: force a line break.
sub line_break {
    local ($element, $tag) = @_;
    &fresh_line();
}

$Begin{"P"} = "begin_paragraph";

# if fewer than this many lines left on page, start new page
$widow_cutoff = 5;

# P: separate paragraphs with one blank line.
sub begin_paragraph {
    local ($element, $tag) = @_;
    &skip_n_lines (1, $widow_cutoff);
}

$Begin{"BLOCKQUOTE"} = "begin_blockquote";
$End{"BLOCKQUOTE"}   = "end_blockquote";

# BLOCKQUOTE: indent both margins.  Both margins are adjusted relatively
# so that nested BLOCKQUOTEs accumulate and unwind symmetrically.
sub begin_blockquote {
    local ($element, $tag) = @_;
    $left_margin  += $indent_left;
    $right_margin -= $indent_right;
    &skip_n_lines (1);
}

# /BLOCKQUOTE: undo exactly the indentation begin_blockquote applied, so
# an enclosing BLOCKQUOTE keeps its own right indent.
sub end_blockquote {
    local ($element) = @_;
    $left_margin  -= $indent_left;
    $right_margin += $indent_right;
    &skip_n_lines (1);
}

$Begin{"PRE"} = "begin_pre";
$End{"PRE"}   = "end_pre";

# PRE: whitespace becomes significant (see html_whitespace).
sub begin_pre {
    local ($element, $tag) = @_;
    $whitespace_significant = 1;
}

# /PRE: whitespace is no longer significant.
sub end_pre {
    local ($element) = @_;
    $whitespace_significant = 0;
}

$Begin{"HR"} = "horizontal_rule";

# HR: draw a row of dashes, right_margin - left_margin characters long,
# starting on a fresh line.
sub horizontal_rule {
    local ($element, $tag) = @_;
    &fresh_line ();
    &print_n_chars ($right_margin - $left_margin, "-");
}

# Add code for IMG (use ALT attribute)
# Ignore I, B, EM, TT, CODE (no font changes)

## List environments

$Begin{"UL"}   = "begin_itemize";
$End{"UL"}     = "end_list_env";

$Begin{"OL"}   = "begin_enumerated";
$End{"OL"}     = "end_list_env";

$Begin{"MENU"} = "begin_menu";
$End{"MENU"}   = "end_list_env";

$Begin{"DIR"}  = "begin_dir";
$End{"DIR"}    = "end_list_env";

$Begin{"LI"}   = "begin_list_item";

# application-specific initialization routine
sub html_begin_doc {
    @list_stack   = ();
    $list_type    = "bullet";
    $list_counter = 0;
}

# Save the enclosing list's type and counter as a "type:counter" string.
sub push_list_env {
    push (@list_stack, join (":", $list_type, $list_counter));
}

# Restore the enclosing list's type and counter and undo one level of
# list indentation.  A close with nothing on the stack (unbalanced
# markup) is ignored, so the state is not clobbered with undefined
# values and the left margin is not pushed further left.
sub pop_list_env {
    if (@list_stack) {
        ($list_type, $list_counter) = split (":", pop (@list_stack));
        $left_margin -= $indent_left;
    }
}

# UL: open a bulleted list, one indent level deeper.
sub begin_itemize {
    local ($element, $tag) = @_;
    &push_list_env();
    $left_margin += $indent_left;
    $list_type    = "bullet";
    $list_counter = "*";
}

# MENU: rendered exactly like UL.
sub begin_menu {
    local ($element, $tag) = @_;
    &begin_itemize ($element, $tag);
}

# DIR: rendered exactly like UL.
sub begin_dir {
    local ($element, $tag) = @_;
    &begin_itemize ($element, $tag);
}

# OL: open a numbered list, one indent level deeper, counting from 1.
sub begin_enumerated {
    local ($element, $tag) = @_;
    &push_list_env();
    $left_margin += $indent_left;
    $list_type    = "enumerated";
    $list_counter = 1;
}

# /UL, /OL, /MENU, /DIR: return to the enclosing list environment.
sub end_list_env {
    local ($element) = @_;
    &pop_list_env();
#    &fresh_line();
}

# LI: print the item label ("*" or the current number) two columns to
# the left of the item text, then advance the number for enumerated lists.
sub begin_list_item {
    local ($element, $tag) = @_;
    $left_margin -= 2;
    &fresh_line();
    &print_word_wrap("$list_counter ");
    if ($list_type eq "enumerated") {$list_counter++;}
    $left_margin += 2;
}

$Begin{"DL"} = "begin_dl";

# DL: separate the definition list from preceding text.
sub begin_dl {
    local ($element, $tag) = @_;
    &skip_n_lines(1,5);
}

$Begin{"DT"} = "begin_defined_term";
$Begin{"DD"} = "begin_defined_definition";
$End{"DD"}   = "end_defined_definition";

# DT: each term starts on its own line.
sub begin_defined_term {
    local ($element, $tag) = @_;
    &fresh_line();
}

# DD: the definition is indented one level, on a new line.
sub begin_defined_definition {
    local ($element, $tag) = @_;
    $left_margin += $indent_left;
    &fresh_line();
}

# /DD: undo the definition's indentation.
sub end_defined_definition {
    local ($element) = @_;
    $left_margin -= $indent_left;
    &fresh_line();
}

$Begin{"META"} = "begin_meta";

# a META tag sets a value in the assoc array %Variable
# i.e. sets $Variable{author} to "Rushdie"
# The third argument is a typeglob aliasing the caller's attribute array.
sub begin_meta {
    local ($element, $tag, *attributes) = @_;
    local ($variable, $value);
    $variable = $attributes{name};
    $value    = $attributes{content};
    $Variable{$variable} = $value;
}

$Begin{"IMG"} = "begin_img";

# IMG: print the ALT text if there is any, otherwise the placeholder
# "[IMAGE]".
sub begin_img {
    local ($element, $tag, *attributes) = @_;
    &print_word_wrap (($attributes{"alt"} ne "") ? $attributes{"alt"} : "[IMAGE]");
}

# Content and whitespace.

# Ordinary text: word-wrapped, unless text is being ignored (inside HEAD).
sub html_content {
    local ($string) = @_;
    unless ($ignore_text) {
        &print_word_wrap ($string);
    }
}

# Significant whitespace (inside PRE): emitted one character at a time.
# Dies if called while whitespace is not significant.
sub html_whitespace {
    local ($string) = @_;
    if (! $whitespace_significant) {
        die "Internal error, called html_whitespace when whitespace was not significant";
    }
    local ($i);
    for ($i = 0; $i < length ($string); $i++) {
        &print_whitespace (substr($string,$i,1));
    }
}

# called by tformat.  Do nothing.
sub do_footer {
}

sub do_header {
}

1;