#!/Perl
###########################################################################
# p2h02.pl - 21 April, 2006 - Geoff McLane
#
# Another attempt at 'converting' perl scripts to a colour coded HTML page.
# The previous attempt got too unwieldy - abandoned at p2html12.pl ...
# This works on a line by line, character by character, decode,
# and colour encode ... a modest file can grow to 4 or more times
# its original size ... adding colour coding COSTS!
#
# NOTE: While this conversion to coloured HTML produces a 'pretty
# picture' of the original perl script file, often it can NOT be copied
# exactly by others. Asside from some big spacing differences, entities
# such as $tok .= '&amp;'; will NOT translate correctly. In a copy-and-
# paste operation, this will be 'translated' as $tok .= '&';, which
# produces ERRANT perl code! Other ERRORS are $ln =~ s/</&lt;/g;
# will become $ln =~ s/</</g; which does NOTHING!!
#
# If you want to SHARE your perl script, then you MUST also place the
# actual script, perhaps in a TEXT (.txt) file, on the web for example.
# I only use it to convert code fragments, with pretty colouring, on
# example pages.
#
# Also it still has some other foibles ;=))
#
# Coding like $cond{$#value} can be taken as a comment from the #
# in SOME cases, but maybe most are fixed ... and other unescaped #
# can likewise go wrong ...
#
# The best thing is does is correctly handle such things as
# print <<"EOF";, placing all the following text in one colour, until EOF
# and even my $help = <<EOH; is greyed until EOH
#
# There is presently some slight miss-indenting, as all tabs are converted
# to 3 spaces, so lines with a say 4 spaces, will be different to lines
# with tab ... $tab_space can be adjust below. The only auto-type
# solution would be to pre-process the lines, and try to make a tab-stop
# decision, but that is a lot of extra work ;=()
#
# It presently has NO input command - you have to manually adjust the
# $in_file variable to the file you want decode. Likewise with the
# htm $out_file, and $log_file ...
#
# NOTE: The 'reserved words' and 'builtin functions' can come from the
# perl.stx file of EditPlus 2 - http://www.editplus.com/ - This makes the
# load flexible, as the perl.stx file can be adjusted as desired.
# Without this, you can define $use_local and the local list will
# be used. 
#
# I have needlessly included Time::HiRes to give an indication of how
# long the processing took, but usually I can 'see' it takes longer
# than the very minimal time elapsed ... If you do not have this
# module, then these time references can be commented out.
#
# The CSS class names, and colours can be changed via the set of 'class'
# and 'color' variables, $a_class, $a_color, $b_class, $b_color, etc.
# And of course the head and ending of the HTML document can be modified
# as desired.
#
# There are a couple of DEBUG switch. The $debug_on immencely increases
# the output, but can often aid is 'seeing' and 'understanding' the code
# path taken ... $add_chart adds a colour chart at the end of the
# document, together with some stats on colour use ... and the increased
# size of the document.
#
###########################################################################
use Time::HiRes qw(usleep ualarm gettimeofday tv_interval nanosleep );
use strict;

# USER VARIABLES
my $out_file = "tempout01.htm";
my $log_file = "tempp2h01.txt";
###my $in_file = "am2dsp5.pl";
###my $in_file = "plandevol-eng.pl";
my $in_file = "p2h02.pl";
###my $in_file = 'temptest.pl';

my $perlstx = 'C:/Program Files/EditPlus 2/perl.stx'; # fix location - or use local list!
my $tab_space = '   '; # note tabs to 3 spaces - change if desired
my $use_local = 0; # set 1 to local internal lists
my $debug_on = 0; # heavy DEBUG ONLY output
my $add_chart = 0; # add colour chart at end, with document stats
my $out_lists = 0; # output the lists in qw form
my $brown_qw = 1; # to process a qw(...);
my $add_table = 1; # use table to outline code

# set the CLASS and COLOUR strings
my $a_class = 'a';
my $b_class = 'b';
my $c_class = 'c';
my $d_class = 'd';
my $e_class = 'e';
my $f_class = 'f'; #{ color:#666666; }
my $o_class = 'o'; #{ color:#FFA500; }
my $v_class = 'v'; #{ color:#808000; }
my $t_class = 't'; #{ color:#006600; }
my $a_color = 'red';
my $b_color = '#006666';
my $c_color = 'blue';
my $d_color = 'brown';
my $e_color = '#00008B';
my $f_color = '#666666';
my $o_color = '#FFA500';
my $v_color = '#808000';
my $t_color = '#006600';

# PROGRAM VARIABLES
my ($LF, $OF);
my @ResWords = ();
my @BuiltIns = ();

# TIME VARIABLES
my ($t0, $t1, $elapsed);
$t0 = [gettimeofday];

# load perl.stx file, or use local list
if ($use_local) {

@ResWords = qw(continue do else elsif for foreach goto if last local lock map my next package redo 
require return sub unless until use while STDIN STDOUT STDERR ARGV ARGVOUT ENV INC SIG TRUE FALSE __FILE__ 
__LINE__ __PACKAGE__ __END__ __DATA__ lt gt le ge eq ne cmp x not and or xor q qq qx qw $ @ % );

@BuiltIns = qw(abs accept alarm atan2 bind binmode bless caller chdir chmod chomp chop chown chr 
chroot close closedir connect cos crypt dbmclose dbmopen defined delete die dump each eof eval exec exists 
exit exp fcntl fileno flock fork format formline getc getlogin getpeername getpgrp getppid getpriority 
getpwnam getgrnam gethostbyname getnetbyname getprotobyname getpwuid getgrgid getservbyname gethostbyaddr 
getnetbyaddr getprotobynumber getservbyport getpwent getgrent gethostent getnetent getprotoent 
getservent setpwent setgrent sethostent setnetent setprotoent setservent endpwent endgrent endhostent 
endnetent endprotoent endservent getsockname getsockopt glob gmtime grep hex import index int ioctl 
join keys kill lc lcfirst length link listen localtime log lstat mkdir msgctl msgget msgsnd msgrcv no oct 
open opendir ord pack pipe pop pos print printf prototype push quotemeta rand read readdir readlink recv 
ref rename reset reverse rewinddir rindex rmdir scalar seek seekdir select semctl semget semop send setpgrp 
setpriority setsockopt shift shmctl shmget shmread shmwrite shutdown sin sleep socket socketpair sort 
splice split sprintf sqrt srand stat study substr symlink syscall sysopen sysread sysseek system syswrite 
tell telldir tie tied time times truncate uc ucfirst umask undef unlink unpack untie unshift utime values 
vec wait waitpid wantarray warn write );

} 

my @lines = ();
my $line = '';
my $last_builtin = '';
my $last_resword = '';
my $doc_total = 0;
my $out_total = 0;
# these are really just DEBUG counters
my $a_cnt = 0;
my $b_cnt = 0;
my $c_cnt = 0;
my $d_cnt = 0;
my $e_cnt = 0;
my $f_cnt = 0;
my $o_cnt = 0;
my $v_cnt = 0;
my $q_cnt = 0;

open $LF, ">$log_file" or die "ERROR: Unable to open LOG file $log_file ... aborting ...\n";
if ( ! $use_local) {
	load_stx_file( $perlstx );
}
prt( "Got ".scalar @ResWords." Reserved Words, and ".scalar @BuiltIns." Built-in functions ...\n" );
process_file( $in_file ); # main processing of the file lines
prt( "Got ".scalar @lines." new lines ...\n" );
write_out_file(); # write out results, using HTML format ...

$t1 = [gettimeofday];
$elapsed = tv_interval ( $t0, $t1 );
prt( "$0 processing took $elapsed seconds ...\n" );

close($LF);
exit 0;

#######################
### only subs below ###
#######################

sub write_out_file {
	# this is what it is all about - to generate a HTML document
	open $OF, ">$out_file" or die "ERROR: Unable to create $out_file ... aborting ...\n";

	print $OF <<"EOF";
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>$in_file - Generated HTML from Perl Script</title>
<meta http-equiv="Content-Language" content="en-au">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<style type="text/css">
<!-- /* Style Definitions */
body {
margin-left:1cm;
margin-right:1cm;
margin-top:0cm;
margin-bottom:0cm;
font-family: Courier New;
font-size: 10pt; 
}
.$a_class { color:$a_color; }
.$b_class { color:$b_color; }
.$c_class { color:$c_color; }
.$d_class { color:$d_color; }
.$e_class { color:$e_color; }
.$f_class { color:$f_color; }
.$o_class { color:$o_color; }
.$v_class { color:$v_color; }
.$t_class { color:$t_color; }
-->
</style>
</head>
<body>

EOF

	print $OF "<p>$in_file to HTML.<br>\n";

	if ($add_table) {
		print $OF '<table width="100%" border="4"><tr><td>'."\n";
	}
	# actual output of generated lines
	foreach $line (@lines) {
		$out_total += length($line);
		print $OF $line;
	}

	if ($add_table) {
		print $OF '</td></tr></table>'."\n";
	}

	if ($add_chart) {
		# mainly only for DEBUG
		print $OF <<"EOF";
Colour Chart<br>
<span class="$a_class">Class '$a_class'  - $a_color RED       $a_cnt</span><br>
<span class="$b_class">Class '$b_class'  - $b_color BLUEGREEN $b_cnt</span><br>
<span class="$c_class">Class '$c_class'  - $c_color BLUE      $c_cnt</span><br>
<span class="$d_class">Class '$d_class'  - $d_color BROWN     $d_cnt</span><br>
<span class="$e_class">Class '$e_class'  - $e_color DARKBLUE  $e_cnt</span><br>
<span class="$f_class">Class '$f_class'  - $f_color GREY      $f_cnt</span><br>
<span class="$o_class">Class '$o_class'  - $o_color ORANGE    $o_cnt</span><br>
<span class="$v_class">Class '$v_class'  - $v_color OLIVE     $v_cnt</span><br>
<span class="$t_class">Class '$t_class'  - $t_color GREEN     $q_cnt</span><br>
End of chart<br>
EOF
		my $tot = ($a_cnt+$b_cnt+$c_cnt+$d_cnt+$e_cnt+$f_cnt+$o_cnt+$v_cnt+$q_cnt);
		my $diff = $out_total - $doc_total;
		print $OF "This use of $tot colour span sequences added $diff bytes. In=$doc_total Out=$out_total<br>\n";
	}

	print $OF 'Generated: ' . localtime(time()) . " from $in_file.<br>\n";

	print $OF "</body>\n";
	close($OF);
}

sub add_red {
	my ($t) = shift;
	$a_cnt++;
	return ('<span class="'.$a_class.'">'.$t.'</span>');
}

sub add_class_b {
	my ($t) = shift;
	$b_cnt++;
	return ('<span class="'.$b_class.'">'.$t.'</span>');
}

sub add_blue {
	my ($t) = shift;
	$c_cnt++;
	return ('<span class="'.$c_class.'">'.$t.'</span>');
}

sub add_class_d {
	my ($t) = shift;
	$d_cnt++;
	return ('<span class="'.$d_class.'">'.$t.'</span>');
}

sub add_class_e {
	my ($t) = shift;
	$e_cnt++;
	return ('<span class="'.$e_class.'">'.$t.'</span>');
}

sub add_class_f {
	my ($t) = shift;
	$f_cnt++;
	return ('<span class="'.$f_class.'">'.$t.'</span>');
}

sub add_class_o {
	my ($t) = shift;
	$o_cnt++;
	return ('<span class="'.$o_class.'">'.$t.'</span>');
}

sub add_class_v {
	my ($t) = shift;
	$v_cnt++;
	return ('<span class="'.$v_class.'">'.$t.'</span>');
}

sub add_quote {
	my ($t) = shift;
	$q_cnt++;
	return ('<span class="'.$t_class.'">'.$t.'</span>');
}


sub in_res_words {
	my ($t) = shift;
	foreach my $rw (@ResWords) {
		if ($t eq $rw) {
			$last_resword = $rw;
			return 1;
		}
	}
	return 0;
}

sub in_built_in {
	my ($t) = shift;
	foreach my $rw (@BuiltIns) {
		if ($t eq $rw) {
			$last_builtin = $rw;
			return 1;
		}
	}
	return 0;
}

sub is2lt {
	my $t = shift;
	$t =~ s/&lt;/</g;
	if ( (length($t) >= 2 ) && ( $t =~ /<<$/ ) ) {
		return 1;
	}
	return 0;
}

sub sans_quotes {
	my $t = shift;
	$t =~ s/\"//g;
	return $t;
}

sub process_file {
	my ($in_file) = shift;
	my ($IF);
	my ($ch1,$ch2,$ch3,$ch4);
	open $IF, "<$in_file" or die "ERROR: Unable to open $in_file ... aborting ...\n";
	my @lns = <$IF>; # slurp into line array
	close($IF);
	prt( "Got ".scalar @lns." to process ...\n" );
	my $st = 0; # current status
	my $nst = 0;
	my $pc = '';
	my $ch = '';
	my $tok = '';
	my $ltok = ''; # last token
	my $ltok1 = '';
	my $ltok2 = '';
	my $qtok = ''; # print <<"EOF" token
	my $i = 0;
	foreach my $ln (@lns) {
		$doc_total += length($ln);
		chomp $ln;
		$ln =~ s/\r$//; # and remove CR, if present
		$ln =~ s/</&lt;/g; # make sure all '<' is/are swapped out
		###$ln =~ s/>/&gt;/g; # forget greater than!!!
		my $len = length($ln);
		my $nline = '';
		prt( "line=[$ln] ...\n" ) if $debug_on;
		$pc = '';
		$tok = '';
		$ltok = ''; # last token
		$ltok1 = ''; # token stack
		$ltok2 = '';
		$i = 0;
		$nst = 0; # if fall through, next status is IN space
		if ($st == 3) {
			# locked in a 'print' string to end token
			$nline = add_class_f($ln);
			$nline .= "<br>\n";
			push(@lines, $nline);
			if ($ln =~ /^$qtok/) {
				$st = 0;
			}
			next;
		} elsif ($st == 4) {
			# processing a 'qw' block - only if $brown_qw is ON
			$tok = '';
			for ( ; $i < $len; $i++) {
				$ch = substr($ln, $i, 1);
				if ($ch eq ')') {
					$nline .= add_class_d($tok) if (length($tok));
					# $nline .= $ch; # leave for fall through
					$tok = '';
					last;
				}
				$tok .= $ch;
			}
			if ($i < $len) {
				$nst = 2; # fall through to continue line
			} else {
				$nline = add_class_d($ln);
				$nline .= "<br>\n";
				push(@lines, $nline);
				next;
			}
		}
		$st = $nst;
		for ( ; $i < $len; $i++) {
			$ch = substr($ln, $i, 1);
			# make a BIG exception of '&lt;' ...
			if (($ch eq '&') && (($i + 3) < $len)) {
				$ch1 = substr($ln, $i, 4);
				if ($ch1 eq '&lt;') {
					$tok .= $ch1;
					$i += 3;
					$st = 2;
					$pc = ';';
					next;
				}
			}
			if ($st == 0) {
				# IN white space territory
				if ($ch =~ /\S/) {
					prt( "IN s, changed to NOT white space with [$ch] ...tok=[$tok]\n" ) if $debug_on; 
					if ((length($nline) == 0) && length($tok)) {
						$tok =~ s/\t/$tab_space/g; # TAB CONVERSION
						$tok =~ s/\s/&nbsp;/g; # SPACE TO &nbsp; CONVERSION
						$tok .= ' '; # and ADD ONE SPACE
						prt( "converted leading space t tok=[$tok]...\n" ) if $debug_on; 
					}
					$nline .= $tok; # add any white space to new line
					$ltok2 = $ltok1;
					$ltok1 = $ltok;
					$ltok = $tok;
					$tok = '';
					if ($pc ne '\\') {
						if ($ch eq '#') {
							# start of a COMMENT
							$tok = substr($ln, $i); 
							$nline .= add_class_b($tok);
							$tok = '';
							$st = 0;
							last;
						} elsif (($ch eq '"')||($ch eq "'")) {
							my $bch = $ch;
							$tok = $ch;
							$i++;
							for ( ; $i < $len; $i++ ) {
								$ch = substr($ln, $i, 1);
								if ($pc ne '\\') {
									if ($ch eq $bch) {
										$tok .= $ch;
										$tok =~ s/\"/&quot;/g;
										$nline .= add_quote($tok);
										$tok = '';
										$pc = $ch;
										last;
									}
								}
								$tok .= $ch;
								$pc = $ch;
							}
							$pc = $ch;
							next;
						}
					}
					$tok = $ch;
					if ($ch =~ /\w/) {
						$st = 1;
					} else {
						if ($ch eq '&') {
							$tok = '&amp;';
						}
						$st = 2;
					}
					$pc = $ch;
					next;
				} else {
					# staying in white space
					$tok .= $ch;
					$pc = $ch;
					next;
				}
			} elsif ($st == 1) {
				# dealing with alphanumberic + _
				if ($ch =~ /\w/) {
					$tok .= $ch;
					$pc = $ch;
					next; # continue alphanumeric + _
				}
				prt( "IN an_, no longer an_ with [$ch] ... tok=[$tok]\n" ) if $debug_on;
				if (length($tok)) {
					if (in_res_words($tok) ) {
						$nline .= add_blue($tok);
						if ($brown_qw && ($ch eq '(') && ($last_resword eq 'qw')) {
							prt( "Excepting a qw list ...\n" ) if $debug_on;
							$i++;
							$nline .= $ch;
							$tok = '';
							for ( ; $i < $len ; $i++) {
								$ch = substr($ln,$i,1);
								if ($ch eq ')') {
									$nline .= add_class_d($tok) if (length($tok));
									$nline .= $ch;
									$tok = '';
									last;
								}
								$tok .= $ch;
							}
							if ($i < $len) {
								next;
							} # else, we have ended the line, still in a 'qw' ...
							$nline .= add_class_d($tok) if (length($tok));
							$tok = '';
							$st = 4;
							last; # end of THIS line
						}
					} elsif (in_built_in($tok)) {
						$nline .= add_red($tok);
					} else {
						$nline .= $tok;
					}
					$ltok2 = $ltok1;
					$ltok1 = $ltok;
					$ltok = $tok;
				}
				$tok = $ch;
				if ($ch =~ /\s/) {
					$st = 0; # goto SPACE mode
				} elsif ($ch =~ /\w/) {
					$st = 1; # goto AN_ mode
				} else {
					$st = 2; # goto NOT SPACE or AN_ mode
				}
				$pc = $ch;
				next;
			} elsif ($st == 2) {
				# not IN space or IN an_
				if ($ch =~ /\s/) {
					prt( "change back to space with [$ch] ... tok=[$tok]\n" ) if $debug_on;
					$nline .= $tok;
					$ltok2 = $ltok1;
					$ltok1 = $ltok;
					$ltok = $tok;
					$tok = $ch;
					$st = 0;
					$pc = $ch;
					next;
				} elsif ($ch =~ /\w/) {
					prt( "change back to an_ with [$ch] ... tok=[$tok]\n" ) if $debug_on;
					if ( is2lt($tok) ) {
						$ch1 = substr($ln,$i); # get balance of line
						$ch1 =~ s/\s+$//; # remove any trailing white space
						if ( ($ch1 =~ /;$/) && ($ltok =~ /=/) ) {
							$ch1 =~ s/^\s+//; # remove any leading spaces
							$ch1 =~ s/;$//; # remove colon
							$ch1 =~ s/\s+$//; # now again remove any trailing white space
							if ( !($ch1 =~ /\s/) ) {
								$qtok = $ch1; # STORE THE END MARKER !!!
								prt( "Got <<EOH type tok[$tok] $ch1 ltok[$ltok] ltok1[$ltok1] ltok2[$ltok2]... qtok[$qtok]\n" ) if $debug_on;
								$nline .= $tok;
								$tok = '';
								$nline .= substr($ln,$i); # get balance of line
								$st = 3;
								last; # done this line
							}
						}
					}

					if (($tok eq '$')||($tok eq '@')||($tok eq '%')) {
						$tok .= $ch;
					} else {
						$nline .= $tok;
						$ltok2 = $ltok1;
						$ltok1 = $ltok;
						$ltok = $tok;
						$tok = $ch;
					}
					$st = 1;
					$pc = $ch;
					next;
				}
				###if (($pc ne '\\') && (($ch eq '#') || ($ch eq '"') || ($ch eq "'"))) {
				if (($pc ne '\\') && ((($ch eq '#')&&($pc ne '$')) || ($ch eq '"') || ($ch eq "'"))) {
					$nline .= $tok; # add in current token
					$ltok2 = $ltok1;
					$ltok1 = $ltok;
					$ltok = $tok;
					$tok = '';
					if ($ch eq '#') {
						# start of a COMMENT
						$tok = substr($ln, $i); 
						$nline .= add_class_b($tok);
						$tok = '';
						$st = 0;
						last;
					} elsif (($ch eq '"')||($ch eq "'")) {
						my $bch = $ch;
						$tok = $ch;
						$i++;
						for ( ; $i < $len; $i++ ) {
							$ch = substr($ln, $i, 1);
							if ($pc ne '\\') {
								if ($ch eq $bch) {
									$tok .= $ch;
									$qtok = sans_quotes($tok);
									$tok =~ s/\"/&quot;/g;
									$nline .= add_quote($tok);
									$tok = '';
									$pc = $ch;
									last;
								}
							}
							$tok .= $ch;
							$pc = $ch;
						}
						# check for 'print ... <<"EOF";'
						if ( ($i < $len) && ($last_builtin eq 'print') && (length($ltok) >= 2) && is2lt($ltok) && length($qtok) ) {
							prt( "Got $last_builtin ltok[$ltok] qtok[$qtok] ...\n" ) if $debug_on;
							$i++;
							$nline .= substr($ln,$i);
							$tok = '';
							$st = 3;
							last; # done this line
						}
						$pc = $ch;
						next;
					}
				}

				if ($ch eq '&') {
					$ch = '&amp;';
				}
				$tok .= $ch;
			}
			$pc = $ch;
		}
		$nline .= $tok;
		$nline .= "<br>\n";
		push(@lines, $nline);
	}
}

sub trim_line {
	my ($l) = shift;
	chomp $l;
	$l =~ s/\r$//; # and remove CR, if present
	$l =~ s/\t/ /g;
	$l =~ s/\s\s/ /g while ($l =~ /\s\s/);
	$l = substr($l,1) while ($l =~ /^\s/);
	$l = substr($l,0,length($l)-1) while (($l =~ /\s$/)&&(length($l)));
	return $l;
}

sub load_stx_file {
	my ($in_file) = shift;
	my ($IF);
	my @stx = ();
	my %dchk = ();
	open $IF, "<$in_file" or die "ERROR: Unable to open $in_file ... aborting ...\n";
	@stx = <$IF>; # slurp entire file into array
	close($IF);
	my $scnt = scalar @stx;
	prt( "Got $scnt lines in $in_file to process ...\n" );
	my $st = 0;
	foreach my $ln (@stx) {
		my $tln = trim_line($ln);
		my $ll = length($tln);
		next if ($ll == 0);
		if( $tln =~ /^\#KEYWORD=Reserved words/ ) {
			$st = 1;
			next;
		} elsif ($tln =~ /^\#KEYWORD=Built-in functions/ ) {
			$st = 2;
			next;
		} elsif (($tln =~ /^\#/) || ($tln =~ /^;/)) {
			$st = 0;
			next;
		}

		if (exists $dchk{$tln}) {
			prt( "Warning: Avoiding duplicate of [$tln] ...\n" );
			next;
		}
		$dchk{$tln} = 1;

		if( $st == 1 ) {
			push(@ResWords, $tln);
		} elsif ($st == 2) {
			push(@BuiltIns, $tln);
		}
	}

	if ($out_lists) {
		my $max = 85;
		my $cnt = 20;
		prt( 'my @ResWords = qw(' );
		foreach my $ln (@ResWords) {
			prt( $ln.' ' );
			$cnt += length($ln);
			if ($cnt > $max) {
				prt("\n");
				$cnt = 0;
			}
		}
		prt( ");\n" );
		$cnt = 20;
		prt( 'my @BuiltIns = qw(' );
		foreach my $ln (@BuiltIns) {
			prt( $ln.' ' );
			$cnt += length($ln);
			if ($cnt > $max) {
				prt("\n");
				$cnt = 0;
			}
		}
		prt( ");\n" );
	}
}


sub prt {
	my ($m) = shift;
	print $m;
	print $LF $m;
}

