#!/usr/bin/perl -w
# NAME: sh2htm.pl
# AIM: Parse a shell script, and render in HTML
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' ...\n";
# log file stuff
our ($LF);
my ($OF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.1 2011-08-02";
my $out_file = $perl_dir."\\tempsh2htm.htm";
my $load_log = 0;
my $in_file = '';

# set the CLASS and COLOUR strings
my $a_class = 'a'; # built-in function (red)
my $b_class = 'b'; # comments (#006666)
my $c_class = 'c'; # reserved words (blue)
my $d_class = 'd'; # inside qw(...)
my $e_class = 'e'; # $scalar (#9400d3)
my $f_class = 'f'; # in <<EOF...EOF block (#666666)
my $o_class = 'o'; # @array  (#008b8b - was #FFA500)
my $v_class = 'v'; # %hash (#a52a2a - was #808000)
my $t_class = 't'; # quoted - single and double (#006600)
# this is an extract from my perl.css file
#.bif { color: #ff0000 } /* a built-in functions */
#.com { color: #008000 } /* b comments after # */
#.rw { color: #0000cd }  /* c reserved words */
# no d
#.sca { color: #9400d3 } /* e scalar variables */
# no f
#.arr { color: #008b8b } /* o array variables */
#.has { color: #808000 } /* v hash variables */
#.qot { color: #009900 } /* t quoted items */

my $a_color = 'red';
my $b_color = '#006666';
my $c_color = 'blue';
my $d_color = '#a52a2a';
#my $e_color = '#00008B';
my $e_color = '#9400d3';
my $f_color = '#666666';
my $o_color = '#ffa500';
#my $o_color = '#008b8b';
my $v_color = '#808000';
my $t_color = '#006600';

# other USER variables
my $tab_space = '   '; # note tabs to 3 spaces - change if desired
# some USER OPTIONS
my $add_chart = 0; # add colour chart at end, with document stats
my $brown_qw = 1; # to process a qw(...);
# these a mutually exclusive - either or ...
my $add_table = 0; # use table to outline code
my $add_pre = 1; # use a <pre>...</pre> block
# this option REALLY adds weight to certain files
my $add_uvars = 1; # colour code user variables
# this load the output result into a browser
my $load_html = 0; # load the final HTML

my $debug_on = 0;
my $def_file = 'tempsh2.sh';
#degug
my $dbg_01 = 0; # Show conversion to HTML

### program variables
my @warnings = ();
my $cwd = cwd();
my $os = $^O;
my @html_lines = ();
my $out_total = 0;
my $doc_total = 0;
# these are really just DEBUG counters
my $a_cnt = 0;
my $b_cnt = 0;
my $c_cnt = 0;
my $d_cnt = 0;
my $e_cnt = 0;
my $f_cnt = 0;
my $o_cnt = 0;
my $v_cnt = 0;
my $q_cnt = 0;

my @ResWords = qw( if then fi while do done );
my @BuiltIns = qw( echo read );
my ($last_resword,$last_builtin);

sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
           prt("$itm\n");
        }
        prt("\n");
    } else {
        ###prt( "\nNo warnings issued.\n\n" );
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

#######################
### only subs below ###
#######################
sub get_code_section() {
    my $code = <<EOF;

  <p class="nom">
   code:
  </p>

EOF
    return $code;
}

sub add_color_chart {
   my ($fh) = shift;
     print $fh <<"EOF";
Chart of Colours Used<br>
<table border="1" summary="Table of colours, and count of times used">
<tr>
<th>Class</th><th>Ref</th><th>Colour</th><th>Use</th><th>Count</th>
</tr>
<tr>
<td><span class="$a_class">class='$a_class'</span></td>
<td><span class="$a_class">$a_color</span></td>
<td><span class="$a_class">RED</span></td>
<td><span class="$a_class">Built-in Functions</span></td>
<td><span class="$a_class">$a_cnt</span></td>
</tr>
<tr>
<td><span class="$b_class">class='$b_class'</span></td>
<td><span class="$b_class">$b_color</span></td>
<td><span class="$b_class">BLUEGREEN</span></td>
<td><span class="$b_class">Comments (following #)</span></td>
<td><span class="$b_class">$b_cnt</span></td>
</tr>
<tr>
<td><span class="$c_class">class='$c_class'</span></td>
<td><span class="$c_class">$c_color</span></td>
<td><span class="$c_class">BLUE</span></td>
<td><span class="$c_class">Reserved Words</span></td>
<td><span class="$c_class">$c_cnt</span></td>
</tr>
<tr>
<td><span class="$d_class">class='$d_class'</span></td>
<td><span class="$d_class">$d_color</span></td>
<td><span class="$d_class">BROWN</span></td>
<td><span class="$d_class">Inside qw(...)</span></td>
<td><span class="$d_class">$d_cnt</span></td>
</tr>
<tr>
<td><span class="$e_class">class='$e_class'</span></td>
<td><span class="$e_class">$e_color</span></td>
<td><span class="$e_class">DARKBLUE</span></td>
<td><span class="$e_class">Scalar Variables</span></td>
<td><span class="$e_class">$e_cnt</span></td>
</tr>
<tr>
<td><span class="$f_class">class='$f_class'</span></td>
<td><span class="$f_class">$f_color</span></td>
<td><span class="$f_class">GREY</span></td>
<td><span class="$f_class">Inside &gt;&gt;EOF thingy</span></td>
<td><span class="$f_class">$f_cnt</span></td>
</tr>
<tr>
<td><span class="$o_class">class='$o_class'</span></td>
<td><span class="$o_class">$o_color</span></td>
<td><span class="$o_class">ORANGE</span></td>
<td><span class="$o_class">Array Variables</span></td>
<td><span class="$o_class">$o_cnt</span></td>
</tr>
<tr>
<td><span class="$v_class">class='$v_class'</span></td>
<td><span class="$v_class">$v_color</span></td>
<td><span class="$v_class">OLIVE</span></td>
<td><span class="$v_class">Hash Variables</span></td>
<td><span class="$v_class">$v_cnt</span></td>
</tr>
<tr>
<td><span class="$t_class">class='$t_class'</span></td>
<td><span class="$t_class">$t_color</span></td>
<td><span class="$t_class">GREEN</span></td>
<td><span class="$t_class">Single and Double Quotes</span></td>
<td><span class="$t_class">$q_cnt</span></td>
</tr>
</table>
<br>End of chart<br>
EOF

}

##########################################################################
# The main file OUTPUT - that is the HTML file.
# It establishes the HTML header, which includes the CSS style
# information. then outputs each of the 'converted' lines ...
##########################################################################
sub write_out_file {
   # this is what it is all about - to generate a HTML document
   open $OF, ">$out_file" or die "ERROR: Unable to create $out_file ... aborting ...\n";

   print $OF <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
 <head>
  <title>
   $in_file - Generated HTML from Perl Script
  </title>
  <meta http-equiv="Content-Language" content="en-au">
  <meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
  <style type="text/css">
   <!-- /* Style Definitions */
body { margin-top:0cm; margin-right:1cm; margin-bottom:0cm; margin-left:1cm; font-family: Courier New; font-size: 10pt; }
.$a_class { color:$a_color; }
.$b_class { color:$b_color; }
.$c_class { color:$c_color; }
.$d_class { color:$d_color; }
.$e_class { color:$e_color; }
.$f_class { color:$f_color; background-color:#fff8f8; }
.$o_class { color:$o_color; }
.$v_class { color:$v_color; }
.$t_class { color:$t_color; }
.bld { font-weight: bold; }
.rite { text-align : right; }
.nmb { margin-bottom : 0; border-style : none; padding : 0; }
.nmt { margin-top : 0; border-style : none; padding : 0; }
.nob { margin : 0; border-style : none; padding : 0; }
p.top { margin: 0; border-style: none; padding: 0; text-align: center; }
p.nom { margin:0cm; margin-bottom:0; color: red; }
.cd {
  /* top, right, bottom, left */
  padding: 0px 10px 0px 10px;
  margin: 1px 10px 1px 10px;
  background: #f0f8ff;
  border-width: 1px;
  border-style: solid solid solid solid;
  border-color: #cccccc;
  width: 90%;
  font-family:"Courier New";
}
-->
</style>
</head>
<body>

EOF

   print $OF "<p class=\"nmb\">File [<b>$in_file</b>] to HTML.</p>\n";

   if ($add_table) {
      print $OF '<table width="100%" border="1" summary="Simple HTML of $in_file"><tr><td>'."\n";
   } elsif ($add_pre) {
       print $OF get_code_section();
       print $OF '<pre class="cd">';
   }
   my ($line);
   # actual output of generated lines
   foreach $line (@html_lines) {
      $out_total += length($line);
      print $OF $line;
   }

   if ($add_table) {
      print $OF '</td></tr></table>'."\n";
   } elsif ($add_pre) {
      print $OF '</pre>'."\n";
   }

   if ($add_chart) {
       # mainly only for DEBUG
       add_color_chart($OF);
       my $tot = ($a_cnt+$b_cnt+$c_cnt+$d_cnt+$e_cnt+$f_cnt+$o_cnt+$v_cnt+$q_cnt);
       my $diff = $out_total - $doc_total;
       print $OF "This use of $tot colour span sequences added $diff bytes. In=$doc_total Out=$out_total<br>\n";
   }

   print $OF '<p class="rite">Generated: ' . localtime(time()) . " from [$in_file].</p>\n";

   print $OF "</body>\n";
   close($OF);
   prt("Written HTML result to [$out_file]\n");
}

######################################################
# Converting SPACES to '&nbsp;'
# Of course this could be done just using perl's
# powerful search and replace, but this handles
# any number of spaces, only converting the number
# minus 1 to &nbsp; ... not sure how to have
# this level of control with regex replacement
######################################################
sub conv_spaces {
   my $t = shift;
   my ($c, $i, $nt, $ln, $sc, $sp);
   $nt = ''; # accumulate new line here
   $ln = length($t);
   for ($i = 0; $i < $ln; $i++) {
      $c = substr($t,$i,1);
      if ($c eq ' ') {
         $i++; # bump to next 
         $sc = 0;
         $sp = '';
         for ( ; $i < $ln; $i++) {
            $c = substr($t,$i,1);
            if ($c ne ' ') {
               last; # exit
            }
            $sc++;
            $sp .= $c;
         }
         if ($sc) {
            $sp =~ s/ /&nbsp;/g;
            $nt .= $sp;
         }
         $i--; # back up one
         $c = ' '; # add back the 1 space
      }
      $nt .= $c;
   }
   prt( "conv_space: from [$t] to [$nt] ...\n" ) if $dbg_01;
   return $nt;
}

###########################################################################
# VERY IMPORTANT SERVICE
# This converts the 'text' into HTML text, but only does a partial job!
# 1. Convert '&' to '&amp;' to avoid interpreting as replacement
# 2. Convert '<' to '&lt;' to avoid interpreting as HTML
# 3. Convert '"' to '&quot;'
# 4. Convert '\t' to SPACES
# 5. Finally, if there are double or more SPACES, convert to '&nbsp;'
###########################################################################
sub html_line {
   my $t = shift;
   my $ot = $t;
   $t =~ s/&/&amp;/g; # all '&' become '&amp;'
   $t =~ s/</&lt;/g; # make sure all '<' is/are swapped out
   $t =~ s/>/&gt;/g; # make sure all '<' is/are swapped out
   $t =~ s/\"/&quot;/g; # and all quotes become &quot;
   $t =~ s/\t/$tab_space/g; # tabs to spaces
   if (!$add_pre) {
      if ($t =~ /\s\s/) { # if any two consecutive white space
         $t = conv_spaces($t);
      }
   }
   prt( "html_line: from [$ot] to [$t] ...\n" ) if ($dbg_01);
   return $t;
}

sub html_char($) {
    my $t = shift;
    return html_line($t);
}

# search the @ResWord array for an entry
sub in_res_words {
   my ($t) = shift;
   foreach my $rw (@ResWords) {
      if ($t eq $rw) {
         $last_resword = $rw;
         return 1;
      }
   }
   return 0;
}

# search the @BuiltIns array for an entry
sub in_built_in {
   my ($t) = shift;
   foreach my $rw (@BuiltIns) {
      if ($t eq $rw) {
         $last_builtin = $rw;
         return 1;
      }
   }
   return 0;
}

# shell built in functions
sub add_class_a {
   my ($t) = shift;
   $a_cnt++;
   return ('<span class="'.$a_class.'">'.$t.'</span>');
}

# shell comment - #...
sub add_class_b {
   my ($t) = shift;
   $b_cnt++;
   return ('<span class="'.$b_class.'">'.$t.'</span>');
}

# shell reserved words
sub add_class_c {
   my ($t) = shift;
   $c_cnt++;
   return ('<span class="'.$c_class.'">'.$t.'</span>');
}

sub add_quote {
   my ($t) = shift;
   $q_cnt++;
   return ('<span class="'.$t_class.'">'.$t.'</span>');
}


sub process_in_file($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$inc,$lnn,$nline,$len,$tag,$ch,$tline,$i);
    my ($inquot,$qc,$quot);
    $lnn = 0;
    $tag = '';
    foreach $line (@lines) {
        $lnn++;
        chomp $line;
        $tline = trim_all($line);
        $len = length($tline);
        $nline = '';
        if ($len == 0) {
            push(@html_lines,"\n");
            next;
        }
        if ($line =~ /^\s*\#/) {
            $nline = add_class_b(html_line($line));
        } else {
            #$nline = $line;
            $nline = '';
            $len = length($line);
            $inquot = 0;
            $quot = '';
            for ($i = 0; $i < $len; $i++) {
                $ch = substr($line,$i,1);
                if ($inquot) {
                    $quot .= $ch;
                    if ($ch eq $qc) {
                        $nline .= add_quote(html_line($quot));
                        $quot = '';
                        $inquot = 0;
                    }
                    next;
                }
                if (($ch eq '"')||($ch eq "'")||($ch eq "`")) {
                    if (length($tag)) {
                        if (in_built_in($tag)) {
                            $tag = add_class_a($tag);
                        } elsif (in_res_words($tag)) {
                            $tag = add_class_c($tag);
                        }
                        $nline .= $tag;
                        $tag = '';
                    }
                    $quot = $ch;
                    $qc = $ch;
                    $inquot = 1;
                    next;
                }
                if ($ch =~ /\w/) {
                    $tag .= $ch; # accumulate
                } else {
                    if (length($tag)) {
                        if (in_built_in($tag)) {
                            $tag = add_class_a($tag);
                        } elsif (in_res_words($tag)) {
                            $tag = add_class_c($tag);
                        }
                        $nline .= $tag;
                        $tag = '';
                    }
                    if ($ch =~ /\s/) {
                        $nline .= ' ';
                    } else {
                        $ch = html_char($ch);
                        $nline .= $ch;
                    }
                }
            }
            # done a line...
            if (length($tag)) {
                if (in_built_in($tag)) {
                    $tag = add_class_a($tag);
                } elsif (in_res_words($tag)) {
                    $tag = add_class_c($tag);
                }
                $nline .= $tag;
                $tag = '';
            }
            if ($inquot) {
                $nline .= add_quote(html_line($quot)) if (length($quot));
                $quot = '';
                $inquot = 0;
            }
            if (length($quot)) {
                $nline .= add_quote(html_line($quot)) if (length($quot));
                $quot = '';
            }
        }
        push(@html_lines,"$nline\n");
    }
}

#########################################
### MAIN ###
parse_args(@ARGV);
process_in_file($in_file);
write_out_file();
if ($load_html) {
    prt("Loading [$out_file] in default browser...\n");
    system($out_file);
}
pgm_exit(0,"");
########################################
sub give_help {
    prt("$pgmname: $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help (-h or -?) = This help, and exit 0.\n");
    prt(" --chart      (-c) = Add the color chart. (for diagnostics)\n");
    prt(" --log        (-l) = Load LOG at end.\n");
    prt(" --show       (-s) = Show the HTML in the default browser.\n");
}

sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have following argument!\n") if (!@av);
}

sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^c/) {
                $add_chart = 1;
            } elsif ($sarg =~ /^s/) {
                $load_html = 1;
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n");
        }
        shift @av;
    }

    if ((length($in_file) ==  0) && $debug_on) {
        $in_file = $def_file;
        $load_html = 1;
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

# eof - template.pl
