#!/usr/bin/perl -w
# NAME: c2htm.pl
# AIM: Convert C/C++ code to colored HTML
# I am sure I have done this before, but where???
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
use Clipboard;
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
if ($os =~ /win/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# log file stuff
our ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.1 2012-02-06";
my $load_log = 0;
my $in_file = '';
my $verbosity = 0;
my $out_xml = ''; # $temp_dir.$PATH_SEP."temp.$pgmname.htm";

### program variables
my @warnings = ();
my $cwd = cwd();

# reserved words, and build-ins
my @ResWords = ();
my @BuiltIns = ();  # KEYWORD=Compiler directives
my %RESWORDS = ();
my %BUILDINS = ();
# DEBUG
my $debug_on = 0;
my $def_file = 'C:\Documents and Settings\Geoff McLane\My Documents\MS\temp1.c';


sub VERB1() { return $verbosity >= 1; }
sub VERB2() { return $verbosity >= 2; }
sub VERB5() { return $verbosity >= 5; }
sub VERB9() { return $verbosity >= 9; }

sub is_in_reswords($) {
    my $word = shift;
    return 1 if (defined $RESWORDS{$word});
    return 0;
}

sub is_in_builtins($) {
    my $word = shift;
    return 1 if (defined $BUILDINS{$word});
    return 0;
}

########################################
# Loading the reserved words, and
# perl built-in functions from a
# special EditPlus 2, perl.stx file,
# but there are arrays already included
# if you do not have this file.
########################################
sub load_stx_lines($) {
    my ($rstx) = shift;
    my $scnt = scalar @{$rstx};
    prt( "Got $scnt lines to process ...\n" );
    my $st = 0;
    my %dchk = ();
    my ($ln,$tln,$ll);
    foreach $ln (@{$rstx}) {
        $tln = trim_all($ln);
        $ll = length($tln);
        next if ($ll == 0);
        if( $tln =~ /^\#KEYWORD=Reserved words/ ) {
            $st = 1;
            next;
        } elsif ($tln =~ /^\#KEYWORD=Compiler directives/ ) {
            $st = 2;
            next;
        } elsif (($tln =~ /^\#/) || ($tln =~ /^;/)) {
            $st = 0;
            next;
        }
        next if ($st == 0);
        # NO, there are duplicate but in different category
        #if (exists $dchk{$tln}) {
        #    prt( "Warning: Avoiding duplicate of [$tln] ...\n" ) if (VERB9());
        #    next;
        #}
        #$dchk{$tln} = 1;

        if( $st == 1 ) {
            push(@ResWords, $tln) if (!defined $RESWORDS{$tln});
            $RESWORDS{$tln} = 1;
        } elsif ($st == 2) {
            push(@BuiltIns, $tln) if (!defined $BUILDINS{$tln});
            $BUILDINS{$tln} = 1;
        }
    }
    $ln = scalar @ResWords;
    $tln = scalar @BuiltIns;
    prt("From $scnt lines, got $ln ResWords, and $tln Directives\n");
}
sub init_reserved_words() {
    my $txt = cpp_stx_txt();
    my @arr = split("\n",$txt);
    load_stx_lines(\@arr);
}

sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
           prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

sub get_class() {
    my $class = <<EOF;
 /* reserved words */
.rw { color: #0000cd; }
.res { color: #0000cd; }
 /* built-in functions */
.bif { color: #ff0000; }
/* scalar variables */
.sca { color: #9400d3; }
/* array variables */
.arr { color: #008b8b; }
/* hash variables */
.has { color: #a52a2a; }
/* comments after # */
.com { color: #008000; }
/* quoted items */
.qot { color: #009900; }
EOF
    return $class;
}

sub add_comment_span($) {
    my $txt = shift;
    $txt = trim_tailing($txt);
    return '<span class="com">'.$txt.'</span>';
}

sub process_in_file($) {
    my ($inf) = @_;
    my ($line,$inc,$lnn);
    my ($len,$i,$ch,$nc,$i2,$pc,$qc,$inquot,$tag,$incomm,$bal,$max,$hlen);
    my @lines = ();
    if ($inf eq '_CB_') {
        $line = Clipboard->paste;
        @lines = split("\n",$line);
    } else {
        if (! open INF, "<$inf") {
            pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
        }
        @lines = <INF>;
        close INF;
    }
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    $lnn = 0;
    $inquot = 0;
    $incomm = 0;
    my @brackets = ();
    my @braces = ();
    my $brkcnt = 0;
    my $brccnt = 0;
    my @nlines = ();
    my $html = '';
    $tag = '';
    my $last_schr = '';
    my $is_res = 0;
    my $is_bin = 0;
    foreach $line (@lines) {
        $lnn++;
        $line = trim_tailing($line);
        $max = length($line);
        if ($max == 0) {
            push(@nlines,"");
            next;
        }
        $ch = '';
        $html = '';
        $last_schr = '';
        for ($i = 0; $i < $max; $i++) {
            $pc = $ch;
            $ch = substr($line,$i,1);
            $i2 = $i + 1;
            $nc = ($i2 < $max) ? substr($line,$i2,1) : "";
            if ($inquot) {
                $html .= $ch;
                if ($ch eq $qc) {
                    prt("$lnn:$i: End quotes [$ch]\n") if (VERB9());
                    $inquot = 0;
                    $html .= '</span>';
                }
                next;
            }
            if ($incomm) {
                $html .= $ch;
                if (($ch eq '*')&&($nc eq '/')) {
                    prt("$lnn:$i2: Exit   comment\n") if (VERB9());
                    $incomm = 0;
                    $html .= $nc;
                    $html .= '</span>';
                    $i++;
                    next;
                }
            } else {
                if (($ch eq '/')&&($nc eq '*')) {
                    prt("$lnn:$i2: Entered comment\n") if (VERB9());
                    $incomm = 1;
                    $html .= '<span class="com">';
                    $html .= $ch;
                    $html .= $nc;
                    $i++;
                    next;
                }
                if (($ch eq '/')&&($nc eq '/')) {
                    prt("$lnn:$i2: Line comment '//\n") if (VERB9());
                    $bal = substr($line,$i);
                    $html .= add_comment_span($bal);
                    last;   # done this line
                }
                if (($ch eq '"')||($ch eq "'")) {
                    $qc = $ch;
                    $inquot = 1;
                    $html .= '<span class="qot">';
                    $html .= $ch;
                    prt("$lnn:$i2: Entered quotes [$ch]\n") if (VERB9());
                    next;
                }
                if ($ch eq '(') {
                    push(@brackets,[$lnn,$i2]);
                    $brkcnt = scalar @brackets;
                    prt("$lnn:$i2: Entered brackets [$ch] $brkcnt\n") if (VERB9());
                } elsif ($ch eq ')') {
                    if (@brackets) {
                        pop @brackets;
                        $brkcnt = scalar @brackets;
                        prt("$lnn:$i2: Ended brackets [$ch] $brkcnt\n") if (VERB9());
                    } else {
                        prtw("WARNING:$lnn:$i2: Close bracket ')' without stack!\n");
                    }
                } elsif ($ch eq '{') {
                    push(@braces,[$lnn,$i2]);
                    $brccnt = scalar @braces;
                    prt("$lnn:$i2: Entered braces [$ch] $brccnt\n") if (VERB9());
                } elsif ($ch eq '}') {
                    if (@braces) {
                        pop @braces;
                        $brccnt = scalar @braces;
                        prt("$lnn:$i2: Ended braces [$ch] $brccnt\n") if (VERB9());
                    } else {
                        prtw("WARNING:$lnn:$i2: Close braces '}' without stack!\n");
                    }
                }
                if ($ch =~ /\w/) {
                    $tag .= $ch;
                } else {
                    # space or symbol
                    if (length($tag)) {
                        $is_res = is_in_reswords($tag);
                        $is_bin = is_in_builtins($tag);
                        if ($is_res && $is_bin) {
                            if ($last_schr eq '#') {
                                $is_res = 0;
                                prt("Got BOTH res and bin, but cancel res due last sig char = #\n") if (VERB9());
                            } else {
                                prt("Got BOTH res and bin, but NOT cancellin res due last sig char = $last_schr\n") if (VERB9());
                            }
                        }
                        if ($is_res) {
                            $len = length($tag);
                            $hlen = length($html);
                            $bal = substr($html,0,($hlen-$len));
                            $tag = '<span class="res">'.$tag.'</span>';
                            $html = $bal.$tag;
                            prt("Found RESWORD [$tag]$len add res class [$bal]$hlen\n") if (VERB9());
                        } elsif ($is_bin) {
                            $len = length($tag);
                            $hlen = length($html);
                            $bal = substr($html,0,($hlen-$len));
                            $tag = '<span class="bif">'.$tag.'</span>';
                            $html = $bal.$tag;
                            prt("Found BUILTIN [$tag]$len add bif class [$bal]$hlen\n") if (VERB9());
                        }
                    }
                    $tag = '';
                    $last_schr = $ch if ($ch =~ /\S/);
                }
                $html .= $ch;   # add to output
            }
        } # for length of line
        if (length($tag)) {
            $is_res = is_in_reswords($tag);
            $is_bin = is_in_builtins($tag);
            if ($is_res && $is_bin) {
                if ($last_schr eq '#') {
                    $is_res = 0;
                    prt("Got BOTH res and bin, but cancel res due last sig char = #\n") if (VERB9());
                } else {
                    prt("Got BOTH res and bin, but NOT cancellin res due last sig char = $last_schr\n") if (VERB9());
                }
            }
            if ($is_res) {
                $len = length($tag);
                $hlen = length($html);
                $bal = substr($html,0,($hlen-$len));
                $tag = '<span class="res">'.$tag.'</span>';
                $html = $bal.$tag;
                prt("Found RESWORD [$tag]$len add res class [$bal]$hlen\n") if (VERB9());
            } elsif ($is_bin) {
                $len = length($tag);
                $hlen = length($html);
                $bal = substr($html,0,($hlen-$len));
                $tag = '<span class="bif">'.$tag.'</span>';
                $html = $bal.$tag;
                prt("Found BUILTIN [$tag]$len add bif class [$bal]$hlen\n") if (VERB9());
            }
            $tag = '';
        }
        if ($inquot) {
            prtw("WARNING:$lnn:$i2: End line in QUOTE [$qc]!\n");
            $inquot = 0;
        }
        prt("$lnn:$max: Pushing line [$html]\n") if (VERB9());
        push(@nlines,$html);
        $html = '';
    }
    if (@brackets) {
        prtw("WARNING: Exit lines with $brkcnt brackets on stack!\n");
    }
    if (@braces) {
        prtw("WARNING: Exit lines with $brccnt braces on stack!\n");
    }
    $lnn = scalar @nlines;
    prt("Lines out $lnn...\n");
    $html = join("\n",@nlines)."\n";
    $tag = '';
    if (length($out_xml)) {
        write2file($html,$out_xml);
        $tag .= "Written to $out_xml, and ";
    }
    prt($html);
    Clipboard->copy($html);
    prt($tag."copied to CLIPBOARD\n");
}

#########################################
### MAIN ###
parse_args(@ARGV);
init_reserved_words();
process_in_file($in_file);
pgm_exit(0,"");
########################################
sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help  (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n]     (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --load        (-l) = Load LOG at end. ($outfile)\n");
    prt(" --out <file>  (-o) = Write output to this file.\n");
    prt(" To read the input data from the clipboard, using _CB_ as the in-file name.\n");
    prt(" The output is always copied to the clipboard.\n");
}

sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have a following argument!\n") if (!@av);
}

sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                if ($sarg =~ /^v.*(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Verbosity = $verbosity\n") if (VERB1());
            } elsif ($sarg =~ /^l/) {
                $load_log = 1;
                prt("Set to load log at end.\n") if (VERB1());
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_xml = $sarg;
                prt("Set out file to [$out_xml].\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n") if (VERB1());
        }
        shift @av;
    }

    if ((length($in_file) ==  0) && $debug_on) {
        #$in_file = $def_file;
        $in_file = "_CB_";
        prt("Set DEFAULT input to [$in_file]\n");
        ###$load_log = 1;
        $out_xml = $temp_dir.$PATH_SEP."temp.$pgmname.htm";
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command! Use _CB_ to read from clipboard.\n");
    }
    if ( !(-f $in_file) && !($in_file eq '_CB_') ) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

sub cpp_stx_txt { # contents of cpp.stx
    my $txt = <<EOF;
#TITLE=C/C++
; C/C++ syntax file written by ES-Computing.
; This file is required for EditPlus to run correctly.

#DELIMITER=,(){}[]-+*%/="'~!&|<>?:;.#
#QUOTATION1='
#QUOTATION2="
#CONTINUE_QUOTE=n
#LINECOMMENT=//
#COMMENTON=/*
#COMMENTOFF=*/
#ESCAPE=\
#CASE=y
#NUMBER_PATTERN=cpp
#SPECIAL_STX=cpp

#KEYWORD=Reserved words
__int64
auto
bool
break
case
catch
char
cerr
cin
class
const
continue
cout
default
delete
do
double
else
enum
explicit
extern
float
for
friend
goto
if
inline
int
long
namespace
new
operator
private
protected
public
register
return
short
signed
sizeof
static
struct
switch
template
this
throw
try
typedef
union
unsigned
using
virtual
void
volatile
wchar_t
while
__asm
__fastcall
__stdcall
__based
__cdecl
__pascal
__inline
__multiple_inheritance
__single_inheritance
__virtual_inheritance
__declspec
dllimport
dllexport
WIN32
_WIN32
warning
disable
_MSC_VER
_WINDLL
_DLL
_LIB
_WIN32_WCE
_stdcall
_inline
_XBOX

#KEYWORD=Compiler directives
define
defined
error
include
line
ifdef
pragma
ifndef
undef
if
elif
else
endif
#
EOF
    return $txt;
}

# eof - c2htm.pl
