#!/usr/bin/perl -w
# NAME: bldctbl.pl
# AIM: Very specific processing of a tidy cleaned winword html to a C table
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# user variables
my $load_log = 1;
my $in_file = '';

my $dbgon = 1;
my $def_file = 'C:\Documents and Settings\Geoff McLane\My Documents\MS\temp1.htm';

### program variables
my @warnings = ();
my $cwd = cwd();
my $os = $^O;

sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
           prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" );
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

sub get_p2br($) {
    my $txt = shift;
    my $rtxt = '';
    my $len = length($txt);
    my ($i,$ch,$tt);
    $tt = '';
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($txt,$i,1);
        $tt .= $ch;
        last if ($tt =~ /<p>$/);
    }
    $i++;
    $tt = '';
    for (; $i < $len; $i++) {
        $ch = substr($txt,$i,1);
        $tt .= $ch;
        if ($tt =~ /<br>$/) {
            $tt =~ s/<br>$//;
            $tt = trim_all($tt);
            $rtxt = $tt;
        }
    }
    return $rtxt;
}

sub get_i2i($) {
    my $txt = shift;
    my $rtxt = '';
    my $len = length($txt);
    my ($i,$ch,$tt);
    $tt = '';
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($txt,$i,1);
        $tt .= $ch;
        last if ($tt =~ /<i>$/);
    }
    $i++;
    $tt = '';
    for (; $i < $len; $i++) {
        $ch = substr($txt,$i,1);
        $tt .= $ch;
        if ($tt =~ /<\/i>$/) {
            $tt =~ s/<\/i>$//;
            $tt = trim_all($tt);
            $rtxt = $tt;
        }
    }
    return $rtxt;
}

sub get_br2_p($) {
    my $txt = shift;
    my $rtxt = '';
    my $len = length($txt);
    my ($i,$ch,$tt);
    $tt = '';
    for ($i = 0; $i < $len; $i++) {
        $ch = substr($txt,$i,1);
        $tt .= $ch;
        last if ($tt =~ /<br>$/);
    }
    $i++;
    $tt = '';
    for (; $i < $len; $i++) {
        $ch = substr($txt,$i,1);
        $tt .= $ch;
        if ($tt =~ /<\/p>$/) {
            $tt =~ s/<\/p>$//;
            $tt = trim_all($tt);
            $rtxt = $tt;
        }
    }
    return $rtxt;
}

sub process_in_file($) {
    my ($inf) = @_;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$inc,$lnn, $intbl,$intd,$intr,$tdcnt,$txt);
    my $tdtxt = '';
    $lnn = 0;
    $intbl = 0;
    $intd = 0;
    $tdcnt = 0;
    my $txt1 = '';
    my $txt2 = '';
    my $txt3 = '';
    my @array = ();
    foreach $line (@lines) {
        chomp $line;
        $line = trim_all($line);
        $lnn++;
        if ($intbl) {
            if ($line =~ /<\/table/i) {
                $intbl = 0;
                prt("$lnn: Exit table...\n");
            } elsif ($intr) {
                if ($line =~ /<\/tr/) {
                    $intr = 0;
                    prt("$lnn: Exit </tr>\n");
                } elsif ($intd) {
                    if ($line =~ /<\/td>/) {
                        $intd = 0;
                        if ($tdcnt == 1) {
                            $txt = get_p2br($tdtxt);
                            $txt1 = $txt;
                            $txt3 = get_br2_p($tdtxt);
                        } else {
                            $txt = get_i2i($tdtxt);
                            $txt2 = $txt;
                            if (length($txt1) && length($txt2) && length($txt3)) {
                                push(@array, [$txt1,$txt2,$txt3]);
                            } else {
                                prt("FAILED: [$txt1] [$txt2] [$txt3]\n");
                            }
                        }
                        prt("$lnn: Exit </td> $txt $txt3\n");
                        $tdtxt = '';
                        if ($tdcnt == 2) {
                            $txt1 = '';
                            $txt2 = '';
                            $txt3 = '';
                        }
                    } else {
                        $tdtxt .= ' ' if (length($tdtxt));
                        $tdtxt .= $line;
                    }
                } else {
                    # in <tr>, but not <td>
                    if ($line =~ /<td/) {
                        $intd = 1;
                        $tdcnt++;
                        prt("$lnn: Enter <td> $tdcnt\n");
                    }
                }
            } else {
                if ($line =~ /<tr/) {
                    $intr = 1;
                    prt("$lnn: Enter <tr>\n");
                    $tdcnt = 0;
                    $tdtxt = '';
                }
            }
        } elsif ($line =~ /<table/i) {
            $intbl = 1;
            prt("$lnn: Enter table...\n");
        }
    }
    my ($key,$val);
    my $cnt = scalar @array;
    prt("List of $cnt keys...\n");
    my ($min,$len);
    $min = 0;
    foreach $key (@array) {
        $txt1 = ${$key}[0];
        $txt2 = ${$key}[2];
        $len = length($txt1);
        $min = $len if ($len > $min);
    }
    foreach $key (@array) {
        $txt1 = ${$key}[0];
        $txt2 = ${$key}[1];
        $txt3 = ${$key}[2];
        $txt1 .= ' ' while (length($txt1) < $min);
        prt(" { $txt1, \"$txt2\" }, // $txt3\n");
    }
}

#########################################
### MAIN ###
parse_args(@ARGV);
prt( "$pgmname: in [$cwd]: Hello, World...\n" );
process_in_file($in_file);
pgm_exit(0,"Normal exit(0)");
########################################
sub give_help {
    prt("$pgmname: version 0.0.1 2010-09-11\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help (-h or -?) = This help, and exit 0.\n");
}
sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have following argument!\n") if (!@av);
}

sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n");
        }
        shift @av;
    }

    if ((length($in_file) ==  0) && $dbgon) {
        $in_file = $def_file;
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

# eof - template.pl
