#!/perl -w
# NAME: table2str.pl
# AIM: VERY SPECIFIC - read an HTML file, extract the 'table', and write it as a structure...
# 01/10/2009 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# Log file setup.
# The log name handed to open_log() is built from this script's own name.
my ($LF);
my $pgmname = $0;
# If $0 looks like a DOS-style path (drive letter, colon, backslash), keep
# only the last backslash-separated component.
$pgmname = ( split( /\\/, $pgmname ) )[-1] if ( $pgmname =~ /\w{1}:\\.*/ );
my $outfile = "temp.$pgmname.txt";
open_log($outfile);

# Hard-coded input file that process_file() is run on further down.
my $in_file = 'C:\Documents and Settings\Geoff McLane\My Documents\MS\FOURCC_tidy.htm';

# process_file( $fil )
# Read the HTML file $fil, flatten it to a single string and scan it one
# character at a time, collecting text found inside <table> rows.
#
# Returns a reference to an array of records, each record being
#    [ $code, $tr_lns, $col2, $col3, $col4 ]
# where $code is the text gathered while the column counter was 1, $tr_lns is
# the value of the row counter when the record was stored, and $col2..$col4
# are the texts gathered while the column counter was 2, 3 and 4.
# If the file can not be opened an error is logged through prt() and a
# reference to an empty array is returned.
#
# How the scanner works (it is tuned to one specific, tidied input file):
#  - every line is chomped, passed through trim_all() (defined elsewhere;
#    presumably whitespace clean-up), and the lines are joined with one space;
#  - a '<' starts a tag; the tag text up to '>' is stored in $tag, with
#    double-quoted attribute values copied verbatim so a '>' inside quotes
#    does not end the tag;
#  - row/column state is NOT updated when a tag is parsed, but on each text
#    character that follows it: every character seen while $tag is a 'tr'
#    tag resets the column counter and bumps the row counter, and every
#    character seen while $tag is a 'td' tag bumps the column counter. With
#    lines joined by a single space this normally means once per tag.
#
# NOTE(review): a record is stored at the first '<' seen while the column
# counter is 4, which is before any column-4 text of that row can have been
# gathered; $col4 is only cleared when a record is stored, so the $col4 in a
# record appears to hold text gathered since the previous record - confirm
# against the real input whether that is intended.
# NOTE(review): the "append to previous record" branch needs a second
# non-blank $code for the same $tr_lns. $code only grows while the column
# counter is 1, and that counter is only reset together with $tr_lns++, so
# the branch looks unreachable - confirm before relying on it.
sub process_file($) {
   my ($fil) = @_;
   my @codes = ();

   # Three-argument open on a lexical handle: the mode can no longer be
   # influenced by characters in the file name, and the name is used as-is.
   my $inf;
   if ( !open( $inf, '<', $fil ) ) {
      prt("ERROR: Can not open file $fil!\n");
      return \@codes;
   }
   my @lines = <$inf>;
   close $inf;

   my $lncnt = scalar @lines;
   prt( "Doing $lncnt lines, from [$fil]...\n" );

   # Clean each line in place, then flatten the file to one long string.
   foreach my $ln (@lines) {
      chomp $ln;
      $ln = trim_all($ln);
   }
   my $line = join(" ",@lines);
   $line = trim_all($line);
   my $max = length($line);
   prt( "Processing $max characters...\n" );

   my $in_table = 0;    # true between <table...> and </table...>
   my $tag      = '';   # text of the most recently parsed tag (without < >)
   my $in_td    = 0;    # column counter within the current row
   my $tr_lns   = 0;    # row counter
   my $code     = '';   # text gathered while $in_td == 1
   my $col2     = '';   # text gathered while $in_td == 2
   my $col3     = '';   # text gathered while $in_td == 3
   my $col4     = '';   # text gathered while $in_td == 4
   my ($i, $cc);

   for ($i = 0; $i < $max; $i++) {
      $cc = substr($line,$i,1);
      if ($cc eq '<') {
         # A new tag begins. If we are at column 4 and the previous tag was
         # not a <br>, commit whatever column-1 text is pending.
         if ( ($in_td == 4) && ($tag !~ /^br$/i) ) {
            my $last = @codes ? $codes[-1][1] : -1;
            my $tmp = trim_all($code);
            if (length($tmp)) {
               if ($last == $tr_lns) {
                  # same row as the last stored record: extend its code text
                  $codes[-1][0] .= $code;
               } else {
                  $code =~ s/^\s+//;   # drop leading white space only
                  push(@codes, [$code, $tr_lns, $col2, $col3, $col4]);
                  prt( "$code " );
                  $col2 = '';
                  $col3 = '';
                  $col4 = '';
               }
            }
            $code = '';
         }

         # Collect the tag text up to the closing '>'.
         $tag = '';
         $i++;
         for (; $i < $max; $i++) {
            $cc = substr($line,$i,1);
            last if ($cc eq '>');
            $tag .= $cc;
            if ($cc eq '"') {
               # copy a quoted attribute value verbatim, closing quote included
               $i++;
               for (; $i < $max; $i++) {
                  $cc = substr($line,$i,1);
                  $tag .= $cc;
                  last if ($cc eq '"');
               }
            }
         }
         if ($tag =~ /^table/i) {
            $in_table = 1;
            prt("$i: Entered table...\n");
         } elsif ($tag =~ /^\/table/i) {
            $in_table = 0;
            prt("$i: Exit table...\n");
         }
         next;
      }

      # Plain text character: only of interest inside a table.
      if ($in_table) {
         if ($tag =~ /^tr/i) {
            $in_td = 0; # start column counter
            $tr_lns++;
         } elsif ($tag =~ /\/tr/i) {
            # text after a closing row tag is ignored; this branch must stay
            # so such text is not added to any column
         } elsif ($tag =~ /^td/i) {
            $in_td++;
         } elsif ($in_td) {
            # text following any other tag goes to the current column
            if ($in_td == 1) {
               $code .= $cc;
            } elsif ($in_td == 2) {
               $col2 .= $cc;
            } elsif ($in_td == 3) {
               $col3 .= $cc;
            } elsif ($in_td == 4) {
               $col4 .= $cc;
            }
         }
      }
   }

   return \@codes;
}

# show_code_ref_simple( $rca )
# Compact listing of the collected records: for each record print
# "<row counter>: <code text>" through prt(), six records per output line.
# $rca - reference to an array of array refs; element 0 of each is the code
#        text and element 1 the row counter.
sub show_code_ref_simple($) {
   my ($rca) = @_;
   my $cnt = scalar @{$rca};
   my $per_line = 6;
   for my $idx (0 .. $cnt - 1) {
      my $cd = $rca->[$idx][0];
      my $ln = $rca->[$idx][1];
      prt( "$ln: $cd" );
      # end the output line after every sixth record
      prt("\n") if ( ($idx + 1) % $per_line == 0 );
   }
   # finish a partially filled last line
   prt("\n") if ( $cnt % $per_line );
}

# show_code_ref( $rca )
# Print every collected record through prt() as one initializer line:
#     { "code", "col2", "col3", "col4" },
# $rca - reference to an array of array refs laid out as
#        [ code, row counter, col2, col3, col4 ]; the row counter is not
#        printed.
# Each printed field is first passed through trim_all() (defined elsewhere),
# then has every backslash and double quote prefixed with a backslash so the
# field can not terminate or corrupt the surrounding double-quoted literal.
# This applies to the code field as well, not only to the other columns.
sub show_code_ref($) {
   my ($rca) = @_;
   my $cnt = scalar @{$rca};
   for my $j (0 .. $cnt - 1) {
      my @fields = ();
      foreach my $idx (0, 2, 3, 4) {
         my $txt = trim_all(${$rca}[$j][$idx]);
         # escape the backslash itself as well as the quote, in one pass
         $txt =~ s/(["\\])/\\$1/g;
         push(@fields, $txt);
      }
      prt( '    { "' . join('", "', @fields) . "\" },\n" );
   }
}

# Main: collect the table records from $in_file, print them as initializer
# lines, then hand the log file name to close_log() and exit with status 0.
show_code_ref( process_file($in_file) );
close_log( $outfile, 1 );
exit 0;

# eof
