#!/Perl
# loadtable.pl
# AIM: Load an HTML table into a multidimensional array
use strict;
require 'logfile.pl' or die "ERROR: Unable to load logfile.pl ...\n";
require 'htmltools.pl' or die "ERROR: Unable to load htmltools.pl ...\n";
# log file stuff
# open_log(), prt() and close_log() are not defined in this file; presumably
# they come from logfile.pl -- confirm there.
use File::Basename qw(basename);
my ($LF);	# not referenced anywhere else in this file
# Build the log name from the script's base name only. $0 can carry a
# directory part (e.g. "./loadtable.pl"); splicing that between 'temp' and
# '.txt' would produce a path that points somewhere unintended.
my $outfile = 'temp'.basename($0).'.txt';
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

my $dbg21 = 0;	# when true, log each <td>...</td> block as it is split up
my $dbg22 = 0;	# when true, log each record pushed onto @tbl_set
my $tbl_num = 1;	# want the first table

my $in_index = 'P26\index.htm';
###my $in_index = 'tempind.htm';

# Working state shared between get_old_index() and get_table_array().
my @tbl_arr = ();	# lines of the selected table
my $tacnt = 0;	# number of lines in @tbl_arr
my $lncnt = 0;	# number of lines currently being scanned
my $tblcnt = 0;	# running count of <table> tags seen
my $indcnt = 0;	# number of records collected in @tbl_set
my @larr = ();	# raw lines of the input file
my @larr2 = ();	# input after tag2newline() and re-splitting on newlines
my $ln = '';	# scratch line buffer
my @tbl_set = ();	# result: one array ref per index entry, laid out as below
##	push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);

# Main flow: parse the index, report how many entries were found, finish.
get_old_index( $in_index );
$indcnt = scalar @tbl_set;
prt( "Got $indcnt from [$in_index] ...\n" );
close_log($outfile,1);
exit(0);

# get_old_index( $ind )
# Read the HTML file $ind, isolate table number $tbl_num through
# get_table_array(), and append one record per recognised <td> cell to the
# file-scoped @tbl_set.
#
# A recognised cell looks like
#   <td><a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213</td>
# and produces the record
#   [ $href, $file, $date, $size, $year, $month, $day, 0 ]
# where $size has its commas removed and year/month/day come from splitting
# the date on '/'. The trailing 0 is stored as-is; nothing in this file
# reads it.
#
# Side effects: overwrites @larr, @larr2, $ln, $lncnt and $tacnt, writes the
# re-split text to 'tempout3.txt' via write2file(), and logs through prt().
# A file that cannot be opened is only logged, never fatal.
# No meaningful return value.
sub get_old_index($) {
	my ($ind) = shift;
	# Three-argument open with a lexical handle: the file name can never be
	# interpreted as an open mode, and no global bareword handle is left behind.
	if (open my $in_fh, '<', $ind) {
		@larr = <$in_fh>; # slurp it all in ...
		close($in_fh);
		$lncnt = scalar @larr;
		prt( "Got $lncnt lines to process ...\n" );
		###write2file( join('',@larr), 'tempout.txt');
		# tag2newline() comes from elsewhere (presumably htmltools.pl); it
		# appears to insert line breaks around 'td' tags -- TODO confirm.
		$ln = tag2newline( join('',@larr), 'td' );
		###$ln = tag2newline( $ln, 'br' );
		@larr2 = split(/\n/, $ln);
		write2file( join("\n",@larr2), 'tempout3.txt');
		if (get_table_array()) {
			$tacnt = scalar @tbl_arr;
			prt( "Got $tacnt lines to process ...\n" );
		} else {
			prt( "Failed to find table $tbl_num ...\n" );
		}
	} else {
		prt( "Warning: Failed to open $ind ...\n" );
	}

	if ($tacnt > 0) {
		for (my $i = 0; $i < $tacnt ; $i++) {
			my $cell = $tbl_arr[$i]; # extract a line
			# Only lines that open a <td> (with or without attributes) matter.
			next unless ($cell =~ /<td\b[^>]*>/i);

			# A cell may run over several lines: keep appending until the
			# closing </td> turns up or the table lines are exhausted.
			while ($cell !~ /<\/td>/i) {
				$i++;
				last if ($i >= $tacnt);
				$cell .= ' '.$tbl_arr[$i];
			}

			# got begin and end of <td>...</td> block
			# [^>]* keeps the opening tag from swallowing the cell content,
			# while still accepting attributes such as <td class="x">.
			next unless ($cell =~ /(<td\b[^>]*>)(.*)(<\/td>)/i);
			my ($tds, $inb, $tde) = ($1, $2, $3);
			# like Line [<td><a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213</td>] =
			# [<td>][<a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213][</td>] ...
			prt( "Line [$cell] = \n[$tds][$inb][$tde] ...\n" ) if ($dbg21);

			# link, then <br>date (starts with 4 digits), then <br>size
			# (starts with a digit). The href stops at its closing quote and
			# the link text at the nearest </a>, so extra anchor attributes
			# or a second link in the cell cannot bleed into the captures.
			if ($inb =~ /<a\s+href="([^"]*)"[^>]*>(.*?)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d\S*)/i) {
				my ($hrf, $fil, $dt, $sz) = ($1, $2, $3, $4);
				my ($yr, $mt, $dy) = split(/\//,$dt);
				$sz =~ s/,//g; # "10,213" -> "10213"
				push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
				prt("href=[$hrf], file=[$fil], date=[$dt][$yr][$mt][$dy], size=[$sz]...\n") if ($dbg22);
			}
		}
	}
}

# get_table_array()
# Scan the file-scoped @larr2 for the $tbl_num'th line containing a <table>
# tag and append that line, plus every following line up to and including
# the first one containing </table>, to the file-scoped @tbl_arr.
#
# Returns 1 when both the opening and the closing tag of the wanted table
# were seen, 0 otherwise. On 0, @tbl_arr may still hold a partial table.
#
# Notes:
#  - $tblcnt and @tbl_arr are not reset here, so counts and lines
#    accumulate if this is called more than once.
#  - Collection stops at the first </table>; nested tables are not tracked.
#  - Also overwrites $lncnt, and logs through prt().
sub get_table_array {
	my $fnd = 0;
	$lncnt = scalar @larr2;
	for (my $i = 0; $i < $lncnt ; $i++) {
		my $line = $larr2[$i]; # extract a line
		chomp $line; # remove LF (\n)
		$line =~ s/\r$//; # and remove CR, if present
		next unless ($line =~ /<table.*>/i);

		prt( "FOUND TABLE: [$line] ...\n" );
		$tblcnt++; # bump table counter
		next unless ($tblcnt == $tbl_num);

		prt( "Is my TABLE [$tbl_num] ...\n" );
		push(@tbl_arr,$line);
		if ($line =~ /<\/table>/i) {
			# The whole table sits on this one line: it is complete, so
			# report success instead of falling through with $fnd == 0.
			$fnd = 1;
			last;
		}

		# Collect the remaining lines of the table up to its closing tag.
		for ($i++; $i < $lncnt; $i++) {
			$line = $larr2[$i]; # extract a line
			chomp $line; # remove LF (\n)
			$line =~ s/\r$//; # and remove CR, if present
			push(@tbl_arr,$line);
			if ( $line =~ /<\/table>/i ) {
				prt( "END TABLE: [$line] ...\n" );
				$fnd = 1;
				last;
			}
		}
		last; # the wanted table has been handled; ignore the rest
	}
	return $fnd;
}

# eof - loadtable.pl

