#!/perl -w
# NAME: table2hash.pl
# AIM: Read in a HTML table, and convert to a Perl HASH structure ...
# 29/01/2008 - geoff mclane
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
	my @tmpsp = split(/\\/,$pgmname);
	$pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

my $in_file = '..\html\metar_abreviations.htm';
# debug
my $dbg1 = 0;	# show open/close of rows and columns

my @tblitems = ();
if (open IF, "<$in_file") {
	my @lines = <IF>;
	close IF;
	my $lcnt = scalar @lines;
	prt( "$lcnt lines to process ...\n" );
	my $htxt = join(' ', @lines);
	$htxt =~ s/\n/ /gm;
	$htxt =~ s/\r/ /gm;
	$htxt =~ s/\t/ /gm;
	$htxt = trim_all($htxt);
	my $ccnt = length($htxt);
	prt( "Processing $ccnt characters ...\n" );
	process_chars($htxt);
} else {
	prt("ERROR: Can NOT open $in_file ...\n");
}
if (@tblitems) {
	my $icnt = scalar @tblitems;
	prt( "Got $icnt table items ...\n" );
	prt( "my \%acronyms = (\n" );
	for (my $t = 0; $t < $icnt; $t++) {
		my $i1 = $tblitems[$t][0];
		my $i2 = $tblitems[$t][1];
		my $msg = "\t'$i1' => '$i2'";
		$msg .= ',' if (($t + 1) < $icnt);
		prt( "$msg\n" );
	}
	prt( ");\n" );
}

close_log($outfile,1);
exit(0);

sub process_chars {
	my ($txt) = shift;
	my $tl = length($txt);
	my ($ch, $intbl, $tag, $row, $col, $attr, $item, $colcnt, $ftag);
	my ($itm1,$itm2,$itm3,$itm4);	# we have FOUR columns
	$intbl = 0;
	$row = 0;
	$col = 0;
	$item = '';
	$colcnt = 0;
	for (my $i = 0; $i < $tl; $i++) {
		$ch = substr($txt,$i,1);
		if ($ch eq '<') {
			if (length($item)) {
				if ($intbl && $col) {
					prt( "Table Item = [$item] ($colcnt)\n" );
					if ($colcnt == 1) {
						$itm1 = $item;
					} elsif ($colcnt == 2) {
						$itm2 = $item;
					} elsif ($colcnt == 3) {
						$itm3 = $item;
					} elsif ($colcnt == 4) {
						$itm4 = $item;
					}
				}
				$item = '';
			}
			$tag = '';
			$attr = '';
			$i++;
			for ( ; $i < $tl; $i++) {
				$ch = substr($txt,$i,1);
				if (($ch eq '>')||($ch =~ /\s/)) {
					if ($ch ne '>') {
						$i++;
						$attr = '';
						for ( ; $i < $tl; $i++) {
							$ch = substr($txt,$i,1);
							if ($ch eq '>') {
								last;
							}
							$attr .= $ch;
						}
					}
					last;
				}
				$tag .= $ch;
			}
			$ftag = '<'.$tag;
			$ftag .= ' ' if (length($attr));
			$ftag .= $attr.'>';
			if ($tag =~ /^table$/i) {
				$intbl++;
				prt( "In table $ftag...($intbl)\n" );
			} elsif ($tag =~ /^\/table$/) {
				$intbl-- if ($intbl);
				prt( "Out table $ftag ...($intbl)\n" );
			} elsif ($tag =~ /^tr$/) {
				prt( "New row $ftag ... $row - clear column count ...\n" ) if ($dbg1);
				$colcnt = 0;
			} elsif ($tag =~ /^\/tr$/) {
				$row-- if ($row);
				prt( "End row $ftag ... $row \n" ) if ($dbg1);
				# end of row
				push(@tblitems, [$itm1,$itm2]);
				push(@tblitems, [$itm3,$itm4]);
			} elsif ($tag =~ /^td$/) {
				$col++;
				$colcnt++;
				prt( "New column $ftag ... $col \n" ) if ($dbg1);
			} elsif ($tag =~ /^\/td$/) {
				$col-- if ($col);
				prt( "End column $ftag ... $col \n" ) if ($dbg1);
			}
		} else {
			$item .= $ch;
		}
	}
}


# eof - table2hash.pl
