#!/Perl

use LWP::Simple;
require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file and some other utility subs
require "htmltools.pl" or die "Missing htmltools.pl ...\n";

# log file stuff
my ($LF);
my $outfile = 'temp'.$0.'.txt';
my $outfil1 = 'tempcurr02.htm';
my $outfil2 = 'tempcurr02.csv';
# program variables
my $URL = 'http://www.exchangerate.com/';
my @hrefs = ();
my @imgs = ();
my @currency = ();
my $m_date1 = '';
my $m_date2 = '';
my $test1 = 0;
my $etext = '';
open_log($outfile);
prt( "$0 ... Hello, World...\n" );
if ($test1) {
prt("Fetching text from $outfil1 ...\n");
open CIF, "<$outfil1" or mydie("ERROR: Unable to open $outfil1 ...\n");
my @atext = <CIF>; # slurp it into a line array
close CIF;
prt( "From $outfil1, got ".scalar @atext." lines ...\n" );
$etext = join( "", @atext);
prt( "len=".length($etext)."\n"); # "[$txt2]\n");
write2file($etext, 'tempcurr02a.htm');
} else {
prt("Fetching text from $URL ...\n");
my $text = get("$URL");
my $tcnt = length($text);
prt( "Got $tcnt characters from URL $URL ...\n");
$etext = htmlexpand($text);
write2file($etext,$outfil1);
}
##prt("[$text]\n");
##my $ctext = $etext;
my $ctext = htmlclean02($etext);
prt( "len=".length($ctext)."\n"); # "[$txt2]\n");
my $ccnt = extractcurrencies02($ctext);
###my $ccnt = scalar @currency;
prt( "Got $ccnt currencies ...\n" );
my $msg = '';
my $txt = '';
# AUSTRALIA Dollar AUD 1.310649 ### + 1.311001 -0.0268% + 1  
if ($ccnt) {
	open OF, ">$outfil2" or mydie("ERROR: Unable to open CSV out file!\n");
	prt("Writing CSV file $outfil2 ...\n");
	$msg = "Country,Currency,ISO,Rate $m_date1,Rate $m_date2"; 
	prt("$msg\n");
	print OF $msg."\n";
	for (my $i = 0; $i < $ccnt; $i++) {
		$msg = "".($currency[$i][0]).",".($currency[$i][1]).",".($currency[$i][2]).",".($currency[$i][3]).",".($currency[$i][4]);
		prt( "$msg\n" );
		print OF $msg."\n";
	}
	$msg = "From [$URL] on ".(scalar localtime);
	prt("$msg\n");
	print OF $msg."\n";
	close(OF);
} else {
	prt( "WARNING: FAILED TO EXTRACT ANY CURRENCIES!!!\n" );
}
prt( "All done ...\n" );
close_log($outfile,1);
###system( $outfil1 );
###system( $outfil2 );
exit(0);

# <TH COLSPAN=2> Country</TH>
# <TH> Currency </TH>
# <TH> ISO </TH>
# <TH> 08/24/06 </TH>
# <TH> 08/23/06 </TH>
# <TH COLSPAN=2> Change </TH>

my $m_stat1 = '^Country';
my $m_stat2 = '^Currency';
my $m_stat3 = '^ISO';
my $m_stat4 = '^\d{2}\D{1}\d{2}\D{1}\d{2}';	# <TH> 08/24/06 </TH>
my $m_stat5 = '^\d{2}\/\d{2}\/\d{2}';	# <TH> 08/23/06 </TH>
my $m_stat6 = ''; # <TH COLSPAN=2> Change </TH>
my $m_stat7 = ''; #

sub extractcurrencies02 {
	my ($txt) = shift;
	my $len = length($txt);
	my $i = 0;
	my $tag = '';
	my $tt = '';
	my $st = 0;
	my $cnt = 0;
	my @arr = ();
	my $rcnt = 0;
	my $isth = 0;
	prt( "Processing $len characters ...\n" );
	for ($i = 0; $i < $len; $i++) {
		$ch = substr($txt,$i,1);
		if ($ch eq '<') {
			$tag = $ch;
			$i++;
			for ( ; $i < $len; $i++) {
				$ch = substr($txt,$i,1);
				$tag .= $ch;
				if ($ch eq '>') {
					###prt( "Have tag [$tag] ...\n" );
					last;
				}
			}
			$ch = '';
			if (($tag =~ /^<td.*>/i)||($tag =~ /^<th.*>/i)) {
				$i++;
				$tt = '';
				if ($tag =~ /^<td.*>/i) {
					###prt( "Got TD tag [$tag] ...\n" );
					$isth = 0;
				} else {
					###prt( "Got TH tag [$tag] ...\n" );
					$isth = 1;
				}
				for ( ; $i < $len; $i++) {
					$ch = substr($txt,$i,1);
					if ($ch eq '<') {
						$tt = trimbothends($tt);
						if (length($tt)) {
							###$i--; # back up to this char
							last;
						} else {
							# eat this next - assume another tag
							# note this 'eats' though the first <td><img ...> etc tag
							$i++;
							for ( ; $i < $len; $i++) {
								$ch = substr($txt,$i,1);
								if ($ch eq '>') {
									last;
								}
							}
						}
					} else {
						$tt .= $ch
					}
				}

				###prt( "Got TH/TD tag [$tag] ...txt=[$tt]\n" );
				if ($st == 7) {
					##prt( "Got TH/TD tag [$tag] ...txt=[$tt]\n" );
					if ($cnt == 0) {
						$cnt = 1;
						@arr = ();
						push(@arr, $tt);
					} elsif ($cnt == 1) {
						$cnt = 2;
						push(@arr, $tt);
					} elsif ($cnt == 2) {
						$cnt = 3;
						push(@arr, $tt);
					} elsif ($cnt == 3) {
						$cnt = 4;
						push(@arr, $tt);
					} elsif ($cnt == 4) {
						$cnt = 5;
						push(@arr, $tt);
					} elsif ($cnt == 5) {
						$cnt = 6;
						push(@arr, $tt);
					} elsif ($cnt == 6) {
						push(@arr, $tt);
						$cnt = 0;
					}
				} elsif ($isth) {
					if ($st == 0) {
						##if ($tt =~ /$m_stat1/) {
						if ($tt =~ /^Country/) {
							prt("At state 1 ...\n");
							$st = 1;
						}
					} elsif ($st == 1) {
						##if ($tt =~ /$m_stat2/) {
						if ($tt =~ /^Currency/) {
							prt("At state 2 ...\n");
							$st = 2;
						} else {
							$st = 0;
						}
					} elsif ($st == 2) {
						##if ($tt =~ /$m_stat3/) {
						if ($tt =~ /^ISO/) {
							prt("At state 3 ...\n");
							$st = 3;
						} else {
							$st = 0;
						}
					} elsif ($st == 3) {
						##if ($tt =~ /$m_stat3/) {
						if ($tt =~ /^(\d{2}\D{1}\d{2}\D{1}\d{2})/) {
							$m_date1 = $1;
							prt("At state 4 ... $m_date1 ...\n");
							$st = 4;
						} else {
							$st = 0;
						}
					} elsif ($st == 4) {
						##if ($tt =~ /$m_stat4/) {
						if ($tt =~ /^(\d{2}\D{1}\d{2}\D{1}\d{2})/) {
							$m_date2 = $1;
							prt("At state 5 ... $m_date2 ...\n");
							$st = 5;
						} else {
							$st = 0;
						}
					} elsif ($st == 5) {
						prt("At state 6/7 ...\n");
						$st = 7;
					} elsif ($st == 6) {
						prt("At state 7 ...\n");
						$st = 7;
					}
				}
			} elsif ($tag =~ /^<\/tr.*/i) {
				if (($st == 7) && ($cnt >= 5)) {
					###prt("End of row $cnt - return to cnt 0 ...\n");
					# AUSTRALIA Dollar AUD 1.310649 1.311001 -0.0268% + <something>  
					prt( "Arr=[" );
					foreach my $itm (@arr) {
						prt("$itm,");
					}
					prt("]\n");
					push(@currency, [$arr[0], $arr[1], $arr[2], $arr[3], $arr[4]]);
					$rcnt++;
				}
				$cnt = 0;
			} elsif ($tag =~ /^<\/table.*/i) {
				if ($st > 0) {
					###prt("End table - return to state 0 ...\n");
					$st = 0;
				}
			}
		}
	}
	return $rcnt;
}

sub htmlclean02 {
	my ($rtxt) = shift;
prt( "len=".length($rtxt)." Drop comments <!--...--> ...\n");
$rtxt = dropcomments($rtxt);
prt( "len=".length($rtxt)." Strip <HEAD>...</HEAD> tag ...\n");
$rtxt = striptag($rtxt, 'HEAD');
prt( "len=".length($rtxt)." Strip <script>...</script> tag ...\n");
$rtxt = striptag($rtxt,'script');
prt( "len=".length($rtxt)." Strip <noscript>...</noscript> tag ...\n");
$rtxt = striptag($rtxt,'noscript');
prt( "len=".length($rtxt)." Strip <SELECT>...</SELECT> tag ...\n");
$rtxt = striptag($rtxt,'select');
prt( "len=".length($rtxt)." Remove <font ...> tags ...\n");
$rtxt = removefont($rtxt);
prt( "len=".length($rtxt)." Remove <b> tags ...\n");
$rtxt = removetag($rtxt,'b');
prt( "len=".length($rtxt)." Remove <nobr> tags ...\n");
$rtxt = removetag($rtxt,'nobr');
prt( "len=".length($rtxt)." Remove th attributes ...\n");
$rtxt = removetagattrib($rtxt,'th');
prt( "len=".length($rtxt)." Remove tr attributes ...\n");
$rtxt = removetrattrib($rtxt);
prt( "len=".length($rtxt)." Remove td attributes ...\n");
$rtxt = removetdattrib($rtxt);
prt( "len=".length($rtxt)." Delete <a...> & </a>\n");
$rtxt = collecthrefs($rtxt,1);
prt( "len=".length($rtxt)." Delete <img...>\n");
$rtxt = collectimgs($rtxt,1);
prt( "len=".length($rtxt)." Do substitutions ...\n");
$rtxt = substitutions($rtxt);
prt( "len=".length($rtxt)." Trim blank lines ...\n");
$rtxt = trimblanklines($rtxt);
prt( "len=".length($rtxt)." Trim inline td ...\n");
$rtxt = triminlinetd($rtxt);
	return $rtxt;
}

# eof - autoexch01.pl
