#!/Perl
# tidycmp02.pl
# ############################################################################
# AIM: To DOWNLOAD the accessibility table from the web site
# http://www.aprompt.ca/Tidy/accessibilitychecks.html
# parse the html, extracting the HTML test file link
# Download the link, advise if FAILED,
# else write the file to an OUTPUT folder,
# converting the line endings to DOS line endings ...
# and compare its contents to Tidy's accesscases.txt
# NOTE: With $dbg1 == 0, there can be quite LONG delays before NEXT output ...
# Likewise if $dbg2 == 0, and/or $dbg3 == 0 - it looks like NOTHING is happening!!!
# #############################################################################
use strict;
use warnings;
use LWP::Simple;
require "logfile.pl" or die "Missing logfile.pl ...\n"; # my simple log file and some other utility subs
require "htmltools.pl" or die "Missing htmltools.pl ...\n";
# log file stuff
my ($LF);
my $outfile = 'temp'.$0.'.txt';
my $outfil1 = 'temp1'.$0.'.htm';
# program variables
my $download = 1; # do the ACTUAL downloads,
my $dotidytest = 0;	# compare with Tidy file ...
# or use locally saved file after first download
# online source
my $site = 'http://www.aprompt.ca/Tidy/';
my $URL = $site . 'accessibilitychecks.html';
# local HDD source
###my $src_folder = "F:\\Gtools\\tidyproj\\tidycvs6-2\\test\\";
my $src_folder = "F:\\FGCVS\\tidy\\test\\";
my $in_file = $src_folder.'accesscases.txt';
my $in_folder = $src_folder."accessTest\\";
my $out_folder = 'tmp6'; # and output FOLDER, for download
my $new_out = 'tempaccess.txt';
my @tests = ();
my $text = '';
my $tcnt = 0;
my @arr = ();
my $dtext = '';
my $line = '';
my @lines = ();
my $tln = '';
my $tlcnt = 0;
my @mdarr = ();
my @hrefs = ();
my $thrftxt = '';
my $lhrftxt = 0;
my $thrffil = '';
my @newtest = ();
my @desc = ();
my $lcnt = 0;
my $cnt = 0;
my $we = '';
my $test = '';
my $lev = 0;
my $href = '';
my $href2 = '';
my $flip = 0;
my $fnd = 0;
my $dsc = '';
my $tstcnt = 0;
my $ntcnt = 0;
my $dtcnt = 0;
my $wrtncnt = 0;
my $dsccnt = 0;
my $msg = '';
# information collected
my @zeroonline = ();
my @missingcvs = ();
my @difflevels = ();
# debug
my $dbg1 = 0;	# additional diagnostic output
my $dbg2 = 0;	# output the test cases read in ...
my $dbg3 = 0;	# output information when found ...
#######################################################
### main program
open_log($outfile);
prt( "$0 ... Hello, World...\n" );


if ($download) {
	prt("Fetching text from $URL ...\n");
	$text = get("$URL");
	# this assumes CR line endings
	###@arr = split("\r", $text);
	##$dtext = join( "\n", @arr );
	## so without assumption
	if (defined $text) {
		$dtext = force_unix_le($text);
		$tcnt = length($text);
		$dtcnt = length($dtext);
	} else {
		$text = '';
		$dtext = '';
		$tcnt = 0;
		$dtcnt = 0;
	}
	prt( "Got $tcnt ($dtcnt) characters from URL $URL ...\n");
	write2file($dtext,$outfil1);
	@arr = split( "\n", $dtext );
} else {
	open INF, "<$outfil1" or mydie( "ERROR: Unable to open [$outfil1] ... $1\n" );
	@arr = <INF>; # slurp it all
	close INF;
}

if ($dotidytest) {
	prt("Openning the compare file [$in_file] ...\n");
	open INF, "<$in_file" or mydie( "ERROR: Unable to open [$in_file] ... $1\n" );
	my @tmp = <INF>; # slurp it all
	close INF;
	prt( "Got ".scalar @tmp." from $in_file ... putting into a multi-dimensional array ...\n" );
	foreach $line (@tmp) {
		$tln = trimall($line);
		if (length($tln)) {
			push(@lines, $tln);
			my @ts = split(" ",$tln);
			if (scalar @ts == 3) {
				push(@mdarr, [ $ts[0], $ts[1], $ts[2], 0 ]);
			} else {
				prt( "WARNING: [$tln] did not split correctly ...\n" );
			}
		}
	}
	$tlcnt = scalar @mdarr;
	prt( "Got $tlcnt (".(scalar @lines).") from $in_file ...\n" );
	for (my $i3 = 0; $i3 < $tlcnt; $i3++) {
		$msg = $mdarr[$i3][0] . ' ' . $mdarr[$i3][1] . ' ' . $mdarr[$i3][2];
		prt( "$msg\n" ) if ($dbg2);
	}
}
###my $etext = htmlexpand($text);
###my $ctext = htmlcleanall($etext);
##open WOF, ">$outfil1" or mydie("ERROR: Unable to open $outfil1 - $!\n");
$lcnt = scalar @arr;
prt( "Processing $lcnt lines ...\n" );
# expect something like ..............
# Error number [13.2.1.3] - Priority 2
# or
# Warning number [7.4.1.1] - Priority 2
# Warning number [1.1.1.2] - Priority 1
# All images require text equivalents but "alt" text must also meet ...
#
# Testfile 1.1.1.f2: suspicious "alt" text (filename) 
# View testfile source = link 
# Testfile             = link 
# ....................................
foreach $line (@arr) {
##	print WOF $line."\n";
	$tln = trimall($line);
	$tln = removetag($tln, 'b');
	$tln = removetag($tln, 'br');
	##if ($line =~ /(Error|Warning)\s+\[(\d+\.\d+\.\d+\.\d+)\]\s+-\s+Priority\s+(\d{1})/) {
	##if ($tln =~ /(Error|Warning)\s+number\s+/i) {
	##if ($tln =~ /(Error|Warning)\s+number\s+\[(.*)\]/i) {
	if ($tln =~ /(Error|Warning)\s+number\s+\[(.*)\].+Priority\s+(\d+)/i) {
		$cnt++;
		$we = $1;
		$test = $2;
		$lev = $3;
		##prt( "[$2] $tln\n" );
		##prt( "$cnt [$we] [$test] [$lev]\n" );
		push(@tests, [$test, $lev, $we]);
		$flip = 0;
	} elsif ($tln =~ /href=["'](\S+)["']./i ) {
		if ($cnt) {
			my $hrf = $1;
			if ($flip) {
				if ($flip == 1) {
					$href = $site . $hrf;
					if ($download) {
						prt( "Moment ... loading [$href] ...\n" );
						$thrftxt = get($href);
						if (defined $thrftxt) {
							$lhrftxt = length($thrftxt);
						} else {
							$thrftxt = '';
							$lhrftxt = 0;
						}
						if ($lhrftxt) {
							###$thrftxt =~ s/\r/\r\n/gm;
							###$thrftxt =~ s/\r/\n/gm;
							$thrftxt = force_unix_le($thrftxt);
							$thrffil = $test;
							$thrffil =~ s/\./-/g;
							$thrffil = $out_folder . '/' . $thrffil . '.html';
							write2file( $thrftxt, $thrffil );
							$wrtncnt++; # count another WRITTEN
							prt( "[$test] Test HREF=\"$href\" length $lhrftxt ... written [$thrffil]\n" ) if ($dbg1);
						} else {
							$msg = "[$test] Test HREF=\"$href\" length is ZERO - CHECK ME! ...";
							prt( "$msg\n" ) if ($dbg3);
							push(@zeroonline, $msg);
						}
					} else {
						# no download done ...
					}
				} else {
					prt( "[$test] CHECK ME HREF=\"$hrf\" \n" );
				}
			} else {
				$href2 = $hrf;
				prt( "[$test] View HREF=\"$href2\"\n" ) if ($dbg1);
			}
			$flip++;
		}
	} elsif ($tln =~ /Testfile\s+\d+.+:\s+(.*)/) {
		# like - Testfile 1.1.1.f1: <img> missing "alt" text
		my $ds = $1;
		$ds =~ s/&lt;/</g;
		$ds =~ s/&gt;/>/g;
		$ds =~ s/"/'/g;
		push(@desc, [$test, $ds]);
		prt( "[$test] Description=[$ds]\n" ) if ($dbg1);
	}
}
prt( "DONE processing $lcnt lines ...\n" );
$tstcnt = scalar @tests;
$dsccnt = scalar @desc;
prt( "Written $wrtncnt new files ... Got $tstcnt test sets ... $dsccnt desciptions ...\n" );
##close WOF;
for (my $i = 0; $i < $tstcnt; $i++) {
	$we = $tests[$i][2];
	$test = $tests[$i][0];
	$lev = $tests[$i][1];
	$fnd = test_in_lines($test);
	$dsc = find_desc($test);
	my ($tf, $tff);
	$tf = $test;
	$tf =~ s/\./-/g;
	$tff = $in_folder . $tf . ".html";
	if ($fnd) {
		$tln = $lines[$fnd-1];
		###my $tf = $mdarr[$fnd-1][0];
		my $tc = $mdarr[$fnd-1][1];
		###my $tff = $in_folder . "\\" . $tf . ".html";
		if (-f $tff) {
			my @tmparr = split(" ", $tln);
			if (scalar @tmparr == 3) {
				my $lev2 = $tmparr[2]; 
				if ($lev2 == $lev) {
					prt( "[$test] [$lev] [$tln] [$tc] ok\n" ) if ($dbg1);
				} else {
					$msg = "[$test] [$lev] [$tln] [$tc] ok BUT different level [$lev2] ...";
					push(@difflevels, $msg);
					prt( "$msg\n" ) if ($dbg3);
				}
				push(@newtest, [$tf, $test, $lev, $we, $dsc]);
			} else {
				mydie( "[$test] [$lev] [$tln] [$tc] ok BUT NO LEVEL COMPARE\n" );
			}
		} else {
			prt( "[$test] [$lev] [$tln] [$tc] missing [$tff]?\n" );
			push(@newtest, [$tf, $test, $lev, $we, $dsc]);
		}
	} else {
		if (-f $tff) {
			$msg = "NOT FOUND [$test] [$lev] BUT found [$tff]";
		} else {
			$msg = "NOT FOUND [$test] [$lev]";
		}
		push(@missingcvs, $msg);
		prt( "$msg\n" ) if ($dbg3);
	}
}

# output warning information, if NOT output during processing
if (!$dbg3) { # no output during processing
	$cnt = scalar @zeroonline;
	if ($cnt) {
		prt( "Count $cnt file(s) appear MISSING from on-line site ...\n" );
		foreach $msg (@zeroonline) {
			prt( "$msg\n" );
		}
	}
	$cnt = scalar @missingcvs;
	if ($cnt) {
		prt( "Count $cnt file(s) appear MISSING from CVS download ...\n" );
		foreach $msg (@missingcvs) {
			prt( "$msg\n" );
		}
	}
	$cnt = scalar @difflevels;
	if ($cnt) {
		prt( "Count $cnt item(s) appear to have DIFFERENT priority ...\n" );
		foreach $msg (@difflevels) {
			prt( "$msg\n" );
		}
	}
}

$ntcnt = scalar @newtest;
prt( "\nOutputting $ntcnt tests to [$new_out] ...\n" );
open OUTF, ">$new_out" or mydie( "ERROR: Unable to open $new_out ...$! \n" );
for (my $i = 0; $i < $ntcnt; $i++) {
	print OUTF $newtest[$i][0] . ' ' . $newtest[$i][1] . ' ' . $newtest[$i][2]; 
	print OUTF ' ' . $newtest[$i][3] . ' ' . $newtest[$i][4] . "\n"; 
}
close OUTF;
##system($outfil1);
close_log($outfile,1);
exit(0);

### push(@desc, [$test, $dsc]);
sub find_desc {
	my ($tst) = shift;
	my $d = 'NOT FOUND';
	my $ct = scalar @desc;
	for (my $i2 = 0; $i2 < $ct; $i2++) {
		if ($desc[$i2][0] eq $tst) {
			$d = $desc[$i2][1];
			last;
		}
	}
	return $d;
}
###                file    test    level
###	push(@mdarr, [ $ts[0], $ts[1], $ts[2], 0 ]);
sub test_in_lines {
	my ($tst) = shift;
	my $f = 0;
	my $ct = 0;
	my $ln = '';
	###prt( "Finding [$tst] ...\n" );
	for (my $i2 = 0; $i2 < $tlcnt; $i2++) {
		$ct++;
		##my $ts = $mdarr[$i2][1];
		##prt( "Compare with [$ts] ...\n" );
		if ($mdarr[$i2][1] eq $tst) {
			$f = $ct;
			last;
		}
	}
	return $f;
}

sub trimall {
	my ($ln) = shift;
	chomp $ln;
	$ln =~ s/\r$//;
	$ln =~ s/\t/ /g;
	while ($ln =~ /\s\s/) {
		$ln =~ s/\s\s/ /g;
	}
	while ($ln =~ /^\s/) {
		$ln = substr($ln,1);
	}
	while ($ln =~ /\s$/) {
		$ln = substr($ln,0, length($ln) - 1);
	}
	return $ln;
}

sub force_unix_le {
	my ($dtx) = shift;
	my $ntx = '';
	my $len = length($dtx);
	for (my $i = 0; $i < $len; $i++) {
		my $ch = substr($dtx,$i,1);
		if ($ch eq "\r") {	# if CR, check for CR/LF
			$i++; # move to next char
			if ($i < $len) { # if length
				$ch = substr($dtx,$i,1);
				if ($ch ne "\n") {	# is is LF
					$ntx .= "\n"; # no, force LF to replace CR
					if ($ch eq "\r") {	# but if it IS another CR
						$i--; # back up to collect this
						next;	# and loop
					}
				}
				# else let this caracter be added
			} else { # last char
				$ch = "\n"; # add final LF
			}
		}
		$ntx .= $ch;
	}
	return $ntx;
}

# eof - tidycmp02.pl

