#!perl -w
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";

#######################################################
# Load of HTM tags and PHP reserved words and built-in
my $html_stx = 'C:/Program Files/EditPlus 2/html.stx';
my $php_stx = 'C:/Program Files/EditPlus 2/php.stx';
# if in HTML (default)
#if ($kw == 1) {
my @stxHTM = ();
#} elsif ($kw == 2) {
my @stxATT = ();
#} elsif ($kw == 3) {
my @stxSPL = ();
#else in PHP
#if ($kw == 1) {
my @stxRW = ();
#} elsif ($kw == 2) {
my @stxBI = (); # like @BuiltIns;
#} elsif ($kw == 3) {
my @stxVA = ();
my %HFuncsFnd = ();	# set of FOUND builtin functions
my %HResWdFnd = (); # reserved words used
my @AFileNames = (); # for each output file, with hash of functions
my @AFileHashs = (); # for each output file, with hash of functions
my %HOldbifs = ();	# old BIF, from previous index, if any
#########################################################

#########################################################
######## keep the OLD index
### this is needed IF files have been DELETED ...

### VARIABLES
my $oi_tblcnt = 0;
my $tbl_num = 1;
my $tbl_num3 = 3;
my @tbl_arr = ();
my @tbl_arr3 = ();
my @tbl_set = ();
my @tbl_set3 = ();
my $no_index = 0;

my $dbg20 = 0; # get_table_array()
my $dbg21 = 0;
my $dbg22 = 0;
my $dbg23 = 0;
my $dbg24 = 0;

my $oi_tacnt = 0;
my $oi_tacnt3 = 0;
my @oi_larr = ();
my @oi_larr2 = ();
my @oi_hrefs = ();

### FUNCTIONS
# search the @stxBI array for an entry
sub is_built_in($) {
   my ($t) = shift;
   foreach my $rw (@stxBI) {
      if ($t eq $rw) {
         return 1;
      }
   }
   return 0;
}

sub transfer_old_table3() {
	$oi_tacnt3 = scalar @tbl_set3;
	if ($oi_tacnt3 > 0) {
		prt( "Collected $oi_tacnt3 in \@tbl_set3 ... need to intialise built-in hash ...\n" );
		## load into my %HOldbifs = ();
		my $elimcnt = 0;
		my $elimcnt2 = 0;
		for (my $i = 0; $i < $oi_tacnt3; $i++) {
			my $bif = $tbl_set3[$i][0];
			my $fss = $tbl_set3[$i][1];
			if (is_built_in($bif)) {
				# each new htm file written is kept in -
				# push(@AFileNames, $ind_file   );
				# and for each of these a new hash of built ins has been kept
				# push(@AFileHashs, \%th); # store the functions used ...
				# so these files can be (safely) eliminated, since they will be added later
				foreach my $nhf (@AFileNames) {
					if ($fss =~ /$nhf/i) {
						$fss =~ s/$nhf//;
						$elimcnt++;
					}
				}
				$fss = trim_line($fss);
				if (length($fss)) {
					if (exists $HOldbifs{$bif}) {
						prt("\nWARNING: [$bif] appears DUPLICATED ...\n had=[".$HOldbifs{$bif}."\nadding [$fss]\n\n");
						$HOldbifs{$bif} .= $fss;
					} else {
						$HOldbifs{$bif} = $fss;
					}
				} else {
					$elimcnt2++;
				}
			} else {
				prt("WARNING: DISCARDING [$bif] - NOT BUILT-IN!\n");
			}
		}
		my $nwcnt = scalar keys %HOldbifs;
		if ($elimcnt > 0) {
			prt( "Elimated old files $elimcnt times, avoiding $elimcnt2 bifs being added...\n" );
		}
		prt( "Done $oi_tacnt3 in \@tbl_set3 ... now $nwcnt in \%HOldbifs ...\n" );
	}
}


sub mark_old_index($) {
	my ($f) = shift;
	my $tsc = scalar @tbl_set;
	for (my $i = 0; $i < $tsc; $i++ ) {
		if ($tbl_set[$i][0] eq $f) {
			$tbl_set[$i][7] = 1;
			last;
		}
	}
}

sub get_table_array {
	my $fnd = 0;
	my $lncnt = scalar @oi_larr2;
	for (my $i = 0; $i < $lncnt ; $i++) {
		my $ln = $oi_larr2[$i]; # extract a line
		chomp $ln; # remove LF (\n)
		$ln =~ s/\r$//; # and remove CR, if present
		if ($ln =~ /<table.*>/i) {
			prt( "FOUND TABLE: [$ln] ...\n" ) if ($dbg20);
			$oi_tblcnt++; # bump table counter
			if ($oi_tblcnt == $tbl_num) {
				prt( "Is my TABLE [$oi_tblcnt] ...\n" ) if ($dbg20);
				push(@tbl_arr,$ln);
				if ( !($ln =~ /<\/table>/i) ) {
					$i++; # move to next line
					for ( ; $i < $lncnt; $i++) {
						$ln = $oi_larr2[$i]; # extract a line
						chomp $ln; # remove LF (\n)
						$ln =~ s/\r$//; # and remove CR, if present
						if ( $ln =~ /<\/table>/i ) {
							prt( "END TABLE $tbl_num: [$ln] ...\n" ) if ($dbg20);
							push(@tbl_arr,$ln);
							$fnd++;
							last;
						}
						push(@tbl_arr,$ln);
					}
				}
			} elsif ($oi_tblcnt == $tbl_num3) {
				prt( "Is my TABLE [$oi_tblcnt] ...\n" ) if ($dbg20);
				push(@tbl_arr3,$ln);
				if ( !($ln =~ /<\/table>/i) ) {
					$i++; # move to next line
					for ( ; $i < $lncnt; $i++) {
						$ln = $oi_larr2[$i]; # extract a line
						chomp $ln; # remove LF (\n)
						$ln =~ s/\r$//; # and remove CR, if present
						if ( $ln =~ /<\/table>/i ) {
							prt( "END TABLE $tbl_num3: [$ln] ...\n" ) if ($dbg20);
							push(@tbl_arr3,$ln);
							$fnd++;
							last;
						}
						push(@tbl_arr3,$ln);
					}
				}
			}
		}
	}
	return $fnd;
}

sub process_tbl_arr() {
	my $cc = 0;
	for (my $i = 0; $i < $oi_tacnt ; $i++) {
		my $ln = $tbl_arr[$i]; # extract a line
		if ($ln =~ /<td.*>/i) {
			while ( !($ln =~ /<\/td>/i) ) {
				$i++;
				if ($i < $oi_tacnt) {
					$ln .= ' '.$tbl_arr[$i]; # extract a line
				} else {
					last;
				}
			}
			# got begin and end of <td>...</td> block
			if ($ln =~ /(<td.*?>)(.*)(<\/td>)/i) {
				my $tds = $1;
				my $inb = $2;
				my $tde = $3;
				# like Line [<td><a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213</td>] = 
				# [<td>][<a href="adjrt01.htm">adjrt01.htm</a> <br>2006/05/23 <br>10,213][</td>] ...
				prt( "Line [$ln] = \nBlocks [$tds][$inb][$tde] ...\n" ) if ($dbg21);
				###if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>/) {
				##if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>/i) {
				#if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)/i) {
				if ($inb =~ /<a\s*href=\"(.*)\">(.*)<\/a>\s*<br>(\d{4}\S*)\s*<br>(\d{1}\S*)\s*/i) {
					my $hrf = $1;
					my $fil = $2;
					my $dt = $3;
					my $sz = $4;
					my ($yr, $mt, $dy) = split(/\//,$dt);
					###$sz =~ s/,//g;
					#               0     1     2    3    4    5    6    7
					push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
					prt("href=[$hrf], file=[$fil], date=[$dt][$yr][$mt][$dy], size=[$sz]...\n") if ($dbg22);
				} else {
					prt("HREF not found - CHECK!\n") if ($dbg22);
				}
			}
		}
	}
}

sub process_tbl_arr3() {
	my $cc = 0;
	my $ff = 0; # since just two columns - flip flop
	my $bif = '';
	my $fil = '';
	for (my $i = 0; $i < $oi_tacnt3 ; $i++) {
		my $ln = $tbl_arr3[$i]; # extract a line
		if ($ln =~ /<td.*>/i) {
			$cc = length($ln);
			prt( "$i - Line [$ln] $cc...\n" ) if ($dbg24);
			while ( !($ln =~ /<\/td>/i) ) {
				$i++;
				if ($i < $oi_tacnt3) {
					$ln .= ' '.$tbl_arr3[$i]; # extract a line
				} else {
					last;
				}
			}
			if ($cc != length($ln)) {
				$cc = length($ln);
				prt( "$i - Line [$ln] $cc...\n" ) if ($dbg24);
			}

			# got begin and end of <td>...</td> block
			# 2006.09.11 '?' added to STOP greedy parsing
			if ($ln =~ /(<td.*?>)(.*)(<\/td>)/i) {
				my $tds = $1;
				my $inb = $2;
				my $tde = $3;
				prt( "$i - td[$tds] in[$inb] te[$tde]...\n" ) if ($dbg24);
				if ($ff > 0) {
					$fil = collectoi_hrefs($inb, 1); # remove HREF
					$fil = trim_line($fil);
					if (is_built_in($bif)) {
						push(@tbl_set3, [$bif, $fil, 0]);
						prt( " push(\@tbl_set3, [$bif, $fil, 0]); ...\n" ) if ($dbg23);
					} else {
						if (($bif =~ /unused/i)||($bif =~ /missed/i)) {
							prt( " Advice: Skipping [$bif] ...\n" ) if ($dbg23);
						} else {
							prt( " Advice: Skipping [$bif] - NOT BUILT IN FUNCTION!\n" );
						}
					}
					$ff = 0;
				} else {
					$bif = $inb;
					$bif =~ s/\[//;
					$bif =~ s/\]//;
					$bif = trim_line($bif);
					if ($bif =~ /<.*?>(.*?)<\/.*?>/) {
						$bif = trim_line($1);
					}
					$ff = 1;
				}
			} else {
				prt( "CHECK ME: Missed <td> ... </td> \n");
			}
		}
	}
}

sub get_old_index($) {
	my ($ind) = shift;
	$oi_tacnt = 0;
	$oi_tacnt3 = 0;
	my $ln = '';
	my $lncnt = 0;
	if (open IF, "<$ind") {
		@oi_larr = <IF>; # slurp it all in ...
		close(IF);
		$lncnt = scalar @oi_larr;
		prt( "Got $lncnt lines to process ... from [$ind]\n" );
		###write2file( join('',@oi_larr), 'tempout.txt');
		$ln = tag2newline( join('',@oi_larr), 'td' );
		###$ln = tag2newline( $ln, 'br' );
		@oi_larr2 = split(/\n/, $ln);
		###write2file( join("\n",@oi_larr2), 'tempout3.txt');
		if (get_table_array()) {
			$oi_tacnt = scalar @tbl_arr;
			$oi_tacnt3 = scalar @tbl_arr3;
			prt( "Got $oi_tacnt and $oi_tacnt3 lines to process ... from [$ind]...\n" );
		} else {
			prt( "Failed to find table $tbl_num or $tbl_num3 ... in [$ind]...\n" );
		}
	} else {
		prt( "Warning: Failed to open $ind ...\n" );
		$no_index = 1;
	}

	if ($oi_tacnt > 0) {
		process_tbl_arr();
	} else {
		prt( "Warning: Failed to load table $tbl_num ...\n" );
	}
	if ($oi_tacnt3 > 0) {
		process_tbl_arr3();
	} else {
		prt( "Warning: Failed to load table $tbl_num3 ...\n" );
	}
	transfer_old_table3();
}

###################################################################
# COPIED OUT OF htmltools.pl, since I do NOT want to include it, just now ...
sub tag2newline { # ($txt2,'td');
	my ($txt, $tag) = @_;
	my $len = length($txt);
	my $ntxt = '';
	my $i;
	my $ch = '';
	my $ft = '';
	my $lcnt = 0;
	for ($i = 0; $i < $len; $i++ ) {
		$ch = substr($txt,$i,1);
		if ($lcnt && ($ch eq '<')) {
			$ft = $ch;
			$i++;
			for ( ; $i < $len; $i++ ) {
				$ch = substr($txt,$i,1);
				$ft .= $ch;
				if ($ch eq '>') {
					if ($ft =~ /^<$tag/i) {
						$ft = "\n".$ft;
					}
					last;
				}
			}
			$ntxt .= $ft;
		} else {
			$ntxt .= $ch;
			if ($ch eq "\n") {
				$lcnt = 0;
			} else {
				$lcnt++;
			}
		}
	}
	return $ntxt;
}

sub collectoi_hrefs {
	my ($txt,$del) = @_;
	my $ntxt = '';
	my $len = length($txt);
	my $ch = '';
	my $hrf = '';
	my $i;
	for ($i = 0; $i < $len; $i++) {
		$ch = substr($txt,$i,1);
		if ($ch eq '<') {
			$hrf = $ch;
			$i++;
			for ( ; $i < $len; $i++) {
				$ch = substr($txt,$i,1);
				$hrf .= $ch;
				if ($ch eq '>') {
					last;
				}
			}

			if ($hrf =~ /^<a\s/i) {
				if ($del == 0) {
					$ntxt .= $hrf;
				}
				### prt("Got [$hrf] ...\n");
				if ($hrf =~ /href=["'](\S+)["']./i) {
					$hrf = $1;
					push(@oi_hrefs,$hrf);
					### prt("Got [$hrf] ...\n");
				}
			} elsif ($hrf =~ /^<\/a>$/i) {
				if ($del == 0) {
					$ntxt .= $hrf;
				}
			} else {
				$ntxt .= $hrf;
			}
		} else {
			$ntxt .= $ch;
		}
	}
	return $ntxt;
}

###################################################################

####################################
# Reducing a line to bare bones
# Used when loading
# the EditPlus 2 stx files.
####################################
sub trim_line($) {
   my ($l) = shift;
   chomp $l; # remove LF
   $l =~ s/\r$//; # and remove CR, if present
   $l =~ s/\t/ /g; # tabs to a space
   $l =~ s/\s\s/ /g while ($l =~ /\s\s/); # duplicate space to single
   $l = substr($l,1) while ($l =~ /^\s/); # each off leading space
   $l = substr($l,0,length($l)-1) while (($l =~ /\s$/)&&(length($l))); # and trailing space
   return $l;
}

#Loading HTML stx [C:/Program Files/EditPlus 2/html.stx] ...
#Got KEYWORD [HTML Tags] ...
#Got KEYWORD [HTML Attributes] ...
#Got KEYWORD [Special characters] ...
sub load_html_stx($) {
	my ($fil) = shift;
	my $kw = 0;
	my $nl = '';
	prt("Loading HTML stx [$fil] ...\n");
	open IF, "<$fil" or mydie( "ERROR: Unable to open [$fil] ...\n" );
	my @la = <IF>;
	close IF;
	foreach my $ln (@la) {
		chomp $ln;
		$ln =~ s/\r$//;
		if ($ln =~ /^#/) {
			if ($ln =~ /^#KEYWORD=(.*)/) {
				prt( "Got KEYWORD [$1] ...\n" );
				if ($1 eq 'HTML Tags') {
					$kw = 1;
					next;
				} elsif ($1 eq 'HTML Attributes') {
					$kw = 2;
					next;
				} elsif ($1 eq 'Special characters') {
					$kw = 3;
					next;
				}
			}
			$kw = 0;
			next;
		}
		if ($kw == 1) {
			$nl = trim_line($ln);
			push(@stxHTM, $nl) if (length($ln));
		} elsif ($kw == 2) {
			$nl = trim_line($ln);
			push(@stxATT, $nl) if (length($ln));
		} elsif ($kw == 3) {
			$nl = trim_line($ln);
			push(@stxSPL, $nl) if (length($ln));
		}
	}
}

#Loading PHP stx [C:/Program Files/EditPlus 2/php.stx] ...
#Got KEYWORD [Reserved words] ...
#Got KEYWORD [Built-in functions] ...
#Got KEYWORD [Variables] ...
sub load_php_stx($) {
	my ($fil) = shift;
	my $kw = 0;
	my $nl = '';
	prt("Loading PHP stx [$fil] ...\n");
	open IF, "<$fil" or mydie( "ERROR: Unable to open [$fil] ...\n" );
	my @la = <IF>;
	close IF;
	foreach my $ln (@la) {
		chomp $ln;
		$ln =~ s/\r$//;
		if ($ln =~ /^#/) {
			if ($ln =~ /^#KEYWORD=(.*)/) {
				prt( "Got KEYWORD [$1] ...\n" );
				if ($1 eq 'Reserved words') {
					$kw = 1;
					next;
				} elsif ($1 eq 'Built-in functions') {
					$kw = 2;
					next;
				} elsif ($1 eq 'Variables') {
					$kw = 3;
					next;
				}
			}
			$kw = 0;
			next;
		} elsif ($ln =~ /^;/) { # skip these 'comments'
			next;
		}
		if ($kw == 1) {
			$nl = trim_line($ln);
			push(@stxRW, $nl) if (length($ln));
		} elsif ($kw == 2) {
			$nl = trim_line($ln);
			push(@stxBI, $nl) if (length($ln));
		} elsif ($kw == 3) {
			$nl = trim_line($ln);
			push(@stxVA, $nl) if (length($ln));
		}
	}
}

sub do_stx_load() {
	load_html_stx( $html_stx );
	prt( "Loaded ".scalar @stxHTM." HTM, ".scalar @stxATT." ATT, and ".scalar @stxSPL." spls\n" );
	load_php_stx( $php_stx );
	prt( "Loaded ".scalar @stxRW." RW, ".scalar @stxBI." BI, and ".scalar @stxVA." vars\n" );
}

my $old_ind = 'temp2/index.htm';

do_stx_load();
get_old_index($old_ind);

# eof - test7.pl

