#!C:/Perl
# php2htm01.pl
# AIM: Convert PHP file sources to colour coded HTML
# geoffmclane.com - 2006.09.13
# 20070127 - fix for get_nn only
use strict;
use warnings;
use File::stat; # to get the file date
use File::Copy; # to copy from an existing background file - see $jpg_file
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $outfile = 'temp.'.$0.'.txt';
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

my $in_folder = 'C:\GTools\php';
my $out_folder = 'temp2';
my $indexhtm = 'index.htm';
# other USER variables
my $tab_space = '   '; # note tabs to 3 spaces - change if desired
my @dirfiles = ();	# set of directory files and folders
my @php_list = ();	# just the PHP files offset 0=name 1=date 2=size
my @done_files = (); # push(@done_files, [$nf, $in_date, $in_size, $in_file]); 
#######################################################
# Load of HTM tags and PHP reserved words and built-in
my $html_stx = 'C:/Program Files/EditPlus 2/html.stx';
my $php_stx = 'C:/Program Files/EditPlus 2/php.stx';
# if in HTML (default)
#if ($kw == 1) {
my @stxHTM = ();
#} elsif ($kw == 2) {
my @stxATT = ();
#} elsif ($kw == 3) {
my @stxSPL = ();
#else in PHP
#if ($kw == 1) {
my @stxRW = ();
#} elsif ($kw == 2) {
my @stxBI = ();
#} elsif ($kw == 3) {
my @stxVA = ();
#########################################################
my $m_doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'."\n".
'"http://www.w3.org/TR/html4/loose.dtd">';
my @existing = ();
my $tot_files = 0;
my $tot_dirs = 0;
my $out_total = 0;
my $doc_total = 0;
# a back ground file
my $jpg_file = 'cldsphp.jpg';
# background SOURCE and DESTINATION of background file
my $jpg_src = "c:/HOMEPAGE/P26/php/$jpg_file";
my $jpg_des = "$out_folder/$jpg_file";
# validation file
my $v401_file = 'valid-html401.gif';
# validation SOURCE and DESTINATION of validation file
my $v401_src = "c:/HOMEPAGE/P26/mperl/$v401_file";
my $v401_des = "$out_folder/$v401_file";
#########################################################
my $latest = 0;
my $earliest = time();
my $in_index = "$out_folder/$indexhtm";	# = something line 'index.htm';

# debug output only
my $dbg1 = 0;
my $dbg2 = 0;
my $dbg3 = 0;
my $dbg4 = 0;
my $dbg5 = 0;
my $dbg6 = 0;	# convert to HTML
my $dbg7 = 0;	# convert spaces
my $dbg8 = 0;	# parse PHP quote
my $dbg25 = 0;	# add metas to handle

# these are really just DEBUG counters
my ($a_cnt, $b_cnt, $c_cnt, $d_cnt, $e_cnt, $f_cnt, $o_cnt, $v_cnt, $q_cnt);
my $add_table = 0;
my $add_chart = 0;
my $add_pre = 1;
my $in_size = 0;
my $in_date = 0;
my $in_file = '';

my $in_php = 0;
my $in_com = 0;
my $got_gt = 0;
my $php = '';
my $com = '';
my $htm = '';
my $bit = '';
my $ist = 0;
my @lnbits = ();
my $len = 0;
my $alen = 0;
my $blen = 0;
my @nlines = ();
my $ii = 0;
my $ch = '';
my $nline = '';
my $g_lc = 0;
my $g_line = '';

# set the CLASS and COLOUR strings
my $a_class = 'a'; # built-in function (red)
my $b_class = 'b'; # comments (#006666)
my $c_class = 'c'; # reserved words (blue)
my $d_class = 'd'; # inside qw(...)
my $e_class = 'e'; # $scalar (#9400d3)
my $f_class = 'f'; # in <<EOF...EOF block (#666666)
my $o_class = 'o'; # @array  (#008b8b - was #FFA500)
my $v_class = 'v'; # %hash (#a52a2a - was #808000)
my $t_class = 't'; # quoted - single and double (#006600)

my $a_color = 'red';
my $b_color = '#006666';
my $c_color = 'blue';
my $d_color = '#a52a2a';
my $e_color = '#9400d3';
my $f_color = '#666666';
my $o_color = '#008b8b';
my $v_color = '#a52a2a';
my $t_color = '#006600';


#########################################
### subs
# built-in functions
# my $a_class = 'a'; # built-in function (red)
sub add_red {
   my ($t) = shift;
   $a_cnt++;
   return ('<span class="'.$a_class.'">'.$t.'</span>');
}
# comments
#my $b_class = 'b'; # comments (#006666)
sub add_class_b {
   my ($t) = shift;
   $b_cnt++;
   return ('<span class="'.$b_class.'">'.$t.'</span>');
}
# reserved words
#my $c_class = 'c'; # reserved words (blue)
sub add_blue {
   my ($t) = shift;
   $c_cnt++;
   return ('<span class="'.$c_class.'">'.$t.'</span>');
}
# perl qw set
#my $d_class = 'd'; # inside qw(...)
sub add_class_d {
   my ($t) = shift;
   $d_cnt++;
   return ('<span class="'.$d_class.'">'.$t.'</span>');
}
#my $e_class = 'e'; # $scalar (#9400d3)
sub add_class_e {
   my ($t) = shift;
   $e_cnt++;
   return ('<span class="'.$e_class.'">'.$t.'</span>');
}
#my $f_class = 'f'; # in <<EOF...EOF block (#666666)
sub add_class_f {
   my ($t) = shift;
   $f_cnt++;
   return ('<span class="'.$f_class.'">'.$t.'</span>');
}
#my $o_class = 'o'; # @array  (#008b8b - was #FFA500)
sub add_class_o {
   my ($t) = shift;
   $o_cnt++;
   return ('<span class="'.$o_class.'">'.$t.'</span>');
}
#my $v_class = 'v'; # %hash (#a52a2a - was #808000)
sub add_class_v {
   my ($t) = shift;
   $v_cnt++;
   return ('<span class="'.$v_class.'">'.$t.'</span>');
}
#my $t_class = 't'; # quoted - single and double (#006600)
sub add_quote {
   my ($t) = shift;
   $q_cnt++;
   return ('<span class="'.$t_class.'">'.$t.'</span>');
}

sub reset_count {
	# done at start of each file
	# these are really just DEBUG counters
	$a_cnt = 0;
	$b_cnt = 0;
	$c_cnt = 0;
	$d_cnt = 0;
	$e_cnt = 0;
	$f_cnt = 0;
	$o_cnt = 0;
	$v_cnt = 0;
	$q_cnt = 0;
}

my $phpcss = <<"PEOF";
/* Style Definitions - updated 2006.08.28 - 2006.07.13 */

body { 
	background-image:url('cldsphp.jpg');
	margin: 0cm 1cm 0cm 1cm; }

hr {
	margin: 0px 0px 0px 0px;
	border-style: none;
	padding: 0px 0px 0px 0px; }

h1 {
 background:#efefef;
 border-style: solid solid solid solid;
 border-color:#d9e2e2;
 border-width:1px;
 padding:2px 2px 2px 2px;
 font-size:200%;
 text-align:center;
}

p.top {
	margin: 0; 
	border-style: none; 
	padding: 0; 
	text-align: center; }

p.nom {
margin:0cm;
margin-bottom:.0001pt;
color: red;
}

p.code {
	margin: 0cm 0.5cm 0cm 0.5cm;
	font-size:10.0pt;
	font-family:"Courier New";
}

.bld { font-weight: bold; }
.cn { font-family:"Courier New"; }
.ctr { text-align: center; }

.a { color:red; }
.b { color:#006666; }
.c { color:blue; }
.d { color:#a52a2a; }
.e { color:#9400d3; }
.f { color:#666666; }
.o { color:#008b8b; }
.v { color:#a52a2a; }
.t { color:#006600; }

.cd {
  /* top, right, bottom, left */
  padding: 0px 10px 0px 10px;
  margin: 1px 10px 1px 10px;
  background: #f0f8ff;
  border-width: 1px;
  border-style: solid solid solid solid;
  border-color: #cccccc;
  width: 90%;
  font-family:"Courier New";
}

.out {
  padding: 0px 10px 0px 10px;
  margin: 1px 10px 1px 10px;
  background: #2f2f2f;
  color: #ffffff;
  border-width: 1px;
  border-style: solid solid solid solid;
  border-color: #cccccc;
  width: 90%;
  font-family:"Courier New";
}

/* eof - php.css */

PEOF

sub get_existing_files($) {
	my ($dir) = shift;
	my $df = '';
	prt( "Getting list of EXISTING files in out folder [$dir] ...\n" );
	opendir( THEDIR, $dir ) or mydie( "ERROR: Unable to open folder [$dir] ...\n" );
	my @dfiles = readdir(THEDIR); # slurp in ALL directories, and files, (and . & ..!)
	closedir(THEDIR);
	my $fndcss = 0;
	my $fndjpg = 0;
	my $fndval = 0;
	my $fcnt = scalar @dfiles;
	foreach my $dfile (@dfiles) {
		$df = $dir . '/' . $dfile; # get full name
		if ($dir eq '.') {
			$df = $dfile;
		}
		if ( -d $df ) { # is directory?
			# if ($dfile eq '.' || $dfile eq '..') or
			if ($dfile =~ '^\.$' || $dfile =~ '^\.\.$') {
				# do nothing with DOT and DOUBLE DOT
			} else {
				####push(@dir_list, $df); # save local DIRECTORY LIST
				$tot_dirs++;
			}
		} else { # it is a FILE
			$tot_files++;
			push(@existing, $dfile);
			if ($dfile =~ /^php\.css$/i) {
				prt( "NOTE: [$df] already exists ...\n" );
				$fndcss = 1;
			} elsif ($dfile =~ /^$jpg_file$/i) {
				prt( "NOTE: [$df] already exists ...\n" );
				$fndjpg = 1;
			} elsif ($dfile =~ /^$v401_file$/i) {
				prt( "NOTE: [$df] already exists ...\n" );
				$fndval = 1;
			}
		}
	}
	if (!$fndcss) {
		$df = $dir.'/php.css';
		prt( "NOTE: Creating [$df] ...\n" );
		write2file( $phpcss, $df );
	}
	if (!$fndjpg) {
		prt( "NOTE: Copying [$jpg_src] to [$jpg_des] ...\n" );
		copy( $jpg_src, $jpg_des ) or mydie("ERROR: Failed to COPY [$jpg_src]!\n");
	}
	if (!$fndval) {
		prt( "NOTE: Copying [$v401_src] to [$v401_des] ...\n" );
		copy( $v401_src, $v401_des ) or mydie("ERROR: Failed to COPY [$v401_src]!\n");
	}
}


sub write_chart($) {
	my ($oh) = shift;
    # mainly only for DEBUG
	print $oh <<"EOF";
Chart of Colours Used<br>
<table border="1" summary="Table of colours, and count of times used">
<tr>
	<th>Class</th><th>Colour</th><th>Use</th><th>Count</th>
</tr>
<tr>
<td><span class="$a_class">class='$a_class'</span></td>
<td><span class="$a_class">$a_color RED</span></td>
<td><span class="$a_class">Built-in Functions</span></td>
<td><span class="$a_class">$a_cnt</span></td>
</tr>
<tr>
<td><span class="$b_class">class='$b_class'</span></td>
<td><span class="$b_class">$b_color BLUEGREEN</span></td>
<td><span class="$a_class">Comments (following #)</span></td>
<td><span class="$b_class">$b_cnt</span></td>
</tr>
<tr>
<td><span class="$c_class">class='$c_class'</span></td>
<td><span class="$c_class">$c_color BLUE</span></td>
<td><span class="$a_class">Reserved Words</span></td>
<td><span class="$c_class">$c_cnt</span></td>
</tr>
<tr>
<td><span class="$d_class">class='$d_class'</span></td>
<td><span class="$d_class">$d_color BROWN</span></td>
<td><span class="$a_class">Inside qw(...)</span></td>
<td><span class="$d_class">$d_cnt</span></td>
</tr>
<tr>
<td><span class="$e_class">class='$e_class'</span></td>
<td><span class="$e_class">$e_color DARKBLUE</span></td>
<td><span class="$a_class">Scalar Variables</span></td>
<td><span class="$e_class">$e_cnt</span></td>
</tr>
<tr>
<td><span class="$f_class">class='$f_class'</span></td>
<td><span class="$f_class">$f_color GREY</span></td>
<td><span class="$a_class">Inside <<EOF thingy</span></td>
<td><span class="$f_class">$f_cnt</span></td>
</tr>
<tr>
<td><span class="$o_class">class='$o_class'</span></td>
<td><span class="$o_class">$o_color ORANGE</span></td>
<td><span class="$a_class">Array Variables</span></td>
<td><span class="$o_class">$o_cnt</span></td>
</tr>
<tr>
<td><span class="$v_class">class='$v_class'</span></td>
<td><span class="$v_class">$v_color OLIVE</span></td>
<td><span class="$a_class">Hash Variables</span></td>
<td><span class="$v_class">$v_cnt</span></td>
</tr>
<tr>
<td><span class="$t_class">class='$t_class'</span></td>
<td><span class="$t_class">$t_color GREEN</span></td>
<td><span class="$a_class">Single and Double Quotes</span></td>
<td><span class="$t_class">$q_cnt</span></td>
</tr>
</table>
<br>End of chart<br>
EOF
	my $tot = ($a_cnt+$b_cnt+$c_cnt+$d_cnt+$e_cnt+$f_cnt+$o_cnt+$v_cnt+$q_cnt);
    my $diff = $out_total - $doc_total;
    print $oh "This use of $tot colour span sequences added $diff bytes. In=$doc_total Out=$out_total<br>\n";
}

sub add_metas($$) {
	my ($oh, $ad) = @_;
	my $m = '';
	my $m2 = '';
	prt( "Add metas to handle ...\n" ) if ($dbg25);
	$m = '<meta name="author" content="geoff mclane">'."\n";
	$m .= '<meta name="keywords" content="geoff, mclane, geoffmclane, computer, consultant, programmer,'."\n";
	$m2 = 'perl, scripts, samples, examples';
#	if ($ad) {
#		foreach my $k (keys %HFuncsFnd) {
#			if (length($m2) > 76) {
#				$m2 .= ",\n";
#				$m .= $m2;
#				$m2 = $k;
#			} else {
#				$m2 .= ', '.$k;
#			}
#		}
#	}
	$m .= $m2;
	$m .= ', free">'."\n";
	$m .= '<meta name="description" content="page of a computer programmer, with sample perl scripts">'."\n";
	print $oh $m;
	prt("$m") if ($dbg25);
}


##########################################################################
# The main file OUTPUT - that is the HTML file.
# It establishes the HTML header, which includes the CSS style
# information. then outputs each of the 'converted' lines ...
# this is what it is all about - to generate a HTML document
##########################################################################
sub write_out_file {
	my ($outf) = shift;
	my ($OTF);

	open $OTF, ">$outf" or mydie( "ERROR: Unable to create $outf ... aborting ...\n" );
	print $OTF "$m_doctype\n";
	print $OTF <<"EOF";
<html>
<head>
<title>$in_file to HTML</title>
<meta http-equiv="Content-Language" content="en-gb">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">

EOF

	add_metas($OTF, 1);

	print $OTF <<"EOF";
<link rel=stylesheet href="php.css" type="text/css">
</head>

<body>

EOF

   print $OTF "<h1>$in_file to HTML.</h1>\n";
   ###print $OTF '<p class="top"><a href="'.$indexhtm.'">index</a></p>'."\n";
   print $OTF '<p>Generated: ' . localtime(time()) . " from $in_file ";
   print $OTF YYYYMMDD($in_date).' '.b2KMG($in_size).".</p>\n";
   if ($add_table) {
      print $OTF '<table width="100%" border="1" summary="Simple HTML of $in_file"><tr><td>'."\n";
   } elsif ($add_pre) {
      print $OTF '<pre class="cd">'."\n";
   }
   # actual output of generated lines
   foreach my $line (@nlines) {
      $out_total += length($line);
      print $OTF $line;
   }

   if ($add_table) {
      print $OTF '</td></tr></table>'."\n";
   } elsif ($add_pre) {
      print $OTF '</pre>'."\n";
   }

   if ($add_chart) {
	   write_chart($OTF);
   }

   print $OTF '<p class="top"><a href="'.$indexhtm.'">index</a></p>'."\n";

	# add 4.01 validation ...
   print $OTF <<"EOF";
<p>
<a href="http://validator.w3.org/check?uri=referer">
<img src="valid-html401.gif" alt="Valid HTML 4.01 Transitional" width="88" height="31">
</a>
</p>
EOF

   print $OTF "</body>\n";

   print $OTF "</html>\n";

   close($OTF);
}


#########################################################
######################################################
# Converting SPACES to '&nbsp;'
# Of course this could be done just using perl's
# powerful search and replace, but this handles
# any number of spaces, only converting the number
# minus 1 to &nbsp; ... not sure how to have
# this level of control with regex replacement
######################################################
sub conv_spaces {
   my $t = shift;
   my ($c, $i, $nt, $ln, $sc, $sp);
   $nt = ''; # accumulate new line here
   $ln = length($t);
   for ($i = 0; $i < $ln; $i++) {
      $c = substr($t,$i,1);
      if ($c eq ' ') {
         $i++; # bump to next 
         $sc = 0;
         $sp = '';
         for ( ; $i < $ln; $i++) {
            $c = substr($t,$i,1);
            if ($c ne ' ') {
               last; # exit
            }
            $sc++;
            $sp .= $c;
         }
         if ($sc) {
            $sp =~ s/ /&nbsp;/g;
            $nt .= $sp;
         }
         $i--; # back up one
         $c = ' '; # add back the 1 space
      }
      $nt .= $c;
   }
   prt( "conv_space: from [$t] to [$nt] ...\n" ) if $dbg7;
   return $nt;
}

###########################################################################
# VERY IMPORTANT SERVICE
# This converts the 'text' into HTML text, but only does a partial job!
# 1. Convert '&' to '&amp;' to avoid interpreting as replacement
# 2. Convert '<' to '&lt;' to avoid interpreting as HTML
# 3. Convert '"' to '&quot;'
# 4. Convert '\t' to SPACES
# 5. Finally, if there are double or more SPACES, convert to '&nbsp;'
###########################################################################
sub html_line {
   my $t = shift;
   my $ot = $t;
   $t =~ s/&/&amp;/g; # all '&' become '&amp;'
   $t =~ s/</&lt;/g; # make sure all '<' is/are swapped out
   $t =~ s/\"/&quot;/g; # and all quotes become &quot;
   $t =~ s/\t/$tab_space/g; # tabs to spaces
   if ($t =~ /\s\s/) { # if any two consecutive white space
      return conv_spaces($t);
   }
   prt( "html_line: from [$ot] to [$t] ...\n" ) if $dbg6;
   return $t;
}

sub get_dir_files($) {
	my ($dir) = shift;
	prt( "Getting list of EXISTING files in out folder [$dir] ...\n" );
	opendir( THEDIR, $dir ) or mydie( "ERROR: Unable to open folder [$dir] ...\n" );
	@dirfiles = readdir(THEDIR); # slurp in ALL directories, and files, (and . & ..!)
	closedir(THEDIR);
	my $dcnt = scalar @dirfiles;
	prt( "Processing $dcnt items in directory [$in_folder] ...\n" );
	foreach my $fil (@dirfiles) {
		if (($fil =~ /^\.$/)||($fil =~ /^\.\.$/)) {
			prt("skip dot and double dot - [$fil]\n") if ($dbg1);
			next;
		} 
		my $ff = $dir . '/' . $fil;
		my $sb = stat($ff);
		my $ext = my_get_ext($ff);
		if ($fil =~ /^temp.*/i) {
			prt( "Got TEMP ext=[$ext] [$fil] ... \n" ) if ($dbg2);
		} elsif ($ext =~ /^php$/i) {
			push(@php_list, [$fil, $sb->mtime, $sb->size]);
			prt( "Got PHP ext=[$ext] [$fil] [".YYYYMMDD($sb->mtime)."] [".$sb->size."]... \n" ) if ($dbg3);
		} else {
			prt( "Got OTHER ext=[$ext] [$fil] ... \n" ) if ($dbg2);
		}
	}
}

sub get_split_line($) {
	my ($ln) = shift;
	my $len = length($ln);
	my $an = 0;
	my $sp = 0;
	my @ra = (); # return this
	my $bit = '';
	for (my $i = 0; $i < $len; $i++) {
		my $ch = substr($ln,$i,1); # get next char
		if ($an) { # building alpha numeric
			if ($ch =~ /\w/) { # still building
				$bit .= $ch;	# accumuate
			} else {
				push(@ra, $bit) if (length($bit));
				$bit = $ch; # start with this
				$an = 0; # not an
				if ($ch =~ /\s/) { # if white space
					$sp = 1;
				} else {
					if (($ch eq '"')||($ch eq "'")) {
						push(@ra, $bit) if (length($bit));
						$bit = '';
					}
					$sp = 0;
				}
			}
		} else { # not in an yet
			if ($ch =~ /\w/) {
				# enter an
				push(@ra, $bit) if (length($bit));
				$bit = $ch;
				$an = 1;	# is an
			} else { # not an
				if ($sp) {
					if ( !($ch =~ /\s/) ) { # space ended
						push(@ra, $bit) if (length($bit));
						$bit = $ch;
						$sp = 0;
						if (($ch eq '"')||($ch eq "'")) {
							push(@ra, $bit) if (length($bit));
							$bit = '';
						}
					}
				} else { # not in space
					if ($ch =~ /\s/) { # space started
						push(@ra, $bit) if (length($bit));
						$bit = $ch;
						$sp = 1;
					} else {
						if (($ch eq '"')||($ch eq "'")) {
							push(@ra, $bit) if (length($bit));
							push(@ra, $ch);
							$bit = '';
						} else {
							$bit .= $ch; # accumulate
						}
					}
				}
			}
		}
	}
	push(@ra, $bit) if (length($bit));
	return @ra;
}

sub reset_lines() {
	# set at start of set of lines to process
	$in_php = 0;
	$in_com = 0;
	$got_gt = 0;
	$php = '';
	$com = '';
	$htm = '';
	$bit = '';
	$ist = 0;
	@nlines = ();
	$nline = '';
	$g_lc = 0;
}

sub enter_php() {
	if ($blen >= 2) {
		if (substr($bit,0,2) eq '<?') {
			$in_php = 1;
			$php = $bit;
			prt( "Line $g_lc: Enter PHP ... [$php][$g_line]\n" );
			return 1;
		}
	}
	return 0;
}

sub enter_com() {
	if ( !$in_php && ($blen >= 4)) {
		if (substr($bit,0,4) eq '<!--') {
			$in_com = 1;
			$com = $bit;
			prt( "Line $g_lc: Enter COM ... [$com][$g_line]\n" );
			return 1;
		}
	}
	return 0;
}

#if ($kw == 1) {
#my @stxRW = ();
# add_red
sub in_stx_RW($) {
	my ($t) = shift;
	foreach my $rw (@stxRW) {
		if ($t eq $rw) {
			return 1;
		}
	}
	return 0;
}
#} elsif ($kw == 2) {
#my @stxBI = ();
# add_blue
sub in_stx_BI($) {
	my ($t) = shift;
	foreach my $rw (@stxBI) {
		if ($t eq $rw) {
			return 1;
		}
	}
	return 0;
}

my @php_la = ();
my @php_aol = ();
my $php_bi = '';
my $php_ct = 0;
my $php_pi = 0;
my $php_bl = 0;
my $php_c = '';
my $php_d = '';
my $php_p2 = 0;
my $php_lc = 0;
my $php_i = 0;

sub process_php_quote() {
	my ($tx, $tx2);
	my $php_qe = 0;
	$php_d = '';
	$tx2 = '';
	prt( "$php_pi: Start quote [$php_c] ... [$php_bi] \n" ) if ($dbg8);
	$tx = $php_c;
	for ($php_p2 = 1; $php_p2 < $php_bl; $php_p2++) {
		$php_d = substr($php_bi,$php_p2,1);
		$tx .= $php_d;
		if ($php_c eq $php_d) {
			prt( "$php_pi: End 1 quote [$php_d] ... [$php_bi]tx=[$tx] \n" ) if ($dbg8);
			$php_qe = 1;
			last;
		}
	}
	if ($php_c ne $php_d) {
		$php_pi++; # move to next bit
		for ( ; $php_pi < $php_ct; $php_pi++) {
			$php_bi = $php_aol[$php_pi];
			$php_bl = length($php_bi);
			prt( "$php_pi: Finding End quote [$php_c] ... [$php_bi]tx=[$tx] \n" ) if ($dbg8);
			for ($php_p2 = 0; $php_p2 < $php_bl; $php_p2++) {
				$php_d = substr($php_bi,$php_p2,1);
				$tx .= $php_d;
				if ($php_c eq $php_d) {
					prt( "$php_pi: End 2 quote [$php_d] ... [$php_bi]tx=[$tx] \n" ) if ($dbg8);
					$php_p2++;
					if ($php_p2 < $php_bl) {
						$tx2 = substr($php_bi,$php_p2); # get balance
						prt( "Got balance ... [$php_bi]tx2=[$tx2] \n" ) if ($dbg8);
					}
					$php_qe = 1;
					last;
				}
			}
			last if ($php_qe); # exit bit loop
		}
	}
	# assume we got to the end of the quoted text
	prt( "$php_pi: Assumed end quote [$php_c] ... [$php_bi]tx=[$tx] \n" ) if ($dbg8);
	$nline .= add_quote(html_line($tx));
	$nline .= html_line($tx2) if (length($tx2));
}

sub parse_php() {
	@php_la = split("\n",$php);
	$php_lc = scalar @php_la;
	for ($php_i = 0; $php_i < $php_lc; $php_i++) {
		my $pl = $php_la[$php_i];
		chomp $pl;
		$pl =~ s/\r$//;
		@php_aol = get_split_line($pl);
		$php_ct = scalar @php_aol;
		for ($php_pi = 0; $php_pi < $php_ct; $php_pi++) {
			$php_bi = $php_aol[$php_pi];
			$php_bl = length($php_bi);
			$php_c = substr($php_bi,0,1);
			if (($php_c eq '"')||($php_c eq "'")) {
				process_php_quote();
			} elsif (($php_bl >= 2)&&(substr($php_bi,0,2) eq '<?')) {
				$nline .= add_class_e(html_line($php_bi));
			} elsif (($php_bl >= 2)&&(substr($php_bi,0,2) eq '?>')) {
				$nline .= add_class_e(html_line($php_bi));
			} elsif (in_stx_RW($php_bi)) {
				$nline .= add_blue(html_line($php_bi));
			} elsif (in_stx_BI($php_bi)) {
				$nline .= add_red(html_line($php_bi));
			} else {
				$nline .= html_line($php_bi);
			}
		} # for each of the line bits
		if ($php_lc > 1) {
			$nline .= "\n";
		}
	} # for each line
}

sub process_php() {
	for ( ; $ii < $alen; $ii++) {
		$bit = $lnbits[$ii];
		$blen = length($bit);
		$ch = substr($bit,0,1);
		$php .= $bit;
		if ($blen >= 2) {
			if (substr($bit,0,2) eq '?>') {
				###prt( "Line $g_lc: Exit PHP ... [$php][$g_line]\n" );
				prt( "Line $g_lc: Exit PHP ... [$g_line]\n" );
				parse_php();
				$in_php = 0;
				return 1;	# potentially before EOL
			}
		}
	}
	$php .= "\n";
	return 0; # reached EOL
}

sub parse_com() {
	my @la = split("\n",$php);
	foreach my $cl (@la) {
		chomp $cl;
		$cl =~ s/\r$//;
		$nline .= html_line($cl)."\n";
	}
}

sub process_com() {
	for ( ; $ii < $alen; $ii++) {
		$bit = $lnbits[$ii];
		$blen = length($bit);
		$ch = substr($bit,0,1);
		$com .= $bit;
		if ($blen >= 3) {
			if (substr($bit,0,3) eq '-->') {
				$in_com = 0;
				prt( "Line $g_lc: Exit COM ... [$com][$g_line]\n" );
				$com = add_class_b(html_line($com));
				prt( "NLine $g_lc: Exit COM ... [$com]\n" );
				##$nline .= $com;
				parse_com();
				$com = '';
				return 1; # potentially before EOL
			}
		}
	}
	$com .= "\n";
	return 0; # reached EOL
}

sub process_htm() {
	for ( ; $ii < $alen; $ii++) {
		$bit = $lnbits[$ii];
		$blen = length($bit);
		$ch = substr($bit,0,1);
		if ($ch eq '<') {
			if (enter_php() || enter_com()) {
				$nline .= $htm;
				$htm = '';
				return 1;
			}
		}
		# processing HTML
		if ( !$in_php && !$in_com) {
			if ($ch eq '<') {
				$got_gt = 1;
				$htm .= html_line($bit);
			} else {
				if ($got_gt) {
					if (is_htm_tag($bit)) {
						$htm .= add_blue($bit);
					} else {
						$htm .= html_line($bit);
					}
					if ($ch eq '>') {
						$got_gt = 0;
					}
				} else {
					$htm .= html_line($bit);
				}
			}
		}
	}
	$htm .= "\n";	# reached EOL
	return 0;
}

sub show_line_bits() {
	prt( " $g_lc:" );
	for ($ii = 0; $ii < $alen; $ii++) {
		$bit = $lnbits[$ii];
		$blen = length($bit);
		$ch = substr($bit,0,1);
		prt( "$blen" );
		prt( "[$bit]" );
	}
	prt("$len $alen p=$in_php c=$in_com\n");
}

sub process_line() {
	##prt( "Line $g_lc: [$g_line] p=$in_php c=$in_com...\n" ) if ($dbg4);
	prt( "Line $g_lc: [$g_line] p=$in_php c=$in_com...\n" );
	my $len = length($g_line);
	@lnbits = get_split_line($g_line);
	$alen = scalar @lnbits;
	show_line_bits() if ($dbg5);
	if ($alen) {
		for ($ii = 0; $ii < $alen; $ii++) {
			if ($in_php) {
				process_php();
			} else { # in HTML
				if ($in_com) {
					process_com();
				} else {
					process_htm();
				}
			}
		} # for the array of line bits
	} else {
		if ($in_php) {
			process_php();
		} elsif ($in_com) {
			process_com();
		} else {
			process_htm(); # add a blank
		}
	}
	$nline .= $htm;
	$htm = '';
	if (length($nline)) {
		push(@nlines, "$nline");
	}
	$nline = '';
}

sub process_file($$) {
	my ($ff, $fil) = @_;
	if (open IF, "<$ff") {
		my @lines = <IF>;
		close IF;
		$g_lc = scalar @lines;
		prt( "Processing $g_lc lines in [$fil] ...\n" );
		reset_count();
		$g_lc = 0;
		reset_lines();
		foreach my $ln (@lines) {
			$g_line = $ln;
			chomp $g_line;
			$g_line =~ s/\r$//;
			$g_lc++;
			process_line();
		} # for each LINE
		my $nf = my_get_filetitle($fil).'.htm';
		my $nfo = $out_folder."\\".$nf;
		prt("Done $g_lc lines of [$fil] ... ".scalar @nlines." lines out to [$nfo] ...\n");
		write_out_file($nfo);
		push(@done_files, [$nf, $in_date, $in_size, $in_file]);
		return 1;
	} else {
		prt( "WARNING: Failed to open [$ff] ...\n" );
	}
	return 0;
}

sub process_files($) {
	my ($dir) = shift;
	my $cnt = scalar @php_list;
	prt( "Processing $cnt files from folder [$dir] ...\n" );
	for (my $i = 0; $i < $cnt; $i++) {
		my $fil = $php_list[$i][0];
		$in_date = $php_list[$i][1];
		$in_size = $php_list[$i][2];
		$in_file = $fil;
		my $ff = $dir.'/'.$fil;
		if ($in_date > $latest) { $latest = $in_date; }
		if ($in_date < $earliest) { $earliest = $in_date; }
		process_file($ff, $fil);
	} # for $i to $cnt
}

sub my_get_path($) {
	my ($d) = shift;
	$d =~ s/\\/\//g;
	if ($d =~ /\/$/) { # if it ends in a path
		return $d; # return it ALL
	}
	my @arr = split('/',$d);
	pop @arr;
	return (join('/',@arr));
}

sub my_get_filename($) {
	my ($p) = shift;
	my $d = my_get_path($p);
	my $f = $p;
	$f = substr($p, length($d) + 1)	if (length($d));
	return $f;
}

sub my_get_ext($) {
	my ($p) = shift;
	my $f = my_get_filename($p);
	my @a = split(/\./, $f);
	my $c = scalar @a;
	### prt("IN=[$p] d=[$d] f=[$f] $c ...\n");
	if ($c > 1) {
		return $a[-1]; # get last 'ext' entry
	} elsif (substr($f,0,1) eq '.') {
		return $f;
	}
	return '';
}

sub my_get_filetitle($) {
   my ($p) = shift;
   my $f = my_get_filename($p);
   my @a = split(/\./, $f);
   my $cnt = scalar @a;
   if ($cnt > 1) {
      pop @a; # drop last 'ext' entry
      return join( '.', @a);
   }
   return $f;
}

################################################
# My particular time 'translation'
sub YYYYMMDD {
	my ($tm) = shift;
	#    0    1    2     3     4    5     6     7     8
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($tm);
	$year += 1900;
	$mon += 1;
	my $ymd = "$year/";
	if ($mon < 10) {
		$ymd .= '0'.$mon.'/';
	} else {
		$ymd .= "$mon/";
	}
	if ($mday < 10) {
		$ymd .= '0'.$mday;
	} else {
		$ymd .= "$mday";
	}
	return $ymd;
}

##################################################
# My particular bytes to K, M, G
sub b2KMG($) {
	my ($d) = shift;
	if ($d < 1000) {
		return $d;
	}
	my $oss;
	my $kss;
	my $lg = 0;
	my $ks = ($d / 1024); #// get Ks
	my $div = 1;
    if( $ks < 1000 ) {
      $div = 1;
      $oss = "KB";
    } elsif ( $ks < 1000000 ) {
	  $div = 1000;
      $oss = "MB";
    } elsif ( $ks < 1000000000 ) {
      $div = 1000000;
      $oss = "GB";
    } else {
      $div = 1000000000;
      $oss = "TB";
    }
    $kss = $ks / $div;
    $kss += 0.05;
    $kss *= 10;
    $lg = int($kss);
    return( ($lg / 10) . " " . $oss );
   ###return( ($lg / 10) . $oss );
}



##################################################
# My particular 'nice number'
sub get_nn { # perl nice number nicenum add commas
	my ($n) = shift;
	if (length($n) > 3) {
		my $mod = length($n) % 3;
		my $ret = (($mod > 0) ? substr( $n, 0, $mod ) : '');
		my $mx = int( length($n) / 3 );
		for (my $i = 0; $i < $mx; $i++ ) {
			if (($mod == 0) && ($i == 0)) {
				$ret .= substr( $n, ($mod+(3*$i)), 3 );
			} else {
				$ret .= ',' . substr( $n, ($mod+(3*$i)), 3 );
			}
		}
		return $ret;
	}
	return $n;
}

####################################
####################################
# Reducing a line to bare bones
# Used when loading
# the EditPlus 2 stx files.
####################################
sub trim_line($) {
   my ($l) = shift;
   chomp $l; # remove LF
   $l =~ s/\r$//; # and remove CR, if present
   $l =~ s/\t/ /g; # tabs to a space
   $l =~ s/\s\s/ /g while ($l =~ /\s\s/); # duplicate space to single
   $l = substr($l,1) while ($l =~ /^\s/); # each off leading space
   $l = substr($l,0,length($l)-1) while (($l =~ /\s$/)&&(length($l))); # and trailing space
   return $l;
}

#Loading HTML stx [C:/Program Files/EditPlus 2/html.stx] ...
#Got KEYWORD [HTML Tags] ...
#Got KEYWORD [HTML Attributes] ...
#Got KEYWORD [Special characters] ...
sub load_html_stx($) {
	my ($fil) = shift;
	my $kw = 0;
	my $nl = '';
	prt("Loading HTML stx [$fil] ...\n");
	open IF, "<$fil" or mydie( "ERROR: Unable to open [$fil] ...\n" );
	my @la = <IF>;
	close IF;
	foreach my $ln (@la) {
		chomp $ln;
		$ln =~ s/\r$//;
		if ($ln =~ /^#/) {
			if ($ln =~ /^#KEYWORD=(.*)/) {
				prt( "Got KEYWORD [$1] ...\n" );
				if ($1 eq 'HTML Tags') {
					$kw = 1;
					next;
				} elsif ($1 eq 'HTML Attributes') {
					$kw = 2;
					next;
				} elsif ($1 eq 'Special characters') {
					$kw = 3;
					next;
				}
			}
			$kw = 0;
			next;
		}
		if ($kw == 1) {
			$nl = trim_line($ln);
			push(@stxHTM, $nl) if (length($ln));
		} elsif ($kw == 2) {
			$nl = trim_line($ln);
			push(@stxATT, $nl) if (length($ln));
		} elsif ($kw == 3) {
			$nl = trim_line($ln);
			push(@stxSPL, $nl) if (length($ln));
		}
	}
}

sub is_htm_tag($) {
	my ($tg) = shift;
	foreach my $t (@stxHTM) {
		if (uc($t) eq uc($tg)) {
			return 1;
		}
	}
	return 0;
}

sub is_in_htm($) {
	my ($tg) = shift;
	##prt( "Testing [$tg] ");
	$tg =~ s/^\///;	# remove any leading '/'
	##prt( "now 1 [$tg] " );
	if ($tg =~ /\/$/) { # if there is a trailing '/'
		$tg =~ s/\/$//; # remove that
		$tg = trim_line($tg); # and trim
		##prt( "now 2 [$tg] " );
	}
	my @a = split(/ /,$tg); # split the tag away for any attributes
	if (scalar @a > 1) {
		$tg = $a[0]; # just get the first
	}
	##prt( "now 3 [$tg]\n" );
	return is_htm_tag($tg);
}

#Loading PHP stx [C:/Program Files/EditPlus 2/php.stx] ...
#Got KEYWORD [Reserved words] ...
#Got KEYWORD [Built-in functions] ...
#Got KEYWORD [Variables] ...
sub load_php_stx($) {
	my ($fil) = shift;
	my $kw = 0;
	my $nl = '';
	prt("Loading PHP stx [$fil] ...\n");
	open IF, "<$fil" or mydie( "ERROR: Unable to open [$fil] ...\n" );
	my @la = <IF>;
	close IF;
	foreach my $ln (@la) {
		chomp $ln;
		$ln =~ s/\r$//;
		if ($ln =~ /^#/) {
			if ($ln =~ /^#KEYWORD=(.*)/) {
				prt( "Got KEYWORD [$1] ...\n" );
				if ($1 eq 'Reserved words') {
					$kw = 1;
					next;
				} elsif ($1 eq 'Built-in functions') {
					$kw = 2;
					next;
				} elsif ($1 eq 'Variables') {
					$kw = 3;
					next;
				}
			}
			$kw = 0;
			next;
		}
		if ($kw == 1) {
			$nl = trim_line($ln);
			push(@stxRW, $nl) if (length($ln));
		} elsif ($kw == 2) {
			$nl = trim_line($ln);
			push(@stxBI, $nl) if (length($ln));
		} elsif ($kw == 3) {
			$nl = trim_line($ln);
			push(@stxVA, $nl) if (length($ln));
		}
	}
}

sub do_stx_load() {
	load_html_stx( $html_stx );
	prt( "Loaded ".scalar @stxHTM." HTM, ".scalar @stxATT." ATT, and ".scalar @stxSPL." spls\n" );
	load_php_stx( $php_stx );
	prt( "Loaded ".scalar @stxRW." RW, ".scalar @stxBI." BI, and ".scalar @stxVA." vars\n" );
}

sub generate_index { # output @done_files - array of files to index.htm
	my $icnt = scalar @done_files;
	my $cnt = 0;
	my $msg = '';
	my $i = 0;
	my $dcnt = 0;
	my $ocnt = 0;
	my $acnt = 0; # added to index.htm
	my ($OF, $line, $date, $sz, $wrap, $tsc);
	if ($icnt == 0) {
		prt( "No index.htm generated - no files to list ...\n" );
		return;
	}
	my $slatest = YYYYMMDD($latest);
	my $searly = YYYYMMDD($earliest);
	my $of = $in_index; ## "$out_folder/$indexhtm";	# = something line 'index.htm';
	$wrap = 5;
	$tsc = 0;

	open $OF, ">$of" or mydie("ERROR: Unable to generate index file ...aborting ...\n");
	prt( "\nWriting [$of] HTML with $icnt files ...\n" );
	print $OF "$m_doctype\n";
	print $OF <<"EOF";

<html>

<head>
<title>Index to PHP HTML Samples</title>
<meta http-equiv="Content-Language" content="en-au">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
EOF

	add_metas($OF, 0);

	print $OF <<"EOF";
<link rel=stylesheet href="php.css" type="text/css">
</head>

<body>

EOF
	print $OF "<h1>Index to PHP HTML Samples</h1>\n";
	##out_link_line($OF, 1); # avoid adding top
	print $OF <<"EOF";
<a name="top"></a>
<p>This is a rather random sample of the PHP scripts I have generated over the 
last few years ($searly - $slatest). Some represent complete PHP applications, aimed at a particular 
purpose, while others are just samples, sometimes not functional! And some, are only 
'include' files. A small amount of script has been scraped 
from various web site, to test some suggested functionality, but most are largely 
my own fun and games with PHP.</p>

<p>When there is a series numbered 01, 02, 03, etc, this usually means the latest is the largest 
number, but sometimes they are different samples. However, the date following each file name 
link is a further indication of the age of the sample. And the original file size, in bytes, follows 
that.</p>

<p>Each of these HTML files are generated from the PHP script, php2htm01.pl, with colour coding added, 
and, as can be read in the preamble to another 'converter', p2hall02.pl, this means sometimes a simple 
copy and paste will fail, due mainly to a 'translation' of certain characters. 
But most of the time it should be ok, or only require minor fixes.</p>

<p>As always, <font size="2" color="red"><b>*** USE AT OWN RISK ***</b></font>. These are in the 
'public domain' thus there is no 'licence' to worry about. Of course you MUST have a Perl runtime
installed, and in some special cases, additional PHP 'libraries' installed/enabled, to run those 
particular files locally, or on a host.</p>
	
<a name="alphabetic"></a>
<p>The table is repeated. The first should be more or less in file alphabetic order, 
the second is in <a href="#dateindex"><b>date order</b></a> table, with the latest listed first. Then 
there is a <a href="#jumptable"><b>'jump'</b></a> table, where each PHP built-in function is list, 
with links to the file(s) that use that built-in. Enjoy ;=))</p>

EOF

	###out_link_line($OF, 2); # avoid adding alphabetc
	print $OF '<table width="100%" border="0" summary="Alphabetic index to Perl samples">'."\n";
	print $OF '<caption>Alphabetic index to PHP samples</caption>'."\n";
	# actual output of generated lines
	$cnt = 0; # for $wrap
	#foreach $line (@ind_files) {
	for ($i = 0; $i < $icnt; $i++) {
		$line = $done_files[$i][0];
		####$date = $ind_files[$i][1];
		$date = YYYYMMDD($done_files[$i][1]);
		$sz = get_nn($done_files[$i][2]);
		$msg = '';
		if ($cnt == 0) {
			$msg = "<tr>\n";
		}
		###mark_old_index($line);
		$msg .= "<td><a href=\"$line\">$line</a><br>$date<br>$sz</td>\n";
		$cnt++;
		if ($cnt == $wrap) {
			$msg .= "</tr>\n";
			$cnt = 0;
		}
		print $OF $msg;
		$acnt++; # bump added
	}
#	my $tsc = scalar @tbl_set;
#	$ocnt = 0;
#	for ($i = 0; $i < $tsc; $i++ ) {
#		if ($tbl_set[$i][7] == 0) {
#			$ocnt++;
#		}
#	}
#	prt("Checked $tsc files from old index, and found $ocnt NOT MARKED ...\n");
#	#		           0     1     2    3    4    5    6    7
#	# push(@tbl_set, [$hrf, $fil, $dt, $sz, $yr, $mt, $dy, 0]);
#	for ($i = 0; $i < $tsc; $i++ ) {
#		if ($tbl_set[$i][7] == 0) {
#			$line = $tbl_set[$i][0];
#			if (in_existing($line)) {
#				$date = $tbl_set[$i][2];
#				$sz   = $tbl_set[$i][3];
#				$msg = '';
#				if ($cnt == 0) {
#					$msg = "<tr>\n";
#				}
#				###mark_old_index($line);
#				$tbl_set[$i][7] = 2;
#				$msg .= "<td><a href=\"$line\">$line</a><br>$date<br>$sz</td>\n";
#				$cnt++;
#				if ($cnt == $wrap) {
#					$msg .= "</tr>\n";
#					$cnt = 0;
#				}
#				print $OF $msg;
#				prt( "NOTE ADDED [$line][$date][$sz] from OLD index ...\n" );
#				$dcnt++;
#			} else {
#				prt( "WARNING: File [$line] is NO LONGER IN FOLDER! Now dumped!!\n" );
#			}
#		}
#	}
	if ($cnt) {
		$msg = '';
		while ($cnt < $wrap) {
			$msg .= "<td>&nbsp; </td>\n";
			$cnt++;
		}
		$msg .= "</tr>\n";
		print $OF $msg;
	}
	print $OF "</table>\n";
	prt( "Done primary table ".($acnt + $dcnt)." ... now to do date sorted table ...\n" );
#	add_new_table($OF); # add new table sorted by time
#	add_jump_table($OF); # put a jump table of build-in function
#	out_link_line($OF, 0); # avoid none
	# add 4.01 validation ...
	print $OF <<"EOF";
<p>
<a href="http://validator.w3.org/check?uri=referer">
<img src="valid-html401.gif" alt="Valid HTML 4.01 Transitional" width="88" height="31">
</a>
</p>

EOF
	print $OF "</body>\n";
	$msg = "<!-- P26.".YYYYMMDD(time())." generated by $0 for geoffmclane.com/mperl/samples -->\n";
	print $OF $msg;
	print $OF "</html>\n";
	close($OF);
	prt( "Done file [$of] with $icnt files, plus $dcnt of $tsc from previous ...\n" );
}

########################################################################
# Main program.
########################################################################
do_stx_load();
get_existing_files( $out_folder );
###get_dir_files( $in_folder );
###push(@php_list, ['dir01.php', 123, 123]);
push(@php_list, ['about.php', 123, 123]);
process_files( $in_folder );
###generate_index(); # output @done_files - array of files to index.htm
my $tl = $out_folder.'\about.htm';
system( $tl );
close_log($outfile,1);

exit(0);

# eof - php2html.pl
