#!/Perl
# AIM: genindex2.pl
# to read a FOLDER, find the INDEX HTML file there, and build a LIST of LINKS from it
# That list gives the HTM file name and title
# FIX20060626 - added option to build list into an Array, giving lnk,file date,title ...
# of form 
# 	var ma = new Array(
#		new item( "index.htm", "Index", "Link to main index" ),
#		new item( "fgfs-026.htm", "2006-06-06", "FlightGear 0.9.10 with MSVC8"),
# to get the file data,  using
# ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
#       $atime,$mtime,$ctime,$blksize,$blocks)
#           = stat($filename);
# or
# my $sa = stat($filename);
# my $tm = scalar localtime $sa->mtime;
# UGH - But this is ALL extracted from index.htm ... not exactly what I want now ...
use File::stat;

print "$0 ... Hello, World...\n";
# --- configuration and shared state; the subs further down read these ---
# this should come from the command line, or an INPUT FILE
my @indexs = ("index.htm", "index.html", "index.php"); # names accepted as the INDEX file
my @in_excl = (); # file names to exclude, filled by parse_arguments()
my $def_folder = 'c:\HOMEPAGE\P26\fg'; # folder used when no argument is given
my $in_dir; # = shift || die "ERROR: Must give input folder ...\n";
# Build the log name from the script's base name only: $0 may carry a
# directory part, and 'temp'.$0 would then not be a creatable file name.
(my $script_name = $0) =~ s{^.*[\\/]}{};
my $out_file = 'temp'.$script_name.'.txt';
my $OH; # log file handle, written by prt()
my @mths = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my @newarr = (); # contents of files found, as mdata|$file|$dtt|$title
# 'or' rather than '||': with '||' the die attaches to the (always true)
# file name string and a failed open goes unnoticed.
open($OH, '>', $out_file) or die "ERROR: Can NOT create output file ... aborting ...\n";
my $write_log = 1;

parse_arguments(@ARGV);

prt ("Processing directory $in_dir ...\n");

# Read the directory listing through a lexical handle.
opendir( my $dir_h, $in_dir ) or die "ERROR: Can NOT open $in_dir ... aborting ...\n";
my @files = readdir($dir_h);
closedir($dir_h);

prt ("Found ".scalar(@files)." items in the directory ...\n");
# State shared with the passes that follow.
my $file;
my $filcnt = 0;
my @titles = ();
my @links = ();
# first pass - find the file that will give us the ORDER
my $got_ind = 0;
my $ind_file = '';
my $line = '';
my @loc_files = ();
foreach $file (@files) {
	next if (($file eq '.')||($file eq '..'));
	my $ff = $in_dir . '/' . $file;
	next if -d $ff; # ignore directories
	# the first accepted file whose name is in @indexs wins
	if (is_my_file($file) && is_index($file)) {
		$got_ind = 1;
		$ind_file = $ff;
		last;
	}
}

if (! $got_ind) {
	# "\n", not "/n": the messages must end with a real newline
	prt( "ERROR: Unable to locate INDEX file ...\n" );
	die "aborting ...\n";
}

# Read the INDEX file and collect every link target that is not an http(s) URL.
open(my $idx_fh, '<', $ind_file) or die "Can not OPEN $ind_file! ... aborting ...\n";
my @ind_lines = <$idx_fh>; # slurp whole file, to an array of lines
close($idx_fh);

prt( "Found ".scalar(@ind_lines)." lines in $ind_file ...\n" );
# One href attribute per match. The value may be "double quoted",
# 'single quoted', or bare (then it ends at whitespace or '>').
# href must start the line or follow whitespace, so e.g. 'data-href' is skipped.
my $href_re = qr/(?:^|\s)href\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/i;
foreach my $ind_line (@ind_lines) {
	chomp $ind_line;
	# A /g scan always advances, so it terminates on any input, including
	# a line that begins with ' href' or a value with no closing quote.
	while ($ind_line =~ /$href_re/g) {
		# exactly one of the three alternatives captured the value
		my ($hr) = grep { defined } ($1, $2, $3);
		if ($hr =~ /^http/i) {
			prt( "Discarded [$hr] in $ind_line ... \n" );
		} else {
			###prt( "Found [$hr] in $ind_line ... \n" );
			push(@loc_files,$hr);
		}
	}
}
prt( "Found ".scalar(@loc_files)." local files in $ind_file ...\n" );
# go through the files, and get the TITLE for each
# building up a TITLE array (@titles, as title|file) and the dated list
# (@newarr, as mtime|file|yyyy/mm/dd|title)
foreach my $name (sort @files) {
	next if (($name eq '.')||($name eq '..'));
	my $ff = $in_dir . '/' . $name;
	next if -d $ff; # ignore directories
	next if !is_my_file($name); # ignore other file types and excluded names

	my $tit = get_title($ff);
	$tit = $name if length($tit) == 0; # no title: fall back to the file name
	push(@titles, "$tit|$name");
	$filcnt++;

	### FIX20060626 - add other type of output ...
	my $sb = stat($ff); # File::stat object, undef on failure
	if (! $sb) {
		prt( "WARNING: Can NOT stat $ff ... no date entry added ...\n" );
		next;
	}
	my $mtime = $sb->mtime;
	# Take the date from localtime's list form. Splitting the string form
	# on single spaces breaks for days 1-9, which are space padded
	# ('Sat Mar  2 ...'), and such files were then left out of @newarr.
	my ($mday, $mon, $year) = (localtime($mtime))[3, 4, 5];
	my $dtt = sprintf('%04d/%02d/%02d', $year + 1900, $mon + 1, $mday);
	push(@newarr, "$mtime|$name|$dtt|$tit");
}

prt( "Done list in FILE order ... now title order ...\n" );
### now I want to OUTPUT in the LINKS FOUND ORDER
#################################################
# the 'index' file should come FIRST
# NOTE: what is emitted here is the plain string sort of the "title|file" entries.
my @by_title = sort @titles;
my $fc;
for ($fc = 0; $fc < scalar(@by_title); $fc++) {
	my $entry = $by_title[$fc];
	prt( "\$mypages[$fc] = \"$entry\";\n" );
}

##close $OH;
# flush/close the log, hand it to the system, and finish
close_log();
exit(0);
### end of program ###

## month to number
#  $mth - month abbreviation as listed in @mths ('Jan' .. 'Dec')
#  Returns 1..12 for a listed month; logs a warning and returns 0 otherwise.
sub mth_to_num {
	my $wanted = shift;
	for my $idx (0 .. $#mths) {
		# months are numbered from 1, array slots from 0
		return $idx + 1 if ($mths[$idx] eq $wanted);
	}
	prt( "WARNING: Returning 0!!!\n" );
	return 0;
}

## Return the text of the <title> element of an HTML file.
#  $f - path of the file to read
#  Returns the text of the first complete <title ...>...</title> element,
#  with every run of whitespace (including line breaks) collapsed to one
#  space and both ends trimmed. Returns '' when there is no complete title.
#  Dies when the file can not be opened.
sub get_title {
	my ($f) = @_;
	# 3-arg open with a lexical handle: the name is never parsed for a mode,
	# and no global handle is shared with other subs.
	open(my $fh, '<', $f) or die "Can not OPEN $f! ... aborting ...\n";
	my $html = do { local $/; <$fh> }; # slurp whole file, as one string
	close($fh);
	return '' if !defined $html;

	# [^>]* lets the opening tag carry attributes; /s lets the text span
	# lines; the non-greedy .*? stops at the first closing tag, so an empty
	# title yields '' rather than the closing tag itself.
	return '' unless $html =~ m{<title\b[^>]*>(.*?)</title\s*>}is;
	my $titln = $1;

	$titln =~ s/\s+/ /g; # line breaks and indentation become single spaces
	$titln =~ s/^ //;
	$titln =~ s/ $//;
	return $titln;
}

## Decide whether a directory entry is a page this script should process.
#  $f - bare file name
#  Returns 1 when the name ends in .htm, .html, .shtml or .php (any case)
#  and is not listed in @in_excl (compared case-insensitively); else 0.
sub is_my_file {
	my ($f) = @_;
	# wrong extension: nothing more to check
	return 0 unless ($f =~ /\.(?:htm|html|shtml|php)$/i);
	my $wanted = uc($f);
	foreach my $skip (@in_excl) {
		### prt("Comparing ".uc($skip)." with $wanted ...\n");
		return 0 if (uc($skip) eq $wanted);
	}
	return 1;
}

## Test whether a file name is one of the accepted INDEX names in @indexs.
#  $f - bare file name
#  Returns 1 on a case-insensitive match, 0 otherwise.
sub is_index {
	my ($f) = @_;
	my $wanted = uc($f);
	my $hits = grep { uc($_) eq $wanted } @indexs;
	return $hits ? 1 : 0;
}

## Remove leading space characters from a string.
#  Only ' ' is removed; tabs and other whitespace are left alone.
#  Returns the shortened copy; the caller's string is not modified.
sub eat_sp {
	my ($text) = @_;
	$text =~ s/^ +//;
	return $text;
}

## Print a message to the current output handle and to the log handle $OH.
#  Only the first argument is printed.
sub prt {
	my $text = shift;
	print $text;
	print {$OH} $text;
}

## Close the log handle $OH, but only when logging is enabled ($write_log).
sub log_close {
	close( $OH ) if ($write_log);
}

## Finish logging: announce it, close the log, then hand the log file name
#  to system(). Does nothing when $write_log is false.
#  NOTE(review): system() runs the file name as a command; presumably this
#  relies on the OS opening the .txt file in its associated viewer - confirm.
sub close_log {
	if ($write_log) {
		# the variable is $out_file; '$outfile' is undefined and printed as empty
		prt( "Closing LOG file, and passing to 'system($out_file)'\nMay need to CLOSE notepad to continue ...\n" );
		log_close();
		system( $out_file );
	}
}


## Process the command line.
#  Argument 1: input folder, stored in $in_dir; must be an existing directory.
#              When no argument is given, $def_folder is used and a warning logged.
#  Argument 2: optional exclude file, one file name per line, loaded into @in_excl.
#  Dies on a missing folder, a missing or unreadable exclude file, or a
#  third argument.
sub parse_arguments {
	my (@av) = @_;
	my $ac = 0;
	if (! @av) {
		push(@av, $def_folder);
		###die "ERROR: Must give input folder ... aborting ...\n";
		prt( "WARNING: Should give an input folder ...\n" );
		prt( "Using default [$def_folder] ...\n" );
	}
	foreach my $arg (@av) {
		$ac++; # bump argument count
		if ($ac == 1) {
			$in_dir = $arg;
			if (! -d $in_dir) {
				die "ERROR: Can not locate folder $in_dir ... aborting ...\n";
			}
		} elsif ($ac == 2) {
			if (! -f $arg) {
				die "ERROR: Can not locate exclude file $arg ... aborting ...\n";
			}
			# 3-arg open: the user-supplied name is never parsed for a mode
			# or a pipe. The handle is lexical, so no global is shared.
			open(my $excl_fh, '<', $arg) or die "Can not OPEN $arg! ... aborting ...\n";
			my @names = ();
			while (my $entry = <$excl_fh>) {
				# strip LF and also the CR of CRLF files, which would
				# otherwise stop the entry from ever matching a file name
				$entry =~ s/[\r\n]+$//;
				next if !length($entry); # blank lines exclude nothing
				prt ("Excluding [$entry] ...\n");
				push(@names, $entry);
			}
			close($excl_fh);
			@in_excl = @names;
		} else {
			die "ERROR: Too many arguments given ... aborting ...\n";
		}
	}
}

# eof - genindex2.pl
