#!\perl -w
#
# sitemap02.pl -- a command-line utility for building an HTML site map
# ===========================================
# Usage:	perl sitemap02.pl [local_path] [exts]
#          e.g. perl sitemap02.pl /file/path/to/serverroot/ "xhtml, html"
# 22/07/2007 
use strict;
use File::Find;
use Getopt::Long;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# debug switches -- set any of them to 1 for extra log output
my $dbg1 = 0;	# per-directory progress messages
my $dbg2 = 0;	# per-file progress messages
my $dbg3 = 0;	# get_title() tracing
my $dbg4 = 0;	# file size / result tracing
# log file stuff
my ($LF);
# Reduce $0 to its bare file name so the log is always created in the
# current directory, whichever path separator was used to start the script.
my $pgmname = $0;
$pgmname =~ s{\A.*[\\/]}{}s;
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
###prt( "$0 ... Hello, World ...\n" );

# Work variables shared by the main flow further down the file.
my (%file_list, $local_path, $file_exts, $out_file, $fn, @lines, $line, $max, $title);
my @results = ();	# one [relative file name, title] pair per page found
my @fpfolders = qw( _vti_cnf _vti_pvt _private _derived );	# folder names to skip
$out_file = 'tempsm.htm';
# HTML stuff
my $m_doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"'."\n".
'"http://www.w3.org/TR/html4/loose.dtd">';

# parse the command line: two optional positional arguments
# (the Getopt::Long variant below is disabled)

##GetOptions( "localpath=s" => \$local_path,
##            "exts=s"      => \$file_exts);
$local_path = shift || 'C:/homepage/HOM/test4';
push(@fpfolders, 'ok');

#$local_path = shift || 'C:/homepage/GeoffAir';
$file_exts = shift || 'htm, html, xhtml';

mydie( "Usage: perl $pgmname [/file/path/to/root/ ['xhtml, html']]\n" )
     unless $local_path and $file_exts;

# Drop one trailing path separator (either flavour) so that relative names
# can later be cut out with substr(length($local_path) + 1).
$local_path =~ s{[\\/]\z}{};

# Split the comma separated extension list, tolerating any amount of
# whitespace around the entries and ignoring empty ones.
$file_exts =~ s/\A\s+//;
$file_exts =~ s/\s+\z//;
my @file_exts = grep { length } split(/\s*,\s*/, $file_exts);
mydie( "Usage: perl $pgmname [/file/path/to/root/ ['xhtml, html']]\n" )
     unless @file_exts;

# Lookup tables for get_date_time(), indexed by the month (0..11) and
# week day (0..6, Sunday first) values returned by localtime().
my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my @weekDays = qw(Sun Mon Tue Wed Thu Fri Sat);
# Return the current local time as "HH:MM:SS, Ddd Mmm D, YYYY".
# The week day and month names come from the file-level @weekDays and
# @months tables.
sub get_date_time {
	my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset, $dayOfWeek) = localtime();
	my $year = 1900 + $yearOffset;	# localtime() counts years from 1900
	# zero-pad every field so 9:05:03 is not rendered as "9:5:3"
	my $theTime = sprintf("%02d:%02d:%02d", $hour, $minute, $second);
	my $theDate = "$weekDays[$dayOfWeek] $months[$month] $dayOfMonth, $year";
	return "$theTime, $theDate";
}

# Collect every file with a wanted extension below $local_path,
# grouped by the directory that contains it.
%file_list = find_local_files(\@file_exts, [$local_path]);

##
# now, to the hunt...
#
DIR: foreach my $directory ( sort (keys (%file_list))) {

	prt( "Processing $directory ... " ) if ($dbg1);
	# only the last path component decides whether the folder is skipped
	my @arr = split('/', $directory);
	if (is_fp_folder( $arr[-1] ) ) {
		prt( "skipped ...\n" ) if ($dbg1);
		next DIR;
	}
	prt( "ok\n" ) if ($dbg1);
	FILE: foreach my $file (sort (@{$file_list{$directory}})) {
		# name relative to the site root: strip "$local_path/"
		$fn = substr($file, (length($local_path)+1));
		prt( "Processing file $fn [$file] ...\n" ) if ($dbg2);
		my $in;
		# three-argument open: the file name can never be taken as a mode
		if (open $in, '<', $file) {
			@lines = <$in>;
			close $in;
			$line = join('',@lines);
			$max = length($line);
			prt( "Processing $fn, $max characters, in ".scalar @lines." lines ...\n" ) if ($dbg4);
			$title = get_title($line);
			push(@results, [$fn, $title]);
			prt( "push(\@results, [$fn, $title])\n") if ($dbg4);

		} else {
			prt( "WARNING: Unable to open file [$file] ... $! ... \n" );
		}
	}
}

$max = scalar @results;
prt( "Got $max results ...\n" );
if ($max) {
	# characters that must not appear raw in attribute values or text
	my %html_entity = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;');
	my ($OF);
	if (open $OF, '>', $out_file) {
		write_html_head($OF);
		print $OF "<p>List of $max files found ...</p>\n";
		print $OF "<ol>\n";
		foreach my $entry (@results) {
			my ($name, $page_title) = @{$entry};
			# The file name comes from the disk and is escaped; the title was
			# cut out of an HTML page, so it is already markup and is copied as is.
			(my $safe_name = $name) =~ s/([&<>"])/$html_entity{$1}/g;
			print $OF "<li><a href=\"$safe_name\">$safe_name</a> - <b>$page_title</b></li>\n";
		}
		print $OF "</ol>\n";
		write_html_tail($OF, $out_file);
		# buffered write errors only surface at close time
		if (close $OF) {
			# NOTE(review): runs the file name as a command; presumably relies on
			# the Windows file association to open the page -- confirm.
			system($out_file);
		} else {
			prt( "WARNING: Unable to finish writing file [$out_file] ... $! ... \n" );
		}
	} else {
		prt( "WARNING: Unable to create file [$out_file] ... $! ... \n" );
	}
}

my $msg = "TBD";

prt( "$msg\n" );
close_log($outfile,1);
exit(0);

# utility subroutines
# Extract the text of the first <title> element from a chunk of HTML.
#   $txt    - the complete page content as one string
# Returns the text between the opening title tag and the next '<',
# passed through trim_all(); an empty title is passed when no title tag
# is present.  The tag name is matched case-insensitively and may carry
# attributes (e.g. <title lang="en">).
sub get_title {
	my ($txt) = shift;
	$txt = '' unless defined $txt;
	my $len = length($txt);
	my $tit = '';
	prt( "Get title from $len characters ...\n" ) if ($dbg3);
	# \b keeps look-alike tags such as <titlebar> from matching;
	# [^<]* stops the title text at the next tag, normally </title>.
	if ($txt =~ m{(<title\b[^>]*>)([^<]*)}i) {
		my ($tag, $text) = ($1, $2);
		prt( "Got $tag ...\n" ) if ($dbg3);
		$tit = $text;
	}
	return trim_all($tit);
}

# Recursively collect the files that carry one of the given extensions.
#   $extensions  - array ref of extensions without the dot, e.g. ['htm', 'html']
#   $directories - array ref of directories to search
# Returns a hash (as a list) mapping each directory that holds at least one
# match to an array ref of the matching file names as reported by
# $File::Find::name.  Directories and symbolic links are never listed.
sub find_local_files {
    my ($extensions, $directories) = @_;
    my %file_list = ();

    # An empty alternation would match every name ending in a dot,
    # so no usable extension means no result.
    my @wanted_exts = grep { defined($_) && length($_) } @{$extensions};
    return %file_list unless @wanted_exts;

    # Extensions are literal text: quote them so '.', '+' etc. cannot act
    # as regex operators, and compile the pattern once.
    my $alternatives = join('|', map { quotemeta($_) } @wanted_exts);
    my $extension_re = qr/\.(?:$alternatives)\z/;

    my $wanted_files = sub {
        return if -d $_;
        return if -l $_;
        push(@{$file_list{$File::Find::dir}}, $File::Find::name)
            if $File::Find::name =~ $extension_re;
    };

    File::Find::find($wanted_files, @{$directories});
    return %file_list;
}

# Replace the first occurrence of $old_path inside $file with $new_path.
#   $old_path - path fragment to look for (taken literally, not as a regex)
#   $new_path - replacement text
#   $file     - the file name to translate
# Returns the translated name, or $file unchanged when $old_path does not
# occur in it.
sub translate_path {
    my ($old_path, $new_path, $file) = @_;
    # index/substr instead of s/// so that '.', '\', '+' and friends in a
    # path are never interpreted as regex syntax
    my $pos = index($file, $old_path);
    substr($file, $pos, length($old_path), $new_path) if $pos >= 0;
    return $file;
}

################################################
# ignore FRONTPAGE folders
################################################
# Tell whether a folder name is on the skip list.
#   $inf - a single folder name (no path)
# Returns 1 when the name equals, ignoring case, one of the entries of
# the file-level @fpfolders list, otherwise 0.
sub is_fp_folder {
	my ($inf) = shift;
	# grep in scalar context counts the matching entries
	my $hits = grep { lc($inf) eq lc($_) } @fpfolders;
	return $hits ? 1 : 0;
}

# Write the opening part of the generated page to an open file handle:
# the doctype held in the file-level $m_doctype, the <head> section and
# the "Site Index" heading carrying the bm_top anchor.
#   $f - file handle opened for writing
sub write_html_head { # ($OF)
	my ($f) = shift;
	my $page_top = <<"EOF";
$m_doctype
<html>
<head>
<title>Site Index</title>
<meta http-equiv="Content-Language" content="en-au">
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
</head>

<body>
<h1 align="center"><a name="bm_top"
   id="bm_top"></a>Site Index</h1>

EOF
	print {$f} $page_top;
}

# Write the closing part of the generated page to an open file handle:
# the bm_end anchor with an "EOF - <file name>" line, two HTML comments
# (generator name from the file-level $pgmname, and the time stamp from
# get_date_time()), and the closing </body> and </html> tags.
#   $f  - file handle opened for writing
#   $of - name of the file being written, shown in the EOF line
sub write_html_tail {	# ($OF, filename);
	my ($f, $of) = @_;
	my ($msg);

	# the bm_end anchor is closed right away, like bm_top in the page head
	print {$f} <<"EOF";

<p><a name="bm_end"
   id="bm_end"></a>EOF - $of
</p>

EOF

	$msg = "<!-- generated by $pgmname -->\n";
	$msg .= "<!-- " . get_date_time() . " -->\n";
	print {$f} $msg;

	print {$f} "</body>\n";
	print {$f} "</html>\n";
}


# eof - sitemap02.pl