#!/usr/bin/perl -w
# NAME: genindex04.pl
# AIM: Complete re-write - Given an input folder, generate a tempsitemap.htm of 
# a complete directory scan
# 10/12/2013 - Add alpha sorted jump list.
# 18/11/2013 - Add description column, from description.csv file
# 06/01/2012 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::stat;
use Cwd;
# Platform dependent locations: where lib_utils.pl is loaded from, where the
# log file goes, and which path separator is appended to directory names.
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
# A bare /win/i test also matches 'darwin' (macOS), which must keep the
# unix style defaults above, so that platform is excluded explicitly.
if (($os =~ /win/i) && ($os !~ /^darwin/i)) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
# Make the helper library findable, then load it.
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# log file stuff
our ($LF);  # NOTE(review): presumably the log handle used by lib_utils.pl - confirm
# Reduce $0 to the bare script name (last component after any / or \).
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
# The log is named after the script and lives in the temporary directory.
my $outfile = $temp_dir.$PATH_SEP."temp.$pgmname.txt";
open_log($outfile);

# ---- user settings (several are changed by parse_args) ----
my $VERS             = "0.0.1 2012-01-06";      # shown by give_help
my $load_log         = 0;                       # -l: passed to close_log at exit
my $in_dir           = '';                      # directory to scan (required)
my $verbosity        = 0;                       # -v: tested by VERB1..VERB9
my $out_xml          = 'tempsitemap.htm';       # -o: output html file
my $xclude_repo_dirs = 1;
my @repo_dirs        = ('CVS', '.svn', '.git', '.hg');  # names matched by is_repo_directory
my $recursive        = 0;                       # -r: also scan sub-directories
my $html_only        = 1;                       # -a clears this to list ALL files
my $blank            = 0;                       # -b: add target="_blank" to links
my $colcount         = 2;                       # -c: link columns per table row
my $desccol          = 0;                       # -d: add a description column, text from a csv file
my %descriptions;                               # file name => description, see load_descriptions
my $add_sorted       = 0;                       # -s: add the alpha sorted jump list

# ---- debug ----
my $debug_on = 0;                               # when set, $def_file is the default input
my $def_file = 'C:\GTools\perl';

# ---- program variables ----
my @warnings;                                   # messages collected by prtw
my $cwd = cwd();

# forward declaration, scan_directory calls itself recursively
sub scan_directory($$);

# Verbosity predicates: each one is true once $verbosity has reached the
# level given in its name.
sub VERB1() {
    return (1 <= $verbosity);
}
sub VERB2() {
    return (2 <= $verbosity);
}
sub VERB5() {
    return (5 <= $verbosity);
}
sub VERB9() {
    return (9 <= $verbosity);
}

# Tell whether a directory entry name is one of the names in @repo_dirs.
#   $dir - entry name; compared with 'eq', so it must match a list item exactly
# Returns 1 when the name is in the list, otherwise 0.
sub is_repo_directory($) {
    my ($name) = @_;
    return (grep { $_ eq $name } @repo_dirs) ? 1 : 0;
}

# Print every message collected in @warnings, or - only at verbosity 9 and
# above - a note that there were none.
#   $val - accepted from the caller but not used here
sub show_warnings($) {
    my ($val) = @_;
    my $total = scalar @warnings;
    if ($total == 0) {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
        return;
    }
    prt("\nGot $total WARNINGS...\n");
    foreach my $warning (@warnings) {
        prt("$warning\n");
    }
    prt("\n");
}

# Common exit point: print an optional final message, list the collected
# warnings, close the log and leave with the given exit code.
#   $val - process exit code
#   $msg - final message; a newline is appended when it does not end in one
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        prt(($msg =~ /\n$/) ? $msg : $msg."\n");
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


# Print a warning now and remember it in @warnings so show_warnings can
# repeat it at exit.
#   $tx - warning text; one trailing newline is removed before it is stored
sub prtw($) {
    (my $text = shift) =~ s/\n$//;
    prt($text."\n");
    push(@warnings,$text);
}

# Read a text file and print, with its line number, the remainder of every
# line that contains '#include <something>'.
#   $inf - path of the file to read
# Exits through pgm_exit(1,...) when the file cannot be opened.
sub process_in_file($) {
    my ($inf) = @_;
    # Three argument open with a lexical handle: the file name is never
    # interpreted as part of the open mode, and no global handle is left behind.
    my $fh;
    if (! open($fh, '<', $inf)) {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n");
    }
    my @lines = <$fh>;
    close($fh);
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my $lnn = 0;    # 1-based line number for the report
    foreach my $line (@lines) {
        chomp $line;
        $lnn++;
        if ($line =~ /\s*#\s*include\s+(.+)$/) {
            my $inc = $1;
            prt("$lnn: $inc\n");
        }
    }
}

# Decide by extension whether a file name looks like a web page.
#   $fil - file name or path
# Returns 1 for names ending in .html, .htm, .php, .shtml or .phtml
# (any letter case), otherwise 0.
sub is_html_like($) {
    my ($name) = @_;
    return ($name =~ /\.(?:html|htm|php|shtml|phtml)$/i) ? 1 : 0;
}

# Recursively collect the files below a directory.
#   $dir   - directory to scan; a path separator is appended when missing
#   $rlist - reference to the array that receives one entry per file:
#            [ full path, extension, modification time, description ]
# '.bak' files are always skipped, and non html-like files are skipped while
# $html_only is set. Sub-directories whose name is in the repository list are
# not entered. A directory that cannot be opened is reported with prtw and
# skipped.
sub scan_directory($$) {
    my ($dir,$rlist) = @_;
    my $dh;     # lexical handle, so nested calls can never share one
    if (!opendir($dh,$dir)) {
        prtw("WARNING: Failed to open directory [$dir]\n");
        return; # nothing can be read from an unopened handle
    }
    my @files = readdir($dh);
    closedir($dh);
    $dir .= $PATH_SEP if (!($dir =~ /(\\|\/)$/));
    my @dirs = ();
    # Default cell text when no description column was requested - the same
    # '&nbsp;' entity process_in_directory uses.
    my $desc = '&nbsp;';
    foreach my $item (@files) {
        next if (($item eq '.')||($item eq '..'));
        my $ff = $dir.$item;
        if (-f $ff) {
            # next if ($item eq $out_xml);    # skip self
            next if ($item =~ /\.bak$/i);   # skip .bak
            my $ishtml = is_html_like($item);
            next if ($html_only && !$ishtml);
            my ($n,$d,$e) = fileparse($item, qr/\.[^.]*/);
            my $sb = stat($ff);
            my $ft = $sb->mtime;
            if ($desccol) {
                if (defined $descriptions{$item}) {
                    $desc = $descriptions{$item};
                } else {
                    prtw("WARNING: Presently no decription for [$item]\n");
                    $desc = '&nbsp;';
                }
            }
            #                0   1  2   3
            push(@{$rlist},[$ff,$e,$ft,$desc]); # got a FILE
        } elsif (-d $ff) {
            push(@dirs,$ff) if (!is_repo_directory($item)); # got a directory
        } else {
            prtw("WARNING: item [$ff] skipped!\n");
        }
    }
    # Descend only after this level has been read and its handle closed.
    foreach my $subdir (@dirs) {
        scan_directory($subdir,$rlist);
    }
    return;
}

# Sort comparator: ascending, case-insensitive, on element 1 of two array
# references (write_html stores the bare file name there).
# Both sides must use the same index; comparing index 1 of one record with
# index 2 of the other does not give a consistent ordering.
# Returns -1, 0 or 1 as sort expects.
sub mycmp_nc_n1 {
    return lc($a->[1]) cmp lc($b->[1]);
}

# Build the site map page, write it to $out_xml and open it in a browser.
#   $dir   - base directory; its length is stripped from the front of every
#            file path to form the link
#   $rlist - reference to the list of [ path, extension, mtime, description ]
# Three description values get special treatment:
#   'EXCLUDE'              - the file is left out completely
#   'Previous backup copy' - the file is listed after all the other files
#   'This file'            - shown in bold, without a link
# The table has $colcount link columns, each followed by a description column
# when $desccol is set, and a top/end navigation row after every 24 rows.
# With $add_sorted an alphabetic jump list follows the table.
sub write_html($$) {
    my ($dir,$rlist) = @_;
    my $len = length($dir);
    my $cnt = scalar @{$rlist};
    my $added_cnt = 0;
    my $wrap = $colcount;   # cells per table row
    my $mrow = 24;          # rows between navigation rows
    my $maxlen = 50;        # longer link texts get shortened
    my $rows = 0;
    my $cols = 0;
    my $html = "<html><head><title>Site Map</title></head>\n";
    $html .= "<body><a name=\"top\"></a>\n";
    $html .= "<h1 align=\"center\">Site Map - $cnt Files</h1>\n";
    $html .= "<p align=\"center\"><a href=\"index.htm\">index</a>\n";
    $html .= "<a href=\"#end\">end</a></p>\n";
    if ($add_sorted) {
        $html .= "<p>Alpha sorted jump <a href=\"#sorted\">list</a></p>\n";
    }
    $html .= "<!-- *** MAIN TABLE BEGIN *** -->\n";
    $html .= "<table align=\"center\" width=\"100%\" border=\"0\" cellpadding=\"2\" cellspacing=\"2\" summary=\"Site List\">\n";
    $html .= "<thead>\n";
    $html .= "<tr>\n";
    for (my $i = 0; $i < $wrap; $i++) {
        $html .= "<th>Link</th>\n";
        $html .= "<th>Description</th>\n" if ($desccol);
    }
    $html .= "</tr>\n";
    $html .= "</thead>\n";
    $html .= "<tbody>\n";

    # Append one finished cell (plus its description cell) to the table,
    # opening and closing rows and adding the periodic navigation row.
    my $add_cell = sub {
        my ($cell) = @_;
        $html .= "<tr>\n" if ($cols == 0);
        $html .= $cell;
        $cols++;
        $added_cnt++;
        return if ($cols != $wrap);
        $html .= "</tr>\n";
        $cols = 0;
        $rows++;
        return if ($rows != $mrow);
        my $span = $desccol ? ($wrap * 2) : $wrap;
        $html .= "<tr><td align=\"center\" colspan=\"$span\">\n";
        $html .= "<a href=\"#top\">top</a> <a href=\"#end\">end</a>\n";
        $html .= "</td></tr>\n";
        $rows = 0;
        return;
    };

    my @backups = ();   # [ text, href, description ] shown after the rest
    my @links = ();     # [ href, name, text ] for the sorted jump list
    foreach my $rec (@{$rlist}) {
        ##                0   1  2   3
        #push(@{$rlist},[$ff,$e,$ft,$desc]); # got a FILE
        my $file = path_d2u(substr($rec->[0],$len)); # strip base directory
        my $desc = $rec->[3];
        next if ($desc eq 'EXCLUDE');
        my $href = $file;
        my ($nm) = fileparse($file);
        my $flen = length($file);
        my $ind = index($file,'/');
        # Shorten a long link text to '<first directory>...<file name>'. The
        # test is on the length of this link, not of the base directory.
        if (($flen > $maxlen)&&($ind > 0)) {
            my $top = substr($file,0,$ind);
            $file = $top.'...'.$nm;
        }
        push(@links,[$href,$nm,$file]);
        if ($desc eq 'Previous backup copy') {
            push(@backups,[$file,$href,$desc]);
            next;
        }
        my $cell = "<td>";
        if ($desc eq 'This file') {
            $cell .= "<strong>$file</strong>";
        } else {
            $cell .= "<a ";
            $cell .= 'target="_blank" ' if ($blank);
            $cell .= "href=\"$href\">$file</a>";
        }
        $cell .= "</td>\n";
        $cell .= "<td>$desc</td>\n" if ($desccol);
        $add_cell->($cell);
    }
    foreach my $bak (@backups) {
        my ($file,$href,$desc) = @{$bak};
        my $cell = "<td><a ";
        $cell .= 'target="_blank" ' if ($blank);
        $cell .= "href=\"$href\">$file</a></td>\n";
        $cell .= "<td>$desc</td>\n" if ($desccol);
        $add_cell->($cell);
    }
    # Pad an unfinished last row with empty cells.
    if ($cols) {
        while ($cols < $wrap) {
            $html .= "<td>&nbsp;</td>";
            $html .= "<td>&nbsp;</td>" if ($desccol);
            $cols++;
        }
        $html .= "</tr>\n";
    }
    $html .= "</tbody>\n";
    $html .= "</table>\n";
    $html .= "<!-- *** MAIN TABLE END *** -->\n";
    if ($add_sorted && @links) {
        $html .= "<a name=\"sorted\"></a>\n";
        my $line = "<p>Alpha sorted list: ";
        @links = sort mycmp_nc_n1 @links;
        foreach my $lnk (@links) {
            my $href = $lnk->[0];
            my $file = $lnk->[2];
            $line .= "<a ";
            $line .= 'target="_blank" ' if ($blank);
            $line .= "href=\"$href\">$file</a> ";
            # keep the generated source lines reasonably short
            if (length($line) > 90) {
                $html .= "$line\n";
                $line = '';
            }
        }
        $html .= "$line" if (length($line));
        $html .= "</p>\n";
    }
    # maybe now another table by the extension, or whatevr
    my $date = lu_get_YYYYMMDD_hhmmss_UTC(time());
    $html .= "<p>Done Site Map of $added_cnt of $cnt files, on ".$date." UTC, by $pgmname</p>\n";
    $html .= "<a name=\"end\"></a>";
    $html .= "</body>\n";
    $html .= "</html>\n";
    write2file($html,$out_xml);
    prt("Site list written to $out_xml\n");
    if ($load_log) {
        prt("=== HTML start ==========================================\n");
        prt($html);
        prt("=== HTML end   ==========================================\n");
    }
    # /win/i alone would also match 'darwin' (macOS), so exclude it.
    if (($os =~ /win/i) && ($os !~ /^darwin/i)) {
        system($out_xml);
    } else {
        # list form: the file name is passed as one argument, no shell involved
        system('firefox', $out_xml);
    }
}

# Sort comparator: numeric, descending, on element 2 of two array references
# (the modification time in the collected file records).
sub mycmp_decend_n2 {
    return $b->[2] <=> $a->[2];
}


# Scan the base directory - and, when $recursive is set, its
# sub-directories - then pass the files, newest first, to write_html.
#   $dir - base directory; exits through pgm_exit(1,...) if it cannot be opened
# The output file itself and '.bak' files are skipped, as are non html-like
# files while $html_only is set.
sub process_in_directory($) {
    my $dir = shift;
    opendir(DIR,$dir) || pgm_exit(1,"ERROR: Unable to open directory [$dir]!\n");
    my @entries = readdir(DIR);
    closedir(DIR);
    prt("Got ".scalar(@entries)." items, from base directory [$dir]...\n");
    ut_fix_directory(\$dir);
    my (@file_list, @subdirs);
    for my $entry (@entries) {
        next if ($entry eq '.' || $entry eq '..');
        my $path = $dir.$entry;
        unless (-f $path) {
            if (-d $path) {
                # got a directory - skip repos
                push(@subdirs,$path) unless (is_repo_directory($entry));
            } else {
                prtw("WARNING: item [$path] skipped!\n");
            }
            next;
        }
        # skip self, and skip .bak
        next if ($entry eq $out_xml || $entry =~ /\.bak$/i);
        my $ishtml = is_html_like($entry);
        next if ($html_only && !$ishtml);
        my $ext = (fileparse($entry, qr/\.[^.]*/))[2];
        my $mtime = stat($path)->mtime;
        my $desc = '&nbsp;';
        if ($desccol) {
            if (defined $descriptions{$entry}) {
                $desc = $descriptions{$entry};
            } else {
                prtw("WARNING: Presently no decription for [$entry]\n");
            }
        }
        # record layout:   0     1    2      3
        push(@file_list,[$path,$ext,$mtime,$desc]);
    }
    if ($recursive) {
        for my $subdir (@subdirs) {
            scan_directory($subdir,\@file_list);
        }
    }
    my $total = scalar @file_list;
    my @by_time = sort mycmp_decend_n2 @file_list;
    prt("Got TOTAL $total files, from directory $dir...\n");
    write_html($dir,\@by_time);
}

#########################################
### MAIN ###
# Read the command line first (parse_args leaves through pgm_exit on bad
# input), then build the site map for the chosen directory, and finish through
# pgm_exit so the collected warnings are shown and the log is closed.
parse_args(@ARGV);
process_in_directory($in_dir);
pgm_exit(0,"");
########################################

# Check that an option is followed by a value.
# Called with the remaining argument list: the first item is the option
# itself, everything after it is what follows on the command line.
# Exits through pgm_exit(1,...) when nothing follows.
sub need_arg {
    my $arg = shift;
    unless (@_) {
        pgm_exit(1,"ERROR: [$arg] must have a following argument!\n");
    }
}

# Load file descriptions from a comma separated file into %descriptions.
#   $fil - path of the csv file
# Every non-blank line after the first must split on ',' into exactly two
# fields, file name and description; other lines are reported with prtw.
# Exits through pgm_exit(1,...) when the file cannot be opened.
sub load_descriptions($) {
    my $fil = shift;
    # Three argument open with a lexical handle: the file name is never
    # interpreted as part of the open mode, and no global handle is left behind.
    my $fh;
    if (! open($fh, '<', $fil)) {
        pgm_exit(1,"ERROR: Unable to open description file [$fil]!\n");
    }
    my @lines = <$fh>;
    close($fh);
    my $lncnt = scalar @lines;
    my $dcnt = 0;
    # The line at index 0 is never read - presumably a header row; TODO confirm.
    for (my $i = 1; $i < $lncnt; $i++) {
        my $line = $lines[$i];
        chomp $line;
        my $tline = trim_all($line);
        next if (length($tline) == 0);
        my @arr = split(",",$line);
        my $cnt = scalar @arr;
        if ($cnt == 2) {
            $descriptions{$arr[0]} = $arr[1];
            $dcnt++;
        } else {
            my $lnn = $i + 1;   # 1-based line number for the message
            prtw("$lnn: Did NOT split into 2 [$line]! Got $cnt\n");
        }
    }
    prt("Loaded $dcnt descriptions from $fil\n");
}

# Parse the command line and set the user variables.
#   @av - the arguments, normally @ARGV
# An option starts with one or more '-' and is recognised by its first
# letter; anything else is taken as the input directory. -d, -o and -c
# consume the following argument as their value.
# Exits through pgm_exit on help, on an unknown option, on a missing or bad
# option value, and when no existing input directory was given.
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                # Non-greedy '.*?' so ALL trailing digits are captured; a
                # greedy '.*' leaves only the last one ('-v10' gave 0).
                if ($sarg =~ /^v.*?(\d+)$/) {
                    $verbosity = $1;
                } else {
                    # no number: each leading 'v' bumps the level by one
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Verbosity = $verbosity\n") if (VERB1());
            } elsif ($sarg =~ /^l/) {
                $load_log = 1;
                prt("Set to load log at end.\n") if (VERB1());
            } elsif ($sarg =~ /^r/) {
                $recursive = 1;
                prt("Set recrusive to $recursive.\n") if (VERB1());
            } elsif ($sarg =~ /^a/) {
                $html_only = 0;
                prt("Set load ALL files.\n") if (VERB1());
            } elsif ($sarg =~ /^d/) {
                $desccol = 1;
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                load_descriptions($sarg);
                prt("Set to add a description column.\n") if (VERB1());
            } elsif ($sarg =~ /^b/) {
                $blank = 1;
                prt("Set to add _blank to href.\n") if (VERB1());
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_xml = $sarg;
                prt("Set out file to [$out_xml].\n") if (VERB1());
            } elsif ($sarg =~ /^c/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                if (($sarg =~ /^\d+$/) && ($sarg ne '0')) {
                    $colcount = $sarg;
                    prt("Set column count to [$colcount].\n") if (VERB1());
                } else {
                    # report the rejected value, not the option that took it
                    pgm_exit(1,"ERROR: Column count must be 1 to nn! Not [$sarg]!\n");
                }
            } elsif ($sarg =~ /^s/) {
                $add_sorted = 1;
                prt("Add alpha sorted links.\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_dir = $arg;
            prt("Set input to [$in_dir]\n") if (VERB1());
            if (! -d $in_dir) {
                pgm_exit(1,"ERROR: Unable to find in directory [$in_dir]! Check name, location...\n");
            }
        }
        shift @av;
    }

    # With debugging on, fall back to the built-in default directory.
    if ((length($in_dir) ==  0) && $debug_on) {
        $in_dir = $def_file;
        prt("Set DEFAULT input to [$in_dir]\n");
    }
    if (length($in_dir) ==  0) {
        pgm_exit(1,"ERROR: No input directory found in command!\n");
    }
    # also covers the debug default, which was not checked in the loop
    if (! -d $in_dir) {
        pgm_exit(1,"ERROR: Unable to find in directory [$in_dir]! Check name, location...\n");
    }
}

# Print the usage text, with the current setting of each option, one prt
# call per line.
sub give_help {
    my @help_lines = (
        "$pgmname: version $VERS\n",
        "Usage: $pgmname [options] in-directory\n",
        "Options:\n",
        " --help    (-h or -?) = This help, and exit 0.\n",
        " --verb[n]       (-v) = Bump [or set] verbosity. def=$verbosity\n",
        " --load          (-l) = Load LOG at end. ($outfile)\n",
        " --out <file>    (-o) = Write output to this file.\n",
        " --recursive     (-r) = Recurse into subdirectories. (def=$recursive)\n",
        " --all           (-a) = Include ALL files. Default is just html like files. (def=$html_only)\n",
        " --blank         (-b) = Add target=\"_blank\" to href. (def=$blank)\n",
        " --cols n        (-c) = Set column count. (def=$colcount)\n",
        " --desc file.csv (-d) = Add description column, from csv file. (def=$desccol)\n",
        " --sort          (-s) = Add alpha sorted jump list. (def=$add_sorted)\n",
        " Will scan the input directory, and build a $out_xml html file.\n",
    );
    foreach my $text (@help_lines) {
        prt($text);
    }
}

# eof - genindex04.pl
