#!/perl -w
# NAME: getfilecounts.pl
# AIM: Given a root file, or folder, scan ALL directories, and report 'type' counts...
# but in essence this is just a 'template' for further file processing
# 2010/04/13  - geoff mclane - http://geoffair.net/mperl/
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
unshift(@INC, 'C:\GTools\perl');
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
# The log file is named temp.<script name>.txt inside $perl_dir.
my ($LF);
# Reduce $0 to the bare script name however the script was invoked
# (absolute or relative path, '\' or '/' separators).  Previously only a
# 'X:\...' style path was stripped, so any other invocation left path
# separators embedded in $outfile.
my $pgmname = $0;
$pgmname =~ s{^.*[\\/]}{};
my $perl_dir = 'C:\GTools\perl';
my $outfile = $perl_dir."\\temp.$pgmname.txt";
# open_log() is not defined in this file - presumably supplied by logfile.pl.
open_log($outfile);

# user variables
# $load_log is handed to close_log() by pgm_exit(); presumably non-zero asks
# for the log to be shown on exit - confirm in logfile.pl.
my $load_log = 0;
# Default start point (file or directory); parse_args() replaces it with the
# last command line argument when one is given.
my $in_file = 'C:\HOMEPAGE\FG\index.html';

# Directory names that are never descended into (see is_fp_folder).
my @fpfolders = qw( aspnet_client _vti_cnf _vti_pvt _private _derived );
# Extension lists used to classify files (see get_file_ext_type);
# compared case-insensitively by is_this_extent.
my @html_ext = qw( .htm .html .shtml .php );
my @graf_ext = qw( .jpg .jpeg .gif .png .bmp .ico .mpg );
my @css_ext  = qw( .css );
my @script_ext = qw( .js .class .cgi );

### program variables
my @warnings = ();          # messages collected by prtw(), listed by show_warnings()
my $cwd = cwd();            # working directory at start-up
my $os = $^O;               # OS name, tested by os_is_win()
my @g_file_array = ();      # one [ path, type code, 0, 0 ] entry per file found
my @g_xclude_dirs = ();     # extra directory names to skip (nothing in this file fills it)
my @g_xclude_files = ();    # file names to skip (nothing in this file fills it)
my $g_indir = '';           # directory part of $in_file, set by the main code
my $g_infile = '';   # file part of $in_file, empty when a directory was given
my $g_dir_count = 0;        # directories seen, including excluded ones
my $g_item_count = 0;       # every directory entry seen except '.' and '..'

### debug
my $dbg01 = 0;  # non-zero: print a line for each directory as it is read

### forward refs
# Declares the prototype early so calls made before the definition are
# checked against it.
sub process_sub_dir($);

# Single exit point of the script.
#   $val - process exit status
#   $msg - final message; an empty string prints nothing
# A missing trailing newline is added to the message before it goes to
# prt(). The log is then closed via close_log() and the process ends.
sub pgm_exit($$) {
    my ($status, $text) = @_;
    if (length($text)) {
        my $line = ($text =~ /\n$/) ? $text : $text."\n";
        prt($line);
    }
    close_log($outfile, $load_log);
    exit($status);
}


# Print a warning through prt() and remember it in @warnings.
# One trailing newline is removed first, so the stored text never ends in
# a newline while the printed text always ends in exactly one.
sub prtw($) {
    my $text = $_[0];
    $text =~ s/\n$//;
    prt($text."\n");
    push @warnings, $text;
}

# Print every message collected by prtw(), or a note that there were none.
sub show_warnings() {
    # nothing collected - say so and finish
    return prt( "\nNo warnings issued.\n\n" ) unless (@warnings);

    my $count = scalar(@warnings);
    prt( "\nGot ".$count." WARNINGS...\n" );
    for my $warning (@warnings) {
        prt("$warning\n");
    }
    prt("\n");
}

# Returns 1 when $os (a copy of $^O) is 'MSWin32', otherwise 0.
sub os_is_win() {
    return 1 if ($os eq 'MSWin32');
    return 0;
}

# Return a copy of the given path with every backslash turned into a
# forward slash.  The caller's string is not modified.
sub dos_2_unix($) {
    my $path = shift;
    $path =~ tr{\\}{/};    # plain character mapping, no regex needed
    return $path;
}

# Returns 1 when the given directory name is listed in @fpfolders, else 0.
# The comparison ignores case on Windows and is exact elsewhere.
sub is_fp_folder($) {
    my ($inf) = shift;
    my $fold_case = os_is_win();
    $inf = lc($inf) if ($fold_case);
    foreach my $fil (@fpfolders) {
        # $fil aliases the array element, so lower-case a copy rather than
        # assigning to $fil (which would rewrite @fpfolders itself).
        my $name = $fold_case ? lc($fil) : $fil;
        return 1 if ($inf eq $name);
    }
    return 0;
}

# Returns 1 when the given directory name is listed in @g_xclude_dirs,
# else 0.  The comparison ignores case on Windows and is exact elsewhere.
sub is_xclude_folder($) {
    my ($inf) = shift;
    my $fold_case = os_is_win();
    $inf = lc($inf) if ($fold_case);
    foreach my $fil (@g_xclude_dirs) {
        # $fil aliases the array element, so lower-case a copy rather than
        # assigning to $fil (which would rewrite @g_xclude_dirs itself).
        my $name = $fold_case ? lc($fil) : $fil;
        return 1 if ($inf eq $name);
    }
    return 0;
}

# Returns 1 when a directory name must not be descended into, i.e. it is
# matched by is_fp_folder() or by is_xclude_folder(); otherwise 0.
sub is_excluded_dir($) {
    my $name = shift;
    # the second test only runs when the first one fails
    my $skip = (is_fp_folder($name) || is_xclude_folder($name));
    return $skip ? 1 : 0;
}

# Returns 1 when the given file name is listed in @g_xclude_files, else 0.
# The comparison ignores case on Windows and is exact elsewhere.
sub is_user_excluded($) {
    my ($file) = shift;
    my $fold_case = os_is_win();
    $file = lc($file) if ($fold_case);
    foreach my $fil (@g_xclude_files) {
        # $fil aliases the array element, so lower-case a copy rather than
        # assigning to $fil (which would rewrite @g_xclude_files itself).
        my $name = $fold_case ? lc($fil) : $fil;
        return 1 if ($file eq $name);
    }
    return 0;
}

#########################################################
# Passed an array REF of extensions,
# check if this is one of them?
#########################################################
# Case-insensitive membership test for an extension.
#   $ext - extension to look for, e.g. '.htm'
#   $rex - reference to an array of known extensions
# Returns 1 when $ext equals one of the entries, otherwise 0.
sub is_this_extent($$) {
    my ($ext, $rex) = @_;
    my $wanted = lc($ext);
    my $hits = grep { lc($_) eq $wanted } @{$rex};
    return $hits ? 1 : 0;
}

############################################
# only looking for HTM, HTML, PHP,
# could be extended to others maybe ...
############################################

# test an EXTENSION, of the form '.htm'...
# Returns 1 when the extension (e.g. '.htm') is listed in @html_ext, else 0.
sub is_htm_ext($) {
    my ($suffix) = @_;
    return is_this_extent($suffix, \@html_ext);
}
# Returns 1 when the extension (e.g. '.png') is listed in @graf_ext, else 0.
sub is_graf_ext($) {
    my ($suffix) = @_;
    return is_this_extent($suffix, \@graf_ext);
}
# Returns 1 when the extension is '.zip' or '.gz' (any case), else 0.
sub is_zip_ext($) {
    my ($suffix) = @_;
    return is_this_extent($suffix, [ qw( .zip .gz ) ]);
}
# Returns 1 when the extension is listed in @css_ext, else 0.
sub is_css_ext($) {
    my ($suffix) = @_;
    return is_this_extent($suffix, \@css_ext);
}
# Returns 1 when the extension is '.txt' (any case), else 0.
sub is_txt_ext($) {
    my ($suffix) = @_;
    return is_this_extent($suffix, [ qw( .txt ) ]);
}
# Returns 1 when the extension is listed in @script_ext, else 0.
sub is_script_ext($) {
    my ($suffix) = @_;
    return is_this_extent($suffix, \@script_ext);
}

# test a FILE/PATH extension
# Returns 1 when the last '.xxx' suffix of the file name or path passes
# is_htm_ext(), else 0.
sub is_htm_file_ext($) {
    my $path = shift;
    # fileparse returns (name, directory, suffix); only the suffix is needed
    my $suffix = (fileparse($path,qr/\.[^.]*/))[2];
    return is_htm_ext($suffix);
}
# Returns 1 when the last '.xxx' suffix of the file name or path passes
# is_graf_ext(), else 0.
sub is_graphic_file_ext($) {
    my $path = shift;
    # fileparse returns (name, directory, suffix); only the suffix is needed
    my $suffix = (fileparse($path,qr/\.[^.]*/))[2];
    return is_graf_ext($suffix);
}
# Returns 1 when the last '.xxx' suffix of the file name or path passes
# is_zip_ext(), else 0.
sub is_zip_file_ext($) {
    my $path = shift;
    # fileparse returns (name, directory, suffix); only the suffix is needed
    my $suffix = (fileparse($path,qr/\.[^.]*/))[2];
    return is_zip_ext($suffix);
}
# Returns 1 when the last '.xxx' suffix of the file name or path passes
# is_css_ext(), else 0.
sub is_css_file_ext($) {
    my $path = shift;
    # fileparse returns (name, directory, suffix); only the suffix is needed
    my $suffix = (fileparse($path,qr/\.[^.]*/))[2];
    return is_css_ext($suffix);
}
# Returns 1 when the last '.xxx' suffix of the file name or path passes
# is_txt_ext(), else 0.
sub is_txt_file_ext($) {
    my $path = shift;
    # fileparse returns (name, directory, suffix); only the suffix is needed
    my $suffix = (fileparse($path,qr/\.[^.]*/))[2];
    return is_txt_ext($suffix);
}
# Returns 1 when the last '.xxx' suffix of the file name or path passes
# is_script_ext(), else 0.
sub is_script_file_ext($) {
    my $path = shift;
    # fileparse returns (name, directory, suffix); only the suffix is needed
    my $suffix = (fileparse($path,qr/\.[^.]*/))[2];
    return is_script_ext($suffix);
}

# Classify a file name or path by its extension.
# Returns the numeric type code that type_2_stg() turns into a label:
#   1 HTML, 2 image, 3 archive, 4 CSS, 5 text, 6 script, 0 anything else.
# The checks run in that order and the first one that matches wins.
sub get_file_ext_type($) {
    my $path = shift;
    my @checks = (
        [ 1, \&is_htm_file_ext     ],
        [ 2, \&is_graphic_file_ext ],
        [ 3, \&is_zip_file_ext     ],
        [ 4, \&is_css_file_ext     ],
        [ 5, \&is_txt_file_ext     ],
        [ 6, \&is_script_file_ext  ],
    );
    for my $check (@checks) {
        my ($code, $matches) = @{$check};
        return $code if ($matches->($path));
    }
    return 0;
}

# Translate a type code from get_file_ext_type() into its report label.
# Codes 1..6 have their own label; every other value gives "Other".
sub type_2_stg($) {
    my $code = shift;
    my @labels = (
        [ 1, "HTML"   ],
        [ 2, "IMG"    ],
        [ 3, "ZIP"    ],
        [ 4, "CSS"    ],
        [ 5, "TXT"    ],
        [ 6, "SCRIPT" ],
    );
    # numeric comparison, exactly one label per code
    for my $pair (@labels) {
        return $pair->[1] if ($code == $pair->[0]);
    }
    return "Other";
}

# Append one record for a found file to @g_file_array.
# The record is [ full path, type code from get_file_ext_type(), 0, 0 ];
# the two trailing zeros are not read anywhere in this file.
sub add_2_g_file_array($) {
    my $path = shift;
    my $type = get_file_ext_type($path);
    push @g_file_array, [ $path, $type, 0, 0 ];
}

# Build the one-line summary of @g_file_array, for example
#   "Total=12 Other=1 HTML=5 IMG=6 (12)"
# "Total" is the number of records; the figure in parentheses is the sum of
# the per-type counts and should always equal it.
# Types are listed in ascending type-code order so that the line reads the
# same on every run (plain 'keys' order differs from run to run).
sub get_g_file_array_counts() {
    my %count_of = ();
    foreach my $entry (@g_file_array) {
        $count_of{ $entry->[1] }++;    # element 1 holds the type code
    }
    my $total = scalar @g_file_array;
    my $res = "Total=$total";
    my $sum = 0;
    foreach my $typ (sort { $a <=> $b } keys %count_of) {
        my $cnt = $count_of{$typ};
        $res .= " ".type_2_stg($typ)."=$cnt";
        $sum += $cnt;
    }
    $res .= " ($sum)";
    return $res;
}

# Scan one directory and then, recursively, each sub-directory that is not
# excluded.
#   $indir - directory to scan; backslashes are converted to '/'
# Effects on the file-level state:
#   - every entry except '.' and '..' increments $g_item_count
#   - plain files not rejected by is_user_excluded() are recorded through
#     add_2_g_file_array()
#   - every directory increments $g_dir_count, including excluded ones that
#     are not descended into
# The script exits through pgm_exit(1, ...) when the directory cannot be
# opened, or when an entry is neither a plain file nor a directory.
sub process_sub_dir($) {
    my ($indir) = @_;
    $indir = dos_2_unix($indir);    # use ALL unix form of path
    # A lexical handle keeps each level of the recursion independent of the
    # others (a bareword handle is a single package-wide global).
    opendir(my $dh, $indir)
        or pgm_exit(1,"ERROR: Unable to open directory [$indir]!\n");
    my @files = readdir($dh);
    closedir($dh);    # closed before recursing: one open handle at a time
    my $fcnt = scalar @files;
    prt("Processing $fcnt files, from [$indir]...\n") if ($dbg01);
    $indir .= '/' if !($indir =~ /(\\|\/)$/);
    my @dirs = ();
    foreach my $file (@files) {
        next if (($file eq '.')||($file eq '..'));
        $g_item_count++;
        my $ff = $indir.$file;
        if (-f $ff) {
            add_2_g_file_array($ff) if (!is_user_excluded($file));
        } elsif (-d $ff) {
            push(@dirs,$ff) if (!is_excluded_dir($file));
            $g_dir_count++;
        } else {
            # neither a plain file nor a directory: abort the run
            pgm_exit(1,"ERROR: WHAT IS THIS? [$ff]!!\n");
        }
    }
    # sub-directories are handled only after this level is complete
    foreach my $subdir (@dirs) {
        process_sub_dir($subdir);
    }
}

# Entry point of the scan.
#   $inf - a file (its containing directory is scanned) or a directory
# The path is converted to '/' form, the directory to scan is worked out,
# and the whole tree below it is collected by process_sub_dir(), which
# applies the same counting and exclusion rules at every level.
# A path that is neither a file nor a directory ends the script through
# pgm_exit(1, ...).  Without that check the directory stayed undefined,
# became '/' once the separator was appended, and the filesystem root was
# scanned instead.
sub get_all_files($) {
    my ($inf) = @_;
    $inf = dos_2_unix($inf);    # use ALL unix form of path
    my $indir;
    if (-f $inf) {
        # only the directory part of the file is of interest here
        (undef, $indir) = fileparse($inf);
        # a bare file name parses to './' - use the start-up directory
        $indir = $cwd if ($indir =~ /^\.(\\|\/)$/);
    } elsif (-d $inf) {
        $indir = $inf;
    } else {
        pgm_exit(1,"ERROR: In file [$inf] NOT valid!\n");
    }
    $indir .= '/' if !($indir =~ /(\\|\/)$/);

    # process this directory, its subdirectories, and sub-sub...
    process_sub_dir($indir);
}

#########################################
### MAIN ###
# Take the start point from the command line (the built-in default stays
# when no argument is given) and announce it.
parse_args(@ARGV);
prt( "$pgmname: in [$cwd]: In file [$in_file]...\n" );

# The start point must be an existing plain file or directory.
pgm_exit(1,"ERROR: In file [$in_file] NOT valid!\n")
    unless ((-f $in_file) || (-d $in_file));

if (-f $in_file) {
    # split into file name and directory; a bare name parses to './',
    # which is replaced by the start-up directory
    ($g_infile,$g_indir) = fileparse($in_file);
    $g_indir = $cwd if ($g_indir =~ /^\.(\\|\/)$/);
} else {
    # a directory was given: no file part
    $g_indir = $in_file;
    $g_infile = '';
}

# Collect everything, then report the totals and leave.
get_all_files($in_file);
prt("Processed $g_item_count items, $g_dir_count directories, for -\n");
my $summary = get_g_file_array_counts();
prt( $summary."\n" );
pgm_exit(0,"Normal exit(0)");
########################################
# Take the input path from the command line arguments.
# Every argument is treated as the input path, so when several are given
# the last one wins; with no arguments $in_file keeps its default.
sub parse_args {
    my @args = @_;
    $in_file = $args[-1] if (@args);
}

# eof - getfilecounts.pl
