#!/perl -w
# NAME: dirsizes03.pl
# AIM: Given a PATH, list the directories found directly under it, with the
# approximate size of each and the number of files it contains
# 27/12/2011 - Minor FIX20111227 fixes - ignore 'System Volume Information', and ret 5 elements on fail
# 13/07/2010 - invert the output
# 01/07/2010 - add adjusted size, based on block size of 4096 - closer approx of
# space required on disk system that uses 4096 bytes as BLOCK size.
# 1/2/2009 - Minor fix of file count ($fc), especially when no subdirectories.
# 20/12/2008 - Added an -x=excludes parameters
# 20/11/2008 geoff mclane http://geoffair.net/mperl
# ###############################################################################
use strict;
use warnings;
use File::stat;
use Cwd;
use Fcntl ':mode';

# This needs to be adjusted for personal use ...
unshift(@INC, 'C:/GTools/perl');
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
# Reduce $0 to the bare script name: drop everything up to and including the
# last backslash or forward slash.  The name is embedded in the log file name
# below, so a leftover directory part (relative path, or forward slashes)
# would otherwise yield an unusable log path.
my $pgmname = $0;
$pgmname =~ s{^.*[\\/]}{}s;
my $perl_root = 'C:\GTools\perl';
my $outfile = $perl_root."\\temp.$pgmname.txt";
open_log($outfile);

# Version string shown by give_help().
my $VERS = "0.0.4 2011-12-27";
# Folder to scan; set from the first non-option command line argument.
my $in_folder = '';
#my $in_folder = 'C:\Users\Geoff\Documents';
#my $in_folder = 'C:\Documents and Settings\Geoff McLane\My Documents';
# Running byte total, accumulated by process_folder() at level 0 only.
my $tot_size = 0;
# Messages collected by prtw() and replayed by show_warnings().
my @warnings = ();
# Count of non-directory entries seen; drives the progress dots.
my $file_count = 0;

# When $debug_on is set and no folder is given, $def_dir is scanned instead.
my $debug_on = 0;
my $def_dir = 'E:\CDROMS_01';

# ==========================================
# One array ref per top-level directory, plus one 'root' entry for the files
# that sit directly in the scanned folder.
my @dir_sizes = ();
# @dir_sizes offsets
my $DL_SIZE = 0;    # size in bytes
my $DL_DIR  = 1;    # directory name
my $DL_NN   = 2;    # size with thousands separators (filled by show_dir_sizes)
my $DL_KS   = 3;    # size as a KB/MB/GB/TB string (filled by show_dir_sizes)
my $DL_FC   = 4;    # file count
my $DL_FCNN = 5;    # file count with thousands separators
# added
my $DL_DCNT = 6;    # directory count
my $DL_ASIZ = 7;    # adjusted size: bytes rounded up to whole blocks
my $DL_TBLK = 8;    # total number of blocks
# added more
my $DL_DCNN = 9;    # directory count with thousands separators
my $DL_ASNN = 10;   # adjusted size with thousands separators
my $DL_ASKS = 11;   # adjusted size as a KB/MB/GB/TB string
my $DL_TBNN = 12;   # block count with thousands separators
# ============================================

my $verbosity = 0;          # raised by the -v options
my %excluded = ();          # directory names to skip (-x option)
my $loadlog = 0;            # -l option; handed to close_log() on exit
my $block = 4096;           # block size in bytes used for the adjusted size
my $add_dir_block = 0;      # when true, each sub-directory adds one block to the adjusted size
my $max_file_name = 35;     # upper limit for the padded width of the name column
my $sort_alpha = 0;         # -sort-alpha option: order the list by name instead of size

# program variables
my $g_dot_cnt = 0;          # number of progress dots printed so far

# debug
my $dbg_s01 = 0;    # show prt("dir: $file $cds bytes ... as they are processed ...

# forward
sub process_folder($$);

# Print a warning now and remember it for the end-of-run summary.
# A single trailing newline on the message is removed before it is stored.
sub prtw($) {
    my $msg = shift;
    $msg =~ s/\n$//;
    prt($msg."\n");
    push(@warnings, $msg);
}

# Replay every warning collected by prtw().
# When there are none, say so only if the caller passes a true $dbg flag.
sub show_warnings($) {
    my $dbg = shift;
    my $count = scalar(@warnings);
    if ($count) {
        prt( "\nGot ".$count." WARNINGS ...\n" );
        foreach my $warning (@warnings) {
            prt( $warning."\n" );
        }
        prt("\n");
    } elsif ($dbg) {
        prt("\nNo warnings issued.\n\n");
    }
}

# Common exit path: replay warnings, print an optional final message,
# close the log and leave with exit status $val.
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    show_warnings( 0 );
    if (length($msg)) {
        # make sure the message ends with a newline; one more is added on output
        $msg = $msg."\n" if ($msg !~ /\n$/);
        prt($msg."\n");
    }
    close_log($outfile,$loadlog);
    # unlink($outfile);
    exit($val);
}


# Return a copy of the path with every backslash turned into a forward slash.
sub dir2unix($) {
    my $path = shift;
    $path =~ tr{\\}{/};
    return $path;
}


#string dirghtml::b2ks1(double d) // b2ks1(double d)
# Format a byte count as a short string with a unit suffix and at most one
# decimal place, e.g. 1536 -> "1.5KB".  The smallest unit is KB.
sub bytes2ks($) {
    my ($bytes) = @_;
    my $kib = ($bytes / 1024);
    # [ exclusive upper limit in KB, divisor, suffix ]; anything larger is TB
    my @units = (
        [ 1024,                 1,             "KB" ],
        [ (1024 * 1024),        1024,          "MB" ],
        [ (1024 * 1024 * 1024), (1024 * 1024), "GB" ],
    );
    my ($divisor, $suffix) = ((1024 * 1024 * 1024), "TB");
    foreach my $unit (@units) {
        if ($kib < $unit->[0]) {
            ($divisor, $suffix) = ($unit->[1], $unit->[2]);
            last;
        }
    }
    # round to one decimal place: add half a tenth, then cut off at tenths
    my $tenths = int((($kib / $divisor) + 0.05) * 10);
    return( ($tenths / 10) . $suffix );
}

# sort() comparator: smallest first, by the numeric value in slot 0 of each row.
sub mycmp_ascend {
   return $a->[0] <=> $b->[0];
}

# sort() comparator: largest first, by the numeric value in slot 0 of each row.
sub mycmp_decend {
   return $b->[0] <=> $a->[0];
}

# sort() comparator: case-insensitive order on the directory name.
# Despite the name, the resulting order is ascending (a before b).
sub mycmp_decend_alpha {
   return lc($a->[$DL_DIR]) cmp lc($b->[$DL_DIR]);
}


# "Nice number": insert a comma between every group of three characters,
# counted from the right.  Values of three characters or fewer are returned
# unchanged.
sub get_nn($) { # perl nice number nicenum add commas
    my ($num) = shift;
    my $len = length($num);
    return $num if ($len <= 3);
    my $lead = $len % 3;    # size of the short group at the front, if any
    my @groups = ();
    push(@groups, substr($num, 0, $lead)) if ($lead > 0);
    for (my $pos = $lead; $pos < $len; $pos += 3) {
        push(@groups, substr($num, $pos, 3));
    }
    return join(',', @groups);
}


#  0 dev      device number of filesystem
#  1 ino      inode number
#  2 mode     file mode  (type and permissions)
#  3 nlink    number of (hard) links to the file
#  4 uid      numeric user ID of file's owner
#  5 gid      numeric group ID of file's owner
#  6 rdev     the device identifier (special files only)
#  7 size     total size of file, in bytes
#  8 atime    last access time in seconds since the epoch
#  9 mtime    last modify time in seconds since the epoch
# 10 ctime    inode change time in seconds since the epoch (*)
# 11 blksize  preferred block size for file system I/O
# 12 blocks   actual number of blocks allocated

# mode
# File types.  Not necessarily all are available on your system.
# S_IFREG S_IFDIR S_IFLNK S_IFBLK S_IFCHR S_IFIFO S_IFSOCK S_IFWHT S_ENFMT
# # The operators -f, -d, -l, -b, -c, -p, and -S.
# S_ISREG($mode) S_ISDIR($mode) S_ISLNK($mode)
# S_ISBLK($mode) S_ISCHR($mode) S_ISFIFO($mode) S_ISSOCK($mode)
# Return 1 when stat() succeeds on the path and reports a directory, else 0.
sub is_directory {
    my $path = shift;
    my $info = stat($path);
    return 0 if (!$info);
    return (S_ISDIR($info->mode) ? 1 : 0);
}

# Debug helper: print one line describing the stat() result for a path -
# the matching file test letter, the size, and the mode split into type and
# permission bits - or a failure line when stat() returns nothing.
sub show_stat {
    my $path = shift;
    my $info = stat($path);
    if (!$info) {
        prt("stat $path failed\n");
    } else {
        my $mode = $info->mode;
        my $size = $info->size;
        my $type = S_IFMT($mode);
        my $perm = S_IMODE($mode);
        my $perm_oct = sprintf("%04o", ($mode & 0777));
        # checked in this order; the first predicate that holds wins
        my @checks = (
            [ sub { S_ISREG($_[0])  }, '-f' ],
            [ sub { S_ISDIR($_[0])  }, '-d' ],
            [ sub { S_ISLNK($_[0])  }, '-l' ],
            [ sub { S_ISBLK($_[0])  }, '-b' ],
            [ sub { S_ISCHR($_[0])  }, '-c' ],
            [ sub { S_ISFIFO($_[0]) }, '-p' ],
            [ sub { S_ISSOCK($_[0]) }, '-S' ],
        );
        my $tag = '-?';
        foreach my $check (@checks) {
            if ($check->[0]->($mode)) {
                $tag = $check->[1];
                last;
            }
        }
        prt("stat $path [$tag] $size - mode=$mode, type=$type, perm=$perm [$perm_oct]\n");
    }
}

# Recursively total the contents of one folder.
#   $in  - folder to scan (backslashes are converted to forward slashes)
#   $lev - recursion depth; 0 for the folder given by the user
# Returns a five element list:
#   (bytes, file count, block-adjusted bytes, directory count, block count)
# covering the whole tree below $in, or five zeros if the folder cannot be
# opened.  At level 0 it also appends one row per sub-directory, plus a 'root'
# row for the files directly in $in, to @dir_sizes and adds their sizes to
# $tot_size.  Sub-directories whose name is a key of %excluded are skipped.
# Note: -d follows symbolic links, so a link that points back up the tree
# would be descended into again.
sub process_folder($$) {
    my ($in,$lev) = @_;
    my $inf = dir2unix($in);
    my ($dsize, $rsize) = (0, 0);       # bytes: whole tree / files directly in this folder
    my ($asize, $rasiz) = (0, 0);       # block-adjusted bytes: whole tree / this folder only
    my ($fcnt, $lfc) = (0, 0);          # files: whole tree / directly in this folder
    my ($dcnt, $rdcnt) = (0, 0);        # directories: whole tree / directly in this folder
    my ($totblks, $totrblks) = (0, 0);  # blocks: whole tree / files directly in this folder

    my $dh;
    if (!opendir($dh, $inf)) {
        if ($inf =~ /System\s+Volume\s+Information/i) {
            # FIX20111227 - QUIETLY ignore this known FAILURE
        } else {
            prtw("WARNING: Unable to open [$inf] ... $! ...\n");
        }
        return 0,0,0,0,0;   # FIX20111227 - return 5 zeros
    }
    # read everything and close before recursing, so only one handle is open at a time
    my @files = readdir($dh);
    closedir($dh);

    # Unbuffered output while progress dots are in use.  The local must live in
    # the sub's own scope: placed inside a nested block it is undone as soon as
    # that block ends and never takes effect.
    local $| = ($dbg_s01 ? $| : 1);

    # join child names with '/', matching the separators dir2unix() produced
    my $sep = ($inf =~ m{/$}) ? '' : '/';
    foreach my $file (@files) {
        next if (($file eq '.')||($file eq '..'));
        my $ff = $inf.$sep.$file;
        if (-d $ff) {
            if (defined $excluded{$file}) {
                prt( "Skipping folder [$file].\n" ) if ($verbosity > 1);
                next;
            }
            $dcnt++;
            $rdcnt++;
            if ($add_dir_block) {
                $asize += $block;
                $rasiz += $block;
            }
            my ($cds,$fc,$as,$dc,$tb) = process_folder($ff,($lev + 1));
            $dsize += $cds;
            $fcnt += $fc;
            $asize += $as;
            $dcnt += $dc;
            $totblks += $tb;
            if ($lev == 0) {
                prt("dir: $file $cds bytes, $fc files, $asize, $dcnt ...\n") if ($dbg_s01);
                $tot_size += $cds;
                #                 0     1      2    3   4    5   6    7    8
                push(@dir_sizes, [$cds, $file, '',  '', $fc, '', $dc, $as, $tb]);
            }
            next;
        }

        # anything that is not a directory is counted as a file
        $fcnt++;
        $lfc++;
        my $sb = stat($ff);
        if ($sb) {
            my $sz = $sb->size;    # get file SIZE
            # whole blocks needed for the file; an empty file is charged one block
            my $blks = 1;
            if ($sz != 0) {
                $blks = int($sz / $block);
                $blks++ if ($sz % $block);
            }
            my $as = ($blks * $block);
            $dsize += $sz;
            $rsize += $sz;
            $totblks += $blks;
            $totrblks += $blks;
            $asize += $as;
            $rasiz += $as;
        } else {
            prtw("WARNING: stat of $ff FAILED!\n");
        }
        $file_count++;
        # one progress dot per 1000 files, unless per-directory debug lines are on
        if (!$dbg_s01 && (($file_count % 1000) == 0)) {
            prt( "." );
            $g_dot_cnt++;
        }
    }
    if ($lev == 0) {
        $tot_size += $rsize;
        prt("\n") if ($g_dot_cnt);  # clear to new line if have output any DOTS...
        prt( "root: $inf ".get_nn($rsize).", total ".get_nn($tot_size)." $fcnt files...\n" ) if ($dbg_s01);
        #                 0       1       2   3   4     5  6       7       8
        push(@dir_sizes, [$rsize, 'root', '', '', $lfc, '',$rdcnt, $rasiz, $totrblks]); # add in this ROOT size to list
    }
    return $dsize,$fcnt,$asize,$dcnt,$totblks;
}

# ================================================================
# Print the rows collected in @dir_sizes as an aligned table, followed by a
# TOTAL row.  The list is first sorted in place (by name when $sort_alpha is
# set, otherwise by ascending size).  A first pass formats the numbers, stores
# the formatted strings back into each row and finds the widest text of every
# printed column; a second pass prints using those widths.
# The name column is padded to the longest name, limited to $max_file_name
# (longer names are printed in full and simply overflow), and is never
# narrower than the TOTAL label so that the last row lines up as well.
# ----------------------------------------------------------------
sub show_dir_sizes {
    my $total_label = "TOTAL";

    # could sort in other ways - descending via mycmp_decend
    if ($sort_alpha) {
        @dir_sizes = sort mycmp_decend_alpha @dir_sizes;
    } else {
        @dir_sizes = sort mycmp_ascend @dir_sizes;
    }

    # column widths: name, size, size as KB/MB.., file count, blocks, blocks as KB/MB..
    my ($mdl, $msl, $mkl, $mcl, $mtbl, $mtbkl) = (0, 0, 0, 0, 0, 0);
    # running totals: bytes, files, blocks
    my ($tot, $ftot, $tbtot) = (0, 0, 0);

    # widen a column so that $text fits
    my $fit = sub {
        my ($width, $text) = @_;
        my $len = length($text);
        return ($len > $width) ? $len : $width;
    };
    # print one table line; names are left aligned, all numbers right aligned
    my $emit = sub {
        my ($name, $nn, $ks, $fcnn, $word, $tbnn, $tbks) = @_;
        prt( sprintf("%-*s %*s  %*s  %*s %s (%*s %*s)\n",
                     $mdl, $name, $msl, $nn, $mkl, $ks, $mcl, $fcnn,
                     $word, $mtbl, $tbnn, $mtbkl, $tbks) );
    };

    # first run: convert to text, keep the text in the row, and measure
    foreach my $row (@dir_sizes) {
        my $siz  = $row->[$DL_SIZE];
        my $fc   = $row->[$DL_FC];
        my $dc   = $row->[$DL_DCNT];
        my $asz  = $row->[$DL_ASIZ];
        my $tblk = $row->[$DL_TBLK];
        $tot   += $siz;
        $ftot  += $fc;
        $tbtot += $tblk;

        $row->[$DL_NN]   = get_nn($siz);
        $row->[$DL_KS]   = bytes2ks($siz);
        $row->[$DL_FCNN] = get_nn($fc);
        # formatted but not printed; kept in the row as before
        $row->[$DL_ASNN] = get_nn($asz);
        $row->[$DL_ASKS] = bytes2ks($asz);
        $row->[$DL_DCNN] = get_nn($dc);
        $row->[$DL_TBNN] = get_nn($tblk);

        $mdl   = $fit->($mdl,   $row->[$DL_DIR]);
        $msl   = $fit->($msl,   $row->[$DL_NN]);
        $mkl   = $fit->($mkl,   $row->[$DL_KS]);
        $mcl   = $fit->($mcl,   $row->[$DL_FCNN]);
        $mtbl  = $fit->($mtbl,  $row->[$DL_TBNN]);
        $mtbkl = $fit->($mtbkl, bytes2ks($tblk * $block));
    }

    # the totals are usually the widest numbers, so measure them too
    my $tot_nn   = get_nn($tot);
    my $tot_ks   = bytes2ks($tot);
    my $tot_fcnn = get_nn($ftot);
    my $tot_tbnn = get_nn($tbtot);
    my $tot_tbks = bytes2ks($tbtot * $block);
    $msl   = $fit->($msl,   $tot_nn);
    $mkl   = $fit->($mkl,   $tot_ks);
    $mcl   = $fit->($mcl,   $tot_fcnn);
    $mtbl  = $fit->($mtbl,  $tot_tbnn);
    $mtbkl = $fit->($mtbkl, $tot_tbks);

    $mdl = $max_file_name if ($mdl > $max_file_name);
    # keep the TOTAL row aligned even when every name is shorter than the label
    $mdl = length($total_label) if ($mdl < length($total_label));

    # second run, adjusting the lengths of each output
    foreach my $row (@dir_sizes) {
        $emit->( $row->[$DL_DIR], $row->[$DL_NN], $row->[$DL_KS], $row->[$DL_FCNN],
                 'files', $row->[$DL_TBNN], bytes2ks($row->[$DL_TBLK] * $block) );
    }

    # FINAL 'totals' output
    $emit->( $total_label, $tot_nn, $tot_ks, $tot_fcnn, 'FILES', $tot_tbnn, $tot_tbks );
    # all done
}

##### SUBS ONLY ABOVE ####
# ######### MAIN #########
# ========================

# 1. read the command line into the option globals and $in_folder
parse_args(@ARGV);

if ($verbosity > 0) {
    prt( "$pgmname ... Hello, processing [$in_folder] directory ...\n" );
}

# 2. walk the tree; results are collected in @dir_sizes
process_folder( $in_folder, 0 );

# 3. print the aligned table
show_dir_sizes();

# 4. show any warnings, close the log and exit with status 0
pgm_exit(0,"");

#################################################################################
#### SUBS ONLY related to user input
# =================================

# Print the usage text, one prt() call per line, then exit with status 1.
sub give_help {
    my @usage = (
        "$pgmname [Options] folder\n",
        "Options: $VERS\n",
        " -? -h -help = This brief HELP, and exit.\n",
        " -l          = Load log into Wordpad\n",
        " -v[vvvv]    = Set verbosity. (def=$verbosity).\n",
        " -sort-alpha = List in alphabetic sequence. (def=by size).\n",
        " -x=folder   = Exclude this folder.\n",
    );
    foreach my $line (@usage) {
        prt( $line );
    }
    exit(1);
}

# Ensure argument exists, or die.
# Ensure a value follows an option: the first parameter is the option itself,
# the rest are the arguments that remain after it.  Calls mydie() when
# nothing remains.
sub require_arg {
    my ($arg, @arglist) = @_;
    if (scalar(@arglist) == 0) {
        mydie( "ERROR: no argument given for option '$arg' ...\n" );
    }
}

# Pre-scan of the command line for verbosity options only.
# Every argument that, after its leading dashes are removed, starts with 'v'
# raises $verbosity by the length of what is left (so -vvv adds 3), and the
# new level is reported.
sub set_verbosity {
    my (@av) = @_;
    foreach my $item (@av) {
        next if ($item !~ /^-/);
        (my $opt = $item) =~ s/^-+//;
        next if ($opt !~ /^v/);
        $verbosity += length($opt);
        prt( "Set verbosity to [$verbosity].\n" );
    }
}

# Interpret the command line.
# Arguments starting with '-' are options (any number of leading dashes is
# accepted); anything else becomes the folder to scan, the last one winning.
# Verbosity is worked out first so the messages printed here can honour it.
# Ends the program through pgm_exit() on an unknown option or when no folder
# was given.
sub parse_args {
    my (@av) = @_;
    set_verbosity(@av);
    while (@av) {
        my $raw = shift @av;
        if ($raw !~ /^-/) {
            $in_folder = $raw;
            prt( "Set IN folder to [$in_folder] ...\n" ) if ($verbosity > 0);
            next;
        }
        (my $opt = $raw) =~ s/^-+//;
        if (($opt eq '?')||($opt eq 'h')||($opt eq 'help')) {
            give_help();
        } elsif ($opt =~ /^x=(.+)/) {
            # -x=name form
            my $ex = $1;
            $excluded{$ex} = 1;
            prt( "Excluding folder [$ex] ...\n" ) if ($verbosity > 0);
        } elsif ($opt =~ /^x$/) {
            # -x name form: the name is the next argument
            require_arg($raw, @av);
            my $ex = shift @av;
            $excluded{$ex} = 1;
            prt( "Excluding folder [$ex] ...\n" ) if ($verbosity > 0);
        } elsif ($opt =~ /^sort-alpha/i) {
            $sort_alpha = 1;
            prt( "Set sort alpha...\n" ) if ($verbosity > 0);
        } elsif ($opt =~ /^l/) {
            $loadlog = 1;
            prt( "Set load log into Wordpad ...\n" ) if ($verbosity > 0);
        } elsif ($opt =~ /^v/) {
            # done first
        } else {
            pgm_exit(1, "ERROR: Unknown argument [".$raw."]! Use -? for HELP.\n" );
        }
    }

    # debug convenience: fall back to the built-in default folder
    if ($debug_on && (length($in_folder) == 0) && (length($def_dir))) {
        $in_folder = $def_dir;
        prt( "DEBUG ON: Set IN folder to DEFAULT [$in_folder] ...\n" );
        #$dbg_s01 = 1;
        #$loadlog = 1;
    }

    if (length($in_folder) == 0) {
        pgm_exit(1,"ERROR: No in folder found!");
    }
}

# eof - dirsizes03.pl

