#!/perl -w
# dirbysize.pl
# AIM: Read a file output from the DIR command, and order the files in SIZE
# order ... drop out the pure <DIR> entries ...
# 2010/03/28 - option to omit CVS/svn/git directories
# 2/13/2009 - Add delete (unlink) of log file at end, and skip if <DIR> is 3[2] or 4th[3] entry...
# 20061219 - Can NOT use just $0 to get script name, since when directly invoke from
# a command prompt, it becomes C:\GTools\Perl\dirbydate.pl, so added some regex to
# massage the name, and just get the perl script name ... if a DRIVE included /^\w{1}:\\.*/
# AND, to enable it to FIND the logfile.pl, add unshift(@INC, 'C:/GTools/perl');
# OR this can be use lib '/Users/User/perl-lib';, or perl -MC:/GTools/perl -e dirbydate.pl
# and added a $evenspace option ...
# geoff mclane - 28 March, 2007 - geoffmclane.com - based on dirbydate.pl
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
unshift(@INC, 'C:/GTools/perl');
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# options
my $evenspace = 1;
my $maxname = 0;
my $maxsize = 0;
my $loadlog = 0;
my $maxlines = 300;	# if greater than ??? lines, $loadout in notepad ...
my $fixtm = 0;
my $show_repo = 0;
my $list_only = 0;
my $sort_decend = 0;
my $verbosity = 1;
my $max_count = -1;
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $perl_dir = 'C:\GTools\perl';
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# debug output
my $dbg01 = 0;	# show each FOLDER as found ...
my $dbg02 = 0;	# show count and bytes as found ...
my $dbg03 = 0;	# show files, as found
my $verb3 = 0;	# show sort compare ...

my $inp_file = 'C:\DTEMP\templist';
#my $in_file = 'templist.txt';
my @repo_dirs = qw( CVS .svn .git );
my @filelist = ();
my @sortlist = ();
my $basedir = '';
my @warnings = ();
my @inp_files = ();

sub prt_version() {
    prt("$pgmname: Version 0.0.1 2010/03/29\n");
}

sub prtw($) {
    my ($tx) = shift;
    $tx =~ s/\n$//;
    prt("$tx\n");
    push(@warnings,$tx);
}

sub show_warnings() {
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS ...\n" );
        foreach my $line (@warnings) {
            prt("$line\n" );
        }
        prt("\n");
    } else {
        prt("\nNo warnings issued.\n\n") if ($verbosity);
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg) if ($val || $verbosity);
    }
    $verbosity = 0 if ($val == 0);
    show_warnings();
    close_log($outfile,$loadlog);
    # unlink($outfile);
    exit($val);
}

##################################################
# My particular 'nice number'
sub get_nn($) { # perl nice number nicenum add commas
	my ($n) = shift;
	if (length($n) > 3) {
		my $mod = length($n) % 3;
		my $ret = (($mod > 0) ? substr( $n, 0, $mod ) : '');
		my $mx = int( length($n) / 3 );
		for (my $i = 0; $i < $mx; $i++ ) {
			if (($mod == 0) && ($i == 0)) {
				$ret .= substr( $n, ($mod+(3*$i)), 3 );
			} else {
				$ret .= ',' . substr( $n, ($mod+(3*$i)), 3 );
			}
		}
		return $ret;
	}
	return $n;
}

sub trimall($) {	# version 20061127
	my ($ln) = shift;
	chomp $ln;			# remove CR (\n)
	$ln =~ s/\r$//;		# remove LF (\r)
	$ln =~ s/\t/ /g;	# TAB(s) to a SPACE
	while ($ln =~ /\s\s/) {
		$ln =~ s/\s\s/ /g;	# all double space to SINGLE
	}
	while ($ln =~ /^\s/) {
		$ln = substr($ln,1); # remove all LEADING space
	}
	while ($ln =~ /\s$/) {
		$ln = substr($ln,0, length($ln) - 1); # remove all TRAILING space
	}
	return $ln;
}

sub remdir {
	my ($f) = shift;
	my $b2 = quotemeta($basedir);
	$f =~ s/^$b2\\//; # remove beginning ...
	return $f;
}

# put least first
sub mycmp_ascend2 {	# special - ascend by 2nd component
   return -1 if (${$a}[2] < ${$b}[2]);
   return  1 if (${$a}[2] > ${$b}[2]);
   return 0;
}
# put largest first
sub mycmp_decend2 {	# special - ascend by 2nd component
   return -1 if (${$a}[2] > ${$b}[2]);
   return  1 if (${$a}[2] < ${$b}[2]);
   return 0;
}

sub is_date_entry {
    my (@ar) = @_;
    if (scalar @ar == 3) {
        if (($ar[0] =~ /^\d+$/) && ($ar[1] =~ /^\d+$/) && ($ar[2] =~ /^\d+$/)) {
            return 1;
        }
    }
    return 0;
}
sub is_time_entry {
    my (@ar) = @_;
    if (scalar @ar == 2) {
        if (($ar[0] =~ /^\d+$/)&&($ar[1] =~ /^\d+$/)) {
            return 1;
        }
    }
    return 0;
}

sub dbs_d2u($) {
	my ($du) = shift;
	$du =~ s/\\/\//g;
	return $du;
}

sub is_a_respository_dir($) {
    my ($dir) = @_;
    my $ud = dbs_d2u($dir); # ensure UNIX path
    my @arr = split('/',$ud);   # split on UNIX path
    my ($d1,$d2);
    foreach $d1 (@arr) {
        foreach $d2 (@repo_dirs) {
            return 1 if ($d1 eq $d2);
        }
    }
    return 0;
}

sub set_common_folder($) {
    my ($ra) = @_;  # = \@folders
    my $comm = '';
    my ($fold,$len1,$len2,$len);
    my ($i,$ch1,$ch2);
    $len1 = 0;
    foreach $fold (@{$ra}) {
        $len2 = length($fold);
        if ($len1) {
            $len = $len1;
            $len = $len2 if ($len2 < $len1);
            for ($i = 0; $i < $len; $i++) {
                $ch1 = substr($comm,$i,1);
                $ch2 = substr($fold,$i,1);
                last if ($ch1 ne $ch2);
            }
            if ($i == $len) {
                # we reached the end, and they are the SAME
            } else {
                # stopped due to difference
                if ($i == 0) {
                    # all different = NO COMMON
                    $comm = '';
                    last;
                } elsif ($i < $len1) {
                    # must truncate common
                    $comm = substr($comm,0,$i);
                    $len1 = length($comm);
                }
            }
        } else {
            $comm = $fold;
            $len1 = length($comm);
        }
    }
    prt("Common folder [$comm]\n") if (length($comm) && $verbosity);
}

sub process_in_file($) {
    my ($inf) = @_;
    open INF, "<$inf" or mydie("ERROR: Can NOT open [$inf] ... $! ...\n" );
    my @lines = <INF>;	# slurp it all
    close INF;
    my $lc = scalar @lines;
    prt( "Processing $lc lines from [$inf] ...\n" ) if ($verbosity);
    my $line = '';
    my $tln = '';
    my $act_folder = '';
    my $isrepo = 0;
    $basedir = file_dirname($inf);
    my @folders = ();
    foreach $line (@lines) {
        $tln = trimall($line);
        if (length($tln)) {
            #  Volume in drive C has no label.
            #  Volume Serial Number is D833-AEFA
            if ($line =~ /Volume in drive (.*)/ ) {
                # ignore
            } elsif ($line =~ /Volume Serial Number is (.*)/ ) {
                # ignore
            } elsif ($line =~ /Total Files Listed/ ) {
                # ignore
            } elsif ($line =~ /Directory of (.*)/ ) {
                $act_folder = $1;
                prt( "Folder [$act_folder] ...\n" ) if ($dbg01);
                $isrepo = is_a_respository_dir($act_folder);
                push(@folders,$act_folder);
            } elsif ($line =~ /\d+\s+File\(s\)\s+[\d,]+\s+bytes/ ) {
                prt( $line ) if ($dbg02);
            } elsif ($line =~ /\d+\s+Dir\(s\)\s+[\d,]+\s+bytes/ ) {
                prt( $line ) if ($dbg02);
            } else {
                my @arr = split(/\s/, $tln);
                my $ac = scalar @arr;
                my $pchk = 0;
                #prt( $line );
                if ($ac > 3) {
                    # from Windows 98 machine
                    # ..             <DIR>        01/02/04  16:32 ..
                    # IMAGE001 PNG         5 031  01/02/04  16:32 image001.png
                    # from XP machine
                    # 09/07/2008  19:24    <DIR>          Sue
                    # 09/07/2008  19:40            81,061 temp.DirBySize.pl.txt
                    # from Vista machine
                    # 01/10/2009  03:00 PM    <DIR>          28
                    # 10/05/2008  02:04 PM             4,180 chgdiff.txt
                    if (($arr[2] eq '<DIR>')||($arr[3] eq '<DIR>')) {
                        # ignore
                    } else {
                        # file names of interest
                        my @ar2 = split('/', $arr[0]); # get day/month/year
                        my @ar3 = split(':', $arr[1]); # get hour:minutes
                        if (is_date_entry(@ar2) && is_time_entry(@ar3)) {
                            my $sz = $arr[2];
                            my $nm = $arr[3];
                            #if ($fixtm && (($sz eq 'AM')||($sz eq 'PM'))) {
                            if (($sz eq 'AM')||($sz eq 'PM')) {
                                # 12 hour clock - have AM or PM
                                if ($sz eq 'PM') {
                                    $ar3[0] += 12;	# bump hour by 12
                                    $arr[1] = $ar3[0].':'.$ar3[1];
                                }
                                $sz = $arr[3];
                                $nm = $arr[4];
                                if ($ac > 5) {
                                    my $i = 5;
                                    while($i < $ac) {
                                        $nm .= ' '.$arr[$i];
                                        $i++;
                                    }
                                }
                            } else {
                                # 24 hour clock (no AM/PM)
                                if ($ac > 4) {
                                    my $i = 4;
                                    while($i < $ac) {
                                        $nm .= ' '.$arr[$i];
                                        $i++;
                                    }
                                }
                            }
                            if ((scalar @ar2 == 3)&&(scalar @ar3 == 2)) {
                                my $ft = int($ar2[2].$ar2[1].$ar2[0].$ar3[0].$ar3[1]);
                                if ($evenspace) {	# get maximum lengths
                                    $maxname = length($nm) if (length($nm) > $maxname);
                                    $maxsize = length($sz) if (length($sz) > $maxsize);
                                }
                                $sz =~ s/,//g; # remove the COMA
                                #                 0     1   2    3            4       5        6
                                push(@filelist, [$ft, $nm, $sz, $act_folder, $arr[0], $arr[1], $isrepo]);
                                #prt( "$ft, $nm, $sz, $act_folder $arr[0], $arr[1]\n") if ($dbg03);
                                prt( "$ft, $nm, $sz, $arr[0], $arr[1]\n") if ($dbg03);
                            } else {
                                $pchk = 1;
                            }
                        } else {
                            # first and second NOT DATE and TIME
                            # maybe a Windows 98 listing
                            # IMAGE001 PNG         5 031  01/02/04  16:32 image001.png
                            # MAROC    EXE    72 869 062  09/07/08  18:54 maroc.exe
                            mydie( "EEK: Handling of this just not yet done ;=((\n" );
                        }
                    }
                } else {
                    $pchk = 1;
                }
                if ($pchk) {
                    prt( "CHECK: " );
                    for (my $i = 0; $i < $ac; $i++) {
                        prt( $arr[$i].' ' );
                    }
                    prt("\n");
                }

            }
        }
    }
    set_common_folder(\@folders);
}

sub dbs_b2ks2($) {
   my ($d) = @_;
	my $oss;
	my $kss;
	my $lg = 0;
	my $ks = ($d / 1024); #// get Ks
	my $div = 1;
   if( $ks < 1024 ) {
      $div = 1;
      $oss = "KB";
   } elsif ( $ks < (1024*1024) ) {
	  $div = 1024;
      $oss = "MB";
   } elsif ( $ks < (1024*1024*1024) ) {
      $div = 1024 * 1024;
      $oss = "GB";
   } else {
      $div = 1024 * 1204 * 1240;
      $oss = "TB";
   }
   $kss = $ks / $div;
   $kss += 0.05;
   $kss *= 10;
   $lg = int($kss);
   $kss = $lg / 10;
   $kss .= '.0' if (!($kss =~ /\./));
   ###return( ($lg / 10) . " " . $oss );
   return "$kss$oss";
}

sub show_file_list() {
    my $fc = scalar @filelist;
    my $so = '';
    ###@sortlist = sort mycmp_decend @filelist;
    if ($sort_decend) {
        @sortlist = sort mycmp_decend2 @filelist;
        $so = 'decending';
    } else {
        @sortlist = sort mycmp_ascend2 @filelist;
        $so = 'ascending';
    }
    my $fcs = scalar @sortlist;
    if ($verbosity) {
        prt( "Got $fc files...");
        prt( " spaced mxsz=$maxsize, mxnm=$maxname...") if ($evenspace);
        prt( " sorted $so..." );
        prt( sprintf(" %s repos...", ($show_repo ? "including" : "excluding")) );
        # prt( "Got $fcs sorted files ...\n" );
        prt("\n");
    }
    my $msg = '';
    my $skipped = 0;
    my $total_size = 0;
    my $repo_size = 0;
    my ($nm,$sz,$i,$reldir,$isrep,$csz,$cnm,$isrepo,$bgn,$lstcnt);
    $bgn = 0;
    if ($max_count != -1) {
        if ($max_count < $fcs) {
            $bgn = $fcs - $max_count;
        }
    }
    $loadlog = 1 if ($fcs - $bgn> $maxlines);	# if greater than ??? lines, $loadout in notepad ...
    $lstcnt = 0;
    for ($i = $bgn; $i < $fcs; $i++) {
        $reldir = remdir( $sortlist[$i][3] );
        $isrepo = $sortlist[$i][6];
        #                   0    1    2    3            4        5       6
        # push(@filelist, [$ft, $nm, $sz, $act_folder, $arr[0], $arr[1], $isrepo]);
        $sz = $sortlist[$i][2];
        $total_size += $sz;
        $csz = get_nn($sz);
        $nm = $sortlist[$i][1];
        $cnm = $nm;
        if ($list_only) {
            $msg = $reldir;
            $msg .= "\\" if !(($msg =~ /(\\|\/)$/)||($nm =~ /^(\\|\/)/));
            $msg .= $nm;
        } else {
            if ($evenspace) {
                $csz = ' ' . $csz while (length($csz) < $maxsize);
                $cnm = ' ' . $cnm while (length($cnm) < $maxname);
                $msg = $sortlist[$i][4] . ' ' . $sortlist[$i][5] . ' ' . $csz . ' ' . $cnm . ' ' . $reldir;
            } else {
                $msg = $sortlist[$i][4] . ' ' . $sortlist[$i][5] . ' ' . $sortlist[$i][1] . ' ' . $reldir;
            }
        }
        if ($show_repo) {
            prt( "$msg\n" );
            $lstcnt++;
        } elsif (!$isrepo) {
            prt( "$msg\n" );
            $lstcnt++;
        } else {
            $skipped++;
            $repo_size += $sz;
        }
    }
    $sz = $total_size - $repo_size;
    $csz = dbs_b2ks2($sz);
    if ($verbosity) {
        prt("Listed $lstcnt of $fcs files, $sz ($csz) bytes, in size order $so.\n");
        prt("Skipped $skipped files, $repo_size (".dbs_b2ks2($repo_size).") bytes, in repository folders.\n") if ($skipped);
    }
}

sub process_in_files($) {
    my ($ra) = @_;
    my $fcnt = scalar @{$ra};
    my ($file);
    prt("Processing $fcnt files...\n") if ($verbosity);
    foreach $file (@{$ra}) {
        process_in_file($file);
    }    
}

parse_args(@ARGV);
process_in_files(\@inp_files);
show_file_list();
pgm_exit(0,"Normal exit 0");

#############################
sub give_help {
    prt_version();
    prt("Usages: $pgmname [options] file1 [file2, file3, ...]\n");
    prt("where 'file' is the directory listing file to process.\n");
    prt("Options:\n");
    prt(" -h (or -?) = This help, and exit 0\n");
    prt(" -c <dec>   = Set maximum output count.\n");
    prt(" -i[nvert]  = Invert sort order.\n");
    prt(" -ll[+|-]   = Load log at end.\n");
    prt(" -listonly  = List the full file only.\n");
    prt(" -showrepo  = Show file in repository directories as well.\n");
    prt(" -v[+|-]    = Increase, or decrease (-) verbosity.\n");
}

# only handle the -v[vv...][+|-] parameter
sub parse_arg_v {
    my (@av) = @_;
    my ($arg,$sarg,$len,$ch,$i,$hadp);
    $hadp = 0;
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            $len = length($sarg);
            if ($sarg =~ /^v/i) {
                #prt("Verb: [$sarg] ($len)\n");
                for ($i = 0; $i < $len; $i++) {
                    $ch = substr($sarg,$i,1);
                    if ($ch =~ /v/i) {
                        $verbosity++;
                    } elsif ($ch eq '+') {
                        if ($hadp) {
                            $verbosity++;
                        }
                        $hadp++;
                    } elsif ($ch eq '-') {
                        $verbosity-- if ($verbosity);
                    } else {
                        pgm_exit(1,"ERROR: Unknown argument [$arg]! -v can only be followed by 'v', '+' or '-'! Try -h for help.\n");
                    }
                }
                prt("Set verbosity to $verbosity.\n") if ($verbosity);
            }
        }
        shift @av;
    }
}

sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"Argument $arg requires follwoing count! Try -h for help.\n")
        if (!@av);
}

# parse the command line items...
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg,$ch,$len,$tmp,$full);
    parse_arg_v(@av);
    $tmp = scalar @av;
    prt("Got $tmp arguments...\n") if ($verbosity > 1);
    $full = '';
    while (@av) {
        $arg = $av[0];
        $full .= ' ' if (length($full));
        $full .= $arg;
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            $ch = substr($sarg,0,1);
            $len = length($sarg);
            if (($ch eq 'h')||($ch eq '?')) {
                prt("Give help, and exit.\n");
                give_help();
                pgm_exit(0,"Help exit 0");
            } elsif ($sarg =~ /^ll/i) {
                $tmp = $loadlog;
                if ($len > 2) {
                    $ch = substr($sarg,2,1);
                    if ($ch eq '-') {
                        $loadlog = 0;
                    } elsif ($ch eq '+') {
                        $loadlog = 1;
                    } else {
                        pgm_exit(1,"ERROR: Unknown argument [$arg]! -l can only be followed by '+' or '-'! Try -h for help.\n");
                    }
                } else {
                    $loadlog = 1;
                }
                if ($tmp != $loadlog) {
                    prt(sprintf("Set load log %s\n", ($loadlog ? "On" : "Off"))) if ($verbosity);
                }
            } elsif ($sarg =~ /^listonly$/i) {
                $list_only = 1;
                prt("Set to output list only\n") if ($verbosity);
            } elsif ($sarg =~ /^showrepo$/i) {
                $show_repo = 1;
                prt("Set to include repository files.\n") if ($verbosity);
            } elsif ($sarg =~ /^i/i) {
                $sort_decend = 1;
                prt("Set to invert sort order.\n") if ($verbosity);
            } elsif ($sarg =~ /^c/i) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $full .= " $sarg";
                if ($sarg =~ /^\d+$/) {
                    $max_count = $sarg;
                    if ($max_count) {
                        prt("Set max output count to $max_count.\n") if ($verbosity);
                    } else {
                        pgm_exit(1,"ERROR: Argument [$arg] has to be followed by decimal count GT 0! Not [$sarg].\n");
                    }
                } else {
                    pgm_exit(1,"ERROR: Argument [$arg] has to be followed by decimal count! Not [$sarg].\n");
                }
            } elsif ($sarg =~ /^v/i) {
                # already done
            } else {
                pgm_exit(1,"ERROR: Unknown argument [$arg]! Try -h for help.\n");
            }
        } else {
            # $inp_file = $arg;
            push(@inp_files,$arg);
            prt("Adding [$arg] to file list.\n") if ($verbosity);
        }
        shift @av;
    }
    if (!@inp_files && (-f $inp_file)) {
        push(@inp_files,$inp_file);
        prt("Adding default [$inp_file] to file list.\n") if ($verbosity);
    }
    prt("ARGS:[$full]\n") if ($verbosity > 2);
}

# eof - dirbysize.pl
