#!/perl -w
# NAME: chkdupes.pl
# AIM: Read a folder, and subfolders, and check for any duplicate file names
# This is so they can all be put in one folder, if possible
# 19/11/2011 - Allow first item of two to be a single file
# 18/11/2011 - Fix bug if two folder given - src and dest
# 15/08/2011 - Update...
# 22/07/2008 geoff mclane http://geoffair.net/mperl

use strict;
use warnings;
use File::stat;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
# Directory that holds the helper library 'lib_utils.pl'; the log file is
# also written into this directory (see $outfile below).
my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' ...\n";
# log file stuff
# NOTE(review): $LF is presumably shared with lib_utils.pl (log handle?) - confirm.
our ($LF);
# Reduce $0 to its last path component so the log is named after the script.
my $pgmname = $0;
$pgmname = (split(/(\\|\/)/, $pgmname))[-1] if ($pgmname =~ /(\\|\/)/);
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# user variables
my $VERS = "0.0.3 2011-11-19";  # version string shown by give_help()
my $load_log = 0;               # set by -l; passed to close_log() in pgm_exit()
my $in_file = '';               # last non-option argument seen by parse_args()
my $show_ext = 0;               # set by -s; main code then calls show_extensions()
my $max_name_wid = 65;          # upper limit for the padded path column in do_one_folder()
my $debug_on = 0;               # when true, parse_args() falls back to $def_file if no input given
my $def_file = 'def_file';      # placeholder input used only with $debug_on

# NOTE(review): $def_folder and $in_folder are not referenced anywhere else in this file.
my $def_folder = "C:\\Users\\Public\\SAVES\\peru\\My Pictures\\Carla";	# DEFAULT, if NO command input
my $in_folder = "";
my @folder_list = ();           # the one or two existing files/folders given on the command line

my @file_list = ();             # rows built by process_folder(): [path, lc name, dup count, partner index, \stat]
my %exthash = ();               # lower-cased extension => number of files seen with it
my @dup_list = ();              # NOTE(review): not referenced anywhere else in this file

my $verbose = 0;                # verbosity level, raised by -v / -vN; tested by VERB1() .. VERB9()

### program variables
my @warnings = ();              # messages collected by prtw(), listed again by show_warnings()
my $cwd = cwd();                # NOTE(review): not referenced anywhere else in this file
my $os = $^O;                   # NOTE(review): not referenced anywhere else in this file

### forward
sub process_folder($);
sub scan_dir($$$);

# Verbosity predicates: each one is true once $verbose has reached its level.
sub VERB1() {
    return $verbose >= 1;
}
sub VERB2() {
    return $verbose >= 2;
}
sub VERB5() {
    return $verbose >= 5;
}
sub VERB9() {
    return $verbose >= 9;
}

# Re-list every message collected in @warnings through prt().
# Does nothing when no warnings were collected.
#   $val - exit code handed over by the caller; accepted but not used here.
sub show_warnings($) {
    my ($val) = @_;
    if (!@warnings) {
        ###prt( "\nNo warnings issued.\n\n" );
        return;
    }
    my $total = scalar(@warnings);
    prt( "\nGot ".$total." WARNINGS...\n" );
    foreach my $warning (@warnings) {
        prt("$warning\n");
    }
    prt("\n");
}

# Common exit path: print an optional final message, list the collected
# warnings, close the log file and terminate the program.
#   $val - process exit code
#   $msg - final message; an empty string prints nothing
sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        # make sure the message ends with a newline before it is printed
        $msg = $msg."\n" unless ($msg =~ /\n$/);
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


# Print a warning through prt() and remember it in @warnings so that
# show_warnings() can list it again at exit.
#   $tx - warning text; one trailing newline, if present, is removed before storing
sub prtw($) {
    my ($text) = @_;
    $text =~ s/\n$//;
    prt($text."\n");
    push(@warnings,$text);
}


# Recursively collect every file below a folder into @file_list and count
# the extensions seen in %exthash.
#   $inf - folder to read
# Each row pushed onto @file_list is
#   [ full path, lower-cased "name.ext", 0, 0, reference to the stat result ]
# where the two zero slots are filled in later by do_one_folder().
# A folder that cannot be opened is reported through prt() and skipped.
sub process_folder($) {
    my ($inf) = @_;
    # Lexical directory handle instead of the shared bareword DIR.
    my $dh;
    if (!opendir($dh, $inf)) {
        prt( "ERROR: Can NOT open $inf ... $! ... \n" );
        return;
    }
    my @files = readdir($dh);
    closedir($dh);

    # Same rule as scan_dir(): only add a separator when the caller did not
    # already supply one, so paths never contain a doubled separator.
    my $base = $inf;
    $base .= "\\" if ( !($base =~ /(\\|\/)$/) );

    my @subdirs = ();
    foreach my $file (@files) {
        next if (($file eq '.')||($file eq '..'));
        my $ff = $base.$file;
        if (-d $ff) {
            push(@subdirs,$ff);
            next;
        }
        my ($nm,$dir,$ext) = fileparse( $ff, qr/\.[^.]*/ );
        my $sb = stat($ff);
        $ext = lc($ext);
        my $name = lc($nm).$ext;    # names are compared case-insensitively
        #                  0    1      2  3  4
        push( @file_list, [$ff, $name, 0, 0, \$sb] );
        $exthash{$ext}++;
    }
    # Descend only after the handle is closed, so just one is open at a time.
    foreach my $fil (@subdirs) {
        process_folder($fil);
    }
}

# Check the single input folder ($folder_list[0]) for files that share the
# same lower-cased name, and report them.
#
# For every row of @file_list that has at least one namesake:
#   slot 2 is set to the number of OTHER files with the same name, and
#   slot 3 to the index of a partner row (the highest-indexed namesake).
#
# Output depends on verbosity:
#   VERB5 - path, size and modification time of the file and its partner
#   VERB1 - a 'del "path"' line per duplicated file
sub do_one_folder() {
    process_folder($folder_list[0]);
    my $incnt = scalar @file_list;
    prt( "Got $incnt file items to check ...\n" );

    # Group row indexes by name in one pass (indexes are pushed in ascending
    # order), instead of comparing every row against every other row.
    my %idx_of = ();
    for my $i (0 .. $incnt - 1) {
        push(@{ $idx_of{ $file_list[$i][1] } }, $i);
    }

    my $dup_cnt = 0;
    my $width   = 0;    # longest path among the duplicated files
    foreach my $group (values %idx_of) {
        my $members = scalar @{$group};
        next if ($members < 2);
        my $last = $group->[-1];
        my $prev = $group->[-2];
        foreach my $i (@{$group}) {
            $file_list[$i][2] = $members - 1;
            # partner = highest-indexed other file with the same name
            $file_list[$i][3] = ($i == $last) ? $prev : $last;
            $dup_cnt++;
            my $len = length($file_list[$i][0]);
            $width = $len if ($len > $width);
        }
    }
    prt( "Got $dup_cnt duplicate names...\n" );

    # Limit the padding width; longer paths are printed unpadded, not cut.
    $width = $max_name_wid if ($width > $max_name_wid);

    for my $i (0 .. $incnt - 1) {
        next if ($file_list[$i][2] <= 0);
        #prt( "Dupe $name ...\n" );
        if (VERB5()) {
            my $j = $file_list[$i][3];
            foreach my $path ($file_list[$i][0], $file_list[$j][0]) {
                # Each line shows the size and time of ITS OWN file; a file
                # that can no longer be stat-ed is shown with '?' columns.
                my ($size,$stamp) = ('?','?');
                my $sb = stat($path);
                if (defined $sb) {
                    $size  = get_nn($sb->size);
                    $stamp = lu_get_YYYYMMDD_hhmmss($sb->mtime);
                } else {
                    prtw("WARNING: Unable to stat [$path] ... $!\n");
                }
                prt(sprintf("%-*s %12s %s\n", $width, $path, $size, $stamp));
            }
        } elsif (VERB1()) {
            prt( "del \"$file_list[$i][0]\"\n" );
        }
    }
    prt( "Done $dup_cnt duplicate names...\n" ) if (VERB1());
}

# Recursively collect every file below a folder into the caller's list and
# count the extensions seen in %exthash.
#   $ra  - reference to the array that receives the rows
#   $inf - folder to scan; must be a non-empty string
#   $lev - recursion depth; the "Scanning" line is printed only at depth 0
# Each row pushed is [ file name, full path, lower-cased "name.ext", 0 ].
# An empty/undefined folder, or one that cannot be opened, ends the program
# through pgm_exit(1,...).
sub scan_dir($$$) {
    my ($ra,$inf,$lev) = @_;
    # An undefined value is treated like an empty one.
    pgm_exit(1,"ERROR: scan_dir: Passed null value!\n")
        if (!defined($inf) || (length($inf) == 0));
    prt("Scanning [$inf]...\n") if ($lev == 0);

    # Lexical directory handle instead of the shared bareword DIR.
    opendir(my $dh, $inf)
        or pgm_exit(1,"ERROR: Can NOT open [$inf] ... $! ... \n" );
    my @files = readdir($dh);
    closedir($dh);

    # Add a separator only when the caller did not already supply one.
    $inf .= "\\" if ( !($inf =~ /(\\|\/)$/) );

    my @subdirs = ();
    foreach my $file (@files) {
        next if (($file eq '.')||($file eq '..'));
        my $ff = $inf.$file;
        if (-d $ff) {
            push(@subdirs,$ff);
            next;
        }
        my ($nm,$dir,$ext) = fileparse( $ff, qr/\.[^.]*/ );
        $ext = lc($ext);
        my $name = lc($nm).$ext;    # names are compared case-insensitively
        #              0      1    2      3
        push( @{$ra}, [$file, $ff, $name, 0] );
        $exthash{$ext}++;
    }
    # Descend only after the handle is closed, so just one is open at a time.
    foreach my $fil (@subdirs) {
        scan_dir($ra,$fil,$lev+1);
    }
}

# Report, for every entry of list 1, whether a file with the same
# lower-cased name exists in list 2.
#   $ra1, $ra2 - references to arrays of rows [ file name, full path, lc name, 0 ]
# Matches are reported through prtw() (so they are listed again at exit),
# misses through prt().
sub compare_lists($$) {
    my ($ra1,$ra2) = @_;    # = \@arr1,\@arr2
    my $cnt1 = scalar @{$ra1};
    my $cnt2 = scalar @{$ra2};
    prt("Comparing list 1 = $cnt1, with list 2 = $cnt2...\n");

    # Index list 2 by name once, so each list 1 entry needs one lookup
    # instead of a scan over the whole of list 2.
    my %in_list2 = ();
    foreach my $row (@{$ra2}) {
        $in_list2{ $row->[2] } = 1;
    }

    # Widest file name in list 1, used to line up the report.
    my $width = 0;
    foreach my $row (@{$ra1}) {
        my $len = length($row->[0]);
        $width = $len if ($len > $width);
    }

    foreach my $row (@{$ra1}) {
        my $fil1 = sprintf("%-*s", $width, $row->[0]);
        if (exists $in_list2{ $row->[2] }) {
            prtw("File $fil1 is DUPLICATED in list 2!\n");
        } else {
            prt("File $fil1 NOT found in list 2!\n");
        }
    }
    #prt("WARNING: Coding NOT completed!\n");
}

# List every file extension collected in %exthash together with the number
# of files that carried it. Extensions are listed in sorted order so the
# report is the same from run to run.
sub show_extensions() {
    my $cnt = scalar keys(%exthash);
    prt("Got list of $cnt extensions...\n");
    foreach my $key (sort keys %exthash) {
        prt( "Extension $key occurs ".$exthash{$key}." times ...\n" );
    }
    prt("Done list of $cnt extensions...\n");
}

# Main line: parse the command line, then either check one folder for
# duplicate names or compare two inputs against each other.
parse_args(@ARGV);
###prt( "$0 ... Processing $in_folder ...\n" );
my $item_count = scalar @folder_list;
if ($item_count == 1) {
    do_one_folder();    # check folder for duplicate names???
} elsif ($item_count == 2) {
    my ($f1,$f2) = @folder_list;
    my (@arr1,@arr2);
    if (-d $f1) {
        scan_dir(\@arr1,$f1,0);
    } elsif (-f $f1) {
        # A single file becomes a one-row list in the layout scan_dir() builds:
        #               0      1    2      3
        #push( @{$ra}, [$file, $ff, $name, 0] );
        my ($nm) = fileparse($f1);
        push(@arr1,    [$nm,   $f1, lc($nm),0]);
    } else {
        pgm_exit(1,"First item is neither file, nor folder [$f1]!\n");
    }
    # The second item is always scanned as a folder.
    scan_dir(\@arr2,$f2,0);
    compare_lists(\@arr1,\@arr2);
}

show_extensions() if ($show_ext);
pgm_exit(0,"");

################################

# Print the usage text, one prt() call per line.
sub give_help {
    my @help_lines = (
        "$pgmname: version $VERS\n",
        "Usage: $pgmname [options] in-folder/in-file [in-folder2]\n",
        "Options:\n",
        " --help (-h or -?) = This help, and exit 0.\n",
        " --load       (-l) = Load log at end.\n",
        " --show       (-s) = Also show extension list.\n",
        " --verb       (-v) = Bump verbosity.\n",
        "If just ONE directory given, then it will be checked for duplicate files.\n",
        "If TWO folders given, they will be compared, and duplicate files reported.\n",
    );
    foreach my $line (@help_lines) {
        prt($line);
    }
}
# Exit with an error unless at least one value follows the option name.
#   $arg - option being processed (used in the message)
#   @av  - the values passed after it
sub need_arg {
    my ($arg,@av) = @_;
    unless (@av) {
        pgm_exit(1,"ERROR: [$arg] must have following argument!\n");
    }
}

# Parse the command line.
#   @av - the arguments (normally @ARGV)
# Options (any number of leading dashes is accepted):
#   h... / ?  - show help and exit 0
#   l...      - set $load_log
#   s...      - set $show_ext
#   v, vv, vN - raise $verbose by the number of leading v's, or set it to N
#               (upper or lower case)
# Every other argument must be an existing file or folder and is appended
# to @folder_list (at most two). Any error ends the program via pgm_exit(1,...).
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg,$cnt);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            # strip all leading dashes: -v, --verb, ---v are alike
            ($sarg = $arg) =~ s/^-+//;
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^l/) {
                $load_log = 1;
            } elsif ($sarg =~ /^s/) {
                $show_ext = 1;
            } elsif ($sarg =~ /^v/i) {
                # Both forms are matched case-insensitively, like the test
                # above, so -V and -V5 behave like -v and -v5.
                if ($sarg =~ /^v(\d+)$/i) {
                    $verbose = $1;
                } elsif ($sarg =~ /^(v+)/i) {
                    $verbose += length($1);
                }
                prt("Set verbosity to $verbose\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            if ((-d $in_file)||(-f $in_file)) {
                push(@folder_list,$in_file);
                $cnt = scalar @folder_list;
                if ($cnt > 2) {
                    pgm_exit(1,"ERROR: Can only give two folders. folder [$in_file] is 3rd!\n");
                }
                prt("Set input $cnt to [$in_file]\n");
            } else {
                pgm_exit(1,"ERROR: Can NOT locate folder [$in_file]!\n");
            }
        }
        shift @av;
    }

    # NOTE(review): the debug default only sets $in_file; nothing is added
    # to @folder_list, so the main code has nothing to process - confirm intent.
    if ((length($in_file) ==  0) && $debug_on) {
        $in_file = $def_file;
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    #if (! -f $in_file) {
    #    pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    #}
}


# eof - chkdupes.pl

