#!/perl -w
# NAME: cleandiff02.pl
# AIM: To remove some things from an input diff file ...
# 1 - remove Only in blah blah ...
# 2 - remove diff of .dsw, .dsp, .sln, .vcproj files ...
# and write results to an output file
# 10/12/2011 - If VERB, output a changed file list
# 17/11/2011 - Add -o <file>, to set the OUTPUT of the cleaned diff
# 29/06/2011 - Add -x <file>, to exclude a specific 'file' from the diff
# 26/05/2011 - Improve UI
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
# Directory holding the shared helper library; it is also where the log file
# and the default output file are written.
my $perl_dir = 'C:\GTools\perl';
# Put that directory first on the module search path so the require below finds it.
unshift(@INC, $perl_dir);
# NOTE(review): prt, open_log, close_log, write2file, space_split and trim_all
# are not defined in this file - presumably they come from lib_utils.pl.
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl'!\n";
# log file stuff
# NOTE(review): $LF is not used in this file - presumably the log handle
# shared with lib_utils.pl; confirm there.
our ($LF);
# Reduce $0 to the bare script name (text after the last '\' or '/').
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    # the capturing split also returns the separators, but the last
    # element is still the file name
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
# Log file is <perl_dir>\temp.<script name>.txt
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# features (all switchable from the command line, see parse_args)
my $load_log = 0;   # -ll: passed to close_log() at exit
my $keep_only_in = 0;   # -ko: keep 'Only in ...' lines, appended after the diff
my $keep_bin_files = 0; # -kb: keep 'Binary files ...' lines, appended after the diff

# program variables
my $in_file  = '';  # input diff file (required)
my $out_file = $perl_dir.'\tempclean.txt';  # default output, overridden by -o
my @user_xcludes = ();  # -x: file names/patterns to drop from the diff
my @xclude_dirs = ();   # -d: directory names to drop from the diff
my $verbosity = 0;      # -v[n]: tested through VERB1() .. VERB9()
my $in_input_file = 0;  # nesting depth of -@file loading; final checks run only at 0

# One [name, file1, file2] entry per diff header kept by process_file()
my @changed_files = ();

# Verbosity gates: each VERBn() is true once the current $verbosity has
# reached level n. All four share one threshold test.
sub _verbosity_at_least {
    my ($level) = @_;
    return ($verbosity >= $level);
}
sub VERB1() { return _verbosity_at_least(1); }
sub VERB2() { return _verbosity_at_least(2); }
sub VERB5() { return _verbosity_at_least(5); }
sub VERB9() { return _verbosity_at_least(9); }

my $dbg_mww = 0;    # set non-zero to trace match_with_wild() through prt()

# match_with_wild( $fil1, $fil2 )
# Compare two strings where EITHER one may contain the wildcards '?'
# (exactly one character) and '*' (zero or more characters).
# Returns 1 when some text exists that both strings describe, else 0.
# Two empty strings match; an empty string matches a string made only of '*'.
# The comparison is case-sensitive - callers lower-case first if they need to.
#
# Implementation: a small table, filled from the ends of both strings
# backwards, where $ok[$i][$j] says whether the tail of $fil1 from offset $i
# can match the tail of $fil2 from offset $j. This handles the cases a single
# left-to-right scan misses, such as a trailing '*' after the other string
# has run out ("abc" vs "abc*"), or a '*' that must swallow text which looks
# like what follows it ("abcabc" vs "*bc").
sub match_with_wild($$) {
    my ($fil1,$fil2) = @_;
    $fil1 = '' if (!defined $fil1);
    $fil2 = '' if (!defined $fil2);
    my @ch1 = split(//,$fil1);
    my @ch2 = split(//,$fil2);
    my $len1 = scalar @ch1;
    my $len2 = scalar @ch2;
    prt("match_with_wild: [$fil1] [$fil2] ") if ($dbg_mww);
    my @ok = ();
    my ($i,$j,$c1,$c2,$res);
    for ($i = $len1; $i >= 0; $i--) {
        for ($j = $len2; $j >= 0; $j--) {
            if (($i == $len1)&&($j == $len2)) {
                $ok[$i][$j] = 1;    # both ran out of chars together
                next;
            }
            # '' stands for "this side has run out"
            $c1 = ($i < $len1) ? $ch1[$i] : '';
            $c2 = ($j < $len2) ? $ch2[$j] : '';
            $res = 0;
            if (($c1 eq '*')||($c2 eq '*')) {
                # An asterisk either stands for nothing (step past it) or
                # swallows one item of the other side (step past that item).
                if ($c1 eq '*') {
                    $res = 1 if ($ok[$i+1][$j]);
                    $res = 1 if (($j < $len2) && $ok[$i][$j+1]);
                }
                if ($c2 eq '*') {
                    $res = 1 if ($ok[$i][$j+1]);
                    $res = 1 if (($i < $len1) && $ok[$i+1][$j]);
                }
            } elsif (($i < $len1)&&($j < $len2)) {
                # plain char or '?': both sides must consume one char
                if (($c1 eq $c2)||($c1 eq '?')||($c2 eq '?')) {
                    $res = 1 if ($ok[$i+1][$j+1]);
                }
            }
            # else: one side ran out while the other still needs a char
            $ok[$i][$j] = $res;
        }
    }
    $res = $ok[0][0] ? 1 : 0;
    prt(" = $res\n") if ($dbg_mww);
    return $res;
}

# matches_wild( $fil, $wild )
# Case-insensitive comparison of two file names, done separately on the
# name part and on the extension (text after the last '.').
# Returns a non-zero code telling which rule matched, or 0 for no match:
#   1 = name and extension identical
#   2 = names identical and the second extension is '*'
#   3 = second name is '*' and extensions identical
#   4 = both parts accepted by match_with_wild()
sub matches_wild($$) {
    my ($fil,$wild) = @_;
    my $sfx = qr/\.[^.]*/;
    my ($nm_f,$dir_f,$ext_f) = fileparse( $fil, $sfx );
    my ($nm_w,$dir_w,$ext_w) = fileparse( $wild, $sfx );
    # drop the leading dot from each extension
    $ext_f =~ s/^\.//;
    $ext_w =~ s/^\.//;
    prt("[v9] matches_wild: [$nm_f] [$nm_w] and [$ext_f] [$ext_w]\n") if (VERB9());
    # everything below compares lower-cased text
    ($nm_f,$nm_w,$ext_f,$ext_w) = map { lc($_) } ($nm_f,$nm_w,$ext_f,$ext_w);
    my $same_name = ($nm_f eq $nm_w);
    my $same_ext  = ($ext_f eq $ext_w);
    if ($same_name) {
        return 1 if ($same_ext);
        return 2 if ($ext_w eq '*');
    }
    return 3 if (($nm_w eq '*') && $same_ext);
    if (match_with_wild($nm_f,$nm_w) && match_with_wild($ext_f,$ext_w)) {
        return 4;
    }
    return 0;
}

# has_wild( $txt )
# Returns 1 when the text contains at least one wildcard character
# ('?' or '*'), otherwise 0.
sub has_wild($) {
    my ($txt) = @_;
    return 1 if ($txt =~ /[?*]/);
    return 0;
}

# in_user_excludes( $fil )
# Check the file-name part of $fil against every entry of @user_xcludes.
# Returns 1 on an exact (case-sensitive) name match, 2 when a wildcard
# comparison through matches_wild() succeeds, and 0 when nothing matches.
sub in_user_excludes($) {
    my ($fil) = @_;
    my ($nam) = fileparse($fil);
    my $fil_is_wild = has_wild($fil);
    for my $item (@user_xcludes) {
        if ($nam eq $item) {
            return 1;
        }
        # only bother with the wildcard compare when one side has a wildcard
        next unless ($fil_is_wild || has_wild($item));
        # note: the exclude entry is passed first, the file name second
        if (matches_wild($item,$nam)) {
            return 2;
        }
    }
    return 0;
}

# in_dir_excludes( $tx )
# Returns 1 when the path $tx contains one of the @xclude_dirs entries as a
# directory component, i.e. bounded by '\' or '/' on both sides; else 0.
# The directory name is matched literally: \Q...\E stops characters such as
# '.', '+' or '\' in a user-supplied name from being read as regex syntax
# (which could match the wrong paths, or abort on an invalid pattern).
sub in_dir_excludes($) {
    my ($tx) = @_;
    my ($dir);
    foreach $dir (@xclude_dirs) {
        if ($tx =~ /(\\|\/)\Q$dir\E(\\|\/)/) {
            return 1;
        }
    }
    return 0;
}

# debug
# $dbg_01: when non-zero, process_file() echoes each line it examines,
# prefixed with its line number.
my $dbg_01 = 0; # show every line
# Messages collected by prtw() and listed again by show_warnings() at exit.
my @warnings = ();

# prtw( $txt )
# Output a warning now through prt() and remember it in @warnings so it can
# be listed again at exit. One trailing newline is removed before storing.
sub prtw($) {
    my ($txt) = @_;
    $txt =~ s/\n$//;
    prt("$txt\n");
    push(@warnings,$txt);
}

# show_warnings( $dbg )
# List every warning collected by prtw(), preceded by a count line.
# Nothing is output when there are no warnings ($dbg is accepted but has
# no effect).
sub show_warnings($) {
    my ($dbg) = shift;
    my $count = scalar @warnings;
    if ($count) {
        prt( "\nGot $count WARNINGS ...\n" );
        foreach my $warning (@warnings) {
            prt("$warning\n" );
        }
        prt("\n");
    }
}


# pgm_exit( $val, $msg )
# Common exit path: list collected warnings, output $msg when it is not
# empty, close the log (passing the load-log option) and exit with $val.
sub pgm_exit($$) {
    my $val = shift;
    my $msg = shift;
    show_warnings($val);
    if (length($msg)) {
        prt( "$msg" );
    }
    close_log($outfile,$load_log);
    exit($val);
}

# give_help()
# Output the brief command-line help through prt(). The list covers every
# option parse_args() accepts, including -kb and -@<file>, which were
# implemented but previously not mentioned.
sub give_help {
    prt( "Brief HELP for $pgmname script ...\n" );
    prt( "$pgmname [options] -in:input_file -out:output_file\n" );
    prt( "options:\n" );
    prt( " -ko       = Keep 'Only in ...' lines, and add to end.\n" );
    prt( " -kb       = Keep 'Binary files ...' lines, and add to end.\n" );
    prt( " -ll       = Load log at end.\n" );
    prt( " -v[n]     = Bump or set verbosity. Def=$verbosity.\n");
    prt( " -x <file> = Exclude this file from the cleaned diff.\n");
    prt( " -d <dir>  = Exclude files with this folder from the cleaned diff.\n");
    prt( " -o <file> = Set the OUTPUT cleaned diff file.\n");
    prt( " -\@<file>  = Load more arguments from this file.\n");
    prt( "Defaults: in:$in_file, out:$out_file\n" );
    prt( "Will exit if error found in command!\n" );
}

# is_excluded_file( $tx )
# Decide whether a path belongs in the cleaned diff. Returns 0 to keep it,
# or a non-zero code naming the first rule that excluded it:
#   1-4 = MSVC build files (.dsp, .dsw, .sln, .vcproj)
#   5-7 = inside a .svn, CVS or .git directory
#   8   = matches a user file exclude
#   9   = inside a user directory exclude
sub is_excluded_file($) {
    my ($tx) = @_;
    # fixed rules, tried in this order: [ return code, pattern ]
    my @rules = (
        [ 1, qr/\.dsp$/i ],
        [ 2, qr/\.dsw$/i ],
        [ 3, qr/\.sln$/i ],
        [ 4, qr/\.vcproj$/i ],
        [ 5, qr/(\\|\/)\.svn(\\|\/)/ ],
        [ 6, qr/(\\|\/)CVS(\\|\/)/ ],
        [ 7, qr/(\\|\/)\.git(\\|\/)/ ],
    );
    foreach my $rule (@rules) {
        my ($code,$re) = @{$rule};
        return $code if ($tx =~ $re);
    }
    # then the excludes given on the command line
    if (in_user_excludes($tx)) {
        return 8;
    }
    if (in_dir_excludes($tx)) {
        return 9;
    }
    return 0;
}

# get_diff_files( $lin, \$fil1, \$fil2 )
# $lin is a 'diff' command line with the leading 'diff' already removed.
# It is split with space_split(); items beginning with '-' are skipped as
# options. The first remaining item is stored through $rfil1, the second
# through $rfil2, any further ones are ignored.
# Returns how many of the two were stored (0, 1 or 2). Exits the program
# when the line splits into fewer than two items.
sub get_diff_files($$$) {
    my ($lin,$rfil1,$rfil2) = @_;
    my @arr = space_split($lin);
    pgm_exit(1,"ERROR: Line [$lin] did NOT split correctly!\n") if (scalar @arr < 2);
    my @names = grep { $_ !~ /^-/ } @arr;
    my $cnt = scalar @names;
    $cnt = 2 if ($cnt > 2);
    ${$rfil1} = $names[0] if ($cnt >= 1);
    ${$rfil2} = $names[1] if ($cnt >= 2);
    return $cnt;
}

# Helper for process_file(): examine one 'diff ...' header line.
# Extracts the two file names, warns through prtw() when that fails, and
# asks is_excluded_file() about the first name. $pass (1 or 2) only tags the
# messages: 1 = header met in kept output, 2 = header met while skipping.
# Returns ( exclude code, file1, file2 ).
sub _check_diff_header {
    my ($line,$lnnum,$pass) = @_;
    my $sline = $line;
    $sline =~ s/^diff\s+//;
    my $fil1 = '';
    my $fil2 = '';
    my $cnt = get_diff_files($sline,\$fil1,\$fil2);
    if (($cnt != 2)||(length($fil1)==0)) {
        prtw("WARNING: get_diff_files:$pass: FAILED on [$sline] $cnt f1=[$fil1] f2=[$fil2]\n");
    }
    my $res = is_excluded_file($fil1);
    prt("[v5]$lnnum: $line [$fil1] ($res) $pass\n") if (VERB5());
    return ($res,$fil1,$fil2);
}

# process_file( $fil )
# Read the diff file $fil and return a reference to the array of lines that
# make up the cleaned diff (an empty array when the file cannot be opened).
#  - A 'diff ...' header whose first file is excluded starts a skipped
#    section; its lines are dropped until the next header that is NOT
#    excluded, or until a project separator line.
#  - 'Only in ...' and 'Binary files ...' lines never go into the body. When
#    -ko / -kb is set they are collected (unless is_excluded_file() returns
#    5 or more for them) and appended after the body. This also applies to
#    such lines that directly follow an excluded diff; previously the
#    skipping loop swallowed them.
#  - A '=====... name' separator line is always kept and ends any skipped
#    section; with -ko it is also copied into the 'Only in' list.
#  - Every kept header adds [name, file1, file2] to @changed_files.
sub process_file($) {
    my ($fil) = @_;
    my @nlines = ();
    my @only = ();
    my @bins = ();
    my $inf;
    # 3-arg open with a lexical handle: the name is taken literally
    if (!open($inf, '<', $fil)) {
        prt( "ERROR: Unable to OPEN/READ file $fil ($!) ... check name, location...\n" );
        return \@nlines;
    }
    my @lines = <$inf>;
    close($inf);
    my $lncnt = scalar @lines;
    prt( "Process $lncnt lines from $fil ...\n" );
    my $skipping = 0;   # true while inside the diff of an excluded file
    my ($i,$line,$lnnum,$res,$fil1,$fil2);
    for ($i = 0; $i < $lncnt; $i++) {
        $line = $lines[$i];
        chomp $line;
        $lnnum = $i + 1;
        prt("$lnnum: [$line]\n") if ($dbg_01);
        if ($line =~ /^diff\s+.+/) {
            ($res,$fil1,$fil2) = _check_diff_header($line,$lnnum,($skipping ? 2 : 1));
            $skipping = $res ? 1 : 0;
            next if ($skipping);
            if (length($fil1)) {
                my ($name) = fileparse($fil1);
                push(@changed_files,[$name,$fil1,$fil2]);
            }
        } elsif ($line =~ /^Only\s+in\s+/) {
            if ($keep_only_in) {
                $res = is_excluded_file($line);
                push(@only,$line) if ($res < 5);   # EXCLUDE \.git\, \.svn\, etc... lines
            }
            next;
        } elsif ($line =~ /^\s*Binary\s+files\s+/) {
            if ($keep_bin_files) {
                $res = is_excluded_file($line);
                push(@bins,$line) if ($res < 5);   # EXCLUDE \.git\, \.svn\, etc... lines
            }
            next;
        } elsif ($line =~ /^=================(=+)\s(\w+)/) {
            # a new project section: whatever was being skipped is over
            $skipping = 0;
            push(@only,$line) if ($keep_only_in);
        }
        next if ($skipping);
        push(@nlines,$line);
    }
    push(@nlines, @only) if ($keep_only_in);
    push(@nlines, @bins) if ($keep_bin_files);
    return \@nlines;    # return the array reference
}

# write_out_lines( \@lines, $out )
# Write the given lines, each terminated by a newline, to the file $out
# through write2file() and report the count. Does nothing for an empty list.
sub write_out_lines($$) {
    my ($rnls,$out) = @_;
    my $cnt = scalar @{$rnls};
    if ($cnt) {
        my $txt = join('', map { "$_\n" } @{$rnls});
        write2file($txt,$out);
        prt("Written $cnt lines to [$out]...\n" );
    }
}

# show_changed_files()
# Report how many diff headers were kept and, at verbosity 1 or more,
# list the first file of each one.
sub show_changed_files()
{
    my $cnt = scalar @changed_files;
    prt("Cleaned diff contains $cnt changed files...\n");
    return unless ($cnt && VERB1());
    # each entry is [name, file1, file2]; only file1 is shown
    foreach my $entry (@changed_files) {
        my $fil1 = $entry->[1];
        prt(" $fil1\n");
    }
}

# =======================================
# MAIN

# Read the command line, clean the input diff, write the result to the
# output file, report the kept files, then leave through the common exit.
parse_args(@ARGV);
write_out_lines(process_file($in_file),$out_file);
show_changed_files();
pgm_exit(0,"");

# =======================================

# need_arg( $arg, @following )
# Exit with an error unless at least one more argument follows $arg.
sub need_arg {
    my ($arg,@rest) = @_;
    if (!@rest) {
        pgm_exit(1,"ERROR: Arg [$arg] must be followed by a 2nd argument!\n");
    }
}

# local_strip_both_quotes( $txt )
# Remove one pair of matching quotes ('...' or "...") wrapped around the
# text. A bare pair of quotes gives an empty string; text that is not
# wrapped in a matching pair is returned unchanged.
sub local_strip_both_quotes($) {
    my ($txt) = @_;
    # single quotes are tried before double quotes
    foreach my $wrapped (qr/^'(.+)'$/, qr/^"(.+)"$/) {
        return $1 if ($txt =~ $wrapped);
    }
    if (($txt eq '""') || ($txt eq "''")) {
        return '';
    }
    return $txt;
}

# load_input_file( $arg, $file )
# Read more command arguments from $file and hand them to parse_args().
#  - blank lines and lines starting with '#' are ignored
#  - a line ending in '\' is joined with the following line
#  - each line is split with space_split() and every item has one pair of
#    surrounding quotes removed
# $in_input_file is raised around the nested parse_args() call so its final
# input-file checks are left to the outermost call.
# $arg (the original option text) is accepted but not used.
# Exits the program when the file cannot be opened.
sub load_input_file($$) {
    my ($arg,$file) = @_;
    my $inf;
    # 3-arg open with a lexical handle: the name is taken literally
    if (!open($inf, '<', $file)) {
        pgm_exit(1,"ERROR: Unable to 'open' file [$file]! $!\n");
    }
    my @lines = <$inf>;
    close($inf);
    my @carr = ();
    my ($line,@arr,$tmp,$i);
    my $lncnt = scalar @lines;
    for ($i = 0; $i < $lncnt; $i++) {
        $line = $lines[$i];
        $line = trim_all($line);
        next if (length($line) == 0);
        next if ($line =~ /^#/);
        # trailing backslash = continued on the next line
        while (($line =~ /\\$/)&&(($i+1) < $lncnt)) {
            $i++;
            $line =~ s/\\$//;
            $line .= trim_all($lines[$i]);
        }
        @arr = space_split($line);
        foreach $tmp (@arr) {
            $tmp = local_strip_both_quotes($tmp);
            push(@carr,$tmp);
        }
    }
    $in_input_file++;
    parse_args(@carr);
    $in_input_file--;
}


# parse_args( @args )
# Process the command arguments, setting the file-level option variables.
# An option starts with '-' or '/' (extra leading '-' are ignored):
#   -h...      help, then exit      (any argument containing '?' does too)
#   -in:<file> or -i <file>   input diff file
#   -out:<file> or -o <file>  output file
#   -ko / -kb  keep 'Only in' / 'Binary files' lines
#   -ll        load the log at end
#   -x <file>  add a user file exclude
#   -d <dir>   add a user directory exclude
#   -v<n> sets the verbosity, -v / -vv... bumps it once per 'v'
#   -@<file>   load more arguments from <file>
# A bare argument is taken as the input file. An unknown option shows the
# help and exits with 1.
# Unless called while loading an argument file, the sub finally exits with
# an error when no input file was given or it does not exist.
# (A second, unreachable '-o' branch that followed the '-@' case has been
# removed; '-o' was and is handled by the first one.)
sub parse_args {
    my (@av) = @_;
    my ($arg,$ch,$sarg);
    while (@av) {
        $arg = $av[0];
        $ch = substr($arg,0,1);
        if ($arg =~ /\?/) {
            give_help();
            pgm_exit(0,"Help exit\n");
        } elsif (($ch eq '-')||($ch eq '/')) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if ($sarg =~ /^h/) {
                give_help();
                pgm_exit(0,"Help exit\n");
            } elsif ($sarg =~ /^i/) {
                if ($sarg =~ /^in:(.+)$/) {
                    $in_file = $1;
                } else {
                    # value is the next argument
                    need_arg(@av);
                    shift @av;
                    $sarg = $av[0];
                    $in_file = $sarg;
                }
                prt( "Set input file to [$in_file] ...\n" );
            } elsif ($sarg =~ /^o/) {
                if ($sarg =~ /^out:(.+)$/) {
                    $out_file = $1;
                } else {
                    # value is the next argument
                    need_arg(@av);
                    shift @av;
                    $sarg = $av[0];
                    $out_file = $sarg;
                }
                prt( "Set output file to [$out_file] ...\n" );
            } elsif ($sarg eq 'ko') {
                $keep_only_in = 1;
                prt( "Set to keep 'Only in ' lines...\n" );
            } elsif ($sarg eq 'kb') {
                $keep_bin_files = 1;
                prt("Set to keep 'Binary files ' lines...\n");
            } elsif ($sarg eq 'll') {
                $load_log = 1;
                prt( "Set to load log at end...\n" );
            } elsif ($sarg =~ /^x/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                push(@user_xcludes,$sarg);
                prt("Added [$sarg] to user file excludes.\n");
            } elsif ($sarg =~ /^d/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                push(@xclude_dirs,$sarg);
                prt("Added [$sarg] to user directory excludes.\n");
            } elsif ($sarg =~ /^v/) {
                if ($sarg =~ /^v(\d+)$/) {
                    $verbosity = $1;
                } else {
                    # one bump per leading 'v'
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Set verbosity to [$verbosity]\n");
            } elsif ($sarg =~ /^\@/) {
                $sarg = substr($sarg,1);
                prt("Loading from input file [$sarg]\n");
                load_input_file($arg,$sarg);
            } else {
                prt( "ERROR: Unknown argument [$arg]!\n" );
                give_help();
                pgm_exit(1,"Error exit\n");
            }
        } else {
            # assume a bare argument is the INPUT file
            $in_file = $arg;
            prt( "Set input file to [$in_file] ...\n" );
        }
        shift @av;
    }

    # final checks, left to the outermost call
    if ($in_input_file == 0) {
        if (length($in_file) == 0) {
            prt( "ERROR: NO input file found in command! Aborting...\n" );
            pgm_exit(1,"No file exit\n");
        } elsif ( ! -f $in_file) {
            prt( "ERROR: Unable to locate [$in_file] file! Aborting...\n" );
            pgm_exit(1,"No file exit\n");
        }
    }
}

# eof - cleandiff02.pl
