#!/usr/bin/perl -w
# NAME: findinfile.pl
# AIM: Find a string in a file, using perl regex
# 07/07/2013 - Improve UI
# 31/05/2011 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use File::stat;
use Cwd;
# Directory that holds the shared helper library; it is also where the
# log file is written. It is put at the front of @INC so that the
# require below can locate 'lib_utils.pl'.
my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
# NOTE(review): prt, open_log, close_log, trim_all, space_split and
# write2file are not defined in this file - presumably they come from
# lib_utils.pl; confirm against that library.
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl ...\n";
# log file stuff
# NOTE(review): $LF is not used in this file - presumably the log handle
# shared with lib_utils.pl; confirm.
our ($LF);
# Reduce $0 to the bare script name (text after the last / or \).
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
# Log file name is derived from the script name and lives in $perl_dir.
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# Program version string, shown by give_help().
my $VERS = "0.0.2 2013-07-07";
###my $VERS = "0.0.1 2011-05-31";

# user variables
my $load_log = 0;       # set to 1..3 by -l/-ll/-lll; passed to close_log() at exit
my $in_file = '';       # most recent input file named on the command line
my $find_this = '';     # regex given with -f
my @in_files = ();      # every file queued for scanning
my $whole_finds = 0;    # -w: regex must match a whole word
my $starts_with = 0;    # -s: regex must match at the start of a word
my $ends_with = 0;      # -e: regex must match at the end of a word
my $out_file = '';      # -o: file that show_finds() writes the report to
my $files_scanned = 0;  # running count of files opened by process_in_file()
my $total_lines = 0;    # running count of lines read over all files
my $total_finds = 0;    # running count of matching lines over all files

# When $def_dbg_on is true and no input file was given, parse_args()
# falls back to $def_file and $def_find.
my $def_dbg_on = 0;
my $def_file = 'C:\DTEMP\explist.txt';
# my $def_find = "\\b(\\d{7})\\.(\\d+)\\b";
my $def_find = '\b(\d{7})\.(\d+)\b';

### program variables
my @warnings = ();      # warning texts collected by prtw(), listed by show_warnings()
my $cwd = cwd();        # only referenced by a commented-out line in this file
my $os = $^O;           # not referenced by the code visible in this file
my $verbosity = 0;      # raised or set by -v; tested through VERB1()..VERB9()

# Verbosity predicates: each one is true once $verbosity has reached
# the level in its name.
sub VERB1() {
    return (1 <= $verbosity);
}
sub VERB2() {
    return (2 <= $verbosity);
}
sub VERB5() {
    return (5 <= $verbosity);
}
sub VERB9() {
    return (9 <= $verbosity);
}

### debug
# Debug switch; no code visible in this file reads it.
my $dbg_01 = 0;

# List every warning collected in @warnings through prt().
# $val: when true and there are no warnings, a "No warnings issued."
#       note is printed; when false, nothing is printed in that case.
sub show_warnings($) {
    my $show_if_none = shift;
    my $count = scalar @warnings;
    if ($count == 0) {
        prt( "\nNo warnings issued.\n\n" ) if ($show_if_none);
    } else {
        prt( "\nGot ".$count." WARNINGS...\n" );
        foreach my $warning (@warnings) {
            prt("$warning\n");
        }
        prt("\n");
    }
}

# Common exit path for the program.
# $val: process exit status; also passed to show_warnings().
# $msg: optional final message; printed with a trailing newline added
#       when it does not already end in one.
# Prints the message and any warnings, closes the log, then exits.
sub pgm_exit($$) {
    my $val = shift;
    my $msg = shift;
    if (length($msg)) {
        if ($msg !~ /\n$/) {
            $msg .= "\n";
        }
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


# Print a warning now and remember it for the end-of-run summary.
# One trailing newline is removed from the text before it is stored in
# @warnings; the printed copy always ends with exactly one newline.
sub prtw($) {
    my ($warning) = @_;
    $warning =~ s/\n$//;
    prt($warning."\n");
    push(@warnings, $warning);
}

# Every match found so far, one [line text, line number, file name]
# triple per hit; appended by process_in_file(), reported by show_finds().
my @found_lines = ();


# Split a string into alternating runs of word characters (\w) and
# non-word characters, in their original order.
# Nothing is dropped: joining the returned pieces gives back the input.
# Returns the list of pieces (an empty list for an empty string).
sub split_words($) {
    my ($text) = @_;
    # Every character is either \w or \W, so a global match over
    # maximal runs of each kind tiles the whole string.
    my @pieces = ($text =~ /(\w+|\W+)/g);
    return @pieces;
}

# Break a line into the "words" that the -w/-s/-e match modes test.
# The line is first cut into items by space_split() (an external helper,
# presumably splitting on whitespace - confirm against lib_utils.pl).
# An item that is a single character, or made up only of word
# characters, is kept as it is; any other item is cut further by
# split_words() into runs of word and non-word characters.
# Returns a reference to the resulting array.
sub word_split($) {
    my $line = shift;
    my @arr = ();
    # Iterate the items from space_split(). The earlier code looped over
    # the still-empty result array, so it always returned nothing.
    foreach my $itm (space_split($line)) {
        if ((length($itm) == 1) || ($itm =~ /^\w+$/)) {
            push(@arr,$itm);
        } else {
            push(@arr,split_words($itm));
        }
    }
    return \@arr;
}


# Scan one file for lines matching the user's regex ($find_this).
# $inf: path of the file to scan.
# Match mode:
#   -w  the regex must match a whole word of the line,
#   -s  the regex must match at the start of a word,
#   -e  the regex must match at the end of a word,
#   otherwise the regex may match anywhere in the raw line.
# Blank lines (after trim_all) are skipped. Each matching line is added
# to @found_lines as [line, line number, file], and the file-level
# counters $files_scanned, $total_lines and $total_finds are updated.
# Exits through pgm_exit(1,...) if the regex is invalid or the file
# cannot be opened.
sub process_in_file($) {
    my ($inf) = @_;

    # Compile the pattern once instead of on every line. The (?:...)
    # wrapper keeps the anchors applied to the whole pattern even when it
    # contains a top-level alternation such as 'abc|def'.
    my $word_mode = ($whole_finds || $starts_with || $ends_with) ? 1 : 0;
    my $re = eval {
          $whole_finds ? qr/^(?:$find_this)$/
        : $starts_with ? qr/^(?:$find_this)/
        : $ends_with   ? qr/(?:$find_this)$/
        :                qr/$find_this/;
    };
    if (!defined $re) {
        pgm_exit(1,"ERROR: Invalid regex [$find_this]: $@");
    }

    # Three-argument open with a lexical handle: the file name can never
    # be taken as an open mode, and the handle is private to this call.
    my $fh;
    if (! open($fh, '<', $inf)) {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n");
    }
    prt("Processing [$inf]...\n") if (VERB9());
    $files_scanned++;

    my $lnn = 0;
    my $finds = 0;
    while (defined(my $line = <$fh>)) {
        chomp $line;
        $lnn++;
        # progress indicator for very large files
        if (($lnn % 100000)==0) {
            prt("$lnn\n");
        }
        my $tline = trim_all($line);
        next if (length($tline) == 0);

        # Start from "no match" on every line so that a result from an
        # earlier line can never leak into this one.
        my $fnd = 0;
        if ($word_mode) {
            my $ra = word_split($tline);
            foreach my $word (@{$ra}) {
                if ($word =~ $re) {
                    $fnd = 1;
                    last; # got a match
                }
            }
        } else {
            $fnd = 1 if ($line =~ $re);
        }
        if ($fnd) {
            prt("$lnn: $line\n") if (VERB9());
            $finds++;
            push(@found_lines, [$line, $lnn, $inf]);
            $total_finds++;
        }
    }
    close($fh);
    prt("Done $lnn lines with $finds finds...\n") if (VERB5());
    $total_lines += $lnn;
}

# Run process_in_file() over every path queued in @in_files, in order.
sub process_in_files() {
    for my $path (@in_files) {
        process_in_file($path);
    }
}

# Report every match held in @found_lines, grouped by file.
# Files are listed in sorted name order; within a file the matches keep
# the order in which they were found. Each match is printed as a line
# number (width 4, followed by ':') and the line text. A totals line is
# printed last; in the text written to $out_file (when one was given)
# the same totals line comes first.
sub show_finds() {
    # Each entry of @found_lines is [line text, line number, file name].
    my %hits_for = ();
    foreach my $found (@found_lines) {
        my ($text, $number, $path) = @{$found};
        push(@{ $hits_for{$path} }, [$text, $number]);
    }

    my $totals = "Scanned $files_scanned files, $total_lines lines, for $total_finds finds.\n";
    my $report = $totals;
    foreach my $path (sort keys(%hits_for)) {
        my $hits = $hits_for{$path};
        my $hit_count = scalar @{$hits};
        my $heading = "File: $path with $hit_count finds...\n";
        prt($heading);
        $report .= $heading;
        foreach my $hit (@{$hits}) {
            my $label = sprintf("%4d:", $hit->[1]);
            my $entry = "$label $hit->[0]\n";
            prt($entry);
            $report .= $entry;
        }
    }
    prt($totals);

    if (length($out_file) && length($report)) {
        write2file($report,$out_file);
        prt("Written results to [$out_file]\n");
    }
}


#########################################
### MAIN ###
# Read the command line; exits through pgm_exit() on bad or missing arguments.
parse_args(@ARGV);
###prt( "$pgmname: in [$cwd]: Hello, World...\n" );
# Scan every queued input file, collecting matches in @found_lines.
process_in_files();
# Print the matches grouped by file and, if -o was given, write them out.
show_finds();
# Normal end: list any warnings, close the log, exit with status 0.
pgm_exit(0,"");
########################################
# Print the usage text through prt().
# Every option that parse_args() accepts is listed, including the
# word-match modes -w, -s and -e, which were missing from this text.
# parse_args() looks only at the first letter after the dashes, so the
# long names shown are just the suggested spellings.
sub give_help {
    prt("$pgmname: version $VERS\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help   (-h or -?) = This help, and exit 0.\n");
    prt(" --verb[n]      (-v) = Bump [or set] verbosity. def=$verbosity\n");
    prt(" --find regex   (-f) = Regex to use to FIND in the in-file\n");
    prt(" --inp file     (-i) = Treat file as a line separated list of files\n");
    prt(" --load_log     (-l) = Load log file at end.\n");
    prt(" --out file     (-o) = Output the finds to this file.\n");
    prt(" --whole        (-w) = Regex must match a whole word of a line.\n");
    prt(" --start        (-s) = Regex must match at the start of a word.\n");
    prt(" --end          (-e) = Regex must match at the end of a word.\n");
    prt(" Lines beginning with other than alphanumeric, and 'NOT ' will be skipped\n");
}
# Check that an option is followed by a value.
# Called with the not-yet-consumed argument list: the first element is
# the option itself, the rest are whatever follows it. When nothing
# follows, the program ends through pgm_exit() with status 1.
sub need_arg {
    my ($option, @following) = @_;
    if (scalar(@following) == 0) {
        pgm_exit(1,"ERROR: [$option] must have following argument!\n");
    }
}

# Load a list of input files, one path per line, into @in_files.
# $fil: path of the list file.
# Skipped lines: empty lines, lines starting with 'NOT ' and lines that
# do not start with a word character. A remaining line is queued when it
# names an existing file; otherwise a warning is recorded through prtw().
# Returns the total number of entries now in @in_files (this includes
# files queued before the call).
# Exits through pgm_exit(1,...) if the list file cannot be opened.
sub get_in_file_list($) {
    my $fil = shift;
    # Three-argument open with a lexical handle: the file name can never
    # be taken as an open mode, and the handle is private to this call.
    my $fh;
    if (! open($fh, '<', $fil)) {
        pgm_exit(1,"ERROR: Unable to open $fil!\n");
    }
    # Read line by line rather than slurping the whole list.
    while (defined(my $line = <$fh>)) {
        chomp $line;
        $line =~ s/\r$//; # CRLF list read on a non-Windows perl
        next if (length($line) == 0); # skip blank lines
        next if ($line =~ /^NOT\s+/); # skip begin with 'NOT '
        next if ($line !~ /^\w/); # skip does NOT start with alphanum...
        if (-f $line) {
            push(@in_files,$line);
        } else {
            prtw("WARNING: Unable to locate file [$line]\n");
        }
    }
    close($fh);
    return scalar @in_files;
}



# Parse the command line into the file-level option variables.
# Arguments: the list of command-line words (normally @ARGV).
# A word starting with '-' is an option; any number of leading dashes is
# accepted and only the first letter(s) of the name are examined:
#   h or ?  help, then exit 0          l/ll/lll  load log level 1/2/3
#   f <re>  regex to find              o <file>  output file
#   i <file> list of input files       v[n]      bump or set verbosity
#   w       whole-word match           s / e     word starts / ends with
# Any other word must name an existing file and is queued in @in_files.
# After the loop the function verifies that an input file and a regex
# were given. Every error ends the program through pgm_exit(1,...).
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg,$cnt);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            # strip every leading dash so -x and --x are treated alike
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^l/) {
                $load_log = 1;
                $load_log = 2 if ($sarg =~ /^ll/);
                $load_log = 3 if ($sarg =~ /^lll/);
                prt("Set to load log $load_log at end\n") if (VERB1());
            } elsif ($sarg =~ /^f/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $find_this = $sarg;
                prt("Set regex to [$find_this]\n") if (VERB1());
            } elsif ($sarg =~ /^o/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_file = $sarg;
                prt("Set output file to [$out_file]\n") if (VERB1());
            } elsif ($sarg =~ /^i/) {
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                if (-f $sarg) {
                    $cnt = get_in_file_list($sarg);
                    if ($cnt) {
                        prt("Set $cnt file list from [$sarg]\n") if (VERB1());
                        $in_file = $in_files[0];
                    } else {
                        prtw("WARNING: No input files found in [$sarg]\n");
                    }
                } else {
                    pgm_exit(1,"ERROR: Can NOT find file $sarg\n");
                }
            } elsif ($sarg =~ /^v/i) {
                # \D* (not .*) before the digits: a greedy .* would eat
                # all but the last digit, turning -v10 into verbosity 0.
                if ($sarg =~ /^v\D*(\d+)$/i) {
                    $verbosity = $1;
                } else {
                    # no number given: each leading 'v' adds one level
                    while ($sarg =~ /^v/i) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                prt("Set Verbosity = $verbosity\n") if (VERB1());
            } elsif ($sarg =~ /^w/i) {
                $whole_finds = 1;
                prt("Set whole find only\n") if (VERB1());
            } elsif ($sarg =~ /^s/i) {
                $starts_with = 1;
                prt("Set starts with\n") if (VERB1());
            } elsif ($sarg =~ /^e/i) {
                $ends_with = 1;
                prt("Set ends with\n") if (VERB1());
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            # bare word: an input file, which must exist
            $in_file = $arg;
            if (-f $in_file) {
                prt("Set input to [$in_file]\n") if (VERB1());
                push(@in_files,$in_file);
            } else {
                pgm_exit(1,"ERROR: Can NOT find file $arg\n");
            }
        }
        shift @av;
    }

    if ((length($in_file) ==  0) && $def_dbg_on) {
        # Debug fallback. The default file must also be queued in
        # @in_files, otherwise process_in_files() has nothing to scan.
        $in_file = $def_file;
        $find_this = $def_find;
        push(@in_files,$in_file);
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (length($find_this) ==  0) {
        pgm_exit(1,"ERROR: No 'find' item found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
    # "starts with" and "ends with" together amount to a whole-word match
    if ($starts_with && $ends_with) {
        $whole_finds = 1;
    }
}

# eof - findinfile.pl
