#!/perl -w
# NAME: countword.pl
# AIM: Read ALL source files, counting the total lines, and count how many times
# a particular 'word' occurs in that source. Avoids certain BINARY files.
# In this case the word is 'const' ...
# 31/10/2007 geoff mclane - http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
	my @tmpsp = split(/\\/,$pgmname);
	$pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

my $base_folder = "C:\\FGCVS\\";
my @in_folders = qw( asterisk
CMake
expat
fgcomtar
FileZilla
FlightGear/source
fltk
fltk-1.1
freeglut
glut-3.7.6
iaxclient
libidn
libpcap
libpri
mozilla
netpbm
NSIS
openal
OpenSceneGraph
OpenThreads
openvpngui
openvpntrun
OTcvs
plib
portaudio
Producer
pthreads
regex
SimGear
swig
tcpdump
TerraGear
tidy
tidywrap
unxutils
winmerge
xiph
xmlrpc++
xmlrpc-c
xmprpc-java
zaptel
zlib-1.2.3 );

my $in_folder = '';
my $find = 'const';
###my $out_file = "templines.txt";
my $out_file = "templine2.txt";

my @avoid = qw( .obj .lib .exe .dll .pdb .pch .ilk .ncb .plg .opt .idb .suo .manifest .exp .res .dep .htm .user .html .ac .in .m4 );
my @avoid2 = qw( .asm .xul .swg .tcl .w32 .tmpl .3 .xml .m .config .pm .lua .dsp .pike .README .min .info .php4 .programming .cs .h-msvc8 .php .mcp .rb .i .sgml .log .nb .idl .wat .conf .tbl .h-vms .src .dpr .charset .dll .0 .txt .make .win32 .tag .ds .cmake .js .trimedia .cmd .mm .ref .xhtml .h-msvc6 .tex .s .sh .cgi .o2 .wwtpl .py .h-msvc71 .tpu .tst .csv .csh=1 .ami .man .orig .xs .mk .xpm .patch .sym .pas .vcb .txt .dos .skl .dox .sub .wn .ps .1 .o9 .java .sda .cp .map .lisp .nonportable .perl .texi .ss .sed .diff .flex .l .current .nasm .w32 .my .y .tab .pl .mod .am .jar .commands .ch .properties .expected-xml .api .fl .vc5 .inc .hin .tar .dat .scm .rtf .doc );

my @included = qw( .hxx .cxx .h .c .hh .hpp .h .inl .cpp .cc .c++ );

my $recursive = 1;
my $exclude2 = 1;

# debug
my $dbg1 = 0;
my $dbg2 = 0;
my $verb3 = 0;
# variables
my $word_count = 0;
my $file_count = 0;
my $line_count = 0;
my $tword_count = 0;
my $tfile_count = 0;
my $tline_count = 0;
my @results = ();
my @keepline = ();
my $pct = 0;
my $pct100 = 0;
my $inf = '';
foreach $inf (@in_folders) {
	$in_folder = $base_folder . $inf;
	prt( "Project: $inf:\n" );
	$tword_count += $word_count;
	$tfile_count += $file_count;
	$tline_count += $line_count;
	$word_count = 0;
	$file_count = 0;
	$line_count = 0;
	process_folder( $in_folder );
	prt( "Processed $file_count files, $line_count lines, for $word_count finds of $find ...\n" );
	$pct = 0;
	if ($word_count && $line_count) {
		$pct = (($word_count * 100) / $line_count);
		$pct100 = int($pct * 100);
		$pct = $pct100 / 100;
		prt( "That is $pct% of the lines ...\n" );
	}
	push(@results, [$pct, $inf, $file_count, $line_count, $word_count] );
}
prt( "Processed $tfile_count files, $tline_count lines, for $tword_count finds of $find ...\n" );
if ($tword_count && $tline_count) {
	$pct = (($tword_count * 100) / $tline_count);
	$pct100 = int($pct * 100);
	$pct = $pct100 / 100;
	prt( "That is $pct% of the lines AVERAGE ...\n" );
}
my @sresults = sort mycmp_decend @results;
my $rescnt = scalar @sresults;
prt( "Pct  Project      Files   Lines  Finds\n" );
for (my $i = 0; $i < $rescnt; $i++) {
	#push(@results, [$pct, $inf, $file_count, $line_count, $word_count] );
	$pct = $sresults[$i][0];
	$inf = $sresults[$i][1];
	$file_count = $sresults[$i][2];
	$line_count = $sresults[$i][3];
	$word_count = $sresults[$i][4];
	while (length($pct) < 5) {
		$pct .= ' ';
	}
	while (length($inf) < 15 ) {
		$inf .= ' ';
	}
	while (length($file_count) < 6 ) {
		$file_count .= ' ';
	}
	while (length($line_count) < 7 ) {
		$line_count .= ' ';
	}
	while (length($word_count) < 6 ) {
		$word_count .= ' ';
	}
	prt( "$pct $inf $file_count $line_count $word_count\n" );
}

write2file(join('',@keepline), $out_file);
close_log($outfile,1);
exit(0);

sub mycmp_decend {
   if (${$a}[0] < ${$b}[0]) {
      prt( "+[".${$a}[0]."] < [".${$b}[0]."]\n" ) if $verb3;
      return 1;
   }
   if (${$a}[0] > ${$b}[0]) {
      prt( "-[".${$a}[0]."] < [".${$b}[0]."]\n" ) if $verb3;
      return -1;
   }
   prt( "=[".${$a}[0]."] = [".${$b}[0]."]\n" ) if $verb3;
   return 0;
}


sub in_avoid {
	my ($ext) = shift;
	my $lcext = lc($ext);
	foreach my $tst (@avoid) {
		if ($tst eq $lcext) {
			return 1;
		}
	}
	if ($exclude2) {	# more excluded
		foreach my $tst (@avoid2) {
			if ($tst eq $lcext) {
				return 1;
			}
		}
	}
	return 0;
}

sub process_file {
	my ($inf) = shift;
	my $len = length($find);
	prt( "Processing $inf file ...\n" ) if ($dbg2);
	if (open FIL, "<$inf") {
		my @lines = <FIL>;
		close FIL;
		$file_count++;
		my $pline = '';
		my $dnfil = 0;
		foreach my $line (@lines) {
			$line_count++;
			###if ($line =~ /$find/) {
			if ($line =~ /\W+$find\W+/) {
				push(@keepline,"FILE: $inf\n") if (!$dnfil);
				$dnfil = 1;
				push(@keepline,$line);
				my $mline = $line;
				my $ind = index($mline, $find);
				while ($ind != -1) {
					$word_count++;	# bump the WORD count
					$mline = substr($mline, ($ind + $len));
					$ind = index($mline, $find);
				}
			}
			$pline = $line;
		}
	} else {
		prt( "ERROR: FAILED TO OPEN [$inf] ... $! ...\n" );
	}
}

sub process_folder {
	my ($inf) = shift;
	prt( "Processing $inf folder ...\n" ) if ($dbg1);
	my ($nm, $dir, $ext);
	my @flds = ();
	if ( opendir( DIR, $inf ) ) {
		my @files = readdir(DIR);
		closedir DIR;
		my $hfcnt = 0;
		foreach my $fl (@files) {
			if (($fl eq '.') || ($fl eq '..')) {
				next;
			}
			($nm, $dir, $ext) = fileparse( $fl, qr/\.[^.]*/ );
			my $ff = $inf;
			$ff .= "\\";
			$ff .= $fl;
			if (-d $ff) {
				if ($recursive) {
					my $lcfl = lc($fl);
					if ( ($lcfl eq '.svn') || ($lcfl eq 'cvs') ) {
						# skip
					} else {
						push(@flds, $ff);
					}
				}
			} else {
				if ( !in_avoid($ext) ) {
					process_file( $ff );
				}
			}
		}
	}
	foreach $dir (@flds) {
		process_folder($dir);
	}
}

# eof - countwords.pl
