#!/perl -w
# NAME: extractwords.pl
# AIM: read a file, and extract the word within ...
# 12/01/2008 - geoff mclane - http://geoffair.net/mperl
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
	my @tmpsp = split(/\\/,$pgmname);
	$pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

my %wordlist = ();
my $in_file = 'C:\Program Files\Microsoft Platform SDK for Windows Server 2003 R2\include\windows.h';
my $maxwrap = 8;
my $ignorecomm = 1;
my $ignerror = 1;

process_file($in_file);
show_words();

close_log($outfile,1);
exit(0);

sub is_hex_numb {
	my ($txt) = shift;
	if ($txt =~ /^0X/i) {
		$txt = substr($txt,2);
	}
	my $tl = length($txt);
	my ($t, $c);
	for ($t = 0; $t < $tl; $t++) {
		$c = substr($txt,$t,1);
		if ( !(($c =~ /\d/)||($c =~ /[A-F]/i)) ) {
			return 0;
		}
	}
	return 1;
}

# isallnums
sub is_all_nums {
	my ($txt) = shift;
	my $tl = length($txt);
	my ($t, $c);
	for ($t = 0; $t < $tl; $t++) {
		$c = substr($txt,$t,1);
		if ( !($c =~ /\d/) ) {
			return 0;
		}
	}
	return 1;
}


sub add_word {
	my ($wd) = shift;
	if ((length($wd) > 1) &&
		!is_all_nums($wd) &&
		!is_hex_numb($wd) ) {
		if (defined $wordlist{$wd}) {
			$wordlist{$wd}++;
		} else {
			$wordlist{$wd} = 1;
		}
	}
}

sub process_directive {
	my ($ln, $dr) = @_;
	my ($ind, $dl, $ll, $tag, $ch, $i);
	if ($ignerror) {
		#if ($ln =~ /^\s*#\s*error\s+/) {
		if ($dr eq 'error') {
			add_word('error');
			return;
	    }
    }
	$dl = length($dr);
	$ind = index($ln, $dr);
	$tag = '';
	if ($ind > 0) {
		$ln = substr($ln,$ind+$dl);
	}
	$ll = length($ln);
	for ($i = 0; $i < $ll; $i++) {
		$ch = substr($ln,$i,1);
		if ($ch =~ /\w/) {
			$tag .= $ch;
		} else {
			add_word($tag) if length($tag);
			$tag = '';
		}
	}
}


sub process_file {
	my ($fil) = shift;
	my (@lines, $lc, $line, $i, $ll, $ch, $pch, $word, $incomm);
	if (open INF, "<$fil") {
		@lines = <INF>;
		close INF;
		$lc = scalar @lines;
		prt( "Processing $lc lines from $fil ...\n" );
		$word = '';
		$incomm = 0;
		foreach $line (@lines) {
			if ( !$incomm && ($line =~ /^\s*#\s*(\w+)\s+/)) {
				process_directive($line, $1);
				next;
			}
			$ll = length($line);
			for ($i = 0; $i < $ll; $i++) {
				$ch = substr($line,$i,1);
				if ($ignorecomm && $incomm) {
					if (($pch eq '*') && ($ch eq '/')) {
						$incomm = 0;
					}
					$pch = $ch;
					next;
				}
				if ($ch =~ /\w/) {
					$word .= $ch;
				} else {
					add_word($word) if length($word);
					$word = '';
					if ($ignorecomm) {
						if (($ch eq '*')&&
							($pch eq '/')) {
							$incomm = 1;
						} elsif (($ch eq '/')&&
							($pch eq '/')) {
							$ch = ' ';
							$i = $ll;
						}
					}
				}
				$pch = $ch;
			}
		}
		add_word($word) if length($word);
	} else {
		prt( "WARNING: Failed to OPEN file [$fil] ...\n" );
	}
}

sub show_words {
	my ($wd, $cnt, $tot, $wrap, $wcnt);
	$tot = 0;
	$wrap = 0;
	$wcnt = scalar keys(%wordlist);
	prt( "Output of $wcnt words found ...\n" );
	foreach $wd (keys %wordlist) {
		$cnt = $wordlist{$wd};
		$tot += $cnt;
		prt( "$wd " );
		$wrap++;
		if ($wrap > $maxwrap) {
			prt("\n");
			$wrap = 0;
		}
	}
	prt("\n") if ($wrap);
	prt( "Done $wcnt, $tot total words ...\n" );
}

# eof - extractwords.pl
