#!/Perl
print "$0: Hello, World...\n";
# AIM - scan HTML file, and ADD an alt='$fn image', if none found

my $in_dir = shift || die "ERROR: Must give input folder, or file ...\n";
my $out_file = 'tempout2.txt';
my $OH;
open $OH, ">$out_file" || die "ERROR: Can NOT create output file ... aborting ...\n";

###my $out_dir = $in_dir;
my $out_dir = 'temp2'; # use another OUT folder, until SURE no mess-up!!!
my @files = ();
my $file;
my $line;
my $filcnt = 0;
my @myfiles = ();
my @fnames = ();
if ( -f $in_dir) {
	prt( "Processing file $in_dir ...\n" );
	push(@myfiles,$in_dir);
	$file = get_file_name($in_dir);
	push(@fnames, $file);
	$in_dir = get_path_name($in_dir); 	# GET THE DIRECTORY PART
	prt( "Processing file $file ... dir=$in_dir\n" );
} else {
	prt( "Processing directory $in_dir ...\n" );
	opendir( DIR, $in_dir) || die "ERROR: Can NOT open $in_dir ... aborting ...\n";
	@files = readdir(DIR);
	closedir DIR;
	prt( "Found ".scalar @files." items in the directory ...\n" );
	foreach $file (sort @files) {
		if (($file eq '.')||($file eq '..')) {
			next;
		}
		my $ff = $in_dir . '/' . $file;
		if ( -d $ff ) {
			#prt( "Ignore Directory $file ...\n");
		} else {
			if (is_my_file($file)) {
				push(@myfiles,$ff);
				push(@fnames,$file);
				$filcnt++;
			} else {
				#prt( "IGNORE $file ...\n" );
			}
		}
	}
}

prt( "Found ".scalar @myfiles." files to scan ...\n");
$filcnt = 0;
my @diff_files = ();
foreach $file (@myfiles) {
	my $on = $out_dir . '/' . $fnames[$filcnt];
	prt( "IN=$file OUT=$on\n" );
	$filcnt++;
	open $IF, "<$file" or die "Can not OPEN $file! ... aborting ...\n";
	my @lines = <$IF>; # slurp whole file, to an array of lines
	close($IF);
	# process, line by line
	my $lc = 0;
	my $diff_cnt = 0;
	foreach $line (@lines) {
		chomp $line;
		while( substr($line,0,1) eq ' ' ) {
			$line = substr($line,1);
		}
		##if ($line =~ /<img (.*)>/i) {
		##if ($line =~ /<img (.*)(>|\/>)/io) {
		if ($line =~ /document\.write/io) {
			# ignore these lines
		##} elsif ($line =~ /<img (.*)(>|\/>)?/io) {
		} elsif ($line =~ /<img (.*)(>|\/>){1,}/io) {
			###prt( "Line $lc: with IMG [$line] ...\n" );
			###prt( "Line $lc: [$1][$2] ...\n" );
			my $pl = $1; # get the interesting part
			if ($pl =~ /alt=/) {
				### prt( "With ALT Line $lc: [$pl] ...\n" );
			} else {
				#if ($pl =~ />/) {
				if ($pl =~ /(.+)>/) {
					# multiple ...
					$pl = $1; # at least get this portion
					### prt( "Line $lc: MULTIPLE - NO ALT [$pl] ...\n" );
				}
				if ($pl =~ /(.+)>/) {
					# multiple ...
					prt( "Line $lc: STILL MULTIPLE - NO ALT [$1][$pl] ...\n" );
				} else {
					if ($pl =~ /src=(.+)/) {
						my $src = $1;
						if (substr($src,0,1) eq '"') {
							$src = substr($src,1);
							my $pos = index($src,'"');
							if ($pos > 0) {
								$src = substr($src,0,$pos);
								$src = get_file_name($src);
								$src =~ s/\./ /;
								###my $npl = $pl.' alt="'.$src.' image"';
								###my $mnpl = quotemeta($npl);
								###my $mpl = quotemeta($pl);
								###$line =~ s/$mpl/$mnpl/o;
								my $nline = add_alt($line,$src);
								if (length($nline) > length($line)) {
									$diff_cnt++;
									$lines[$lc] = $nline;
								}
								prt( "Line $lc: Added ALT [$pl][$src] ...[$line] \n" );
							} else {
								prt( "Line $lc: src?? NO ALT [$pl][$src] ...\n" );
							}
						} else {
							prt( "Line $lc: SRC NO ALT [$pl][$src] ...\n" );
						}
					} else {
						prt( "Line $lc: NO ALT [$pl] ...\n" );
					}
				}
			}
		}
		$lc++;
	}

	if ($diff_cnt) {
		# now write it OUT
		open $OF, ">$on" or die "ERROR: Unable to open OUT file ... aborting ...\n";
		foreach $line (@lines) {
			chomp $line;
			print $OF "$line\n";
		}
		close $OF;
	}
}

sub add_alt {
	my ($ln,$sc) = @_; # extract line, and source
	my $len = length($ln);
	my $i;
	my $nln = '';
	my $done_it = 0;
	my $si = 0;
	for ($i = 0; $i < $len; $i++) {
		my $ch = substr($ln,$i,1);
		if (($si == 4) && ($done_it == 0)) {
			if ($ch eq '>') {
				$nln .= ' alt="image '.$sc.'"';
				$done_it = 1;
				$si++;
			}
		}
		if ($si == 0) {
			if (uc($ch) eq 'I') {
				$si++;
			}
		} elsif ($si == 1) {
			if (uc($ch) eq 'M') {
				$si++;
			} else {
				$si = 0;
			}
		} elsif ($si == 2) {
			if (uc($ch) eq 'G') {
				$si++;
			} else {
				$si = 0;
			}
		} elsif ($si == 3) {
			if ($ch eq ' ') {
				$si++; # set SUCCESS
			} else {
				$si = 0;
			}
		}
		$nln .= $ch;
	}
	return $nln;
}



sub is_my_file {
	my ($f) = @_;
	my $ret = 0;
	if ($f =~ /(.*)\.htm$/i) {
		$ret = 1;
	} elsif ($f =~ /(.*)\.html$/i) {
		$ret = 1;
#	} elsif ($f =~ /(.*)\.shtml$/i) {
#		$ret = 1;
#	} elsif ($f =~ /(.*)\.php$/i) {
#		$ret = 1;
	}
#	if ($ret) {
#		foreach my $f2 (@excl) {
#			if (uc($f2) eq uc($f)) {
#				$ret = 0;
#				last;
#			}
#		}
#	}
	return $ret;
}

sub dos_to_unix {
	my ($d) = @_;
	$d =~ s/\\/\//g;
	return $d;
}

sub get_file_name {
	my ($pf) = @_;
	$pf = dos_to_unix($pf);
	@parts = split('/',$pf);
	return $parts[$#parts];
}

sub get_path_name {
	my ($pf) = @_;
	$pf = dos_to_unix($pf);
	@parts = split('/',$pf);
	pop( @parts ); # Pops off the last value of the array - the file name
	$pf = join( '\\', @parts );
	return $pf;
}

sub prt {
	my ($m) = @_;
	print $m;
	print $OH $m;
}

# eof - genalt.pl
