#!/perl -w
# NAME: showhrefs.pl
# AIM: Given an HTML file, extract and show its HREF (anchor) entries.
# 29/07/2007 - geoff mclane - http://geoffair.net/mperl/index.htm
use strict;
use warnings;
use File::Basename;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
require 'htmltools.pl' or die "Unable to load htmltools.pl ...\n";
# log file stuff
my ($LF);
my $outfile = 'temp.'.$0.'.txt';
if ($0 =~ /\w{1}:\\.*/) {
	my @tmpsp = split(/\\/,$0);
	$outfile = 'temp.'.($tmpsp[-1]).'.txt';
}
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );
my $addhrefs = 0;
my $base_dir = "C:\\HOMEPAGE\\HOM\\test4\\";
my @in_files = qw(collections.htm limited-edition.htm groom-center.htm product-lines.htm);
##my $in_file = 'collections.htm';
##my $in_file = 'C:\HOMEPAGE\HOM\test4\limited-edition.htm';
my $in_file = 'C:\HOMEPAGE\HOM\test4\groom-center.htm';
##my $in_file = 'C:\HOMEPAGE\HOM\test4\product-lines.htm';
my @hrefs = ();
my @anchors = ();
my $title = '';
my $hcnt = 0;
my $acnt = 0;
my $hrf = '';
my $hfile = '';
my $dirname = $base_dir;
my $had_menu = 0;
my @html_ext = qw( .htm .html .shtml .php );
##my ($filename, $dirname) = fileparse($in_file);
foreach $hfile (@in_files) {
	$in_file = "$base_dir$hfile";
	process_file( $in_file );
	$hcnt = scalar @hrefs;
	$acnt = scalar @anchors;
	prt( "Got $hcnt HREF ... $acnt anchors ... title=\"$title\"\n" );
	if ($addhrefs) {
		prt( "\nList $hcnt HREF \n" );
		foreach $hrf (@hrefs) {
			$hrf =~ s/\n/ /gm;
			$hrf = trim_all($hrf);
			prt( "$hrf\n" );
		}
	}
	prt( "\nList $acnt anchors \n" );
	$had_menu = 0;
	foreach $hrf (@anchors) {
		$hrf =~ s/\n/ /gm;
		$hrf = trim_all($hrf);
		$hfile = anchor_href($hrf);
		if (is_my_ext($hfile, @html_ext)) {
			$title = get_file_title( "$dirname$hfile" );
		} else {
			$title = "<not html>";
		}
		###prt( "$hrf ($hfile) $title\n" );
		prt( "$hfile, title=[$title] $hrf\n" ) if ($had_menu);
		if ($hfile eq './') {
			$had_menu = 1;
		}
	}
}

close_log($outfile,1);
exit(0);

########################
### subs

sub is_html_file {
	my ($fil) = shift;
	my ($nm,$dir,$ext) = fileparse( $fil, qr/\.[^.]*/ );



}

sub anchor_href {
	my ($txt) = shift;
	my $len = length($txt);
	my $ch = '';
	my $pch = ' ';
	my $tag = '';
	for (my $i = 0; $i < $len; $i++) {
		$ch = substr($txt, $i, 1);
		if ((lc($ch) eq 'h')&&($pch =~ /\s/)) {
			$pch = substr($txt, $i);
			if ($pch =~ /^href=/i) {
				$tag = substr($txt, ($i+5));
				$tag = trim_all($tag);
				if ( $tag =~ /^['"]/ ) {
					$pch = substr($tag,0,1);
					$tag = substr($tag,1);
				} else {
					$pch = ' ';
				}
				my $ind = index($tag, $pch);
				if ($ind != -1) {
					$tag = substr($tag,0,$ind);
				}
				return $tag;
			}
		}
		$pch = $ch;
	}
	return $tag;
}

sub get_file_title {
	my ($inf) = shift;
	my ($IN);
	my $tit = '';
	if (open $IN, "<$inf") {
		my @lines = <$IN>;
		close $IN;
		###my $lc = scalar @lines;
		###prt( "Processing $lc lines from $inf ...\n" );
		$tit = return_tag( join( '', @lines ), 'title' );
		$tit =~ s/\n/ /gm;
		$tit = trim_all($tit);
	} else {
		###prt( "ERROR: Failed to open $inf ... $! ...\n" );
		$tit = "<open failed on $inf>";
	}
	return $tit;
}

sub process_file {
	my ($inf) = shift;
	my ($IN);
	if (open $IN, "<$inf") {
		my @lines = <$IN>;
		close $IN;
		my $lc = scalar @lines;
		prt( "\nProcessing $lc lines from $inf ...\n" );
		my $ft = join( '', @lines );
		$title = return_tag( $ft, 'title' );
		$title =~ s/\n/ /gm;
		$title = trim_all($title);
		@hrefs = ret_hrefs_array( $ft );
		@anchors = ret_anchor_array( $ft );
	} else {
		prt( "ERROR: Failed to open $inf ... $! ...\n" );
		return 0;
	}
	return 1;
}

#########################################################
# Passed an array of extensions,
# check if this is one of them?
#########################################################
sub is_my_ext {
	my ($fil, @exts) = @_;
	my ($nm,$dir,$ext) = fileparse( $fil, qr/\.[^.]*/ );
	foreach my $ex (@exts) {
		if (lc($ex) eq lc($ext)) {
			return 1;
		}
	}
	return 0;
}

# eof
