#!/perl -w
# NAME: fav-05.pl
# AIM: To read the Internet Favorites, and produce
# a HTML document, with links and description
#
# 23/08/2007 - into new 'format', and some updates using Tidy to 'fix' text ...
# added date table, if $chkip, check the IP can be resolved, mark <b>(B)</b> if not.
# if $chkpage, download the page, and mark <b> if this fails.
#
# 2006.07.11 - switch link column, and add (B) broken, from c:\HOMEPAGE\Broken02.htm
# update 2006.06.28 - weed out local references
# Added a MAXIMUM width, so the table approximately 'fits' a 1024 wide screen
# change to using '<base target="_blank">'
#
# 2005.11.12 - works ok - geoff mclane - geoffair.net/favorites.htm
# 
use strict;
use warnings;
use File::stat;
use Socket;
use LWP::Simple;
unshift(@INC, 'C:/GTools/perl');
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /\w{1}:\\.*/) {
	my @tmpsp = split(/\\/,$pgmname);
	$pgmname = $tmpsp[-1];
}
my $outfile = "temp.$pgmname.txt";
open_log($outfile);
prt( "$0 ... Hello, World ...\n" );

##my $htmfil = 'favorites.htm';	# OUTPUT HTML FILE
my $htmfil = 'tempfav.htm';	# OUTPUT HTML FILE
my $chkip = 0;	# physically CHECK resolves to an IP address, or 2
my $chkpage = 0;	# longer FETCH ACTUAL PAGE - about 1 sec per entry!!!
# set a sample maximum title, wrap start at -10 from this - original set at 60
#             12345678901234567890123456789012345678901234567890123456789012345678901234567890
#                      1         2         3         4         5         6         7
my $maxtit = 'Domain Name Registration, Domain Transfe'; # rs. Your domain name search starts here.';
my $maxwid = length($maxtit);
my $DT = YYYYMMDD(time());
my $hvers = "  <!-- GA.$DT - geoffair.net - redo listing using $pgmname -->\n"; 
$hvers .= "  <!-- P26.2006.07.11 - update -->\n";
$hvers .= '  <!-- p26.2005.11.11 - List of favorites in PRO-1 geoffmclane.com/favorites.htm -->';
# parameters to run tidydev to 'fix' tit text. could not get stdin functioning, so
# write to a file, and pass file to tidydev.exe (in my PATH).
my $params = '-f temptidy.txt --tidy-mark no --doctype omit --show-body-only yes';
my $tablelink = 24;		# put a LINK LINE after this many rows

### debug
my $dbg3 = 0;	# show resolved IP addresses

#########################################
my $favfolder = get_favorite_folder();
prt( "Processing folder [$favfolder] ...\n" );
my $basedir = quotemeta($favfolder);
# entries to EXCLUDE
my @fav_exclude = ( 'https://geoffmclane.com:2083/frontend/x/index.html' );

my @dirs = ($favfolder);	# start folder
my @fils = ();				# collection of files from folder
my @tblist = ();			# final multidimensional array
my @warnings = ();
my $wmsg = '';
my @broken = ();	# if $chkip, broken links found.
my @nopage = ();	# if $chkpage, pages that did not load.

# MULTI-DIMENSIONAL ARRAY - holding all the information for the TABLES
#                0   1   2    3     4      5     6  7   8   9    10
# $tl_           sn  url mu   tm    dir    tit   dn vip txt ttt  dom
#push(@tblist, [$sn, $u, $mu, $tms, $fold, $tit, 0, 0,  0,  $ttt,$dom]);
my $tl_sn = 0;
my $tl_url = 1;
my $tl_mu = 2;
my $tl_tm = 3;
my $tl_dir = 4;
my $tl_tit = 5;
my $tl_dn = 6;
my $tl_vip = 7;	# valid IP, if checked
my $tl_txt = 8;	# valid page, if checked
my $tl_ttt = 9;	# Tidied title text
my $tl_dom = 10;	# domain NAME

### begin processing
while (@dirs) {
	my @dir2 = @dirs;	# copy array
	@dirs = ();			# and kill these
	foreach my $dn (@dir2) {
		process_dir($dn);	# prcess folder, which may yield more folders
	}
}

get_table_arr();	# build up MULTI-DIMENSIONAL array for TABLES

post_processing();	# Tidy tit text, and get domain name

# show MISSING
if ($chkip && @broken) {
	prt( "Showing list of ".scalar @broken." BROKEN URLS ...\n" );
	foreach my $u (@broken) {
		prt( "$u\n" );
	}
}
if ($chkpage && @nopage) {
	prt( "Showing list of ".scalar @nopage." URLS where PAGE failed ...\n" );
	foreach my $u (@nopage) {
		prt( "$u\n" );
	}
}

# WRITE HTML FILE
if( write_html_file( $htmfil ) ) {
	system($htmfil);	# load HTML file
}

close_log($outfile,1);	# close LOG and LOAD
exit(0);	# ALL DONE

#####################################
###### subs

# at presetn ADDS 2 things to the multi-dimensional array
sub post_processing {
	my $bgntime = time();
	my $max = scalar @tblist;
	my ($i, $f_link, $f_tit, $f_ttt, $f_dom);
	prt( "Post processing $max entries ...\n" );
	for ($i = 0; $i < $max; $i++ ) {
		$f_link = $tblist[$i][$tl_url];	# column 3 (HREF)
		$f_tit  = $tblist[$i][$tl_tit];	# column 2
		$f_ttt  = get_tidy_text($f_tit);
		$tblist[$i][$tl_ttt] = $f_ttt;
		$f_dom  = Get_Domain_Name($f_link);
		$tblist[$i][$tl_dom] = $f_dom;
	}
	prt( "Done post for $max entries ...".secs_2_hhmmss(time() - $bgntime). "...\n" );
}

##############################################################
###### HTML STUFF
sub get_link_text {
	my ( $grp ) = shift;
	my $lt = '';
	$lt .= '   <a target="_self" href="#top">top</a> '."\n" if ($grp != 1);
	$lt .= '   <a target="_self" href="#folder">folder</a>  '."\n" if ($grp != 4);
	$lt .= '   <a target="_self" href="#dateorder">date</a>  '."\n" if ($grp != 3);
	$lt .= '   <a target="_self" href="#end">end</a> '."\n" if ($grp != 2);
	$lt .= '   <a target="_self" href="favorite.htm">back</a> '."\n";
	$lt .= '   <a target="_self" href="home2.htm">home</a> '."\n";
	return $lt;
}

sub	add_link_para {
	my ( $hf, $grp ) = @_;
	print $hf "  <p class=\"ctr\">\n";
	print $hf get_link_text( $grp );
	print $hf "  </p>\n";
}

# out the HEAD
sub out_htm_head {
	my ($hf) = shift;

	print $hf <<EOF;
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
 <head>
  <meta http-equiv="Content-Language"
        content="en-us">
  <meta http-equiv="Content-Type"
        content="text/html; charset=us-ascii">
  <meta name="Author"
        content="Geoff Mclane">
  <title>
   List of Geoff Favorites
  </title>
  <link rel="SHORTCUT ICON"
        href="http://geoffair.net/icong.ico">
  <link rel="stylesheet"
        type="text/css"
        href="homeg.css">
  <style type="text/css">
<!-- /* Style Definitions */
  .smfnt {
  font-size : small;
  }
  -->
  </style>
  <base target="_blank">
 </head>
 <body>
  <h1>
   <a name="top"
       id="top"></a>List of Geoff Favorites
  </h1>

EOF

	add_link_para( $hf, 1 );	# exclude 'top'

	print $hf <<EOF;

  <p>
   This is a simple table, as at $DT, of my ever changing, personal <span class=
   "bld">Favorites</span>. It is autogenerated periodically, using a Perl script, in an attempt to
   keep it up to date ;=)) It does contain some broken links, sites that have disappeared, but
   most are valid and current. The base target has been set to _blank, so when a link is clicked,
   it should open in a NEW browser page. While the link text is sometimes truncated, the
   underlying anchor reference contains the full link ... Enjoy ...
  </p>

EOF

}

sub add_link_item {
	my ($hf, $cols) = @_;
	print $hf "\n";
	print $hf "   <tr>\n";
	print $hf "    <td colspan=\"$cols\" align=\"center\" class=\"smfnt\">";
	add_link_para( $hf, 0 );	# exclude none
	print $hf "    </td>\n";
	print $hf "   </tr>\n";
}


# out FIRST TABLE
sub out_htm_table {
	my ($hf) = shift;
	my $tcnt = scalar @tblist;
	my ($f_title, $f_link, $f_tlink, $f_date, $f_fold, $f_tit, $f_bkn, $f_pag, $f_ttt);
	my $rowcnt = 0;

	add_link_para( $hf, 4 );	# exclude 'folder'

	print $hf <<EOF;
  <a name="folder"
       id="folder"></a>
  <table border="1"
         width="100%"
         summary="List of favorites - First column is the title, and the 2nd is link">
   <tr>
    <th>
     Folder
    </th>
    <th>
     Title
    </th>
    <th>
     Link
    </th>
    <th>
     Date
    </th>
   </tr>
EOF

	#                0   1   2    3     4      5     6  7   8    9     10
	# $tl_           sn  url mu   tm    dir    tit   dn vip txt  ttt   dom
	#push(@tblist, [$sn, $u, $mu, $tms, $fold, $tit, 0, 0,  $tx, $ttt, $dom ]);
	for (my $i = 0; $i < $tcnt; $i++) {
		$f_title = $tblist[$i][$tl_sn];		# unused
		$f_link =  $tblist[$i][$tl_url];	# column 3 (HREF)
		$f_tlink = $tblist[$i][$tl_mu];		# column 3 (text)
		$f_date =  YYYYMMDD($tblist[$i][$tl_tm]);	# column 4 (as YYYY/MM/DD)
		$f_fold =  $tblist[$i][$tl_dir];	# column 1
		$f_tit  =  $tblist[$i][$tl_tit];	# column 2
		$f_bkn  =  $tblist[$i][$tl_vip];	# broken link
		$f_pag  =  $tblist[$i][$tl_txt];	# missing page
		$f_ttt  =  $tblist[$i][$tl_ttt];	# tidied text
		if ( !$f_bkn ) {
			$f_title = '<b>(B)</b> '.$f_title;
			$f_tit = '<b>(B)</b> '.$f_tit;
		} elsif ( !$f_pag ) {
			$f_title = '(b) '.$f_title;
			$f_tit = '(b) '.$f_tit;
		}

		print $hf "\n";
		print $hf "   <tr>\n";
		print $hf "    <td>$f_fold</td>\n";
		print $hf "    <td>$f_ttt</td>\n";
		print $hf "    <td><a href=\"$f_link\">$f_tlink</a></td>\n";
		print $hf "    <td>$f_date</td>\n";
		print $hf "   </tr>\n";

		$rowcnt++;
		if ($rowcnt > $tablelink) {
			if (($tcnt - $i) > ($tablelink / 2)) {
				add_link_item($hf, 4);
			}
			$rowcnt = 0;
		}
	}

	print $hf "  </table>\n";

}

# paragraph between tables
sub out_htm_para1 {
	my ($hf) = shift;

	print $hf <<"EOF";

  <p>
   <a name="bottom"
      id="bottom"></a> This table is auto-generated from a Perl script, reading and analysing my
      'Favorites' folder, from the USERPROFILE given in the environment. Those marked with a
      <b>(B)</b> were <b>BROKEN</b> links at the last full verification done by the perl script ...
      and a small (b) suggests the actual page could not be fetched by the perl script ...
      sometimes this is due to the fact that they are secure sites (https), and sometimes due to the
      fact that the site, or at least that page, has since been pulled down, but I have yet to
      delete this link from my personal 'Favorites' ... and just sometimes the perl script makes a
      mistake in its verification process, and/or the site has a redirection active!
  </p>

EOF

}

# out 2nd DATE TABLE
sub out_htm_table2 {
	my ($hf) = shift;
	my $tcnt = scalar @tblist;
	my ($i, $f_title, $f_link, $f_tlink, $f_date, $f_fold, $f_tit, $f_bkn, $f_pag, $f_tm, $f_ttt);

	add_link_para( $hf, 3 );	# exclude 'dateorder'

	print $hf <<EOF;
  <p>
  <a name="dateorder"
     id="dateorder"></a>The following is a repeat ot the above, IN DATE ORDER ...
  </p>

  <table border="1"
         width="100%"
         summary="Table in DATE order">
   <tr>
    <th>
     Date
    </th>
    <th>
     Folder
    </th>
    <th>
     Title
    </th>
    <th>
     Link
    </th>
   </tr>
EOF

	#                0   1   2    3     4      5     6  7   8   9
	# $tl_           sn  url mu   tm    dir    tit   dn vip txt ttt 
	#push(@tblist, [$sn, $u, $mu, $tms, $fold, $tit, 0, 0,  0,  $ttt]);
	for ($i = 0; $i < $tcnt; $i++) {
		$tblist[$i][$tl_dn] = 0;	## clear DONE flag
	}

	my $maxtm = 0;
	my $mxoff = 0;
	my $alldn = 1;
	my $rowcnt = 0;
	my $done = 0;

	while ($alldn) {
		$alldn = 0;
		$maxtm = 0;
		for ($i = 0; $i < $tcnt; $i++) {
			if ( ! $tblist[$i][$tl_dn] ) {
				$f_tm = $tblist[$i][$tl_tm];
				if ($f_tm > $maxtm) {
					$maxtm = $f_tm;
					$mxoff = $i;
					$alldn = 1;
				}
			}
		}

		if ($alldn) {
			$i = $mxoff;
			$tblist[$i][$tl_dn] = 1;	## SET DONE flag
			$f_title = $tblist[$i][$tl_sn];		# unused
			$f_link =  $tblist[$i][$tl_url];	# column 4 (HREF)
			$f_tlink = $tblist[$i][$tl_mu];		# column 4 (text)
			$f_date =  YYYYMMDD($tblist[$i][$tl_tm]);	# column 1 (as YYYY/MM/DD)
			$f_fold =  $tblist[$i][$tl_dir];	# column 2
			$f_tit  =  $tblist[$i][$tl_tit];	# column 3
			$f_bkn  =  $tblist[$i][$tl_vip];	# broken link
			$f_pag  =  $tblist[$i][$tl_txt];	# missing page
			$f_ttt  =  $tblist[$i][$tl_ttt];	# tidyied text
			if ( !$f_bkn ) {
				$f_title = '<b>(B)</b> '.$f_title;
				$f_tit = '<b>(B)</b> '.$f_tit;
			} elsif ( !$f_pag ) {
				$f_title = '(b) '.$f_title;
				$f_tit = '(b) '.$f_tit;
			}

			print $hf "\n";
			print $hf "   <tr>\n";
			print $hf "    <td>$f_date</td>\n";
			print $hf "    <td>$f_fold</td>\n";
			print $hf "    <td>$f_ttt</td>\n";
			print $hf "    <td><a href=\"$f_link\">$f_tlink</a></td>\n";
			print $hf "   </tr>\n";
			$rowcnt++;
			$done++;
			if ($rowcnt > $tablelink) {
				if (($tcnt - $done) > ($tablelink / 2)) {
					add_link_item($hf, 4);
				}
				$rowcnt = 0;
			}
		}
	}

	print $hf "  </table>\n";

}

# out the HTML tail
sub out_htm_tail {
	my ($hf) = shift;

	add_link_para( $hf, 2 );	# exclude 'end'

	print $hf <<EOF;

  <p>
   <a name="end"
      id="end"></a><a target="_blank"
      href="http://tidy.sourceforge.net/"><img border="0"
        src="images/checked_by_tidy.gif"
        alt="checked by tidy"
        width="32"
        height="32"></a>&nbsp; <a href="http://validator.w3.org/check?uri=referer"
      target="_blank"><img src="images/valid-html401.gif"
        alt="Valid HTML 4.01 Transitional"
        width="88"
        height="31"></a>
  </p>
  $hvers
 </body>
</html>
EOF

}

sub write_html_file {
	my ($hfil) = shift;
	my ($HF, $max);
	$max = scalar @tblist;
	if ( !$max ) {
		prt( "WARNING: No items in LIST!\n" );
		return 0;
	}
	if ( !open( $HF, ">$hfil" ) ) {
		prt( "ERROR: Can NOT open HTML file $hfil! ... $! ...\n" );
		return 0;
	}
	prt( "Writing $max items to $hfil ...\n" );

	out_htm_head( $HF );

	out_htm_table( $HF );

	out_htm_para1( $HF );

	out_htm_table2( $HF );

	out_htm_tail( $HF );

	close $HF;
	return 1;
}

sub get_favorite_folder {
	my $ff = '';
	if( !defined( $ENV{'USERPROFILE'} ) ) {
		mydie( "ERROR: Can NOT locate USERPROFILE in ENVironment!\n" );
	}
	$ff = $ENV{'USERPROFILE'} . '\\Favorites';
	if( !( -d $ff ) ) {
		mydie( "ERROR: Folder $ff is NOT a directory!\n" );
	}
	return $ff;
}

sub get_file_list {
	my ($d, @fs) = @_;
	foreach my $fn (@fs) {
		next if ($fn eq '.');
		next if ($fn eq '..');
		my $ffn = $d . '\\' . $fn;
		if( -d $ffn ) {
			push(@dirs, $ffn);
		} else {
			if ($fn =~ /.+\.url$/i) {
				push(@fils, $ffn);
			} else {
				prt( "Discarding file $ffn ...\n" );
			}
		}
	}
	my $fcnt = scalar @fils;
	my $dcnt = scalar @dirs;
	prt( "Found $fcnt files, and $dcnt directories ...\n" );
}

sub process_dir {
	my ($d) = shift;
	prt( "Processing $d ...\n" );
	if (opendir(DIRH, $d)) {
		my @dfs = readdir(DIRH);
		closedir(DIRH);
		prt( "Found " . scalar @dfs . " entries ...\n" );
		get_file_list($d, @dfs);
	} else {
		prt( "WARNING: Failed to open directory [$d]!...\n" );
	}
}


sub remdir {
	my ($f) = shift;
	$f =~ s/^$basedir\\//; # remove beginning ...
	$f =~ s/\.url$//; # and remove tail
	return $f;
}

sub in_exclude {
	my ($h) = shift;
	foreach my $l (@fav_exclude) {
		if ($l eq $h) {
			return 1;
		}
	}
	return 0;
}

sub max_sub2 {
	my ($ln, $max) = @_;
	if (length($ln) > ($max+5)) {
		$ln = substr($ln,0,$max) . '...';
	}
	return $ln;
}

sub max_sub {
	my ($ln, $max) = @_;
	my $nln = $ln;
	if (length($ln) > $max) {
		my @arr = split(/ /,$ln);
		$nln = '';
		my $bit = '';
		my $bl = 0;
		my $sl = 0;
		my $sc = 0;
		foreach my $s (@arr) {
			$sl = length($s);
			$bl = length($bit);
			while ($sl > $max) {
				if ($bl) {
					$bit .= ' ';
				}
				$bit .= substr($s, 0, $max - $bl);
				$s = substr($s, $max - $bl);
				if (length($nln)) {
					$nln .= "<br>\n";
				}
				$nln .= $bit;
				$bit = '';
				$sl = length($s);
				$bl = length($bit);
				$sc = 0;
			}
			if ($bl) {
				if (( $bl + $sc + length($s) ) > $max ) {
					if (length($nln)) {
						$nln .= "<br>\n";
					}
					$nln .= $bit;
					$bit = $s;
					$sc = 0;
				} else {
					$bit .= ' ';
					$sc++;
					$bit .= $s;
				}
			} else {
				$bit = $s;
				$sc = 0;
			}
		}
		if (length($bit)) {
			if (length($nln)) {
				$nln .= "<br>\n";
			}
			$nln .= $bit;
		}
	}
	return $nln;
}

sub secs_2_hhmmss {
	my ($secs) = shift;
	my $rt = '';
	my $mins = int($secs / 60);
	$secs = $secs - ($mins * 60);
	$secs = (int(($secs * 10) + 0.5)) / 10;
	if ($mins > 60) {
		my $hrs = int($mins / 60);
		$mins = $mins - ($hrs * 60);
		$mins = '0'.$mins if ($mins < 10);
		$secs = '0'.$secs if ($secs < 10);
		$rt = "$hrs:$mins:$secs";
	} else {
		$mins = '0'.$mins if ($mins < 10);
		$secs = '0'.$secs if ($secs < 10);
		$rt = "$mins:$secs";
	}
	return $rt;
}

sub get_table_arr {
	my $totcnt = scalar @fils;
	prt( "Getting array of $totcnt favorite items ...\n" );
	my $lncnt = 0;
	my $bgntime = time();	# seconds
	my ($currtime, $difftime, $persec, $remains, $remsecs, $tenths, $remtm, $elapsed);
	foreach my $fn (@fils) {	# process each file
		$lncnt++;
		if ($chkpage && (($lncnt % 20) == 0)) {
			$currtime = time();
			$difftime = $currtime - $bgntime;
			$persec = $lncnt / $difftime;
			$remains = $totcnt - $lncnt;
			$remsecs = $remains / $persec;
			$tenths = (int(($persec * 100) + 0.05)) / 100;
			$remtm = secs_2_hhmmss($remsecs);
			$elapsed = secs_2_hhmmss($difftime);
			prt( "$elapsed Done $lncnt ... $tenths/sec ... remains $remains in $remtm mm:ss ...\n" );
		}
		my $sb = stat($fn);
		my $tms = $sb->mtime;
		if ( open( FH, $fn ) ) {
			my @lns = <FH>; # slurp in the lines
			close FH;
			my $sn = remdir($fn);	# file name is the TITLE of the favorite ...
			my $line = '';
			my $fnd = 1;
			my $bkn = 1;	# assume NOT broken link
			my $pag = 1;	# assume NOT no page
			# get the FOLDER
			my $ind = rindex($sn, "\\");
			my $fold = '.';
			my $tit = $sn;
			if ($ind != -1) {
				$fold = substr($sn, 0, $ind);
				$tit = substr($sn, ($ind + 1));
			}
			foreach $line (@lns) {
				chomp $line;
				if( $line =~ /^URL=/ ) {
					my $u = substr($line,4); ## ~ s/^URL=//;
					if (in_exclude($u)) {
						$fnd = 0; # avoid a WARNING ...
						last;
					}
					if ($chkip) {
						if ( !showIPAddress($u) ) {
							push(@broken, $u);
							$bkn = 0;
						}
					}
					if ($chkpage) {	# WARNING: this is QUITE SLOW
						if (!Get_URL_Text_Count($u)) {
							push(@nopage, $u);
							$pag = 0;
						}
					}
					my $mu = max_sub2($u,$maxwid);
					$mu =~ s/&/&amp;/g;
					$sn = max_sub($sn, $maxwid); # wrap text to max width
					$sn =~ s/&/&amp;/mg; # possible MULTIPLE lines
					$u  =~ s/&/&amp;/g;
					$tit =~ s/&/&amp;/g;
					#                0   1   2    3     4      5     6  7   8    9   10 
					# $tl_           sn  url mu   tm    dir    tit   dn vip txt  ttt dom
					#push(@tblist, [$sn, $u, $mu, $tms, $fold, $tit, 0, 0,  0,   ttt,dom]);
					push(@tblist, [$sn, $u, $mu, $tms, $fold, $tit, 0, $bkn,$pag, '', ''  ]);
					$fnd = 0;
					last;
				}
			}
			if ($fnd) {
				$wmsg = "WARNING: Did NOT find a URL line in [$fn] ...\n"; 
				prt($wmsg);
				push(@warnings,$wmsg);
			}
		} else {
			$wmsg = "WARNING: Unable to open file [$fn] ...\n";
			prt($wmsg);
			push(@warnings,$wmsg);
		}
	}
	$currtime = time();
	$difftime = $currtime - $bgntime;
	$remtm = secs_2_hhmmss($difftime);
	prt( "Got array of $totcnt items (in $remtm) ...\n" );
}

################################################
# My particular time 'translation' - replaced date_string
sub YYYYMMDD {
	#  0    1    2     3     4    5     6     7     8
	my ($tm) = shift;
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($tm);
	$year += 1900;
	$mon += 1;
	my $ymd = "$year/";
	if ($mon < 10) {
		$ymd .= '0'.$mon.'/';
	} else {
		$ymd .= "$mon/";
	}
	if ($mday < 10) {
		$ymd .= '0'.$mday;
	} else {
		$ymd .= "$mday";
	}
	return $ymd;
}

sub get_tidy_text {
	my ($tx) = shift;
	my $inf = 'temptidy.htm';
	my $ntx = '';
	write2file($tx,$inf);
	if (open (TDY, "tidydev $params $inf|")) {
		my @arr = <TDY>;
		close TDY;
		foreach my $ln (@arr) {
			chomp $ln;
			$ntx .= "\n" if length($ntx);
			$ntx .= $ln;
		}
		###$ntx = join('',@arr);
	} else {
		prt( "FAILED! ... $! ...\n" );
		$ntx = $tx;
	}
	return $ntx;
}

sub Get_URL_Text_Count {
	my ($url) = shift;
	my $txt = get( $url );
	my $tcnt = 0;
	if ($txt) {
		$tcnt =	length($txt);
	}
	return $tcnt;
}


sub Get_Host_Name {
	my ($nm) = shift;
	if ($nm =~ /^http:\/\/(.*)/i) {
		$nm = $1;
	}
	if ($nm =~ /^https:\/\/(.*)/i) {
		$nm = $1;
	}
	if ($nm =~ /^ftp:\/\/(.*)/i) {
		$nm = $1;
	}
	my @arr = split('/', $nm);
	$nm = $arr[0];
	return $nm;
}

sub Get_Domain_Name {
	my ($nm) = shift;
	$nm = Get_Host_Name($nm);
	if ($nm =~ /^www\.(.*)/) {
		$nm = substr($nm,4);
	}
	return $nm;
}

############################################################
# Only used is $chkip = 1;
# Show IP Address
# uses sockets, gethostbyname
# Return 0, if can NOT be resolved.
# else the number of IP addresses resolved.
############################################################
sub showIPAddress {
	my ($nm) = shift;
	my $hnm = Get_Host_Name($nm);
	my @addr = gethostbyname($hnm);
	my $cnt = 0;
	if( !@addr ) {
		prt( "Can't resolve $nm ($hnm): $!\n" );
		return 0;
	}
	@addr = map { inet_ntoa($_) } @addr[4 .. $#addr];
	foreach my $k (@addr) {
		$cnt++;
		prt( "$cnt: $nm($hnm) resolves to IP [$k]\n" ) if ($dbg3);
	}
	return $cnt;
}

# eof - fav-05.pl
