#!/Perl
# AIM: To read the Internet Favorites, and produce
# a HTML document, with links and description
# 2006.07.11 - switch link column, and add (B) broken, from c:\HOMEPAGE\Broken02.htm
# update 2006.06.28 - weed out local references
# Added a MAXIMUM width, so the table approximately 'fits' a 1024 wide screen
# change to using '<base target="_blank">'
# 2005.11.12 - works ok - geoff mclane
# 
use File::stat;
my $DT = '2006.07.15';
$VERSION = '0.4';
$PACKAGE = 'fav-04';
my $hvers = "<!-- P26.$DT - minor update -->\n"; 
$hvers .= "<!-- P26.2006.07.11 - update -->\n";
$hvers .= '<!-- p26.2005.11.11 - List of favorites in PRO-1 geoffmclane.com/favorites.htm -->';
print "$0 ... Hello, World ...\n";
if( !defined( $ENV{'USERPROFILE'} ) ) {
	print "Can NOT locate USERPROFILE in ENVironment!\n";
	exit(1);
}
my $ff = $ENV{'USERPROFILE'} . '\\Favorites';
if( !( -d $ff ) ) {
	print "Folder $ff is NOT a directory!\n";
	exit(2);
}
# set a sample maximum title, wrap start at -10 from this - original set at 60
#             12345678901234567890123456789012345678901234567890123456789012345678901234567890
#                      1         2         3         4         5         6         7
my $maxtit = 'Domain Name Registration, Domain Transfe'; # rs. Your domain name search starts here.';
my $logfil = "temp.$0.txt";
my $htmfil = 'favorites.htm';
my @mths = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my $addfold = 1;
my $sch = ''; # avoid this character ...
my @fav_exclude = (
 'https://geoffmclane.com:2083/frontend/x/index.html' );

my @fav_broken = (
 'http://a.ninemsn.com.au/b.aspx',
 'http://blogs.msdn.com/nikolad/archive/2005/09/02/460368.aspx',
 'http://code.jenseng.com/jenChat/',
 'http://datacompression.info/JPEG.shtml',
 'http://document.ihg.uni-duisburg.de/cgi-bin/mapserv40',
 'http://drivers.soft32.com/index-2-12-110-0-4.html',
 'http://flightgear.org/Downloads/scenery-0.9.5.html',
 'http://free.compuserve.com/trycsfree/index2.adp',
 'http://grass.ibiblio.org/grass57/index.html',
 'http://home.exetel.com.au/atmint.exetel.com.au/2004SBTS.html',
 'http://ourworld.compuserve.com/homepages/GEOFF_MCLANE',
 'http://pubs.logicalexpressions.com/Pub0009/LPMArticle.asp',
 'http://serenitysydney.com.au/',
 'http://usa.asus.com/products/mb/socket478/p4c800-d/overview.htm',
 'http://www.adg.dk/airport.asp',
 'http://www.candleart.com.au/',
 'http://www.cartexpress.com/',
 'http://www.commerce-cgi.com/download.htm',
 'http://www.compalseast.org.au/',
 'http://www.dmartias.fr/mondial/',
 'http://www.e-directory.org/download/list/modules.html',
 'http://www.elanit.com.au/immediacy/main.asp',
 'http://www.fgdc.gov/clearinghouse/clearinghouse.html',
 'http://www.flightgear.org/~curt/Models/Special/Rascal110_2/',
 'http://www.flightgear.org/~curt/Photos/KMHV/',
 'http://www.flightgear.org/Downloads/scenery-0.9.7.html',
 'http://www.flymig.com/iata/r/Country.Papua_New_Guinea.htm',
 'http://www.frenchlinguistics.com/dictionary/',
 'http://www.friendofflowers.com/images/famphots/famphot.php',
 'http://www.interweb.com.au/',
 'http://www.iridiumsoftsol.com/content.aspx',
 'http://www.jobsearch.gov.au/',
 'http://www.libsdl.org/index.php',
 'http://www.linuxguruz.com/',
 'http://www.megxon.com/products/S302/S302.htm',
 'http://www.microsoft.com/downloads/details.aspx',
 'http://www.microsoft.com/isapi/redir.dll',
 'http://www.navigate.com.au/navigate/index.jsp',
 'http://www.netopia.com/buy/download_promo.jsp',
 'http://www.nottingham.ac.uk/~eazdluf/taxidraw.html',
 'http://www.nottingham.ac.uk/~eazdluf/taxidraw/airportdata.html',
 'http://www.ntsb.gov/',
 'http://www.open-bits.org/browse.php',
 'http://www.ossim.org/tiki-read_article.php',
 'http://www.perldoc.com/perl5.8.0/lib.html',
 'http://www.stockill.org/fgfsdb/models.php',
 'http://www.wajb.freeserve.co.uk/codes.htm',
 'http://www.web-developer-india.com/web/jscript/refp_10.html',
 'http://www.worldofmaya.com/t_poly.html',
 'http://www.worldzone.net/games/azrael_dark/PROJECT_ZERO/GMAX.html',
 'http://x-plane.org/home/robinp/AptNavFAQ.htm',
 'https://164.214.2.62/products/digitalaero/index.cfm',
 'https://geoffmclane.com:2083/frontend/x/index.html',
 'https://www.clickstart.com.au/capabiliti/menuscript.asp');
my @oth_broken = (
 'file:///cgi-sys/Count.cgi',
 'file:///cgi-sys/guestbook.cgi',
 'http://&lt;!--',
 'http://docs.rinet.ru:8083/WebPub/ch56.htm',
 'http://emporium.turnpike.net/~viredit/emploi/cv/query.htm',
 'http://ev.free2code.net/plugins/articles/read.php',
 'http://geoffmclane.com/fgfs-003.htm',
 'http://home.netscape.com/assist/net_sites/new_html3_prop.html',
 'http://homepages.wmich.edu/~l0lazaro/perld/fileio.html',
 'http://jobs.iconrec.com.au',
 'http://lib.risk.ee/javanotes/c7/s6.html',
 'http://perl.hamtech.net/prog/ch03_109.htm',
 'http://tidy.sf.net/issue/1365706',
 'http://vadivel.thinkingms.com/PermaLink.aspx',
 'http://www.accuweather.com/adcbin/public/intlocal_index.asp',
 'http://www.ao.net/~juang/IntroJava2/JavaIO/JavaIO.html',
 'http://www.bradchoate.com/weblog/2002/08/12/mtmacro',
 'http://www.cclabs.missouri.edu/things/instruction/perl/perlcourse.html',
 'http://www.cruising.org/cvpc/cruiselines/DisplayShip.cfm',
 'http://www.digistuff.com/story_photos.asp',
 'http://www.flightgear.org/Downloads/scenery-0.9.7.html',
 'http://www.hollandamerica.com/fivestarfleet/rotterdam.htm',
 'http://www.jobsearch.gov.au/',
 'http://www.libsdl.org/cvs.php',
 'http://www.microsoft.com/downloads/details.aspx',
 'http://www.microsoft.com/downloads/details.aspx ',
 'http://www.neosoft.com/neosoft/man/perl.1.html',
 'http://www.netacc.net/~poulsen/moonphase.html',
 'http://www.netscape.com/navigator/',
 'http://www.nottingham.ac.uk/~eazdluf/taxidraw.html',
 'http://www.novell.com/products/netware4/quicklook.html',
 'http://www.opengl.org/resources/libraries/glut.html',
 'http://www.opengl.org/resources/libraries/glut/glut_downloads.html',
 'http://www.reunir.com/fiche.asp',
 'http://www.shfa.nsw.gov.au/content/home.cfm',
 'http://www.stratus.com/products/vos',
 'http://www.tek-tips.com/viewthread.cfm',
 'http://www.x-plane.org/users/robinp',
 'http://www.x-plane.org/users/robinp/',
 'https://ccvs.cvshome.org/',
 'https://www.cvshome.org/');

my $basedir = $ff;
my $blen = length($basedir);
my ($fn,$ffn,$LF,$HF);
my @dirs = ($ff);
my @fils = ();
my @tblist = ();
my @warnings = ();
my $wmsg = '';
my $f_title = '';
my $f_link = '';
my $f_tlink = '';
my $f_data = '';
my ($f_fold, $f_tit);
open $LF, ">$logfil" or die "Can NOT open LOG file $logfil!\n";
open $HF, ">$htmfil" or die "Can NOT open HTML file $htmfil!\n";

my $fcnt = scalar @fils;
my $dcnt = scalar @dirs;
my $maxwid = length($maxtit);
#print "Found $fcnt files, and $dcnt directories ...\n";
while (scalar @dirs) {
	local @dir2 = @dirs;
	@dirs = ();
	while ($fn = shift @dir2) {
		do_dir($fn);
	}
}
$fcnt = scalar @fils;
$dcnt = scalar @dirs;
prt( "Total: $fcnt URL files ...\n" );
prt( "Maximum line length used = $maxwid ...\n" );

# choosing a DOCTYPE
##my $doctyp4 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">';
my $doctyp4 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">';
##my $doctyp3 = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">';

out_htm_head();
oh( '<table border="1" width="100%" summary="List of favorites - First column is the title, and the 2nd is link">' );
oh( ' <tr>' );
if ($addfold) {
	oh( '  <td><b>Folder</b></td>' );
}
oh( '  <td><b>Title</b></td>' );
oh( '  <td><b>Link</b></td>' );
oh( '  <td><b>Date</b></td>' );
oh( ' </tr>' );
get_table_arr();
my $tcnt = scalar @tblist;
for (my $i = 0; $i < $tcnt; $i++) {
	$f_title = $tblist[$i][0];
	$f_link =  $tblist[$i][1];
	$f_tlink = $tblist[$i][2];
	$f_date =  $tblist[$i][3];
	$f_fold =  $tblist[$i][4];
	$f_tit  =  $tblist[$i][5];
	oh( '   <tr>' );
	if ($addfold) {
		oh( "    <td>$f_fold</td>" );
		oh( "    <td>$f_tit</td>" );
	} else {
		oh( "    <td>$f_title</td>" );
	}
	oh( "    <td><a href=\"$f_link\">$f_tlink</a></td>" );
	oh( "    <td>$f_date</td>" );
	oh( '   </tr>' );
	###prt( "$i [".$tblist[$i][0].", ".$tblist[$i][1].", ".$tblist[$i][2].", ".$tblist[$i][3]."]\n" );
	###prt( "$i [$f_title, $f_link, $f_tlink, $f_date, $f_fold, $f_tit]\n" );
}
oh( '</table>' );
out_htm_tail();
if (@warnings) {
	prt( "Repeating WARNINGS issues ...\n" );
	foreach $wmsg (@warnings) {
		prt($wmsg);
	}
}

prt("Loading $htmfil ... may have to be closed to continue...\n");
close( $HF );
close( $LF );
system( $htmfil );
##system( $logfil );
exit(0);

#######################################################################
### just subs below

sub get_table_arr {
	prt( "Getting array of ".scalar @fils." files ...\n" );
	foreach $fn (@fils) {
		# process each file
		local $FH;
		my $sb = stat($fn);
		my $tms = get_YYYYMMDD(scalar localtime $sb->mtime);
		if ( open( $FH, $fn ) ) {
			local @lns = <$FH>; # slurp in the lines
			local $sn = remdir($fn);	# file name is the TITLE of the favorite ...
			###prt( "Processing " . remdir($fn) . " of " . scalar @lns . " lines ...\n");
			close( $FH );
			local $line;
			my $fnd = 1;
			my $bkn = 0; # assume NOT broken link, per FP
			# get the FOLDER
			my $ind = rindex($sn, "\\");
			my $fold = '.';
			my $tit = $sn;
			if ($ind != -1) {
				$fold = substr($sn, 0, $ind);
				$tit = substr($sn, ($ind + 1));
			}
			foreach $line (@lns) {
				chomp $line;
				if( $line =~ /^URL=/ ) {
					local $u = substr($line,4); ## ~ s/^URL=//;
					if (in_exclude($u)) {
						$fnd = 0; # avoid a WARNING ...
						last;
					}
					if (in_fav_broken($u) || in_oth_broken($u)) {
						$bkn = 1;
					}
					##prt( "\"$sn\",$u\n" );
					local $mu = max_sub2($u,$maxwid);
					$mu =~ s/&/&amp;/g;
					$sn = max_sub($sn, $maxwid); # wrap text to max width
					$sn =~ s/&/&amp;/mg; # possible MULTIPLE lines
					if (($fold eq 'Links') && (substr($tit,0,4) eq 'FIFA')) {
						prt( "Exception - changed [$sn] and [$tit] \n" );
						$tit =~ s/$sch/&#153;/g;
						$sn =~ s/$sch/&#153;/mg;
						prt( "Exception - to [$sn] and [$tit] \n" );
					}
					$u  =~ s/&/&amp;/g;
					$tit =~ s/&/&amp;/g;
					if ($bkn) {
						### $tms .= '<b>(B)</b>';
						$sn = '<b>(B)</b> '.$sn;
						$tit = '<b>(B)</b> '.$tit;
					}
					push(@tblist, [$sn, $u, $mu, $tms, $fold, $tit]);
					###prt( "push(\@tblist, [$sn, $u, $mu, $tms, $fold])\n" );
					$fnd = 0;
					last;
				}
			}
			if ($fnd) {
				$wmsg = "WARNING: Did NOT find a URL line in [$fn] ...\n"; 
				prt($wmsg);
				push(@warnings,$wmsg);
			}
		} else {
			$wmsg = "WARNING: Unable to open file [$fn] ...\n";
			prt($wmsg);
			push(@warnings,$wmsg);
		}
	}
	prt( "Got array of ".scalar @tblist." items ...\n" );
}

## month to number
sub mth_to_num {
	my ($mth) = shift;
	my $cnt = 0;
	###prt( "Chk [$mth] " );
	foreach my $m (@mths) {
		$cnt++;
		if ($m eq $mth) {
			###prt( "Is $m - return $cnt\n" );
			return $cnt;
		}
	}
	mydie( "ERROR: Returning 0!!! for [$mth]\n" );
	return '??';
}


sub get_YYYYMMDD {
	my ($tm) = shift;
	my @arr = split( / /, $tm ); # time of form 'Sat Mar 12 03:11:55 2005'
	my $ac = scalar @arr;
	my $doff = 2;
	my $yoff = 4;
	if ($ac == 5) {
		$doff = 2;
		$yoff = 4;
	} elsif ($ac == 6) {
		$doff = 3;
		$yoff = 5;
	} else {
		mydie( "ERROR: Time ($tm) did NOT split correctly!\n" );
	}
	my $mn = mth_to_num( $arr[1] );
	if ($mn < 10) {
		$mn = '0'.$mn;
	}
	my $dn = $arr[$doff];
	if ($dn < 10) {
		$dn = '0'.$dn;
	}
	my $dtt = $arr[$yoff].'/'.$mn.'/'.$dn; # translated to 2005/03/12
	return $dtt;
}


sub get_lists {
	###foreach $fn (@files) {
	while ($fn = shift @_) {
		next if ($fn eq '.');
		next if ($fn eq '..');
		$ffn = $ff . '\\' . $fn;
		if( -d $ffn ) {
			push(@dirs, $ffn);
		} else {
			if ($fn =~ /\.url$/i) {
				push(@fils, $ffn);
			} else {
				prt( "Discarding file $ffn ...\n" );
			}
		}
	}
	$fcnt = scalar @fils;
	$dcnt = scalar @dirs;
	prt( "Found $fcnt files, and $dcnt directories ...\n" );
}

sub do_dir {
	local ($dn) = @_;
	print "Processing $dn ...\n";
	opendir(DIRH, $dn);
	local @f = readdir(DIRH);
	closedir(DIRH);
	print "Found " . scalar @f . " entries ...\n";
	$ff = $dn;
	get_lists(@f);
}

sub prt {
	my $msg = shift;
	print $msg;
	print $LF $msg;
}

sub max_sub2 {
	my ($ln, $max) = @_;
	if (length($ln) > ($max+5)) {
		$ln = substr($ln,0,$max) . '...';
	}
	return $ln;
}

sub max_sub {
	my ($ln, $max) = @_;
	my $nln = $ln;
	if (length($ln) > $max) {
		my @arr = split(/ /,$ln);
		$nln = '';
		my $bit = '';
		my $bl = 0;
		my $sl = 0;
		my $sc = 0;
		foreach my $s (@arr) {
			$sl = length($s);
			$bl = length($bit);
			while ($sl > $max) {
				if ($bl) {
					$bit .= ' ';
				}
				$bit .= substr($s, 0, $max - $bl);
				$s = substr($s, $max - $bl);
				if (length($nln)) {
					$nln .= "<br>\n";
				}
				$nln .= $bit;
				$bit = '';
				$sl = length($s);
				$bl = length($bit);
				$sc = 0;
			}
			if ($bl) {
				if (( $bl + $sc + length($s) ) > $max ) {
					if (length($nln)) {
						$nln .= "<br>\n";
					}
					$nln .= $bit;
					$bit = $s;
					$sc = 0;
				} else {
					$bit .= ' ';
					$sc++;
					$bit .= $s;
				}
			} else {
				$bit = $s;
				$sc = 0;
			}
		}
		if (length($bit)) {
			if (length($nln)) {
				$nln .= "<br>\n";
			}
			$nln .= $bit;
		}
	}
	return $nln;
}


sub remdir {
	local ($f) = @_;
	local $b2 = quotemeta($basedir);
	###$f =~ s/^$basedir//;
	###$f = substr( $f, (length($basedir) + 1) );
	##$f = substr( $f, ($blen + 1), (length($f) - $blen - 5) );
	$f =~ s/^$b2\\//; # remove beginning ...
	$f =~ s/\.url$//; # and remove tail
	return $f;
	###return (max_sub($f, $maxwid));
}

sub ohl {
	print $HF "\n";
}

sub oh {
	local ($txt) = @_;
	print $HF $txt;
	ohl();
}

sub out_htm_head {
oh( $doctyp4 );
oh( '<html>' );
oh( '<head>' );
oh( "<title>List of Geoff Favorites</title>" );
oh( '<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">' );
oh( '<meta name="Author" content="Geoff Mclane">' );
oh( '<style type="text/css">' );
oh( '<!-- /* Style Definitions */' );
oh( 'body {' );
oh( ' background-image:url("clds3.jpg");' );
oh( ' margin: 0cm 1cm 0cm 1cm;' );
oh( '}' );
oh( 'h1{' );
oh( ' background:#efefef;' );
oh( ' border-style: solid solid solid solid;' );
oh( ' border-color:#d9e2e2;' );
oh( ' border-width:1px;' );
oh( ' padding:2px 2px 2px 2px;' );
oh( ' font-size:200%;' );
oh( ' text-align:center;' );
oh( '}' );
oh( '.ctr { text-align:center; }' );
oh( '.bld { font-weight:bold; }' );
oh( '-->' );
oh( '</style>' );
oh( '<base target="_blank">' ); # set so ALL open in 'New Window'
oh( '</head>' );
oh( '<body>' );
oh( '<h1><a name="top"></a>List of Geoff Favorites</h1>' );
oh( '<p class="ctr"><a href="favorite.htm">back</a> <a href="home2.htm">home</a> ' );
oh( '<a target="_self" href="#bottom">bottom</a></p>' );
oh( '<p>This is a simple table, as at $DT, of my ever changing, personal <span class="bld">Favorites</span>. ');
oh( 'It is autogenerated periodically, using a Perl script, in an attempt to keep it up to date ;=)) ' );
oh( 'It does contain some broken links, sites that have disappeared, but most are valid and current. ' );
oh( 'The base target has been set to _blank, so when a link is clicked, it should open in a NEW ');
oh( 'browser page. While the link text is sometimes truncated, the underlying anchor reference ');
oh( 'contains the full link ... Enjoy ...</p>' );

}

sub out_htm_tail {
oh( '<p><a name="bottom"></a>' );
oh( "This table is auto-generated from a Perl script, reading and analysing my 'Favorites' folder, " );
oh( "from the USERPROFILE given in the environment. Those marked with a <b>(B)</b> were <b>BROKEN</b> links ");
oh( "at the last full verification done by FrontPage ... sometimes it is due to the fact that they are " );
oh( "secure sites (https), and sometimes due to the fact that the site, or at least that page, has since been pulled down, " );
oh( "but I have yet to delete this link from my personal 'Favorites' ... and just sometimes FrontPage " );
oh( "makes a mistake in its verification process, and/or the site has a redirection active!</p>" );

oh( '<p class="ctr"><a href="favorite.htm">back</a> <a href="home2.htm">home</a> ' );
oh( '<a target="_self" href="#top">top</a></p>' );

	print $HF <<"EOF";
<p><a href="http://validator.w3.org/check?uri=referer">
<img src="images/valid-html401.gif" alt="Valid HTML 4.01 Transitional" height="31" width="88"></a></p>

</body>

$hvers

</html>

EOF

}

sub in_fav_broken {
	my ($h) = shift;
	foreach $l (@fav_broken) {
		if ($l eq $h) {
			return 1;
		}
	}
	return 0;
}

sub in_oth_broken {
	my ($h) = shift;
	foreach $l (@oth_broken) {
		if ($l eq $h) {
			return 1;
		}
	}
	return 0;
}

# my @fav_exclude = (
sub in_exclude {
	my ($h) = shift;
	foreach $l (@fav_exclude) {
		if ($l eq $h) {
			return 1;
		}
	}
	return 0;
}


#eof
