#!/perl -w
# NAME: genentities.pl
# AIM: To prepare a HTML page, with _ALL_ entities up to 32767
# 11/30/2008 - geoff mclane - http://geoffair.net/mperl/
use strict;
use warnings;
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# Log file set-up: the log is named after this script, temp.<script>.txt.
my ($LF);    # NOTE(review): not referenced anywhere in this file
my $pgmname = $0;
# Keep only the final path component of $0, whichever separator was used
# (C:\dir\x.pl, ..\x.pl, ./x.pl, /dir/x.pl), so that the log name built
# below never contains a directory separator.
$pgmname =~ s{\A.*[\\/]}{}s;
my $outfile = "temp.$pgmname.txt";
open_log($outfile);

# Output files: the page as generated, and the copy rebuilt from the text
# returned by get_tidy_txt().
my ($out_file, $out_file2) = ('tempent.htm', 'tempent2.htm');

# OPTIONS
#   $usehex - true: row labels come from dec2hex(); false: 5-digit decimal
#   $usecn  - true: the table is wrapped in <div class="cn"> ... </div>
my ($usehex, $usecn) = (0, 1);

# Code points that are shown as an empty (&nbsp;) cell instead of an entity.
my %invalid = map { $_ => 1 } (0, 129, 141, 143, 144, 157);

# Code point => entity name. A code point found here is written to the page
# as the named reference &name; instead of the numeric reference &#N;.
# The names are those of the three HTML 4.01 entity sets (Latin-1, symbols,
# special characters); the keys are decimal code points.
my %tidy_ents = (
   # markup-significant characters
   34 => "quot",
   38 => "amp",
#   39 => "apos",   # left out: &apos; is an XML/XHTML entity, not part of HTML 4.01
   60 => "lt",
   62 => "gt",
   # Latin-1 characters, 160-255
   160 => "nbsp",
   161 => "iexcl",
   162 => "cent",
   163 => "pound",
   164 => "curren",
   165 => "yen",
   166 => "brvbar",
   167 => "sect",
   168 => "uml",
   169 => "copy",
   170 => "ordf",
   171 => "laquo",
   172 => "not",
   173 => "shy",
   174 => "reg",
   175 => "macr",
   176 => "deg",
   177 => "plusmn",
   178 => "sup2",
   179 => "sup3",
   180 => "acute",
   181 => "micro",
   182 => "para",
   183 => "middot",
   184 => "cedil",
   185 => "sup1",
   186 => "ordm",
   187 => "raquo",
   188 => "frac14",
   189 => "frac12",
   190 => "frac34",
   191 => "iquest",
   192 => "Agrave",
   193 => "Aacute",
   194 => "Acirc",
   195 => "Atilde",
   196 => "Auml",
   197 => "Aring",
   198 => "AElig",
   199 => "Ccedil",
   200 => "Egrave",
   201 => "Eacute",
   202 => "Ecirc",
   203 => "Euml",
   204 => "Igrave",
   205 => "Iacute",
   206 => "Icirc",
   207 => "Iuml",
   208 => "ETH",
   209 => "Ntilde",
   210 => "Ograve",
   211 => "Oacute",
   212 => "Ocirc",
   213 => "Otilde",
   214 => "Ouml",
   215 => "times",
   216 => "Oslash",
   217 => "Ugrave",
   218 => "Uacute",
   219 => "Ucirc",
   220 => "Uuml",
   221 => "Yacute",
   222 => "THORN",
   223 => "szlig",
   224 => "agrave",
   225 => "aacute",
   226 => "acirc",
   227 => "atilde",
   228 => "auml",
   229 => "aring",
   230 => "aelig",
   231 => "ccedil",
   232 => "egrave",
   233 => "eacute",
   234 => "ecirc",
   235 => "euml",
   236 => "igrave",
   237 => "iacute",
   238 => "icirc",
   239 => "iuml",
   240 => "eth",
   241 => "ntilde",
   242 => "ograve",
   243 => "oacute",
   244 => "ocirc",
   245 => "otilde",
   246 => "ouml",
   247 => "divide",
   248 => "oslash",
   249 => "ugrave",
   250 => "uacute",
   251 => "ucirc",
   252 => "uuml",
   253 => "yacute",
   254 => "thorn",
   255 => "yuml",
   # symbols: Greek letters, punctuation, arrows, mathematical operators
   402 => "fnof",
   913 => "Alpha",
   914 => "Beta",
   915 => "Gamma",
   916 => "Delta",
   917 => "Epsilon",
   918 => "Zeta",
   919 => "Eta",
   920 => "Theta",
   921 => "Iota",
   922 => "Kappa",
   923 => "Lambda",
   924 => "Mu",
   925 => "Nu",
   926 => "Xi",
   927 => "Omicron",
   928 => "Pi",
   929 => "Rho",
   931 => "Sigma",
   932 => "Tau",
   933 => "Upsilon",
   934 => "Phi",
   935 => "Chi",
   936 => "Psi",
   937 => "Omega",
   945 => "alpha",
   946 => "beta",
   947 => "gamma",
   948 => "delta",
   949 => "epsilon",
   950 => "zeta",
   951 => "eta",
   952 => "theta",
   953 => "iota",
   954 => "kappa",
   955 => "lambda",
   956 => "mu",
   957 => "nu",
   958 => "xi",
   959 => "omicron",
   960 => "pi",
   961 => "rho",
   962 => "sigmaf",
   963 => "sigma",
   964 => "tau",
   965 => "upsilon",
   966 => "phi",
   967 => "chi",
   968 => "psi",
   969 => "omega",
   977 => "thetasym",
   978 => "upsih",
   982 => "piv",
   8226 => "bull",
   8230 => "hellip",
   8242 => "prime",
   8243 => "Prime",
   8254 => "oline",
   8260 => "frasl",
   8472 => "weierp",
   8465 => "image",
   8476 => "real",
   8482 => "trade",
   8501 => "alefsym",
   8592 => "larr",
   8593 => "uarr",
   8594 => "rarr",
   8595 => "darr",
   8596 => "harr",
   8629 => "crarr",
   8656 => "lArr",
   8657 => "uArr",
   8658 => "rArr",
   8659 => "dArr",
   8660 => "hArr",
   8704 => "forall",
   8706 => "part",
   8707 => "exist",
   8709 => "empty",
   8711 => "nabla",
   8712 => "isin",
   8713 => "notin",
   8715 => "ni",
   8719 => "prod",
   8721 => "sum",
   8722 => "minus",
   8727 => "lowast",
   8730 => "radic",
   8733 => "prop",
   8734 => "infin",
   8736 => "ang",
   8743 => "and",
   8744 => "or",
   8745 => "cap",
   8746 => "cup",
   8747 => "int",
   8756 => "there4",
   8764 => "sim",
   8773 => "cong",
   8776 => "asymp",
   8800 => "ne",
   8801 => "equiv",
   8804 => "le",
   8805 => "ge",
   8834 => "sub",
   8835 => "sup",
   8836 => "nsub",
   8838 => "sube",
   8839 => "supe",
   8853 => "oplus",
   8855 => "otimes",
   8869 => "perp",
   8901 => "sdot",
   8968 => "lceil",
   8969 => "rceil",
   8970 => "lfloor",
   8971 => "rfloor",
   9001 => "lang",
   9002 => "rang",
   9674 => "loz",
   9824 => "spades",
   9827 => "clubs",
   9829 => "hearts",
   9830 => "diams",
   # special characters: ligatures, accents, spaces, dashes, quotation marks
   338 => "OElig",
   339 => "oelig",
   352 => "Scaron",
   353 => "scaron",
   376 => "Yuml",
   710 => "circ",
   732 => "tilde",
   8194 => "ensp",
   8195 => "emsp",
   8201 => "thinsp",
   8204 => "zwnj",
   8205 => "zwj",
   8206 => "lrm",
   8207 => "rlm",
   8211 => "ndash",
   8212 => "mdash",
   8216 => "lsquo",
   8217 => "rsquo",
   8218 => "sbquo",
   8220 => "ldquo",
   8221 => "rdquo",
   8222 => "bdquo",
   8224 => "dagger",
   8225 => "Dagger",
   8240 => "permil",
   8249 => "lsaquo",
   8250 => "rsaquo",
   8364 => "euro"
);

# Substitute code points for 128-159: a cell in that range shows the numeric
# reference of the value stored here rather than of the code point itself.
# The values match the Windows-1252 assignments for those positions; 0 marks
# the positions that have none (they are also listed in %invalid, which is
# tested first, so a 0 is never written to the page).
my %replaced;
@replaced{ 128 .. 159 } = (
    0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,    # 128-135
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000,    # 136-143
    0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,    # 144-151
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178,    # 152-159
);

# Options placed on the tidydev command line by get_tidy_txt().
# Joined with single spaces into one string.
my $params = join ' ',
    '-f temptidy.txt',
    '--tidy-mark no',
    '--wrap 99',
    '--indent yes',
    '--break-before-br yes',
    '--indent-attributes yes',
    '--vertical-space yes',
    '--indent-spaces 1',
    '--indent-cdata no',
    '--wrap-asp no',
    '--wrap-attributes no',
    '--wrap-jste no',
    '--wrap-php no',
    '--wrap-script-literals no',
    '--wrap-sections no';

prt( "$0 ... generating entities, output to $out_file ...\n" );

# Step 1: write the raw page for code points 0..32767, 32 per row, without
# opening it.
gen_entities($out_file, 0, 32768, 32, 0);

# Step 2: collect the text tidy produces for that page.
my $tt = get_tidy_txt( $out_file, $params);

# Step 3: store and open the tidied page - but only if there is one.
# get_tidy_txt() returns '' when tidy could not be run or printed nothing;
# writing and launching an empty file would only hide that failure.
my $status = 0;
if (length $tt) {
    write2file($tt,$out_file2);
    system($out_file2);
} else {
    prt( "ERROR: No tidy output for $out_file, $out_file2 not written ...\n" );
    $status = 1;
}

close_log($outfile,0);
exit($status);

#################################################################
######### SUB ONLY ########

# main purpose
# Build the complete entity-table page in memory and write it to $out.
#
# Parameters:
#   $out  - path of the HTML file to create (an existing file is overwritten)
#   $min  - first code point to show
#   $max  - one past the last code point requested
#   $wrap - number of entity cells per table row; a "Range" label cell is
#           added in front of them
#   $load - when true, $out is passed to system() once it has been written
#
# Cell content, in order of precedence: &nbsp; for code points in %invalid,
# the named entity from %tidy_ents, the numeric reference of the substitute
# in %replaced, otherwise the numeric reference &#N; itself.
#
# A failure to create or write $out is reported through prt(). The return
# value carries no information.
sub gen_entities {
    my ($out, $min, $max, $wrap, $load) = @_;

    # The last row is always padded to $wrap cells, so the caption names the
    # final code point of that row rather than echoing $max.
    my $count = $max - $min;
    my $rows  = $count > 0 ? int(($count + $wrap - 1) / $wrap) : 0;
    my $last  = $rows ? $min + $rows * $wrap - 1 : $min;

    # Column headings, used at both the top and the bottom of the table.
    my $heading_row = "<tr>\n<th>Range</th>\n"
        . join('', map { "<th>$_</th>\n" } (0 .. $wrap - 1))
        . "</tr>\n";

    # Every row holds $wrap entity cells plus the leading range label.
    my $columns = $wrap + 1;

    my $html = get_html_head();
    $html .= "<div class=\"cn\">\n" if $usecn;
    $html .= "<table align=\"center\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\" summary=\"table of entities\">\n";
    $html .= "<caption>Entities in the range $min to $last ...</caption>";
    $html .= $heading_row;

    my $ln = 0;    # entity cells already written to the current row
    my $i;         # declared outside the loop: the padding below continues from it
    for ($i = $min; $i < $max; $i++) {

        # A top/end navigation row every 800 code points.
        # NOTE(review): this lands between rows only while $wrap divides 800.
        if ($i && $i % 800 == 0) {
            $html .= "<tr>\n"
                . "<td colspan=\"$columns\" align=\"center\">\n"
                . "<a href=\"#top\">top</a>\n"
                . " | \n"
                . "<a href=\"#end\">end</a>\n"
                . "</td>\n"
                . "</tr>\n";
        }

        # Start of a row: open it and write the "first-last" label.
        if ($ln == 0) {
            my $row_end = $i + $wrap - 1;
            my ($mint, $maxt) = $usehex
                ? (dec2hex($i), dec2hex($row_end))
                : (sprintf('%05d', $i), sprintf('%05d', $row_end));
            $html .= "<tr>\n<td nowrap>$mint-$maxt&nbsp;</td>\n";
        }

        my $cell;
        if (exists $invalid{$i}) {
            $cell = '&nbsp;';
        } elsif (exists $tidy_ents{$i}) {
            $cell = '&' . $tidy_ents{$i} . ';';
        } elsif (exists $replaced{$i}) {
            $cell = '&#' . $replaced{$i} . ';';
        } else {
            $cell = "&#$i;";
        }
        $html .= "<td>$cell</td>\n";

        if (++$ln == $wrap) {
            $ln = 0;
            $html .= "</tr>\n";
        }
    }

    # Fill an unfinished last row with the code points that follow $max.
    if ($ln) {
        for (; $ln < $wrap; $ln++, $i++) {
            $html .= "<td>&#$i;</td>\n";
        }
        $html .= "</tr>\n";
    }

    $html .= $heading_row;
    $html .= "</table>\n";
    $html .= "</div>\n" if $usecn;
    $html .= get_end_links();
    $html .= get_html_valid();
    $html .= "<!-- generated by $pgmname on " . localtime(time()) . " for geoffair.net -->\n";
    $html .= "</body>\n";
    $html .= "</html>\n";

    my $fh;
    if (!open($fh, '>', $out)) {
        prt("ERROR: Failed to create $out file ... $! ...\n");
        return;
    }
    print {$fh} $html;
    # Buffered write errors only surface when the handle is closed.
    if (!close($fh)) {
        prt("ERROR: Failed to write $out file ... $! ...\n");
        return;
    }
    system($out) if ($load);
    return;
}

# Streaming variant of gen_entities(): produces the same table but prints
# each fragment to the output file as soon as it is generated instead of
# collecting the page in a string first. Not called from this file's main
# flow.
#
# Parameters:
#   $out  - path of the HTML file to create (an existing file is overwritten)
#   $min  - first code point to show
#   $max  - one past the last code point requested
#   $wrap - number of entity cells per table row; a "Range" label cell is
#           added in front of them
#   $load - when true, $out is passed to system() once it has been written
#
# Cell content, in order of precedence: &nbsp; for code points in %invalid,
# the named entity from %tidy_ents, the numeric reference of the substitute
# in %replaced, otherwise the numeric reference &#N; itself.
#
# A failure to create or write $out is reported through prt(). The return
# value carries no information.
sub gen_entities_vok {
    my ($out, $min, $max, $wrap, $load) = @_;

    my $fh;
    unless (open($fh, '>', $out)) {
        prt("ERROR: Failed to create $out file ... $! ...\n");
        return;
    }

    # Column headings, printed at both the top and the bottom of the table.
    my @heading_row = (
        "<tr>\n",
        "<th>Range</th>\n",
        (map { "<th>$_</th>\n" } (0 .. $wrap - 1)),
        "</tr>\n",
    );

    # The last row is always padded to $wrap cells, so the caption names the
    # final code point of that row rather than echoing $max.
    my $count = $max - $min;
    my $rows  = $count > 0 ? int(($count + $wrap - 1) / $wrap) : 0;
    my $last  = $rows ? $min + $rows * $wrap - 1 : $min;

    # Every row holds $wrap entity cells plus the leading range label.
    my $columns = $wrap + 1;

    print {$fh} get_html_head();
    print {$fh} "<div class=\"cn\">\n" if $usecn;
    print {$fh} "<table align=\"center\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\" summary=\"table of entities\">\n";
    print {$fh} "<caption>Entities in the range $min to $last ...</caption>\n";
    print {$fh} @heading_row;

    my $ln = 0;    # entity cells already written to the current row
    my $i;         # declared outside the loop: the padding below continues from it
    for ($i = $min; $i < $max; $i++) {

        # A top/end navigation row every 800 code points.
        # NOTE(review): this lands between rows only while $wrap divides 800.
        if ($i && $i % 800 == 0) {
            print {$fh}
                "<tr>\n",
                "<td colspan=\"$columns\" align=\"center\">\n",
                "<a href=\"#top\">top</a>\n",
                " | \n",
                "<a href=\"#end\">end</a>\n",
                "</td>\n",
                "</tr>\n";
        }

        # Start of a row: open it and write the "first-last" label.
        if ($ln == 0) {
            my $row_end = $i + $wrap - 1;
            my ($mint, $maxt) = $usehex
                ? (dec2hex($i), dec2hex($row_end))
                : (sprintf('%05d', $i), sprintf('%05d', $row_end));
            print {$fh} "<tr>\n", "<td nowrap>$mint-$maxt&nbsp;</td>\n";
        }

        my $cell;
        if (exists $invalid{$i}) {
            $cell = '&nbsp;';
        } elsif (exists $tidy_ents{$i}) {
            $cell = '&' . $tidy_ents{$i} . ';';
        } elsif (exists $replaced{$i}) {
            $cell = '&#' . $replaced{$i} . ';';
        } else {
            $cell = "&#$i;";
        }
        print {$fh} "<td>$cell</td>\n";

        if (++$ln == $wrap) {
            $ln = 0;
            print {$fh} "</tr>\n";
        }
    }

    # Fill an unfinished last row with the code points that follow $max.
    if ($ln) {
        for (; $ln < $wrap; $ln++, $i++) {
            print {$fh} "<td>&#$i;</td>\n";
        }
        print {$fh} "</tr>\n";
    }

    print {$fh} @heading_row;
    print {$fh} "</table>\n";
    print {$fh} "</div>\n" if $usecn;
    print {$fh} get_end_links();
    print {$fh} get_html_valid();
    print {$fh} "<!-- generated by $pgmname on " . localtime(time()) . " for geoffair.net -->\n";
    print {$fh} "</body>\n";
    print {$fh} "</html>\n";

    # Buffered write errors only surface when the handle is closed.
    unless (close($fh)) {
        prt("ERROR: Failed to write $out file ... $! ...\n");
        return;
    }

    system($out) if ($load);
    return;
}

########################
####### NOT USED #######
# Two-column variant of the entity table: each row has one cell holding the
# "first - last" range and one cell holding all $wrap entities of that row,
# each preceded by &nbsp;. Every code point is written as a plain numeric
# reference; %invalid, %tidy_ents and %replaced are not consulted.
# Marked NOT USED in this file.
#
# Parameters:
#   $out  - path of the HTML file to create (an existing file is overwritten)
#   $min  - first code point to show
#   $max  - one past the last code point requested
#   $wrap - number of entities per table row
#
# The finished file is always passed to system(). A failure to create or
# write $out is reported through prt(). The return value carries no
# information.
sub gen_entities_two_columns {
    my ($out, $min, $max, $wrap) = @_;

    my $fh;
    if (!open($fh, '>', $out)) {
        prt("ERROR: Failed to create $out file ... $! ...\n");
        return;
    }

    # This file defines no html_head(); get_html_head() is the page-header
    # routine the other generators use.
    print {$fh} get_html_head();

    # The last row is always padded to $wrap entities, so the caption names
    # the final code point of that row rather than echoing $max.
    my $count = $max - $min;
    my $rows  = $count > 0 ? int(($count + $wrap - 1) / $wrap) : 0;
    my $last  = $rows ? $min + $rows * $wrap - 1 : $min;

    print {$fh} "<table align=\"center\" border=\"0\" cellpadding=\"1\" cellspacing=\"1\" summary=\"table of entities\">\n";
    print {$fh} "<caption>Entities in the range $min to $last ...</caption>\n";

    my $ln = 0;    # entities already written to the current row
    my $i;         # declared outside the loop: the padding below continues from it
    for ($i = $min; $i < $max; $i++) {
        if ($ln == 0) {
            # Open the row: range cell first, then the cell shared by all
            # entities of this row.
            my $rng = "$i - " . ($i + $wrap - 1);
            print {$fh} "<tr>\n", "<td>$rng</td>\n", "<td>";
        }

        print {$fh} "&nbsp;&#$i;";

        if (++$ln == $wrap) {
            $ln = 0;
            print {$fh} "</td>\n", "</tr>\n";
        }
    }

    # Fill an unfinished last row with the code points that follow $max.
    if ($ln) {
        for (; $ln < $wrap; $ln++, $i++) {
            print {$fh} "&nbsp;&#$i;";
        }
        print {$fh} "</td>\n", "</tr>\n";
    }

    print {$fh} "</table>\n";
    print {$fh} "<!-- generated by $pgmname on " . localtime(time()) . " for geoffair.net -->\n";
    print {$fh} "</body>\n";
    print {$fh} "</html>\n";

    # Buffered write errors only surface when the handle is closed.
    if (!close($fh)) {
        prt("ERROR: Failed to write $out file ... $! ...\n");
        return;
    }
    system($out);
    return;
}

########################
####### NOT USED #######
# Simple variant of the entity table: a bordered grid with no range column,
# where every cell shows the entity, a line break, and its decimal number.
# Code point 0 is shown as &nbsp;; every other code point is written as a
# plain numeric reference (%invalid, %tidy_ents and %replaced are not
# consulted). Marked NOT USED in this file.
#
# Parameters:
#   $out  - path of the HTML file to create (an existing file is overwritten)
#   $min  - first code point to show
#   $max  - one past the last code point requested
#   $wrap - number of cells per table row
#
# The finished file is always passed to system(). A failure to create or
# write $out is reported through prt(). The return value carries no
# information.
sub gen_entities_simple {
    my ($out, $min, $max, $wrap) = @_;

    my $fh;
    if (!open($fh, '>', $out)) {
        prt("ERROR: Failed to create $out file ... $! ...\n");
        return;
    }

    # This file defines no html_head(); get_html_head() is the page-header
    # routine the other generators use.
    print {$fh} get_html_head();

    # The last row is always padded to $wrap cells, so the caption names the
    # final code point of that row rather than echoing $max.
    my $count = $max - $min;
    my $rows  = $count > 0 ? int(($count + $wrap - 1) / $wrap) : 0;
    my $last  = $rows ? $min + $rows * $wrap - 1 : $min;

    print {$fh} "<table align=\"center\" border=\"1\" cellpadding=\"1\" cellspacing=\"1\" summary=\"table of entities\">\n";
    print {$fh} "<caption>Entities in the range $min to $last ...</caption>\n";

    my $ln = 0;    # cells already written to the current row
    my $i;         # declared outside the loop: the padding below continues from it
    for ($i = $min; $i < $max; $i++) {
        print {$fh} "<tr>\n" if $ln == 0;

        my $shown = ($i == 0) ? '&nbsp;' : "&#$i;";
        print {$fh} "<td>$shown<br>$i</td>\n";

        if (++$ln == $wrap) {
            $ln = 0;
            print {$fh} "</tr>\n";
        }
    }

    # Fill an unfinished last row with the code points that follow $max.
    if ($ln) {
        for (; $ln < $wrap; $ln++, $i++) {
            print {$fh} "<td>&#$i;<br>$i</td>\n";
        }
        print {$fh} "</tr>\n";
    }

    print {$fh} "</table>\n";
    print {$fh} get_end_links();
    print {$fh} get_html_valid();
    print {$fh} "</body>\n";
    print {$fh} "</html>\n";

    # Buffered write errors only surface when the handle is closed.
    if (!close($fh)) {
        prt("ERROR: Failed to write $out file ... $! ...\n");
        return;
    }
    system($out);
    return;
}

# Return the closing navigation paragraph: the "end" anchor followed by the
# index / home / top links. The text starts and ends with an empty line.
sub get_end_links {
    my @lines = (
        '',
        '    <p class="ctr">',
        '    <a name="end"></a>',
        '    |- <a target="_self" href="index.htm">index</a>',
        '    -|- <a target="_self" href="http://geoffair.net/home2.htm">home</a>',
        '    -|- <a target="_self" href="#top">top</a>',
        '    -|',
        '    </p>',
        '',
    );

    # Every line, including the final empty one, is newline-terminated.
    return join("\n", @lines) . "\n";
}


# Return the opening part of the page: DOCTYPE, <head>, the start of <body>
# with the "top" anchor, the heading, the upper navigation links and the
# introductory paragraph. The caller has to close <body> and <html>.
#
# The introduction states the range the script actually generates
# (0-32767, see the gen_entities() call in the main flow).
sub get_html_head {
    # Non-interpolating heredoc: the markup is emitted exactly as written.
    my $html_head = <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
 <head>
  <title>
   display entities
  </title>
  <meta http-equiv="Content-Language"
        content="en-us">
  <meta http-equiv="Content-Type"
        content="text/html; charset=us-ascii">
  <link rel="stylesheet"
        href="cxx.css"
        type="text/css">
 </head>
 <body>
  <a name="top"
        id="top"></a>
  <h1>
   Display Entities
  </h1>

    <p class="ctr">
    |- <a target="_self" href="index.htm">index</a>
    -|- <a target="_self" href="http://geoffair.net/home2.htm">home</a>
    -|- <a target="_self" href="#end">end</a>
    -|
    </p>

  <p>
    A simple Perl generated list of 'entity' values, 0-32767, just to see what happens! And
    what is displayed.
  </p>

EOF
    return $html_head;
}

# Return the paragraph with the two validation badges ("checked by tidy"
# and "Valid HTML 4.01 Transitional"). The text starts and ends with an
# empty line.
#
# The paragraph carries no "end" anchor of its own: every caller in this
# file emits get_end_links() immediately before it, and that text already
# defines <a name="end">. A second anchor with the same name/id would be a
# duplicate identifier in the page.
sub get_html_valid {
    my $html_valid = <<'EOF';

  <p>
   <a target="_blank"
      href="http://tidy.sourceforge.net/"><img border="0"
        src="images/checked_by_tidy.gif"
        alt="checked by tidy"
        width="32"
        height="32"></a>&nbsp; <a href="http://validator.w3.org/check?uri=referer"
      target="_blank"><img src="images/valid-html401.gif"
        alt="Valid HTML 4.01 Transitional"
        width="88"
        height="31"></a>
  </p>

EOF

    return $html_valid;
}

# Convert a non-negative integer to upper-case hexadecimal, zero-padded to
# at least four digits (0 -> "0000", 255 -> "00FF", 65536 -> "10000").
#
# Parameter: $_[0] - the number to convert
# Returns:   the hexadecimal string
sub dec2hex {
    my ($decnum) = @_;

    # sprintf performs the digit extraction and the padding that used to be
    # done by hand with repeated modulus and division.
    return sprintf('%04X', $decnum);
} # end sub

# Run "tidydev $pars $inf" and return what it prints on standard output,
# with empty lines dropped and the remaining lines joined by "\n" (no
# trailing newline).
#
# Parameters:
#   $inf  - name of the input file, placed last on the command line
#   $pars - option string, placed between the program name and $inf
#
# Returns '' when the command could not be started (reported through prt())
# or when it printed nothing.
#
# NOTE(review): the command line is a single string handed to the shell;
# both arguments are script-internal constants in this file.
sub get_tidy_txt {
    my ($inf, $pars) = @_;

    my $tidy;
    if (!open($tidy, '-|', "tidydev $pars $inf")) {
        prt( "ERROR: Failed to run tidydev ... $! ...\n" );
        return '';
    }

    my @kept;
    while (my $ln = <$tidy>) {
        chomp $ln;
        push @kept, $ln if length($ln);
    }

    # The result of close is deliberately not treated as a failure:
    # presumably tidydev, like HTML Tidy, exits non-zero for mere warnings.
    close $tidy;

    return join("\n", @kept);
}

# eof - genentities.pl
