#!/usr/bin/perl -w
# NAME: tidyentities.pl
# AIM: VERY SPECIALIZED - Read a C table, and a JSON set of entities, and compare
# 08/02/2015 geoff mclane http://geoffair.net/mperl
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
use JSON;
use Data::Dumper;
use feature 'unicode_strings';
use utf8;
use Text::Unidecode;
# Platform-dependent locations: the directory holding lib_utils.pl, the
# directory used for the log file, and the path separator.
my $os = $^O;
my $perl_dir = '/home/geoff/bin';
my $PATH_SEP = '/';
my $temp_dir = '/tmp';
# Select the Windows settings only for Windows-style platforms. A bare /win/i
# test also matches 'darwin' (macOS), which would wrongly pick these paths.
if ($os =~ /^(?:MSWin|cygwin)/i) {
    $perl_dir = 'C:\GTools\perl';
    $temp_dir = $perl_dir;
    $PATH_SEP = "\\";
}
# Make the helper library findable, then load it (prt, open_log, ... come from it).
unshift(@INC, $perl_dir);
require 'lib_utils.pl' or die "Unable to load 'lib_utils.pl' Check paths in \@INC...\n";
# Log file setup.
# $LF is declared as a package global; nothing in this file assigns it
# (presumably it is used by lib_utils.pl - TODO confirm).
our ($LF);
# Reduce $0 to its last path component, accepting either separator style.
my $pgmname = $0;
$pgmname = (split(/(\\|\/)/, $pgmname))[-1] if ($pgmname =~ /(\\|\/)/);
# The log lives in the temp directory and is named after the script.
my $outfile = join($PATH_SEP, $temp_dir, "temp.$pgmname.txt");
open_log($outfile);

# user variables
# Version string printed by give_help().
my $VERS = "0.0.5 2015-01-09";
# Passed to close_log() by pgm_exit(); set by -l / -ll in parse_args() and
# forced to 1 by process_in_json().
my $load_log = 0;
# Positional input file, filled in by parse_args().
my $in_file = '';
# Hard-coded inputs used by MAIN: the JSON entity list (process_in_json) and
# the C source containing the entity table (process_in_file).
my $in_file1 = 'C:\Users\user\Documents\Tidy\htmlmathml.json';
my $in_file2 = 'F:\Projects\tidy-html5\src\entities.c';
# Verbosity level tested by VERB1()..VERB9(); changed by -v in parse_args().
my $verbosity = 0;
# Set by -o in parse_args(); not read anywhere else in the code shown here.
my $out_file = '';

# ### DEBUG ###
# When $debug_on is true, parse_args() issues a warning and falls back to
# $def_file if no input file was given.
my $debug_on = 0;
my $def_file = 'def_file';

### program variables
# Messages collected by prtw() and reported by show_warnings().
my @warnings = ();
# Working directory at start-up; not referenced again in the code shown here.
my $cwd = cwd();

# Verbosity predicates: each returns true once $verbosity has reached the
# level named in the sub (1, 2, 5 or 9).
sub VERB1() {
    return (1 <= $verbosity);
}
sub VERB2() {
    return (2 <= $verbosity);
}
sub VERB5() {
    return (5 <= $verbosity);
}
sub VERB9() {
    return (9 <= $verbosity);
}

# Report every message collected in @warnings, preceded by a count line.
# With no warnings, a "No warnings issued." note is printed only at
# verbosity 9 or above. The single argument is accepted but not used.
sub show_warnings($) {
    my ($val) = @_;
    my $count = scalar @warnings;
    if ($count == 0) {
        prt( "\nNo warnings issued.\n\n" ) if (VERB9());
        return;
    }
    prt("\nGot $count WARNINGS...\n");
    foreach my $message (@warnings) {
        prt("$message\n");
    }
    prt("\n");
}

# Common exit path: print an optional final message (newline-terminated),
# list any collected warnings, close the log, and exit with the given code.
#   $val - process exit code
#   $msg - final message; an empty string prints nothing
sub pgm_exit($$) {
    my ($val, $msg) = @_;
    if (length($msg) > 0) {
        # Make sure the message ends the line before it is printed.
        $msg = $msg . "\n" unless ($msg =~ /\n$/);
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile, $load_log);
    exit($val);
}


# Print a warning immediately and remember it for the end-of-run summary
# produced by show_warnings(). One trailing newline, if present, is removed
# before the text is stored.
sub prtw($) {
    my $text = shift;
    $text =~ s/\n$//;
    prt($text . "\n");
    push(@warnings, $text);
}

# Sort comparator: case-insensitive string ordering of $a and $b.
# Returns -1, 0 or 1, as expected by sort().
sub mycmp_nc_sort {
    return lc($a) cmp lc($b);
}

# Sample entity name => character string pairs used by check_hash().
# 'nlE' is the only two-code-point value.
my %test_hash = (
    'nlE'     => chr(0x2266) . chr(0x338),
    'harrcir' => chr(0x2948),
    'omid'    => chr(0x29b6),
    'cularr'  => chr(0x21b6),
    'ycy'     => chr(0x44b),
    'ldca'    => chr(0x2936),
);

# Debug helper: dump %test_hash, then print each entity name followed by the
# unidecode() transliteration of its value.
# NOTE: this sub always finishes with pgm_exit(1, ...), so it never returns
# to its caller.
# Earlier experiments (scanning the value for '{' ... '}', matching a literal
# \x{...} with a regex, and chr()) were dropped in favour of unidecode().
sub check_hash() {
    prt(Dumper(\%test_hash));
    for my $name (keys %test_hash) {
        my $value = $test_hash{$name};
        my $ascii = unidecode($value);
        prt("$name $ascii\n");
    }
    pgm_exit(1,"TEMP EXIT\n");
}

# from : http://www.w3.org/TR/xml-entity-names/#htmlmathml
# DWN: htmlmathml.json
# {
# "characters": {
#  "AElig": "\u00C6",
#  "AMP": "\u0026",
#  "NotSubset": "\u2282\u20D2", 
# from : http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f-a.dtd
# <!ENTITY NotSubset        "&#x02284;" ><!--alias ISOAMSN vnsub -->
# <!ENTITY NotPrecedesEqual "&#x02AAF;&#x00338;" ><!--alias ISOAMSN npre -->
#   "NotPrecedesEqual": "\u2AAF\u0338",
# Load the JSON entity file, dump its 'characters' table, and print one
# "name = codepoint [codepoint ...]" line per entity, with the names sorted
# case-insensitively and the code points shown in decimal.
#   $inf - path of the JSON file; its top level must be an object that
#          holds a 'characters' object
# Exits through pgm_exit() if the file cannot be opened or decoded.
# Returns nothing. Sets $load_log to 1 once the table has been dumped.
sub process_in_json($) {
    my ($inf) = @_;
    my $fh;
    if (! open($fh, '<', $inf)) {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n");
    }
    my @lines = <$fh>;
    close $fh;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my $text = join("",@lines);
    # The file is read as raw bytes, so the decoder must treat it as UTF-8.
    my $json = JSON->new->utf8->allow_nonref;
    # decode() croaks on malformed input; trap that so the log still closes.
    my $rh = eval { $json->decode( $text ) };
    if ($@) {
        pgm_exit(1,"ERROR: Unable to decode JSON from [$inf]: $@");
    }
    my $tag = 'characters';
    # allow_nonref lets decode() return a plain scalar, so check for a hash
    # before dereferencing.
    if ((ref($rh) ne 'HASH') || (ref(${$rh}{$tag}) ne 'HASH')) {
        prt("NO '$tag' defined in file $inf!\n");
        return;
    }
    my $rh2 = ${$rh}{$tag};
    prt(Dumper($rh2));
    $load_log = 1;
    my @arr = sort mycmp_nc_sort keys(%{$rh2});
    my $cnt = scalar @arr;
    prt("Found $cnt entity keys...\n");
    foreach my $name (@arr) {
        my $val = ${$rh2}{$name};
        next if (!defined($val) || ref($val) || !length($val));
        # Decoded values hold the real characters, not "\x{...}" text (that
        # is only how Dumper prints them), so take the code points directly.
        my @codes = map { ord($_) } split(//, $val);
        prt("$name = ".join(' ', @codes)."\n");
    }
    return;
}

# static const entity entities[] =
#{
#    /*
#    ** Markup pre-defined character entities
#    */
#    { "quot",    VERS_ALL|VERS_XML,    34 },
#    { "amp",     VERS_ALL|VERS_XML,    38 },
# ...
#    { NULL,       VERS_UNKNOWN, 0 }
# };
# Read the C source file and collect the rows of its 'entities' table.
# Rows look like:  { "quot", VERS_ALL|VERS_XML, 34 },
# Scanning starts at the line matching "const entity entities" and stops at
# the { NULL, ... } terminator row.
#   $inf - path of the C source file
# Exits through pgm_exit() if the file cannot be opened, or if a table row
# does not split into exactly three comma-separated fields.
# Returns a reference to a hash of entity name => code field (as text), so a
# caller can compare it with another source; existing callers ignore it.
sub process_in_file($) {
    my ($inf) = @_;
    my $fh;
    if (! open($fh, '<', $inf)) {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n");
    }
    my @lines = <$fh>;
    close $fh;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($inc,$len,@arr,$ent,$ver,$code);
    my $lnn = 0;
    my $intable = 0;
    my %entities = ();
    foreach my $line (@lines) {
        $lnn++;
        chomp $line;
        $line = trim_all($line);
        $len = length($line);
        next if ($len == 0);
        if ($intable) {
            # Capture only up to the first '}' so that braces in a trailing
            # comment on the same line cannot be swallowed into the row.
            if ($line =~ /^\{([^}]+)\}/) {
                $inc = trim_all($1);
                @arr = split(',',$inc);
                $len = scalar @arr;
                if ($len == 3) {
                    $ent = strip_double_quotes(trim_all($arr[0]));
                    $ver = trim_all($arr[1]);
                    $code = trim_all($arr[2]);
                    if ($ent eq 'NULL') {
                        # Terminator row: report it and stop scanning.
                        prt("$lnn: $ent $ver $code\n");
                        $intable = 0;
                        last;
                    }
                    $entities{$ent} = $code;
                } else {
                    pgm_exit(1,"$lnn DID NOT SPLIT 3, got $len [$line] *** FIX ME ***\n");
                }
            }
        } else {
            if ($line =~ /const\s+entity\s+entities/) {
                $intable = 1;
                prt("$lnn $line\n");
            }
        }
    }
    $len = scalar keys %entities;
    prt("Got $len entities from $inf\n");
    return \%entities;
}


#########################################
### MAIN ###
# Command-line parsing is currently disabled; the hard-coded $in_file1 and
# $in_file2 paths are used instead.
###parse_args(@ARGV);
# NOTE: check_hash() ends with pgm_exit(1,"TEMP EXIT\n"), which calls exit(),
# so the statements below are not reached while that debug exit is in place.
check_hash();
# Read the entity table from the C source.
process_in_file($in_file2);
# Read the entity list from the JSON file.
process_in_json($in_file1);
# Normal termination: show warnings, close the log, exit 0.
pgm_exit(0,"");
########################################

# Abort with an error unless the option named by the first argument is
# followed by at least one more argument.
#   first argument   - the option being processed
#   remaining values - whatever is left on the command line after it
sub need_arg {
    my ($option, @following) = @_;
    return if (@following);
    pgm_exit(1,"ERROR: [$option] must have a following argument!\n");
}

# Parse the command line.
#   -h / -? / --help   show help and exit 0
#   -v[n] / --verb[n]  bump verbosity once per 'v', or set it to the number n
#   -l / -ll           ask for the log to be loaded at exit ($load_log 1 or 2)
#   -o <file>          set $out_file
#   anything else      taken as the input file ($in_file)
# Exits through pgm_exit() on an unknown option, a missing option value, a
# missing input file, or an input file that does not exist.
sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    my $verb = VERB2();
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            # Strip every leading '-' so -x and --x are treated alike.
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } elsif ($sarg =~ /^v/) {
                # Non-greedy skip so the whole trailing number is captured;
                # a greedy .* would leave only the last digit (-v10 => 0).
                if ($sarg =~ /^v.*?(\d+)$/) {
                    $verbosity = $1;
                } else {
                    while ($sarg =~ /^v/) {
                        $verbosity++;
                        $sarg = substr($sarg,1);
                    }
                }
                $verb = VERB2();
                prt("Verbosity = $verbosity\n") if ($verb);
            } elsif ($sarg =~ /^l/) {
                if ($sarg =~ /^ll/) {
                    $load_log = 2;
                } else {
                    $load_log = 1;
                }
                prt("Set to load log at end. ($load_log)\n") if ($verb);
            } elsif ($sarg =~ /^o/) {
                # need_arg() exits if nothing follows the option.
                need_arg(@av);
                shift @av;
                $sarg = $av[0];
                $out_file = $sarg;
                prt("Set out file to [$out_file].\n") if ($verb);
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n") if ($verb);
        }
        shift @av;
    }

    if ($debug_on) {
        prtw("WARNING: DEBUG is ON!\n");
        if (length($in_file) ==  0) {
            $in_file = $def_file;
            prt("Set DEFAULT input to [$in_file]\n");
        }
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

# Print the usage summary: program name and version, the call syntax, and
# one line per supported option.
sub give_help {
    my @help_lines = (
        "$pgmname: version $VERS\n",
        "Usage: $pgmname [options] in-file\n",
        "Options:\n",
        " --help  (-h or -?) = This help, and exit 0.\n",
        " --verb[n]     (-v) = Bump [or set] verbosity. def=$verbosity\n",
        " --load        (-l) = Load LOG at end. ($outfile)\n",
        " --out <file>  (-o) = Write output to this file.\n",
    );
    foreach my $text (@help_lines) {
        prt($text);
    }
}

# eof - tidyentities.pl
