#!/usr/bin/perl -w
# NAME: Tidy01.pl
# AIM: SPECIFIC TO HTML TIDY ONLY - HTML5 SUPPORT RESEARCH
use strict;
use warnings;
use File::Basename;  # split path ($name,$dir,$ext) = fileparse($file [, qr/\.[^.]*/] )
use Cwd;
my $perl_dir = 'C:\GTools\perl';
unshift(@INC, $perl_dir);
require 'logfile.pl' or die "Unable to load logfile.pl ...\n";
# log file stuff
my ($LF);
my $pgmname = $0;
if ($pgmname =~ /(\\|\/)/) {
    my @tmpsp = split(/(\\|\/)/,$pgmname);
    $pgmname = $tmpsp[-1];
}
my $outfile = $perl_dir."\\temp.$pgmname.txt";
open_log($outfile);

# user variables

my $load_log = 1;
my $in_file = 'c:\Projects\Tidy\tidycvs\src\tags.c';
my $in_file2 = 'c:\Projects\Tidy\tidycvs\include\tidyenum.h';
my $in_file3 = 'c:\Projects\Tidy\tidycvs\src\attrs.c';
my $in_file4 = 'c:\Projects\Tidy\tidycvs\src\attrdict.c';
my $in_file5 = 'c:\Projects\Tidy\tidycvs\src\lexer.h';

my $g_min_tag = 24; # 16;
my $g_min_val = 24; # 16;

my $do_new_tag_cmp = 0;
my $gen_tidy_tag_array = 0;

my $debug_on = 0;
my $def_file = 'def_file';

# tags (found to be) defined in Tidy 2010
my @tidy2010 = qw(a abbr acronym address align applet area b base basefont bdo 
    bgsound big blink blockquote body br button caption center cite code col 
    colgroup comment dd del dfn dir div dl dt em embed fieldset font form frame 
    frameset h1 h2 h3 h4 h5 h6 head hr html i iframe ilayer img input ins isindex 
    kbd keygen label layer legend li link listing map marquee menu meta multicol 
    nextid nobr noembed noframes nolayer nosave noscript object ol optgroup 
    option p param plaintext pre q rb rbc rp rt rtc ruby s samp script select 
    server servlet small spacer span strike strong style sub sup table tbody 
    td textarea tfoot th thead title tr tt u ul var wbr xmp 
);

# from : http://dev.w3.org/html5/spec/Overview.html#elements-1
# from : http://dev.w3.org/html5/spec/Overview.html#interfaces
my @html5_element = qw(a abbr address area article aside audio b base bdi bdo blockquote body br
    button canvas caption cite code col colgroup command datalist dd del details div dl 
    dt em embed fieldset figcaption figure footer form head h1 h2 h3 h4 h5 h6 header 
    hgroup hr html i iframe img input ins kbd keygen label legend li link map mark 
    meter nav noscript object ol optgroup option output p param pre progress q 
    rp rt ruby s samp section select small source span strong style sub summary 
    sup table tbody td textarea tfoot th thead time title tr track ul var video wbr
);

my @new_tags = qw(article aside audio canvas command datalist details figcaption
 figure footer header hgroup keygen mark meter nav output progress rp rt
 ruby section source summary time video wbr
);

# comparing what is presently found in Tidy, with the list scraped from
# the W3C HTML5 list from the Editor's document in progress - at 20110115
# =======================================================================
my @gen_new = qw(article aside audio bdi canvas command datalist details figcaption 
  figure footer header hgroup mark meter nav output progress section source 
  summary time track video
);

### program variables
my @warnings = ();
my $cwd = cwd();
my $os = $^O;

sub show_warnings($) {
    my ($val) = @_;
    if (@warnings) {
        prt( "\nGot ".scalar @warnings." WARNINGS...\n" );
        foreach my $itm (@warnings) {
           prt("$itm\n");
        }
        prt("\n");
    } else {
        prt( "\nNo warnings issued.\n\n" );
    }
}

sub pgm_exit($$) {
    my ($val,$msg) = @_;
    if (length($msg)) {
        $msg .= "\n" if (!($msg =~ /\n$/));
        prt($msg);
    }
    show_warnings($val);
    close_log($outfile,$load_log);
    exit($val);
}


sub prtw($) {
   my ($tx) = shift;
   $tx =~ s/\n$//;
   prt("$tx\n");
   push(@warnings,$tx);
}

sub trim_tag($) {
    my ($tag) = @_;
    $tag =~ s/,$//; # remove any trailing ','
    $tag = trim_all($tag); # and trim
    $tag =~ s/^"//; # remove leading quote
    $tag = trim_all($tag); # and trim
    $tag =~ s/"$//; # remove leading quote
    $tag = trim_all($tag); # and trim
    return $tag;
}

sub set_min_tag($) {
    my ($rt) = @_;  # \$tag
    ${$rt} .= ' ' while (length(${$rt}) < $g_min_tag);
}
sub set_min_val($) {
    my ($rt) = @_;  # \$tag
    ${$rt} = ' '.${$rt} while (length(${$rt}) < $g_min_val);
}


## SETUP
#  ${$rh}{'ENUM_FILE'} = $in_file2;        # process_enums_tag_at
#  ${$rh}{'TAG_FILE'} = $in_file;          # process_tag_table
#  ${$rh}{'ATTR_FILE'} = $in_file3;        # process_attr_table
#  ${$rh}{'ATTRDICT_FILE'} = $in_file4;    # process_attr_dict
#  ${$rh}{'DEFINES_FILE'} = $in_file5;     # process_defines_file
## PROCESSING
# process_defines_file($rh); # get ${$rh}{'DEFINES'} = \%defs; - Bit defintions
# process_attr_dict($rh);    # get attrs per tag per version ${$rh}{'TAGS_ATTR_VERS'}
# process_enums_tag_at($rh); # get ${$rh}{'ENUM_TAGS'} and ${$rh}{'ENUM_ATTRS'}
# process_tag_table($rh);    # get ${$rh}{'TAG_TABLE'} and ${$rh}{'VERS_DEFINES'}
# process_attr_table($rh);   # get ${$rh}{'ATTR_TABLE'}
sub sanity_check($) {
    my ($rh) = @_;
    if ( (defined ${$rh}{'ENUM_FILE'}) && # = $in_file2; # process_enums_tag_at
         (defined ${$rh}{'TAG_FILE'} ) && # = $in_file;  # process_tag_table
         (defined ${$rh}{'ATTR_FILE'}) && # = $in_file3;        # process_attr_table
         (defined ${$rh}{'ATTRDICT_FILE'}) && # = $in_file4;    # process_attr_dict
         (defined ${$rh}{'DEFINES_FILE'}) )  # = $in_file5;     # process_defines_file
    {
        #prt("All files set...\n");
    } else {
        pgm_exit(1,"ERROR: One of the input files NOT set!\n");
    }
    if ( (defined ${$rh}{'DEFINES'}) && # = \%defs; - Bit defintions
         (defined ${$rh}{'TAGS_ATTR_VERS'}) &&
         (defined ${$rh}{'ENUM_TAGS'}) &&
         (defined ${$rh}{'ENUM_ATTRS'}) &&
         (defined ${$rh}{'TAG_TABLE'}) &&
         (defined ${$rh}{'VERS_DEFINES'}) &&
         (defined ${$rh}{'ATTR_TABLE'}) )
    {
         my $rbd = ${$rh}{'DEFINES'};
         my $rtav = ${$rh}{'TAGS_ATTR_VERS'};
         my $rtags = ${$rh}{'ENUM_TAGS'};
         my $rattr = ${$rh}{'ENUM_ATTRS'};
         my $rttbl = ${$rh}{'TAG_TABLE'};
         my $rvers = ${$rh}{'VERS_DEFINES'};
         my $ratbl = ${$rh}{'ATTR_TABLE'};
         # compare the list of enumerated tags, with tags in the table
         # should be the same
         my ($tag,$fnd,$ctag,@arr);
         @arr = sort keys(%{$rtags});
         #foreach $tag (keys %{$rtags}) {
         foreach $tag (@arr) {
             $ctag = $tag;
             set_min_tag(\$ctag);
             if (defined ${$rtav}{$tag}) {
                 # found it...
             } else {
                 prtw("WARNING: Tag [$ctag] NOT found in tag/attr/vers\n");
             }
             if (defined ${$rttbl}{$tag}) {
                 # found
             } else {
                 prtw("WARNING: Tag [$ctag] NOT found in tag table\n");
             }
         }
         foreach $tag (keys %{$rtav}) {
             if (defined ${$rtags}{$tag}) {
                 # found it...
             } else {
                 prtw("WARNING: Tag [$tag] NOT found in enum tags\n");
             }
         }

    } else {
        pgm_exit(1,"ERROR: One of the hashes NOT set!\n");
    }
}

sub show_ref_hash($) {
    my ($mhr) = @_;
    sanity_check($mhr);

    my $rtags = ${$mhr}{'ENUM_TAGS'};
    # ${$rh}{'ATTR_TABLE'} = \%attrs;
    # ${$rh}{'ENUM_TAGS'} = \%tags;
    # ${$rh}{'ENUM_ATTRS'} = \%attrs;
    # ${$rh}{'TAG_TABLE'} = \%tags;
    # ${$rh}{'VERS_DEFINES'} = \%dtags;

    my $rh = ${$mhr}{'TAG_TABLE'};
    my @arr = sort keys %{$rh};
    my $max = scalar @arr;
    my $rntags = \@new_tags; # = qw(article aside ...
    my $r5tags = \@html5_element;
    my $cnt = scalar @{$rntags};
    my @ntags = ();
    my @etags = ();
    my ($tag,$tag2,$fnd,$old,$new,$ver,$ctag);
    if ($do_new_tag_cmp) {
        prt("Got $cnt tags to compare to $max...\n");
        $old = 0;
        $new = 0;
        foreach $tag (@{$rntags}) {
            $fnd = 0;
            foreach $tag2 (@arr) {
                if ($tag eq $tag2) {
                    $ver = ${$rh}{$tag2};
                    $fnd = 1;
                    last;
                }
            }
            if ($fnd) {
                $old++;
                push(@etags,$tag);
                prt("Tag [$tag] ver [$ver] already exists\n");
            } else {
                $new++;
                push(@ntags,$tag);
                set_min_tag(\$tag);
                prt("Tag [$tag] is NEW\n");
            }
        }
        prt("Of $cnt tags, $old old, $new new...\n");
        my $oldlist = join(" ",@etags);
        my $newlist = join(" ",@ntags);
        prt("Old: $oldlist\n");
        prt("New: $newlist\n");
    }

    # now do the same against the HTML5 WC3 EDITOR's DRAFT document
    # =============================================================
    $cnt = scalar @{$r5tags};
    @ntags = ();
    @etags = ();
    $old = 0;
    $new = 0;
    @arr = sort keys( %{$rtags} );
    $max = scalar @arr;
    prt("Got $cnt tags to compare to $max...\n");
    foreach $tag (@{$r5tags}) {
        $ctag = $tag;
        set_min_tag(\$ctag);
        $fnd = 0;
        foreach $tag2 (@arr) {
            if ($tag eq $tag2) {
                $ver = ${$rh}{$tag2};
                $fnd = 1;
                last;
            }
        }
        if ($fnd) {
            $old++;
            push(@etags,$tag);
            prt("Tag [$ctag] ver [$ver] already exists\n");
        } else {
            $new++;
            push(@ntags,$tag);
            set_min_tag(\$tag);
            prt("Tag [$ctag] is NEW\n");
        }
    }
    prt("Of $cnt tags, $old old, $new new...\n");
    my $oldlist = join(" ",@etags);
    my $newlist = join(" ",@ntags);
    prt("Old: $oldlist\n");
    prt("New: $newlist\n");

    if ($gen_tidy_tag_array) {
        #$rtags = ${$mhr}{'ENUM_TAGS'};
        @arr = sort keys( %{$rtags} );
        my $tlist = join(" ",@arr);
        prt("Tidy: $tlist\n");
        my ($msg,$line);
        $line = "my \@tidy2010 = qw(";
        $msg = '';
        foreach $tag (@arr) {
            $line .= "$tag ";
            if (length($line) > 75) {
                $msg .= "$line\n";
                $line = '    ';
            }
        }
        $msg .= $line if ($line =~ /\w/);
        $msg .= "\n);\n";
        prt($msg);
    }
}


# attrs.h
# struct _Attribute {
#    TidyAttrId  id;
#    tmbstr      name;
#    unsigned    versions;
#    AttrCheck*  attrchk;
#    struct _Attribute* next; };
# attrs.c
# static const Attribute attribute_defs [] =
#{
#  { TidyAttr_UNKNOWN,           "unknown!",          VERS_PROPRIETARY,  NULL         }, 
#  { TidyAttr_ABBR,              "abbr",              VERS_HTML40,       CH_PCDATA    }, 
# ...
#   { TidyAttr_URN,               "urn",               VERS_HTML20,       CH_PCDATA    }, /* for <a>, never implemented */
#
#  /* this must be the final entry */
#  { N_TIDY_ATTRIBS,             NULL,                VERS_UNKNOWN,      NULL         }
#};
sub process_attr_table($) {
    my ($rh) = @_;
    my $inf = ${$rh}{'ATTR_FILE'}; # = $in_file3;
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$inc,$lnn,$intable,$inatts);
    my (@arr,$acnt,$tag,$ver,$cnt,$i,$clnn,$txt);
    $lnn = 0;
    $intable = 0;
    $inatts = 0;
    my %attrs = ();
    my $hdr2 = "In ATTRIB default definitions... TidyAttr_???, \"text\", VERS";
    foreach $line (@lines) {
        $lnn++;
        $line = trim_all($line);
        $clnn = sprintf("%4d",$lnn);
        #prt("$line\n");
        next if (length($line) == 0);
        if ($line =~ /\s*#\s*include\s+(.+)$/) {
            $inc = $1;
            prt("$clnn: $inc\n");
        } else {
            if ($intable) {
                if ($line =~ /N_TIDY_ATTRIBS/) {
                    @arr = keys %attrs;
                    $acnt = scalar @arr;
                    prt("$clnn: Done attr table... $acnt entries...\n");
                    $intable = 0;
                    next;
                }
                $line =~ s/,$//;    # drop any trailing ','
                $line = trim_all($line); # and trim
                $line =~ s/\}$//;   # drop any trailing '}'
                $line = trim_all($line); # and trim
                $line =~ s/^\{//;   # drop any leanding '{'
                $line = trim_all($line); # and trim
                @arr = split(/,/,$line); # comma split
                $acnt = scalar @arr;
                if ($acnt > 2) {
                    $tag = trim_all($arr[0]);
                    $txt = strip_quotes(trim_all($arr[1]));
                    $ver = trim_all($arr[2]);

                    $tag =~ s/^TidyAttr_//;
                    $tag = lc($tag);
                    $attrs{$tag} = $ver;

                    # for display
                    prt("$hdr2\n") if (length($hdr2));
                    $hdr2 = '';
                    set_min_tag(\$tag);
                    $txt = "'$txt'";
                    set_min_tag(\$txt);
                    prt("$clnn: $tag $txt $ver\n");
                }
            } elsif ($line =~ /Attribute\s+attribute_defs\s*\[/) {
                $intable = 1;
                prt("$clnn: Enter attribute table...\n");
            }
        }
    }
    ${$rh}{'ATTR_TABLE'} = \%attrs;
}

# enum
# /** Known HTML element types
# */
# typedef enum
# {
#  TidyTag_UNKNOWN,  /**< Unknown tag! */
#  TidyTag_A,        /**< A */
#  TidyTag_ABBR,     /**< ABBR */
# ...
# TidyTag_NEXTID,   /**< NEXTID */
#
#  N_TIDY_TAGS       /**< Must be last */
#} TidyTagId;

# typedef enum
#{
#  TidyAttr_UNKNOWN,           /**< UNKNOWN= */
#  TidyAttr_ABBR,              /**< ABBR= */
#  TidyAttr_ACCEPT,            /**< ACCEPT= */
#  TidyAttr_ACCEPT_CHARSET,    /**< ACCEPT_CHARSET= */
# ...
#   TidyAttr_URN,               /**< URN= */
#
#  N_TIDY_ATTRIBS              /**< Must be last */
#} TidyAttrId;

sub process_enums_tag_at($) {
    my ($rh) = @_;
    my $inf = ${$rh}{'ENUM_FILE'};
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($file,$dir) = fileparse($inf);
    my ($line,$inc,$lnn,$intable,$inatts);
    my (@arr,$acnt,$tag,$ver,$cnt,$i,$clnn);
    $lnn = 0;
    $intable = 0;
    $inatts = 0;
    my %tags = ();
    my %attrs = ();
    my $hdr1 = "In TAG enumeration... from file [$file]...";
    my $hdr2 = "In ATTRIB enumerations... from file [$file]...";
    foreach $line (@lines) {
        $lnn++;
        $clnn = sprintf("%4d",$lnn);
        $line = trim_all($line);
        #prt("$line\n");
        next if (length($line) == 0);
        if ($line =~ /\s*#\s*include\s+(.+)$/) {
            $inc = $1;
            prt("$clnn: $inc\n");
        } else {
            if ($inatts) {
                if ($line =~ /N_TIDY_ATTRIBS/) {
                    @arr = keys %attrs;
                    $acnt = scalar @arr;
                    prt("$clnn: End attrs enum... got $acnt ATTRIBUTES\n");
                    $inatts = 0;
                    next;
                }
                @arr = split(/,/,$line);
                $acnt = scalar @arr;
                if ($acnt > 1) {
                    $tag = trim_all($arr[0]);
                    $ver = trim_all($arr[1]);
                    $tag =~ s/^TidyAttr_//;
                    $tag = lc($tag);
                    $attrs{$tag} = $ver;

                    # for display
                    prt("$hdr2\n") if (length($hdr2));
                    $hdr2 = '';
                    set_min_tag(\$tag);
                    prt("$clnn: $tag $ver\n");
                }
            } elsif ($intable) {
                if ($line =~ /N_TIDY_TAGS/) {
                    @arr = keys %tags;
                    $acnt = scalar @arr;
                    prt("$clnn: End tag enum... got $acnt TAGS\n");
                    $intable = 0;
                    next;
                }
                @arr = split(",",$line);
                $acnt = scalar @arr;
                if ($acnt > 1) {
                    $tag = trim_all($arr[0]);
                    $ver = trim_all($arr[1]);
                    for ($i = 2; $i < $acnt; $i++) {
                        $ver .= " ".$arr[$i];
                    }
                    $tag =~ s/^TidyTag_//;
                    $tag = lc($tag);
                    $tags{$tag} = $ver;

                    # for display
                    prt("$hdr1\n") if (length($hdr1));
                    $hdr1 = '';
                    set_min_tag(\$tag);
                    prt( "$clnn: $tag $ver\n");
                }
            } elsif ($line =~ /TidyAttr_UNKNOWN/) {
                $inatts = 1;
                prt("$clnn: Enter attribs...\n");
            } elsif ($line =~ /TidyTag_UNKNOWN/) {
                $intable = 1;
                prt("$clnn: Enter table...\n");
            }
        }
    }
    ${$rh}{'ENUM_TAGS'} = \%tags;
    ${$rh}{'ENUM_ATTRS'} = \%attrs;
}

# tags.c

# list
# #define VERS_ELEM_A          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50)
# #define VERS_ELEM_ABBR       (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50)
# #define VERS_ELEM_ACRONYM    (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50)
# ...
# #define VERS_ELEM_VAR        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10|HT50)
# #define VERS_ELEM_XMP        (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|HT50)

# find table
# static const Dict tag_defs[] =
# {
#  { TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL },
#
#  /* W3C defined elements */
#  { TidyTag_A,    "a",    VERS_ELEM_A,    &TY_(W3CAttrsFor_A)[0],    (CM_INLINE), TY_(ParseInline), NULL },
#  { TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL },
# ...
#   /* this must be the final entry */
#  { (TidyTagId)0,  NULL,  0,              NULL,                       (0),        NULL,             NULL }
#};

sub process_tag_table($) {
    my ($rh) = @_;
    my $inf = ${$rh}{'TAG_FILE'};
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$inc,$lnn,$intable);
    my (@arr,$acnt,$tag,$ver,$clnn,$elem);
    $lnn = 0;
    $intable = 0;
    my %tags = ();
    my %dtags = ();
    my $hdr1 = "In ELEMENT defintions... from [$inf]";
    my $hdr2 = "In Dict tag table... from [$inf]";
    foreach $line (@lines) {
        $line = trim_all($line);
        $lnn++;
        $clnn = sprintf("%4d",$lnn);
        if ($line =~ /\s*#\s*include\s+(.+)$/) {
            $inc = $1;
            prt("$clnn:I: $inc\n");
        } else {
            if ($intable) {
                if ($line =~ /\s*\};/) {
                    @arr = keys %tags;
                    $acnt = scalar @arr;
                    prt("$clnn: Done Dict tag table... got $acnt entries...\n");
                    $intable = 0;
                    next;
                }
                $line =~ s/,$//;
                $line = trim_all($line);
                $line =~ s/\}$//;
                $line = trim_all($line);
                $line =~ s/^\{//;
                $line = trim_all($line);
                @arr = split(/,/,$line);
                $acnt = scalar @arr;
                if ($acnt > 4) {
                    $tag = trim_tag($arr[1]);
                    $ver = trim_tag($arr[2]);
                    if (($tag ne 'NULL')&&
                        ($tag ne 'unknown!')) {
                        $tags{$tag} = $ver;

                        # display
                        prt("$hdr2\n") if (length($hdr2));
                        $hdr2 = '';
                        set_min_tag(\$tag);
                        prt( "$clnn:T: $tag $ver\n");
                    }
                }
            } elsif ($line =~ /Dict\s+tag_defs\s*\[/) {
                $intable = 1;
                prt("$clnn: Enter Dict tag_defs table... from [$inf]\n");
            } elsif ( $line =~ /\s*\#\s*define\s+VERS_ELEM_(\w+)\s+\((.+)\)/ ) {
                $tag = lc($1);
                $ver = trim_all($2);
                $elem = 'VERS_ELEM'.uc($tag);
                $dtags{$tag} = $ver;

                # display
                prt("$hdr1\n") if (length($hdr1));
                $hdr1 = '';
                set_min_tag(\$tag);
                #prt( "$clnn:V: $tag $ver ($elem)\n");
                prt( "$clnn:V: $tag $ver\n");
            }

        }
    }
    ${$rh}{'TAG_TABLE'} = \%tags;
    ${$rh}{'VERS_DEFINES'} = \%dtags;
}

# attrdict.c
# const AttrVersion TY_(W3CAttrsFor_A)[] = 
#{
#  { TidyAttr_ACCESSKEY,      xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
#  { TidyAttr_CHARSET,        xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
# ...
#   { TidyAttr_UNKNOWN,        0                                                                },
#};
sub process_attr_dict($) {
    my ($rh) = @_;
    my $inf = ${$rh}{'ATTRDICT_FILE'};
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$lnn,$clnn,$indef,$tag);
    my (@arr,$acnt,$def,$cnt,$att,@arr2,$v);
    $lnn = 0;
    $indef = 0;
    $tag = '';
    my %tags = ();
    my %attrs = ();
    my %vers = ();
    $cnt = 0;
    foreach $line (@lines) {
        $lnn++;
        $clnn = sprintf("%4d",$lnn);
        $line = trim_all($line);
        next if (length($line) == 0);
        if ($indef) {
            if ($line =~ /TidyAttr_UNKNOWN/) {
                @arr = keys %attrs;
                $acnt = scalar @arr;
                # my %h = %attrs;
                $tags{$tag} = { %attrs };
                prt("$clnn: End definition for [$tag] - added $acnt...\n");
                $indef = 0;
                %attrs = ();
                next;
            }
            $line =~ s/,$//;
            $line = trim_all($line);
            $line =~ s/^\{//;
            $line = trim_all($line);
            $line =~ s/\}$//;
            $line = trim_all($line);
            @arr = split(",",$line);
            $acnt = scalar @arr;
            if ($acnt > 1) {
                $att = trim_all($arr[0]);
                $att =~ s/^TidyAttr_//;
                $att = lc($att);
                $def = trim_all($arr[1]);
                $attrs{$att} = $def;
                $cnt++;
                # this is spread over 13 columns
                @arr2 = split(/\|/,$def);
                foreach $v (@arr2) {
                    $v = trim_all($v);
                    if (defined $vers{$v}) {
                        $vers{$v}++;
                    } else {
                        $vers{$v} = 1;
                    }
                }

                # print
                set_min_tag(\$att);
                prt("$clnn: $att $def (".(scalar @arr2).")\n");

            }

        } elsif ($line =~ /AttrVersion\s+TY_\s*\(\s*W3CAttrsFor_(\w+)\s*\)/) {
            $tag = lc($1);
            $indef = 1;
            prt("$clnn: Begin definitions for [$tag]\n");
        }
    }
    @arr = keys %vers;
    $acnt = scalar @arr;
    prt("Got $acnt 'versions' used...\n");
    prt("Vers: ".join(" ",@arr)."\n");
    ${$rh}{'TAGS_ATTR_VERS'} = \%tags;
}

# 
# #define digit       1u
# ...
# #define CM_EMPTY        (1 << 0)
# ...
# /* unknown */
# #define xxxx                   0u
#
# /* W3C defined HTML/XHTML family document types */
# #define HT20                   1u
# #define HT32                   2u
# ...
# #define XB10                4096u
#
# /* proprietary stuff */
# #define VERS_SUN            8192u
# #define VERS_NETSCAPE      16384u
# ...

sub process_defines_file($) {
    my ($rh) = @_;
    my $inf = ${$rh}{'DEFINES_FILE'};
    if (! open INF, "<$inf") {
        pgm_exit(1,"ERROR: Unable to open file [$inf]\n"); 
    }
    my @lines = <INF>;
    close INF;
    my $lncnt = scalar @lines;
    prt("Processing $lncnt lines, from [$inf]...\n");
    my ($line,$lnn,$clnn,$tag);
    my (@arr,$acnt,$def,$pnt,$att,@arr2,$v);
    $lnn = 0;
    $tag = '';
    my %defs = ();
    foreach $line (@lines) {
        $lnn++;
        $clnn = sprintf("%4d",$lnn);
        $line = trim_all($line);
        next if (length($line) == 0);
        if ($line =~ /^\s*\#\s*define\s+(.+)$/) {
            $pnt = 0;
            if ($line =~ /^\s*\#\s*define\s+(\w+)\s+(.+)\s*$/) {
                $tag = $1;
                $def = $2;
                if ($def =~ /^(\d+)\w{1}$/) {
                    $def = $1;
                }
                $pnt = 1;
            } elsif ($line =~ /^\s*\#\s*define\s+(\w+)\s*$/) {
                $tag = $1;
                $def = 1;
                $pnt = 1;
            } else {
                prtw("$clnn: WARNING: define missed [$line]\n");
            }
            if ($pnt) {
                # print
                set_min_tag(\$tag);
                set_min_val(\$def);
                prt("$clnn: $tag $def\n");
            }
        }
    }
    ${$rh}{'DEFINES'} = \%defs;
    ###pgm_exit(1,"TEMP");
}


sub get_anon_hash() {
    my %h = ();
    return \%h;
}

sub process_files() {
    # get the tag and attribute enumerations
    my $rh = get_anon_hash();
    # SETUP
    ${$rh}{'ENUM_FILE'} = $in_file2;        # process_enums_tag_at
    ${$rh}{'TAG_FILE'} = $in_file;          # process_tag_table
    ${$rh}{'ATTR_FILE'} = $in_file3;        # process_attr_table
    ${$rh}{'ATTRDICT_FILE'} = $in_file4;    # process_attr_dict
    ${$rh}{'DEFINES_FILE'} = $in_file5;     # process_defines_file
    # PROCESSING
    process_defines_file($rh); # get ${$rh}{'DEFINES'} = \%defs; - Bit defintions
    process_enums_tag_at($rh); # get ${$rh}{'ENUM_TAGS'} and ${$rh}{'ENUM_ATTRS'}
    process_tag_table($rh);    # get ${$rh}{'TAG_TABLE'} and ${$rh}{'VERS_DEFINES'}
    process_attr_table($rh);   # get ${$rh}{'ATTR_TABLE'}
    process_attr_dict($rh);    # get attrs per tag per version ${$rh}{'TAGS_ATTR_VERS'}
    # ANALYSIS, and DISPLAY
    show_ref_hash($rh);
}

# Old: keygen rp rt ruby wbr
# New: article aside audio canvas command datalist details figcaption figure footer header hgroup mark
#      meter nav output progress section source summary time video
#########################################
### MAIN ###
#parse_args(@ARGV);
#prt( "$pgmname: in [$cwd]: Hello, World...\n" );
process_files();
pgm_exit(0,"Normal exit(0)");
########################################
sub give_help {
    prt("$pgmname: version 0.0.1 2010-09-11\n");
    prt("Usage: $pgmname [options] in-file\n");
    prt("Options:\n");
    prt(" --help (-h or -?) = This help, and exit 0.\n");
}
sub need_arg {
    my ($arg,@av) = @_;
    pgm_exit(1,"ERROR: [$arg] must have following argument!\n") if (!@av);
}

sub parse_args {
    my (@av) = @_;
    my ($arg,$sarg);
    while (@av) {
        $arg = $av[0];
        if ($arg =~ /^-/) {
            $sarg = substr($arg,1);
            $sarg = substr($sarg,1) while ($sarg =~ /^-/);
            if (($sarg =~ /^h/i)||($sarg eq '?')) {
                give_help();
                pgm_exit(0,"Help exit(0)");
            } else {
                pgm_exit(1,"ERROR: Invalid argument [$arg]! Try -?\n");
            }
        } else {
            $in_file = $arg;
            prt("Set input to [$in_file]\n");
        }
        shift @av;
    }

    if ((length($in_file) ==  0) && $debug_on) {
        $in_file = $def_file;
    }
    if (length($in_file) ==  0) {
        pgm_exit(1,"ERROR: No input files found in command!\n");
    }
    if (! -f $in_file) {
        pgm_exit(1,"ERROR: Unable to find in file [$in_file]! Check name, location...\n");
    }
}

sub global_attributes() {
    my $ga = <<EOF;
Global attributes 
id, title, lang and xml:lang, xml:base (XML only), dir, class, style,
Embedding custom non-visible data with the data-* attributes
EOF
    return $ga;
}

# eof - template.pl
