// tidynodes.c

// Load an HTML file and output its parse-tree nodes,
// to test the node position (line/column) reporting
// 2009-07-28 - geoff mclane - http://geoffair.net/tidy

#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h> /* for assert() */
#include <ctype.h>  /* for tolower() */
#include <stdio.h>
#include <stdlib.h> /* for _splitpath() */
#include <string.h> /* for strlen() strcpy() strcat()... */
#include <time.h>   /* for ctime() */
#include <io.h>     /* for _findfirst() _findnext()... */

#include "tidy.h"
#include "buffio.h"
/* #include "lexer.h" - could include this, but decided to define own states */

/* printf-style output routine used for all reporting in this file */
int __cdecl sprtf( char * lpf, ... );
/* returns non-zero for nodes flagged implicit; their line/column is only
   shown, not checked (see dumpNode) */
Bool tidyNodeImplicit( TidyNode tnod );

/* NDEBUG builds enable the USE_LOCAL_SPRTF section further down */
#ifdef NDEBUG
#define USE_LOCAL_SPRTF
#endif

#define MX_CMP_TEXT     32  /* only compare up to first nn bytes */
/* component buffer sizes for SPLITPATH */
#define M_MAX_DRIVE     3
#define M_MAX_DIR       256
#define M_MAX_FNAME     256
#define M_MAX_EXT       256

/* A full path plus buffers for its drive, directory, name and extension
   parts - presumably filled via _splitpath(); not used in this section */
typedef struct tagSPLITPATH {
    char fullpath[264];
    char drive[M_MAX_DRIVE];
    char dir[M_MAX_DIR];
    char name[M_MAX_FNAME];
    char ext[M_MAX_EXT];
}SPLITPATH, *PSPLITPATH;

static int nodeindent = 1;  /* extra indent added per tree level in dumpNode */
static int maxoutput = MX_CMP_TEXT; /* was 16 - max file chars shown per node */

static FILE * fp;               /* not referenced in this section */
static struct stat buf;         /* st_size is reported by dumpDoc */
static char * filebuffer;       /* start of the loaded file text */
static char * fileend;          /* dumpNode treats addresses >= this as out of range */
static int filesize;            /* byte count of the data at filebuffer */
static TidyDoc tdoc = NULL;     /* document handle used when fetching node values */
static int shownodecount = 1;   /* non-zero: prefix each node line with its number */
static int nodecount = 0;       /* nodes dumped so far (reset by dumpDoc) */
static int okcount = 0;         /* nodes whose position matched the file text */
static int foundcount = 0;      /* nodes only found by the find_text_in_buf search */
static int notfoundcount = 0;   /* nodes that could not be matched at all */
static char * htmlfil = NULL;   /* input file name shown by dumpDoc */
static char * inputmask = NULL; /* not referenced in this section */
#define MX_TXT_STORED 8
static char filtext[MX_TXT_STORED+1];   /* first bytes of file text at the node */
static char lextext[MX_TXT_STORED+1];   /* first bytes of the node's lexer text */
static char timebuf[32];    /* or 26 - file time text shown by dumpDoc */
static int addedfilename = 0;   /* reset by dumpDoc; not otherwise used in this section */

static char _s_tmp_buf[1024];   /* scratch buffer handed out by get_tmp_buf() */
static char _s_out_buf[1024];   /* buffer output */
static SPLITPATH _s_splitpath;  /* not referenced in this section */

/* CHKMEM(a): report and exit(-1) when allocation result 'a' is NULL.
   EndBuf(a): pointer to the terminating NUL of string 'a' ('a' is
   evaluated twice, so pass no expression with side effects).
   Arguments are parenthesized so any expression can be passed safely. */
#define CHKMEM(a)   if(!(a)) { sprtf("ERROR: MEMORY FAILED! Aborting...\n"); exit(-1); }
#define EndBuf(a)   ( (a) + strlen(a) )

/* simple LIST
   Singly linked list node carrying one NUL-terminated string.
   'name' is declared with one byte; add_to_simple_list() allocates the node
   with extra room so the whole string fits (the old "[1]" trailing-array idiom). */
typedef struct tagSLIST {
    void * link;    /* next node, or NULL on the last node */
    char name[1];   /* start of the string stored in the node */
}SLIST, * PSLIST;

static void * simple_list = NULL;   /* head of the per-file results list */
static void * warning_list = NULL;  /* head of the warnings list (nothing in this section adds to it) */

/* Append node 'psl' at the tail of the list whose head pointer is '*ppsl'.
   An empty list gets 'psl' as its head. Returns 'psl'. */
static PSLIST add_to_list( PSLIST * ppsl, PSLIST psl )
{
    PSLIST tail = *ppsl;

    psl->link = NULL;   /* the appended node always terminates the list */
    if( tail == NULL ) {
        *ppsl = psl;    /* first node: becomes the head */
        return psl;
    }
    for( ; tail->link; tail = tail->link )
        ;               /* walk to the current last node */
    tail->link = psl;
    return psl;
}

static PSLIST add_to_simple_list( char * p )
{
    int len = (int)strlen(p);
    PSLIST psl = (PSLIST)malloc(sizeof(SLIST) + len);
    CHKMEM(psl);
    psl->link = NULL;
    strcpy(psl->name,p);
    return add_to_list( (PSLIST *)&simple_list, psl );
}

/* Cursor-style iteration over the two lists. Each list has ONE shared
   cursor, so starting a second walk of the same list restarts the first. */
static PSLIST slistnext = NULL;

/* Reset the simple_list cursor to the head and return it (NULL if empty). */
PSLIST get_slist_first(void)
{
    slistnext = (PSLIST)simple_list;
    return slistnext;
}

/* Advance the simple_list cursor; returns NULL once the end is passed. */
PSLIST get_slist_next(void)
{
    if( slistnext != NULL )
        slistnext = slistnext->link;
    return slistnext;
}

static PSLIST wlistnext = NULL;

/* Reset the warning_list cursor to the head and return it (NULL if empty). */
PSLIST get_wlist_first(void)
{
    wlistnext = (PSLIST)warning_list;
    return wlistnext;
}

/* Advance the warning_list cursor; returns NULL once the end is passed. */
PSLIST get_wlist_next(void)
{
    if( wlistnext != NULL )
        wlistnext = wlistnext->link;
    return wlistnext;
}

void kill_simple_list(void)
{
    PSLIST psl = get_slist_first();
    PSLIST del;
    while(psl) {
        del = psl;
        psl = get_slist_next();
        free(del);
    }
}
void kill_warning_list(void)
{
    PSLIST psl = get_wlist_first();
    PSLIST del;
    while(psl) {
        del = psl;
        psl = get_wlist_next();
        free(del);
    }
}

/* Return the number of entries in simple_list.
   Uses the shared list cursor, which is left at the end (NULL) afterwards. */
int get_simple_list_count(void)
{
    int total = 0;
    PSLIST node;

    for( node = get_slist_first(); node; node = get_slist_next() )
        total++;
    return total;
}

/* Print a "Simple List: N" header followed by every entry of simple_list.
   Nothing is printed for an empty list.
   The count is taken BEFORE the walk starts: counting uses the same shared
   cursor as get_slist_next(), so counting mid-walk would end the walk after
   the first entry. */
void print_simple_list(void)
{
    int count = get_simple_list_count();
    if(count) {
        PSLIST psl = get_slist_first();
        sprtf("Simple List: %d\n", count);
        while(psl) {
            sprtf("%s\n", psl->name);
            psl = get_slist_next();
        }
    }
}

/* Return the number of entries in warning_list.
   Uses the shared list cursor, which is left at the end (NULL) afterwards. */
int get_warning_list_count(void)
{
    int total = 0;
    PSLIST node;

    for( node = get_wlist_first(); node; node = get_wlist_next() )
        total++;
    return total;
}

/* Print a "Warning List: N" header followed by every entry of warning_list.
   Nothing is printed for an empty list. */
void print_warning_list(void)
{
    PSLIST node;
    int total = get_warning_list_count();

    if( total == 0 )
        return;

    /* start the walk only after counting, as counting moves the cursor */
    node = get_wlist_first();
    sprtf("Warning List: %d\n", total);
    for( ; node; node = get_wlist_next() )
        sprtf("%s\n", node->name);
}


/* Release the nodes of both lists: simple list first, then warnings. */
void kill_lists(void)
{
    kill_simple_list();     /* per-file result lines */
    kill_warning_list();    /* collected warnings */
}

/* Print the end-of-run summary: warnings first, then the simple list.
   Nothing is printed when both lists are empty.
   The header goes through sprtf(), like the entries themselves, so the
   header and the entries always end up in the same output. */
void print_lists(void)
{
    if(get_warning_list_count() || get_simple_list_count()) {
        sprtf("Summary:\n");
        print_warning_list();
        print_simple_list();
    }
}

/* Common program exit: release the Tidy document (if one was passed),
   print the summary lists, free them, and exit with status 'val'.
   Does not return. */
void pgm_exit( TidyDoc tdoc, int val )
{
    if ( tdoc ) {
        tidyRelease( tdoc );
    }

    /* the summary must be printed before the lists are freed */
    print_lists();
    kill_lists();

    exit(val);
}

/* Return the shared static scratch buffer (1024 bytes). Every caller gets
   the same storage, so its content only lives until the next user. */
static char * get_tmp_buf(void)
{
    return &_s_tmp_buf[0];
}

/* ---------------------------------------------------
   char * getFileBufferOffset( int olin, int ocol)
   Get the address in the file buffer of this logical (zero based)
   row/column, or return NULL if it is OUT OF RANGE: no buffer loaded,
   a negative row or column, a row beyond the last line of the file,
   or a column that lands beyond the end of the data.
   The returned address may equal the end of the data; callers must
   check before reading from it.
   Note, no need for speed in the 'test', so always start at the
   beginning...
   --------------------------------------------------- */
char * getFileBufferOffset( int olin, int ocol)
{
    char * cp = filebuffer;
    int off = 0;

    if( (cp == NULL) || (olin < 0) || (ocol < 0) )
        return NULL;

    /* step over 'olin' line feeds, never reading beyond the loaded data */
    while( olin && (off < filesize) ) {
        if( *cp == '\n' )
            olin--;
        cp++;
        off++;
    }
    if( olin )
        return NULL;    /* the file has fewer lines than requested */

    /* written as a subtraction so a huge column can not overflow the sum */
    if( ocol <= (filesize - off) )
        return cp + ocol;
    return NULL;
}

/* ASCII-only character class tests. The argument is parenthesized so any
   expression can be passed, but it is evaluated more than once, so pass
   nothing with side effects. */
#define ISUPPERALPHA(a) (( (a) >= 'A' ) && ( (a) <= 'Z' ))
#define ISLOWERALPHA(a) (( (a) >= 'a' ) && ( (a) <= 'z' ))
#define ISNUMERIC(a)    (( (a) >= '0' ) && ( (a) <= '9' ))
#define ISALPHANUM(a) (ISUPPERALPHA(a) || ISLOWERALPHA(a) || ISNUMERIC(a))

/* buffer compare 'correction' types
   One bit per kind of allowance find_text_in_buf() had to make while
   matching the lexer text against the raw file text. */
#define CT_SKIPPED_CR       0x00000001  /* a '\r' in the file was ignored */
#define CT_LF_TO_SPACE      0x00000002  /* a '\n' in the file was treated as ' ' */
#define CT_NBSP_C2A0        0x00000004  /* file '&nbsp;' matched lexer bytes 0xc2 0xa0 */
#define CT_SKIP_ENTITY      0x00000008  /* an '&...;' entity in the file was stepped over */
#define CT_SKIP_AMP         0x00000010  /* an '&amp;' in the file matched a lexer '&' */
#define CT_HYPHEN_EQUAL     0x00000020  /* file '--' matched lexer '==' */
#define CT_SKIP_SPACE       0x00000040  /* an extra ' ' in the file was skipped */

static unsigned int correction_type;    /* accumulate 'correction' flags */
/* maps one correction flag to its display name */
typedef struct tagCT2TEXT {
    unsigned int flag;
    char * text;
}CT2TEXT, * PCT2TEXT;

/* flag-to-name table used by get_correction_type_text(); the entry with a
   NULL text ends the table */
CT2TEXT sCT2Text[] = {
    { CT_SKIPPED_CR, "SKIP_CR" },
    { CT_LF_TO_SPACE, "LF_2_SPACE" },
    { CT_NBSP_C2A0, "NBSP_C2A0" },
    { CT_SKIP_ENTITY, "SKIP_ENTITY" },
    { CT_SKIP_AMP, "SKIP_AMP" },
    { CT_HYPHEN_EQUAL, "HYPHEN_EQUAL" },
    { CT_SKIP_SPACE, "SKIP_SPACE" },
    /* last */
    { 0, NULL }
};


/* Build a space separated list of the names of all flags currently set in
   correction_type, in table order. Returns a static buffer (an empty string
   when no flag is set) that is overwritten by the next call. */
char * get_correction_type_text(void)
{
    static char _s_ct2txt_buf[264];
    unsigned int pending = correction_type;
    PCT2TEXT entry;

    _s_ct2txt_buf[0] = 0;
    /* stop at the table end, or early once every set flag is named */
    for( entry = sCT2Text; entry->text && pending; entry++ ) {
        if( (pending & entry->flag) == 0 )
            continue;
        if( _s_ct2txt_buf[0] )
            strcat(_s_ct2txt_buf, " ");
        strcat(_s_ct2txt_buf, entry->text);
        pending &= ~(entry->flag);
    }
    return _s_ct2txt_buf;
}

/* -----------------------------------------------------------
    int find_text_in_buf( char * plexer, int * polin, int * pocol );

    Quite messy service to try to FIND the 'lexer' text,
    in the file buffer, starting from the beginning of the
    (zero based) line *polin and searching forward to the end
    of the file data.
    Tries to deal with entities untranslated in the file buffer,
    but 'translated' in the lexer buffer.
    Returns 1 if found, setting *pocol to the offset of the match
    from the start of the ORIGINAL line, and *polin to the line
    advanced by one for each line feed passed while searching.
    Returns 0 if not found (or 'plexer' is empty, no file is
    loaded, or the line is beyond the file), setting nothing.
    The allowances that were needed are left in 'correction_type'
    (see the CT_* flags); flags set during failed attempts at
    earlier positions stay set.

    IT JUST EMPIRICALLY GREW AS I FOUND THINGS TO FIX, AND
    NO ATTEMPT MADE TO SMOOTH IT OUT - BUT IT WORKS
    (IN MOST CASES) ;=)
   ----------------------------------------------------------- */
int find_text_in_buf( char * plexer, int * polin, int * pocol )
{
    int len = (int)strlen(plexer);
    int olin = *polin;
    char * cp = filebuffer;
    int off = 0;
    int i, j1, j2, k, l, ncol;
    int skipped;
    char    c1, c2, pc1, nc1, nc2;

    correction_type = 0;    /* no 'corrections' yet */
    if( len == 0 )
        return 0;   /* can NOT find nothing ;=)) */
    if( cp == NULL )
        return 0;   /* no file data to search */

    /* get to the SAME line, never reading beyond the loaded data */
    i = olin;
    while( i && (off < filesize) ) {
        if( *cp == '\n' )
            i--;
        cp++;
        off++;
    }
    if( i )
        return 0;   /* the file has fewer lines than that */

    /* now should be able to FIND the lexer text in this line
       unless the line is also wrong! */
    pc1 = 0;
    /* search this file data for the lexer text
       some problems:
       file data will contain 0x0d, 0x0a in windows.
       The CR should be skipped
       The LF may or may not be recorded as a ' ' in lexer data
       And file line spaces ' ', may have been stripped from lexer data
       A strange one - File is 0xa0 0xa0 0xa0...for(11)+ 0x20, and lexer is
       0xc2 0xa0 0xc2 0xa0 0xc2 0xa0 ...etc - len=16
       Try to handle ALL these cases ;=()
       ----------------------------------------------------------- */
    for ( i = 0; (i + off) < filesize; i++ ) {
        c1 = *plexer; /* load FIRST char to find */
        c2 = cp[i];
        if ( c2 == '\r' ) {
            correction_type |= CT_SKIPPED_CR;
            continue;   /* ignore this extra file char */
        }
        if( c2 == '\n' ) {
            olin++; /* bump the line number */
            c2 = ' ';
            correction_type |= CT_LF_TO_SPACE;
        }
        /* try to handle file='&nbsp;', and lexer is the two bytes 0xc2 0xa0 */
        j1 = 1;
        j2 = 1;
        if ( (c2 == '&') && ((c1 & 0xFF) == 0xc2) ) {
            if( (filesize - (j2 + i + off)) > 6 ) { /* GT '&nbsp;' */
                if(( cp[i + j2 + 4] == ';') && (cp[i + j2 + 3] == 'p') && (cp[i + j2 + 2] == 's')) {
                    /* lexer will store 0xc2 0xa0 */
                    if(( (j1 + 1) <= len ) && ((plexer[1] & 0xFF) == 0xa0)) {
                        j1 += 1;    /* step over the LEXER 'translation' */
                        j2 += 5;    /* step over the file '&nbsp;' */
                        c2 = c1;    /* make them EQUAL!!! */
                        correction_type |= CT_NBSP_C2A0;
                    }
                }
            }
        }
        if ( c1 == c2 ) {
            /* got first char at least */
            ncol = i; /* keep logical column offset */
            /* J1 AND J2 VALUES HAVE NOW BEEN SET ABOVE */
            for( ; (j2 + i + off) < filesize; j1++, j2++ ) {
                if( j1 == len ) {
                    *pocol = ncol;  /* return NEW offset found */
                    *polin = olin;  /* and potentially a NEW line */
                    return 1;   /* successful match */
                }
                nc1 = plexer[j1 + 1];   /* next lexer char (at worst the NUL) */
                /* next file char, or 0 when this is the last byte of data */
                nc2 = ((j2 + i + off + 1) < filesize) ? cp[i + j2 + 1] : 0;
                pc1 = c1;
                c1 = plexer[j1];
                c2 = cp[i + j2];
                if( c1 != c2 ) {
                    if( c2 == '&' ) {
                        /* this could be a Tidy 'translated' entity, so try harder */
                        skipped = 0;
                        for(k = j2 + 1; (k + i + off) < filesize; k++) {
                            c2 = cp[i + k];
                            if( (k > (j2 + 1)) && (c2 == ';') ) {
                                /* reached end of entity */
                                k++;
                                if( (k + i + off) < filesize ) {
                                    c2 = cp[i + k]; /* get char AFTER entity */
                                    for( l = j1 + 1; l < len; l++ ) {
                                        c1 = plexer[l];
                                        if( c1 == c2 ) {
                                            /* assumed success skip of entity */
                                            j1 = l;
                                            j2 = k;
                                            correction_type |= CT_SKIP_ENTITY;
                                            skipped = 1;
                                            break;
                                        }
                                    }
                                }
                                break;
                            } else if( !( ISALPHANUM(c2) || (c2 == '#')) )
                                break;  /* not an entity character - give up */
                        }
                        if( !skipped ) {
                            /* the scan clobbered c1/c2 - restore the real
                               mismatching pair so the tests below see them */
                            c1 = plexer[j1];
                            c2 = cp[i + j2];
                        }
                    } else if ((pc1 == '&') && (c2 == 'a')) {
                        /* handle things like hrefs, where one side has a
                           bare '&svc' and the other has '&amp;svc'... */
                        if( (filesize - (j2 + i + off)) > 4 ) {
                            if( cp[i + j2 + 3] == ';' ) {
                                c2 = cp[i + j2 + 4];
                                if( c1 == c2 ) {
                                    j2 += 4;    /* bump past '&amp;' */
                                    correction_type |= CT_SKIP_AMP;
                                }
                            }
                        }
                    } else if ((c1 == '=') && (c2 == '-') && (nc1 == '=') && (nc2 == '-')) {
                        /* special case in comments where a pair of hyphens is
                           reported as a warning by Tidy, and converts these '--'
                           to '==' - BUMP past these... */
                        j1++;
                        j2++;
                        correction_type |= CT_HYPHEN_EQUAL;
                        continue;
                    }

                    /* if file character is a SPACE, and there are more,
                       and the NEXT file character is the same as the lexer
                       then bump, and continue... */
                    if (( c1 != c2 ) && (c2 == ' ') && (filesize - (j2 + i + off) > 2)) {
                        if( cp[i + j2 + 1] == c1 ) {
                            j2++;
                            c2 = cp[i + j2];
                            correction_type |= CT_SKIP_SPACE;
                        }
                    }
                    if( c1 != c2 )
                        break;  /* FAILED */
                }
            }
            /* a FAILED break always leaves j1 < len, so j1 == len here means
               the data ran out exactly as the last lexer char matched */
            if( j1 == len ) {
                *pocol = ncol;
                *polin = olin;
                return 1;   /* successful match at the very end of the file */
            }
        }
        pc1 = c1;
    }
    return 0;
}

/* Exact compare of the first 'len2' bytes of 'fil' and 'lex'.
   Returns 0 when they are all equal (also when len2 <= 0), else 1.
   NUL bytes get no special treatment; the caller guarantees the length. */
int simple_text_compare( char * fil, char * lex, int len2 )
{
    int remaining = len2;

    while( remaining-- > 0 ) {
        if( *fil++ != *lex++ )
            return 1;   /* first differing byte decides */
    }
    return 0;
}

/* Compare the first 'len2' bytes of 'fil' and 'lex', ignoring the case of
   the ASCII letters A-Z. Returns 0 when all are equal, else 1. */
int simple_text_compare_nc( char * fil, char * lex, int len2 )
{
    int pos = 0;

    while( pos < len2 ) {
        char a = fil[pos];
        char b = lex[pos];

        /* fold upper case to lower case on both sides */
        if( ISUPPERALPHA(a) )
            a = a + 'a' - 'A';
        if( ISUPPERALPHA(b) )
            b = b + 'a' - 'A';

        if( a != b )
            return 1;
        pos++;
    }
    return 0;
}


/* Check that string 'fil' starts with string 'lex', first exactly and then
   ignoring ASCII case.
   Returns 0 on a match, 1 when 'lex' is empty or the text differs, and
   -1 when 'fil' is shorter than 'lex'. */
int compare_the_text( char * fil, char * lex )
{
    int lexlen = (int)strlen(lex);
    int fillen = (int)strlen(fil);

    if( lexlen == 0 )
        return 1;
    if( fillen < lexlen )
        return -1;

    if( simple_text_compare( fil, lex, lexlen ) == 0 )
        return 0;   /* exact match */

    /* could try HARDER, especially with 'entities' */
    return simple_text_compare_nc( fil, lex, lexlen ) ? 1 : 0;
}


/* Portable case-insensitive compare of at most 'i' characters of 'off' and
   'ptmp' (replaces the Windows-only _strnicmp, so it also builds on unix).
   The compare stops at the first difference or at a NUL in both strings.
   Returns 0 when equal (also when i <= 0), a negative value when 'off'
   sorts before 'ptmp', else a positive value. */
int nocase_strncmp( char * off, char * ptmp, int i )
{
    int n;
    for( n = 0; n < i; n++ ) {
        /* tolower() needs an unsigned char value, plain char may be negative */
        int c1 = tolower( (unsigned char)off[n] );
        int c2 = tolower( (unsigned char)ptmp[n] );
        if( c1 != c2 )
            return c1 - c2;
        if( c1 == 0 )
            break;      /* both strings ended together */
    }
    return 0;
}

/* Case-insensitive compare of 'off' against 'ptmp' that tolerates
   differences in 'spacey' characters (anything <= ' ', as plain char).
   At most min(strlen(ptmp), len) characters are compared on each side.
   Returns -1 when 'off' is shorter than 'ptmp', 0 when the texts agree
   apart from spacey characters, else the difference of the first pair of
   characters that could not be reconciled. */
int nocase_strncmp_ignore_spacey( char * off, char * ptmp, int len )
{
    int len1 = (int)strlen(off);
    int len2 = (int)strlen(ptmp);
    int max = len2;
    int i1, i2;
    char c1, c2;
    if( len1 < len2 )
        return -1;
    if( max > len )
        max = len;
    i1 = 0;
    i2 = 0;
    while( (i1 < max) && (i2 < max) )
    {
        /* tolower() needs an unsigned char value, plain char may be negative */
        c1 = (char)tolower( (unsigned char)off[i1] );
        c2 = (char)tolower( (unsigned char)ptmp[i2] );
        if( c1 == c2 ) {
            i1++;
            i2++;
            continue;
        }

        /* mismatch: step each side over its run of spacey characters.
           Index 'max' is still inside both strings (max <= len2 <= len1). */
        while((i1 < max) && (c1 <= ' '))
        {
            i1++;
            c1 = (char)tolower( (unsigned char)off[i1] );
        }
        while((i2 < max) && (c2 <= ' '))
        {
            i2++;
            c2 = (char)tolower( (unsigned char)ptmp[i2] );
        }
        if(( i1 >= max ) && ( i2 >= max ))
            return 0;   /* ran out of both in space skipping */

        if( c1 == c2 ) {
            i1++;
            i2++;
            continue;
        }
        return (c1 - c2);
    }

    return 0;   /* got to 'max' compare */
}

/* Recursively count every descendant of 'tnod'.
   *pcount is bumped for each node, *pimplicit for each node reported as
   implicit by tidyNodeImplicit(). Callers set both to 0 beforehand. */
void countNodes(  TidyNode tnod, int * pcount, int * pimplicit )
{
    TidyNode node = tidyGetChild(tnod);

    while( node ) {
        *pcount += 1;
        if( tidyNodeImplicit(node) )
            *pimplicit += 1;
        countNodes( node, pcount, pimplicit );  /* then the node's own children */
        node = tidyGetNext(node);
    }
}

/* Append the value text of node 'child' to 'ptmp', starting at index 'i'
   and keeping the buffer NUL terminated. Copying stops once the total
   length reaches MX_CMP_TEXT, which is all the later compare needs.
   Returns the new total length of the text in 'ptmp'. */
int storeNodeValue(char * ptmp, int i, TidyNode child)
{
    TidyBuffer nodetext;

    tidyBufInit( &nodetext );
    tidyNodeGetValue( tdoc, child, &nodetext );
    while( !tidyBufEndOfInput(&nodetext) ) {
        char byte = tidyBufGetByte(&nodetext);
        if( i >= MX_CMP_TEXT )
            break;  /* we have enough to compare */
        ptmp[i++] = byte;
        ptmp[i] = 0;
    }
    tidyBufFree( &nodetext );
    return i;   /* return total (new) LENGTH */
}

/* -----------------------------------------------------
   lexer states
    switch (lexer->state)
    {
        case LEX_CONTENT:  /* element content *=/
        case LEX_GT:  /* < *=/
        case LEX_ENDTAG:  /* </letter *=/
        case LEX_STARTTAG: /* first letter of tagname *=/
        case LEX_COMMENT:  /* seen <!-- so look for --> *=/
        case LEX_DOCTYPE:  /* seen <!d so look for '>' munging whitespace *=/
        case LEX_PROCINSTR:  /* seen <? so look for '>' *=/
        case LEX_ASP:  /* seen <% so look for "%>" *=/
        case LEX_JSTE:  /* seen <# so look for "#>" *=/
        case LEX_PHP: /* seen "<?php" so look for "?>" *=/
        case LEX_XMLDECL: /* seen "<?xml" so look for "?>" *=/
        case LEX_SECTION: /* seen "<![" so look for "]>" *=/
        case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" *=/
    }
   ------------------------------------------------------ */
/* Local stand-in for the lexer states listed above (lexer.h is deliberately
   not included). dumpNode() maps each Tidy node type to one of these and
   uses it to pick the source text that should be found at the node's
   reported position; that expected text is noted per value. */
typedef enum
{
  MLEX_UNKNOWN, /* added to have an UNKNOWN state */
  MLEX_CONTENT,     /* text */
  MLEX_GT,          /* "<text />" (not exactly!) */
  MLEX_ENDTAG,      /* "</" */
  MLEX_STARTTAG,    /* "<" */
  MLEX_COMMENT,     /* '<!--' */
  MLEX_DOCTYPE,     /* '<!' */
  MLEX_PROCINSTR,   /* '<?' */
  MLEX_CDATA,       /* "<![CDATA[" */
  MLEX_SECTION,     /* "<![" */
  MLEX_ASP,         /* '<%' */
  MLEX_JSTE,        /* '<#' */
  MLEX_PHP,         /* "<?php" */
  MLEX_XMLDECL,     /* "<?xml" */
  MLEX_ROOT         /* added just for ROOT */
} MLexerState;

/* ======================================================================
   void dumpNode( TidyNode tnod, int indent )

   Dump each NODE in the parsing tree below 'tnod', outputting the row
   and column, with 'indent' columns of indentation (children get
   'nodeindent' more).
   The FULL file text has been loaded into 'filebuffer', of 'filesize'
   and the 'row' and 'column' report is checked against it:
   - implicit nodes are only listed, their position is not checked
   - text nodes compare the start of the file text with the node value
   - other nodes compare the file text with the markup expected for the
     node type (e.g. "<name", "</name", "<!--value")
   A node that matches bumps 'okcount'; one that only matches after a
   search by find_text_in_buf() bumps 'foundcount'; else 'notfoundcount'.

   All per-node working state (lexer state, first characters, the small
   text stores) is reset for every node, so nothing leaks from the
   previously dumped node into the checks of the next one.
   ====================================================================== */
void dumpNode( TidyNode tnod, int indent )
{
    TidyNode child;
    int line,column;
    int olin,ocol;
    char * off;
    char cfirst, ctext, next;
    int gotfirst, gottext;  /* set once cfirst / ctext hold a real character */
    int i;
    char * ptmp = get_tmp_buf();    /* a static temporary 1024 buffer */
    char * pct;
    int state;
    TidyBuffer output;

    for ( child = tidyGetChild(tnod); child; child = tidyGetNext(child) )
    {
        ctmbstr name;

        /* start every node from a clean slate */
        state = MLEX_UNKNOWN;
        cfirst = 0;
        ctext = 0;
        gotfirst = 0;
        gottext = 0;
        filtext[0] = 0;
        lextext[0] = 0;

        switch ( tidyNodeGetType(child) )
        {
        case TidyNode_Root:
            name = "Root";
            state = MLEX_ROOT;
            break;
        case TidyNode_DocType:
            name = "DOCTYPE";
            state = MLEX_DOCTYPE;   /* <!D */
            break;
        case TidyNode_Comment:
            name = "Comment";
            state = MLEX_COMMENT;     /* '<!--' */
            break;
        case TidyNode_ProcIns:
            name = "Processing Instruction";
            state = MLEX_PROCINSTR;   /* '<?' */
            break;
        case TidyNode_Text:
            name = "Text";
            state = MLEX_CONTENT;     /* text */
            break;
        case TidyNode_CDATA:
            name = "CDATA";
            state = MLEX_CDATA;       /* "<![CDATA[" */
            break;
        case TidyNode_Section:
            name = "XML Section";
            state = MLEX_SECTION;     /* "<![" */
            break;
        case TidyNode_Asp:
            name = "ASP";
            state = MLEX_ASP;         /* '<%' */
            break;
        case TidyNode_Jste:
            name = "JSTE";
            state = MLEX_JSTE;        /* '<#' */
            break;
        case TidyNode_Php:
            name = "PHP";
            state = MLEX_PHP;         /* "<?php" */
            break;
        case TidyNode_XmlDecl:
            name = "XML Declaration";
            state = MLEX_XMLDECL;     /* "<?xml" */
            break;
        case TidyNode_Start:
            name = tidyNodeGetName( child );
            state = MLEX_STARTTAG;    /* "<" */
            break;
        case TidyNode_End:
            name = tidyNodeGetName( child );
            state = MLEX_ENDTAG;      /* "</" */
            break;
        case TidyNode_StartEnd:
            name = tidyNodeGetName( child );
            state = MLEX_GT;          /* "<text />" (not exactly!) */
            break;
        default:
            name = tidyNodeGetName( child );
            break;
        }
        assert( name != NULL );
        line   = tidyNodeLine(child);
        column = tidyNodeColumn(child);
        nodecount++;
        if( shownodecount ) {
            sprtf( "%3d: ", nodecount );
        }
        /* reduce to logical offsets */
        olin = line ? line - 1 : 0;
        ocol = column ? column - 1 : 0;
        off  = getFileBufferOffset(olin, ocol);
        if( tidyNodeImplicit(child) ) {
            /* IGNORE line/column values for IMPLICIT (added) nodes... but show them */
            sprtf( "implicit: %*.*sNode: %s (%d,%d)\n", indent, indent, " ", name, line, column );
        } else {
            sprtf( "%3d:%3d: %*.*sNode: %s [", line, column, indent, indent, " ", name );
            if(off) {
                /* show (and keep a little of) the file text at the position */
                for(i = 0; i < maxoutput; i++) {
                    if( &off[i] < fileend ) {
                        char c = off[i];
                        if(i == 0) {
                            cfirst = c;
                            gotfirst = 1;
                        }
                        if(c >= ' ') {
                            if( i && ( c == '<' )) /* break if open tag, if not first */
                                break;
                            sprtf("%c", c ); /* show char */
                            if( c == '>' ) /* break on a close tag */
                                break;
                        } else {
                            sprtf(".");
                        }
                        /* keep a little store */
                        if(i < MX_TXT_STORED) {
                            filtext[i] = c;
                            filtext[i+1] = 0;   /* zero terminate */
                        }
                    } else {
                        sprtf("WARNING: appears out of range!" );
                        break;
                    }
                }
            } else {
                sprtf("WARNING: appears out of range!" );
            }
            sprtf("]");
            if( state == MLEX_CONTENT ) {
                /* text node: show the lexer's text and compare the starts */
                sprtf("T[");
                tidyBufInit( &output );
                tidyNodeGetValue( tdoc, child, &output );
                ptmp[0] = 0;    /* an empty value must not reuse old buffer content */
                i = 0;
                while( !tidyBufEndOfInput(&output) ) {
                    next = tidyBufGetByte(&output);
                    sprtf("%c", next );
                    if( i == 0 ) {
                        ctext = next;
                        gottext = 1;
                    }
                    if( i < MX_CMP_TEXT ) {  /* buffer is 1024 */
                        ptmp[i] = next;
                        ptmp[i+1] = 0;
                        if( i < MX_TXT_STORED ) {
                            lextext[i] = next;
                            lextext[i+1] = 0;
                        }
                        i++;
                    } else
                        break;
                }
                tidyBufFree( &output );
                sprtf("]");
                if( compare_the_text( filtext, lextext ) == 0 ) {
                    sprtf("ok");
                    okcount++;
                } else if( gotfirst && gottext && (cfirst == ctext) ) {
                    sprtf("ok1");
                    okcount++;
                } else if ( gotfirst && gottext &&
                            ((cfirst & 0xFF) == 0xa0 ) && ((ctext & 0xFF) == 0xc2)) {
                    /* special case - file is 0xa0, which lexer stores as 0xc2 0xa0!!! */
                    sprtf("okA0=C2A0");
                    okcount++;
                } else {
                    sprtf("Different?");
                    if ( find_text_in_buf( ptmp, &olin, &ocol ) ) {
                        pct = get_correction_type_text();
                        sprtf(" But\n AT: %3d,%3d found match to lexer text", olin + 1, ocol + 1);
                        if( *pct )
                            sprtf( " (CT=%s)", pct );
                        foundcount++;
                    } else {
                        notfoundcount++;
                    }
                }
            } else {
                /* NOT text node */
                if(off) {
                    /* build in ptmp the markup expected in the file */
                    i = sprintf(ptmp, "%s", name);
                    switch(state)
                    {
                    case MLEX_UNKNOWN:  /* added to have an UNKNOWN state */
                        break;
                    case MLEX_CONTENT:  /* text */
                        break;
                    case MLEX_GT:       /* "<text />" (not exactly!) */
                        i = sprintf(ptmp, "<%s", name);
                        break;
                    case MLEX_ENDTAG:   /* "</" */
                        i = sprintf(ptmp, "</%s", name);
                        break;
                    case MLEX_STARTTAG: /* "<" */
                        i = sprintf(ptmp, "<%s", name);
                        break;
                    case MLEX_COMMENT:  /* '<!--' */
                        i = sprintf(ptmp, "<!--");
                        i = storeNodeValue(ptmp, i, child);
                        break;
                    case MLEX_DOCTYPE:  /* '<!' - here name is 'DOCTYPE', so */
                        i = sprintf(ptmp, "<!%s", name);
                        break;
                    case MLEX_PROCINSTR: /* '<?' */
                        i = sprintf(ptmp, "<?");
                        i = storeNodeValue(ptmp, i, child);
                        break;
                    case MLEX_CDATA:     /* "<![CDATA[" */
                        i = sprintf(ptmp, "<![CDATA[");
                        i = storeNodeValue(ptmp, i, child);
                        break;
                    case MLEX_SECTION:   /* "<![" */
                        i = sprintf(ptmp, "<![");
                        i = storeNodeValue(ptmp, i, child);
                        break;
                    case MLEX_ASP:       /* '<%' */
                        i = sprintf(ptmp, "<%%");
                        i = storeNodeValue(ptmp, i, child);
                        break;
                    case MLEX_JSTE:      /* '<#' */
                        i = sprintf(ptmp, "<#");
                        i = storeNodeValue(ptmp, i, child);
                        break;
                    case MLEX_PHP:       /* "<?php" */
                        i = sprintf(ptmp, "<?");
                        i = storeNodeValue(ptmp, i, child);
                        break;
                    case MLEX_XMLDECL:   /* "<?xml" */
                        i = sprintf(ptmp, "<?xml %s", name);
                        break;
                    case MLEX_ROOT:      /* added just for ROOT */
                        break;
                    default:
                        break;
                    }

                    if( nocase_strncmp( off, ptmp, i ) ) {
                        sprtf("Different?");
                        if ( find_text_in_buf( ptmp, &olin, &ocol ) ) {
                            pct = get_correction_type_text();
                            sprtf(" But\n AT: %3d,%3d found match to lexer text", olin + 1, ocol + 1);
                            if( *pct )
                                sprtf( " (CT=%s)", pct );
                            foundcount++;
                        } else {
                            if( nocase_strncmp_ignore_spacey( off, ptmp, i ) == 0 ) {
                                sprtf( " But diff only spacey chars!" );
                                okcount++;
                            } else {
                                notfoundcount++;
                            }
                        }
                    } else {
                        sprtf("ok");
                        okcount++;
                    }
                }
            }
            sprtf("\n");
        }
        dumpNode( child, indent + nodeindent );
    }
}

/* Return a pointer, inside 'pfile', to the part after the last '\' or '/'
   path separator - the whole string when it has no separator, and an
   empty string when it ends in one. */
char * file_name_only(char * pfile)
{
    char * base = pfile;
    char * scan;

    for( scan = pfile; *scan; scan++ ) {
        if( (*scan == '/') || (*scan == '\\') )
            base = scan + 1;    /* name starts after the latest separator */
    }
    return base;
}


/* Dump the whole document 'tdoc': print a header with the input file name,
   size and time, dump every node via dumpNode(), then print a one line
   result. The same result, prefixed with the bare file name, is added to
   the simple list for the end-of-run summary.
   All the node counters are reset first. */
void dumpDoc( TidyDoc tdoc )
{
    static char _s_results_buf[512];
    char * pres = _s_results_buf;
    int count = 0;
    int implicit = 0;

    nodecount = 0;
    okcount = 0;
    foundcount = 0;
    notfoundcount = 0;
    addedfilename = 0;
    countNodes( tidyGetRoot(tdoc), &count, &implicit );
    /* st_size is not an int on every platform, so convert it explicitly */
    sprtf("\nInput File: [%s],\n      Size: [%ld] bytes, Time: [%s].\n", htmlfil, (long)buf.st_size, timebuf);
    sprtf("Dump from ROOT... %d nodes... %d implicit\n", count, implicit);
    dumpNode( tidyGetRoot(tdoc), 0 );
    sprtf("End dump of %d non-implicit nodes... ", count - implicit);
    /* cap the name so name plus result text always fits the 512 byte buffer */
    sprintf(pres, "%.400s ", file_name_only(htmlfil));
    if( (count - implicit) == okcount ) {
        sprtf("ALL appear ok.");
        strcat(pres,"ALL appear ok.");
    } else if( foundcount ) {
        sprtf("%d ok, %d found, ", okcount, foundcount);
        sprintf(EndBuf(pres), "%d ok, %d found, ", okcount, foundcount);
        if( (count - implicit) == (okcount + foundcount) ) {
            sprtf(" none missed.");
            strcat(pres, " none missed.");
        } else {
            sprtf(" %d missed.", ((count - implicit) - (okcount + foundcount)) );
            sprintf(EndBuf(pres), " %d missed.", ((count - implicit) - (okcount + foundcount)) );
        }
    } else {
        sprtf("%d appear ok, but %d others missed?", okcount, ((count - implicit) - (okcount + foundcount)));
        sprintf(EndBuf(pres),"%d appear ok, but %d others missed?", okcount, ((count - implicit) - (okcount + foundcount)));
    }
    add_to_simple_list(pres);
    sprtf("\n");

}

#ifdef USE_LOCAL_SPRTF
/* sprtf.c 
 * just a LOG output
 */

//#include "sprtf.h"

#define  MXIO     256   /* prt() passes its output on in chunks of about this size */
/* true when 'a' is a usable stream: not NULL and not the (FILE *)-1
   marker that is stored after a failed open or write */
#define  VFP(a)   ( a && ( a != (FILE *)-1 ))

char def_log[] = "tempnode.txt";    /* log file name used when none was set */
char logfile[256] = "\0";           /* log file name in use; empty until set */
FILE * outfile = NULL;  /* log stream: NULL = not opened yet, (FILE *)-1 = failed */
int log_closed = 0;     /* not referenced in this section */
int add_std_out = 1;    /* non-zero: oi() also writes everything to stdout */
int no_buffering = 1;   /* not referenced in this section */
int last_was_crlf = 0;  /* set by oi(): 1 when the last text written ended in '\n' */

/*
 * Open (and truncate) the log file for binary writing.
 *
 * Uses def_log when no log name has been set. On failure outfile is set to
 * the (FILE *)-1 marker BEFORE the warning is issued, so that the warning
 * itself does not try to open the log again.
 *
 * Returns 1 on success, 0 on failure.
 */
int open_log_file( void )
{
   if( !logfile[0] )
      strcpy(logfile, def_log);

   outfile = fopen(logfile, "wb");
   if( outfile != 0 )
      return 1; /* success */

   outfile = (FILE *)-1;
   sprtf("WARNING: Failed to open log file [%s] ...\n", logfile);
   return 0;   /* failed */
}

/*
 * Raw output service: write 'ps' unchanged to the log file and, when
 * add_std_out is set, to stdout as well.
 *
 * The log file is opened on first use. A short write closes the log, marks
 * it unusable with (FILE *)-1 and reports a warning. last_was_crlf records
 * whether this string ended with '\n'. An empty string writes nothing.
 */
static void oi( char * ps )
{
   const int len = (int)strlen(ps);
   int written;

   last_was_crlf = 0;
   if( len == 0 )
      return;

   if( ps[len - 1] == '\n' )
      last_was_crlf = 1;

   if( outfile == 0 )
      open_log_file();

   if( VFP(outfile) ) {
      written = (int)fwrite( ps, 1, len, outfile );
      if( written == len ) {
         fflush(outfile);  /* ensure WRITTEN to file */
      } else {
         fclose(outfile);
         outfile = (FILE *)-1;
         sprtf("WARNING: Failed write to log file [%s] ...\n", logfile);
      }
   }

   if( add_std_out )
      fwrite( ps, 1, len, stdout );
}

/* Signature shared by the output services (prt and oi). */
typedef void (*PRTSERV)(char *);

/*
 * Translating output service: forward 'ps' to oi() with line ends
 * normalised to CR/LF.
 *
 *  - a CR not followed by LF gets an LF appended,
 *  - an LF not preceded by CR gets a CR inserted before it,
 *  - an existing CR/LF pair is passed through unchanged.
 *
 * Text is staged in a static buffer and handed to oi() whenever at least
 * MXIO characters have accumulated, and once more for any remainder. One
 * input character can expand to two, so the buffer carries spare bytes
 * beyond MXIO.
 */
static void prt( char * ps )
{
   static char buf[MXIO + 4];
   const size_t total = strlen(ps);
   size_t used = 0;     /* characters staged in buf */
   size_t pos;
   char prev = 0;       /* last character staged */

   for( pos = 0; pos < total; pos++ ) {
      char ch = ps[pos];

      switch( ch ) {
      case 0x0d:
         /* lone CR (last character, or not followed by LF): complete the pair */
         if( ((pos + 1) >= total) || (ps[pos + 1] != 0x0a) ) {
            buf[used++] = 0x0d;
            ch = 0x0a;
         }
         break;
      case 0x0a:
         /* bare LF: supply the missing CR first */
         if( prev != 0x0d )
            buf[used++] = 0x0d;
         break;
      default:
         break;
      }

      buf[used++] = ch;
      prev = ch;

      if( used >= MXIO ) {
         buf[used] = 0;
         oi( buf );
         used = 0;
      }
   }

   if( used ) {
      buf[used] = 0;
      oi( buf );
   }
}

/* Number of characters currently pending in _s_interbuf. */
static size_t last_len = 0;
/* Staging buffer: filled by inter_prt(), emitted by out_diag_buffer(). */
static char _s_interbuf[1024];

/* Output service called by out_diag_buffer(); defaults to prt(). */
PRTSERV prtsrv = prt;
/* Route output through oi() instead of prt(), i.e. without the CR/LF
 * translation that prt() performs. */
void set_nocrlf_srvr(void)
{
   prtsrv = oi;
}
/*
 * Flush the staging buffer: pass any pending text to the current output
 * service, then mark the buffer empty.
 */
void out_diag_buffer( void )
{
   if( last_len != 0 )
      prtsrv(_s_interbuf);

   last_len = 0;
}

/*
 * Append one formatted message to the staging buffer and flush it when
 * required.
 *
 * The buffer is flushed when no_buffering is set, when the message is the
 * literal "<debug stop>", or when more than MXIO - 16 characters are
 * pending.
 *
 * The staging buffer is never overrun: pending text is flushed first when
 * the new message would not fit behind it, and a message that cannot fit
 * even into an empty buffer is passed straight to the output service.
 */
void inter_prt( char * buf )
{
   char * lpb = _s_interbuf;
   size_t add = strlen(buf);
   int isdbg = strcmp(buf,"<debug stop>");

   lpb[last_len] = 0;

   /* make room: emit what is pending if the new text will not fit behind it */
   if( (last_len + add) >= sizeof(_s_interbuf) )
      out_diag_buffer();

   if( add >= sizeof(_s_interbuf) ) {
      /* too large to stage at all - nothing is pending now, so order is kept */
      prtsrv(buf);
      return;
   }

   strcpy(lpb + last_len, buf);
   last_len += add;

   if( no_buffering ||
      ( isdbg == 0 ) ||
      ( last_len > (MXIO - 16) ) ) {
      out_diag_buffer();   /* leaves last_len at 0 */
   }
}
/*
 * Flush any pending text and close the log file.
 *
 * outfile is reset to 0 afterwards, whether it held an open stream or the
 * (FILE *)-1 failure marker.
 */
void close_log_file( void )
{
   FILE * log;

   out_diag_buffer();
   last_len = 0;

   /* read outfile only after the flush, which may have opened it */
   log = outfile;
   outfile = 0;
   if( VFP(log) )
      fclose(log);
}

/*
 * printf-style logging entry point.
 *
 * Formats the message into a static buffer and passes it to inter_prt().
 * Does nothing, and returns 0, while log_closed is non-zero.
 *
 * Formatting is bounded by the buffer size, so an over-long message is
 * truncated instead of overrunning the buffer. The buffer is terminated
 * explicitly because older Microsoft runtimes do not terminate the output
 * of vsnprintf() on truncation.
 *
 * Returns the number of characters actually passed on.
 */
int __cdecl sprtf( char * lpf, ... )
{
   static char _s_sprtfbuf[1024];
   char * lpb = &_s_sprtfbuf[0];
   const size_t room = sizeof(_s_sprtfbuf);
   int   i = 0;
   va_list arglist;
   if( !log_closed ) {
      lpb[0] = 0;
      va_start(arglist, lpf);
      i = vsnprintf( lpb, room - 1, lpf, arglist );
      va_end(arglist);
      lpb[room - 1] = 0;   /* always terminated, whatever the runtime did */
      /* error or truncation: report what is really in the buffer */
      if( (i < 0) || ((size_t)i >= (room - 1)) )
         i = (int)strlen(lpb);
      inter_prt(lpb);
   }
   return i;
}

/* eof - sprtf.c */
#endif /* #ifdef USE_LOCAL_SPRTF */

/*
 * Process a single HTML file.
 *
 * Records the file's size and modification time (buf / timebuf), loads the
 * raw bytes into filebuffer (fileend marks one past the last byte), parses
 * the file with tidyParseFile() and dumps the resulting tree with dumpDoc().
 *
 * Returns the tidyParseFile() status, or 3 when the file cannot be
 * stat'ed, opened, buffered or fully read (pgm_exit() is called first in
 * each of those cases).
 */
int process_a_file( char * pfil )
{
    char * ptm;
    int len;
    int status = 1;
    htmlfil = pfil;
    if( stat(pfil,&buf) ) {
        sprtf("ERROR: Unable to 'stat' file [%s]!\n", pfil);
        pgm_exit( tdoc, 3 );
        return 3;
    }
    ptm = ctime(&buf.st_mtime);
    if(ptm) {
        strcpy(timebuf,ptm);
        /* strip the trailing newline / white space that ctime() appends */
        len = (int)strlen(timebuf);
        while(len--) {
            if(timebuf[len] > ' ')
                break;
            timebuf[len] = 0;
        }
    } else {
        strcpy(timebuf,"Not valid!");
    }
    fp = fopen(pfil,"rb");
    if( !fp ) {
        sprtf("ERROR: Unable to 'open' file [%s]!\n", pfil);
        pgm_exit( tdoc, 3 );
        return 3;
    }
    filebuffer = (char *)malloc((size_t)buf.st_size + 1);
    if( !filebuffer ) {
        fclose(fp);
        /* st_size is an off_t, not an int: convert to match the format */
        sprtf("ERROR: Memory FAILED on %ld bytes!\n", (long)buf.st_size );
        pgm_exit( tdoc, 3 );
        return 3;
    }
    filesize = (int)fread( filebuffer, 1, (size_t)buf.st_size, fp );
    fclose(fp);
    if(filesize != buf.st_size) {
        /* release the buffer here: it would leak if pgm_exit() returns */
        free(filebuffer);
        filebuffer = NULL;
        fileend = NULL;
        sprtf("ERROR: READ FAILED on file [%s]! %d vs %ld!!\n", pfil, filesize, (long)buf.st_size);
        pgm_exit( tdoc, 3 );
        return 3;

    }
    /* the extra byte allocated above holds a terminator, so the buffer can
       also be treated as a C string */
    filebuffer[filesize] = 0;
    fileend = filebuffer + filesize;

    if ( tidyOptGetBool(tdoc, TidyEmacs) )
        tidyOptSetValue( tdoc, TidyEmacsFile, pfil );
    status = tidyParseFile( tdoc, pfil );

    dumpDoc( tdoc );

    free(filebuffer);
    /* do not leave dangling pointers in the file-level state */
    filebuffer = NULL;
    fileend = NULL;

    return status;
}

/*
 * Process the command line input, which is either a file name or, when
 * built with a Microsoft compiler, possibly a wild card mask.
 *
 * If 'pfil' names an existing file it is processed directly. Otherwise,
 * under _MSC_VER, it is tried as a find mask and every match that is not a
 * directory is processed; the path of each match is rebuilt from the drive
 * and directory of the mask plus the found name. If nothing matches, 'pfil'
 * is still handed to process_a_file(), which reports the failure.
 *
 * Returns the process_a_file() status, or the sum of the statuses when a
 * mask expanded to several files.
 */
int process_input_file( char * pfil )
{
#ifdef _MSC_VER
    PSPLITPATH psp = &_s_splitpath;
    int results = 0;
    struct _finddata_t fd;
    intptr_t find;

    /* too long to copy or split safely: treat it as a plain file name */
    if( strlen(pfil) >= sizeof(psp->fullpath) )
        return process_a_file(pfil);

    strcpy(psp->fullpath, pfil);
    _splitpath( pfil, psp->drive, psp->dir, psp->name, psp->ext );
    if( stat(pfil,&buf) == 0 )
        return process_a_file(pfil);

    find = _findfirst(pfil, &fd);
    if( find == -1 )
        return process_a_file(pfil);

    inputmask = pfil;
    sprtf("Input Mask: [%s]... finding...\n", inputmask);
    do {
        if( !(fd.attrib & _A_SUBDIR) ) {    /* directories are ignored */
            size_t need = strlen(psp->drive) + strlen(psp->dir) + strlen(fd.name);
            if( need < sizeof(psp->fullpath) ) {
                /* the drive must be included, or a mask on another drive
                   resolves against the current one */
                strcpy(psp->fullpath, psp->drive);
                strcat(psp->fullpath, psp->dir);
                strcat(psp->fullpath, fd.name);
                results += process_a_file(psp->fullpath);
            } else {
                sprtf("WARNING: Path too long, skipping [%s%s%s]\n", psp->drive, psp->dir, fd.name);
            }
        }
    } while ( _findnext(find, &fd) == 0 );
    _findclose(find);
    return results;
#else
    return process_a_file(pfil);
#endif
}

int main( int argc, char **argv )
{
    char * cfgfil;
    int status = 0;
    tdoc = tidyCreate();
    sprtf( "Running: [%s]\nCompiled on %s, at %s\n", argv[0], __DATE__, __TIME__ );
    if( argc < 2 ) {
        sprtf("ERROR: Must give the name of the HTML file to test...\n");
        pgm_exit(tdoc,3);
        return 3;
    }
#ifdef TIDY_CONFIG_FILE
    if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_CONFIG_FILE );
        if ( status != 0 ) {
           sprtf("ERROR: Loading config file [%s] failed, err = %d\n", TIDY_CONFIG_FILE, status);
           pgm_exit( tdoc, 3 );
           return 3;
        }
        sprtf("Loaded config file [%s]\n", TIDY_CONFIG_FILE);
    }
#endif /* TIDY_CONFIG_FILE */
    /* look for env var "HTML_TIDY" */
    /* then for ~/.tidyrc (on platforms defining $HOME) */

    if ( (cfgfil = getenv("HTML_TIDY")) != NULL )
    {
        status = tidyLoadConfig( tdoc, cfgfil );
        if ( status != 0 ) {
            sprtf( "ERROR: Loading config file [%s] failed, err = %d\n", cfgfil, status);
            pgm_exit( tdoc, 3 );
            return 3;
        }
    }
#ifdef TIDY_USER_CONFIG_FILE
    else if ( tidyFileExists( tdoc, TIDY_USER_CONFIG_FILE) )
    {
        status = tidyLoadConfig( tdoc, TIDY_USER_CONFIG_FILE );
        if ( status != 0 ) {
            sprtf("ERROR: Loading config file [%s] failed, err = %d\n", TIDY_USER_CONFIG_FILE, status);
            pgm_exit( tdoc, 3 );
            return 3;
        }
    }
#endif /* TIDY_USER_CONFIG_FILE */

    htmlfil = argv[1];

    status = process_input_file( htmlfil );

    pgm_exit(tdoc,status);
    return status;
}


// eof - tidynodes.c
