
Differences between my TidyDev working copy and the CVS-updated Tidy sources
Current Diff
================================================================
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\clean.c tidydev\src\clean.c
--- C:\FGCVS\tidy\src\clean.c	Mon Aug 13 18:27:26 2007
+++ tidydev\src\clean.c	Thu Aug 16 13:31:15 2007
@@ -1768,7 +1768,9 @@
             attrIsLANG(attr)  ||
              ( (attrIsHEIGHT(attr) || attrIsWIDTH(attr)) &&
                (nodeIsTD(node) || nodeIsTR(node) || nodeIsTH(node)) ) ||
-             (attr->attribute && TY_(tmbstrncmp)(attr->attribute, "x:", 2) == 0) )
+             (attr->attribute && (
+             ( TY_(tmbstrncmp)(attr->attribute, "x:", 2) == 0 ) ||
+             ( TY_(tmbstrncmp)(attr->attribute, "v:", 2) == 0 ) ))) 
         {
             if (prev)
                 prev->next = next;
@@ -1992,7 +1994,9 @@
         {
             AttVal *attr = TY_(AttrGetById)(node, TidyAttr_REL);
 
-            if (AttrValueIs(attr, "File-List"))
+            /* some <link rel="???" ...> to discard */
+            if ( AttrValueIs(attr, "File-List") ||
+                 AttrValueIs(attr, "Edit-Time-Data") )
             {
                 node = TY_(DiscardElement)( doc, node );
                 continue;
@@ -2086,7 +2090,7 @@
         if (!node)
             return;
 
-        /* strip out style and class attributes */
+        /* strip out style, class, ... and "x:", "v:" attributes */
         if (TY_(nodeIsElement)(node))
             PurgeWord2000Attributes( doc, node );
 
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\clean.h tidydev\src\clean.h
--- C:\FGCVS\tidy\src\clean.h	Tue Sep 12 17:14:44 2006
+++ tidydev\src\clean.h	Sat Aug 18 19:55:25 2007
@@ -58,7 +58,7 @@
  as inline.
 */
 void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node);
-
+void TY_(CleanWord2003)( TidyDocImpl* doc, Node *node); /* new 20070523 */
 Bool TY_(IsWord2000)( TidyDocImpl* doc );
 
 /* where appropriate move object elements from head to body */
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\lexer.c tidydev\src\lexer.c
--- C:\FGCVS\tidy\src\lexer.c	Sat Mar 29 13:59:10 2008
+++ tidydev\src\lexer.c	Tue Apr 15 14:00:20 2008
@@ -1800,6 +1800,9 @@
     CDATA_ENDTAG
 } CDATAState;
 
+/* forward reference */
+Bool TY_(LexerInJavaComment)( TidyDocImpl* doc );
+
 static Node *GetCDATA( TidyDocImpl* doc, Node *container )
 {
     Lexer* lexer = doc->lexer;
@@ -1845,8 +1848,13 @@
                     return NULL;
                 }
                 TY_(AddCharToLexer)(lexer, c);
-                start = lexer->lexsize - 1;
-                state = CDATA_STARTTAG;
+                start = lexer->lexsize - 1;  /* set start of lexer to -> '<' char */
+                /* if javascript, only if NOT in a comment */
+                if ( TY_(IsJavaScript)( container ) &&
+                     !TY_(LexerInJavaComment)( doc ) )
+                {
+                    state = CDATA_STARTTAG;
+                }
             }
             else if (c == '/')
             {
@@ -1859,8 +1867,30 @@
                     TY_(UngetChar)(c, doc->docIn);
                     continue;
                 }
+
                 TY_(UngetChar)(c, doc->docIn);
 
+                /* had < + / + letter */
+                if ( TY_(IsJavaScript)( container ) &&
+                     TY_(LexerInJavaComment)( doc ) )
+                {
+                   start = lexer->lexsize - 1;
+                   SetLexerLocus( doc, lexer );
+                   /* if javascript insert backslash before /
+                      EXCEPT if XHTML */
+                   if ( !( TY_(HTMLVersion)(doc) & (X10S|X10T) ) )
+                   {
+                       lexer->columns -= 3;
+                       TY_(ReportError)(doc, NULL, NULL, BAD_CDATA_CONTENT_INSERTED);
+                       for (i = lexer->lexsize; i > start; --i)
+                           lexer->lexbuf[i] = lexer->lexbuf[i-1];
+
+                       lexer->lexbuf[start] = '\\';
+                       lexer->lexsize++;
+                   }
+                   continue;
+                }
+
                 start = lexer->lexsize;
                 state = CDATA_ENDTAG;
             }
@@ -1886,9 +1916,12 @@
                     continue;
                 }
                 TY_(UngetChar)(c, doc->docIn);
-
                 start = lexer->lexsize;
-                state = CDATA_ENDTAG;
+                if ( TY_(IsJavaScript)( container ) &&
+                     !TY_(LexerInJavaComment)( doc ) )
+                {
+                   state = CDATA_ENDTAG;
+                }
             }
             else
             {
@@ -1942,16 +1975,18 @@
                 /* if the end tag is not already escaped using backslash */
                 SetLexerLocus( doc, lexer );
                 lexer->columns -= 3;
-                TY_(ReportError)(doc, NULL, NULL, BAD_CDATA_CONTENT);
 
                 /* if javascript insert backslash before / */
                 if (TY_(IsJavaScript)(container))
                 {
+                    TY_(ReportError)(doc, NULL, NULL, BAD_CDATA_CONTENT_INSERTED);
                     for (i = lexer->lexsize; i > start-1; --i)
                         lexer->lexbuf[i] = lexer->lexbuf[i-1];
 
                     lexer->lexbuf[start-1] = '\\';
                     lexer->lexsize++;
+                } else {
+                    TY_(ReportError)(doc, NULL, NULL, BAD_CDATA_CONTENT);
                 }
             }
             state = CDATA_INTERMEDIATE;
@@ -2007,6 +2042,10 @@
 */
 static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode );
 
+#ifndef  NDEBUG
+#include "lexer-dbg.c"
+#else /* !#ifndef  NDEBUG */
+
 Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode )
 {
     Lexer* lexer = doc->lexer;
@@ -2050,6 +2089,9 @@
     return GetTokenFromStream( doc, mode );
 }
 
+#endif   /* #ifndef  NDEBUG y/n */
+
+
 static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
 {
     Lexer* lexer = doc->lexer;
@@ -3813,6 +3855,68 @@
     TY_(ReportError)(doc, NULL, NULL, MALFORMED_DOCTYPE);
     TY_(FreeNode)(doc, node);
     return NULL;
+}
+
+Bool TY_(LexerInJavaComment)( TidyDocImpl* doc )
+{
+    Lexer* lexer = doc->lexer;
+    uint i;
+    tmbchar c, last, inQuote;
+    Bool inComment1 = no;
+    Bool inComment2 = no;
+    if ( lexer && ( lexer->lexsize > lexer->txtstart ) )
+    {
+       last = 0;
+       inQuote = 0;
+       for( i = lexer->txtstart; i < lexer->txtend; i++ )
+       {
+          c = lexer->lexbuf[i];
+          if ( inQuote )
+          {
+             if ( ( ( c == inQuote ) && ( last != '\\' ) ) ||
+                  ( c == '\n' ) )
+                  inQuote = 0;
+          }
+          else if ( inComment1 )
+          {
+             /* have a '/*' start */
+             if (( c == '/' ) && ( last == '*' ))
+                inComment1 = no;
+          }
+          else if ( inComment2 )
+          {
+             /* have a '//' start */
+             if ( c == '\n' )
+                inComment2 = no;
+          }
+          else
+          {
+             /* not in any comment yet */
+             if ( c == '/' )
+             {
+                if ( last == '/' )
+                {
+                   inComment2 = yes; /* in a comment, until EOL */
+                }
+             }
+             else if ( c == '*' )
+             {
+                if ( last == '/' )
+                {
+                   inComment1 = yes;   /* in a comment, until '*'+'/' */
+                }
+             }
+             else if (( c == '"' ) || ( c == 0x27 ))
+             {
+                inQuote = c;
+             }
+          }
+          last = c;
+       }
+       if ( inQuote || inComment1 || inComment2 )
+          return yes;
+    }
+    return no;
 }
 
 /*
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\localize.c tidydev\src\localize.c
--- C:\FGCVS\tidy\src\localize.c	Wed Nov 14 21:15:06 2007
+++ tidydev\src\localize.c	Wed Jan 09 15:38:50 2008
@@ -144,6 +144,7 @@
   { MALFORMED_COMMENT,            "adjacent hyphens within comment"                                         }, /* Error */
   { BAD_COMMENT_CHARS,            "expecting -- or >"                                                       }, /* Error */
   { BAD_CDATA_CONTENT,            "'<' + '/' + letter not allowed here"                                     }, /* Error */
+  { BAD_CDATA_CONTENT_INSERTED,   "'<' + '/' + letter not allowed here - inserted escape"                   }, /* Error */
   { INCONSISTENT_NAMESPACE,       "HTML namespace doesn't match content"                                    }, /* Error */
   { SPACE_PRECEDING_XMLDECL,      "removing whitespace preceding XML Declaration"                           }, /* Error */
   { MALFORMED_DOCTYPE,            "discarding malformed <!DOCTYPE>"                                         }, /* Error */
@@ -1469,6 +1470,7 @@
     case BAD_COMMENT_CHARS:
     case BAD_XML_COMMENT:
     case BAD_CDATA_CONTENT:
+    case BAD_CDATA_CONTENT_INSERTED:
     case INCONSISTENT_NAMESPACE:
     case DOCTYPE_AFTER_TAGS:
     case DTYPE_NOT_UPPER_CASE:
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\mappedio.c tidydev\src\mappedio.c
--- C:\FGCVS\tidy\src\mappedio.c	Wed Mar 19 14:01:46 2008
+++ tidydev\src\mappedio.c	Tue Apr 15 14:05:40 2008
@@ -144,6 +144,9 @@
 {
     MappedFileSource *data = sourceData;
 
+#ifndef  NDEBUG
+    int  ch;
+#endif
     if ( !data->view || data->iter >= data->end )
     {
         data->pos += data->gran;
@@ -152,7 +155,13 @@
             return EndOfStream;
     }
 
+#ifdef   NDEBUG
     return *( data->iter++ );
+#else
+    ch = *( data->iter++ );
+    printf( "%c", ch );
+    return ch;
+#endif
 }
 
 static Bool TIDY_CALL mapped_eof( void *sourceData )
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\message.h tidydev\src\message.h
--- C:\FGCVS\tidy\src\message.h	Wed May 30 18:47:31 2007
+++ tidydev\src\message.h	Wed Aug 29 16:15:50 2007
@@ -103,6 +103,7 @@
 #define BAD_COMMENT_CHARS            30
 #define BAD_XML_COMMENT              31
 #define BAD_CDATA_CONTENT            32
+#define BAD_CDATA_CONTENT_INSERTED   88
 #define INCONSISTENT_NAMESPACE       33
 #define DOCTYPE_AFTER_TAGS           34
 #define MALFORMED_DOCTYPE            35
@@ -160,7 +161,7 @@
 #define MISSING_ATTRIBUTE            86
 #define WHITE_IN_URI                 87
 
-#define PREVIOUS_LOCATION            88 /* last */
+#define PREVIOUS_LOCATION            89 /* last */
 
 /* character encoding errors */
 
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\parser.c tidydev\src\parser.c
--- C:\FGCVS\tidy\src\parser.c	Sat Mar 29 13:59:11 2008
+++ tidydev\src\parser.c	Tue Apr 15 14:15:03 2008
@@ -1254,6 +1254,23 @@
             if ( nodeIsBR(node) )
                 TrimSpaces( doc, element );
 
+            if (node->implicit && !lexer->pushed)
+            {
+               /* 1747864 - check that we are NOT adding an implicit node,
+                  which is already next in the stream anyway ... */
+               Node * next = TY_(GetToken)(doc, mode);
+               if(next && (next->tag == node->tag))
+               {
+                   TY_(FreeNode)( doc, node );  /* toss this implicit node */
+                   node = next;  /* use the node from the stream */
+               }
+               else
+               {
+                  /* not the same - put it back */
+                  TY_(UngetToken)( doc );
+               }
+            }
+
             TY_(InsertNodeAtEnd)(element, node);
             
             if (node->implicit)
@@ -1310,11 +1327,13 @@
 
      will get corrupted.
     */
-    if ((TY_(nodeHasCM)(element, CM_BLOCK) || nodeIsDT(element)) &&
-        !TY_(nodeHasCM)(element, CM_MIXED))
-        TY_(InlineDup)(doc, NULL);
-    else if (TY_(nodeHasCM)(element, CM_INLINE))
-        TY_(PushInline)(doc, element);
+    if ( !TY_(nodeHasCM)(element, CM_MIXED) )
+    {
+        if (TY_(nodeHasCM)(element, CM_BLOCK) || nodeIsDT(element))
+            TY_(InlineDup)(doc, NULL);
+        else if (TY_(nodeHasCM)(element, CM_INLINE))
+            TY_(PushInline)(doc, element);
+    }
 
     if ( nodeIsNOBR(element) )
         doc->badLayout |= USING_NOBR;
@@ -3207,18 +3226,20 @@
                 ++HasTitle;
 
                 if (HasTitle > 1)
-                    TY_(ReportError)(doc, head, node,
-                                     head ?
-                                     TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS);
+                    if (head)
+                        TY_(ReportError)(doc, head, node, TOO_MANY_ELEMENTS_IN);
+                    else
+                        TY_(ReportError)(doc, head, node, TOO_MANY_ELEMENTS);
             }
             else if ( nodeIsBASE(node) )
             {
                 ++HasBase;
 
                 if (HasBase > 1)
-                    TY_(ReportError)(doc, head, node,
-                                     head ?
-                                     TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS);
+                    if (head)
+                        TY_(ReportError)(doc, head, node, TOO_MANY_ELEMENTS_IN);
+                    else
+                        TY_(ReportError)(doc, head, node, TOO_MANY_ELEMENTS);
             }
             else if ( nodeIsNOSCRIPT(node) )
             {
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\pprint.c tidydev\src\pprint.c
--- C:\FGCVS\tidy\src\pprint.c	Sat Mar 29 13:59:11 2008
+++ tidydev\src\pprint.c	Tue Apr 15 14:11:59 2008
@@ -1733,7 +1733,10 @@
 static int TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode )
 {
     assert( node != NULL );
-    if ( (mode & (CDATA|COMMENT)) && TY_(nodeIsText)(node) && node->end > node->start && start >= node->start )
+    if ( (mode & (CDATA|COMMENT)) &&
+       TY_(nodeIsTextLike)(node) &&
+       node->end > node->start &&
+       start >= node->start )
     {
         uint ch, ix = start;
         /* Skip whitespace. */
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\tags.c tidydev\src\tags.c
--- C:\FGCVS\tidy\src\tags.c	Sat Mar 29 13:59:12 2008
+++ tidydev\src\tags.c	Tue Apr 15 13:46:46 2008
@@ -870,6 +870,22 @@
   return no;
 }
 
+/* new 20070818 - tags that normally contain text */
+Bool TY_(nodeIsTextLike)( Node * node )
+{
+   switch ( node->type )
+   {
+   case TextNode:	  /* yes for sure */
+   case CDATATag:   /* maybe??? */
+   case SectionTag: /* maybe??? */
+   case AspTag:     /* yes? */
+   case JsteTag:    /* yes? */
+   case PhpTag:     /* yes for sure */
+      return yes;
+   }
+   return no;
+}
+
 Bool TY_(nodeIsElement)( Node* node )
 {
   return ( node && 
@@ -944,6 +960,8 @@
     }
     return 0;
 }
+
+const Dict * get_tag_block( void ) { return &tag_defs[0]; }
 
 /*
  * local variables:
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\tags.h tidydev\src\tags.h
--- C:\FGCVS\tidy\src\tags.h	Fri Dec 15 11:17:55 2006
+++ tidydev\src\tags.h	Sun Sep 02 16:32:46 2007
@@ -128,6 +128,8 @@
 
 Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node );
 
+Bool TY_(nodeIsTextLike)( Node * node ); /* new 20070818 - tags that normally contain text */
+
 #if 0
 /* Compare & result to operand.  If equal, then all bits
 ** requested are set.
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\tidylib.c tidydev\src\tidylib.c
--- C:\FGCVS\tidy\src\tidylib.c	Sat Mar 29 13:59:12 2008
+++ tidydev\src\tidylib.c	Tue Apr 15 13:39:26 2008
@@ -1268,6 +1268,7 @@
 
         /* drop style & class attributes and empty p, span elements */
         TY_(CleanWord2000)( doc, &doc->root );
+        TY_(CleanWord2003)( doc, &doc->root );
         TY_(DropEmptyElements)(doc, &doc->root);
     }
 
================================================================




// =========================================
// reviewed files

diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\pprint.c tidydev\src\pprint.c
--- C:\FGCVS\tidy\src\pprint.c	Sat Mar 29 13:59:11 2008
+++ tidydev\src\pprint.c	Mon Sep 03 15:14:15 2007
@@ -7,8 +7,8 @@
   CVS Info :
 
     $Author: arnaud02 $ 
-    $Date: 2008/03/22 20:23:37 $ 
-    $Revision: 1.119 $ 
+    $Date: 2007/02/11 17:14:27 $ 
+    $Revision: 1.118 $ 
 
 */
 
@@ -257,7 +257,6 @@
     int i;
     for (i = 0; unicode4cat[i].code && unicode4cat[i].code <= c; ++i)
         if (unicode4cat[i].code == c)
-        {
             /* wrapping before opening punctuation and initial quotes */
             if (unicode4cat[i].category == UCPS ||
                 unicode4cat[i].category == UCPI)
@@ -265,7 +264,6 @@
             /* else wrapping after this character */
             else
                 return WrapAfter;
-        }
     /* character has no effect on line wrapping */
     return NoWrapPoint;
 }
@@ -1733,7 +1731,10 @@
 static int TextStartsWithWhitespace( Lexer *lexer, Node *node, uint start, uint mode )
 {
     assert( node != NULL );
-    if ( (mode & (CDATA|COMMENT)) && TY_(nodeIsText)(node) && node->end > node->start && start >= node->start )
+    if ( (mode & (CDATA|COMMENT)) &&
+       TY_(nodeIsTextLike)(node) &&
+       node->end > node->start &&
+       start >= node->start )
     {
         uint ch, ix = start;
         /* Skip whitespace. */
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\tags.c tidydev\src\tags.c
--- C:\FGCVS\tidy\src\tags.c	Sat Mar 29 13:59:12 2008
+++ tidydev\src\tags.c	Tue Apr 15 13:46:46 2008
@@ -870,6 +870,22 @@
   return no;
 }
 
+/* new 20070818 - tags that normally contain text */
+Bool TY_(nodeIsTextLike)( Node * node )
+{
+   switch ( node->type )
+   {
+   case TextNode:	  /* yes for sure */
+   case CDATATag:   /* maybe??? */
+   case SectionTag: /* maybe??? */
+   case AspTag:     /* yes? */
+   case JsteTag:    /* yes? */
+   case PhpTag:     /* yes for sure */
+      return yes;
+   }
+   return no;
+}
+
 Bool TY_(nodeIsElement)( Node* node )
 {
   return ( node && 
@@ -944,6 +960,8 @@
     }
     return 0;
 }
+
+const Dict * get_tag_block( void ) { return &tag_defs[0]; }
 
 /*
  * local variables:
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\tags.h tidydev\src\tags.h
--- C:\FGCVS\tidy\src\tags.h	Fri Dec 15 11:17:55 2006
+++ tidydev\src\tags.h	Sun Sep 02 16:32:46 2007
@@ -128,6 +128,8 @@
 
 Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node );
 
+Bool TY_(nodeIsTextLike)( Node * node ); /* new 20070818 - tags that normally contain text */
+
 #if 0
 /* Compare & result to operand.  If equal, then all bits
 ** requested are set.
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\tidylib.c tidydev\src\tidylib.c
--- C:\FGCVS\tidy\src\tidylib.c	Sat Mar 29 13:59:12 2008
+++ tidydev\src\tidylib.c	Tue Apr 15 13:39:26 2008
@@ -1268,6 +1268,7 @@
 
         /* drop style & class attributes and empty p, span elements */
         TY_(CleanWord2000)( doc, &doc->root );
+        TY_(CleanWord2003)( doc, &doc->root );
         TY_(DropEmptyElements)(doc, &doc->root);
     }
 
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\clean.c tidydev\src\clean.c
--- C:\FGCVS\tidy\src\clean.c	Mon Aug 13 18:27:26 2007
+++ tidydev\src\clean.c	Thu Aug 16 13:31:15 2007
@@ -1768,7 +1768,9 @@
             attrIsLANG(attr)  ||
              ( (attrIsHEIGHT(attr) || attrIsWIDTH(attr)) &&
                (nodeIsTD(node) || nodeIsTR(node) || nodeIsTH(node)) ) ||
-             (attr->attribute && TY_(tmbstrncmp)(attr->attribute, "x:", 2) == 0) )
+             (attr->attribute && (
+             ( TY_(tmbstrncmp)(attr->attribute, "x:", 2) == 0 ) ||
+             ( TY_(tmbstrncmp)(attr->attribute, "v:", 2) == 0 ) ))) 
         {
             if (prev)
                 prev->next = next;
@@ -1992,7 +1994,9 @@
         {
             AttVal *attr = TY_(AttrGetById)(node, TidyAttr_REL);
 
-            if (AttrValueIs(attr, "File-List"))
+            /* some <link rel="???" ...> to discard */
+            if ( AttrValueIs(attr, "File-List") ||
+                 AttrValueIs(attr, "Edit-Time-Data") )
             {
                 node = TY_(DiscardElement)( doc, node );
                 continue;
@@ -2086,7 +2090,7 @@
         if (!node)
             return;
 
-        /* strip out style and class attributes */
+        /* strip out style, class, ... and "x:", "v:" attributes */
         if (TY_(nodeIsElement)(node))
             PurgeWord2000Attributes( doc, node );
 
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\clean.h tidydev\src\clean.h
--- C:\FGCVS\tidy\src\clean.h	Tue Sep 12 17:14:44 2006
+++ tidydev\src\clean.h	Sat Aug 18 19:55:25 2007
@@ -58,7 +58,7 @@
  as inline.
 */
 void TY_(CleanWord2000)( TidyDocImpl* doc, Node *node);
-
+void TY_(CleanWord2003)( TidyDocImpl* doc, Node *node); /* new 20070523 */
 Bool TY_(IsWord2000)( TidyDocImpl* doc );
 
 /* where appropriate move object elements from head to body */

diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\lexer.c tidydev\src\lexer.c
--- C:\FGCVS\tidy\src\lexer.c	Sat Mar 29 13:59:10 2008
+++ tidydev\src\lexer.c	Sun Sep 02 16:25:23 2007
@@ -1,13 +1,13 @@
 /* lexer.c -- Lexer for html parser
   
-  (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
+  (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
   See tidy.h for the copyright notice.
   
   CVS Info :
 
     $Author: arnaud02 $ 
-    $Date: 2008/03/22 21:06:55 $ 
-    $Revision: 1.194 $ 
+    $Date: 2007/05/13 18:13:34 $ 
+    $Revision: 1.193 $ 
 
 */
 
@@ -1628,9 +1628,6 @@
             return no;
         }
         break;
-    case TidyDoctypeOmit:
-        assert(0);
-        break;
     }
 
     return no;
@@ -1800,6 +1797,9 @@
     CDATA_ENDTAG
 } CDATAState;
 
+/* forward reference */
+Bool TY_(LexerInJavaComment)( TidyDocImpl* doc );
+
 static Node *GetCDATA( TidyDocImpl* doc, Node *container )
 {
     Lexer* lexer = doc->lexer;
@@ -1845,8 +1845,13 @@
                     return NULL;
                 }
                 TY_(AddCharToLexer)(lexer, c);
-                start = lexer->lexsize - 1;
-                state = CDATA_STARTTAG;
+                start = lexer->lexsize - 1;  /* set start of lexer to -> '<' char */
+                /* if javascript, only if NOT in a comment */
+                if ( TY_(IsJavaScript)( container ) &&
+                     !TY_(LexerInJavaComment)( doc ) )
+                {
+                    state = CDATA_STARTTAG;
+                }
             }
             else if (c == '/')
             {
@@ -1859,8 +1864,30 @@
                     TY_(UngetChar)(c, doc->docIn);
                     continue;
                 }
+
                 TY_(UngetChar)(c, doc->docIn);
 
+                /* had < + / + letter */
+                if ( TY_(IsJavaScript)( container ) &&
+                     TY_(LexerInJavaComment)( doc ) )
+                {
+                   start = lexer->lexsize - 1;
+                   SetLexerLocus( doc, lexer );
+                   /* if javascript insert backslash before /
+                      EXCEPT if XHTML */
+                   if ( !( TY_(HTMLVersion)(doc) & (X10S|X10T) ) )
+                   {
+                       lexer->columns -= 3;
+                       TY_(ReportError)(doc, NULL, NULL, BAD_CDATA_CONTENT_INSERTED);
+                       for (i = lexer->lexsize; i > start; --i)
+                           lexer->lexbuf[i] = lexer->lexbuf[i-1];
+
+                       lexer->lexbuf[start] = '\\';
+                       lexer->lexsize++;
+                   }
+                   continue;
+                }
+
                 start = lexer->lexsize;
                 state = CDATA_ENDTAG;
             }
@@ -1886,9 +1913,12 @@
                     continue;
                 }
                 TY_(UngetChar)(c, doc->docIn);
-
                 start = lexer->lexsize;
-                state = CDATA_ENDTAG;
+                if ( TY_(IsJavaScript)( container ) &&
+                     !TY_(LexerInJavaComment)( doc ) )
+                {
+                   state = CDATA_ENDTAG;
+                }
             }
             else
             {
@@ -1942,16 +1972,18 @@
                 /* if the end tag is not already escaped using backslash */
                 SetLexerLocus( doc, lexer );
                 lexer->columns -= 3;
-                TY_(ReportError)(doc, NULL, NULL, BAD_CDATA_CONTENT);
 
                 /* if javascript insert backslash before / */
                 if (TY_(IsJavaScript)(container))
                 {
+                    TY_(ReportError)(doc, NULL, NULL, BAD_CDATA_CONTENT_INSERTED);
                     for (i = lexer->lexsize; i > start-1; --i)
                         lexer->lexbuf[i] = lexer->lexbuf[i-1];
 
                     lexer->lexbuf[start-1] = '\\';
                     lexer->lexsize++;
+                } else {
+                    TY_(ReportError)(doc, NULL, NULL, BAD_CDATA_CONTENT);
                 }
             }
             state = CDATA_INTERMEDIATE;
@@ -2007,6 +2039,10 @@
 */
 static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode );
 
+#ifndef  NDEBUG
+#include "lexer-dbg.c"
+#else /* !#ifndef  NDEBUG */
+
 Node* TY_(GetToken)( TidyDocImpl* doc, GetTokenMode mode )
 {
     Lexer* lexer = doc->lexer;
@@ -2050,6 +2086,9 @@
     return GetTokenFromStream( doc, mode );
 }
 
+#endif   /* #ifndef  NDEBUG y/n */
+
+
 static Node* GetTokenFromStream( TidyDocImpl* doc, GetTokenMode mode )
 {
     Lexer* lexer = doc->lexer;
@@ -3813,6 +3852,68 @@
     TY_(ReportError)(doc, NULL, NULL, MALFORMED_DOCTYPE);
     TY_(FreeNode)(doc, node);
     return NULL;
+}
+
+Bool TY_(LexerInJavaComment)( TidyDocImpl* doc )
+{
+    Lexer* lexer = doc->lexer;
+    uint i;
+    tmbchar c, last, inQuote;
+    Bool inComment1 = no;
+    Bool inComment2 = no;
+    if ( lexer && ( lexer->lexsize > lexer->txtstart ) )
+    {
+       last = 0;
+       inQuote = 0;
+       for( i = lexer->txtstart; i < lexer->txtend; i++ )
+       {
+          c = lexer->lexbuf[i];
+          if ( inQuote )
+          {
+             if ( ( ( c == inQuote ) && ( last != '\\' ) ) ||
+                  ( c == '\n' ) )
+                  inQuote = 0;
+          }
+          else if ( inComment1 )
+          {
+             /* have a '/*' start */
+             if (( c == '/' ) && ( last == '*' ))
+                inComment1 = no;
+          }
+          else if ( inComment2 )
+          {
+             /* have a '//' start */
+             if ( c == '\n' )
+                inComment2 = no;
+          }
+          else
+          {
+             /* not in any comment yet */
+             if ( c == '/' )
+             {
+                if ( last == '/' )
+                {
+                   inComment2 = yes; /* in a comment, until EOL */
+                }
+             }
+             else if ( c == '*' )
+             {
+                if ( last == '/' )
+                {
+                   inComment1 = yes;   /* in a comment, until '*'+'/' */
+                }
+             }
+             else if (( c == '"' ) || ( c == 0x27 ))
+             {
+                inQuote = c;
+             }
+          }
+          last = c;
+       }
+       if ( inQuote || inComment1 || inComment2 )
+          return yes;
+    }
+    return no;
 }
 
 /*
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\localize.c tidydev\src\localize.c
--- C:\FGCVS\tidy\src\localize.c	Wed Nov 14 21:15:06 2007
+++ tidydev\src\localize.c	Wed Jan 09 15:38:50 2008
@@ -144,6 +144,7 @@
   { MALFORMED_COMMENT,            "adjacent hyphens within comment"                                         }, /* Error */
   { BAD_COMMENT_CHARS,            "expecting -- or >"                                                       }, /* Error */
   { BAD_CDATA_CONTENT,            "'<' + '/' + letter not allowed here"                                     }, /* Error */
+  { BAD_CDATA_CONTENT_INSERTED,   "'<' + '/' + letter not allowed here - inserted escape"                   }, /* Error */
   { INCONSISTENT_NAMESPACE,       "HTML namespace doesn't match content"                                    }, /* Error */
   { SPACE_PRECEDING_XMLDECL,      "removing whitespace preceding XML Declaration"                           }, /* Error */
   { MALFORMED_DOCTYPE,            "discarding malformed <!DOCTYPE>"                                         }, /* Error */
@@ -1469,6 +1470,7 @@
     case BAD_COMMENT_CHARS:
     case BAD_XML_COMMENT:
     case BAD_CDATA_CONTENT:
+    case BAD_CDATA_CONTENT_INSERTED:
     case INCONSISTENT_NAMESPACE:
     case DOCTYPE_AFTER_TAGS:
     case DTYPE_NOT_UPPER_CASE:
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\mappedio.c tidydev\src\mappedio.c
--- C:\FGCVS\tidy\src\mappedio.c	Wed Mar 19 14:01:46 2008
+++ tidydev\src\mappedio.c	Wed Aug 29 16:49:46 2007
@@ -1,11 +1,11 @@
 /* Interface to mmap style I/O
 
-   (c) 2006-2008 (W3C) MIT, ERCIM, Keio University
+   (c) 2006-2007 (W3C) MIT, ERCIM, Keio University
    See tidy.h for the copyright notice.
 
    Originally contributed by Cory Nelson and Nuno Lopes
 
-   $Id: mappedio.c,v 1.14 2008/03/18 20:19:35 arnaud02 Exp $
+   $Id: mappedio.c,v 1.13 2007/02/07 12:08:31 arnaud02 Exp $
 */
 
 /* keep these here to keep file non-empty */
@@ -60,10 +60,9 @@
         return -1;
 
     fd = fileno(fp);
-    if ( fstat(fd, &sbuf) == -1
-         || sbuf.st_size == 0
-         || (fin->base = mmap(0, fin->size = sbuf.st_size, PROT_READ,
-                              MAP_SHARED, fd, 0)) == MAP_FAILED)
+    if ( fstat(fd, &sbuf) == -1 ||
+         (fin->base = mmap(0, fin->size = sbuf.st_size, PROT_READ, MAP_SHARED,
+                           fd, 0)) == MAP_FAILED)
     {
         TidyFree( allocator, fin );
         /* Fallback on standard I/O */
@@ -143,7 +142,9 @@
 static int TIDY_CALL mapped_getByte( void *sourceData )
 {
     MappedFileSource *data = sourceData;
-
+#ifndef  NDEBUG
+    int  ch;
+#endif
     if ( !data->view || data->iter >= data->end )
     {
         data->pos += data->gran;
@@ -151,8 +152,13 @@
         if ( data->pos >= data->size || mapped_openView(data) != 0 )
             return EndOfStream;
     }
-
+#ifdef   NDEBUG
     return *( data->iter++ );
+#else
+    ch = *( data->iter++ );
+    printf( "%c", ch );
+    return ch;
+#endif
 }
 
 static Bool TIDY_CALL mapped_eof( void *sourceData )
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\message.h tidydev\src\message.h
--- C:\FGCVS\tidy\src\message.h	Wed May 30 18:47:31 2007
+++ tidydev\src\message.h	Wed Aug 29 16:15:50 2007
@@ -103,6 +103,7 @@
 #define BAD_COMMENT_CHARS            30
 #define BAD_XML_COMMENT              31
 #define BAD_CDATA_CONTENT            32
+#define BAD_CDATA_CONTENT_INSERTED   88
 #define INCONSISTENT_NAMESPACE       33
 #define DOCTYPE_AFTER_TAGS           34
 #define MALFORMED_DOCTYPE            35
@@ -160,7 +161,7 @@
 #define MISSING_ATTRIBUTE            86
 #define WHITE_IN_URI                 87
 
-#define PREVIOUS_LOCATION            88 /* last */
+#define PREVIOUS_LOCATION            89 /* last */
 
 /* character encoding errors */
 
diff -ur -x *.manifest -x *.map C:\FGCVS\tidy\src\parser.c tidydev\src\parser.c
--- C:\FGCVS\tidy\src\parser.c	Sat Mar 29 13:59:11 2008
+++ tidydev\src\parser.c	Thu Aug 16 13:43:02 2007
@@ -6,8 +6,8 @@
   CVS Info :
 
     $Author: arnaud02 $ 
-    $Date: 2008/03/22 20:02:42 $ 
-    $Revision: 1.187 $ 
+    $Date: 2007/08/15 17:43:02 $ 
+    $Revision: 1.186 $ 
 
 */
 
@@ -1254,6 +1254,23 @@
             if ( nodeIsBR(node) )
                 TrimSpaces( doc, element );
 
+            if (node->implicit && !lexer->pushed)
+            {
+               /* 1747864 - check that we are NOT adding an implicit node,
+                  which is already next in the stream anyway ... */
+               Node * next = TY_(GetToken)(doc, mode);
+               if(next && (next->tag == node->tag))
+               {
+                   TY_(FreeNode)( doc, node );  /* toss this implicit node */
+                   node = next;  /* use the node from the stream */
+               }
+               else
+               {
+                  /* not the same - put it back */
+                  TY_(UngetToken)( doc );
+               }
+            }
+
             TY_(InsertNodeAtEnd)(element, node);
             
             if (node->implicit)
@@ -1310,11 +1327,13 @@
 
      will get corrupted.
     */
-    if ((TY_(nodeHasCM)(element, CM_BLOCK) || nodeIsDT(element)) &&
-        !TY_(nodeHasCM)(element, CM_MIXED))
-        TY_(InlineDup)(doc, NULL);
-    else if (TY_(nodeHasCM)(element, CM_INLINE))
-        TY_(PushInline)(doc, element);
+    if ( !TY_(nodeHasCM)(element, CM_MIXED) )
+    {
+        if (TY_(nodeHasCM)(element, CM_BLOCK) || nodeIsDT(element))
+            TY_(InlineDup)(doc, NULL);
+        else if (TY_(nodeHasCM)(element, CM_INLINE))
+            TY_(PushInline)(doc, element);
+    }
 
     if ( nodeIsNOBR(element) )
         doc->badLayout |= USING_NOBR;
@@ -3207,18 +3226,20 @@
                 ++HasTitle;
 
                 if (HasTitle > 1)
-                    TY_(ReportError)(doc, head, node,
-                                     head ?
-                                     TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS);
+                    if (head)
+                        TY_(ReportError)(doc, head, node, TOO_MANY_ELEMENTS_IN);
+                    else
+                        TY_(ReportError)(doc, head, node, TOO_MANY_ELEMENTS);
             }
             else if ( nodeIsBASE(node) )
             {
                 ++HasBase;
 
                 if (HasBase > 1)
-                    TY_(ReportError)(doc, head, node,
-                                     head ?
-                                     TOO_MANY_ELEMENTS_IN : TOO_MANY_ELEMENTS);
+                    if (head)
+                        TY_(ReportError)(doc, head, node, TOO_MANY_ELEMENTS_IN);
+                    else
+                        TY_(ReportError)(doc, head, node, TOO_MANY_ELEMENTS);
             }
             else if ( nodeIsNOSCRIPT(node) )
             {
// =============================================================
