File jtidy-CVE-2023-34623.patch of Package jtidy.29894

diff -burN jtidy_orig/src/main/java/org/w3c/tidy/ExcessiveNesting.java jtidy/src/main/java/org/w3c/tidy/ExcessiveNesting.java
--- jtidy_orig/src/main/java/org/w3c/tidy/ExcessiveNesting.java	1970-01-01 01:00:00.000000000 +0100
+++ jtidy/src/main/java/org/w3c/tidy/ExcessiveNesting.java	2023-07-17 15:00:48.402407248 +0200
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2023 Business Operation Systems GmbH. All Rights Reserved.
+ */
+package org.w3c.tidy;
+
+/**
+ * Exception signaling a document with excessive nesting that is considered a denial-of-service attack.
+ */
+public class ExcessiveNesting extends Exception {
+
+	/** Serialization version; required because {@link Exception} is {@link java.io.Serializable}. */
+	private static final long serialVersionUID = 1L;
+
+	/**
+	 * Creates a {@link ExcessiveNesting} with a fixed, human-readable message.
+	 */
+	public ExcessiveNesting() {
+		super("Document nesting is too deep");
+	}
+}
diff -burN jtidy_orig/src/main/java/org/w3c/tidy/ParserImpl.java jtidy/src/main/java/org/w3c/tidy/ParserImpl.java
--- jtidy_orig/src/main/java/org/w3c/tidy/ParserImpl.java	2007-10-23 16:01:23.000000000 +0200
+++ jtidy/src/main/java/org/w3c/tidy/ParserImpl.java	2023-07-18 16:20:13.866940607 +0200
@@ -172,12 +172,18 @@
     }
 
     /**
-     * @param lexer
-     * @param node
-     * @param mode
+     * @param lexer the Lexer to use, passed on to the tag's parser
+     * @param node the node whose tag-specific parser is invoked
+     * @param mode the content mode passed on to that parser
+     * @param nestingLevel The current nesting level of the document. Extremely nested documents are considered an error.
+     * @throws ExcessiveNesting When {@code nestingLevel} is greater than 500.
      */
-    protected static void parseTag(Lexer lexer, Node node, short mode)
+    protected static void parseTag(Lexer lexer, Node node, short mode, int nestingLevel) throws ExcessiveNesting
     {
+        if (nestingLevel > 500) {
+    		throw new ExcessiveNesting();
+    	}
+
         // Fix by GLP 2000-12-21. Need to reset insertspace if this
         // is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
         if ((node.tag.model & Dict.CM_EMPTY) != 0)
@@ -200,7 +206,7 @@
             return;
         }
 
-        node.tag.getParser().parse(lexer, node, mode);
+        node.tag.getParser().parse(lexer, node, mode, nestingLevel + 1);
     }
 
     /**
@@ -209,7 +215,7 @@
      * @param element
      * @param node
      */
-    protected static void moveToHead(Lexer lexer, Node element, Node node)
+    protected static void moveToHead(Lexer lexer, Node element, Node node, int nestingLevel) throws ExcessiveNesting
     {
         Node head;
         node.removeNode(); // make sure that node is isolated
@@ -236,7 +242,7 @@
 
             if (node.tag.getParser() != null)
             {
-                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
             }
         }
         else
@@ -264,9 +270,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node html, short mode)
+        public void parse(Lexer lexer, Node html, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node, head;
             Node frameset = null;
@@ -310,7 +317,7 @@
 
             head = node;
             html.insertNodeAtEnd(head);
-            HEAD.parse(lexer, head, mode);
+            HEAD.parse(lexer, head, mode, nestingLevel);
 
             while (true)
             {
@@ -323,7 +330,7 @@
                         // implied body
                         node = lexer.inferredTag("body");
                         html.insertNodeAtEnd(node);
-                        BODY.parse(lexer, node, mode);
+                        BODY.parse(lexer, node, mode, nestingLevel);
                     }
 
                     return;
@@ -370,7 +377,7 @@
                             lexer.report.warning(lexer, html, noframes, Report.INSERTING_TAG);
                         }
 
-                        parseTag(lexer, noframes, mode);
+                        parseTag(lexer, noframes, mode, nestingLevel);
                         continue;
                     }
 
@@ -397,7 +404,7 @@
                     }
 
                     html.insertNodeAtEnd(node);
-                    parseTag(lexer, node, mode);
+                    parseTag(lexer, node, mode, nestingLevel);
 
                     // see if it includes a noframes element so that we can merge subsequent noframes elements
 
@@ -433,7 +440,7 @@
                         frameset.insertNodeAtEnd(noframes);
                     }
 
-                    parseTag(lexer, noframes, mode);
+                    parseTag(lexer, noframes, mode, nestingLevel);
                     continue;
                 }
 
@@ -441,7 +448,7 @@
                 {
                     if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
                     {
-                        moveToHead(lexer, html, node);
+                        moveToHead(lexer, html, node, nestingLevel);
                         continue;
                     }
 
@@ -469,7 +476,7 @@
                     }
 
                     lexer.constrainVersion(Dict.VERS_FRAMESET);
-                    parseTag(lexer, noframes, mode);
+                    parseTag(lexer, noframes, mode, nestingLevel);
                     continue;
                 }
 
@@ -480,7 +487,7 @@
 
             // node must be body
             html.insertNodeAtEnd(node);
-            parseTag(lexer, node, mode);
+            parseTag(lexer, node, mode, nestingLevel);
             lexer.seenEndHtml = true;
         }
 
@@ -493,9 +500,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+	 * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node head, short mode)
+        public void parse(Lexer lexer, Node head, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node;
             int hasTitle = 0;
@@ -573,7 +581,7 @@
                     }
 
                     head.insertNodeAtEnd(node);
-                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
                     continue;
                 }
 
@@ -600,9 +608,9 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node title, short mode)
+        public void parse(Lexer lexer, Node title, short mode, int nestingLevel)
         {
             Node node;
 
@@ -671,9 +679,9 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node script, short mode)
+        public void parse(Lexer lexer, Node script, short mode, int nestingLevel)
         {
             // This isn't quite right for CDATA content as it recognises tags within the content and parses them
             // accordingly. This will unfortunately screw up scripts which include < + letter, < + !, < + ? or < + / +
@@ -696,9 +704,9 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node body, short mode)
+        public void parse(Lexer lexer, Node body, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node;
             boolean checkstack, iswhitenode;
@@ -753,7 +761,7 @@
                     if (node.type == Node.START_TAG)
                     {
                         body.insertNodeAtEnd(node);
-                        BLOCK.parse(lexer, node, mode);
+                        BLOCK.parse(lexer, node, mode, nestingLevel);
                         continue;
                     }
 
@@ -809,7 +817,7 @@
                         lexer.ungetToken();
                         para = lexer.inferredTag("p");
                         body.insertNodeAtEnd(para);
-                        parseTag(lexer, para, mode);
+                        parseTag(lexer, para, mode, nestingLevel);
                         mode = Lexer.MIXED_CONTENT;
                         continue;
                     }
@@ -872,7 +880,7 @@
 
                     if ((node.tag.model & Dict.CM_HEAD) != 0)
                     {
-                        moveToHead(lexer, body, node);
+                        moveToHead(lexer, body, node, nestingLevel);
                         continue;
                     }
 
@@ -971,7 +979,7 @@
                     }
 
                     body.insertNodeAtEnd(node);
-                    parseTag(lexer, node, mode);
+                    parseTag(lexer, node, mode, nestingLevel);
                     continue;
                 }
 
@@ -989,9 +997,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node frameset, short mode)
+        public void parse(Lexer lexer, Node frameset, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node;
             TagTable tt = lexer.configuration.tt;
@@ -1023,7 +1032,7 @@
                 {
                     if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
                     {
-                        moveToHead(lexer, frameset, node);
+                        moveToHead(lexer, frameset, node, nestingLevel);
                         continue;
                     }
                 }
@@ -1039,7 +1048,7 @@
                 {
                     frameset.insertNodeAtEnd(node);
                     lexer.excludeBlocks = false;
-                    parseTag(lexer, node, Lexer.MIXED_CONTENT);
+                    parseTag(lexer, node, Lexer.MIXED_CONTENT, nestingLevel);
                     continue;
                 }
                 else if (node.type == Node.START_END_TAG && (node.tag.model & Dict.CM_FRAMES) != 0)
@@ -1064,9 +1073,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+	 * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node element, short mode)
+        public void parse(Lexer lexer, Node element, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node, parent;
             TagTable tt = lexer.configuration.tt;
@@ -1537,7 +1547,7 @@
 
                     if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
                     {
-                        moveToHead(lexer, element, node);
+                        moveToHead(lexer, element, node, nestingLevel);
                         continue;
                     }
 
@@ -1583,7 +1593,7 @@
                     }
 
                     element.insertNodeAtEnd(node);
-                    parseTag(lexer, node, mode);
+                    parseTag(lexer, node, mode, nestingLevel);
                     continue;
                 }
 
@@ -1607,7 +1617,7 @@
     public static class ParseList implements Parser
     {
 
-        public void parse(Lexer lexer, Node list, short mode)
+        public void parse(Lexer lexer, Node list, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node;
             Node parent;
@@ -1703,7 +1713,7 @@
 
                 // node should be <LI>
                 list.insertNodeAtEnd(node);
-                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
             }
 
             if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
@@ -1724,9 +1734,9 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node element, short mode)
+        public void parse(Lexer lexer, Node element, short mode, int nestingLevel) throws ExcessiveNesting
         {
             if (lexer.isvoyager)
             {
@@ -1749,7 +1759,7 @@
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node list, short mode)
+        public void parse(Lexer lexer, Node list, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node, parent;
             TagTable tt = lexer.configuration.tt;
@@ -1830,7 +1840,7 @@
                     }
 
                     // and parse contents of center
-                    parseTag(lexer, node, mode);
+                    parseTag(lexer, node, mode, nestingLevel);
 
                     // now create a new dl element
                     list = lexer.inferredTag("dl");
@@ -1868,7 +1878,7 @@
 
                 // node should be <DT> or <DD>
                 list.insertNodeAtEnd(node);
-                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
             }
 
             lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
@@ -1884,9 +1894,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node pre, short mode)
+        public void parse(Lexer lexer, Node pre, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node;
             TagTable tt = lexer.configuration.tt;
@@ -1989,7 +2000,7 @@
                     }
 
                     pre.insertNodeAtEnd(node);
-                    parseTag(lexer, node, Lexer.PREFORMATTED);
+                    parseTag(lexer, node, Lexer.PREFORMATTED, nestingLevel);
                     continue;
                 }
 
@@ -2010,9 +2021,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node element, short mode)
+        public void parse(Lexer lexer, Node element, short mode, int nestingLevel) throws ExcessiveNesting
         {
             // element is node created by the lexer upon seeing the start tag, or by the parser when the start tag is
             // inferred.
@@ -2161,7 +2173,7 @@
                         lexer.ungetToken();
                         node = lexer.inferredTag("p");
                         element.insertNodeAtEnd(node);
-                        parseTag(lexer, node, Lexer.MIXED_CONTENT);
+                        parseTag(lexer, node, Lexer.MIXED_CONTENT, nestingLevel);
                         continue;
                     }
 
@@ -2278,7 +2290,7 @@
 
                         if ((node.tag.model & Dict.CM_HEAD) != 0)
                         {
-                            moveToHead(lexer, element, node);
+                            moveToHead(lexer, element, node, nestingLevel);
                             continue;
                         }
 
@@ -2332,7 +2344,7 @@
 
                         if ((node.tag.model & Dict.CM_HEAD) != 0)
                         {
-                            moveToHead(lexer, element, node);
+                            moveToHead(lexer, element, node, nestingLevel);
                             continue;
                         }
 
@@ -2465,6 +2477,7 @@
                     }
 
                     parseTag(lexer, node, Lexer.IGNORE_WHITESPACE // Lexer.MixedContent
+                        , nestingLevel
                     );
                     continue;
                 }
@@ -2507,9 +2520,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node table, short mode)
+        public void parse(Lexer lexer, Node table, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node, parent;
             int istackbase;
@@ -2560,7 +2574,7 @@
 
                         if (!(node.type == Node.TEXT_NODE)) // #427662 - was (!node.type == TextNode) - fix by Young
                         {
-                            parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                            parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
                         }
 
                         lexer.exiled = false;
@@ -2568,7 +2582,7 @@
                     }
                     else if ((node.tag.model & Dict.CM_HEAD) != 0)
                     {
-                        moveToHead(lexer, table, node);
+                        moveToHead(lexer, table, node, nestingLevel);
                         continue;
                     }
                 }
@@ -2618,7 +2632,7 @@
                 {
                     table.insertNodeAtEnd(node);
 
-                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
                     continue;
                 }
 
@@ -2640,9 +2654,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node colgroup, short mode)
+        public void parse(Lexer lexer, Node colgroup, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node, parent;
             TagTable tt = lexer.configuration.tt;
@@ -2715,7 +2730,7 @@
 
                 // node should be <COL>
                 colgroup.insertNodeAtEnd(node);
-                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
             }
         }
 
@@ -2728,9 +2743,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+	 * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node rowgroup, short mode)
+        public void parse(Lexer lexer, Node rowgroup, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node, parent;
             TagTable tt = lexer.configuration.tt;
@@ -2796,7 +2812,7 @@
                         // #427662 was (!node.type == TextNode) fix by Young 04 Aug 00
                         if (node.type != Node.TEXT_NODE)
                         {
-                            parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                            parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
                         }
 
                         lexer.exiled = false;
@@ -2805,7 +2821,7 @@
                     else if ((node.tag.model & Dict.CM_HEAD) != 0)
                     {
                         lexer.report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
-                        moveToHead(lexer, rowgroup, node);
+                        moveToHead(lexer, rowgroup, node, nestingLevel);
                         continue;
                     }
                 }
@@ -2872,7 +2888,7 @@
 
                 // node should be <TR>
                 rowgroup.insertNodeAtEnd(node);
-                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
             }
             Node.trimEmptyElement(lexer, rowgroup);
         }
@@ -2885,9 +2901,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node row, short mode)
+        public void parse(Lexer lexer, Node row, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node, parent;
             boolean excludeState;
@@ -2997,7 +3014,7 @@
 
                         if (node.type != Node.TEXT_NODE)
                         {
-                            parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                            parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
                         }
 
                         lexer.exiled = false;
@@ -3006,7 +3023,7 @@
                     else if ((node.tag.model & Dict.CM_HEAD) != 0)
                     {
                         lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
-                        moveToHead(lexer, row, node);
+                        moveToHead(lexer, row, node, nestingLevel);
                         continue;
                     }
                 }
@@ -3021,7 +3038,7 @@
                 row.insertNodeAtEnd(node);
                 excludeState = lexer.excludeBlocks;
                 lexer.excludeBlocks = false;
-                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
                 lexer.excludeBlocks = excludeState;
 
                 // pop inline stack
@@ -3044,9 +3061,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node noframes, short mode)
+        public void parse(Lexer lexer, Node noframes, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node;
             TagTable tt = lexer.configuration.tt;
@@ -3103,7 +3121,7 @@
                 {
                     boolean seenbody = lexer.seenEndBody;
                     noframes.insertNodeAtEnd(node);
-                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); // MixedContent
+                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel); // MixedContent
 
                     if (seenbody)
                     {
@@ -3139,7 +3157,7 @@
                         }
                         noframes.insertNodeAtEnd(node);
                     }
-                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
                     // MixedContent
                     continue;
                 }
@@ -3159,9 +3177,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node field, short mode)
+        public void parse(Lexer lexer, Node field, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node;
             TagTable tt = lexer.configuration.tt;
@@ -3187,7 +3206,7 @@
                     && (node.tag == tt.tagOption || node.tag == tt.tagOptgroup || node.tag == tt.tagScript))
                 {
                     field.insertNodeAtEnd(node);
-                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
                     continue;
                 }
 
@@ -3207,9 +3226,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node field, short mode)
+        public void parse(Lexer lexer, Node field, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node;
             TagTable tt = lexer.configuration.tt;
@@ -3293,9 +3313,10 @@
     {
 
         /**
-         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+         * @throws ExcessiveNesting When excessive nesting is detected.
+         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
          */
-        public void parse(Lexer lexer, Node field, short mode)
+        public void parse(Lexer lexer, Node field, short mode, int nestingLevel) throws ExcessiveNesting
         {
             Node node;
             TagTable tt = lexer.configuration.tt;
@@ -3325,7 +3346,7 @@
                     }
 
                     field.insertNodeAtEnd(node);
-                    parseTag(lexer, node, Lexer.MIXED_CONTENT);
+                    parseTag(lexer, node, Lexer.MIXED_CONTENT, nestingLevel);
                     continue;
                 }
 
@@ -3394,7 +3415,11 @@
             }
 
             document.insertNodeAtEnd(html);
-            HTML.parse(lexer, html, (short) 0); // TODO?
+            try {
+		HTML.parse(lexer, html, (short) 0, 0);
+	    } catch (ExcessiveNesting ex) {
+                lexer.report.error(lexer, null, node, Report.DOCUMENT_WITH_EXCESSIVE_NESTING);
+	    }
             break;
         }
 
diff -burN jtidy_orig/src/main/java/org/w3c/tidy/Parser.java jtidy/src/main/java/org/w3c/tidy/Parser.java
--- jtidy_orig/src/main/java/org/w3c/tidy/Parser.java	2004-09-05 22:44:47.000000000 +0200
+++ jtidy/src/main/java/org/w3c/tidy/Parser.java	2023-07-17 15:02:33.075886928 +0200
@@ -68,7 +68,9 @@
      * @param lexer Lexer
      * @param node node created by the lexer upon seeing the start tag, or by the parser when the start tag is inferred
      * @param mode content mode
+     * @param nestingLevel The current nesting level of the document. Extremely nested documents are considered an error.
+     * @throws ExcessiveNesting When excessive nesting is detected.
      */
-    void parse(Lexer lexer, Node node, short mode);
+    void parse(Lexer lexer, Node node, short mode, int nestingLevel) throws ExcessiveNesting;
 
 }
\ No newline at end of file
diff -burN jtidy_orig/src/main/java/org/w3c/tidy/Report.java jtidy/src/main/java/org/w3c/tidy/Report.java
--- jtidy_orig/src/main/java/org/w3c/tidy/Report.java	2004-09-26 22:10:11.000000000 +0200
+++ jtidy/src/main/java/org/w3c/tidy/Report.java	2023-07-17 16:20:11.362126725 +0200
@@ -325,6 +325,11 @@
     public static final short UNEXPECTED_ENDTAG_IN = 47;
 
     /**
+     * excessive nesting that is considered a denial-of-service attack.
+     */
+    public static final short DOCUMENT_WITH_EXCESSIVE_NESTING = 48;
+
+    /**
      * replacing element.
      */
     public static final short REPLACING_ELEMENT = 83;
diff -burN jtidy_orig/src/test/java/org/w3c/tidy/TestCVE_2023_34623.java jtidy/src/test/java/org/w3c/tidy/TestCVE_2023_34623.java
--- jtidy_orig/src/test/java/org/w3c/tidy/TestCVE_2023_34623.java	1970-01-01 01:00:00.000000000 +0100
+++ jtidy/src/test/java/org/w3c/tidy/TestCVE_2023_34623.java	2023-07-17 16:21:06.522199563 +0200
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023 Business Operation Systems GmbH. All Rights Reserved.
+ */
+package org.w3c.tidy;
+
+import java.io.StringReader;
+
+import junit.framework.TestCase;
+
+/**
+ * Test case for CVE-2023-34623.
+ *
+ * @see "https://nvd.nist.gov/vuln/detail/CVE-2023-34623"
+ */
+public class TestCVE_2023_34623 extends TestCase {
+
+    /** Number of nested {@code <div>} elements in the generated document. */
+    private static final int NESTING_LEVEL = 9999;
+
+    /**
+     * Checks that excessive nesting does not result in a crash.
+     */
+    public void testDeepNesting() {
+        String htmlData = deeplyNestedDoc();
+        Tidy tidy = new Tidy();
+        try (StringReader stringReader = new StringReader(htmlData)) {
+            tidy.parse(stringReader, System.out);
+        }
+        assertEquals(1, tidy.getParseErrors());
+    }
+
+    /**
+     * Builds a document of {@link #NESTING_LEVEL} opening {@code <div>} tags followed by the same number of
+     * closing tags. A line break follows every tag whose index is a multiple of 32.
+     *
+     * @return the generated markup
+     */
+    protected String deeplyNestedDoc() {
+        StringBuilder result = new StringBuilder();
+        for (int i = 0; i < NESTING_LEVEL; ++i) {
+            result.append("<div>");
+            if ((i & 31) == 0) {
+                result.append("\n");
+            }
+        }
+        result.append("\n\n");
+        for (int i = 0; i < NESTING_LEVEL; ++i) {
+            result.append("</div>");
+            if ((i & 31) == 0) {
+                result.append("\n");
+            }
+        }
+        return result.toString();
+    }
+}
openSUSE Build Service is sponsored by