File jtidy-CVE-2023-34623.patch of Package jtidy.29894
diff -burN jtidy_orig/src/main/java/org/w3c/tidy/ExcessiveNesting.java jtidy/src/main/java/org/w3c/tidy/ExcessiveNesting.java
--- jtidy_orig/src/main/java/org/w3c/tidy/ExcessiveNesting.java 1970-01-01 01:00:00.000000000 +0100
+++ jtidy/src/main/java/org/w3c/tidy/ExcessiveNesting.java 2023-07-17 15:00:48.402407248 +0200
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2023 Business Operation Systems GmbH. All Rights Reserved.
+ */
+package org.w3c.tidy;
+
+/**
+ * Checked exception signaling a document with excessive nesting that is considered a
+ * denial-of-service attack.
+ */
+public class ExcessiveNesting extends Exception {
+
+    /**
+     * Explicit serialization version, declared because {@link Exception} is serializable.
+     */
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Creates an {@link ExcessiveNesting} carrying a fixed explanatory message.
+     */
+    public ExcessiveNesting() {
+        super("Document nesting is too deep.");
+    }
+}
diff -burN jtidy_orig/src/main/java/org/w3c/tidy/ParserImpl.java jtidy/src/main/java/org/w3c/tidy/ParserImpl.java
--- jtidy_orig/src/main/java/org/w3c/tidy/ParserImpl.java 2007-10-23 16:01:23.000000000 +0200
+++ jtidy/src/main/java/org/w3c/tidy/ParserImpl.java 2023-07-18 16:20:13.866940607 +0200
@@ -172,12 +172,18 @@
}
/**
- * @param lexer
- * @param node
- * @param mode
+ * @param lexer the Lexer to use
+ * @param node the node to use
+ * @param mode the mode to use
+ * @param nestingLevel The current nesting level of the document. Extremely nested documents are considered an error.
+ * @throws ExcessiveNesting When excessive nesting is detected.
*/
- protected static void parseTag(Lexer lexer, Node node, short mode)
+ protected static void parseTag(Lexer lexer, Node node, short mode, int nestingLevel) throws ExcessiveNesting
{
+ if (nestingLevel > 500) {
+ throw new ExcessiveNesting();
+ }
+
// Fix by GLP 2000-12-21. Need to reset insertspace if this
// is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
if ((node.tag.model & Dict.CM_EMPTY) != 0)
@@ -200,7 +206,7 @@
return;
}
- node.tag.getParser().parse(lexer, node, mode);
+ node.tag.getParser().parse(lexer, node, mode, nestingLevel + 1);
}
/**
@@ -209,7 +215,7 @@
* @param element
* @param node
*/
- protected static void moveToHead(Lexer lexer, Node element, Node node)
+ protected static void moveToHead(Lexer lexer, Node element, Node node, int nestingLevel) throws ExcessiveNesting
{
Node head;
node.removeNode(); // make sure that node is isolated
@@ -236,7 +242,7 @@
if (node.tag.getParser() != null)
{
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
}
}
else
@@ -264,9 +270,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node html, short mode)
+ public void parse(Lexer lexer, Node html, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node, head;
Node frameset = null;
@@ -310,7 +317,7 @@
head = node;
html.insertNodeAtEnd(head);
- HEAD.parse(lexer, head, mode);
+ HEAD.parse(lexer, head, mode, nestingLevel);
while (true)
{
@@ -323,7 +330,7 @@
// implied body
node = lexer.inferredTag("body");
html.insertNodeAtEnd(node);
- BODY.parse(lexer, node, mode);
+ BODY.parse(lexer, node, mode, nestingLevel);
}
return;
@@ -370,7 +377,7 @@
lexer.report.warning(lexer, html, noframes, Report.INSERTING_TAG);
}
- parseTag(lexer, noframes, mode);
+ parseTag(lexer, noframes, mode, nestingLevel);
continue;
}
@@ -397,7 +404,7 @@
}
html.insertNodeAtEnd(node);
- parseTag(lexer, node, mode);
+ parseTag(lexer, node, mode, nestingLevel);
// see if it includes a noframes element so that we can merge subsequent noframes elements
@@ -433,7 +440,7 @@
frameset.insertNodeAtEnd(noframes);
}
- parseTag(lexer, noframes, mode);
+ parseTag(lexer, noframes, mode, nestingLevel);
continue;
}
@@ -441,7 +448,7 @@
{
if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
{
- moveToHead(lexer, html, node);
+ moveToHead(lexer, html, node, nestingLevel);
continue;
}
@@ -469,7 +476,7 @@
}
lexer.constrainVersion(Dict.VERS_FRAMESET);
- parseTag(lexer, noframes, mode);
+ parseTag(lexer, noframes, mode, nestingLevel);
continue;
}
@@ -480,7 +487,7 @@
// node must be body
html.insertNodeAtEnd(node);
- parseTag(lexer, node, mode);
+ parseTag(lexer, node, mode, nestingLevel);
lexer.seenEndHtml = true;
}
@@ -493,9 +500,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node head, short mode)
+ public void parse(Lexer lexer, Node head, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node;
int hasTitle = 0;
@@ -573,7 +581,7 @@
}
head.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
continue;
}
@@ -600,9 +608,9 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node title, short mode)
+ public void parse(Lexer lexer, Node title, short mode, int nestingLevel)
{
Node node;
@@ -671,9 +679,9 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node script, short mode)
+ public void parse(Lexer lexer, Node script, short mode, int nestingLevel)
{
// This isn't quite right for CDATA content as it recognises tags within the content and parses them
// accordingly. This will unfortunately screw up scripts which include < + letter, < + !, < + ? or < + / +
@@ -696,9 +704,9 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node body, short mode)
+ public void parse(Lexer lexer, Node body, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node;
boolean checkstack, iswhitenode;
@@ -753,7 +761,7 @@
if (node.type == Node.START_TAG)
{
body.insertNodeAtEnd(node);
- BLOCK.parse(lexer, node, mode);
+ BLOCK.parse(lexer, node, mode, nestingLevel);
continue;
}
@@ -809,7 +817,7 @@
lexer.ungetToken();
para = lexer.inferredTag("p");
body.insertNodeAtEnd(para);
- parseTag(lexer, para, mode);
+ parseTag(lexer, para, mode, nestingLevel);
mode = Lexer.MIXED_CONTENT;
continue;
}
@@ -872,7 +880,7 @@
if ((node.tag.model & Dict.CM_HEAD) != 0)
{
- moveToHead(lexer, body, node);
+ moveToHead(lexer, body, node, nestingLevel);
continue;
}
@@ -971,7 +979,7 @@
}
body.insertNodeAtEnd(node);
- parseTag(lexer, node, mode);
+ parseTag(lexer, node, mode, nestingLevel);
continue;
}
@@ -989,9 +997,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node frameset, short mode)
+ public void parse(Lexer lexer, Node frameset, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node;
TagTable tt = lexer.configuration.tt;
@@ -1023,7 +1032,7 @@
{
if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
{
- moveToHead(lexer, frameset, node);
+ moveToHead(lexer, frameset, node, nestingLevel);
continue;
}
}
@@ -1039,7 +1048,7 @@
{
frameset.insertNodeAtEnd(node);
lexer.excludeBlocks = false;
- parseTag(lexer, node, Lexer.MIXED_CONTENT);
+ parseTag(lexer, node, Lexer.MIXED_CONTENT, nestingLevel);
continue;
}
else if (node.type == Node.START_END_TAG && (node.tag.model & Dict.CM_FRAMES) != 0)
@@ -1064,9 +1073,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node element, short mode)
+ public void parse(Lexer lexer, Node element, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node, parent;
TagTable tt = lexer.configuration.tt;
@@ -1537,7 +1547,7 @@
if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
{
- moveToHead(lexer, element, node);
+ moveToHead(lexer, element, node, nestingLevel);
continue;
}
@@ -1583,7 +1593,7 @@
}
element.insertNodeAtEnd(node);
- parseTag(lexer, node, mode);
+ parseTag(lexer, node, mode, nestingLevel);
continue;
}
@@ -1607,7 +1617,7 @@
public static class ParseList implements Parser
{
- public void parse(Lexer lexer, Node list, short mode)
+ public void parse(Lexer lexer, Node list, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node;
Node parent;
@@ -1703,7 +1713,7 @@
// node should be <LI>
list.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
}
if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
@@ -1724,9 +1734,9 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node element, short mode)
+ public void parse(Lexer lexer, Node element, short mode, int nestingLevel) throws ExcessiveNesting
{
if (lexer.isvoyager)
{
@@ -1749,7 +1759,7 @@
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node list, short mode)
+ public void parse(Lexer lexer, Node list, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node, parent;
TagTable tt = lexer.configuration.tt;
@@ -1830,7 +1840,7 @@
}
// and parse contents of center
- parseTag(lexer, node, mode);
+ parseTag(lexer, node, mode, nestingLevel);
// now create a new dl element
list = lexer.inferredTag("dl");
@@ -1868,7 +1878,7 @@
// node should be <DT> or <DD>
list.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
}
lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
@@ -1884,9 +1894,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node pre, short mode)
+ public void parse(Lexer lexer, Node pre, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node;
TagTable tt = lexer.configuration.tt;
@@ -1989,7 +2000,7 @@
}
pre.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.PREFORMATTED);
+ parseTag(lexer, node, Lexer.PREFORMATTED, nestingLevel);
continue;
}
@@ -2010,9 +2021,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node element, short mode)
+ public void parse(Lexer lexer, Node element, short mode, int nestingLevel) throws ExcessiveNesting
{
// element is node created by the lexer upon seeing the start tag, or by the parser when the start tag is
// inferred.
@@ -2161,7 +2173,7 @@
lexer.ungetToken();
node = lexer.inferredTag("p");
element.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.MIXED_CONTENT);
+ parseTag(lexer, node, Lexer.MIXED_CONTENT, nestingLevel);
continue;
}
@@ -2278,7 +2290,7 @@
if ((node.tag.model & Dict.CM_HEAD) != 0)
{
- moveToHead(lexer, element, node);
+ moveToHead(lexer, element, node, nestingLevel);
continue;
}
@@ -2332,7 +2344,7 @@
if ((node.tag.model & Dict.CM_HEAD) != 0)
{
- moveToHead(lexer, element, node);
+ moveToHead(lexer, element, node, nestingLevel);
continue;
}
@@ -2465,6 +2477,7 @@
}
parseTag(lexer, node, Lexer.IGNORE_WHITESPACE // Lexer.MixedContent
+ , nestingLevel
);
continue;
}
@@ -2507,9 +2520,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node table, short mode)
+ public void parse(Lexer lexer, Node table, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node, parent;
int istackbase;
@@ -2560,7 +2574,7 @@
if (!(node.type == Node.TEXT_NODE)) // #427662 - was (!node.type == TextNode) - fix by Young
{
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
}
lexer.exiled = false;
@@ -2568,7 +2582,7 @@
}
else if ((node.tag.model & Dict.CM_HEAD) != 0)
{
- moveToHead(lexer, table, node);
+ moveToHead(lexer, table, node, nestingLevel);
continue;
}
}
@@ -2618,7 +2632,7 @@
{
table.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
continue;
}
@@ -2640,9 +2654,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node colgroup, short mode)
+ public void parse(Lexer lexer, Node colgroup, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node, parent;
TagTable tt = lexer.configuration.tt;
@@ -2715,7 +2730,7 @@
// node should be <COL>
colgroup.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
}
}
@@ -2728,9 +2743,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node rowgroup, short mode)
+ public void parse(Lexer lexer, Node rowgroup, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node, parent;
TagTable tt = lexer.configuration.tt;
@@ -2796,7 +2812,7 @@
// #427662 was (!node.type == TextNode) fix by Young 04 Aug 00
if (node.type != Node.TEXT_NODE)
{
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
}
lexer.exiled = false;
@@ -2805,7 +2821,7 @@
else if ((node.tag.model & Dict.CM_HEAD) != 0)
{
lexer.report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
- moveToHead(lexer, rowgroup, node);
+ moveToHead(lexer, rowgroup, node, nestingLevel);
continue;
}
}
@@ -2872,7 +2888,7 @@
// node should be <TR>
rowgroup.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
}
Node.trimEmptyElement(lexer, rowgroup);
}
@@ -2885,9 +2901,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node row, short mode)
+ public void parse(Lexer lexer, Node row, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node, parent;
boolean excludeState;
@@ -2997,7 +3014,7 @@
if (node.type != Node.TEXT_NODE)
{
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
}
lexer.exiled = false;
@@ -3006,7 +3023,7 @@
else if ((node.tag.model & Dict.CM_HEAD) != 0)
{
lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
- moveToHead(lexer, row, node);
+ moveToHead(lexer, row, node, nestingLevel);
continue;
}
}
@@ -3021,7 +3038,7 @@
row.insertNodeAtEnd(node);
excludeState = lexer.excludeBlocks;
lexer.excludeBlocks = false;
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
lexer.excludeBlocks = excludeState;
// pop inline stack
@@ -3044,9 +3061,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node noframes, short mode)
+ public void parse(Lexer lexer, Node noframes, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node;
TagTable tt = lexer.configuration.tt;
@@ -3103,7 +3121,7 @@
{
boolean seenbody = lexer.seenEndBody;
noframes.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); // MixedContent
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel); // MixedContent
if (seenbody)
{
@@ -3139,7 +3157,7 @@
}
noframes.insertNodeAtEnd(node);
}
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
// MixedContent
continue;
}
@@ -3159,9 +3177,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node field, short mode)
+ public void parse(Lexer lexer, Node field, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node;
TagTable tt = lexer.configuration.tt;
@@ -3187,7 +3206,7 @@
&& (node.tag == tt.tagOption || node.tag == tt.tagOptgroup || node.tag == tt.tagScript))
{
field.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
+ parseTag(lexer, node, Lexer.IGNORE_WHITESPACE, nestingLevel);
continue;
}
@@ -3207,9 +3226,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node field, short mode)
+ public void parse(Lexer lexer, Node field, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node;
TagTable tt = lexer.configuration.tt;
@@ -3293,9 +3313,10 @@
{
/**
- * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
+ * @throws ExcessiveNesting When excessive nesting is detected.
+ * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short, int)
*/
- public void parse(Lexer lexer, Node field, short mode)
+ public void parse(Lexer lexer, Node field, short mode, int nestingLevel) throws ExcessiveNesting
{
Node node;
TagTable tt = lexer.configuration.tt;
@@ -3325,7 +3346,7 @@
}
field.insertNodeAtEnd(node);
- parseTag(lexer, node, Lexer.MIXED_CONTENT);
+ parseTag(lexer, node, Lexer.MIXED_CONTENT, nestingLevel);
continue;
}
@@ -3394,7 +3415,11 @@
}
document.insertNodeAtEnd(html);
- HTML.parse(lexer, html, (short) 0); // TODO?
+ try {
+ HTML.parse(lexer, html, (short) 0, 0);
+ } catch (ExcessiveNesting ex) {
+ lexer.report.error(lexer, null, node, Report.DOCUMENT_WITH_EXCESSIVE_NESTING);
+ }
break;
}
diff -burN jtidy_orig/src/main/java/org/w3c/tidy/Parser.java jtidy/src/main/java/org/w3c/tidy/Parser.java
--- jtidy_orig/src/main/java/org/w3c/tidy/Parser.java 2004-09-05 22:44:47.000000000 +0200
+++ jtidy/src/main/java/org/w3c/tidy/Parser.java 2023-07-17 15:02:33.075886928 +0200
@@ -68,7 +68,9 @@
* @param lexer Lexer
* @param node node created by the lexer upon seeing the start tag, or by the parser when the start tag is inferred
* @param mode content mode
+ * @param nestingLevel The current nesting level of the document. Extremely nested documents are considered an error.
+ * @throws ExcessiveNesting When excessive nesting is detected.
*/
- void parse(Lexer lexer, Node node, short mode);
+ void parse(Lexer lexer, Node node, short mode, int nestingLevel) throws ExcessiveNesting;
}
\ No newline at end of file
diff -burN jtidy_orig/src/main/java/org/w3c/tidy/Report.java jtidy/src/main/java/org/w3c/tidy/Report.java
--- jtidy_orig/src/main/java/org/w3c/tidy/Report.java 2004-09-26 22:10:11.000000000 +0200
+++ jtidy/src/main/java/org/w3c/tidy/Report.java 2023-07-17 16:20:11.362126725 +0200
@@ -325,6 +325,11 @@
public static final short UNEXPECTED_ENDTAG_IN = 47;
/**
+ * excessive nesting that is considered a denial-of-service attack.
+ */
+ public static final short DOCUMENT_WITH_EXCESSIVE_NESTING = 48;
+
+ /**
* replacing element.
*/
public static final short REPLACING_ELEMENT = 83;
diff -burN jtidy_orig/src/test/java/org/w3c/tidy/TestCVE_2023_34623.java jtidy/src/test/java/org/w3c/tidy/TestCVE_2023_34623.java
--- jtidy_orig/src/test/java/org/w3c/tidy/TestCVE_2023_34623.java 1970-01-01 01:00:00.000000000 +0100
+++ jtidy/src/test/java/org/w3c/tidy/TestCVE_2023_34623.java 2023-07-17 16:21:06.522199563 +0200
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2023 Business Operation Systems GmbH. All Rights Reserved.
+ */
+package org.w3c.tidy;
+
+import java.io.StringReader;
+
+import junit.framework.TestCase;
+
+/**
+ * Test case for CVE-2023-34623.
+ *
+ * @see "https://nvd.nist.gov/vuln/detail/CVE-2023-34623"
+ */
+public class TestCVE_2023_34623 extends TestCase {
+
+    /**
+     * Number of nested {@code div} elements in the generated document.
+     */
+    private static final int NESTING_LEVEL = 9999;
+
+    /**
+     * Checks that excessive nesting does not result in a crash and is counted as exactly one parse error.
+     */
+    public void testDeepNesting() {
+        String htmlData = deeplyNestedDoc();
+        Tidy tidy = new Tidy();
+        try (StringReader stringReader = new StringReader(htmlData)) {
+            tidy.parse(stringReader, System.out);
+        }
+        assertEquals(1, tidy.getParseErrors());
+    }
+
+    /**
+     * Builds a document of {@link #NESTING_LEVEL} opening {@code div} tags followed by the same number of
+     * closing tags.
+     *
+     * @return the markup of the deeply nested document
+     */
+    protected String deeplyNestedDoc() {
+        StringBuilder result = new StringBuilder();
+        appendRepeated(result, "<div>");
+        result.append("\n").append("\n");
+        appendRepeated(result, "</div>");
+        return result.toString();
+    }
+
+    /**
+     * Appends {@link #NESTING_LEVEL} copies of the given tag, adding a line break after every tag whose
+     * zero-based index is a multiple of 32.
+     *
+     * @param out the buffer to append to
+     * @param tag the tag text to repeat
+     */
+    private static void appendRepeated(StringBuilder out, String tag) {
+        for (int i = 0; i < NESTING_LEVEL; ++i) {
+            out.append(tag);
+            if ((i & 31) == 0) {
+                out.append("\n");
+            }
+        }
+    }
+}