File CVE-2022-40023-RE-DoS-Lexer.patch of Package python-Mako.26334
From 925760291d6efec64fda6e9dd1fd9cfbd5be068c Mon Sep 17 00:00:00 2001
From: Mike Bayer <mike_mp@zzzcomputing.com>
Date: Mon, 29 Aug 2022 12:28:52 -0400
Subject: [PATCH] fix tag regexp to match quoted groups correctly
Fixed issue in lexer where the regexp used to match tags would not
correctly interpret quoted sections individually. While this parsing issue
still produced the same expected tag structure later on, the mishandling
of quoted sections also exposed the lexer to a regexp crash (a regular
expression denial of service, tracked as CVE-2022-40023) if a tag had a
large number of quotes within its quoted sections.
Fixes: #366
Change-Id: I74e0d71ff7f419970711a7cd51adcf1bb90a44c0
---
doc/build/unreleased/366.rst | 9 +++++++++
mako/lexer.py | 13 +++++++++----
test/test_lexer.py | 28 ++++++++++++++++++++++------
3 files changed, 40 insertions(+), 10 deletions(-)
create mode 100644 doc/build/unreleased/366.rst
--- /dev/null
+++ b/doc/build/unreleased/366.rst
@@ -0,0 +1,9 @@
+.. change::
+ :tags: bug, lexer
+ :tickets: 366
+
+ Fixed issue in lexer where the regexp used to match tags would not
+ correctly interpret quoted sections individually. While this parsing issue
+ still produced the same expected tag structure later on, the mis-handling
+ of quoted sections was also subject to a regexp crash if a tag had a large
+ number of quotes within its quoted sections.
\ No newline at end of file
--- a/mako/lexer.py
+++ b/mako/lexer.py
@@ -271,21 +271,26 @@ class Lexer(object):
return self.template
def match_tag_start(self):
- match = self.match(r'''
+ reg = r"""
\<% # opening tag
([\w\.\:]+) # keyword
- ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \
+ ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*) # attrname, = \
# sign, string expression
+ # comma is for backwards compat
+ # identified in #366
\s* # more whitespace
(/)?> # closing
- ''',
+ """
- re.I | re.S | re.X)
+ match = self.match(
+ reg,
+ re.I | re.S | re.X
+ )
if match:
keyword, attr, isend = match.groups()
--- a/test/test_lexer.py
+++ b/test/test_lexer.py
@@ -3,6 +3,7 @@ from mako import exceptions, util, compa
from test.util import flatten_result
from mako.template import Template
import re
+
from test import TemplateTest, eq_, assert_raises_message
# create fake parsetree classes which are constructed
@@ -105,6 +106,10 @@ class LexerTest(TemplateTest):
self.assertRaises(exceptions.CompileException,
Lexer(template).parse)
+ def test_tag_many_quotes(self):
+ template = "<%0" + '"' * 3000
+ self.assertRaises(exceptions.SyntaxException, Lexer(template).parse)
+
def test_unmatched_tag(self):
template = \
"""
@@ -304,13 +309,18 @@ class LexerTest(TemplateTest):
, {'expr': 'foo<bar and hoho>lala and "x" + "y"'
}, (3, 13), []), Text('\n ', (3, 64))]))
- def test_pagetag(self):
- template = \
- """
- <%page cached="True", args="a, b"/>
+ def do_test_pagetag(self, comma, numchars):
+ # note that the comma here looks like:
+ # <%page cached="True", args="a, b"/>
+ # that's what this test has looked like for decades, however, the
+ # comma there is not actually the right syntax. When issue #366
+ # was fixed, the reg was altered to accommodate for this comma to allow
+ # backwards compat
+ template = """
+ <%page cached="True"{} args="a, b"/>
some template
- """
+ """.format(comma)
nodes = Lexer(template).parse()
self._compare(nodes, TemplateNode({}, [Text('\n ',
(1, 1)), PageTag('page', {'args': 'a, b',
@@ -319,7 +329,13 @@ class LexerTest(TemplateTest):
some template
''',
- (2, 48))]))
+ (2, numchars))]))
+
+ def test_pagetag_comma(self):
+ self.do_test_pagetag(",", 48)
+
+ def test_pagetag_blank(self):
+ self.do_test_pagetag("", 47)
def test_nesting(self):
template = \