Files
Soumya Sambu 580693f8b9 python3-django: Fix CVE-2024-38875
An issue was discovered in Django 4.2 before 4.2.14 and 5.0 before 5.0.7.
urlize and urlizetrunc were subject to a potential denial of service attack
via certain inputs with a very large number of brackets.

References:
https://nvd.nist.gov/vuln/detail/CVE-2024-38875
https://github.com/advisories/GHSA-qg2p-9jwr-mmqf

Upstream-patch:
https://github.com/django/django/commit/79f368764295df109a37192f6182fb6f361d85b5

Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
Signed-off-by: Armin Kuster <akuster808@gmail.com>
2025-01-22 19:20:02 -05:00

162 lines
6.7 KiB
Diff

From 79f368764295df109a37192f6182fb6f361d85b5 Mon Sep 17 00:00:00 2001
From: Adam Johnson <me@adamj.eu>
Date: Mon, 24 Jun 2024 15:30:59 +0200
Subject: [PATCH] [4.2.x] Fixed CVE-2024-38875 -- Mitigated potential DoS in
urlize and urlizetrunc template filters.
Thank you to Elias Myllymäki for the report.
Co-authored-by: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>
CVE: CVE-2024-38875
Upstream-Status: Backport [https://github.com/django/django/commit/79f368764295df109a37192f6182fb6f361d85b5]
Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
---
django/utils/html.py | 90 +++++++++++++++++++++++++---------
tests/utils_tests/test_html.py | 7 +++
2 files changed, 73 insertions(+), 21 deletions(-)
diff --git a/django/utils/html.py b/django/utils/html.py
index 7a33d5f..f1b74ab 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -234,6 +234,15 @@ def smart_urlquote(url):
return urlunsplit((scheme, netloc, path, query, fragment))
+class CountsDict(dict):  # Lazily caches self.word.count(key) per looked-up key.
+    def __init__(self, *args, word, **kwargs):
+        super().__init__(*args, **kwargs)  # Keyword args must stay keywords.
+        self.word = word
+
+    def __missing__(self, key):  # Invoked by dict lookup when key is absent.
+        self[key] = self.word.count(key)
+        return self[key]
+
@keep_lazy_text
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
@@ -268,36 +277,69 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
return text.replace('&amp;', '&').replace('&lt;', '<').replace(
'&gt;', '>').replace('&quot;', '"').replace('&#39;', "'")
- def trim_punctuation(lead, middle, trail):
+ def wrapping_punctuation_openings():
+ return "".join(dict(WRAPPING_PUNCTUATION).keys())
+
+ def trailing_punctuation_chars_no_semicolon():
+ return TRAILING_PUNCTUATION_CHARS.replace(";", "")
+
+ def trailing_punctuation_chars_has_semicolon():
+ return ";" in TRAILING_PUNCTUATION_CHARS
+
+ def trim_punctuation(word):
"""
Trim trailing and wrapping punctuation from `middle`. Return the items
of the new state.
"""
+ # Strip all opening wrapping punctuation.
+ middle = word.lstrip(wrapping_punctuation_openings())
+ lead = word[: len(word) - len(middle)]
+ trail = ""
+
# Continue trimming until middle remains unchanged.
trimmed_something = True
- while trimmed_something:
+ counts = CountsDict(word=middle)
+ while trimmed_something and middle:
trimmed_something = False
# Trim wrapping punctuation.
for opening, closing in WRAPPING_PUNCTUATION:
- if middle.startswith(opening):
- middle = middle[len(opening):]
- lead += opening
- trimmed_something = True
- # Keep parentheses at the end only if they're balanced.
- if (middle.endswith(closing) and
- middle.count(closing) == middle.count(opening) + 1):
- middle = middle[:-len(closing)]
- trail = closing + trail
- trimmed_something = True
- # Trim trailing punctuation (after trimming wrapping punctuation,
- # as encoded entities contain ';'). Unescape entites to avoid
- # breaking them by removing ';'.
- middle_unescaped = unescape(middle)
- stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
- if middle_unescaped != stripped:
- trail = middle[len(stripped):] + trail
- middle = middle[:len(stripped) - len(middle_unescaped)]
+                if counts[opening] < counts[closing]:
+                    rstripped = middle.rstrip(closing)
+                    if rstripped != middle:
+                        strip = counts[closing] - counts[opening]
+                        trail = middle[-strip:] + trail  # Prepend; keep earlier trail.
+                        middle = middle[:-strip]
+                        trimmed_something = True
+                        counts[closing] -= strip
+
+ rstripped = middle.rstrip(trailing_punctuation_chars_no_semicolon())
+ if rstripped != middle:
+ trail = middle[len(rstripped) :] + trail
+ middle = rstripped
trimmed_something = True
+
+ if trailing_punctuation_chars_has_semicolon() and middle.endswith(";"):
+ # Only strip if not part of an HTML entity.
+ amp = middle.rfind("&")
+ if amp == -1:
+ can_strip = True
+ else:
+ potential_entity = middle[amp:]
+ escaped = unescape(potential_entity)
+ can_strip = (escaped == potential_entity) or escaped.endswith(";")
+
+ if can_strip:
+ rstripped = middle.rstrip(";")
+ amount_stripped = len(middle) - len(rstripped)
+ if amp > -1 and amount_stripped > 1:
+ # Leave a trailing semicolon as might be an entity.
+ trail = middle[len(rstripped) + 1 :] + trail
+ middle = rstripped + ";"
+ else:
+ trail = middle[len(rstripped) :] + trail
+ middle = rstripped
+ trimmed_something = True
+
return lead, middle, trail
def is_email_simple(value):
@@ -321,9 +363,7 @@ def urlize(text, trim_url_limit=None, no
# lead: Current punctuation trimmed from the beginning of the word.
# middle: Current state of the word.
# trail: Current punctuation trimmed from the end of the word.
- lead, middle, trail = '', word, ''
- # Deal with punctuation.
- lead, middle, trail = trim_punctuation(lead, middle, trail)
+ lead, middle, trail = trim_punctuation(word)
# Make URL we want to point to.
url = None
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index 5cc2d9b..715c1c6 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -267,6 +267,13 @@ class TestUtilsHtml(SimpleTestCase):
'foo@.example.com',
'foo@localhost',
'foo@localhost.',
+ # trim_punctuation catastrophic tests
+ "(" * 100_000 + ":" + ")" * 100_000,
+ "(" * 100_000 + "&:" + ")" * 100_000,
+ "([" * 100_000 + ":" + "])" * 100_000,
+ "[(" * 100_000 + ":" + ")]" * 100_000,
+ "([[" * 100_000 + ":" + "]])" * 100_000,
+ "&:" + ";" * 100_000,
)
for value in tests:
with self.subTest(value=value):
--
2.40.0