Commit 8618271c, authored by Tim Graham

Fixed CVE-2018-7536 -- Fixed catastrophic backtracking in urlize and urlizetrunc template filters.

Thanks Florian Apolloner for assisting with the patch.
parent 4d2a2c83
...@@ -13,12 +13,7 @@ from django.utils.safestring import SafeData, SafeText, mark_safe ...@@ -13,12 +13,7 @@ from django.utils.safestring import SafeData, SafeText, mark_safe
from django.utils.text import normalize_newlines from django.utils.text import normalize_newlines
# Configuration for urlize() function. # Configuration for urlize() function.
TRAILING_PUNCTUATION_RE = re.compile( TRAILING_PUNCTUATION_CHARS = '.,:;!'
'^' # Beginning of word
'(.*?)' # The URL in word
'([.,:;!]+)' # Allowed non-wrapping, trailing punctuation
'$' # End of word
)
WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')] WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')]
# List of possible strings used for bullets in bulleted lists. # List of possible strings used for bullets in bulleted lists.
...@@ -28,7 +23,6 @@ unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)') ...@@ -28,7 +23,6 @@ unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
word_split_re = re.compile(r'''([\s<>"']+)''') word_split_re = re.compile(r'''([\s<>"']+)''')
simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE) simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE)
simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$', re.IGNORECASE) simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$', re.IGNORECASE)
simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
_html_escapes = { _html_escapes = {
ord('&'): '&amp;', ord('&'): '&amp;',
...@@ -293,10 +287,10 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): ...@@ -293,10 +287,10 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
trimmed_something = False trimmed_something = False
# Trim trailing punctuation. # Trim trailing punctuation.
match = TRAILING_PUNCTUATION_RE.match(middle) stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
if match: if middle != stripped:
middle = match.group(1) trail = middle[len(stripped):] + trail
trail = match.group(2) + trail middle = stripped
trimmed_something = True trimmed_something = True
# Trim wrapping punctuation. # Trim wrapping punctuation.
...@@ -313,6 +307,21 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): ...@@ -313,6 +307,21 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
trimmed_something = True trimmed_something = True
return lead, middle, trail return lead, middle, trail
def is_email_simple(value):
    """
    Return True if value looks like an email address.

    This is a deliberately loose check built from plain string operations
    (no regular expression): value must contain exactly one '@' with text on
    both sides of it, and the part after the '@' must contain a '.' that is
    not its first character (e.g. example.com).
    """
    # Exactly one @ is allowed; zero or several means this isn't an address.
    if value.count('@') != 1:
        return False
    local, _, domain = value.partition('@')
    # The @ must be in the middle of the value.
    if not local or not domain:
        return False
    # Dot must be in the domain part, but not as its first character.
    return '.' in domain and not domain.startswith('.')
words = word_split_re.split(str(text)) words = word_split_re.split(str(text))
for i, word in enumerate(words): for i, word in enumerate(words):
if '.' in word or '@' in word or ':' in word: if '.' in word or '@' in word or ':' in word:
...@@ -332,7 +341,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): ...@@ -332,7 +341,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
elif simple_url_2_re.match(middle): elif simple_url_2_re.match(middle):
middle, middle_unescaped, trail = unescape(middle, trail) middle, middle_unescaped, trail = unescape(middle, trail)
url = smart_urlquote('http://%s' % middle_unescaped) url = smart_urlquote('http://%s' % middle_unescaped)
elif ':' not in middle and simple_email_re.match(middle): elif ':' not in middle and is_email_simple(middle):
local, domain = middle.rsplit('@', 1) local, domain = middle.rsplit('@', 1)
try: try:
domain = domain.encode('idna').decode('ascii') domain = domain.encode('idna').decode('ascii')
......
...@@ -5,3 +5,14 @@ Django 1.11.11 release notes ...@@ -5,3 +5,14 @@ Django 1.11.11 release notes
*March 6, 2018* *March 6, 2018*
Django 1.11.11 fixes two security issues in 1.11.10. Django 1.11.11 fixes two security issues in 1.11.10.
CVE-2018-7536: Denial-of-service possibility in ``urlize`` and ``urlizetrunc`` template filters
===============================================================================================
The ``django.utils.html.urlize()`` function was extremely slow to evaluate
certain inputs due to catastrophic backtracking vulnerabilities in two regular
expressions. The ``urlize()`` function is used to implement the ``urlize`` and
``urlizetrunc`` template filters, which were thus vulnerable.
The problematic regular expressions are replaced with parsing logic that
behaves similarly.
...@@ -5,3 +5,14 @@ Django 1.8.19 release notes ...@@ -5,3 +5,14 @@ Django 1.8.19 release notes
*March 6, 2018* *March 6, 2018*
Django 1.8.19 fixes two security issues in 1.8.18. Django 1.8.19 fixes two security issues in 1.8.18.
CVE-2018-7536: Denial-of-service possibility in ``urlize`` and ``urlizetrunc`` template filters
===============================================================================================
The ``django.utils.html.urlize()`` function was extremely slow to evaluate
certain inputs due to a catastrophic backtracking vulnerability in a regular
expression. The ``urlize()`` function is used to implement the ``urlize`` and
``urlizetrunc`` template filters, which were thus vulnerable.
The problematic regular expression is replaced with parsing logic that behaves
similarly.
...@@ -7,6 +7,17 @@ Django 2.0.3 release notes ...@@ -7,6 +7,17 @@ Django 2.0.3 release notes
Django 2.0.3 fixes two security issues and several bugs in 2.0.2. Also, the Django 2.0.3 fixes two security issues and several bugs in 2.0.2. Also, the
latest string translations from Transifex are incorporated. latest string translations from Transifex are incorporated.
CVE-2018-7536: Denial-of-service possibility in ``urlize`` and ``urlizetrunc`` template filters
===============================================================================================
The ``django.utils.html.urlize()`` function was extremely slow to evaluate
certain inputs due to catastrophic backtracking vulnerabilities in two regular
expressions. The ``urlize()`` function is used to implement the ``urlize`` and
``urlizetrunc`` template filters, which were thus vulnerable.
The problematic regular expressions are replaced with parsing logic that
behaves similarly.
Bugfixes Bugfixes
======== ========
......
...@@ -253,3 +253,12 @@ class TestUtilsHtml(SimpleTestCase): ...@@ -253,3 +253,12 @@ class TestUtilsHtml(SimpleTestCase):
for value, output in tests: for value, output in tests:
with self.subTest(value=value): with self.subTest(value=value):
self.assertEqual(urlize(value), output) self.assertEqual(urlize(value), output)
def test_urlize_unchanged_inputs(self):
    """Check that urlize() returns each of these inputs unmodified."""
    # simple_email_re catastrophic test
    repeated_at_signs = ('a' + '@a' * 50000) + 'a'
    # trailing_punctuation catastrophic test
    repeated_dots = ('a' + '.' * 1000000) + 'a'
    for value in (repeated_at_signs, repeated_dots):
        with self.subTest(value=value):
            self.assertEqual(urlize(value), value)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment