Kaydet (Commit) d7504a3d authored tarafından Claude Paroz's avatar Claude Paroz

Improved regex in strip_tags

Thanks Pablo Recio for the report. Refs #19237.
üst afa3e163
...@@ -33,7 +33,7 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') ...@@ -33,7 +33,7 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z') trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
strip_tags_re = re.compile(r'</?\S([^=]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE) strip_tags_re = re.compile(r'</?\S([^=>]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE)
def escape(text): def escape(text):
......
...@@ -68,6 +68,7 @@ class TestUtilsHtml(unittest.TestCase): ...@@ -68,6 +68,7 @@ class TestUtilsHtml(unittest.TestCase):
('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'), ('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'),
('a<p a >b</p>c', 'abc'), ('a<p a >b</p>c', 'abc'),
('d<a:b c:d>e</p>f', 'def'), ('d<a:b c:d>e</p>f', 'def'),
('<strong>foo</strong><a href="http://example.com">bar</a>', 'foobar'),
) )
for value, output in items: for value, output in items:
self.check_output(f, value, output) self.check_output(f, value, output)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment