From 26bde205d40e1788d0938a62afcd015a17eddf5f Mon Sep 17 00:00:00 2001 From: Benjamin Bach Date: Wed, 28 Dec 2016 00:24:53 +0100 Subject: [PATCH] Use Bleach to sanitize HTML and remove Markdown safe_mode (deprecated) #571 --- docs/release_notes.rst | 27 ++++++++++++++++++++++++++- setup.py | 1 + tox.ini | 1 + wiki/conf/settings.py | 10 +++++++++- wiki/core/markdown/__init__.py | 8 +++++++- wiki/tests/test_markdown.py | 23 ++++++++++++++++++++--- 6 files changed, 64 insertions(+), 6 deletions(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 0d1183a3..7283c613 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -37,12 +37,37 @@ django-wiki 0.2 (dev) * Updated languages since 0.1: Chinese, French, German, German, Russian, Spanish * Added Django 1.10 support #563 + * Security: Do not depend on markdown ``safe_mode``, instead use ``bleach``. * Fix duplicate search results when logged in #582 (duvholt) * Fix memory leak in markdown extensions setting #564 * Updated translations - Languages > 90% completed: Chinese (China), Portuguese (Brazil), Korean (Korea), French, Slovak, Spanish, Dutch, German, Russian, Finnish. * Taiwanese Chinese added (39% completed) * Cleanup documentation structure #575 -Support removed for: + +HTML contents +~~~~~~~~~~~~~ + +`Bleach `_ is now used to sanitize HTML +before invoking Markdown. + +HTML escaping is done before Markdown parsing happens. In future Markdown +versions, HTML escaping is no longer done, and ``safe_mode`` is removed. We have +already removed ``safe_mode`` from the default ``WIKI_MARKDOWN_KWARGS`` setting, +however if you have configured this yourself, you are advised to remove +``safe_mode``. + +Allowed tags are from Bleach's default settings: ``a``, ``abbr``, ``acronym``, +``b``, ``blockquote``, ``code``, ``em``, ``i``, ``li``, ``ol``, ``strong``, +``ul``. + +Please use new setting ``WIKI_MARKDOWN_HTML_WHITELIST`` and set a list of +allowed tags to customize behavior. + + +Python and Django support +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Support has been removed for: * Python 2.6 * Django < 1.8 diff --git a/setup.py b/setup.py index 1d7a67bd..1f77f488 100755 --- a/setup.py +++ b/setup.py @@ -28,6 +28,7 @@ def read_file(fname): requirements = [ "Django>=1.8", + "bleach>=1.5,<2", "Pillow", "django-nyt>=1.0b1", "six", diff --git a/tox.ini b/tox.ini index 3e0e5344..4bd0b110 100644 --- a/tox.ini +++ b/tox.ini @@ -20,6 +20,7 @@ deps = mock>=2.0 Markdown==2.6.7 django_nyt==1.0b1 + bleach==1.5.0 django18: Django==1.8.2 django19: Django==1.9 django110: Django==1.10.2 diff --git a/wiki/conf/settings.py b/wiki/conf/settings.py index 4c4de36f..9b7cbdf5 100644 --- a/wiki/conf/settings.py +++ b/wiki/conf/settings.py @@ -1,5 +1,7 @@ from __future__ import absolute_import, unicode_literals +import bleach + from django.conf import settings as django_settings from django.core.files.storage import default_storage from django.core.urlresolvers import reverse_lazy @@ -26,13 +28,19 @@ MARKDOWN_KWARGS = { 'codehilite', 'sane_lists', ], - 'safe_mode': 'replace', 'extension_configs': { 'toc': { 'title': _('Table of Contents')}}, } MARKDOWN_KWARGS.update(getattr(django_settings, 'WIKI_MARKDOWN_KWARGS', {})) +# Allowed tags in Markdown article contents. +MARKDOWN_HTML_WHITELIST = getattr( + django_settings, + 'WIKI_MARKDOWN_HTML_WHITELIST', + bleach.ALLOWED_TAGS +) + # This slug is used in URLPath if an article has been deleted. The children of the # URLPath of that article are moved to lost and found. They keep their permissions # and all their content. diff --git a/wiki/core/markdown/__init__.py b/wiki/core/markdown/__init__.py index e40b4d6a..b662bdd0 100644 --- a/wiki/core/markdown/__init__.py +++ b/wiki/core/markdown/__init__.py @@ -1,10 +1,12 @@ from __future__ import absolute_import, unicode_literals +import bleach +import markdown + from wiki.conf import settings from wiki.core.markdown.mdx.previewlinks import PreviewLinksExtension from wiki.core.markdown.mdx.responsivetable import ResponsiveTableExtension from wiki.core.plugins import registry as plugin_registry -import markdown class ArticleMarkdown(markdown.Markdown): @@ -27,6 +29,10 @@ class ArticleMarkdown(markdown.Markdown): extensions += plugin_registry.get_markdown_extensions() return extensions + def convert(self, text, *args, **kwargs): + text = bleach.clean(text, tags=settings.MARKDOWN_HTML_WHITELIST) + return super(ArticleMarkdown, self).convert(text, *args, **kwargs) + def article_markdown(text, article, *args, **kwargs): md = ArticleMarkdown(article, *args, **kwargs) diff --git a/wiki/tests/test_markdown.py b/wiki/tests/test_markdown.py index d95a325f..49b025bd 100644 --- a/wiki/tests/test_markdown.py +++ b/wiki/tests/test_markdown.py @@ -1,12 +1,16 @@ from __future__ import absolute_import, unicode_literals -from django.test import TestCase import markdown +from django.test import TestCase +from mock import patch from wiki.core.markdown import ArticleMarkdown from wiki.core.markdown.mdx.responsivetable import ResponsiveTableExtension -from mock import patch +from wiki.models import URLPath +from wiki.tests.base import ArticleTestBase + + +class ArticleMarkdownTests(ArticleTestBase): -class ArticleMarkdownTests(TestCase): @patch('wiki.core.markdown.settings') def test_do_not_modify_extensions(self, settings): extensions = ['footnotes', 'attr_list', 'sane_lists'] @@ -15,7 +19,20 @@ class ArticleMarkdownTests(TestCase): ArticleMarkdown(None) self.assertEqual(len(extensions), number_of_extensions) + def test_html_removal(self): + + urlpath = URLPath.create_article( + self.root, + 'html_removal', + title="Test 1", + content="only_this" + ) + + self.assertEqual(urlpath.article.render(), "

</html>only_this

") + + class ResponsiveTableTests(TestCase): + def setUp(self): self.md = markdown.Markdown(extensions=[ 'extra', -- 2.45.2