~netlandish/django-wiki: Tweak search result highlighting

2 files changed, 37 insertions(+), 27 deletions(-)

M src/wiki/templatetags/wiki_tags.py
M tests/core/test_template_filters.py

M src/wiki/templatetags/wiki_tags.py => src/wiki/templatetags/wiki_tags.py +21 -19

@@ 109,31 109,33 @@ def get_content_snippet(content, keyword, max_words=30):
 
         # remove html tags
         content = striptags(content)
-        # remove newlines
-        content = content.replace("\n", " ").split(" ")
+        # remove whitespace
+        words = content.split()
 
-        return list(filter(lambda x: x != "", content))
+        return words
 
     max_words = int(max_words)
 
-    pattern = re.compile(
-        r'(?P<before>.*)%s(?P<after>.*)' % re.escape(keyword),
-        re.MULTILINE | re.IGNORECASE | re.DOTALL
-    )
+    match_position = content.lower().rfind(keyword.lower())
 
-    match = pattern.search(content)
-
-    if match:
-        words = clean_text(match.group("before"))
-        before_words = words[-max_words // 2:]
-        words = clean_text(match.group("after"))
-
-        after = " ".join(words[:max_words - len(before_words)])
+    if match_position != -1:
+        try:
+            match_start = content.rindex(' ', 0, match_position) + 1
+        except ValueError:
+            match_start = 0
+        try:
+            match_end = content.index(' ', match_position + len(keyword))
+        except ValueError:
+            match_end = len(content)
+        all_before = clean_text(content[:match_start])
+        match = content[match_start:match_end]
+        all_after = clean_text(content[match_end:])
+        before_words = all_before[-max_words // 2:]
+        after_words = all_after[:max_words - len(before_words)]
         before = " ".join(before_words)
-
-        html = "%s %s %s" % (before, striptags(keyword), after)
-
-        kw_p = re.compile(r'(%s)' % keyword, re.IGNORECASE)
+        after = " ".join(after_words)
+        html = ("%s %s %s" % (before, striptags(match), after)).strip()
+        kw_p = re.compile(r'(\S*%s\S*)' % keyword, re.IGNORECASE)
         html = kw_p.sub(r"<strong>\1</strong>", html)
 
         return mark_safe(html)

M tests/core/test_template_filters.py => tests/core/test_template_filters.py +16 -8

@@ 19,7 19,7 @@ class GetContentSnippet(TemplateTestCase):
         content = text + ' list'
         expected = (
             'lorem lorem lorem lorem lorem lorem lorem lorem lorem '
-            'lorem lorem lorem lorem lorem lorem <strong>list</strong> '
+            'lorem lorem lorem lorem lorem lorem <strong>list</strong>'
         )
 
         output = get_content_snippet(content, 'list')


@@ 30,7 30,7 @@ class GetContentSnippet(TemplateTestCase):
         text = 'lorem ' * 80
         content = 'list ' + text
         expected = (
-            ' <strong>list</strong> lorem lorem lorem lorem lorem '
+            '<strong>list</strong> lorem lorem lorem lorem lorem '
             'lorem lorem lorem lorem lorem lorem lorem lorem lorem lorem lorem '
             'lorem lorem lorem lorem lorem lorem lorem lorem lorem lorem lorem '
             'lorem lorem lorem'


@@ 50,7 50,7 @@ class GetContentSnippet(TemplateTestCase):
             '<strong>lorem</strong> <strong>lorem</strong> '
             '<strong>lorem</strong> <strong>lorem</strong> '
             '<strong>lorem</strong> <strong>lorem</strong> '
-            '<strong>lorem</strong> <strong>lorem</strong> '
+            '<strong>lorem</strong> <strong>lorem</strong>'
         )
 
         output = get_content_snippet(content, 'lorem')


@@ 82,7 82,7 @@ class GetContentSnippet(TemplateTestCase):
         expected = (
             'dolorum dolorum dolorum dolorum dolorum dolorum dolorum '
             'dolorum dolorum dolorum dolorum dolorum dolorum dolorum dolorum '
-            '<strong>list</strong> '
+            '<strong>list</strong>'
         )
 
         output = get_content_snippet(content, 'list')


@@ 95,7 95,7 @@ class GetContentSnippet(TemplateTestCase):
         content = text + ' list'
 
         output = get_content_snippet(content, 'list', 0)
-        expected = 'spam ' * 800 + '<strong>list</strong> '
+        expected = 'spam ' * 800 + '<strong>list</strong>'
 
         self.assertEqual(output, expected)
 


@@ 105,7 105,7 @@ class GetContentSnippet(TemplateTestCase):
         content = text + ' list'
 
         output = get_content_snippet(content, 'list', -10)
-        expected = 'spam ' * 75 + '<strong>list</strong> '
+        expected = 'spam ' * 75 + '<strong>list</strong>'
 
         self.assertEqual(output, expected)
 


@@ 154,7 154,7 @@ class GetContentSnippet(TemplateTestCase):
         expected = (
             'I should citate Shakespeare or Byron. '
             'Or <strong>maybe</strong> copy paste from python '
-            'or django documentation. <strong>maybe</strong> .'
+            'or django documentation. <strong>Maybe.</strong>'
         )
 
         output = get_content_snippet(content, keyword, 30)


@@ 179,10 179,18 @@ class GetContentSnippet(TemplateTestCase):
 
         expected = (
             'knight <strong>eggs</strong> spam ham '
-            '<strong>eggs</strong> guido python <strong>eggs</strong> '
+            '<strong>eggs</strong> guido python <strong>eggs</strong>'
         )
         self.assertEqual(output, expected)
 
+    def test_content_case_preserved(self):
+        keyword = 'DOlOr'
+        match = 'DoLoR'
+        content = 'lorem ipsum %s sit amet' % match
+        output = get_content_snippet(content, keyword)
+        self.assertIn(match, output)
+        self.assertNotIn(keyword, output)
+
 
 class CanRead(TemplateTestCase):