@@ 109,31 109,33 @@ def get_content_snippet(content, keyword, max_words=30):
# remove html tags
content = striptags(content)
- # remove newlines
- content = content.replace("\n", " ").split(" ")
+ # split on any run of whitespace into a list of words
+ words = content.split()
- return list(filter(lambda x: x != "", content))
+ return words
max_words = int(max_words)
- pattern = re.compile(
- r'(?P<before>.*)%s(?P<after>.*)' % re.escape(keyword),
- re.MULTILINE | re.IGNORECASE | re.DOTALL
- )
+ match_position = content.lower().rfind(keyword.lower())
- match = pattern.search(content)
-
- if match:
- words = clean_text(match.group("before"))
- before_words = words[-max_words // 2:]
- words = clean_text(match.group("after"))
-
- after = " ".join(words[:max_words - len(before_words)])
+ if match_position != -1:
+ try:
+ match_start = content.rindex(' ', 0, match_position) + 1
+ except ValueError:
+ match_start = 0
+ try:
+ match_end = content.index(' ', match_position + len(keyword))
+ except ValueError:
+ match_end = len(content)
+ all_before = clean_text(content[:match_start])
+ match = content[match_start:match_end]
+ all_after = clean_text(content[match_end:])
+ before_words = all_before[-max_words // 2:]
+ after_words = all_after[:max_words - len(before_words)]
before = " ".join(before_words)
-
- html = "%s %s %s" % (before, striptags(keyword), after)
-
- kw_p = re.compile(r'(%s)' % keyword, re.IGNORECASE)
+ after = " ".join(after_words)
+ html = ("%s %s %s" % (before, striptags(match), after)).strip()
+ kw_p = re.compile(r'(\S*%s\S*)' % keyword, re.IGNORECASE)
html = kw_p.sub(r"<strong>\1</strong>", html)
return mark_safe(html)
@@ 19,7 19,7 @@ class GetContentSnippet(TemplateTestCase):
content = text + ' list'
expected = (
'lorem lorem lorem lorem lorem lorem lorem lorem lorem '
- 'lorem lorem lorem lorem lorem lorem <strong>list</strong> '
+ 'lorem lorem lorem lorem lorem lorem <strong>list</strong>'
)
output = get_content_snippet(content, 'list')
@@ 30,7 30,7 @@ class GetContentSnippet(TemplateTestCase):
text = 'lorem ' * 80
content = 'list ' + text
expected = (
- ' <strong>list</strong> lorem lorem lorem lorem lorem '
+ '<strong>list</strong> lorem lorem lorem lorem lorem '
'lorem lorem lorem lorem lorem lorem lorem lorem lorem lorem lorem '
'lorem lorem lorem lorem lorem lorem lorem lorem lorem lorem lorem '
'lorem lorem lorem'
@@ 50,7 50,7 @@ class GetContentSnippet(TemplateTestCase):
'<strong>lorem</strong> <strong>lorem</strong> '
'<strong>lorem</strong> <strong>lorem</strong> '
'<strong>lorem</strong> <strong>lorem</strong> '
- '<strong>lorem</strong> <strong>lorem</strong> '
+ '<strong>lorem</strong> <strong>lorem</strong>'
)
output = get_content_snippet(content, 'lorem')
@@ 82,7 82,7 @@ class GetContentSnippet(TemplateTestCase):
expected = (
'dolorum dolorum dolorum dolorum dolorum dolorum dolorum '
'dolorum dolorum dolorum dolorum dolorum dolorum dolorum dolorum '
- '<strong>list</strong> '
+ '<strong>list</strong>'
)
output = get_content_snippet(content, 'list')
@@ 95,7 95,7 @@ class GetContentSnippet(TemplateTestCase):
content = text + ' list'
output = get_content_snippet(content, 'list', 0)
- expected = 'spam ' * 800 + '<strong>list</strong> '
+ expected = 'spam ' * 800 + '<strong>list</strong>'
self.assertEqual(output, expected)
@@ 105,7 105,7 @@ class GetContentSnippet(TemplateTestCase):
content = text + ' list'
output = get_content_snippet(content, 'list', -10)
- expected = 'spam ' * 75 + '<strong>list</strong> '
+ expected = 'spam ' * 75 + '<strong>list</strong>'
self.assertEqual(output, expected)
@@ 154,7 154,7 @@ class GetContentSnippet(TemplateTestCase):
expected = (
'I should citate Shakespeare or Byron. '
'Or <strong>maybe</strong> copy paste from python '
- 'or django documentation. <strong>maybe</strong> .'
+ 'or django documentation. <strong>Maybe.</strong>'
)
output = get_content_snippet(content, keyword, 30)
@@ 179,10 179,18 @@ class GetContentSnippet(TemplateTestCase):
expected = (
'knight <strong>eggs</strong> spam ham '
- '<strong>eggs</strong> guido python <strong>eggs</strong> '
+ '<strong>eggs</strong> guido python <strong>eggs</strong>'
)
self.assertEqual(output, expected)
+ def test_content_case_preserved(self):
+ # The search keyword and the word embedded in the content spell the
+ # same letters but differ in letter case.
+ keyword = 'DOlOr'
+ match = 'DoLoR'
+ content = 'lorem ipsum %s sit amet' % match
+ output = get_content_snippet(content, keyword)
+ # The snippet must carry the casing found in the content, not the
+ # casing of the search keyword.
+ self.assertIn(match, output)
+ self.assertNotIn(keyword, output)
+
class CanRead(TemplateTestCase):