~netlandish/django-wiki: TOC: Use upstream TocExtension

1 files changed, 7 insertions(+), 269 deletions(-)

M src/wiki/plugins/macros/mdx/toc.py

M src/wiki/plugins/macros/mdx/toc.py => src/wiki/plugins/macros/mdx/toc.py +7 -269

@@ 1,30 1,6 @@
-"""
-Table of Contents Extension for Python-Markdown
-* * *
-
-(c) 2008 [Jack Miller](http://codezen.org)
-
-Dependencies:
-* [Markdown 2.1+](http://packages.python.org/Markdown/)
-
-Pull request to include the below code in Python-Markdown:
-https://github.com/waylan/Python-Markdown/pull/191
-
-Until it's released, we have a copy here.
-
-/benjaoming
-
-
-UPDATE PR WAS MERGED FOR MARKDOWN 2.3
-
-SO WE AN JUST DEPEND ON THAT!
-
-
-"""
 import re
-import unicodedata
 
-import markdown
+from markdown.extensions.toc import TocTreeprocessor, TocExtension, slugify
 from markdown.util import etree
 from wiki.plugins.macros import settings
 


@@ 33,250 9,8 @@ HEADER_ID_PREFIX = "wiki-toc-"
 IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
 
 
-def slugify(value, separator):
-    """ Slugify a string, to make it URL friendly. """
-    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
-    value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower()
-    return re.sub('[%s\s]+' % separator, separator, value)
-
-
-def itertext(elem):
-    """ Loop through all children and return text only.
-
-    Reimplements method of same name added to ElementTree in Python 2.7
-
-    """
-    if elem.text:
-        yield elem.text
-    for e in elem:
-        yield from itertext(e)
-        if e.tail:
-            yield e.tail
-
-
-def unique(elem_id, ids):
-    """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """
-    while elem_id in ids:
-        m = IDCOUNT_RE.match(elem_id)
-        if m:
-            elem_id = '%s_%d' % (m.group(1), int(m.group(2)) + 1)
-        else:
-            elem_id = '%s_%d' % (elem_id, 1)
-    ids.add(elem_id)
-    return HEADER_ID_PREFIX + elem_id
-
-
-def order_toc_list(toc_list):
-    """Given an unsorted list with errors and skips, return a nested one.
-    [{'level': 1}, {'level': 2}]
-    =>
-    [{'level': 1, 'children': [{'level': 2, 'children': []}]}]
-
-    A wrong list is also converted:
-    [{'level': 2}, {'level': 1}]
-    =>
-    [{'level': 2, 'children': []}, {'level': 1, 'children': []}]
-    """
-
-    def build_correct(remaining_list, prev_elements=[{'level': 1000}]):
-
-        if not remaining_list:
-            return [], []
-
-        current = remaining_list.pop(0)
-        if 'children' not in current:
-            current['children'] = []
-
-        if not prev_elements:
-            # This happens for instance with [8, 1, 1], ie. when some
-            # header level is outside a scope. We treat it as a
-            # top-level
-            next_elements, children = build_correct(remaining_list, [current])
-            current['children'].append(children)
-            return [current] + next_elements, []
-
-        prev_element = prev_elements.pop()
-        children = []
-        next_elements = []
-        # Is current part of the child list or next list?
-        if current['level'] > prev_element['level']:
-            # print "%d is a child of %d" % (current['level'],
-            # prev_element['level'])
-            prev_elements.append(prev_element)
-            prev_elements.append(current)
-            prev_element['children'].append(current)
-            next_elements2, children2 = build_correct(
-                remaining_list, prev_elements)
-            children += children2
-            next_elements += next_elements2
-        else:
-            # print "%d is ancestor of %d" % (current['level'],
-            # prev_element['level'])
-            if not prev_elements:
-                # print "No previous elements, so appending to the next set"
-                next_elements.append(current)
-                prev_elements = [current]
-                next_elements2, children2 = build_correct(
-                    remaining_list, prev_elements)
-                current['children'].extend(children2)
-            else:
-                # print "Previous elements, comparing to those first"
-                remaining_list.insert(0, current)
-                next_elements2, children2 = build_correct(
-                    remaining_list, prev_elements)
-                children.extend(children2)
-            next_elements += next_elements2
-
-        return next_elements, children
-
-    flattened_list, __ = build_correct(toc_list)
-    return flattened_list
-
-
-class TocTreeprocessor(markdown.treeprocessors.Treeprocessor):
-
-    # Iterator wrapper to get parent and child all at once
-    def iterparent(self, root):
-        for parent in root.getiterator():
-            for child in parent:
-                yield parent, child
-
-    def add_anchor(self, c, elem_id):  # @ReservedAssignment
-        if self.use_anchors:
-            anchor = etree.Element("a")
-            anchor.text = c.text
-            anchor.attrib["href"] = "#" + elem_id
-            anchor.attrib["class"] = "toclink"
-            c.text = ""
-            for elem in c.getchildren():
-                anchor.append(elem)
-                c.remove(elem)
-            c.append(anchor)
-
-    def build_toc_etree(self, div, toc_list):
-
-        def build_etree_ul(toc_list, parent):
-            ul = etree.SubElement(parent, "ul")
-            for item in toc_list:
-                # List item link, to be inserted into the toc div
-                li = etree.SubElement(ul, "li")
-                link = etree.SubElement(li, "a")
-                link.text = item.get('name', '')
-                link.attrib["href"] = '#' + item.get('id', '')
-                if item['children']:
-                    build_etree_ul(item['children'], li)
-            return ul
-
-        return build_etree_ul(toc_list, div)
-
-    def run(self, doc):  # noqa
-
-        div = etree.Element("div")
-        div.attrib["class"] = "toc"
-        header_rgx = re.compile("[Hh][123456]")
-
-        self.use_anchors = self.config["anchorlink"] in [
-            1,
-            '1',
-            True,
-            'True',
-            'true']
-
-        # Get a list of id attributes
-        used_ids = set()
-        for c in doc.getiterator():
-            if "id" in c.attrib:
-                used_ids.add(c.attrib["id"])
-
-        toc_list = []
-        marker_found = False
-        for (p, c) in self.iterparent(doc):
-            text = ''.join(itertext(c)).strip()
-            if not text:
-                continue
-
-            # To keep the output from screwing up the
-            # validation by putting a <div> inside of a <p>
-            # we actually replace the <p> in its entirety.
-            # We do not allow the marker inside a header as that
-            # would causes an enless loop of placing a new TOC
-            # inside previously generated TOC.
-            if c.text and c.text.strip() == self.config["marker"] and \
-               not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
-                for i in range(len(p)):
-                    if p[i] == c:
-                        p[i] = div
-                        break
-                marker_found = True
-
-            if header_rgx.match(c.tag):
-
-                # Do not override pre-existing ids
-                if "id" not in c.attrib:
-                    elem_id = unique(
-                        self.config["slugify"](
-                            text,
-                            '-'),
-                        used_ids)
-                    c.attrib["id"] = elem_id
-                else:
-                    elem_id = c.attrib["id"]
-
-                tag_level = int(c.tag[-1])
-
-                toc_list.append({
-                    'level': tag_level,
-                    'id': elem_id,
-                    'name': c.text
-                })
-
-                self.add_anchor(c, elem_id)
-
-        if marker_found:
-            toc_list_nested = order_toc_list(toc_list)
-            self.build_toc_etree(div, toc_list_nested)
-            # serialize and attach to markdown instance.
-            prettify = self.markdown.treeprocessors.get('prettify')
-            if prettify:
-                prettify.run(div)
-            toc = self.markdown.serializer(div)
-            for pp in self.markdown.postprocessors.values():
-                toc = pp.run(toc)
-            self.markdown.toc = toc
-
-
-class TocExtension(markdown.Extension):
-
-    TreeProcessorClass = TocTreeprocessor
-
-    def __init__(self, configs=[]):
-        self.config = {
-            "marker": [
-                "[TOC]", "Text to find and replace with Table of Contents -"
-                "Defaults to \"[TOC]\""], "slugify": [
-                slugify, "Function to generate anchors based on header text-"
-                "Defaults to the headerid ext's slugify function."], "title": [
-                None, "Title to insert into TOC <div> - "
-                "Defaults to None"], "anchorlink": [
-                    0, "1 if header should be a self link"
-                    "Defaults to 0"]}
-
-        for key, value in configs:
-            self.setConfig(key, value)
-
-    def extendMarkdown(self, md, md_globals):
-        tocext = self.TreeProcessorClass(md)
-        tocext.config = self.getConfigs()
-        # Headerid ext is set to '>inline'. With this set to '<prettify',
-        # it should always come after headerid ext (and honor ids assinged
-        # by the header id extension) if both are used. Same goes for
-        # attr_list extension. This must come last because we don't want
-        # to redefine ids after toc is created. But we do want toc prettified.
-        md.treeprocessors.add("toc", tocext, ">attr_list")
-
-
-def makeExtension(configs={}):
-    return TocExtension(configs=configs)
+def wiki_slugify(*args, **kwargs):
+    return HEADER_ID_PREFIX + slugify(*args, **kwargs)
 
 
 class WikiTreeProcessorClass(TocTreeprocessor):


@@ 306,6 40,10 @@ class WikiTreeProcessorClass(TocTreeprocessor):
 class WikiTocExtension(TocExtension):
     TreeProcessorClass = WikiTreeProcessorClass
 
+    def __init__(self, **kwargs):
+        kwargs.setdefault('slugify', wiki_slugify)
+        super().__init__(**kwargs)
+
     def extendMarkdown(self, md, md_globals):
         if 'toc' in settings.METHODS:
             TocExtension.extendMarkdown(self, md, md_globals)