@@ 1,30 1,6 @@
-"""
-Table of Contents Extension for Python-Markdown
-* * *
-
-(c) 2008 [Jack Miller](http://codezen.org)
-
-Dependencies:
-* [Markdown 2.1+](http://packages.python.org/Markdown/)
-
-Pull request to include the below code in Python-Markdown:
-https://github.com/waylan/Python-Markdown/pull/191
-
-Until it's released, we have a copy here.
-
-/benjaoming
-
-
-UPDATE PR WAS MERGED FOR MARKDOWN 2.3
-
-SO WE AN JUST DEPEND ON THAT!
-
-
-"""
import re
-import unicodedata
-import markdown
+from markdown.extensions.toc import TocTreeprocessor, TocExtension, slugify
from markdown.util import etree
from wiki.plugins.macros import settings
@@ 33,250 9,8 @@ HEADER_ID_PREFIX = "wiki-toc-"
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
-def slugify(value, separator):
- """ Slugify a string, to make it URL friendly. """
- value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
- value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower()
- return re.sub('[%s\s]+' % separator, separator, value)
-
-
-def itertext(elem):
- """ Loop through all children and return text only.
-
- Reimplements method of same name added to ElementTree in Python 2.7
-
- """
- if elem.text:
- yield elem.text
- for e in elem:
- yield from itertext(e)
- if e.tail:
- yield e.tail
-
-
-def unique(elem_id, ids):
- """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """
- while elem_id in ids:
- m = IDCOUNT_RE.match(elem_id)
- if m:
- elem_id = '%s_%d' % (m.group(1), int(m.group(2)) + 1)
- else:
- elem_id = '%s_%d' % (elem_id, 1)
- ids.add(elem_id)
- return HEADER_ID_PREFIX + elem_id
-
-
-def order_toc_list(toc_list):
- """Given an unsorted list with errors and skips, return a nested one.
- [{'level': 1}, {'level': 2}]
- =>
- [{'level': 1, 'children': [{'level': 2, 'children': []}]}]
-
- A wrong list is also converted:
- [{'level': 2}, {'level': 1}]
- =>
- [{'level': 2, 'children': []}, {'level': 1, 'children': []}]
- """
-
- def build_correct(remaining_list, prev_elements=[{'level': 1000}]):
-
- if not remaining_list:
- return [], []
-
- current = remaining_list.pop(0)
- if 'children' not in current:
- current['children'] = []
-
- if not prev_elements:
- # This happens for instance with [8, 1, 1], ie. when some
- # header level is outside a scope. We treat it as a
- # top-level
- next_elements, children = build_correct(remaining_list, [current])
- current['children'].append(children)
- return [current] + next_elements, []
-
- prev_element = prev_elements.pop()
- children = []
- next_elements = []
- # Is current part of the child list or next list?
- if current['level'] > prev_element['level']:
- # print "%d is a child of %d" % (current['level'],
- # prev_element['level'])
- prev_elements.append(prev_element)
- prev_elements.append(current)
- prev_element['children'].append(current)
- next_elements2, children2 = build_correct(
- remaining_list, prev_elements)
- children += children2
- next_elements += next_elements2
- else:
- # print "%d is ancestor of %d" % (current['level'],
- # prev_element['level'])
- if not prev_elements:
- # print "No previous elements, so appending to the next set"
- next_elements.append(current)
- prev_elements = [current]
- next_elements2, children2 = build_correct(
- remaining_list, prev_elements)
- current['children'].extend(children2)
- else:
- # print "Previous elements, comparing to those first"
- remaining_list.insert(0, current)
- next_elements2, children2 = build_correct(
- remaining_list, prev_elements)
- children.extend(children2)
- next_elements += next_elements2
-
- return next_elements, children
-
- flattened_list, __ = build_correct(toc_list)
- return flattened_list
-
-
-class TocTreeprocessor(markdown.treeprocessors.Treeprocessor):
-
- # Iterator wrapper to get parent and child all at once
- def iterparent(self, root):
- for parent in root.getiterator():
- for child in parent:
- yield parent, child
-
- def add_anchor(self, c, elem_id): # @ReservedAssignment
- if self.use_anchors:
- anchor = etree.Element("a")
- anchor.text = c.text
- anchor.attrib["href"] = "#" + elem_id
- anchor.attrib["class"] = "toclink"
- c.text = ""
- for elem in c.getchildren():
- anchor.append(elem)
- c.remove(elem)
- c.append(anchor)
-
- def build_toc_etree(self, div, toc_list):
-
- def build_etree_ul(toc_list, parent):
- ul = etree.SubElement(parent, "ul")
- for item in toc_list:
- # List item link, to be inserted into the toc div
- li = etree.SubElement(ul, "li")
- link = etree.SubElement(li, "a")
- link.text = item.get('name', '')
- link.attrib["href"] = '#' + item.get('id', '')
- if item['children']:
- build_etree_ul(item['children'], li)
- return ul
-
- return build_etree_ul(toc_list, div)
-
- def run(self, doc): # noqa
-
- div = etree.Element("div")
- div.attrib["class"] = "toc"
- header_rgx = re.compile("[Hh][123456]")
-
- self.use_anchors = self.config["anchorlink"] in [
- 1,
- '1',
- True,
- 'True',
- 'true']
-
- # Get a list of id attributes
- used_ids = set()
- for c in doc.getiterator():
- if "id" in c.attrib:
- used_ids.add(c.attrib["id"])
-
- toc_list = []
- marker_found = False
- for (p, c) in self.iterparent(doc):
- text = ''.join(itertext(c)).strip()
- if not text:
- continue
-
- # To keep the output from screwing up the
- # validation by putting a <div> inside of a <p>
- # we actually replace the <p> in its entirety.
- # We do not allow the marker inside a header as that
- # would causes an enless loop of placing a new TOC
- # inside previously generated TOC.
- if c.text and c.text.strip() == self.config["marker"] and \
- not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
- for i in range(len(p)):
- if p[i] == c:
- p[i] = div
- break
- marker_found = True
-
- if header_rgx.match(c.tag):
-
- # Do not override pre-existing ids
- if "id" not in c.attrib:
- elem_id = unique(
- self.config["slugify"](
- text,
- '-'),
- used_ids)
- c.attrib["id"] = elem_id
- else:
- elem_id = c.attrib["id"]
-
- tag_level = int(c.tag[-1])
-
- toc_list.append({
- 'level': tag_level,
- 'id': elem_id,
- 'name': c.text
- })
-
- self.add_anchor(c, elem_id)
-
- if marker_found:
- toc_list_nested = order_toc_list(toc_list)
- self.build_toc_etree(div, toc_list_nested)
- # serialize and attach to markdown instance.
- prettify = self.markdown.treeprocessors.get('prettify')
- if prettify:
- prettify.run(div)
- toc = self.markdown.serializer(div)
- for pp in self.markdown.postprocessors.values():
- toc = pp.run(toc)
- self.markdown.toc = toc
-
-
-class TocExtension(markdown.Extension):
-
- TreeProcessorClass = TocTreeprocessor
-
- def __init__(self, configs=[]):
- self.config = {
- "marker": [
- "[TOC]", "Text to find and replace with Table of Contents -"
- "Defaults to \"[TOC]\""], "slugify": [
- slugify, "Function to generate anchors based on header text-"
- "Defaults to the headerid ext's slugify function."], "title": [
- None, "Title to insert into TOC <div> - "
- "Defaults to None"], "anchorlink": [
- 0, "1 if header should be a self link"
- "Defaults to 0"]}
-
- for key, value in configs:
- self.setConfig(key, value)
-
- def extendMarkdown(self, md, md_globals):
- tocext = self.TreeProcessorClass(md)
- tocext.config = self.getConfigs()
- # Headerid ext is set to '>inline'. With this set to '<prettify',
- # it should always come after headerid ext (and honor ids assinged
- # by the header id extension) if both are used. Same goes for
- # attr_list extension. This must come last because we don't want
- # to redefine ids after toc is created. But we do want toc prettified.
- md.treeprocessors.add("toc", tocext, ">attr_list")
-
-
-def makeExtension(configs={}):
- return TocExtension(configs=configs)
+def wiki_slugify(*args, **kwargs):
+ """Return ``slugify(*args, **kwargs)`` with HEADER_ID_PREFIX prepended."""
+ return HEADER_ID_PREFIX + slugify(*args, **kwargs)
class WikiTreeProcessorClass(TocTreeprocessor):
@@ 306,6 40,10 @@ class WikiTreeProcessorClass(TocTreeprocessor):
class WikiTocExtension(TocExtension):
TreeProcessorClass = WikiTreeProcessorClass
+ def __init__(self, **kwargs):
+ """Set 'slugify' to wiki_slugify unless the caller passed one, then call the parent constructor."""
+ kwargs.setdefault('slugify', wiki_slugify)
+ super().__init__(**kwargs)
+
def extendMarkdown(self, md, md_globals):
if 'toc' in settings.METHODS:
TocExtension.extendMarkdown(self, md, md_globals)