Update Markdown parser from 2.1.1 to 2.3.2

2020-03-20 16:55:45 +01:00 · 2020-03-20 16:55:45 +01:00 · 7cb4f1d3d7
parent 4b642fa48a
commit 7cb4f1d3d7
1 changed files with 365 additions and 180 deletions
--- a/dynastie/generators/markdown2.py
+++ b/dynastie/generators/markdown2.py
@ -53,8 +53,9 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
 * header-ids: Adds "id" attributes to headers. The id value is a slug of
  the header text.
 * html-classes: Takes a dict mapping html tag names (lowercase) to a
-  string to use for a "class" tag attribute. Currently only supports
+  string to use for a "class" tag attribute. Currently only supports "img",
-  "pre" and "code" tags. Add an issue if you require this for other tags.
+  "table", "pre" and "code" tags. Add an issue if you require this for other
  tags.
 * markdown-in-html: Allow the use of `markdown="1"` in a block HTML tag to
  have markdown processing be done on its contents. Similar to
  <http://michelf.com/projects/php-markdown/extra/#markdown-attr> but with
@ -70,9 +71,14 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
 * smarty-pants: Replaces ' and " with curly quotation marks or curly
  apostrophes.  Replaces --, ---, ..., and . . . with en dashes, em dashes,
  and ellipses.
 * spoiler: A special kind of blockquote commonly hidden behind a
  click on SO. Syntax per <http://meta.stackexchange.com/a/72878>.
 * toc: The returned HTML string gets a new "toc_html" attribute which is
  a Table of Contents for the document. (experimental)
 * xml: Passes one-liner processing instructions and namespaced XML tags.
 * tables: Tables using the same format as GFM
  <https://help.github.com/articles/github-flavored-markdown#tables> and
  PHP-Markdown Extra <https://michelf.ca/projects/php-markdown/extra/#table>.
 * wiki-tables: Google Code Wiki-style tables. See
  <http://code.google.com/p/support/wiki/WikiSyntax#Tables>.
 """
@ -82,13 +88,11 @@ see <https://github.com/trentm/python-markdown2/wiki/Extras> for details):
 #   not yet sure if there implications with this. Compare 'pydoc sre'
 #   and 'perldoc perlre'.
-__version_info__ = (2, 1, 1)
+__version_info__ = (2, 3, 2)
 __version__ = '.'.join(map(str, __version_info__))
 __author__ = "Trent Mick"
 import os
 import sys
 from pprint import pprint
 import re
 import logging
 try:
@ -102,13 +106,7 @@ import codecs
 # ---- Python version compat
 try:
    from urllib.parse import quote # python3
 except ImportError:
    from urllib import quote # python2
 if sys.version_info[:2] < (2, 4):
    from sets import Set as set
    def reversed(sequence):
        for i in sequence[::-1]:
            yield i
@ -127,7 +125,6 @@ elif sys.version_info[0] >= 3:
    base_string_type = str
 # ---- globals
 DEBUG = False
@ -145,14 +142,11 @@ g_escape_table = dict([(ch, _hash_text(ch))
    for ch in '\\`*_{}[]()>#+-.!'])
 # ---- exceptions
 class MarkdownError(Exception):
    """Error raised by this module, e.g. for an invalid 'safe_mode' value."""
    pass
 # ---- public api
 def markdown_path(path, encoding="utf-8",
@ -167,6 +161,7 @@ def markdown_path(path, encoding="utf-8",
                    link_patterns=link_patterns,
                    use_file_vars=use_file_vars).convert(text)
 def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
             safe_mode=None, extras=None, link_patterns=None,
             use_file_vars=False):
@ -175,6 +170,7 @@ def markdown(text, html4tags=False, tab_width=DEFAULT_TAB_WIDTH,
                    link_patterns=link_patterns,
                    use_file_vars=use_file_vars).convert(text)
 class Markdown(object):
    # The dict of "extras" to enable in processing -- a mapping of
    # extra name to argument for the extra. Most extras do not have an
@ -222,7 +218,7 @@ class Markdown(object):
                extras = dict([(e, None) for e in extras])
            self.extras.update(extras)
        assert isinstance(self.extras, dict)
-        if "toc" in self.extras and not "header-ids" in self.extras:
+        if "toc" in self.extras and "header-ids" not in self.extras:
            self.extras["header-ids"] = None   # "toc" implies "header-ids"
        self._instance_extras = self.extras.copy()
@ -254,6 +250,11 @@ class Markdown(object):
    # should only be used in <a> tags with an "href" attribute.
    _a_nofollow = re.compile(r"<(a)([^>]*href=)", re.IGNORECASE)
    # Adds target="_blank" so the linked document opens in a new window or tab.
    # Like _a_nofollow, this should only be applied to <a> tags that have an
    # "href" attribute, so the same pattern is reused.
    _a_blank = _a_nofollow
    def convert(self, text):
        """Convert the given text."""
        # Main function. The order in which other subs are called here is
@ -288,7 +289,8 @@ class Markdown(object):
                    self.extras[ename] = earg
        # Standardize line endings:
-        text = re.sub("\r\n|\r", "\n", text)
+        text = text.replace("\r\n", "\n")
        text = text.replace("\r", "\n")
        # Make sure $text ends with a couple of newlines:
        text += "\n\n"
@ -308,13 +310,16 @@ class Markdown(object):
        text = self.preprocess(text)
        if "fenced-code-blocks" in self.extras and not self.safe_mode:
            text = self._do_fenced_code_blocks(text)
        if self.safe_mode:
            text = self._hash_html_spans(text)
        # Turn block-level HTML blocks into hash entries
        text = self._hash_html_blocks(text, raw=True)
-        if "fenced-code-blocks" in self.extras:
+        if "fenced-code-blocks" in self.extras and self.safe_mode:
            text = self._do_fenced_code_blocks(text)
        # Strip link definitions, store in hashes.
@ -340,6 +345,9 @@ class Markdown(object):
        if "nofollow" in self.extras:
            text = self._a_nofollow.sub(r'<\1 rel="nofollow"\2', text)
        if "target-blank-links" in self.extras:
            text = self._a_blank.sub(r'<\1 target="_blank"\2', text)
        text += "\n"
        rv = UnicodeWithAttrs(text)
@ -363,18 +371,29 @@ class Markdown(object):
        """
        return text
-    # Is metadata if the content starts with '---'-fenced `key: value`
+    # Is metadata if the content starts with optional '---'-fenced `key: value`
    # pairs. E.g. (indented for presentation):
    #   ---
    #   foo: bar
    #   another-var: blah blah
    #   ---
-    _metadata_pat = re.compile("""^---[ \t]*\n((?:[ \t]*[^ \t:]+[ \t]*:[^\n]*\n)+)---[ \t]*\n""")
+    #   # header
    # or:
    #   foo: bar
    #   another-var: blah blah
    #   
    #   # header
    _metadata_pat = re.compile(r"""
        ^
        (?:---[\ \t]*\n)?                       # optional "---"
        ((?:[ \t]*[^ \t:]+[\ \t]*:[^\n]*\n)+)   # "key: value" pairs
        (?:---[ \t]*)?                          # optional "---"
        \n""",
        re.VERBOSE
    )
    def _extract_metadata(self, text):
        # fast test
        if not text.startswith("---"):
            return text
        match = self._metadata_pat.match(text)
        if not match:
            return text
@ -387,7 +406,6 @@ class Markdown(object):
        return tail
    _emacs_oneliner_vars_pat = re.compile(r"-\*-\s*([^\r\n]*?)\s*-\*-", re.UNICODE)
    # This regular expression is intended to match blocks like this:
    #    PREFIX Local Variables: SUFFIX
@ -505,14 +523,19 @@ class Markdown(object):
        return emacs_vars
-    # Cribbed from a post by Bart Lateur:
+    def _detab_line(self, line):
-    # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
+        r"""Recursively convert tabs to spaces in a single line.
-    _detab_re = re.compile(r'(.*?)\t', re.M)
+
-    def _detab_sub(self, match):
+        Called from _detab()."""
-        g1 = match.group(1)
+        if '\t' not in line:
-        return g1 + (' ' * (self.tab_width - len(g1) % self.tab_width))
+            return line
        chunk1, chunk2 = line.split('\t', 1)
        chunk1 += (' ' * (self.tab_width - len(chunk1) % self.tab_width))
        output = chunk1 + chunk2
        return self._detab_line(output)
    def _detab(self, text):
-        r"""Remove (leading?) tabs from a file.
+        r"""Iterate text line by line and convert tabs to spaces.
            >>> m = Markdown()
            >>> m._detab("\tfoo")
@ -528,7 +551,10 @@ class Markdown(object):
        """
        if '\t' not in text:
            return text
-        return self._detab_re.subn(self._detab_sub, text)[0]
+        output = []
        for line in text.splitlines():
            output.append(self._detab_line(line))
        return '\n'.join(output)
    # I broke out the html5 tags here and add them to _block_tags_a and
    # _block_tags_b.  This way html5 tags are easy to keep track of.
@ -776,12 +802,7 @@ class Markdown(object):
            re.X | re.M)
        return footnote_def_re.sub(self._extract_footnote_def_sub, text)
-
+    _hr_re = re.compile(r'^[ ]{0,3}([-_*][ ]{0,2}){3,}$', re.M)
    _hr_data = [
        ('*', re.compile(r"^[ ]{0,3}\*(.*?)$", re.M)),
        ('-', re.compile(r"^[ ]{0,3}\-(.*?)$", re.M)),
        ('_', re.compile(r"^[ ]{0,3}\_(.*?)$", re.M)),
    ]
    def _run_block_gamut(self, text):
        # These are all the transformations that form block-level
@ -798,13 +819,7 @@ class Markdown(object):
        # Markdown.pl 1.0.1's hr regexes limit the number of spaces between the
        # hr chars to one or two. We'll reproduce that limit here.
        hr = "\n<hr"+self.empty_element_suffix+"\n"
-        for ch, regex in self._hr_data:
+        text = re.sub(self._hr_re, hr, text)
            if ch in text:
                for m in reversed(list(regex.finditer(text))):
                    tail = m.group(1).rstrip()
                    if not tail.strip(ch + ' ') and tail.count("   ") == 0:
                        start, end = m.span()
                        text = text[:start] + hr + text[end:]
        text = self._do_lists(text)
@ -812,6 +827,8 @@ class Markdown(object):
            text = self._prepare_pyshell_blocks(text)
        if "wiki-tables" in self.extras:
            text = self._do_wiki_tables(text)
        if "tables" in self.extras:
            text = self._do_tables(text)
        text = self._do_code_blocks(text)
@ -852,6 +869,79 @@ class Markdown(object):
        return _pyshell_block_re.sub(self._pyshell_block_sub, text)
    def _table_sub(self, match):
        trim_space_re = '^[ \t\n]+|[ \t\n]+$'
        trim_bar_re = '^\||\|$'
        head, underline, body = match.groups()
        # Determine aligns for columns.
        cols = [cell.strip() for cell in re.sub(trim_bar_re, "", re.sub(trim_space_re, "", underline)).split('|')]
        align_from_col_idx = {}
        for col_idx, col in enumerate(cols):
            if col[0] == ':' and col[-1] == ':':
                align_from_col_idx[col_idx] = ' align="center"'
            elif col[0] == ':':
                align_from_col_idx[col_idx] = ' align="left"'
            elif col[-1] == ':':
                align_from_col_idx[col_idx] = ' align="right"'
        # thead
        hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<thead>', '<tr>']
        cols = [cell.strip() for cell in re.sub(trim_bar_re, "", re.sub(trim_space_re, "", head)).split('|')]
        for col_idx, col in enumerate(cols):
            hlines.append('  <th%s>%s</th>' % (
                align_from_col_idx.get(col_idx, ''),
                self._run_span_gamut(col)
            ))
        hlines.append('</tr>')
        hlines.append('</thead>')
        # tbody
        hlines.append('<tbody>')
        for line in body.strip('\n').split('\n'):
            hlines.append('<tr>')
            cols = [cell.strip() for cell in re.sub(trim_bar_re, "", re.sub(trim_space_re, "", line)).split('|')]
            for col_idx, col in enumerate(cols):
                hlines.append('  <td%s>%s</td>' % (
                    align_from_col_idx.get(col_idx, ''),
                    self._run_span_gamut(col)
                ))
            hlines.append('</tr>')
        hlines.append('</tbody>')
        hlines.append('</table>')
        return '\n'.join(hlines) + '\n'
    def _do_tables(self, text):
        """Copying PHP-Markdown and GFM table syntax. Some regex borrowed from
        https://github.com/michelf/php-markdown/blob/lib/Michelf/Markdown.php#L2538
        """
        less_than_tab = self.tab_width - 1
        table_re = re.compile(r'''
                (?:(?<=\n\n)|\A\n?)             # leading blank line
                ^[ ]{0,%d}                      # allowed whitespace
                (.*[|].*)  \n                   # $1: header row (at least one pipe)
                ^[ ]{0,%d}                      # allowed whitespace
                (                               # $2: underline row
                    # underline row with leading bar
                    (?:  \|\ *:?-+:?\ *  )+  \|?  \n
                    |
                    # or, underline row without leading bar
                    (?:  \ *:?-+:?\ *\|  )+  (?:  \ *:?-+:?\ *  )?  \n
                )
                (                               # $3: data rows
                    (?:
                        ^[ ]{0,%d}(?!\ )         # ensure line begins with 0 to less_than_tab spaces
                        .*\|.*  \n
                    )+
                )
            ''' % (less_than_tab, less_than_tab, less_than_tab), re.M | re.X)
        return table_re.sub(self._table_sub, text)
    def _wiki_table_sub(self, match):
        ttext = match.group(0).strip()
        # print 'wiki table: %r' % match.group(0)
@ -861,7 +951,7 @@ class Markdown(object):
            row = [c.strip() for c in re.split(r'(?<!\\)\|\|', line)]
            rows.append(row)
        # pprint(rows)
-        hlines = ['<table>', '<tbody>']
+        hlines = ['<table%s>' % self._html_class_str_from_tag('table'), '<tbody>']
        for row in rows:
            hrow = ['<tr>']
            for cell in row:
@ -907,12 +997,18 @@ class Markdown(object):
        text = self._encode_amps_and_angles(text)
        if "strike" in self.extras:
            text = self._do_strike(text)
        text = self._do_italics_and_bold(text)
        if "smarty-pants" in self.extras:
            text = self._do_smart_punctuation(text)
        # Do hard breaks:
        if "break-on-newline" in self.extras:
            text = re.sub(r" *\n", "<br%s\n" % self.empty_element_suffix, text)
        else:
            text = re.sub(r" {2,}\n", " <br%s\n" % self.empty_element_suffix, text)
        return text
@ -1003,22 +1099,14 @@ class Markdown(object):
            raise MarkdownError("invalid value for 'safe_mode': %r (must be "
                                "'escape' or 'replace')" % self.safe_mode)
-    _tail_of_inline_link_re = re.compile(r'''
+    _inline_link_title = re.compile(r'''
-          # Match tail of: [text](/url/) or [text](/url/ "title")
+            (                   # \1
-          \(            # literal paren
+              [ \t]+
-            [ \t]*
+              (['"])            # quote char = \2
            (?P<url>            # \1
                <.*?>
                |
                .*?
            )
            [ \t]*
            (                   # \2
              (['"])            # quote char = \3
              (?P<title>.*?)
-              \3                # matching quote
+              \2
            )?                  # title is optional
-          \)
+          \)$
        ''', re.X | re.S)
    _tail_of_reference_link_re = re.compile(r'''
          # Match tail of: [text][id]
@ -1029,6 +1117,52 @@ class Markdown(object):
          \]
        ''', re.X | re.S)
    _whitespace = re.compile(r'\s*')
    _strip_anglebrackets = re.compile(r'<(.*)>.*')
    def _find_non_whitespace(self, text, start):
        """Returns the index of the first non-whitespace character in text
        after (and including) start
        """
        match = self._whitespace.match(text, start)
        return match.end()
    def _find_balanced(self, text, start, open_c, close_c):
        """Returns the index where the open_c and close_c characters balance
        out - the same number of open_c and close_c are encountered - or the
        end of string if it's reached before the balance point is found.
        """
        i = start
        l = len(text)
        count = 1
        while count > 0 and i < l:
            if text[i] == open_c:
                count += 1
            elif text[i] == close_c:
                count -= 1
            i += 1
        return i
    def _extract_url_and_title(self, text, start):
        """Extracts the url and (optional) title from the tail of a link"""
        # text[start] equals the opening parenthesis
        idx = self._find_non_whitespace(text, start+1)
        if idx == len(text):
            return None, None, None
        end_idx = idx
        has_anglebrackets = text[idx] == "<"
        if has_anglebrackets:
            end_idx = self._find_balanced(text, end_idx+1, "<", ">")
        end_idx = self._find_balanced(text, end_idx, "(", ")")
        match = self._inline_link_title.search(text, idx, end_idx)
        if not match:
            return None, None, None
        url, title = text[idx:match.start()], match.group("title")
        if has_anglebrackets:
            url = self._strip_anglebrackets.sub(r'\1', url)
        return url, title, end_idx
    def _do_links(self, text):
        """Turn Markdown link shortcuts into XHTML <a> and <img> tags.
@ -1111,8 +1245,8 @@ class Markdown(object):
            # Inline anchor or img?
            if text[p] == '(':  # attempt at perf improvement
-                match = self._tail_of_inline_link_re.match(text, p)
+                url, title, url_end_idx = self._extract_url_and_title(text, p)
-                if match:
+                if url is not None:
                    # Handle an inline anchor or img.
                    is_img = start_idx > 0 and text[start_idx-1] == "!"
                    if is_img:
@ -1123,9 +1257,6 @@ class Markdown(object):
                        start_idx -= 1
                        is_img = 1
                    url, title = match.group("url"), match.group("title")
                    if url and url[0] == '<':
                        url = url[1:-1]  # '<url>' -> 'url'
                    # We've got to encode these to avoid conflicting
                    # with italics/bold.
                    url = url.replace('*', self._escape_table['*']) \
@ -1138,20 +1269,17 @@ class Markdown(object):
                    else:
                        title_str = ''
                    if is_img:
                        img_class_str = self._html_class_str_from_tag("img")
                        if is_inline_img:
-				result = '<img class="inlineimage" src="%s" alt="%s"%s%s' \
+                            img_class_str = ' class="inlineimage"'
                        result = '<img src="%s" alt="%s"%s%s%s' \
                                 % (url.replace('"', '&quot;'),
                                    _xml_escape_attr(link_text),
-				       title_str, self.empty_element_suffix)
+                                    title_str, img_class_str, self.empty_element_suffix)
                        else:
                            result = '<img src="%s" alt="%s"%s%s' \
 				% (url.replace('"', '&quot;'),
 				   _xml_escape_attr(link_text),
 				   title_str, self.empty_element_suffix)
                        if "smarty-pants" in self.extras:
                            result = result.replace('"', self._escape_table['"'])
                        curr_pos = start_idx + len(result)
-                        text = text[:start_idx] + result + text[match.end():]
+                        text = text[:start_idx] + result + text[url_end_idx:]
                    elif start_idx >= anchor_allowed_pos:
                        result_head = '<a href="%s"%s>' % (url, title_str)
                        result = '%s%s</a>' % (result_head, link_text)
@ -1161,7 +1289,7 @@ class Markdown(object):
                        # anchor_allowed_pos on.
                        curr_pos = start_idx + len(result_head)
                        anchor_allowed_pos = start_idx + len(result)
-                        text = text[:start_idx] + result + text[match.end():]
+                        text = text[:start_idx] + result + text[url_end_idx:]
                    else:
                        # Anchor not allowed here.
                        curr_pos = start_idx + 1
@ -1186,7 +1314,6 @@ class Markdown(object):
                                 .replace('_', self._escape_table['_'])
                        title = self.titles.get(link_id)
                        if title:
                            before = title
                            title = _xml_escape_attr(title) \
                                .replace('*', self._escape_table['*']) \
                                .replace('_', self._escape_table['_'])
@ -1194,10 +1321,11 @@ class Markdown(object):
                        else:
                            title_str = ''
                        if is_img:
-                            result = '<img src="%s" alt="%s"%s%s' \
+                            img_class_str = self._html_class_str_from_tag("img")
                            result = '<img src="%s" alt="%s"%s%s%s' \
                                % (url.replace('"', '&quot;'),
                                   link_text.replace('"', '&quot;'),
-                                   title_str, self.empty_element_suffix)
+                                   title_str, img_class_str, self.empty_element_suffix)
                            if "smarty-pants" in self.extras:
                                result = result.replace('"', self._escape_table['"'])
                            curr_pos = start_idx + len(result)
@ -1258,44 +1386,42 @@ class Markdown(object):
            self._toc = []
        self._toc.append((level, id, self._unescape_special_chars(name)))
-    _setext_h_re = re.compile(r'^(.+)[ \t]*\n(=+|-+)[ \t]*\n+', re.M)
+    _h_re_base = r'''
-    def _setext_h_sub(self, match):
+        (^(.+)[ \t]*\n(=+|-+)[ \t]*\n+)
-        n = {"=": 1, "-": 2}[match.group(2)[0]]
+        |
-        demote_headers = self.extras.get("demote-headers")
+        (^(\#{1,6})  # \1 = string of #'s
-        if demote_headers:
+        [ \t]%s
            n = min(n + demote_headers, 6)
        header_id_attr = ""
        if "header-ids" in self.extras:
            header_id = self.header_id_from_text(match.group(1),
                self.extras["header-ids"], n)
            if header_id:
                header_id_attr = ' id="%s"' % header_id
        html = self._run_span_gamut(match.group(1))
        if "toc" in self.extras and header_id:
            self._toc_add_entry(n, header_id, html)
        return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n)
    _atx_h_re = re.compile(r'''
        ^(\#{1,6})  # \1 = string of #'s
        [ \t]+
        (.+?)       # \2 = Header text
        [ \t]*
        (?<!\\)     # ensure not an escaped trailing '#'
        \#*         # optional closing #'s (not counted)
        \n+
-        ''', re.X | re.M)
+        )
-    def _atx_h_sub(self, match):
+        '''
-        n = len(match.group(1))
+
    _h_re = re.compile(_h_re_base % '*', re.X | re.M)
    _h_re_tag_friendly = re.compile(_h_re_base % '+', re.X | re.M)
    def _h_sub(self, match):
        if match.group(1) is not None:
            # Setext header
            n = {"=": 1, "-": 2}[match.group(3)[0]]
            header_group = match.group(2)
        else:
            # atx header
            n = len(match.group(5))
            header_group = match.group(6)
        demote_headers = self.extras.get("demote-headers")
        if demote_headers:
            n = min(n + demote_headers, 6)
        header_id_attr = ""
        if "header-ids" in self.extras:
-            header_id = self.header_id_from_text(match.group(2),
+            header_id = self.header_id_from_text(header_group,
                self.extras["header-ids"], n)
            if header_id:
                header_id_attr = ' id="%s"' % header_id
-        html = self._run_span_gamut(match.group(2))
+        html = self._run_span_gamut(header_group)
        if "toc" in self.extras and header_id:
            self._toc_add_entry(n, header_id, html)
        return "<h%d%s>%s</h%d>\n\n" % (n, header_id_attr, html, n)
@ -1307,7 +1433,6 @@ class Markdown(object):
        #
        #     Header 2
        #     --------
        text = self._setext_h_re.sub(self._setext_h_sub, text)
        # atx-style headers:
        #   # Header 1
@ -1315,10 +1440,10 @@ class Markdown(object):
        #   ## Header 2 with closing hashes ##
        #   ...
        #   ###### Header 6
        text = self._atx_h_re.sub(self._atx_h_sub, text)
        return text
        if 'tag-friendly' in self.extras:
            return self._h_re_tag_friendly.sub(self._h_sub, text)
        return self._h_re.sub(self._h_sub, text)
    _marker_ul_chars = '*+-'
    _marker_any = r'(?:[%s]|\d+\.)' % _marker_ul_chars
@ -1380,8 +1505,9 @@ class Markdown(object):
            hits.sort()
            match = hits[0][1]
            start, end = match.span()
-            text = text[:start] + self._list_sub(match) + text[end:]
+            middle = self._list_sub(match)
-            pos = end
+            text = text[:start] + middle + text[end:]
            pos = start + len(middle)  # start pos for next attempted match
        return text
@ -1395,11 +1521,25 @@ class Markdown(object):
        ''' % (_marker_any, _marker_any),
        re.M | re.X | re.S)
    _task_list_item_re = re.compile(r'''
        (\[[\ x]\])[ \t]+       # tasklist marker = \1
        (.*)                   # list item text = \2
    ''', re.M | re.X | re.S)
    _task_list_warpper_str = r'<p><input type="checkbox" class="task-list-item-checkbox" %sdisabled>%s</p>'
    def _task_list_item_sub(self, match):
        marker = match.group(1)
        item_text = match.group(2)
        if marker == '[x]':
                return self._task_list_warpper_str % ('checked ', item_text)
        elif marker == '[ ]':
                return self._task_list_warpper_str % ('', item_text)
    _last_li_endswith_two_eols = False
    def _list_item_sub(self, match):
        item = match.group(4)
        leading_line = match.group(1)
        leading_space = match.group(2)
        if leading_line or "\n\n" in item or self._last_li_endswith_two_eols:
            item = self._run_block_gamut(self._outdent(item))
        else:
@ -1409,6 +1549,10 @@ class Markdown(object):
                item = item[:-1]
            item = self._run_span_gamut(item)
        self._last_li_endswith_two_eols = (len(match.group(5)) == 2)
        if "task_list" in self.extras:
            item = self._task_list_item_re.sub(self._task_list_item_sub, item)
        return "<li>%s</li>\n" % item
    def _process_list_items(self, list_str):
@ -1497,8 +1641,20 @@ class Markdown(object):
                formatter_opts = self.extras['code-color'] or {}
        if lexer_name:
            def unhash_code(codeblock):
                for key, sanitized in list(self.html_spans.items()):
                    codeblock = codeblock.replace(key, sanitized)
                replacements = [
                    ("&amp;", "&"),
                    ("&lt;", "<"),
                    ("&gt;", ">")
                ]
                for old, new in replacements:
                    codeblock = codeblock.replace(old, new)
                return codeblock
            lexer = self._get_pygments_lexer(lexer_name)
            if lexer:
                codeblock = unhash_code( codeblock )
                colored = self._color_with_pygments(codeblock, lexer,
                                                    **formatter_opts)
                return "\n\n%s\n\n" % colored
@ -1535,19 +1691,22 @@ class Markdown(object):
              )+
            )
            ((?=^[ ]{0,%d}\S)|\Z)   # Lookahead for non-space at line-start, or end of doc
            # Lookahead to make sure this block isn't already in a code block.
            # Needed when syntax highlighting is being used.
            (?![^<]*\</code\>)
            ''' % (self.tab_width, self.tab_width),
            re.M | re.X)
        return code_block_re.sub(self._code_block_sub, text)
    _fenced_code_block_re = re.compile(r'''
-        (?:\n\n|\A\n?)
+        (?:\n+|\A\n?)
        ^```([\w+-]+)?[ \t]*\n      # opening fence, $1 = optional lang
        (.*?)                       # $2 = code block content
        ^```[ \t]*\n                # closing fence
        ''', re.M | re.X | re.S)
    def _fenced_code_block_sub(self, match):
-        return self._code_block_sub(match, is_fenced_code_block=True);
+        return self._code_block_sub(match, is_fenced_code_block=True)
    def _do_fenced_code_blocks(self, text):
        """Process ```-fenced unindented code blocks ('fenced-code-blocks' extra)."""
@ -1620,12 +1779,17 @@ class Markdown(object):
        self._escape_table[text] = hashed
        return hashed
    _strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
    def _do_strike(self, text):
        text = self._strike_re.sub(r"<strike>\1</strike>", text)
        return text
    _strong_re = re.compile(r"(\*\*|__)(?=\S)(.+?[*_]*)(?<=\S)\1", re.S)
    _em_re = re.compile(r"(\*|_)(?=\S)(.+?)(?<=\S)\1", re.S)
    _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
    _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
    _code_friendly_line_re = re.compile(r"\~\~(?=\S)(.+?)(?<=\S)\~\~", re.S)
    _code_friendly_underline_re = re.compile(r"\~(?=\S)(.+?)(?<=\S)\~", re.S)
    _code_friendly_strong_re = re.compile(r"\*\*(?=\S)(.+?[*_]*)(?<=\S)\*\*", re.S)
    _code_friendly_em_re = re.compile(r"\*(?=\S)(.+?)(?<=\S)\*", re.S)
    def _do_italics_and_bold(self, text):
        # <strong> must go first:
        if "code-friendly" in self.extras:
@ -1686,37 +1850,52 @@ class Markdown(object):
        text = text.replace(". . .", "&#8230;")
        return text
-    _block_quote_re = re.compile(r'''
+    _block_quote_base = r'''
        (                           # Wrap whole match in \1
          (
-            ^[ \t]*>[ \t]?          # '>' at the start of a line
+            ^[ \t]*>%s[ \t]?        # '>' at the start of a line
              .+\n                  # rest of the first line
            (.+\n)*                 # subsequent consecutive lines
            \n*                     # blanks
          )+
        )
-        ''', re.M | re.X)
+    '''
-    _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M);
+    _block_quote_re = re.compile(_block_quote_base % '', re.M | re.X)
-
+    _block_quote_re_spoiler = re.compile(_block_quote_base % '[ \t]*?!?', re.M | re.X)
    _bq_one_level_re = re.compile('^[ \t]*>[ \t]?', re.M)
    _bq_one_level_re_spoiler = re.compile('^[ \t]*>[ \t]*?![ \t]?', re.M)
    _bq_all_lines_spoilers = re.compile(r'\A(?:^[ \t]*>[ \t]*?!.*[\n\r]*)+\Z', re.M)
    _html_pre_block_re = re.compile(r'(\s*<pre>.+?</pre>)', re.S)
    def _dedent_two_spaces_sub(self, match):
        return re.sub(r'(?m)^  ', '', match.group(1))
    def _block_quote_sub(self, match):
        bq = match.group(1)
-        bq = self._bq_one_level_re.sub('', bq)  # trim one level of quoting
+        is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq)
-        bq = self._ws_only_line_re.sub('', bq)  # trim whitespace-only lines
+        # trim one level of quoting
        if is_spoiler:
            bq = self._bq_one_level_re_spoiler.sub('', bq)
        else:
            bq = self._bq_one_level_re.sub('', bq)
        # trim whitespace-only lines
        bq = self._ws_only_line_re.sub('', bq)
        bq = self._run_block_gamut(bq)          # recurse
        bq = re.sub('(?m)^', '  ', bq)
        # These leading spaces screw with <pre> content, so we need to fix that:
        bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq)
-        return "<blockquote>\n%s\n</blockquote>\n\n" % bq
+        if is_spoiler:
            return '<blockquote class="spoiler">\n%s\n</blockquote>\n\n' % bq
        else:
            return '<blockquote>\n%s\n</blockquote>\n\n' % bq
    def _do_block_quotes(self, text):
        if '>' not in text:
            return text
        if 'spoiler' in self.extras:
            return self._block_quote_re_spoiler.sub(self._block_quote_sub, text)
        else:
            return self._block_quote_re.sub(self._block_quote_sub, text)
    def _form_paragraphs(self, text):
@ -1774,7 +1953,7 @@ class Markdown(object):
                    '&#8617;</a>' % (id, i+1))
                if footer[-1].endswith("</p>"):
                    footer[-1] = footer[-1][:-len("</p>")] \
-                        + '&nbsp;' + backlink + "</p>"
+                        + '&#160;' + backlink + "</p>"
                else:
                    footer.append("\n<p>%s</p>" % backlink)
                footer.append('</li>')
@ -1979,6 +2158,7 @@ def _curry(*args, **kwargs):
        return function(*args + rest, **combined)
    return result
 # Recipe: regex_from_encoded_pattern (1.0)
 def _regex_from_encoded_pattern(s):
    """'foo'    -> re.compile(re.escape('foo'))
@ -2008,6 +2188,7 @@ def _regex_from_encoded_pattern(s):
    else:  # not an encoded regex
        return re.compile(re.escape(s))
 # Recipe: dedent (0.1.2)
 def _dedentlines(lines, tabsize=8, skip_first_line=False):
    """_dedentlines(lines, tabsize=8, skip_first_line=False) -> dedented lines
@ -2025,7 +2206,6 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
    if DEBUG:
        print("dedent: dedent(..., tabsize=%d, skip_first_line=%r)"\
              % (tabsize, skip_first_line))
    indents = []
    margin = None
    for i, line in enumerate(lines):
        if i == 0 and skip_first_line: continue
@ -2079,6 +2259,7 @@ def _dedentlines(lines, tabsize=8, skip_first_line=False):
                    lines[i] = lines[i][removed:]
    return lines
 def _dedent(text, tabsize=8, skip_first_line=False):
    """_dedent(text, tabsize=8, skip_first_line=False) -> dedented text
@ -2105,6 +2286,7 @@ class _memoized(object):
    def __init__(self, func):
        self.func = func
        self.cache = {}
    def __call__(self, *args):
        try:
            return self.cache[args]
@ -2115,6 +2297,7 @@ class _memoized(object):
            # uncachable -- for instance, passing a list as an argument.
            # Better to not cache than to blow up entirely.
            return self.func(*args)
    def __repr__(self):
        """Return the function's docstring."""
        return self.func.__doc__
@ -2141,6 +2324,7 @@ def _xml_oneliner_re_from_tab_width(tab_width):
        """ % (tab_width - 1), re.X)
 _xml_oneliner_re_from_tab_width = _memoized(_xml_oneliner_re_from_tab_width)
 def _hr_tag_re_from_tab_width(tab_width):
    return re.compile(r"""
        (?:
@ -2191,7 +2375,6 @@ def _xml_encode_email_char_at_random(ch):
        return '&#%s;' % ord(ch)
 # ---- mainline
 class _NoReflowFormatter(optparse.IndentedHelpFormatter):
@ -2199,10 +2382,12 @@ class _NoReflowFormatter(optparse.IndentedHelpFormatter):
    def format_description(self, description):
        return description or ""
 def _test():
    """Run the doctests embedded in this module via ``doctest.testmod()``."""
    import doctest
    doctest.testmod()
 def main(argv=None):
    if argv is None:
        argv = sys.argv
@ -2319,7 +2504,7 @@ def main(argv=None):
                sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
        if extras and "toc" in extras:
            log.debug("toc_html: " +
-                html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace'))
+                str(html.toc_html.encode(sys.stdout.encoding or "utf-8", 'xmlcharrefreplace')))
        if opts.compare:
            test_dir = join(dirname(dirname(abspath(__file__))), "test")
            if exists(join(test_dir, "test_markdown2.py")):