s > 9 correctly.
- parent_list = None
- for list in self.list:
- self.o(
- " " if parent_list == "ol" and list.name == "ul" else " "
- )
- parent_list = list.name
-
- if li.name == "ul":
- self.o(self.ul_item_mark + " ")
- elif li.name == "ol":
- li.num += 1
- self.o(str(li.num) + ". ")
- self.start = True
-
- if tag in ["table", "tr", "td", "th"]:
- if self.ignore_tables:
- if tag == "tr":
- if start:
- pass
- else:
- self.soft_br()
- else:
- pass
-
- elif self.bypass_tables:
- if start:
- self.soft_br()
- if tag in ["td", "th"]:
- if start:
- self.o("<{}>\n\n".format(tag))
- else:
- self.o("\n{}>".format(tag))
- else:
- if start:
- self.o("<{}>".format(tag))
- else:
- self.o("{}>".format(tag))
-
- else:
- if tag == "table":
- if start:
- self.table_start = True
- if self.pad_tables:
- self.o("<" + config.TABLE_MARKER_FOR_PAD + ">")
- self.o(" \n")
- else:
- if self.pad_tables:
- # add break in case the table is empty or its 1 row table
- self.soft_br()
- self.o("" + config.TABLE_MARKER_FOR_PAD + ">")
- self.o(" \n")
- if tag in ["td", "th"] and start:
- if self.split_next_td:
- self.o("| ")
- self.split_next_td = True
-
- if tag == "tr" and start:
- self.td_count = 0
- if tag == "tr" and not start:
- self.split_next_td = False
- self.soft_br()
- if tag == "tr" and not start and self.table_start:
- # Underline table header
- self.o("|".join(["---"] * self.td_count))
- self.soft_br()
- self.table_start = False
- if tag in ["td", "th"] and start:
- self.td_count += 1
-
- if tag == "pre":
- if start:
- self.startpre = True
- self.pre = True
- else:
- self.pre = False
- if self.mark_code:
- self.out("\n[/code]")
- self.p()
-
- # TODO: Add docstring for these one letter functions
- def pbr(self) -> None:
- "Pretty print has a line break"
- if self.p_p == 0:
- self.p_p = 1
-
- def p(self) -> None:
- "Set pretty print to 1 or 2 lines"
- self.p_p = 1 if self.single_line_break else 2
-
- def soft_br(self) -> None:
- "Soft breaks"
- self.pbr()
- self.br_toggle = " "
-
- def o(
- self, data: str, puredata: bool = False, force: Union[bool, str] = False
- ) -> None:
- """
- Deal with indentation and whitespace
- """
- if self.abbr_data is not None:
- self.abbr_data += data
-
- if not self.quiet:
- if self.google_doc:
- # prevent white space immediately after 'begin emphasis'
- # marks ('**' and '_')
- lstripped_data = data.lstrip()
- if self.drop_white_space and not (self.pre or self.code):
- data = lstripped_data
- if lstripped_data != "":
- self.drop_white_space = 0
-
- if puredata and not self.pre:
- # This is a very dangerous call ... it could mess up
- # all handling of when not handled properly
- # (see entityref)
- data = re.sub(r"\s+", r" ", data)
- if data and data[0] == " ":
- self.space = True
- data = data[1:]
- if not data and not force:
- return
-
- if self.startpre:
- # self.out(" :") #TODO: not output when already one there
- if not data.startswith("\n") and not data.startswith("\r\n"):
- # <pre>stuff...
- data = "\n" + data
- if self.mark_code:
- self.out("\n[code]")
- self.p_p = 0
-
- bq = ">" * self.blockquote
- if not (force and data and data[0] == ">") and self.blockquote:
- bq += " "
-
- if self.pre:
- if not self.list:
- bq += " "
- # else: list content is already partially indented
- bq += " " * len(self.list)
- data = data.replace("\n", "\n" + bq)
-
- if self.startpre:
- self.startpre = False
- if self.list:
- # use existing initial indentation
- data = data.lstrip("\n")
-
- if self.start:
- self.space = False
- self.p_p = 0
- self.start = False
-
- if force == "end":
- # It's the end.
- self.p_p = 0
- self.out("\n")
- self.space = False
-
- if self.p_p:
- self.out((self.br_toggle + "\n" + bq) * self.p_p)
- self.space = False
- self.br_toggle = ""
-
- if self.space:
- if not self.lastWasNL:
- self.out(" ")
- self.space = False
-
- if self.a and (
- (self.p_p == 2 and self.links_each_paragraph) or force == "end"
- ):
- if force == "end":
- self.out("\n")
-
- newa = []
- for link in self.a:
- if self.outcount > link.outcount:
- self.out(
- " ["
- + str(link.count)
- + "]: "
- + urlparse.urljoin(self.baseurl, link.attrs["href"])
- )
- if "title" in link.attrs:
- assert link.attrs["title"] is not None
- self.out(" (" + link.attrs["title"] + ")")
- self.out("\n")
- else:
- newa.append(link)
-
- # Don't need an extra line when nothing was done.
- if self.a != newa:
- self.out("\n")
-
- self.a = newa
-
- if self.abbr_list and force == "end":
- for abbr, definition in self.abbr_list.items():
- self.out(" *[" + abbr + "]: " + definition + "\n")
-
- self.p_p = 0
- self.out(data)
- self.outcount += 1
-
- def handle_data(self, data: str, entity_char: bool = False) -> None:
- if not data:
- # Data may be empty for some HTML entities. For example,
- # LEFT-TO-RIGHT MARK.
- return
-
- if self.stressed:
- data = data.strip()
- self.stressed = False
- self.preceding_stressed = True
- elif self.preceding_stressed:
- if (
- re.match(r"[^][(){}\s.!?]", data[0])
- and not hn(self.current_tag)
- and self.current_tag not in ["a", "code", "pre"]
- ):
- # should match a letter or common punctuation
- data = " " + data
- self.preceding_stressed = False
-
- if self.style:
- self.style_def.update(dumb_css_parser(data))
-
- if self.maybe_automatic_link is not None:
- href = self.maybe_automatic_link
- if (
- href == data
- and self.absolute_url_matcher.match(href)
- and self.use_automatic_links
- ):
- self.o("<" + data + ">")
- self.empty_link = False
- return
- else:
- self.o("[")
- self.maybe_automatic_link = None
- self.empty_link = False
-
- if not self.code and not self.pre and not entity_char:
- data = escape_md_section(data, snob=self.escape_snob)
- self.preceding_data = data
- self.o(data, puredata=True)
-
- def charref(self, name: str) -> str:
- if name[0] in ["x", "X"]:
- c = int(name[1:], 16)
- else:
- c = int(name)
-
- if not self.unicode_snob and c in unifiable_n:
- return unifiable_n[c]
- else:
- try:
- return chr(c)
- except ValueError: # invalid unicode
- return ""
-
- def entityref(self, c: str) -> str:
- if not self.unicode_snob and c in config.UNIFIABLE:
- return config.UNIFIABLE[c]
- try:
- ch = html.entities.html5[c + ";"]
- except KeyError:
- return "&" + c + ";"
- return config.UNIFIABLE[c] if c == "nbsp" else ch
-
- def google_nest_count(self, style: Dict[str, str]) -> int:
- """
- Calculate the nesting count of google doc lists
-
- :type style: dict
-
- :rtype: int
- """
- nest_count = 0
- if "margin-left" in style:
- nest_count = int(style["margin-left"][:-2]) // self.google_list_indent
-
- return nest_count
-
- def optwrap(self, text: str) -> str:
- """
- Wrap all paragraphs in the provided text.
-
- :type text: str
-
- :rtype: str
- """
- if not self.body_width:
- return text
-
- result = ""
- newlines = 0
- # I cannot think of a better solution for now.
- # To avoid the non-wrap behaviour for entire paras
- # because of the presence of a link in it
- if not self.wrap_links:
- self.inline_links = False
- for para in text.split("\n"):
- if len(para) > 0:
- if not skipwrap(
- para, self.wrap_links, self.wrap_list_items, self.wrap_tables
- ):
- indent = ""
- if para.startswith(" " + self.ul_item_mark):
- # list item continuation: add a double indent to the
- # new lines
- indent = " "
- elif para.startswith("> "):
- # blockquote continuation: add the greater than symbol
- # to the new lines
- indent = "> "
- wrapped = wrap(
- para,
- self.body_width,
- break_long_words=False,
- subsequent_indent=indent,
- )
- result += "\n".join(wrapped)
- if para.endswith(" "):
- result += " \n"
- newlines = 1
- elif indent:
- result += "\n"
- newlines = 1
- else:
- result += "\n\n"
- newlines = 2
- else:
- # Warning for the tempted!!!
- # Be aware that obvious replacement of this with
- # line.isspace()
- # DOES NOT work! Explanations are welcome.
- if not config.RE_SPACE.match(para):
- result += para + "\n"
- newlines = 1
- else:
- if newlines < 2:
- result += "\n"
- newlines += 1
- return result
+ def __init__(
+ self,
+ out: Optional[OutCallback] = None,
+ baseurl: str = "",
+ bodywidth: int = config.BODY_WIDTH,
+ ) -> None:
+ """
+ Input parameters:
+ out: possible custom replacement for self.outtextf (which
+ appends lines of text).
+ baseurl: base URL of the document we process
+ """
+ super().__init__(convert_charrefs=False)
+
+ # Config options
+ self.split_next_td = False
+ self.td_count = 0
+ self.table_start = False
+ self.unicode_snob = config.UNICODE_SNOB # covered in cli
+ self.escape_snob = config.ESCAPE_SNOB # covered in cli
+ self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH
+ self.body_width = bodywidth # covered in cli
+ self.skip_internal_links = config.SKIP_INTERNAL_LINKS # covered in cli
+ self.inline_links = config.INLINE_LINKS # covered in cli
+ self.protect_links = config.PROTECT_LINKS # covered in cli
+ self.google_list_indent = config.GOOGLE_LIST_INDENT # covered in cli
+ self.ignore_links = config.IGNORE_ANCHORS # covered in cli
+ self.ignore_mailto_links = config.IGNORE_MAILTO_LINKS # covered in cli
+ self.ignore_images = config.IGNORE_IMAGES # covered in cli
+ self.images_as_html = config.IMAGES_AS_HTML # covered in cli
+ self.images_to_alt = config.IMAGES_TO_ALT # covered in cli
+ self.images_with_size = config.IMAGES_WITH_SIZE # covered in cli
+ self.ignore_emphasis = config.IGNORE_EMPHASIS # covered in cli
+ self.bypass_tables = config.BYPASS_TABLES # covered in cli
+ self.ignore_tables = config.IGNORE_TABLES # covered in cli
+ self.google_doc = False # covered in cli
+ self.ul_item_mark = "*" # covered in cli
+ self.emphasis_mark = "_" # covered in cli
+ self.strong_mark = "**"
+ self.single_line_break = config.SINGLE_LINE_BREAK # covered in cli
+ self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli
+ self.hide_strikethrough = False # covered in cli
+ self.mark_code = config.MARK_CODE
+ self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli
+ self.wrap_links = config.WRAP_LINKS # covered in cli
+ self.wrap_tables = config.WRAP_TABLES
+ self.pad_tables = config.PAD_TABLES # covered in cli
+ self.default_image_alt = config.DEFAULT_IMAGE_ALT # covered in cli
+ self.tag_callback = None
+ self.open_quote = config.OPEN_QUOTE # covered in cli
+ self.close_quote = config.CLOSE_QUOTE # covered in cli
+ self.header_id = None
+ self.span_highlight = False
+ self.span_lead = False
+
+ if out is None:
+ self.out = self.outtextf
+ else:
+ self.out = out
+
+ # empty list to store output characters before they are "joined"
+ self.outtextlist = [] # type: List[str]
+
+ self.quiet = 0
+ self.p_p = 0 # number of newline character to print before next output
+ self.outcount = 0
+ self.start = True
+ self.space = False
+ self.a = [] # type: List[AnchorElement]
+ self.astack = [] # type: List[Optional[Dict[str, Optional[str]]]]
+ self.maybe_automatic_link = None # type: Optional[str]
+ self.empty_link = False
+ self.absolute_url_matcher = re.compile(r"^[a-zA-Z+]+://")
+ self.acount = 0
+ self.list = [] # type: List[ListElement]
+ self.blockquote = 0
+ self.pre = False
+ self.startpre = False
+ self.code = False
+ self.quote = False
+ self.br_toggle = ""
+ self.lastWasNL = False
+ self.lastWasList = False
+ self.style = 0
+ self.style_def = {} # type: Dict[str, Dict[str, str]]
+ self.tag_stack = (
+ []
+ ) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
+ self.emphasis = 0
+ self.drop_white_space = 0
+ self.inheader = False
+ # Current abbreviation definition
+ self.abbr_title = None # type: Optional[str]
+ # Last inner HTML (for abbr being defined)
+ self.abbr_data = None # type: Optional[str]
+ # Stack of abbreviations to write later
+ self.abbr_list = {} # type: Dict[str, str]
+ self.baseurl = baseurl
+ self.stressed = False
+ self.preceding_stressed = False
+ self.preceding_data = ""
+ self.current_tag = ""
+ self.current_class = ""
+
+ config.UNIFIABLE["nbsp"] = " _place_holder;"
+
+ def feed(self, data: str) -> None:
+ data = data.replace("' + 'script>", "")
+ super().feed(data)
+
+ def handle(self, data: str) -> str:
+ self.feed(data)
+ self.feed("")
+ markdown = self.optwrap(self.finish())
+ if self.pad_tables:
+ return pad_tables_in_text(markdown)
+ else:
+ return markdown
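
For orientation, handle() above is the whole conversion pipeline: feed() parses, finish() flushes and joins the output buffer, optwrap() wraps paragraphs, and pad_tables_in_text() optionally realigns tables. A minimal usage sketch; the import path for this vendored copy is an assumption:

    from html2text import HTML2Text  # assumed path; adjust to where this vendored class lives

    h = HTML2Text(baseurl="https://example.com", bodywidth=78)
    h.pad_tables = True  # enables the pad_tables_in_text() pass inside handle()
    md = h.handle("<h1>Title</h1><p>Some <b>bold</b> text.</p>")
    # md is roughly:
    # # Title
    #
    # Some **bold** text.
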
+
+ def outtextf(self, s: str) -> None:
+ self.outtextlist.append(s)
+ if s:
+ self.lastWasNL = s[-1] == "\n"
+
+ def finish(self) -> str:
+ self.close()
+
+ self.pbr()
+ self.o("", force="end")
+
+ outtext = "".join(self.outtextlist)
+
+ if self.unicode_snob:
+ nbsp = html.entities.html5["nbsp;"]
+ else:
+ nbsp = " "
+ outtext = outtext.replace("&nbsp_place_holder;", nbsp)
+
+ # Clear self.outtextlist to avoid memory leak of its content to
+ # the next handling.
+ self.outtextlist = []
+
+ return outtext
+
+ def handle_charref(self, c: str) -> None:
+ self.handle_data(self.charref(c), True)
+
+ def handle_entityref(self, c: str) -> None:
+ ref = self.entityref(c)
+
+ # ref may be an empty string (e.g. for &lrm;/&rlm; markers that should
+ # not contribute to the final output).
+ # self.handle_data cannot handle a zero-length string right after a
+ # stressed tag or mid-text within a stressed tag (text get split and
+ # self.stressed/self.preceding_stressed gets switched after the first
+ # part of that text).
+ if ref:
+ self.handle_data(ref, True)
+
+ def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
+ self.handle_tag(tag, dict(attrs), start=True)
+
+ def handle_endtag(self, tag: str) -> None:
+ self.handle_tag(tag, {}, start=False)
+
+ def previousIndex(self, attrs: Dict[str, Optional[str]]) -> Optional[int]:
+ """
+ :type attrs: dict
+
+ :returns: The index of certain set of attributes (of a link) in the
+ self.a list. If the set of attributes is not found, returns None
+ :rtype: int
+ """
+ if "href" not in attrs:
+ return None
+
+ match = False
+ for i, a in enumerate(self.a):
+ if "href" in a.attrs and a.attrs["href"] == attrs["href"]:
+ if "title" in a.attrs or "title" in attrs:
+ if (
+ "title" in a.attrs
+ and "title" in attrs
+ and a.attrs["title"] == attrs["title"]
+ ):
+ match = True
+ else:
+ match = True
+
+ if match:
+ return i
+ return None
+
+ def handle_emphasis(
+ self, start: bool, tag_style: Dict[str, str], parent_style: Dict[str, str]
+ ) -> None:
+ """
+ Handles various text emphases
+ """
+ tag_emphasis = google_text_emphasis(tag_style)
+ parent_emphasis = google_text_emphasis(parent_style)
+
+ # handle Google's text emphasis
+ strikethrough = "line-through" in tag_emphasis and self.hide_strikethrough
+
+ # google and others may mark a font's weight as `bold` or `700`
+ bold = False
+ for bold_marker in config.BOLD_TEXT_STYLE_VALUES:
+ bold = bold_marker in tag_emphasis and bold_marker not in parent_emphasis
+ if bold:
+ break
+
+ italic = "italic" in tag_emphasis and "italic" not in parent_emphasis
+ fixed = (
+ google_fixed_width_font(tag_style)
+ and not google_fixed_width_font(parent_style)
+ and not self.pre
+ )
+
+ if start:
+ # crossed-out text must be handled before other attributes
+ # in order not to output qualifiers unnecessarily
+ if bold or italic or fixed:
+ self.emphasis += 1
+ if strikethrough:
+ self.quiet += 1
+ if italic:
+ self.o(self.emphasis_mark)
+ self.drop_white_space += 1
+ if bold:
+ self.o(self.strong_mark)
+ self.drop_white_space += 1
+ if fixed:
+ self.o("`")
+ self.drop_white_space += 1
+ self.code = True
+ else:
+ if bold or italic or fixed:
+ # there must not be whitespace before closing emphasis mark
+ self.emphasis -= 1
+ self.space = False
+ if fixed:
+ if self.drop_white_space:
+ # empty emphasis, drop it
+ self.drop_white_space -= 1
+ else:
+ self.o("`")
+ self.code = False
+ if bold:
+ if self.drop_white_space:
+ # empty emphasis, drop it
+ self.drop_white_space -= 1
+ else:
+ self.o(self.strong_mark)
+ if italic:
+ if self.drop_white_space:
+ # empty emphasis, drop it
+ self.drop_white_space -= 1
+ else:
+ self.o(self.emphasis_mark)
+ # space is only allowed after *all* emphasis marks
+ if (bold or italic) and not self.emphasis:
+ self.o(" ")
+ if strikethrough:
+ self.quiet -= 1
+
+ def handle_tag(
+ self, tag: str, attrs: Dict[str, Optional[str]], start: bool
+ ) -> None:
+ self.current_tag = tag
+
+ if self.tag_callback is not None:
+ if self.tag_callback(self, tag, attrs, start) is True:
+ return
+
+ # first thing inside the anchor tag is another tag
+ # that produces some output
+ if (
+ start
+ and self.maybe_automatic_link is not None
+ and tag not in ["p", "div", "style", "dl", "dt"]
+ and (tag != "img" or self.ignore_images)
+ ):
+ self.o("[")
+ self.maybe_automatic_link = None
+ self.empty_link = False
+
+ if self.google_doc:
+ # the attrs parameter is empty for a closing tag. in addition, we
+ # need the attributes of the parent nodes in order to get a
+ # complete style description for the current element. we assume
+ # that google docs export well formed html.
+ parent_style = {} # type: Dict[str, str]
+ if start:
+ if self.tag_stack:
+ parent_style = self.tag_stack[-1][2]
+ tag_style = element_style(attrs, self.style_def, parent_style)
+ self.tag_stack.append((tag, attrs, tag_style))
+ else:
+ dummy, attrs, tag_style = (
+ self.tag_stack.pop() if self.tag_stack else (None, {}, {})
+ )
+ if self.tag_stack:
+ parent_style = self.tag_stack[-1][2]
+
+ if hn(tag):
+ # check if nh is inside of an 'a' tag
+ # (incorrect but found in the wild)
+ if self.astack:
+ if start:
+ self.inheader = True
+ # are inside link name, so only add '#' if it can appear before '['
+ if self.outtextlist and self.outtextlist[-1] == "[":
+ self.outtextlist.pop()
+ self.space = False
+ self.o(hn(tag) * "#" + " ")
+ self.o("[")
+ self.header_id = attrs.get('id')
+ else:
+ self.p()
+ if start:
+ self.inheader = True
+ self.o(hn(tag) * "#" + " ")
+ if self.header_id:
+ self.o(' {#' + self.header_id + '}')
+ self.header_id = None
+ else:
+ self.inheader = False
+ return # prevent redundant emphasis marks on headers
+ if 'class' in attrs:
+ self.current_class = attrs.get('class')
+ # self.p()
+ if not start:
+ self.current_class = ''
+ if 'style' in attrs:
+ if attrs.get('style') == 'text-align: center':
+ self.current_class = 'center'
+ if not start:
+ self.current_class = ''
+ if tag == 'span':
+ if start:
+ if self.current_class == 'highlight' and \
+ not self.inheader and \
+ not self.span_lead and \
+ not self.astack:
+ self.o('`') # NOTE: same as <code>
+ self.span_highlight = True
+ elif self.current_class == 'lead' and \
+ not self.inheader and \
+ not self.span_highlight:
+ self.o('==') # NOTE: but CriticMarkup uses {== ==}
+ self.span_lead = True
+ else:
+ if self.span_highlight:
+ self.o('`')
+ self.span_highlight = False
+ elif self.span_lead:
+ self.o('==')
+ self.span_lead = False
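
The header-id and span branches above are additions specific to this vendored copy; assuming the conditions as written (highlight wins over lead, and neither applies inside headers or links), the intended mapping looks roughly like this. The import path is assumed as before:

    from html2text import HTML2Text  # assumed import path for this vendored copy

    # fresh instances, since the converter keeps parser state between handle() calls
    print(HTML2Text(bodywidth=0).handle('<p><span class="highlight">pip install</span></p>'))
    # -> `pip install`        (backticks, i.e. rendered the same way as <code>)
    print(HTML2Text(bodywidth=0).handle('<p><span class="lead">Opening line</span></p>'))
    # -> ==Opening line==     (CriticMarkup proper would write {== ==})
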
+
+ if tag in ["p", "div"]:
+ if self.google_doc:
+ if start and google_has_height(tag_style):
+ self.p()
+ else:
+ self.soft_br()
+ elif self.astack:
+ pass
+ else:
+ self.p()
+
+ if tag == "br" and start:
+ if self.blockquote > 0:
+ self.o(" \n> ")
+ else:
+ self.o(" \n")
+
+ if tag == "hr" and start:
+ self.p()
+ self.o("* * *")
+ self.p()
+
+ if tag in ["head", "style", "script"]:
+ if start:
+ self.quiet += 1
+ else:
+ self.quiet -= 1
+
+ if tag == "style":
+ if start:
+ self.style += 1
+ else:
+ self.style -= 1
+
+ if tag in ["body"]:
+ self.quiet = 0 # sites like 9rules.com never close <head>
+
+ if tag == "blockquote":
+ if start:
+ self.p()
+ self.o("> ", force=True)
+ self.start = True
+ self.blockquote += 1
+ else:
+ self.blockquote -= 1
+ self.p()
+
+ if tag in ["em", "i", "u"] and not self.ignore_emphasis:
+ # Separate with a space if we immediately follow an alphanumeric
+ # character, since otherwise Markdown won't render the emphasis
+ # marks, and we'll be left with eg 'foo_bar_' visible.
+ # (Don't add a space otherwise, though, since there isn't one in the
+ # original HTML.)
+ if (
+ start
+ and self.preceding_data
+ and self.preceding_data[-1] not in string.whitespace
+ and self.preceding_data[-1] not in string.punctuation
+ ):
+ emphasis = " " + self.emphasis_mark
+ self.preceding_data += " "
+ else:
+ emphasis = self.emphasis_mark
+
+ self.o(emphasis)
+ if start:
+ self.stressed = True
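
The spacing rule described in the comment above is easiest to see on a concrete input; a quick sketch of the behaviour it guards against (assumed import path as above):

    from html2text import HTML2Text  # assumed path

    print(HTML2Text(bodywidth=0).handle("<p>foo<em>bar</em></p>"))
    # -> foo _bar_
    # The inserted space is what lets Markdown recognise the emphasis;
    # without it the output would be the literal foo_bar_.
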
+
+ if tag in ["strong", "b"] and not self.ignore_emphasis:
+ # Separate with space if we immediately follow an * character, since
+ # without it, Markdown won't render the resulting *** correctly.
+ # (Don't add a space otherwise, though, since there isn't one in the
+ # original HTML.)
+ if (
+ start
+ and self.preceding_data
+ and self.preceding_data[-1] == self.strong_mark[0]
+ ):
+ strong = " " + self.strong_mark
+ self.preceding_data += " "
+ else:
+ strong = self.strong_mark
+
+ self.o(strong)
+ if start:
+ self.stressed = True
+
+ if tag in ["del", "strike", "s"]:
+ if start and self.preceding_data and self.preceding_data[-1] == "~":
+ strike = " ~~"
+ self.preceding_data += " "
+ else:
+ strike = "~~"
+
+ self.o(strike)
+ if start:
+ self.stressed = True
+
+ if self.google_doc:
+ if not self.inheader:
+ # handle some font attributes, but leave headers clean
+ self.handle_emphasis(start, tag_style, parent_style)
+
+ if tag in ["kbd", "code", "tt"] and not self.pre:
+ self.o("`") # TODO: `` `this` ``
+ self.code = not self.code
+
+ if tag == "abbr":
+ if start:
+ self.abbr_title = None
+ self.abbr_data = ""
+ if "title" in attrs:
+ self.abbr_title = attrs["title"]
+ else:
+ if self.abbr_title is not None:
+ assert self.abbr_data is not None
+ self.abbr_list[self.abbr_data] = self.abbr_title
+ self.abbr_title = None
+ self.abbr_data = None
+
+ if tag == "q":
+ if not self.quote:
+ self.o(self.open_quote)
+ else:
+ self.o(self.close_quote)
+ self.quote = not self.quote
+
+ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
+ url = urlparse.urljoin(self.baseurl, link)
+ title = ' "{}"'.format(title) if title.strip() else ""
+ self.o("]({url}{title})".format(url=escape_md(url), title=title))
+
+ if tag == "a" and not self.ignore_links:
+ if start:
+ if (
+ "href" in attrs
+ and attrs["href"] is not None
+ and not (self.skip_internal_links and attrs["href"].startswith("#"))
+ and not (
+ self.ignore_mailto_links and attrs["href"].startswith("mailto:")
+ )
+ ):
+ self.astack.append(attrs)
+ self.maybe_automatic_link = attrs["href"]
+ self.empty_link = True
+ if self.protect_links:
+ attrs["href"] = "<" + attrs["href"] + ">"
+ else:
+ self.astack.append(None)
+ else:
+ if self.astack:
+ a = self.astack.pop()
+ if self.maybe_automatic_link and not self.empty_link:
+ self.maybe_automatic_link = None
+ elif a:
+ assert a["href"] is not None
+ if self.empty_link:
+ self.o("[")
+ self.empty_link = False
+ self.maybe_automatic_link = None
+ if self.inline_links:
+ self.p_p = 0
+ title = a.get("title") or ""
+ title = escape_md(title)
+ link_url(self, a["href"], title)
+ else:
+ i = self.previousIndex(a)
+ if i is not None:
+ a_props = self.a[i]
+ else:
+ self.acount += 1
+ a_props = AnchorElement(a, self.acount, self.outcount)
+ self.a.append(a_props)
+ self.o("][" + str(a_props.count) + "]")
+
+ if tag == "img" and start and not self.ignore_images:
+ if "src" in attrs:
+ assert attrs["src"] is not None
+ if not self.images_to_alt:
+ attrs["href"] = attrs["src"]
+ alt = attrs.get("alt") or self.default_image_alt
+
+ # If we have images_with_size, write raw html including width,
+ # height, and alt attributes
+ if self.images_as_html or (
+ self.images_with_size and ("width" in attrs or "height" in attrs)
+ ):
+ self.o("
")
+ return
+
+ # If we have a link to create, output the start
+ if self.maybe_automatic_link is not None:
+ href = self.maybe_automatic_link
+ if (
+ self.images_to_alt
+ and escape_md(alt) == href
+ and self.absolute_url_matcher.match(href)
+ ):
+ self.o("<" + escape_md(alt) + ">")
+ self.empty_link = False
+ return
+ else:
+ self.o("[")
+ self.maybe_automatic_link = None
+ self.empty_link = False
+
+ # If we have images_to_alt, we discard the image itself,
+ # considering only the alt text.
+ if self.images_to_alt:
+ self.o(escape_md(alt))
+ else:
+ self.o("![" + escape_md(alt) + "]")
+ if self.inline_links:
+ href = attrs.get("href") or ""
+ self.o(
+ "(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
+ )
+ else:
+ i = self.previousIndex(attrs)
+ if i is not None:
+ a_props = self.a[i]
+ else:
+ self.acount += 1
+ a_props = AnchorElement(attrs, self.acount, self.outcount)
+ self.a.append(a_props)
+ self.o("[" + str(a_props.count) + "]")
+
+ if tag == "dl" and start:
+ self.p()
+ if tag == "dt" and not start:
+ self.pbr()
+ if tag == "dd" and start:
+ self.o(" ")
+ if tag == "dd" and not start:
+ self.pbr()
+
+ if tag in ["ol", "ul"]:
+ # Google Docs create sub lists as top level lists
+ if not self.list and not self.lastWasList:
+ self.p()
+ if start:
+ if self.google_doc:
+ list_style = google_list_style(tag_style)
+ else:
+ list_style = tag
+ numbering_start = list_numbering_start(attrs)
+ self.list.append(ListElement(list_style, numbering_start))
+ else:
+ if self.list:
+ self.list.pop()
+ if not self.google_doc and not self.list:
+ self.o("\n")
+ self.lastWasList = True
+ else:
+ self.lastWasList = False
+
+ if tag == "li":
+ self.pbr()
+ if start:
+ if self.list:
+ li = self.list[-1]
+ else:
+ li = ListElement("ul", 0)
+ if self.google_doc:
+ self.o(" " * self.google_nest_count(tag_style))
+ else:
+ # Indent two spaces per list, except use three spaces for an
+ # unordered list inside an ordered list.
+ # https://spec.commonmark.org/0.28/#motivation
+ # TODO: line up <ol> <li>s > 9 correctly.
+ parent_list = None
+ for list in self.list:
+ self.o(
+ " " if parent_list == "ol" and list.name == "ul" else " "
+ )
+ parent_list = list.name
+
+ if li.name == "ul":
+ self.o(self.ul_item_mark + " ")
+ elif li.name == "ol":
+ li.num += 1
+ self.o(str(li.num) + ". ")
+ self.start = True
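
Concretely, the two-space/three-space rule above produces CommonMark-compatible nesting; a sketch of the expected shape (assumed import path as above):

    from html2text import HTML2Text  # assumed path

    html = "<ol><li>first<ul><li>nested</li></ul></li><li>second</li></ol>"
    print(HTML2Text(bodywidth=0).handle(html))
    # roughly:
    #   1. first
    #      * nested
    #   2. second
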
+
+ if tag in ["table", "tr", "td", "th"]:
+ if self.ignore_tables:
+ if tag == "tr":
+ if start:
+ pass
+ else:
+ self.soft_br()
+ else:
+ pass
+
+ elif self.bypass_tables:
+ if start:
+ self.soft_br()
+ if tag in ["td", "th"]:
+ if start:
+ self.o("<{}>\n\n".format(tag))
+ else:
+ self.o("\n{}>".format(tag))
+ else:
+ if start:
+ self.o("<{}>".format(tag))
+ else:
+ self.o("{}>".format(tag))
+
+ else:
+ if tag == "table":
+ if start:
+ self.table_start = True
+ if self.pad_tables:
+ self.o("<" + config.TABLE_MARKER_FOR_PAD + ">")
+ self.o(" \n")
+ else:
+ if self.pad_tables:
+ # add break in case the table is empty or its 1 row table
+ self.soft_br()
+ self.o("" + config.TABLE_MARKER_FOR_PAD + ">")
+ self.o(" \n")
+ if tag in ["td", "th"] and start:
+ if self.split_next_td:
+ self.o("| ")
+ self.split_next_td = True
+
+ if tag == "tr" and start:
+ self.td_count = 0
+ if tag == "tr" and not start:
+ self.split_next_td = False
+ self.soft_br()
+ if tag == "tr" and not start and self.table_start:
+ # Underline table header
+ self.o("|".join(["---"] * self.td_count))
+ self.soft_br()
+ self.table_start = False
+ if tag in ["td", "th"] and start:
+ self.td_count += 1
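
With ignore_tables and bypass_tables both off, the branch above emits pipe-separated rows and underlines the first row; a sketch of the default (unpadded) output, assumed import path as above:

    from html2text import HTML2Text  # assumed path

    table = "<table><tr><th>a</th><th>b</th></tr><tr><td>1</td><td>2</td></tr></table>"
    print(HTML2Text(bodywidth=0).handle(table))
    # roughly:
    # a| b
    # ---|---
    # 1| 2
    # (with pad_tables=True the table-marker pass rewrites this into an aligned | a | b | grid)
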
+
+ if tag == "pre":
+ if start:
+ self.startpre = True
+ self.pre = True
+ else:
+ self.pre = False
+ if self.mark_code:
+ self.out("\n[/code]")
+ self.p()
+
+ # TODO: Add docstring for these one letter functions
+ def pbr(self) -> None:
+ "Pretty print has a line break"
+ if self.p_p == 0:
+ self.p_p = 1
+
+ def p(self) -> None:
+ "Set pretty print to 1 or 2 lines"
+ self.p_p = 1 if self.single_line_break else 2
+
+ def soft_br(self) -> None:
+ "Soft breaks"
+ self.pbr()
+ self.br_toggle = " "
+
+ def o(
+ self, data: str, puredata: bool = False, force: Union[bool, str] = False
+ ) -> None:
+ """
+ Deal with indentation and whitespace
+ """
+ if self.abbr_data is not None:
+ self.abbr_data += data
+
+ if not self.quiet:
+ if self.google_doc:
+ # prevent white space immediately after 'begin emphasis'
+ # marks ('**' and '_')
+ lstripped_data = data.lstrip()
+ if self.drop_white_space and not (self.pre or self.code):
+ data = lstripped_data
+ if lstripped_data != "":
+ self.drop_white_space = 0
+
+ if puredata and not self.pre:
+ # This is a very dangerous call ... it could mess up
+ # all handling of when not handled properly
+ # (see entityref)
+ data = re.sub(r"\s+", r" ", data)
+ if data and data[0] == " ":
+ self.space = True
+ data = data[1:]
+ if not data and not force:
+ return
+
+ if self.startpre:
+ # self.out(" :") #TODO: not output when already one there
+ if not data.startswith("\n") and not data.startswith("\r\n"):
+ # <pre>stuff...
+ data = "\n" + data
+ if self.mark_code:
+ self.out("\n[code]")
+ self.p_p = 0
+
+ bq = ">" * self.blockquote
+ if not (force and data and data[0] == ">") and self.blockquote:
+ bq += " "
+
+ if self.pre:
+ if not self.list:
+ bq += " "
+ # else: list content is already partially indented
+ bq += " " * len(self.list)
+ data = data.replace("\n", "\n" + bq)
+
+ if self.startpre:
+ self.startpre = False
+ if self.list:
+ # use existing initial indentation
+ data = data.lstrip("\n")
+
+ if self.start:
+ self.space = False
+ self.p_p = 0
+ self.start = False
+
+ if force == "end":
+ # It's the end.
+ self.p_p = 0
+ self.out("\n")
+ self.space = False
+
+ if self.p_p:
+ self.out((self.br_toggle + "\n" + bq) * self.p_p)
+ self.space = False
+ self.br_toggle = ""
+
+ if self.space:
+ if not self.lastWasNL:
+ self.out(" ")
+ self.space = False
+
+ if self.a and (
+ (self.p_p == 2 and self.links_each_paragraph) or force == "end"
+ ):
+ if force == "end":
+ self.out("\n")
+
+ newa = []
+ for link in self.a:
+ if self.outcount > link.outcount:
+ self.out(
+ " ["
+ + str(link.count)
+ + "]: "
+ + urlparse.urljoin(self.baseurl, link.attrs["href"])
+ )
+ if "title" in link.attrs:
+ assert link.attrs["title"] is not None
+ self.out(" (" + link.attrs["title"] + ")")
+ self.out("\n")
+ else:
+ newa.append(link)
+
+ # Don't need an extra line when nothing was done.
+ if self.a != newa:
+ self.out("\n")
+
+ self.a = newa
+
+ if self.abbr_list and force == "end":
+ for abbr, definition in self.abbr_list.items():
+ self.out(" *[" + abbr + "]: " + definition + "\n")
+
+ self.p_p = 0
+ self.out(data)
+ self.outcount += 1
+
+ def handle_data(self, data: str, entity_char: bool = False) -> None:
+ if not data:
+ # Data may be empty for some HTML entities. For example,
+ # LEFT-TO-RIGHT MARK.
+ return
+
+ if self.stressed:
+ data = data.strip()
+ self.stressed = False
+ self.preceding_stressed = True
+ elif self.preceding_stressed:
+ if (
+ re.match(r"[^][(){}\s.!?]", data[0])
+ and not hn(self.current_tag)
+ and self.current_tag not in ["a", "code", "pre"]
+ ):
+ # should match a letter or common punctuation
+ data = " " + data
+ self.preceding_stressed = False
+
+ if self.style:
+ self.style_def.update(dumb_css_parser(data))
+
+ if self.maybe_automatic_link is not None:
+ href = self.maybe_automatic_link
+ if (
+ href == data
+ and self.absolute_url_matcher.match(href)
+ and self.use_automatic_links
+ ):
+ self.o("<" + data + ">")
+ self.empty_link = False
+ return
+ else:
+ self.o("[")
+ self.maybe_automatic_link = None
+ self.empty_link = False
+
+ if not self.code and not self.pre and not entity_char:
+ data = escape_md_section(data, snob=self.escape_snob)
+ self.preceding_data = data
+ self.o(data, puredata=True)
+
+ def charref(self, name: str) -> str:
+ if name[0] in ["x", "X"]:
+ c = int(name[1:], 16)
+ else:
+ c = int(name)
+
+ if not self.unicode_snob and c in unifiable_n:
+ return unifiable_n[c]
+ else:
+ try:
+ return chr(c)
+ except ValueError: # invalid unicode
+ return ""
+
+ def entityref(self, c: str) -> str:
+ if not self.unicode_snob and c in config.UNIFIABLE:
+ return config.UNIFIABLE[c]
+ try:
+ ch = html.entities.html5[c + ";"]
+ except KeyError:
+ return "&" + c + ";"
+ return config.UNIFIABLE[c] if c == "nbsp" else ch
+
+ def google_nest_count(self, style: Dict[str, str]) -> int:
+ """
+ Calculate the nesting count of google doc lists
+
+ :type style: dict
+
+ :rtype: int
+ """
+ nest_count = 0
+ if "margin-left" in style:
+ nest_count = int(style["margin-left"][:-2]) // self.google_list_indent
+
+ return nest_count
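
The computation above assumes Google Docs exports express list nesting as a margin-left value in points; for example:

    # margin-left: 72pt with the default GOOGLE_LIST_INDENT of 36 means two levels deep
    style = {"margin-left": "72pt"}
    depth = int(style["margin-left"][:-2]) // 36  # -> 2
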
+
+ def optwrap(self, text: str) -> str:
+ """
+ Wrap all paragraphs in the provided text.
+
+ :type text: str
+
+ :rtype: str
+ """
+ if not self.body_width:
+ return text
+
+ result = ""
+ newlines = 0
+ # I cannot think of a better solution for now.
+ # To avoid the non-wrap behaviour for entire paras
+ # because of the presence of a link in it
+ if not self.wrap_links:
+ self.inline_links = False
+ for para in text.split("\n"):
+ if len(para) > 0:
+ if not skipwrap(
+ para, self.wrap_links, self.wrap_list_items, self.wrap_tables
+ ):
+ indent = ""
+ if para.startswith(" " + self.ul_item_mark):
+ # list item continuation: add a double indent to the
+ # new lines
+ indent = " "
+ elif para.startswith("> "):
+ # blockquote continuation: add the greater than symbol
+ # to the new lines
+ indent = "> "
+ wrapped = wrap(
+ para,
+ self.body_width,
+ break_long_words=False,
+ subsequent_indent=indent,
+ )
+ result += "\n".join(wrapped)
+ if para.endswith(" "):
+ result += " \n"
+ newlines = 1
+ elif indent:
+ result += "\n"
+ newlines = 1
+ else:
+ result += "\n\n"
+ newlines = 2
+ else:
+ # Warning for the tempted!!!
+ # Be aware that obvious replacement of this with
+ # line.isspace()
+ # DOES NOT work! Explanations are welcome.
+ if not config.RE_SPACE.match(para):
+ result += para + "\n"
+ newlines = 1
+ else:
+ if newlines < 2:
+ result += "\n"
+ newlines += 1
+ return result
def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = None) -> str:
- if bodywidth is None:
- bodywidth = config.BODY_WIDTH
- h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
+ if bodywidth is None:
+ bodywidth = config.BODY_WIDTH
+ h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
- return h.handle(html)
+ return h.handle(html)
diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py
index f024fe2b..78d1c3b8 100644
--- a/migration/tables/content_items.py
+++ b/migration/tables/content_items.py
@@ -39,6 +39,7 @@ def get_metadata(r):
metadata['createdAt'] = r.get('createdAt', ts)
metadata['layout'] = r['layout']
metadata['topics'] = [topic['slug'] for topic in r['topics']]
+ metadata['topics'].sort()
if r.get('cover', False):
metadata['cover'] = r.get('cover')
return metadata
@@ -80,7 +81,6 @@ def migrate(entry, users_by_oid, topics_by_oid):
'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000')
}
r['slug'] = entry.get('slug', '')
- body_orig = entry.get('body', '')
if not r['slug'] and entry.get('friendlySlugs') is not None:
r['slug'] = entry['friendlySlugs']['slug'][0]['slug']
if(r['slug'] is None):
@@ -94,12 +94,12 @@ def migrate(entry, users_by_oid, topics_by_oid):
mainTopic = topics_by_oid.get(category)
if mainTopic:
r['mainTopic'] = mainTopic["slug"]
- topic_oids = set([category])
- topic_oids.update(entry.get("tags", []))
+ topic_oids = [category, ]
+ taglist = entry.get("tags", [])
+ topic_oids.extend(taglist)
for oid in topic_oids:
if oid in topics_by_oid:
r['topics'].append(topics_by_oid[oid])
-
if entry.get('image') is not None:
r['cover'] = entry['image']['url']
if entry.get('thumborId') is not None:
@@ -116,7 +116,7 @@ def migrate(entry, users_by_oid, topics_by_oid):
else:
body_html = str(BeautifulSoup(
body_orig, features="html.parser"))
- r['body'] = body_html # html2text(body_html)
+ r['body'] = html2text(body_html)
else:
print(r['slug'] + ': literature has no media')
elif entry.get('type') == 'Video':
@@ -127,17 +127,31 @@ def migrate(entry, users_by_oid, topics_by_oid):
if video_url == '#':
video_url = 'https://vimeo.com/' + vm if vm else '#'
if video_url == '#':
- print(entry.get('media', 'NO MEDIA!'))
+ print(entry.get('media', 'UNKNOWN MEDIA PROVIDER!'))
# raise Exception
- r['body'] = '' + html2text(m.get('body', '')) # FIXME
+ therestof = html2text(m.get('body', ''))
+ r['body'] = 'import VideoPlayer from \"src/components/Article/VideoPlayer\"\n' + \
+ '\n\n' + therestof
elif entry.get('type') == 'Music':
- r['body'] = '' # FIXME
+ r['body'] = 'import MusicPlayer from \"src/components/MusicPlayer\"\n'
+ for m in entry['media']:
+ if m == { 'main': 'true' } or m == { 'main': True } or m == {}:
+ continue
+ # TODO: mark highlighted track isMain == True
+ try: r['body'] += '\n\n'
+ r['body'] += html2text(m.get('body', ''))
+ elif entry.get('type') == 'Image':
+ m = r.get('media')
+ try: r['body'] = ''
+ except: print(entry)
if r.get('body') is None:
body_orig = entry.get('body', '')
body_html = str(BeautifulSoup(body_orig, features="html.parser"))
- r['body'] = body_html # html2text(body_html)
+ r['body'] = html2text(body_html)
body = r.get('body', '')
# get author data
@@ -172,12 +186,12 @@ def migrate(entry, users_by_oid, topics_by_oid):
'userpic': userdata.get('userpic', '')
}
shout_dict['authors'] = [ author, ]
-
+
if entry['published']:
- metadata = get_metadata(r)
+ metadata = get_metadata(shout_dict)
content = frontmatter.dumps(frontmatter.Post(body, **metadata))
- ext = 'md'
- open('migration/content/' + r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
+ ext = 'mdx'
+ open('../discoursio-web/content/' + r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
try:
shout_dict['createdAt'] = date_parse(r.get('createdAt')) if entry.get('createdAt') else ts
shout_dict['publishedAt'] = date_parse(entry.get('publishedAt')) if entry.get('published') else None