diff --git a/auth/email.py b/auth/email.py index 17f4b7f7..bbccd6e6 100644 --- a/auth/email.py +++ b/auth/email.py @@ -4,25 +4,35 @@ from starlette.exceptions import HTTPException from auth.authenticate import EmailAuthenticate, ResetPassword -from settings import BACKEND_URL, MAILGUN_API_KEY, MAILGUN_DOMAIN, RESET_PWD_URL, CONFIRM_EMAIL_URL +from settings import BACKEND_URL, MAILGUN_API_KEY, MAILGUN_DOMAIN, RESET_PWD_URL, \ + CONFIRM_EMAIL_URL, ERROR_URL_ON_FRONTEND MAILGUN_API_URL = "https://api.mailgun.net/v3/%s/messages" % (MAILGUN_DOMAIN) MAILGUN_FROM = "postmaster " % (MAILGUN_DOMAIN) AUTH_URL = "%s/email_authorize" % (BACKEND_URL) +email_templates = {"confirm_email" : "", "auth_email" : "", "reset_password_email" : ""} + +def load_email_templates(): + for name in email_templates: + filename = "templates/%s.tmpl" % name + with open(filename) as f: + email_templates[name] = f.read() + print("all email templates loaded") + async def send_confirm_email(user): - text = "To confirm registration follow the link" + text = email_templates["confirm_email"] token = await EmailAuthenticate.get_email_token(user) await send_email(user, AUTH_URL, text, token) async def send_auth_email(user): - text = "To enter the site follow the link" + text = email_templates["auth_email"] token = await EmailAuthenticate.get_email_token(user) await send_email(user, AUTH_URL, text, token) async def send_reset_password_email(user): - text = "To reset password follow the link" + text = email_templates["reset_password_email"] token = await ResetPassword.get_reset_token(user) await send_email(user, RESET_PWD_URL, text, token) @@ -45,9 +55,14 @@ async def send_email(user, url, text, token): async def email_authorize(request): token = request.query_params.get('token') if not token: - raise HTTPException(500, "invalid url") + url_with_error = "%s?error=%s" % (ERROR_URL_ON_FRONTEND, "INVALID_TOKEN") + return RedirectResponse(url = url_with_error) - auth_token, user = await 
EmailAuthenticate.authenticate(token) + try: + auth_token, user = await EmailAuthenticate.authenticate(token) + except: + url_with_error = "%s?error=%s" % (ERROR_URL_ON_FRONTEND, "INVALID_TOKEN") + return RedirectResponse(url = url_with_error) if not user.emailConfirmed: with local_session() as session: diff --git a/migration/html2text/__init__.py b/migration/html2text/__init__.py index 4643a4e2..0e521760 100644 --- a/migration/html2text/__init__.py +++ b/migration/html2text/__init__.py @@ -12,19 +12,19 @@ from . import config from .elements import AnchorElement, ListElement from .typing import OutCallback from .utils import ( - dumb_css_parser, - element_style, - escape_md, - escape_md_section, - google_fixed_width_font, - google_has_height, - google_list_style, - google_text_emphasis, - hn, - list_numbering_start, - pad_tables_in_text, - skipwrap, - unifiable_n, + dumb_css_parser, + element_style, + escape_md, + escape_md_section, + google_fixed_width_font, + google_has_height, + google_list_style, + google_text_emphasis, + hn, + list_numbering_start, + pad_tables_in_text, + skipwrap, + unifiable_n, ) __version__ = (2020, 1, 16) @@ -35,984 +35,998 @@ __version__ = (2020, 1, 16) class HTML2Text(html.parser.HTMLParser): - def __init__( - self, - out: Optional[OutCallback] = None, - baseurl: str = "", - bodywidth: int = config.BODY_WIDTH, - ) -> None: - """ - Input parameters: - out: possible custom replacement for self.outtextf (which - appends lines of text). 
- baseurl: base URL of the document we process - """ - super().__init__(convert_charrefs=False) - - # Config options - self.split_next_td = False - self.td_count = 0 - self.table_start = False - self.unicode_snob = config.UNICODE_SNOB # covered in cli - self.escape_snob = config.ESCAPE_SNOB # covered in cli - self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH - self.body_width = bodywidth # covered in cli - self.skip_internal_links = config.SKIP_INTERNAL_LINKS # covered in cli - self.inline_links = config.INLINE_LINKS # covered in cli - self.protect_links = config.PROTECT_LINKS # covered in cli - self.google_list_indent = config.GOOGLE_LIST_INDENT # covered in cli - self.ignore_links = config.IGNORE_ANCHORS # covered in cli - self.ignore_mailto_links = config.IGNORE_MAILTO_LINKS # covered in cli - self.ignore_images = config.IGNORE_IMAGES # covered in cli - self.images_as_html = config.IMAGES_AS_HTML # covered in cli - self.images_to_alt = config.IMAGES_TO_ALT # covered in cli - self.images_with_size = config.IMAGES_WITH_SIZE # covered in cli - self.ignore_emphasis = config.IGNORE_EMPHASIS # covered in cli - self.bypass_tables = config.BYPASS_TABLES # covered in cli - self.ignore_tables = config.IGNORE_TABLES # covered in cli - self.google_doc = False # covered in cli - self.ul_item_mark = "*" # covered in cli - self.emphasis_mark = "_" # covered in cli - self.strong_mark = "**" - self.single_line_break = config.SINGLE_LINE_BREAK # covered in cli - self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli - self.hide_strikethrough = False # covered in cli - self.mark_code = config.MARK_CODE - self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli - self.wrap_links = config.WRAP_LINKS # covered in cli - self.wrap_tables = config.WRAP_TABLES - self.pad_tables = config.PAD_TABLES # covered in cli - self.default_image_alt = config.DEFAULT_IMAGE_ALT # covered in cli - self.tag_callback = None - self.open_quote = config.OPEN_QUOTE # covered in 
cli - self.close_quote = config.CLOSE_QUOTE # covered in cli - self.header_id = None - self.span_hightlight = False - self.span_lead = False - - if out is None: - self.out = self.outtextf - else: - self.out = out - - # empty list to store output characters before they are "joined" - self.outtextlist = [] # type: List[str] - - self.quiet = 0 - self.p_p = 0 # number of newline character to print before next output - self.outcount = 0 - self.start = True - self.space = False - self.a = [] # type: List[AnchorElement] - self.astack = [] # type: List[Optional[Dict[str, Optional[str]]]] - self.maybe_automatic_link = None # type: Optional[str] - self.empty_link = False - self.absolute_url_matcher = re.compile(r"^[a-zA-Z+]+://") - self.acount = 0 - self.list = [] # type: List[ListElement] - self.blockquote = 0 - self.pre = False - self.startpre = False - self.code = False - self.quote = False - self.br_toggle = "" - self.lastWasNL = False - self.lastWasList = False - self.style = 0 - self.style_def = {} # type: Dict[str, Dict[str, str]] - self.tag_stack = ( - [] - ) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]] - self.emphasis = 0 - self.drop_white_space = 0 - self.inheader = False - # Current abbreviation definition - self.abbr_title = None # type: Optional[str] - # Last inner HTML (for abbr being defined) - self.abbr_data = None # type: Optional[str] - # Stack of abbreviations to write later - self.abbr_list = {} # type: Dict[str, str] - self.baseurl = baseurl - self.stressed = False - self.preceding_stressed = False - self.preceding_data = "" - self.current_tag = "" - - config.UNIFIABLE["nbsp"] = " _place_holder;" - - def feed(self, data: str) -> None: - data = data.replace("", "") - super().feed(data) - - def handle(self, data: str) -> str: - self.feed(data) - self.feed("") - markdown = self.optwrap(self.finish()) - if self.pad_tables: - return pad_tables_in_text(markdown) - else: - return markdown - - def outtextf(self, s: str) -> None: - 
self.outtextlist.append(s) - if s: - self.lastWasNL = s[-1] == "\n" - - def finish(self) -> str: - self.close() - - self.pbr() - self.o("", force="end") - - outtext = "".join(self.outtextlist) - - if self.unicode_snob: - nbsp = html.entities.html5["nbsp;"] - else: - nbsp = " " - outtext = outtext.replace(" _place_holder;", nbsp) - - # Clear self.outtextlist to avoid memory leak of its content to - # the next handling. - self.outtextlist = [] - - return outtext - - def handle_charref(self, c: str) -> None: - self.handle_data(self.charref(c), True) - - def handle_entityref(self, c: str) -> None: - ref = self.entityref(c) - - # ref may be an empty string (e.g. for ‎/‏ markers that should - # not contribute to the final output). - # self.handle_data cannot handle a zero-length string right after a - # stressed tag or mid-text within a stressed tag (text get split and - # self.stressed/self.preceding_stressed gets switched after the first - # part of that text). - if ref: - self.handle_data(ref, True) - - def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: - self.handle_tag(tag, dict(attrs), start=True) - - def handle_endtag(self, tag: str) -> None: - self.handle_tag(tag, {}, start=False) - - def previousIndex(self, attrs: Dict[str, Optional[str]]) -> Optional[int]: - """ - :type attrs: dict - - :returns: The index of certain set of attributes (of a link) in the - self.a list. 
If the set of attributes is not found, returns None - :rtype: int - """ - if "href" not in attrs: - return None - - match = False - for i, a in enumerate(self.a): - if "href" in a.attrs and a.attrs["href"] == attrs["href"]: - if "title" in a.attrs or "title" in attrs: - if ( - "title" in a.attrs - and "title" in attrs - and a.attrs["title"] == attrs["title"] - ): - match = True - else: - match = True - - if match: - return i - return None - - def handle_emphasis( - self, start: bool, tag_style: Dict[str, str], parent_style: Dict[str, str] - ) -> None: - """ - Handles various text emphases - """ - tag_emphasis = google_text_emphasis(tag_style) - parent_emphasis = google_text_emphasis(parent_style) - - # handle Google's text emphasis - strikethrough = "line-through" in tag_emphasis and self.hide_strikethrough - - # google and others may mark a font's weight as `bold` or `700` - bold = False - for bold_marker in config.BOLD_TEXT_STYLE_VALUES: - bold = bold_marker in tag_emphasis and bold_marker not in parent_emphasis - if bold: - break - - italic = "italic" in tag_emphasis and "italic" not in parent_emphasis - fixed = ( - google_fixed_width_font(tag_style) - and not google_fixed_width_font(parent_style) - and not self.pre - ) - - if start: - # crossed-out text must be handled before other attributes - # in order not to output qualifiers unnecessarily - if bold or italic or fixed: - self.emphasis += 1 - if strikethrough: - self.quiet += 1 - if italic: - self.o(self.emphasis_mark) - self.drop_white_space += 1 - if bold: - self.o(self.strong_mark) - self.drop_white_space += 1 - if fixed: - self.o("`") - self.drop_white_space += 1 - self.code = True - else: - if bold or italic or fixed: - # there must not be whitespace before closing emphasis mark - self.emphasis -= 1 - self.space = False - if fixed: - if self.drop_white_space: - # empty emphasis, drop it - self.drop_white_space -= 1 - else: - self.o("`") - self.code = False - if bold: - if self.drop_white_space: - # 
empty emphasis, drop it - self.drop_white_space -= 1 - else: - self.o(self.strong_mark) - if italic: - if self.drop_white_space: - # empty emphasis, drop it - self.drop_white_space -= 1 - else: - self.o(self.emphasis_mark) - # space is only allowed after *all* emphasis marks - if (bold or italic) and not self.emphasis: - self.o(" ") - if strikethrough: - self.quiet -= 1 - - def handle_tag( - self, tag: str, attrs: Dict[str, Optional[str]], start: bool - ) -> None: - self.current_tag = tag - - if self.tag_callback is not None: - if self.tag_callback(self, tag, attrs, start) is True: - return - - # first thing inside the anchor tag is another tag - # that produces some output - if ( - start - and self.maybe_automatic_link is not None - and tag not in ["p", "div", "style", "dl", "dt"] - and (tag != "img" or self.ignore_images) - ): - self.o("[") - self.maybe_automatic_link = None - self.empty_link = False - - if self.google_doc: - # the attrs parameter is empty for a closing tag. in addition, we - # need the attributes of the parent nodes in order to get a - # complete style description for the current element. we assume - # that google docs export well formed html. 
- parent_style = {} # type: Dict[str, str] - if start: - if self.tag_stack: - parent_style = self.tag_stack[-1][2] - tag_style = element_style(attrs, self.style_def, parent_style) - self.tag_stack.append((tag, attrs, tag_style)) - else: - dummy, attrs, tag_style = ( - self.tag_stack.pop() if self.tag_stack else (None, {}, {}) - ) - if self.tag_stack: - parent_style = self.tag_stack[-1][2] - - if hn(tag): - # check if nh is inside of an 'a' tag (incorrect but found in the wild) - if self.astack: - if start: - self.inheader = True - # are inside link name, so only add '#' if it can appear before '[' - if self.outtextlist and self.outtextlist[-1] == "[": - self.outtextlist.pop() - self.space = False - self.o(hn(tag) * "#" + " ") - self.o("[") - self.header_id = attrs.get('id') - else: - self.p() - if start: - self.inheader = True - self.o(hn(tag) * "#" + " ") - if self.header_id: - self.o(' {#' + self.header_id + '}') - self.header_id = None - else: - self.inheader = False - return # prevent redundant emphasis marks on headers - - if tag == 'span': - if start and 'class' in attrs: - if attrs['class'] == 'highlight': - self.o('`') # NOTE: same as - self.span_hightlight = True - elif attrs['class'] == 'lead': - if self.span_lead == False: - self.o('==\n') # NOTE: but CriticMarkup uses {== ==} - self.span_lead = True - else: - if self.span_hightlight: - self.o('`') - self.span_hightlight = False - elif self.span_lead: - if self.span_lead == True: - self.o('\n==') - self.span_lead = False - - if tag in ["p", "div"]: - if self.google_doc: - if start and google_has_height(tag_style): - self.p() - else: - self.soft_br() - elif self.astack: - pass - else: - self.p() - - if tag == "br" and start: - if self.blockquote > 0: - self.o(" \n> ") - else: - self.o(" \n") - - if tag == "hr" and start: - self.p() - self.o("* * *") - self.p() - - if tag in ["head", "style", "script"]: - if start: - self.quiet += 1 - else: - self.quiet -= 1 - - if tag == "style": - if start: - self.style 
+= 1 - else: - self.style -= 1 - - if tag in ["body"]: - self.quiet = 0 # sites like 9rules.com never close - - if tag == "blockquote": - if start: - self.p() - self.o("> ", force=True) - self.start = True - self.blockquote += 1 - else: - self.blockquote -= 1 - self.p() - - if tag in ["em", "i", "u"] and not self.ignore_emphasis: - # Separate with a space if we immediately follow an alphanumeric - # character, since otherwise Markdown won't render the emphasis - # marks, and we'll be left with eg 'foo_bar_' visible. - # (Don't add a space otherwise, though, since there isn't one in the - # original HTML.) - if ( - start - and self.preceding_data - and self.preceding_data[-1] not in string.whitespace - and self.preceding_data[-1] not in string.punctuation - ): - emphasis = " " + self.emphasis_mark - self.preceding_data += " " - else: - emphasis = self.emphasis_mark - - self.o(emphasis) - if start: - self.stressed = True - - if tag in ["strong", "b"] and not self.ignore_emphasis: - # Separate with space if we immediately follow an * character, since - # without it, Markdown won't render the resulting *** correctly. - # (Don't add a space otherwise, though, since there isn't one in the - # original HTML.) 
- if ( - start - and self.preceding_data - and self.preceding_data[-1] == self.strong_mark[0] - ): - strong = " " + self.strong_mark - self.preceding_data += " " - else: - strong = self.strong_mark - - self.o(strong) - if start: - self.stressed = True - - if tag in ["del", "strike", "s"]: - if start and self.preceding_data and self.preceding_data[-1] == "~": - strike = " ~~" - self.preceding_data += " " - else: - strike = "~~" - - self.o(strike) - if start: - self.stressed = True - - if self.google_doc: - if not self.inheader: - # handle some font attributes, but leave headers clean - self.handle_emphasis(start, tag_style, parent_style) - - if tag in ["kbd", "code", "tt"] and not self.pre: - self.o("`") # TODO: `` `this` `` - self.code = not self.code - - if tag == "abbr": - if start: - self.abbr_title = None - self.abbr_data = "" - if "title" in attrs: - self.abbr_title = attrs["title"] - else: - if self.abbr_title is not None: - assert self.abbr_data is not None - self.abbr_list[self.abbr_data] = self.abbr_title - self.abbr_title = None - self.abbr_data = None - - if tag == "q": - if not self.quote: - self.o(self.open_quote) - else: - self.o(self.close_quote) - self.quote = not self.quote - - def link_url(self: HTML2Text, link: str, title: str = "") -> None: - url = urlparse.urljoin(self.baseurl, link) - title = ' "{}"'.format(title) if title.strip() else "" - self.o("]({url}{title})".format(url=escape_md(url), title=title)) - - if tag == "a" and not self.ignore_links: - if start: - if ( - "href" in attrs - and attrs["href"] is not None - and not (self.skip_internal_links and attrs["href"].startswith("#")) - and not ( - self.ignore_mailto_links and attrs["href"].startswith("mailto:") - ) - ): - self.astack.append(attrs) - self.maybe_automatic_link = attrs["href"] - self.empty_link = True - if self.protect_links: - attrs["href"] = "<" + attrs["href"] + ">" - else: - self.astack.append(None) - else: - if self.astack: - a = self.astack.pop() - if 
self.maybe_automatic_link and not self.empty_link: - self.maybe_automatic_link = None - elif a: - assert a["href"] is not None - if self.empty_link: - self.o("[") - self.empty_link = False - self.maybe_automatic_link = None - if self.inline_links: - self.p_p = 0 - title = a.get("title") or "" - title = escape_md(title) - link_url(self, a["href"], title) - else: - i = self.previousIndex(a) - if i is not None: - a_props = self.a[i] - else: - self.acount += 1 - a_props = AnchorElement(a, self.acount, self.outcount) - self.a.append(a_props) - self.o("][" + str(a_props.count) + "]") - - if tag == "img" and start and not self.ignore_images: - if "src" in attrs: - assert attrs["src"] is not None - if not self.images_to_alt: - attrs["href"] = attrs["src"] - alt = attrs.get("alt") or self.default_image_alt - - # If we have images_with_size, write raw html including width, - # height, and alt attributes - if self.images_as_html or ( - self.images_with_size and ("width" in attrs or "height" in attrs) - ): - self.o("") - return - - # If we have a link to create, output the start - if self.maybe_automatic_link is not None: - href = self.maybe_automatic_link - if ( - self.images_to_alt - and escape_md(alt) == href - and self.absolute_url_matcher.match(href) - ): - self.o("<" + escape_md(alt) + ">") - self.empty_link = False - return - else: - self.o("[") - self.maybe_automatic_link = None - self.empty_link = False - - # If we have images_to_alt, we discard the image itself, - # considering only the alt text. 
- if self.images_to_alt: - self.o(escape_md(alt)) - else: - self.o("![" + escape_md(alt) + "]") - if self.inline_links: - href = attrs.get("href") or "" - self.o( - "(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")" - ) - else: - i = self.previousIndex(attrs) - if i is not None: - a_props = self.a[i] - else: - self.acount += 1 - a_props = AnchorElement(attrs, self.acount, self.outcount) - self.a.append(a_props) - self.o("[" + str(a_props.count) + "]") - - if tag == "dl" and start: - self.p() - if tag == "dt" and not start: - self.pbr() - if tag == "dd" and start: - self.o(" ") - if tag == "dd" and not start: - self.pbr() - - if tag in ["ol", "ul"]: - # Google Docs create sub lists as top level lists - if not self.list and not self.lastWasList: - self.p() - if start: - if self.google_doc: - list_style = google_list_style(tag_style) - else: - list_style = tag - numbering_start = list_numbering_start(attrs) - self.list.append(ListElement(list_style, numbering_start)) - else: - if self.list: - self.list.pop() - if not self.google_doc and not self.list: - self.o("\n") - self.lastWasList = True - else: - self.lastWasList = False - - if tag == "li": - self.pbr() - if start: - if self.list: - li = self.list[-1] - else: - li = ListElement("ul", 0) - if self.google_doc: - self.o(" " * self.google_nest_count(tag_style)) - else: - # Indent two spaces per list, except use three spaces for an - # unordered list inside an ordered list. - # https://spec.commonmark.org/0.28/#motivation - # TODO: line up
  1. s > 9 correctly. - parent_list = None - for list in self.list: - self.o( - " " if parent_list == "ol" and list.name == "ul" else " " - ) - parent_list = list.name - - if li.name == "ul": - self.o(self.ul_item_mark + " ") - elif li.name == "ol": - li.num += 1 - self.o(str(li.num) + ". ") - self.start = True - - if tag in ["table", "tr", "td", "th"]: - if self.ignore_tables: - if tag == "tr": - if start: - pass - else: - self.soft_br() - else: - pass - - elif self.bypass_tables: - if start: - self.soft_br() - if tag in ["td", "th"]: - if start: - self.o("<{}>\n\n".format(tag)) - else: - self.o("\n".format(tag)) - else: - if start: - self.o("<{}>".format(tag)) - else: - self.o("".format(tag)) - - else: - if tag == "table": - if start: - self.table_start = True - if self.pad_tables: - self.o("<" + config.TABLE_MARKER_FOR_PAD + ">") - self.o(" \n") - else: - if self.pad_tables: - # add break in case the table is empty or its 1 row table - self.soft_br() - self.o("") - self.o(" \n") - if tag in ["td", "th"] and start: - if self.split_next_td: - self.o("| ") - self.split_next_td = True - - if tag == "tr" and start: - self.td_count = 0 - if tag == "tr" and not start: - self.split_next_td = False - self.soft_br() - if tag == "tr" and not start and self.table_start: - # Underline table header - self.o("|".join(["---"] * self.td_count)) - self.soft_br() - self.table_start = False - if tag in ["td", "th"] and start: - self.td_count += 1 - - if tag == "pre": - if start: - self.startpre = True - self.pre = True - else: - self.pre = False - if self.mark_code: - self.out("\n[/code]") - self.p() - - # TODO: Add docstring for these one letter functions - def pbr(self) -> None: - "Pretty print has a line break" - if self.p_p == 0: - self.p_p = 1 - - def p(self) -> None: - "Set pretty print to 1 or 2 lines" - self.p_p = 1 if self.single_line_break else 2 - - def soft_br(self) -> None: - "Soft breaks" - self.pbr() - self.br_toggle = " " - - def o( - self, data: str, puredata: bool 
= False, force: Union[bool, str] = False - ) -> None: - """ - Deal with indentation and whitespace - """ - if self.abbr_data is not None: - self.abbr_data += data - - if not self.quiet: - if self.google_doc: - # prevent white space immediately after 'begin emphasis' - # marks ('**' and '_') - lstripped_data = data.lstrip() - if self.drop_white_space and not (self.pre or self.code): - data = lstripped_data - if lstripped_data != "": - self.drop_white_space = 0 - - if puredata and not self.pre: - # This is a very dangerous call ... it could mess up - # all handling of   when not handled properly - # (see entityref) - data = re.sub(r"\s+", r" ", data) - if data and data[0] == " ": - self.space = True - data = data[1:] - if not data and not force: - return - - if self.startpre: - # self.out(" :") #TODO: not output when already one there - if not data.startswith("\n") and not data.startswith("\r\n"): - #
    stuff...
    -                    data = "\n" + data
    -                if self.mark_code:
    -                    self.out("\n[code]")
    -                    self.p_p = 0
    -
    -            bq = ">" * self.blockquote
    -            if not (force and data and data[0] == ">") and self.blockquote:
    -                bq += " "
    -
    -            if self.pre:
    -                if not self.list:
    -                    bq += "    "
    -                # else: list content is already partially indented
    -                bq += "    " * len(self.list)
    -                data = data.replace("\n", "\n" + bq)
    -
    -            if self.startpre:
    -                self.startpre = False
    -                if self.list:
    -                    # use existing initial indentation
    -                    data = data.lstrip("\n")
    -
    -            if self.start:
    -                self.space = False
    -                self.p_p = 0
    -                self.start = False
    -
    -            if force == "end":
    -                # It's the end.
    -                self.p_p = 0
    -                self.out("\n")
    -                self.space = False
    -
    -            if self.p_p:
    -                self.out((self.br_toggle + "\n" + bq) * self.p_p)
    -                self.space = False
    -                self.br_toggle = ""
    -
    -            if self.space:
    -                if not self.lastWasNL:
    -                    self.out(" ")
    -                self.space = False
    -
    -            if self.a and (
    -                (self.p_p == 2 and self.links_each_paragraph) or force == "end"
    -            ):
    -                if force == "end":
    -                    self.out("\n")
    -
    -                newa = []
    -                for link in self.a:
    -                    if self.outcount > link.outcount:
    -                        self.out(
    -                            "   ["
    -                            + str(link.count)
    -                            + "]: "
    -                            + urlparse.urljoin(self.baseurl, link.attrs["href"])
    -                        )
    -                        if "title" in link.attrs:
    -                            assert link.attrs["title"] is not None
    -                            self.out(" (" + link.attrs["title"] + ")")
    -                        self.out("\n")
    -                    else:
    -                        newa.append(link)
    -
    -                # Don't need an extra line when nothing was done.
    -                if self.a != newa:
    -                    self.out("\n")
    -
    -                self.a = newa
    -
    -            if self.abbr_list and force == "end":
    -                for abbr, definition in self.abbr_list.items():
    -                    self.out("  *[" + abbr + "]: " + definition + "\n")
    -
    -            self.p_p = 0
    -            self.out(data)
    -            self.outcount += 1
    -
    -    def handle_data(self, data: str, entity_char: bool = False) -> None:
    -        if not data:
    -            # Data may be empty for some HTML entities. For example,
    -            # LEFT-TO-RIGHT MARK.
    -            return
    -
    -        if self.stressed:
    -            data = data.strip()
    -            self.stressed = False
    -            self.preceding_stressed = True
    -        elif self.preceding_stressed:
    -            if (
    -                re.match(r"[^][(){}\s.!?]", data[0])
    -                and not hn(self.current_tag)
    -                and self.current_tag not in ["a", "code", "pre"]
    -            ):
    -                # should match a letter or common punctuation
    -                data = " " + data
    -            self.preceding_stressed = False
    -
    -        if self.style:
    -            self.style_def.update(dumb_css_parser(data))
    -
    -        if self.maybe_automatic_link is not None:
    -            href = self.maybe_automatic_link
    -            if (
    -                href == data
    -                and self.absolute_url_matcher.match(href)
    -                and self.use_automatic_links
    -            ):
    -                self.o("<" + data + ">")
    -                self.empty_link = False
    -                return
    -            else:
    -                self.o("[")
    -                self.maybe_automatic_link = None
    -                self.empty_link = False
    -
    -        if not self.code and not self.pre and not entity_char:
    -            data = escape_md_section(data, snob=self.escape_snob)
    -        self.preceding_data = data
    -        self.o(data, puredata=True)
    -
    -    def charref(self, name: str) -> str:
    -        if name[0] in ["x", "X"]:
    -            c = int(name[1:], 16)
    -        else:
    -            c = int(name)
    -
    -        if not self.unicode_snob and c in unifiable_n:
    -            return unifiable_n[c]
    -        else:
    -            try:
    -                return chr(c)
    -            except ValueError:  # invalid unicode
    -                return ""
    -
    -    def entityref(self, c: str) -> str:
    -        if not self.unicode_snob and c in config.UNIFIABLE:
    -            return config.UNIFIABLE[c]
    -        try:
    -            ch = html.entities.html5[c + ";"]
    -        except KeyError:
    -            return "&" + c + ";"
    -        return config.UNIFIABLE[c] if c == "nbsp" else ch
    -
    -    def google_nest_count(self, style: Dict[str, str]) -> int:
    -        """
    -        Calculate the nesting count of google doc lists
    -
    -        :type style: dict
    -
    -        :rtype: int
    -        """
    -        nest_count = 0
    -        if "margin-left" in style:
    -            nest_count = int(style["margin-left"][:-2]) // self.google_list_indent
    -
    -        return nest_count
    -
    -    def optwrap(self, text: str) -> str:
    -        """
    -        Wrap all paragraphs in the provided text.
    -
    -        :type text: str
    -
    -        :rtype: str
    -        """
    -        if not self.body_width:
    -            return text
    -
    -        result = ""
    -        newlines = 0
    -        # I cannot think of a better solution for now.
    -        # To avoid the non-wrap behaviour for entire paras
    -        # because of the presence of a link in it
    -        if not self.wrap_links:
    -            self.inline_links = False
    -        for para in text.split("\n"):
    -            if len(para) > 0:
    -                if not skipwrap(
    -                    para, self.wrap_links, self.wrap_list_items, self.wrap_tables
    -                ):
    -                    indent = ""
    -                    if para.startswith("  " + self.ul_item_mark):
    -                        # list item continuation: add a double indent to the
    -                        # new lines
    -                        indent = "    "
    -                    elif para.startswith("> "):
    -                        # blockquote continuation: add the greater than symbol
    -                        # to the new lines
    -                        indent = "> "
    -                    wrapped = wrap(
    -                        para,
    -                        self.body_width,
    -                        break_long_words=False,
    -                        subsequent_indent=indent,
    -                    )
    -                    result += "\n".join(wrapped)
    -                    if para.endswith("  "):
    -                        result += "  \n"
    -                        newlines = 1
    -                    elif indent:
    -                        result += "\n"
    -                        newlines = 1
    -                    else:
    -                        result += "\n\n"
    -                        newlines = 2
    -                else:
    -                    # Warning for the tempted!!!
    -                    # Be aware that obvious replacement of this with
    -                    # line.isspace()
    -                    # DOES NOT work! Explanations are welcome.
    -                    if not config.RE_SPACE.match(para):
    -                        result += para + "\n"
    -                        newlines = 1
    -            else:
    -                if newlines < 2:
    -                    result += "\n"
    -                    newlines += 1
    -        return result
    +	def __init__(
    +		self,
    +		out: Optional[OutCallback] = None,
    +		baseurl: str = "",
    +		bodywidth: int = config.BODY_WIDTH,
    +	) -> None:
    +		"""
    +		Copy the conversion options from config and reset all parser state.
    +
    +		Input parameters:
    +			out: possible custom replacement for self.outtextf (which
    +			appends lines of text).
    +			baseurl: base URL of the document we process
    +			bodywidth: stored as self.body_width; optwrap() returns its
    +			input unwrapped when this value is falsy
    +		"""
    +		super().__init__(convert_charrefs=False)
    +
    +		# Config options
    +		self.split_next_td = False
    +		self.td_count = 0
    +		self.table_start = False
    +		self.unicode_snob = config.UNICODE_SNOB  # covered in cli
    +		self.escape_snob = config.ESCAPE_SNOB  # covered in cli
    +		self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH
    +		self.body_width = bodywidth  # covered in cli
    +		self.skip_internal_links = config.SKIP_INTERNAL_LINKS  # covered in cli
    +		self.inline_links = config.INLINE_LINKS  # covered in cli
    +		self.protect_links = config.PROTECT_LINKS  # covered in cli
    +		self.google_list_indent = config.GOOGLE_LIST_INDENT  # covered in cli
    +		self.ignore_links = config.IGNORE_ANCHORS  # covered in cli
    +		self.ignore_mailto_links = config.IGNORE_MAILTO_LINKS  # covered in cli
    +		self.ignore_images = config.IGNORE_IMAGES  # covered in cli
    +		self.images_as_html = config.IMAGES_AS_HTML  # covered in cli
    +		self.images_to_alt = config.IMAGES_TO_ALT  # covered in cli
    +		self.images_with_size = config.IMAGES_WITH_SIZE  # covered in cli
    +		self.ignore_emphasis = config.IGNORE_EMPHASIS  # covered in cli
    +		self.bypass_tables = config.BYPASS_TABLES  # covered in cli
    +		self.ignore_tables = config.IGNORE_TABLES  # covered in cli
    +		self.google_doc = False  # covered in cli
    +		self.ul_item_mark = "*"  # covered in cli
    +		self.emphasis_mark = "_"  # covered in cli
    +		self.strong_mark = "**"
    +		self.single_line_break = config.SINGLE_LINE_BREAK  # covered in cli
    +		self.use_automatic_links = config.USE_AUTOMATIC_LINKS  # covered in cli
    +		self.hide_strikethrough = False  # covered in cli
    +		self.mark_code = config.MARK_CODE
    +		self.wrap_list_items = config.WRAP_LIST_ITEMS  # covered in cli
    +		self.wrap_links = config.WRAP_LINKS  # covered in cli
    +		self.wrap_tables = config.WRAP_TABLES
    +		self.pad_tables = config.PAD_TABLES  # covered in cli
    +		self.default_image_alt = config.DEFAULT_IMAGE_ALT  # covered in cli
    +		# optional hook; handle_tag() calls it first and stops when it
    +		# returns True
    +		self.tag_callback = None
    +		self.open_quote = config.OPEN_QUOTE  # covered in cli
    +		self.close_quote = config.CLOSE_QUOTE  # covered in cli
    +		# state used by the heading / span handling in handle_tag()
    +		self.header_id = None
    +		self.span_highlight = False
    +		self.span_lead = False
    +
    +		if out is None:
    +			self.out = self.outtextf
    +		else:
    +			self.out = out
    +
    +		# empty list to store output characters before they are "joined"
    +		self.outtextlist = []  # type: List[str]
    +
    +		self.quiet = 0
    +		self.p_p = 0  # number of newline character to print before next output
    +		self.outcount = 0
    +		self.start = True
    +		self.space = False
    +		self.a = []  # type: List[AnchorElement]
    +		self.astack = []  # type: List[Optional[Dict[str, Optional[str]]]]
    +		self.maybe_automatic_link = None  # type: Optional[str]
    +		self.empty_link = False
    +		self.absolute_url_matcher = re.compile(r"^[a-zA-Z+]+://")
    +		self.acount = 0
    +		self.list = []  # type: List[ListElement]
    +		self.blockquote = 0
    +		self.pre = False
    +		self.startpre = False
    +		self.code = False
    +		self.quote = False
    +		self.br_toggle = ""
    +		self.lastWasNL = False
    +		self.lastWasList = False
    +		self.style = 0
    +		self.style_def = {}  # type: Dict[str, Dict[str, str]]
    +		self.tag_stack = (
    +			[]
    +		)  # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
    +		self.emphasis = 0
    +		self.drop_white_space = 0
    +		self.inheader = False
    +		# Current abbreviation definition
    +		self.abbr_title = None  # type: Optional[str]
    +		# Last inner HTML (for abbr being defined)
    +		self.abbr_data = None  # type: Optional[str]
    +		# Stack of abbreviations to write later
    +		self.abbr_list = {}  # type: Dict[str, str]
    +		self.baseurl = baseurl
    +		self.stressed = False
    +		self.preceding_stressed = False
    +		self.preceding_data = ""
    +		self.current_tag = ""
    +		self.current_class = ""
    +
    +		# NOTE: this writes to the shared config module, not to this
    +		# instance; finish() replaces the same placeholder string in the
    +		# joined output.
    +		config.UNIFIABLE["nbsp"] = " _place_holder;"
    +
    +	def feed(self, data: str) -> None:
    +		"""Pre-process ``data`` with str.replace, then hand it to the parent feed()."""
    +		# NOTE(review): both replace() arguments are empty strings as written,
    +		# which makes the call a no-op; the literals look lost -- confirm
    +		# against the real file.
    +		data = data.replace("", "")
    +		super().feed(data)
    +
    +	def handle(self, data: str) -> str:
    +		self.feed(data)
    +		self.feed("")
    +		markdown = self.optwrap(self.finish())
    +		if self.pad_tables:
    +			return pad_tables_in_text(markdown)
    +		else:
    +			return markdown
    +
    +	def outtextf(self, s: str) -> None:
    +		self.outtextlist.append(s)
    +		if s:
    +			self.lastWasNL = s[-1] == "\n"
    +
    +	def finish(self) -> str:
    +		"""
    +		Close the parser, flush pending output and return everything
    +		collected in self.outtextlist as one string.
    +
    +		The placeholder string is replaced by the html5 "nbsp;" character
    +		when self.unicode_snob is set, otherwise by a plain space.
    +		self.outtextlist is emptied before returning.
    +		"""
    +		self.close()
    +
    +		self.pbr()
    +		# force="end" makes o() run its end-of-document branch even though
    +		# the data is empty
    +		self.o("", force="end")
    +
    +		outtext = "".join(self.outtextlist)
    +
    +		if self.unicode_snob:
    +			nbsp = html.entities.html5["nbsp;"]
    +		else:
    +			nbsp = " "
    +		outtext = outtext.replace(" _place_holder;", nbsp)
    +
    +		# Clear self.outtextlist to avoid memory leak of its content to
    +		# the next handling.
    +		self.outtextlist = []
    +
    +		return outtext
    +
    +	def handle_charref(self, c: str) -> None:
    +		"""Resolve the numeric reference ``c`` via charref() and emit it as entity text."""
    +		self.handle_data(self.charref(c), True)
    +
    +	def handle_entityref(self, c: str) -> None:
    +		ref = self.entityref(c)
    +
    +		# ref may be an empty string (e.g. for ‎/‏ markers that should
    +		# not contribute to the final output).
    +		# self.handle_data cannot handle a zero-length string right after a
    +		# stressed tag or mid-text within a stressed tag (text get split and
    +		# self.stressed/self.preceding_stressed gets switched after the first
    +		# part of that text).
    +		if ref:
    +			self.handle_data(ref, True)
    +
    +	def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
    +		"""Forward an opening tag to handle_tag, with attrs converted to a dict."""
    +		self.handle_tag(tag, dict(attrs), start=True)
    +
    +	def handle_endtag(self, tag: str) -> None:
    +		"""Forward a closing tag to handle_tag with an empty attribute dict."""
    +		self.handle_tag(tag, {}, start=False)
    +
    +	def previousIndex(self, attrs: Dict[str, Optional[str]]) -> Optional[int]:
    +		"""
    +		:type attrs: dict
    +
    +		:returns: The index of certain set of attributes (of a link) in the
    +		self.a list. If the set of attributes is not found, returns None
    +		:rtype: int
    +		"""
    +		if "href" not in attrs:
    +			return None
    +
    +		match = False
    +		for i, a in enumerate(self.a):
    +			if "href" in a.attrs and a.attrs["href"] == attrs["href"]:
    +				if "title" in a.attrs or "title" in attrs:
    +					if (
    +						"title" in a.attrs
    +						and "title" in attrs
    +						and a.attrs["title"] == attrs["title"]
    +					):
    +						match = True
    +				else:
    +					match = True
    +
    +			if match:
    +				return i
    +		return None
    +
    +	def handle_emphasis(
    +		self, start: bool, tag_style: Dict[str, str], parent_style: Dict[str, str]
    +	) -> None:
    +		"""
    +		Handles various text emphases
    +
    +		:param start: True for an opening tag, False for a closing tag
    +		:param tag_style: style of the current element, as read by
    +			google_text_emphasis / google_fixed_width_font
    +		:param parent_style: style of the enclosing element; bold, italic
    +			and fixed-width are only acted on when the parent does not
    +			already have them
    +		"""
    +		tag_emphasis = google_text_emphasis(tag_style)
    +		parent_emphasis = google_text_emphasis(parent_style)
    +
    +		# handle Google's text emphasis
    +		strikethrough = "line-through" in tag_emphasis and self.hide_strikethrough
    +
    +		# google and others may mark a font's weight as `bold` or `700`
    +		bold = False
    +		for bold_marker in config.BOLD_TEXT_STYLE_VALUES:
    +			bold = bold_marker in tag_emphasis and bold_marker not in parent_emphasis
    +			if bold:
    +				break
    +
    +		italic = "italic" in tag_emphasis and "italic" not in parent_emphasis
    +		fixed = (
    +			google_fixed_width_font(tag_style)
    +			and not google_fixed_width_font(parent_style)
    +			and not self.pre
    +		)
    +
    +		if start:
    +			# crossed-out text must be handled before other attributes
    +			# in order not to output qualifiers unnecessarily
    +			if bold or italic or fixed:
    +				self.emphasis += 1
    +			if strikethrough:
    +				self.quiet += 1
    +			# each opening mark bumps drop_white_space; the closing branch
    +			# below uses a non-zero value to detect an empty emphasis
    +			if italic:
    +				self.o(self.emphasis_mark)
    +				self.drop_white_space += 1
    +			if bold:
    +				self.o(self.strong_mark)
    +				self.drop_white_space += 1
    +			if fixed:
    +				self.o("`")
    +				self.drop_white_space += 1
    +				self.code = True
    +		else:
    +			# closing marks are written in the reverse order of the opening
    +			# ones: fixed, bold, italic
    +			if bold or italic or fixed:
    +				# there must not be whitespace before closing emphasis mark
    +				self.emphasis -= 1
    +				self.space = False
    +			if fixed:
    +				if self.drop_white_space:
    +					# empty emphasis, drop it
    +					self.drop_white_space -= 1
    +				else:
    +					self.o("`")
    +				self.code = False
    +			if bold:
    +				if self.drop_white_space:
    +					# empty emphasis, drop it
    +					self.drop_white_space -= 1
    +				else:
    +					self.o(self.strong_mark)
    +			if italic:
    +				if self.drop_white_space:
    +					# empty emphasis, drop it
    +					self.drop_white_space -= 1
    +				else:
    +					self.o(self.emphasis_mark)
    +			# space is only allowed after *all* emphasis marks
    +			if (bold or italic) and not self.emphasis:
    +				self.o(" ")
    +			if strikethrough:
    +				self.quiet -= 1
    +
    +	def handle_tag(
    +		self, tag: str, attrs: Dict[str, Optional[str]], start: bool
    +	) -> None:
    +		self.current_tag = tag
    +
    +		if self.tag_callback is not None:
    +			if self.tag_callback(self, tag, attrs, start) is True:
    +				return
    +
    +		# first thing inside the anchor tag is another tag
    +		# that produces some output
    +		if (
    +			start
    +			and self.maybe_automatic_link is not None
    +			and tag not in ["p", "div", "style", "dl", "dt"]
    +			and (tag != "img" or self.ignore_images)
    +		):
    +			self.o("[")
    +			self.maybe_automatic_link = None
    +			self.empty_link = False
    +
    +		if self.google_doc:
    +			# the attrs parameter is empty for a closing tag. in addition, we
    +			# need the attributes of the parent nodes in order to get a
    +			# complete style description for the current element. we assume
    +			# that google docs export well formed html.
    +			parent_style = {}  # type: Dict[str, str]
    +			if start:
    +				if self.tag_stack:
    +					parent_style = self.tag_stack[-1][2]
    +				tag_style = element_style(attrs, self.style_def, parent_style)
    +				self.tag_stack.append((tag, attrs, tag_style))
    +			else:
    +				dummy, attrs, tag_style = (
    +					self.tag_stack.pop() if self.tag_stack else (None, {}, {})
    +				)
    +				if self.tag_stack:
    +					parent_style = self.tag_stack[-1][2]
    +
    +		if hn(tag):
    +			# check if nh is inside of an 'a' tag 
    +			# (incorrect but found in the wild)
    +			if self.astack:
    +				if start:
    +					self.inheader = True
    +					# are inside link name, so only add '#' if it can appear before '['
    +					if self.outtextlist and self.outtextlist[-1] == "[":
    +						self.outtextlist.pop()
    +						self.space = False
    +						self.o(hn(tag) * "#" + " ")
    +						self.o("[")
    +						self.header_id = attrs.get('id')
    +			else:
    +				self.p()
    +				if start:
    +					self.inheader = True
    +					self.o(hn(tag) * "#" + " ")
    +					if self.header_id: 
    +						self.o(' {#' + self.header_id + '}')
    +						self.header_id = None
    +				else:
    +					self.inheader = False
    +					return  # prevent redundant emphasis marks on headers
    +		if 'class' in attrs:
    +			self.current_class = attrs.get('class')
    +			# self.p()
    +			if not start:
    +				self.current_class = ''
    +		if 'style' in attrs:
    +			if attrs.get('style') == 'text-align: center':
    +				self.current_class = 'center'
    +			if not start:
    +				self.current_class = ''
    +		if tag == 'span':
    +			if start:
    +					if self.current_class == 'highlight' and \
    +						self.inheader == False and \
    +						self.span_lead == False and \
    +						self.astack == False:
    +							self.o('`') # NOTE: same as 
    +							self.span_highlight = True
    +					elif self.current_class == 'lead' and \
    +						self.inheader == False and \
    +						self.span_highlight == False:
    +							self.o('==') # NOTE: but CriticMarkup uses {== ==}
    +							self.span_lead = True
    +			else:
    +				if self.span_highlight:
    +					self.o('`')
    +					self.span_highlight = False
    +				elif self.span_lead:
    +						self.o('==')
    +						self.span_lead = False
    +
    +		if tag in ["p", "div"]:
    +			if self.google_doc:
    +				if start and google_has_height(tag_style):
    +					self.p()
    +				else:
    +					self.soft_br()
    +			elif self.astack:
    +				pass
    +			else:
    +				self.p()
    +
    +		if tag == "br" and start:
    +			if self.blockquote > 0:
    +				self.o("  \n> ")
    +			else:
    +				self.o("  \n")
    +
    +		if tag == "hr" and start:
    +			self.p()
    +			self.o("* * *")
    +			self.p()
    +
    +		if tag in ["head", "style", "script"]:
    +			if start:
    +				self.quiet += 1
    +			else:
    +				self.quiet -= 1
    +
    +		if tag == "style":
    +			if start:
    +				self.style += 1
    +			else:
    +				self.style -= 1
    +
    +		if tag in ["body"]:
    +			self.quiet = 0  # sites like 9rules.com never close 
    +
    +		if tag == "blockquote":
    +			if start:
    +				self.p()
    +				self.o("> ", force=True)
    +				self.start = True
    +				self.blockquote += 1
    +			else:
    +				self.blockquote -= 1
    +				self.p()
    +
    +		if tag in ["em", "i", "u"] and not self.ignore_emphasis:
    +			# Separate with a space if we immediately follow an alphanumeric
    +			# character, since otherwise Markdown won't render the emphasis
    +			# marks, and we'll be left with eg 'foo_bar_' visible.
    +			# (Don't add a space otherwise, though, since there isn't one in the
    +			# original HTML.)
    +			if (
    +				start
    +				and self.preceding_data
    +				and self.preceding_data[-1] not in string.whitespace
    +				and self.preceding_data[-1] not in string.punctuation
    +			):
    +				emphasis = " " + self.emphasis_mark
    +				self.preceding_data += " "
    +			else:
    +				emphasis = self.emphasis_mark
    +
    +			self.o(emphasis)
    +			if start:
    +				self.stressed = True
    +
    +		if tag in ["strong", "b"] and not self.ignore_emphasis:
    +			# Separate with space if we immediately follow an * character, since
    +			# without it, Markdown won't render the resulting *** correctly.
    +			# (Don't add a space otherwise, though, since there isn't one in the
    +			# original HTML.)
    +			if (
    +				start
    +				and self.preceding_data
    +				and self.preceding_data[-1] == self.strong_mark[0]
    +			):
    +				strong = " " + self.strong_mark
    +				self.preceding_data += " "
    +			else:
    +				strong = self.strong_mark
    +
    +			self.o(strong)
    +			if start:
    +				self.stressed = True
    +
    +		if tag in ["del", "strike", "s"]:
    +			if start and self.preceding_data and self.preceding_data[-1] == "~":
    +				strike = " ~~"
    +				self.preceding_data += " "
    +			else:
    +				strike = "~~"
    +
    +			self.o(strike)
    +			if start:
    +				self.stressed = True
    +
    +		if self.google_doc:
    +			if not self.inheader:
    +				# handle some font attributes, but leave headers clean
    +				self.handle_emphasis(start, tag_style, parent_style)
    +
    +		if tag in ["kbd", "code", "tt"] and not self.pre:
    +			self.o("`")  # TODO: `` `this` ``
    +			self.code = not self.code
    +
    +		if tag == "abbr":
    +			if start:
    +				self.abbr_title = None
    +				self.abbr_data = ""
    +				if "title" in attrs:
    +					self.abbr_title = attrs["title"]
    +			else:
    +				if self.abbr_title is not None:
    +					assert self.abbr_data is not None
    +					self.abbr_list[self.abbr_data] = self.abbr_title
    +					self.abbr_title = None
    +				self.abbr_data = None
    +
    +		if tag == "q":
    +			if not self.quote:
    +				self.o(self.open_quote)
    +			else:
    +				self.o(self.close_quote)
    +			self.quote = not self.quote
    +
    +		def link_url(self: HTML2Text, link: str, title: str = "") -> None:
    +			url = urlparse.urljoin(self.baseurl, link)
    +			title = ' "{}"'.format(title) if title.strip() else ""
    +			self.o("]({url}{title})".format(url=escape_md(url), title=title))
    +
    +		if tag == "a" and not self.ignore_links:
    +			if start:
    +				if (
    +					"href" in attrs
    +					and attrs["href"] is not None
    +					and not (self.skip_internal_links and attrs["href"].startswith("#"))
    +					and not (
    +						self.ignore_mailto_links and attrs["href"].startswith("mailto:")
    +					)
    +				):
    +					self.astack.append(attrs)
    +					self.maybe_automatic_link = attrs["href"]
    +					self.empty_link = True
    +					if self.protect_links:
    +						attrs["href"] = "<" + attrs["href"] + ">"
    +				else:
    +					self.astack.append(None)
    +			else:
    +				if self.astack:
    +					a = self.astack.pop()
    +					if self.maybe_automatic_link and not self.empty_link:
    +						self.maybe_automatic_link = None
    +					elif a:
    +						assert a["href"] is not None
    +						if self.empty_link:
    +							self.o("[")
    +							self.empty_link = False
    +							self.maybe_automatic_link = None
    +						if self.inline_links:
    +							self.p_p = 0
    +							title = a.get("title") or ""
    +							title = escape_md(title)
    +							link_url(self, a["href"], title)
    +						else:
    +							i = self.previousIndex(a)
    +							if i is not None:
    +								a_props = self.a[i]
    +							else:
    +								self.acount += 1
    +								a_props = AnchorElement(a, self.acount, self.outcount)
    +								self.a.append(a_props)
    +							self.o("][" + str(a_props.count) + "]")
    +
    +		if tag == "img" and start and not self.ignore_images:
    +			if "src" in attrs:
    +				assert attrs["src"] is not None
    +				if not self.images_to_alt:
    +					attrs["href"] = attrs["src"]
    +				alt = attrs.get("alt") or self.default_image_alt
    +
    +				# If we have images_with_size, write raw html including width,
    +				# height, and alt attributes
    +				if self.images_as_html or (
    +					self.images_with_size and ("width" in attrs or "height" in attrs)
    +				):
    +					self.o("")
    +					return
    +
    +				# If we have a link to create, output the start
    +				if self.maybe_automatic_link is not None:
    +					href = self.maybe_automatic_link
    +					if (
    +						self.images_to_alt
    +						and escape_md(alt) == href
    +						and self.absolute_url_matcher.match(href)
    +					):
    +						self.o("<" + escape_md(alt) + ">")
    +						self.empty_link = False
    +						return
    +					else:
    +						self.o("[")
    +						self.maybe_automatic_link = None
    +						self.empty_link = False
    +
    +				# If we have images_to_alt, we discard the image itself,
    +				# considering only the alt text.
    +				if self.images_to_alt:
    +					self.o(escape_md(alt))
    +				else:
    +					self.o("![" + escape_md(alt) + "]")
    +					if self.inline_links:
    +						href = attrs.get("href") or ""
    +						self.o(
    +							"(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
    +						)
    +					else:
    +						i = self.previousIndex(attrs)
    +						if i is not None:
    +							a_props = self.a[i]
    +						else:
    +							self.acount += 1
    +							a_props = AnchorElement(attrs, self.acount, self.outcount)
    +							self.a.append(a_props)
    +						self.o("[" + str(a_props.count) + "]")
    +
    +		if tag == "dl" and start:
    +			self.p()
    +		if tag == "dt" and not start:
    +			self.pbr()
    +		if tag == "dd" and start:
    +			self.o("    ")
    +		if tag == "dd" and not start:
    +			self.pbr()
    +
    +		if tag in ["ol", "ul"]:
    +			# Google Docs create sub lists as top level lists
    +			if not self.list and not self.lastWasList:
    +				self.p()
    +			if start:
    +				if self.google_doc:
    +					list_style = google_list_style(tag_style)
    +				else:
    +					list_style = tag
    +				numbering_start = list_numbering_start(attrs)
    +				self.list.append(ListElement(list_style, numbering_start))
    +			else:
    +				if self.list:
    +					self.list.pop()
    +					if not self.google_doc and not self.list:
    +						self.o("\n")
    +			self.lastWasList = True
    +		else:
    +			self.lastWasList = False
    +
    +		if tag == "li":
    +			self.pbr()
    +			if start:
    +				if self.list:
    +					li = self.list[-1]
    +				else:
    +					li = ListElement("ul", 0)
    +				if self.google_doc:
    +					self.o("  " * self.google_nest_count(tag_style))
    +				else:
    +					# Indent two spaces per list, except use three spaces for an
    +					# unordered list inside an ordered list.
    +					# https://spec.commonmark.org/0.28/#motivation
    +					# TODO: line up 
    1. s > 9 correctly. + parent_list = None + for list in self.list: + self.o( + " " if parent_list == "ol" and list.name == "ul" else " " + ) + parent_list = list.name + + if li.name == "ul": + self.o(self.ul_item_mark + " ") + elif li.name == "ol": + li.num += 1 + self.o(str(li.num) + ". ") + self.start = True + + if tag in ["table", "tr", "td", "th"]: + if self.ignore_tables: + if tag == "tr": + if start: + pass + else: + self.soft_br() + else: + pass + + elif self.bypass_tables: + if start: + self.soft_br() + if tag in ["td", "th"]: + if start: + self.o("<{}>\n\n".format(tag)) + else: + self.o("\n".format(tag)) + else: + if start: + self.o("<{}>".format(tag)) + else: + self.o("".format(tag)) + + else: + if tag == "table": + if start: + self.table_start = True + if self.pad_tables: + self.o("<" + config.TABLE_MARKER_FOR_PAD + ">") + self.o(" \n") + else: + if self.pad_tables: + # add break in case the table is empty or its 1 row table + self.soft_br() + self.o("") + self.o(" \n") + if tag in ["td", "th"] and start: + if self.split_next_td: + self.o("| ") + self.split_next_td = True + + if tag == "tr" and start: + self.td_count = 0 + if tag == "tr" and not start: + self.split_next_td = False + self.soft_br() + if tag == "tr" and not start and self.table_start: + # Underline table header + self.o("|".join(["---"] * self.td_count)) + self.soft_br() + self.table_start = False + if tag in ["td", "th"] and start: + self.td_count += 1 + + if tag == "pre": + if start: + self.startpre = True + self.pre = True + else: + self.pre = False + if self.mark_code: + self.out("\n[/code]") + self.p() + + # TODO: Add docstring for these one letter functions + def pbr(self) -> None: + "Pretty print has a line break" + if self.p_p == 0: + self.p_p = 1 + + def p(self) -> None: + "Set pretty print to 1 or 2 lines" + self.p_p = 1 if self.single_line_break else 2 + + def soft_br(self) -> None: + "Soft breaks" + self.pbr() + self.br_toggle = " " + + def o( + self, data: str, puredata: 
bool = False, force: Union[bool, str] = False + ) -> None: + """ + Deal with indentation and whitespace + """ + if self.abbr_data is not None: + self.abbr_data += data + + if not self.quiet: + if self.google_doc: + # prevent white space immediately after 'begin emphasis' + # marks ('**' and '_') + lstripped_data = data.lstrip() + if self.drop_white_space and not (self.pre or self.code): + data = lstripped_data + if lstripped_data != "": + self.drop_white_space = 0 + + if puredata and not self.pre: + # This is a very dangerous call ... it could mess up + # all handling of   when not handled properly + # (see entityref) + data = re.sub(r"\s+", r" ", data) + if data and data[0] == " ": + self.space = True + data = data[1:] + if not data and not force: + return + + if self.startpre: + # self.out(" :") #TODO: not output when already one there + if not data.startswith("\n") and not data.startswith("\r\n"): + #
      stuff...
      +					data = "\n" + data
      +				if self.mark_code:
      +					self.out("\n[code]")
      +					self.p_p = 0
      +
      +			bq = ">" * self.blockquote
      +			if not (force and data and data[0] == ">") and self.blockquote:
      +				bq += " "
      +
      +			if self.pre:
      +				if not self.list:
      +					bq += "    "
      +				# else: list content is already partially indented
      +				bq += "    " * len(self.list)
      +				data = data.replace("\n", "\n" + bq)
      +
      +			if self.startpre:
      +				self.startpre = False
      +				if self.list:
      +					# use existing initial indentation
      +					data = data.lstrip("\n")
      +
      +			if self.start:
      +				self.space = False
      +				self.p_p = 0
      +				self.start = False
      +
      +			if force == "end":
      +				# It's the end.
      +				self.p_p = 0
      +				self.out("\n")
      +				self.space = False
      +
      +			if self.p_p:
      +				self.out((self.br_toggle + "\n" + bq) * self.p_p)
      +				self.space = False
      +				self.br_toggle = ""
      +
      +			if self.space:
      +				if not self.lastWasNL:
      +					self.out(" ")
      +				self.space = False
      +
      +			if self.a and (
      +				(self.p_p == 2 and self.links_each_paragraph) or force == "end"
      +			):
      +				if force == "end":
      +					self.out("\n")
      +
      +				newa = []
      +				for link in self.a:
      +					if self.outcount > link.outcount:
      +						self.out(
      +							"   ["
      +							+ str(link.count)
      +							+ "]: "
      +							+ urlparse.urljoin(self.baseurl, link.attrs["href"])
      +						)
      +						if "title" in link.attrs:
      +							assert link.attrs["title"] is not None
      +							self.out(" (" + link.attrs["title"] + ")")
      +						self.out("\n")
      +					else:
      +						newa.append(link)
      +
      +				# Don't need an extra line when nothing was done.
      +				if self.a != newa:
      +					self.out("\n")
      +
      +				self.a = newa
      +
      +			if self.abbr_list and force == "end":
      +				for abbr, definition in self.abbr_list.items():
      +					self.out("  *[" + abbr + "]: " + definition + "\n")
      +
      +			self.p_p = 0
      +			self.out(data)
      +			self.outcount += 1
      +
      +	def handle_data(self, data: str, entity_char: bool = False) -> None:
      +		"""
      +		Process a run of text (or a resolved entity) and pass it to self.o().
      +
      +		:param data: the text; empty input is ignored
      +		:param entity_char: True when called from handle_charref /
      +			handle_entityref; such text is not run through
      +			escape_md_section
      +		"""
      +		if not data:
      +			# Data may be empty for some HTML entities. For example,
      +			# LEFT-TO-RIGHT MARK.
      +			return
      +
      +		if self.stressed:
      +			# first text after an opening emphasis mark: trim it so the
      +			# mark touches the text
      +			data = data.strip()
      +			self.stressed = False
      +			self.preceding_stressed = True
      +		elif self.preceding_stressed:
      +			if (
      +				re.match(r"[^][(){}\s.!?]", data[0])
      +				and not hn(self.current_tag)
      +				and self.current_tag not in ["a", "code", "pre"]
      +			):
      +				# should match a letter or common punctuation
      +				data = " " + data
      +			self.preceding_stressed = False
      +
      +		# self.style is non-zero while inside a style element: parse the
      +		# text as CSS and merge it into style_def
      +		if self.style:
      +			self.style_def.update(dumb_css_parser(data))
      +
      +		if self.maybe_automatic_link is not None:
      +			href = self.maybe_automatic_link
      +			if (
      +				href == data
      +				and self.absolute_url_matcher.match(href)
      +				and self.use_automatic_links
      +			):
      +				# link text equals its absolute href: emit an automatic link
      +				self.o("<" + data + ">")
      +				self.empty_link = False
      +				return
      +			else:
      +				self.o("[")
      +				self.maybe_automatic_link = None
      +				self.empty_link = False
      +
      +		if not self.code and not self.pre and not entity_char:
      +			data = escape_md_section(data, snob=self.escape_snob)
      +		self.preceding_data = data
      +		self.o(data, puredata=True)
      +
      +	def charref(self, name: str) -> str:
      +		if name[0] in ["x", "X"]:
      +			c = int(name[1:], 16)
      +		else:
      +			c = int(name)
      +
      +		if not self.unicode_snob and c in unifiable_n:
      +			return unifiable_n[c]
      +		else:
      +			try:
      +				return chr(c)
      +			except ValueError:  # invalid unicode
      +				return ""
      +
      +	def entityref(self, c: str) -> str:
      +		if not self.unicode_snob and c in config.UNIFIABLE:
      +			return config.UNIFIABLE[c]
      +		try:
      +			ch = html.entities.html5[c + ";"]
      +		except KeyError:
      +			return "&" + c + ";"
      +		return config.UNIFIABLE[c] if c == "nbsp" else ch
      +
      +	def google_nest_count(self, style: Dict[str, str]) -> int:
      +		"""
      +		Calculate the nesting count of google doc lists
      +
      +		:type style: dict
      +
      +		:rtype: int
      +		"""
      +		nest_count = 0
      +		if "margin-left" in style:
      +			nest_count = int(style["margin-left"][:-2]) // self.google_list_indent
      +
      +		return nest_count
      +
      +	def optwrap(self, text: str) -> str:
      +		"""
      +		Wrap all paragraphs in the provided text.
      +
      +		The text is returned unchanged when self.body_width is falsy.
      +		Side effect: self.inline_links is set to False when
      +		self.wrap_links is off.
      +
      +		:type text: str
      +
      +		:rtype: str
      +		"""
      +		if not self.body_width:
      +			return text
      +
      +		result = ""
      +		# number of consecutive newlines at the end of result, capped at 2
      +		# by the empty-line branch below
      +		newlines = 0
      +		# I cannot think of a better solution for now.
      +		# To avoid the non-wrap behaviour for entire paras
      +		# because of the presence of a link in it
      +		if not self.wrap_links:
      +			self.inline_links = False
      +		for para in text.split("\n"):
      +			if len(para) > 0:
      +				if not skipwrap(
      +					para, self.wrap_links, self.wrap_list_items, self.wrap_tables
      +				):
      +					indent = ""
      +					if para.startswith("  " + self.ul_item_mark):
      +						# list item continuation: add a double indent to the
      +						# new lines
      +						indent = "    "
      +					elif para.startswith("> "):
      +						# blockquote continuation: add the greater than symbol
      +						# to the new lines
      +						indent = "> "
      +					wrapped = wrap(
      +						para,
      +						self.body_width,
      +						break_long_words=False,
      +						subsequent_indent=indent,
      +					)
      +					result += "\n".join(wrapped)
      +					if para.endswith("  "):
      +						# two trailing spaces: re-append them before the
      +						# single newline
      +						result += "  \n"
      +						newlines = 1
      +					elif indent:
      +						result += "\n"
      +						newlines = 1
      +					else:
      +						result += "\n\n"
      +						newlines = 2
      +				else:
      +					# Warning for the tempted!!!
      +					# Be aware that obvious replacement of this with
      +					# line.isspace()
      +					# DOES NOT work! Explanations are welcome.
      +					if not config.RE_SPACE.match(para):
      +						result += para + "\n"
      +						newlines = 1
      +			else:
      +				if newlines < 2:
      +					result += "\n"
      +					newlines += 1
      +		return result
       
       
       def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = None) -> str:
      -    if bodywidth is None:
      -        bodywidth = config.BODY_WIDTH
      -    h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
      +	if bodywidth is None:
      +		bodywidth = config.BODY_WIDTH
      +	h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
       
      -    return h.handle(html)
      +	return h.handle(html)
      diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py
      index 42d4ef9e..78d1c3b8 100644
      --- a/migration/tables/content_items.py
      +++ b/migration/tables/content_items.py
      @@ -39,6 +39,7 @@ def get_metadata(r):
       	metadata['createdAt'] = r.get('createdAt', ts)
       	metadata['layout'] = r['layout']
       	metadata['topics'] = [topic['slug'] for topic in r['topics']]
      +	metadata['topics'].sort()
       	if r.get('cover', False):
       		metadata['cover'] = r.get('cover')
       	return metadata
      @@ -80,7 +81,6 @@ def migrate(entry, users_by_oid, topics_by_oid):
       		'createdAt': entry.get('createdAt', '2016-03-05 22:22:00.350000')
       	}
       	r['slug'] = entry.get('slug', '')
      -	body_orig = entry.get('body', '')
       	if not r['slug'] and entry.get('friendlySlugs') is not None:
       		r['slug'] = entry['friendlySlugs']['slug'][0]['slug']
       		if(r['slug'] is None):
      @@ -94,12 +94,12 @@ def migrate(entry, users_by_oid, topics_by_oid):
       	mainTopic = topics_by_oid.get(category)
       	if mainTopic:
       		r['mainTopic'] = mainTopic["slug"]
      -	topic_oids = set([category])
      -	topic_oids.update(entry.get("tags", []))
      +	topic_oids = [category, ]
      +	taglist = entry.get("tags", [])
      +	topic_oids.extend(taglist)
       	for oid in topic_oids:
       		if oid in topics_by_oid:
       			r['topics'].append(topics_by_oid[oid])
      -
       	if entry.get('image') is not None:
       		r['cover'] = entry['image']['url']
       	if entry.get('thumborId') is not None:
      @@ -116,7 +116,7 @@ def migrate(entry, users_by_oid, topics_by_oid):
       			else:
       				body_html = str(BeautifulSoup(
       					body_orig, features="html.parser"))
      -				r['body'] = body_html # html2text(body_html)
      +				r['body'] = html2text(body_html)
       		else:
       			print(r['slug'] + ': literature has no media')
       	elif entry.get('type') == 'Video':
      @@ -127,17 +127,31 @@ def migrate(entry, users_by_oid, topics_by_oid):
       		if video_url == '#':
       			video_url = 'https://vimeo.com/' + vm if vm else '#'
       		if video_url == '#':
      -			print(entry.get('media', 'NO MEDIA!'))
      +			print(entry.get('media', 'UNKNOWN MEDIA PROVIDER!'))
       			# raise Exception
      -		r['body'] = '' + html2text(m.get('body', ''))  # FIXME
      +		therestof = html2text(m.get('body', ''))
      +		r['body'] = 'import VideoPlayer from \"src/components/Article/VideoPlayer\"\n' + \
      +			'\n\n' + therestof
       	elif entry.get('type') == 'Music':
      -		r['body'] = ''  # FIXME
      +		r['body'] = 'import MusicPlayer from \"src/components/MusicPlayer\"\n'
      +		for m in entry['media']:
      +			if m == { 'main': 'true' } or m == { 'main': True } or m == {}:
      +				continue
      +			# TODO: mark highlighted track isMain == True
      +			try: r['body'] += ''
      +		except: print(entry)
       	if r.get('body') is None:
       		body_orig = entry.get('body', '')
       		body_html = str(BeautifulSoup(body_orig, features="html.parser"))
      -		r['body'] = body_html # html2text(body_html)
      +		r['body'] = html2text(body_html)
       	body = r.get('body', '')
       	
       	# get author data
      @@ -172,12 +186,12 @@ def migrate(entry, users_by_oid, topics_by_oid):
       		'userpic': userdata.get('userpic', '')
       	}
       	shout_dict['authors'] = [ author, ]
      -	
      +
       	if entry['published']:
       		metadata = get_metadata(shout_dict)
       		content = frontmatter.dumps(frontmatter.Post(body, **metadata))
      -		ext = 'md'
      -		open('migration/content/' + r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
      +		ext = 'mdx'
      +		open('../discoursio-web/content/' + r['layout'] + '/' + r['slug'] + '.' + ext, 'w').write(content)
       	try:
       		shout_dict['createdAt'] = date_parse(r.get('createdAt')) if entry.get('createdAt') else ts
       		shout_dict['publishedAt'] = date_parse(entry.get('publishedAt')) if entry.get('published') else None
      diff --git a/resolvers/collab.py b/resolvers/collab.py
      index 8131cc51..ecfc14ef 100644
      --- a/resolvers/collab.py
      +++ b/resolvers/collab.py
      @@ -219,7 +219,7 @@ async def invite_author(_, author_slug, shout):
       
       @mutation.field("removeAuthor")
       @login_required
      -async def invite_author(_, author_slug, shout):
      +async def remove_author(_, author_slug, shout):
       	auth = info.context["request"].auth
       	user_id = auth.user_id
       
      @@ -241,4 +241,4 @@ async def invite_author(_, author_slug, shout):
       
       	# TODO: email notify
       
      -	return {}
      \ No newline at end of file
      +	return {}
      diff --git a/settings.py b/settings.py
      index 79c48403..d20b8fd5 100644
      --- a/settings.py
      +++ b/settings.py
      @@ -8,6 +8,7 @@ BACKEND_URL = environ.get("BACKEND_URL") or "https://localhost:8080"
       OAUTH_CALLBACK_URL = environ.get("OAUTH_CALLBACK_URL") or "https://localhost:8080"
       RESET_PWD_URL = environ.get("RESET_PWD_URL") or "https://localhost:8080/reset_pwd"
       CONFIRM_EMAIL_URL = environ.get("CONFIRM_EMAIL_URL") or "https://new.discours.io"
      +ERROR_URL_ON_FRONTEND = environ.get("ERROR_URL_ON_FRONTEND") or "https://new.discours.io"
       
       DB_URL = environ.get("DATABASE_URL") or environ.get("DB_URL") or "sqlite:///db.sqlite3"
       JWT_ALGORITHM = "HS256"
      diff --git a/templates/auth_email.tmpl b/templates/auth_email.tmpl
      new file mode 100644
      index 00000000..9135da9a
      --- /dev/null
      +++ b/templates/auth_email.tmpl
      @@ -0,0 +1 @@
      +To enter the site follow the link
      diff --git a/templates/confirm_email.tmpl b/templates/confirm_email.tmpl
      new file mode 100644
      index 00000000..9d56c41d
      --- /dev/null
      +++ b/templates/confirm_email.tmpl
      @@ -0,0 +1 @@
      +To confirm registration follow the link
      diff --git a/templates/reset_password_email.tmpl b/templates/reset_password_email.tmpl
      new file mode 100644
      index 00000000..51a68604
      --- /dev/null
      +++ b/templates/reset_password_email.tmpl
      @@ -0,0 +1 @@
      +To reset password follow the link