diff --git a/migration/__init__.py b/migration/__init__.py
index 2d195e06..4a25931d 100644
--- a/migration/__init__.py
+++ b/migration/__init__.py
@@ -314,9 +314,6 @@ async def handle_auto():
 
 async def main():
     if len(sys.argv) > 1:
-        cmd = sys.argv[1]
-        if type(cmd) == str:
-            print("[migration] command: " + cmd)
         init_tables()
         await handle_auto()
     else:
diff --git a/migration/export.py b/migration/export.py
index a9340dc5..102cfb14 100644
--- a/migration/export.py
+++ b/migration/export.py
@@ -4,7 +4,7 @@ from datetime import datetime, timezone
 
 import frontmatter
 
-from .extract import extract_html, prepare_html_body, extract_media
+from .extract import extract_html, extract_media
 from .utils import DateTimeEncoder
 
 OLD_DATE = "2016-03-05 22:22:00.350000"
@@ -50,11 +50,12 @@ def export_mdx(r):
 def export_body(shout, storage):
     entry = storage["content_items"]["by_oid"][shout["oid"]]
     if entry:
-        shout["body"] = prepare_html_body(entry) # prepare_md_body(entry)
-        shout["media"] = extract_media(entry)
+        body = extract_html(entry)
+        media = extract_media(entry)
+        shout["body"] = body # prepare_html_body(entry) # prepare_md_body(entry)
+        shout["media"] = media
         export_mdx(shout)
         print("[export] html for %s" % shout["slug"])
-        body = extract_html(entry)
         open(contentDir + shout["slug"] + ".html", "w").write(body)
     else:
         raise Exception("no content_items entry found")
diff --git a/migration/extract.py b/migration/extract.py
index 62199dcf..ccadb7e2 100644
--- a/migration/extract.py
+++ b/migration/extract.py
@@ -3,6 +3,9 @@ import os
 import re
 import uuid
 
+from bs4 import BeautifulSoup
+
+
 TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"
 contentDir = os.path.join(
     os.path.dirname(os.path.realpath(__file__)), "..", "..", "discoursio-web", "content"
@@ -343,59 +346,7 @@ def prepare_html_body(entry):
 
 def extract_html(entry):
     body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')')
-    media = entry.get("media", [])
-    kind = entry.get("type") or ""
-    print("[extract] kind: " + kind)
-    mbodies = set([])
-    if media:
-        # print('[extract] media is found')
-        for m in media:
-            mbody = m.get("body", "")
-            addon = ""
-            if kind == "Literature":
-                mbody = m.get("literatureBody") or m.get("body", "")
-            elif kind == "Image":
-                cover = ""
-                if "thumborId" in entry:
-                    cover = cdn + "/unsafe/1600x/" + entry["thumborId"]
-                if not cover:
-                    if "image" in entry:
-                        cover = entry["image"].get("url", "")
-                    if "cloudinary" in cover:
-                        cover = ""
-                # else: print('[extract] cover: ' + cover)
-                title = m.get("title", "").replace("\n", " ").replace("&nbsp;", " ")
-                u = m.get("thumborId") or cover or ""
-                if title:
-                    addon += "