media normalized

tonyrewin 2022-11-26 17:58:53 +03:00
parent 11d498c47d
commit 9ca9859563
3 changed files with 42 additions and 47 deletions

View File

@@ -4,7 +4,7 @@ from datetime import datetime, timezone
 import frontmatter

-from .extract import extract_html, prepare_html_body
+from .extract import extract_html, prepare_html_body, extract_media
 from .utils import DateTimeEncoder

 OLD_DATE = "2016-03-05 22:22:00.350000"

@@ -50,11 +50,11 @@ def export_mdx(r):
 def export_body(shout, storage):
     entry = storage["content_items"]["by_oid"][shout["oid"]]
     if entry:
-        shout["body"], media = prepare_html_body(entry) # prepare_md_body(entry)
-        shout["media"] = media
+        shout["body"] = prepare_html_body(entry) # prepare_md_body(entry)
+        shout["media"] = extract_media(entry)
         export_mdx(shout)
         print("[export] html for %s" % shout["slug"])
-        body, _media = extract_html(entry)
+        body = extract_html(entry)
         open(contentDir + shout["slug"] + ".html", "w").write(body)
     else:
         raise Exception("no content_items entry found")
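
Side note (not part of this commit): shout["media"] now holds a plain Python list of dicts, while the GraphQL schema below keeps media as a String, so the list presumably gets JSON-encoded somewhere downstream. A minimal sketch of that step, assuming json.dumps together with the DateTimeEncoder already imported in this module; the helper name is hypothetical:

import json

from .utils import DateTimeEncoder

def serialize_media(media_list):
    # Hypothetical helper, not in the repo: encode the normalized list of
    # { "title", "pic", "url", "body" } dicts as the JSON string that the
    # media: String schema field is documented to carry.
    return json.dumps(media_list, cls=DateTimeEncoder, ensure_ascii=False)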

View File

@@ -3,8 +3,6 @@ import os
 import re
 import uuid

-from .html2text import html2text

 TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"

 contentDir = os.path.join(
     os.path.dirname(os.path.realpath(__file__)), "..", "..", "discoursio-web", "content"
@@ -258,47 +256,44 @@ def extract_md(body, oid=""):
     return newbody

-def prepare_md_body(entry):
-    # body modifications
-    body = ""
-    kind = entry.get("type")
-    addon = ""
-    if kind == "Video":
-        addon = ""
-        for m in entry.get("media", []):
-            if "youtubeId" in m:
-                addon += "<VideoPlayer youtubeId='" + m["youtubeId"] + "' />\n"
-            elif "vimeoId" in m:
-                addon += "<VideoPlayer vimeoId='" + m["vimeoId"] + "' />\n"
-            else:
-                print("[extract] media is not supported")
-                print(m)
-        body = "import VideoPlayer from '$/components/Article/VideoPlayer'\n\n" + addon
-    elif kind == "Music":
-        addon = ""
-        for m in entry.get("media", []):
-            artist = m.get("performer")
-            trackname = ""
-            if artist:
-                trackname += artist + " - "
-            if "title" in m:
-                trackname += m.get("title", "")
-            addon += (
-                '<AudioPlayer src="'
-                + m.get("fileUrl", "")
-                + '" title="'
-                + trackname
-                + '" />\n'
-            )
-        body = "import AudioPlayer from '$/components/Article/AudioPlayer'\n\n" + addon
-    body_orig, media = extract_html(entry)
-    if body_orig:
-        body += extract_md(html2text(body_orig), entry["_id"])
-    if not body:
-        print("[extract] empty MDX body")
-    return body, media
+def extract_media(entry):
+    ''' normalized media extraction method '''
+    # media [ { title pic url body } ]
+    kind = entry.get("layout")
+    media = []
+    for m in entry.get("media", []):
+        # title
+        title = m.get("title", "").replace("\n", " ").replace("  ", " ")
+        artist = m.get("performer") or m.get("artist")
+        if artist:
+            title = artist + " - " + title
+        # pic
+        url = m.get("fileUrl") or m.get("url", "")
+        pic = ""
+        if "thumborId" in m:
+            pic = cdn + "/unsafe/1600x/" + m["thumborId"]
+        # url
+        if not url:
+            if kind == "Image":
+                url = pic
+            elif "youtubeId" in m:
+                url = "https://youtube.com/?watch=" + m["youtubeId"]
+            elif "vimeoId" in m:
+                url = "https://vimeo.com/" + m["vimeoId"]
+            else:
+                print("[extract] media is not supported")
+        # body
+        body = m.get("body") or m.get("literatureBody")
+        media.append({
+            "url": url,
+            "pic": pic,
+            "title": title,
+            "body": body
+        })
+    return media

 def prepare_html_body(entry):
@@ -339,11 +334,11 @@ def prepare_html_body(entry):
             addon += '"></audio></figure>'
         body += addon

-    body, media = extract_html(entry)
+    body = extract_html(entry)
     # if body_orig: body += extract_md(html2text(body_orig), entry['_id'])
     if not body:
         print("[extract] empty HTML body")
-    return body, media
+    return body

 def extract_html(entry):
@@ -403,4 +398,4 @@ def extract_html(entry):
     if not body_orig:
         print("[extract] empty HTML body")
     # body_html = str(BeautifulSoup(body_orig, features="html.parser"))
-    return body_orig, media
+    return body_orig
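
For illustration only (not part of the diff): given a made-up entry shaped like the ones extract_media reads, the new function returns the normalized { title, pic, url, body } records its docstring describes.

entry = {
    "layout": "music",
    "media": [{
        "title": "Track One",
        "performer": "Some Artist",
        "fileUrl": "https://example.com/track-one.mp3",
        "body": "<p>liner notes</p>",
    }],
}

# extract_media(entry) would yield roughly:
# [{
#     "url": "https://example.com/track-one.mp3",
#     "pic": "",                           # no thumborId in this example
#     "title": "Some Artist - Track One",  # performer prepended to the title
#     "body": "<p>liner notes</p>",
# }]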

View File

@@ -439,7 +439,7 @@ type Shout {
   deletedBy: User
   publishedBy: User
   publishedAt: DateTime
-  media: String
+  media: String # json [ { title pic url body }, .. ]
   stat: Stat
 }
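
Since media stays a String on the API side, a consumer has to decode it before use. A minimal sketch, assuming the field carries the JSON array described in the comment above (the shout variable stands for a hypothetical query result):

import json

raw = shout["media"] or "[]"   # hypothetical Shout payload from the API
items = json.loads(raw)        # -> list of { title, pic, url, body } dicts
for item in items:
    print(item["title"], item["url"])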