lint wip
@@ -14,8 +14,9 @@ from migration.tables.content_items import get_shout_slug
from migration.tables.content_items import migrate as migrateShout
from migration.tables.remarks import migrate as migrateRemark
from migration.tables.topics import migrate as migrateTopic
from migration.tables.users import migrate as migrateUser, post_migrate as users_post_migrate
from migration.tables.users import migrate as migrateUser
from migration.tables.users import migrate_2stage as migrateUser_2stage
from migration.tables.users import post_migrate as users_post_migrate
from orm import init_tables
from orm.reaction import Reaction

@@ -63,16 +64,8 @@ async def topics_handle(storage):
del storage["topics"]["by_slug"][oldslug]
storage["topics"]["by_oid"][oid] = storage["topics"]["by_slug"][newslug]
print("[migration] " + str(counter) + " topics migrated")
print(
"[migration] "
+ str(len(storage["topics"]["by_oid"].values()))
+ " topics by oid"
)
print(
"[migration] "
+ str(len(storage["topics"]["by_slug"].values()))
+ " topics by slug"
)
print("[migration] " + str(len(storage["topics"]["by_oid"].values())) + " topics by oid")
print("[migration] " + str(len(storage["topics"]["by_slug"].values())) + " topics by slug")


async def shouts_handle(storage, args):
@@ -117,9 +110,10 @@ async def shouts_handle(storage, args):

# print main counter
counter += 1
print('[migration] shouts_handle %d: %s @%s' % (
(counter + 1), shout_dict["slug"], author["slug"]
))
print(
'[migration] shouts_handle %d: %s @%s'
% ((counter + 1), shout_dict["slug"], author["slug"])
)

b = bs4.BeautifulSoup(shout_dict["body"], "html.parser")
texts = [shout_dict["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
@@ -214,9 +208,7 @@ def data_load():
tags_data = json.loads(open("migration/data/tags.json").read())
storage["topics"]["tags"] = tags_data
print("[migration.load] " + str(len(tags_data)) + " tags ")
cats_data = json.loads(
open("migration/data/content_item_categories.json").read()
)
cats_data = json.loads(open("migration/data/content_item_categories.json").read())
storage["topics"]["cats"] = cats_data
print("[migration.load] " + str(len(cats_data)) + " cats ")
comments_data = json.loads(open("migration/data/comments.json").read())
@@ -235,11 +227,7 @@ def data_load():
storage["users"]["by_oid"][x["_id"]] = x
# storage['users']['by_slug'][x['slug']] = x
# no user.slug yet
print(
"[migration.load] "
+ str(len(storage["users"]["by_oid"].keys()))
+ " users by oid"
)
print("[migration.load] " + str(len(storage["users"]["by_oid"].keys())) + " users by oid")
for x in tags_data:
storage["topics"]["by_oid"][x["_id"]] = x
storage["topics"]["by_slug"][x["slug"]] = x
@@ -247,9 +235,7 @@ def data_load():
storage["topics"]["by_oid"][x["_id"]] = x
storage["topics"]["by_slug"][x["slug"]] = x
print(
"[migration.load] "
+ str(len(storage["topics"]["by_slug"].keys()))
+ " topics by slug"
"[migration.load] " + str(len(storage["topics"]["by_slug"].keys())) + " topics by slug"
)
for item in content_data:
slug = get_shout_slug(item)

@@ -1,8 +1,9 @@
import gc
import json
import os

import bson
import gc

from .utils import DateTimeEncoder


@@ -15,7 +16,7 @@ def json_tables():
"email_subscriptions": [],
"users": [],
"comments": [],
"remarks": []
"remarks": [],
}
for table in data.keys():
print('[migration] bson2json for ' + table)

@@ -71,47 +71,29 @@ def export_slug(slug, storage):


def export_email_subscriptions():
email_subscriptions_data = json.loads(
open("migration/data/email_subscriptions.json").read()
)
email_subscriptions_data = json.loads(open("migration/data/email_subscriptions.json").read())
for data in email_subscriptions_data:
# TODO: migrate to mailgun list manually
# migrate_email_subscription(data)
pass
print(
"[migration] "
+ str(len(email_subscriptions_data))
+ " email subscriptions exported"
)
print("[migration] " + str(len(email_subscriptions_data)) + " email subscriptions exported")


def export_shouts(storage):
# update what was just migrated or load json again
if len(storage["users"]["by_slugs"].keys()) == 0:
storage["users"]["by_slugs"] = json.loads(
open(EXPORT_DEST + "authors.json").read()
)
print(
"[migration] "
+ str(len(storage["users"]["by_slugs"].keys()))
+ " exported authors "
)
storage["users"]["by_slugs"] = json.loads(open(EXPORT_DEST + "authors.json").read())
print("[migration] " + str(len(storage["users"]["by_slugs"].keys())) + " exported authors ")
if len(storage["shouts"]["by_slugs"].keys()) == 0:
storage["shouts"]["by_slugs"] = json.loads(
open(EXPORT_DEST + "articles.json").read()
)
storage["shouts"]["by_slugs"] = json.loads(open(EXPORT_DEST + "articles.json").read())
print(
"[migration] "
+ str(len(storage["shouts"]["by_slugs"].keys()))
+ " exported articles "
"[migration] " + str(len(storage["shouts"]["by_slugs"].keys())) + " exported articles "
)
for slug in storage["shouts"]["by_slugs"].keys():
export_slug(slug, storage)


def export_json(
export_articles={}, export_authors={}, export_topics={}, export_comments={}
):
def export_json(export_articles={}, export_authors={}, export_topics={}, export_comments={}):
open(EXPORT_DEST + "authors.json", "w").write(
json.dumps(
export_authors,
@@ -152,8 +134,4 @@ def export_json(
ensure_ascii=False,
)
)
print(
"[migration] "
+ str(len(export_comments.items()))
+ " exported articles with comments"
)
print("[migration] " + str(len(export_comments.items())) + " exported articles with comments")

@@ -5,7 +5,6 @@ import uuid

from bs4 import BeautifulSoup


TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)"
contentDir = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "..", "..", "discoursio-web", "content"
@@ -27,7 +26,6 @@ def replace_tooltips(body):
return newbody



def extract_footnotes(body, shout_dict):
parts = body.split("&&&")
lll = len(parts)
@@ -47,12 +45,16 @@ def extract_footnotes(body, shout_dict):
extracted_body = part.split(fn, 1)[1].split('>', 1)[1].split('</a>', 1)[0]
print("[extract] footnote link: " + extracted_link)
with local_session() as session:
Reaction.create({
"shout": shout_dict['id'],
"kind": ReactionKind.FOOTNOTE,
"body": extracted_body,
"range": str(body.index(fn + link) - len('<')) + ':' + str(body.index(extracted_body) + len('</a>'))
})
Reaction.create(
{
"shout": shout_dict['id'],
"kind": ReactionKind.FOOTNOTE,
"body": extracted_body,
"range": str(body.index(fn + link) - len('<'))
+ ':'
+ str(body.index(extracted_body) + len('</a>')),
}
)
newparts[i] = "<a href='#'>ℹ️</a>"
else:
newparts[i] = part
@@ -76,9 +78,7 @@ def place_tooltips(body):
print("[extract] footnote: " + part)
fn = 'a class="footnote-url" href="'
link = part.split(fn, 1)[1].split('"', 1)[0]
extracted_part = (
part.split(fn, 1)[0] + " " + part.split("/", 1)[-1]
)
extracted_part = part.split(fn, 1)[0] + " " + part.split("/", 1)[-1]
newparts[i] = (
"<Tooltip"
+ (' link="' + link + '" ' if link else "")
@@ -96,7 +96,9 @@ def place_tooltips(body):
return ("".join(newparts), placed)


IMG_REGEX = r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
IMG_REGEX = (
r"\!\[(.*?)\]\((data\:image\/(png|jpeg|jpg);base64\,((?:[A-Za-z\d+\/]{4})*(?:[A-Za-z\d+\/]{3}="
)
IMG_REGEX += r"|[A-Za-z\d+\/]{2}==)))\)"

parentDir = "/".join(os.getcwd().split("/")[:-1])
@@ -159,11 +161,7 @@ def extract_imageparts(bodyparts, prefix):
try:
content = base64.b64decode(b64encoded + "==")
open(public + link, "wb").write(content)
print(
"[extract] "
+ str(len(content))
+ " image bytes been written"
)
print("[extract] " + str(len(content)) + " image bytes been written")
cache[b64encoded] = name
except Exception:
raise Exception
@@ -172,18 +170,11 @@ def extract_imageparts(bodyparts, prefix):
print("[extract] cached link " + cache[b64encoded])
name = cache[b64encoded]
link = cdn + "/upload/image-" + name + "." + ext
newparts[i] = (
current[: -len(mime)]
+ current[-len(mime) :]
+ link
+ next[-b64end:]
)
newparts[i] = current[: -len(mime)] + current[-len(mime) :] + link + next[-b64end:]
newparts[i + 1] = next[:-b64end]
break
return (
extract_imageparts(
newparts[i] + newparts[i + 1] + b64.join(bodyparts[(i + 2) :]), prefix
)
extract_imageparts(newparts[i] + newparts[i + 1] + b64.join(bodyparts[(i + 2) :]), prefix)
if len(bodyparts) > (i + 1)
else "".join(newparts)
)
@@ -271,7 +262,7 @@ def cleanup_md(body):
return newbody


def extract_md(body, shout_dict = None):
def extract_md(body, shout_dict=None):
newbody = body
if newbody:
newbody = cleanup_md(newbody)
@@ -279,7 +270,6 @@ def extract_md(body, shout_dict = None):
raise Exception("cleanup error")

if shout_dict:

uid = shout_dict['id'] or uuid.uuid4()
newbody = extract_md_images(newbody, uid)
if not newbody:
@@ -293,7 +283,7 @@ def extract_md(body, shout_dict = None):


def extract_media(entry):
''' normalized media extraction method '''
'''normalized media extraction method'''
# media [ { title pic url body } ]}
kind = entry.get("type")
if not kind:
@@ -323,12 +313,7 @@ def extract_media(entry):
url = "https://vimeo.com/" + m["vimeoId"]
# body
body = m.get("body") or m.get("literatureBody") or ""
media.append({
"url": url,
"pic": pic,
"title": title,
"body": body
})
media.append({"url": url, "pic": pic, "title": title, "body": body})
return media


@@ -398,9 +383,7 @@ def cleanup_html(body: str) -> str:
r"<h4>\s*</h4>",
r"<div>\s*</div>",
]
regex_replace = {
r"<br>\s*</p>": "</p>"
}
regex_replace = {r"<br>\s*</p>": "</p>"}
changed = True
while changed:
# we need several iterations to clean nested tags this way
@@ -414,7 +397,8 @@ def cleanup_html(body: str) -> str:
changed = True
return new_body

def extract_html(entry, shout_id = None, cleanup=False):

def extract_html(entry, shout_id=None, cleanup=False):
body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')')
if cleanup:
# we do that before bs parsing to catch the invalid html

@@ -119,9 +119,7 @@ class HTML2Text(html.parser.HTMLParser):
self.lastWasList = False
self.style = 0
self.style_def = {} # type: Dict[str, Dict[str, str]]
self.tag_stack = (
[]
) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
self.tag_stack = [] # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
self.emphasis = 0
self.drop_white_space = 0
self.inheader = False
@@ -300,9 +298,7 @@ class HTML2Text(html.parser.HTMLParser):
if strikethrough:
self.quiet -= 1

def handle_tag(
self, tag: str, attrs: Dict[str, Optional[str]], start: bool
) -> None:
def handle_tag(self, tag: str, attrs: Dict[str, Optional[str]], start: bool) -> None:
self.current_tag = tag

if self.tag_callback is not None:
@@ -333,9 +329,7 @@ class HTML2Text(html.parser.HTMLParser):
tag_style = element_style(attrs, self.style_def, parent_style)
self.tag_stack.append((tag, attrs, tag_style))
else:
dummy, attrs, tag_style = (
self.tag_stack.pop() if self.tag_stack else (None, {}, {})
)
dummy, attrs, tag_style = self.tag_stack.pop() if self.tag_stack else (None, {}, {})
if self.tag_stack:
parent_style = self.tag_stack[-1][2]

@@ -385,11 +379,7 @@ class HTML2Text(html.parser.HTMLParser):
):
self.o("`") # NOTE: same as <code>
self.span_highlight = True
elif (
self.current_class == "lead"
and not self.inheader
and not self.span_highlight
):
elif self.current_class == "lead" and not self.inheader and not self.span_highlight:
# self.o("==") # NOTE: CriticMarkup {==
self.span_lead = True
else:
@@ -479,11 +469,7 @@ class HTML2Text(html.parser.HTMLParser):
and not self.span_lead
and not self.span_highlight
):
if (
start
and self.preceding_data
and self.preceding_data[-1] == self.strong_mark[0]
):
if start and self.preceding_data and self.preceding_data[-1] == self.strong_mark[0]:
strong = " " + self.strong_mark
self.preceding_data += " "
else:
@@ -548,13 +534,8 @@ class HTML2Text(html.parser.HTMLParser):
"href" in attrs
and not attrs["href"].startswith("#_ftn")
and attrs["href"] is not None
and not (
self.skip_internal_links and attrs["href"].startswith("#")
)
and not (
self.ignore_mailto_links
and attrs["href"].startswith("mailto:")
)
and not (self.skip_internal_links and attrs["href"].startswith("#"))
and not (self.ignore_mailto_links and attrs["href"].startswith("mailto:"))
):
self.astack.append(attrs)
self.maybe_automatic_link = attrs["href"]
@@ -638,9 +619,7 @@ class HTML2Text(html.parser.HTMLParser):
self.o("![" + escape_md(alt) + "]")
if self.inline_links:
href = attrs.get("href") or ""
self.o(
"(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")"
)
self.o("(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")")
else:
i = self.previousIndex(attrs)
if i is not None:
@@ -696,9 +675,7 @@ class HTML2Text(html.parser.HTMLParser):
# WARNING: does not line up <ol><li>s > 9 correctly.
parent_list = None
for list in self.list:
self.o(
" " if parent_list == "ol" and list.name == "ul" else " "
)
self.o(" " if parent_list == "ol" and list.name == "ul" else " ")
parent_list = list.name

if li.name == "ul":
@@ -787,9 +764,7 @@ class HTML2Text(html.parser.HTMLParser):
self.pbr()
self.br_toggle = " "

def o(
self, data: str, puredata: bool = False, force: Union[bool, str] = False
) -> None:
def o(self, data: str, puredata: bool = False, force: Union[bool, str] = False) -> None:
"""
Deal with indentation and whitespace
"""
@@ -864,9 +839,7 @@ class HTML2Text(html.parser.HTMLParser):
self.out(" ")
self.space = False

if self.a and (
(self.p_p == 2 and self.links_each_paragraph) or force == "end"
):
if self.a and ((self.p_p == 2 and self.links_each_paragraph) or force == "end"):
if force == "end":
self.out("\n")

@@ -925,11 +898,7 @@ class HTML2Text(html.parser.HTMLParser):

if self.maybe_automatic_link is not None:
href = self.maybe_automatic_link
if (
href == data
and self.absolute_url_matcher.match(href)
and self.use_automatic_links
):
if href == data and self.absolute_url_matcher.match(href) and self.use_automatic_links:
self.o("<" + data + ">")
self.empty_link = False
return
@@ -1000,9 +969,7 @@ class HTML2Text(html.parser.HTMLParser):
self.inline_links = False
for para in text.split("\n"):
if len(para) > 0:
if not skipwrap(
para, self.wrap_links, self.wrap_list_items, self.wrap_tables
):
if not skipwrap(para, self.wrap_links, self.wrap_list_items, self.wrap_tables):
indent = ""
if para.startswith(" " + self.ul_item_mark):
# list item continuation: add a double indent to the
@@ -1043,9 +1010,7 @@ class HTML2Text(html.parser.HTMLParser):
return result


def html2text(
html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH
) -> str:
def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = config.BODY_WIDTH) -> str:
h = html.strip() or ""
if h:
h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)

@@ -117,10 +117,7 @@ def main() -> None:
dest="images_with_size",
action="store_true",
default=config.IMAGES_WITH_SIZE,
help=(
"Write image tags with height and width attrs as raw html to retain "
"dimensions"
),
help=("Write image tags with height and width attrs as raw html to retain " "dimensions"),
)
p.add_argument(
"-g",
@@ -260,9 +257,7 @@ def main() -> None:
default=config.CLOSE_QUOTE,
help="The character used to close quotes",
)
p.add_argument(
"--version", action="version", version=".".join(map(str, __version__))
)
p.add_argument("--version", action="version", version=".".join(map(str, __version__)))
p.add_argument("filename", nargs="?")
p.add_argument("encoding", nargs="?", default="utf-8")
args = p.parse_args()

@@ -4,9 +4,7 @@ from typing import Dict, List, Optional
from . import config

unifiable_n = {
html.entities.name2codepoint[k]: v
for k, v in config.UNIFIABLE.items()
if k != "nbsp"
html.entities.name2codepoint[k]: v for k, v in config.UNIFIABLE.items() if k != "nbsp"
}


@@ -156,9 +154,7 @@ def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
return 0


def skipwrap(
para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
) -> bool:
def skipwrap(para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool) -> bool:
# If it appears to contain a link
# don't wrap
if not wrap_links and config.RE_LINK.search(para):
@@ -236,9 +232,7 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
max_cols = num_cols

max_width = [
max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
]
max_width = [max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)]

# reformat
new_lines = []
@@ -247,15 +241,13 @@ def reformat_table(lines: List[str], right_margin: int) -> List[str]:
if set(line.strip()) == set("-|"):
filler = "-"
new_cols = [
x.rstrip() + (filler * (M - len(x.rstrip())))
for x, M in zip(cols, max_width)
x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
]
new_lines.append("|-" + "|".join(new_cols) + "|")
else:
filler = " "
new_cols = [
x.rstrip() + (filler * (M - len(x.rstrip())))
for x, M in zip(cols, max_width)
x.rstrip() + (filler * (M - len(x.rstrip()))) for x, M in zip(cols, max_width)
]
new_lines.append("| " + "|".join(new_cols) + "|")
return new_lines

@@ -5,61 +5,48 @@ from dateutil.parser import parse as date_parse
from base.orm import local_session
from migration.html2text import html2text
from orm.reaction import Reaction, ReactionKind
from orm.shout import ShoutReactionsFollower
from orm.shout import Shout, ShoutReactionsFollower
from orm.topic import TopicFollower
from orm.user import User
from orm.shout import Shout

ts = datetime.now(tz=timezone.utc)


def auto_followers(session, topics, reaction_dict):
# creating shout's reactions following for reaction author
following1 = session.query(
ShoutReactionsFollower
).where(
ShoutReactionsFollower.follower == reaction_dict["createdBy"]
).filter(
ShoutReactionsFollower.shout == reaction_dict["shout"]
).first()
following1 = (
session.query(ShoutReactionsFollower)
.where(ShoutReactionsFollower.follower == reaction_dict["createdBy"])
.filter(ShoutReactionsFollower.shout == reaction_dict["shout"])
.first()
)
if not following1:
following1 = ShoutReactionsFollower.create(
follower=reaction_dict["createdBy"],
shout=reaction_dict["shout"],
auto=True
follower=reaction_dict["createdBy"], shout=reaction_dict["shout"], auto=True
)
session.add(following1)
# creating topics followings for reaction author
for t in topics:
tf = session.query(
TopicFollower
).where(
TopicFollower.follower == reaction_dict["createdBy"]
).filter(
TopicFollower.topic == t['id']
).first()
tf = (
session.query(TopicFollower)
.where(TopicFollower.follower == reaction_dict["createdBy"])
.filter(TopicFollower.topic == t['id'])
.first()
)
if not tf:
topic_following = TopicFollower.create(
follower=reaction_dict["createdBy"],
topic=t['id'],
auto=True
follower=reaction_dict["createdBy"], topic=t['id'], auto=True
)
session.add(topic_following)


def migrate_ratings(session, entry, reaction_dict):
for comment_rating_old in entry.get("ratings", []):
rater = (
session.query(User)
.filter(User.oid == comment_rating_old["createdBy"])
.first()
)
rater = session.query(User).filter(User.oid == comment_rating_old["createdBy"]).first()
re_reaction_dict = {
"shout": reaction_dict["shout"],
"replyTo": reaction_dict["id"],
"kind": ReactionKind.LIKE
if comment_rating_old["value"] > 0
else ReactionKind.DISLIKE,
"kind": ReactionKind.LIKE if comment_rating_old["value"] > 0 else ReactionKind.DISLIKE,
"createdBy": rater.id if rater else 1,
}
cts = comment_rating_old.get("createdAt")
@@ -68,18 +55,15 @@ def migrate_ratings(session, entry, reaction_dict):
try:
# creating reaction from old rating
rr = Reaction.create(**re_reaction_dict)
following2 = session.query(
ShoutReactionsFollower
).where(
ShoutReactionsFollower.follower == re_reaction_dict['createdBy']
).filter(
ShoutReactionsFollower.shout == rr.shout
).first()
following2 = (
session.query(ShoutReactionsFollower)
.where(ShoutReactionsFollower.follower == re_reaction_dict['createdBy'])
.filter(ShoutReactionsFollower.shout == rr.shout)
.first()
)
if not following2:
following2 = ShoutReactionsFollower.create(
follower=re_reaction_dict['createdBy'],
shout=rr.shout,
auto=True
follower=re_reaction_dict['createdBy'], shout=rr.shout, auto=True
)
session.add(following2)
session.add(rr)
@@ -150,9 +134,7 @@ async def migrate(entry, storage):
else:
stage = "author and old id found"
try:
shout = session.query(
Shout
).where(Shout.slug == old_shout["slug"]).one()
shout = session.query(Shout).where(Shout.slug == old_shout["slug"]).one()
if shout:
reaction_dict["shout"] = shout.id
reaction_dict["createdBy"] = author.id if author else 1
@@ -190,17 +172,20 @@ def migrate_2stage(old_comment, idmap):
comment = session.query(Reaction).where(Reaction.id == new_id).first()
try:
if new_replyto_id:
new_reply = session.query(Reaction).where(Reaction.id == new_replyto_id).first()
new_reply = (
session.query(Reaction).where(Reaction.id == new_replyto_id).first()
)
if not new_reply:
print(new_replyto_id)
raise Exception("cannot find reply by id!")
comment.replyTo = new_reply.id
session.add(comment)
srf = session.query(ShoutReactionsFollower).where(
ShoutReactionsFollower.shout == comment.shout
).filter(
ShoutReactionsFollower.follower == comment.createdBy
).first()
srf = (
session.query(ShoutReactionsFollower)
.where(ShoutReactionsFollower.shout == comment.shout)
.filter(ShoutReactionsFollower.follower == comment.createdBy)
.first()
)
if not srf:
srf = ShoutReactionsFollower.create(
shout=comment.shout, follower=comment.createdBy, auto=True

@@ -1,16 +1,18 @@
from datetime import datetime, timezone
import json
import re
from datetime import datetime, timezone

from dateutil.parser import parse as date_parse
from sqlalchemy.exc import IntegrityError
from transliterate import translit

from base.orm import local_session
from migration.extract import extract_html, extract_media
from orm.reaction import Reaction, ReactionKind
from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
from orm.shout import Shout, ShoutReactionsFollower, ShoutTopic
from orm.topic import Topic, TopicFollower
from orm.user import User
from orm.topic import TopicFollower, Topic
from services.stat.viewed import ViewedStorage
import re

OLD_DATE = "2016-03-05 22:22:00.350000"
ts = datetime.now(tz=timezone.utc)
@@ -91,11 +93,12 @@ async def create_shout(shout_dict):
s = Shout.create(**shout_dict)
author = s.authors[0]
with local_session() as session:
srf = session.query(ShoutReactionsFollower).where(
ShoutReactionsFollower.shout == s.id
).filter(
ShoutReactionsFollower.follower == author.id
).first()
srf = (
session.query(ShoutReactionsFollower)
.where(ShoutReactionsFollower.shout == s.id)
.filter(ShoutReactionsFollower.follower == author.id)
.first()
)
if not srf:
srf = ShoutReactionsFollower.create(shout=s.id, follower=author.id, auto=True)
session.add(srf)
@@ -137,11 +140,14 @@ async def migrate(entry, storage):
r = {
"layout": type2layout[entry["type"]],
"title": entry["title"],
"authors": [author, ],
"authors": [
author,
],
"slug": get_shout_slug(entry),
"cover": (
"https://images.discours.io/unsafe/" +
entry["thumborId"] if entry.get("thumborId") else entry.get("image", {}).get("url")
"https://images.discours.io/unsafe/" + entry["thumborId"]
if entry.get("thumborId")
else entry.get("image", {}).get("url")
),
"visibility": "public" if entry.get("published") else "community",
"publishedAt": date_parse(entry.get("publishedAt")) if entry.get("published") else None,
@@ -150,7 +156,7 @@ async def migrate(entry, storage):
"updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts,
"createdBy": author.id,
"topics": await add_topics_follower(entry, storage, author),
"body": extract_html(entry, cleanup=True)
"body": extract_html(entry, cleanup=True),
}

# main topic patch
@@ -184,7 +190,9 @@ async def migrate(entry, storage):

# update data
shout_dict = shout.dict()
shout_dict["authors"] = [author.dict(), ]
shout_dict["authors"] = [
author.dict(),
]

# shout topics aftermath
shout_dict["topics"] = await topics_aftermath(r, storage)
@@ -193,7 +201,9 @@ async def migrate(entry, storage):
await content_ratings_to_reactions(entry, shout_dict["slug"])

# shout views
await ViewedStorage.increment(shout_dict["slug"], amount=entry.get("views", 1), viewer='old-discours')
await ViewedStorage.increment(
shout_dict["slug"], amount=entry.get("views", 1), viewer='old-discours'
)
# del shout_dict['ratings']

storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
@@ -205,7 +215,9 @@ async def add_topics_follower(entry, storage, user):
topics = set([])
category = entry.get("category")
topics_by_oid = storage["topics"]["by_oid"]
oids = [category, ] + entry.get("tags", [])
oids = [
category,
] + entry.get("tags", [])
for toid in oids:
tslug = topics_by_oid.get(toid, {}).get("slug")
if tslug:
@@ -217,19 +229,14 @@ async def add_topics_follower(entry, storage, user):
try:
tpc = session.query(Topic).where(Topic.slug == tpcslug).first()
if tpc:
tf = session.query(
TopicFollower
).where(
TopicFollower.follower == user.id
).filter(
TopicFollower.topic == tpc.id
).first()
tf = (
session.query(TopicFollower)
.where(TopicFollower.follower == user.id)
.filter(TopicFollower.topic == tpc.id)
.first()
)
if not tf:
tf = TopicFollower.create(
topic=tpc.id,
follower=user.id,
auto=True
)
tf = TopicFollower.create(topic=tpc.id, follower=user.id, auto=True)
session.add(tf)
session.commit()
except IntegrityError:
@@ -295,10 +302,7 @@ async def resolve_create_shout(shout_dict):
for key in shout_dict:
if key in s.__dict__:
if s.__dict__[key] != shout_dict[key]:
print(
"[migration] shout already exists, but differs in %s"
% key
)
print("[migration] shout already exists, but differs in %s" % key)
bump = True
else:
print("[migration] shout already exists, but lacks %s" % key)
@@ -344,9 +348,7 @@ async def topics_aftermath(entry, storage):
)
if not shout_topic_new:
try:
ShoutTopic.create(
**{"shout": shout.id, "topic": new_topic.id}
)
ShoutTopic.create(**{"shout": shout.id, "topic": new_topic.id})
except Exception:
print("[migration] shout topic error: " + newslug)
session.commit()
@@ -363,9 +365,7 @@ async def content_ratings_to_reactions(entry, slug):
with local_session() as session:
for content_rating in entry.get("ratings", []):
rater = (
session.query(User)
.filter(User.oid == content_rating["createdBy"])
.first()
session.query(User).filter(User.oid == content_rating["createdBy"]).first()
) or User.default_user
shout = session.query(Shout).where(Shout.slug == slug).first()
cts = content_rating.get("createdAt")
@@ -375,7 +375,7 @@ async def content_ratings_to_reactions(entry, slug):
if content_rating["value"] > 0
else ReactionKind.DISLIKE,
"createdBy": rater.id,
"shout": shout.id
"shout": shout.id,
}
reaction = (
session.query(Reaction)

@@ -12,27 +12,19 @@ def migrate(entry, storage):
print(shout_dict['body'])
remark = {
"shout": shout_dict['id'],
"body": extract_md(
html2text(entry['body']),
shout_dict
),
"kind": ReactionKind.REMARK
"body": extract_md(html2text(entry['body']), shout_dict),
"kind": ReactionKind.REMARK,
}

if entry.get('textBefore'):
remark['range'] = str(
shout_dict['body']
.index(
entry['textBefore'] or ''
)
) + ':' + str(
shout_dict['body']
.index(
entry['textAfter'] or ''
) + len(
entry['textAfter'] or ''
)
remark['range'] = (
str(shout_dict['body'].index(entry['textBefore'] or ''))
+ ':'
+ str(
shout_dict['body'].index(entry['textAfter'] or '')
+ len(entry['textAfter'] or '')
)
)

with local_session() as session:
rmrk = Reaction.create(**remark)

@@ -10,7 +10,7 @@ def migrate(entry):
"slug": entry["slug"],
"oid": entry["_id"],
"title": entry["title"].replace(" ", " "),
"body": extract_md(html2text(body_orig))
"body": extract_md(html2text(body_orig)),
}

with local_session() as session:

@@ -23,7 +23,7 @@ def migrate(entry):
"muted": False, # amnesty
"links": [],
"name": "anonymous",
"password": entry["services"]["password"].get("bcrypt")
"password": entry["services"]["password"].get("bcrypt"),
}

if "updatedAt" in entry:
@@ -35,7 +35,11 @@ def migrate(entry):
slug = entry["profile"].get("path").lower()
slug = re.sub('[^0-9a-zA-Z]+', '-', slug).strip()
user_dict["slug"] = slug
bio = (entry.get("profile", {"bio": ""}).get("bio") or "").replace('\(', '(').replace('\)', ')')
bio = (
(entry.get("profile", {"bio": ""}).get("bio") or "")
.replace('\(', '(')
.replace('\)', ')')
)
bio_text = BeautifulSoup(bio, features="lxml").text

if len(bio_text) > 120:
@@ -46,8 +50,7 @@ def migrate(entry):
# userpic
try:
user_dict["userpic"] = (
"https://images.discours.io/unsafe/"
+ entry["profile"]["thumborId"]
"https://images.discours.io/unsafe/" + entry["profile"]["thumborId"]
)
except KeyError:
try:
@@ -62,11 +65,7 @@ def migrate(entry):
name = (name + " " + ln) if ln else name
if not name:
name = slug if slug else "anonymous"
name = (
entry["profile"]["path"].lower().strip().replace(" ", "-")
if len(name) < 2
else name
)
name = entry["profile"]["path"].lower().strip().replace(" ", "-") if len(name) < 2 else name
user_dict["name"] = name

# links
@@ -95,9 +94,7 @@ def migrate(entry):
except IntegrityError:
print("[migration] cannot create user " + user_dict["slug"])
with local_session() as session:
old_user = (
session.query(User).filter(User.slug == user_dict["slug"]).first()
)
old_user = session.query(User).filter(User.slug == user_dict["slug"]).first()
old_user.oid = oid
old_user.password = user_dict["password"]
session.commit()
@@ -114,7 +111,7 @@ def post_migrate():
"slug": "old-discours",
"username": "old-discours",
"email": "old@discours.io",
"name": "Просмотры на старой версии сайта"
"name": "Просмотры на старой версии сайта",
}

with local_session() as session:
@@ -148,11 +145,7 @@ def migrate_2stage(entry, id_map):

user_rating = UserRating.create(**user_rating_dict)
if user_rating_dict['value'] > 0:
af = AuthorFollower.create(
author=user.id,
follower=rater.id,
auto=True
)
af = AuthorFollower.create(author=user.id, follower=rater.id, auto=True)
session.add(af)
session.add(user_rating)
session.commit()