This commit is contained in:
2022-11-19 14:35:34 +03:00
parent 57e1460356
commit 47b285f8ac
18 changed files with 162 additions and 218 deletions

View File

@@ -7,7 +7,6 @@ import sys
from datetime import datetime
import bs4
from base.redis import redis
from migration.tables.comments import migrate as migrateComment
from migration.tables.comments import migrate_2stage as migrateComment_2stage
from migration.tables.content_items import get_shout_slug
@@ -17,6 +16,7 @@ from migration.tables.users import migrate as migrateUser
from migration.tables.users import migrate_2stage as migrateUser_2stage
from orm.reaction import Reaction
from settings import DB_URL
from orm import init_tables
# from export import export_email_subscriptions
from .export import export_mdx, export_slug
@@ -84,6 +84,7 @@ async def shouts_handle(storage, args):
discours_author = 0
anonymous_author = 0
pub_counter = 0
ignored = 0
topics_dataset_bodies = []
topics_dataset_tlist = []
for entry in storage["shouts"]["data"]:
@@ -96,40 +97,44 @@ async def shouts_handle(storage, args):
# migrate
shout = await migrateShout(entry, storage)
storage["shouts"]["by_oid"][entry["_id"]] = shout
storage["shouts"]["by_slug"][shout["slug"]] = shout
# shouts.topics
if not shout["topics"]:
print("[migration] no topics!")
if shout:
storage["shouts"]["by_oid"][entry["_id"]] = shout
storage["shouts"]["by_slug"][shout["slug"]] = shout
# shouts.topics
if not shout["topics"]:
print("[migration] no topics!")
# with author
author: str = shout["authors"][0].dict()
if author["slug"] == "discours":
discours_author += 1
if author["slug"] == "anonymous":
anonymous_author += 1
# print('[migration] ' + shout['slug'] + ' with author ' + author)
# with author
author: str = shout["authors"][0].dict()
if author["slug"] == "discours":
discours_author += 1
if author["slug"] == "anonymous":
anonymous_author += 1
# print('[migration] ' + shout['slug'] + ' with author ' + author)
if entry.get("published"):
if "mdx" in args:
export_mdx(shout)
pub_counter += 1
if entry.get("published"):
if "mdx" in args:
export_mdx(shout)
pub_counter += 1
# print main counter
counter += 1
line = str(counter + 1) + ": " + shout["slug"] + " @" + author["slug"]
print(line)
# print main counter
counter += 1
line = str(counter + 1) + ": " + shout["slug"] + " @" + author["slug"]
print(line)
b = bs4.BeautifulSoup(shout["body"], "html.parser")
texts = [shout["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
texts = texts + b.findAll(text=True)
topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
topics_dataset_tlist.append(shout["topics"])
b = bs4.BeautifulSoup(shout["body"], "html.parser")
texts = [shout["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
texts = texts + b.findAll(text=True)
topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
topics_dataset_tlist.append(shout["topics"])
else:
ignored += 1
# np.savetxt('topics_dataset.csv', (topics_dataset_bodies, topics_dataset_tlist), delimiter=',
# ', fmt='%s')
print("[migration] " + str(counter) + " content items were migrated")
print("[migration] " + str(ignored) + " content items were ignored")
print("[migration] " + str(pub_counter) + " have been published")
print("[migration] " + str(discours_author) + " authored by @discours")
print("[migration] " + str(anonymous_author) + " authored by @anonymous")
@@ -182,8 +187,6 @@ async def all_handle(storage, args):
await users_handle(storage)
await topics_handle(storage)
print("[migration] users and topics are migrated")
await redis.connect()
print("[migration] redis connected")
await shouts_handle(storage, args)
print("[migration] migrating comments")
await comments_handle(storage)
@@ -314,6 +317,7 @@ async def main():
cmd = sys.argv[1]
if type(cmd) == str:
print("[migration] command: " + cmd)
init_tables()
await handle_auto()
else:
print("[migration] usage: python server.py migrate")

View File

@@ -3,10 +3,8 @@ import json
from dateutil.parser import parse as date_parse
from sqlalchemy.exc import IntegrityError
from transliterate import translit
from base.orm import local_session
from migration.extract import prepare_html_body
from orm.community import Community
from orm.reaction import Reaction, ReactionKind
from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
from orm.user import User
@@ -103,12 +101,8 @@ async def migrate(entry, storage):
r = {
"layout": type2layout[entry["type"]],
"title": entry["title"],
"community": Community.default_community.id,
"authors": [],
"topics": set([]),
# 'rating': 0,
# 'ratings': [],
"createdAt": [],
"topics": set([])
}
topics_by_oid = storage["topics"]["by_oid"]
users_by_oid = storage["users"]["by_oid"]
@@ -177,20 +171,24 @@ async def migrate(entry, storage):
# add author as TopicFollower
with local_session() as session:
for tpc in r['topics']:
tf = session.query(
TopicFollower
).where(
TopicFollower.follower == userslug
).filter(
TopicFollower.topic == tpc
).first()
if not tf:
tf = TopicFollower.create(
topic=tpc,
follower=userslug,
auto=True
)
session.add(tf)
try:
tf = session.query(
TopicFollower
).where(
TopicFollower.follower == userslug
).filter(
TopicFollower.topic == tpc
).first()
if not tf:
tf = TopicFollower.create(
topic=tpc,
follower=userslug,
auto=True
)
session.add(tf)
except IntegrityError:
print('[migration.shout] skipped by topic ' + tpc)
return
entry["topics"] = r["topics"]
entry["cover"] = r["cover"]
@@ -205,7 +203,6 @@ async def migrate(entry, storage):
user = None
del shout_dict["topics"]
with local_session() as session:
# c = session.query(Community).all().pop()
if not user and userslug:
user = session.query(User).filter(User.slug == userslug).first()
if not user and userdata:

View File

@@ -200,7 +200,6 @@
"ecology": "ecology",
"economics": "economics",
"eda": "food",
"editing": "editing",
"editorial-statements": "editorial-statements",
"eduard-limonov": "eduard-limonov",
"education": "education",
@@ -597,7 +596,6 @@
"r-b": "rnb",
"rasizm": "racism",
"realizm": "realism",
"redaktura": "editorial",
"refleksiya": "reflection",
"reggi": "reggae",
"religion": "religion",

View File

@@ -1,6 +1,6 @@
from base.orm import local_session
from migration.extract import extract_md, html2text
from orm import Topic, Community
from orm import Topic
def migrate(entry):
@@ -8,9 +8,7 @@ def migrate(entry):
topic_dict = {
"slug": entry["slug"],
"oid": entry["_id"],
"title": entry["title"].replace(" ", " "),
"children": [],
"community": Community.default_community.slug,
"title": entry["title"].replace(" ", " ")
}
topic_dict["body"] = extract_md(html2text(body_orig), entry["_id"])
with local_session() as session:

View File

@@ -36,6 +36,7 @@ def migrate(entry):
)
bio = BeautifulSoup(entry.get("profile").get("bio") or "", features="lxml").text
if bio.startswith('<'):
print('[migration] bio! ' + bio)
bio = BeautifulSoup(bio, features="lxml").text
bio = bio.replace('\(', '(').replace('\)', ')')