nochecks

2022-11-19 14:35:34 +03:00
parent 57e1460356
commit 47b285f8ac
18 changed files with 162 additions and 218 deletions
--- a/migration/init.py
+++ b/migration/init.py
@@ -7,7 +7,6 @@ import sys
 from datetime import datetime

 import bs4
-from base.redis import redis
 from migration.tables.comments import migrate as migrateComment
 from migration.tables.comments import migrate_2stage as migrateComment_2stage
 from migration.tables.content_items import get_shout_slug
@@ -17,6 +16,7 @@ from migration.tables.users import migrate as migrateUser
 from migration.tables.users import migrate_2stage as migrateUser_2stage
 from orm.reaction import Reaction
 from settings import DB_URL
+from orm import init_tables

 # from export import export_email_subscriptions
 from .export import export_mdx, export_slug
@@ -84,6 +84,7 @@ async def shouts_handle(storage, args):
    discours_author = 0
    anonymous_author = 0
    pub_counter = 0
+    ignored = 0
    topics_dataset_bodies = []
    topics_dataset_tlist = []
    for entry in storage["shouts"]["data"]:
@@ -96,40 +97,44 @@ async def shouts_handle(storage, args):

        # migrate
        shout = await migrateShout(entry, storage)
-        storage["shouts"]["by_oid"][entry["_id"]] = shout
-        storage["shouts"]["by_slug"][shout["slug"]] = shout
-        # shouts.topics
-        if not shout["topics"]:
-            print("[migration] no topics!")
+        if shout:
+            storage["shouts"]["by_oid"][entry["_id"]] = shout
+            storage["shouts"]["by_slug"][shout["slug"]] = shout
+            # shouts.topics
+            if not shout["topics"]:
+                print("[migration] no topics!")

-        # with author
-        author: str = shout["authors"][0].dict()
-        if author["slug"] == "discours":
-            discours_author += 1
-        if author["slug"] == "anonymous":
-            anonymous_author += 1
-        # print('[migration] ' + shout['slug'] + ' with author ' + author)
+            # with author
+            author: str = shout["authors"][0].dict()
+            if author["slug"] == "discours":
+                discours_author += 1
+            if author["slug"] == "anonymous":
+                anonymous_author += 1
+            # print('[migration] ' + shout['slug'] + ' with author ' + author)

-        if entry.get("published"):
-            if "mdx" in args:
-                export_mdx(shout)
-            pub_counter += 1
+            if entry.get("published"):
+                if "mdx" in args:
+                    export_mdx(shout)
+                pub_counter += 1

-        # print main counter
-        counter += 1
-        line = str(counter + 1) + ": " + shout["slug"] + " @" + author["slug"]
-        print(line)
+            # print main counter
+            counter += 1
+            line = str(counter + 1) + ": " + shout["slug"] + " @" + author["slug"]
+            print(line)

-        b = bs4.BeautifulSoup(shout["body"], "html.parser")
-        texts = [shout["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
-        texts = texts + b.findAll(text=True)
-        topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
-        topics_dataset_tlist.append(shout["topics"])
+            b = bs4.BeautifulSoup(shout["body"], "html.parser")
+            texts = [shout["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
+            texts = texts + b.findAll(text=True)
+            topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
+            topics_dataset_tlist.append(shout["topics"])
+        else:
+            ignored += 1

    # np.savetxt('topics_dataset.csv', (topics_dataset_bodies, topics_dataset_tlist), delimiter=',
    # ', fmt='%s')

    print("[migration] " + str(counter) + " content items were migrated")
+    print("[migration] " + str(ignored) + " content items were ignored")
    print("[migration] " + str(pub_counter) + " have been published")
    print("[migration] " + str(discours_author) + " authored by @discours")
    print("[migration] " + str(anonymous_author) + " authored by @anonymous")
@@ -182,8 +187,6 @@ async def all_handle(storage, args):
    await users_handle(storage)
    await topics_handle(storage)
    print("[migration] users and topics are migrated")
-    await redis.connect()
-    print("[migration] redis connected")
    await shouts_handle(storage, args)
    print("[migration] migrating comments")
    await comments_handle(storage)
@@ -314,6 +317,7 @@ async def main():
        cmd = sys.argv[1]
        if type(cmd) == str:
            print("[migration] command: " + cmd)
+        init_tables()
        await handle_auto()
    else:
        print("[migration] usage: python server.py migrate")
--- a/migration/tables/content_items.py
+++ b/migration/tables/content_items.py
@@ -3,10 +3,8 @@ import json
 from dateutil.parser import parse as date_parse
 from sqlalchemy.exc import IntegrityError
 from transliterate import translit
-
 from base.orm import local_session
 from migration.extract import prepare_html_body
-from orm.community import Community
 from orm.reaction import Reaction, ReactionKind
 from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
 from orm.user import User
@@ -103,12 +101,8 @@ async def migrate(entry, storage):
    r = {
        "layout": type2layout[entry["type"]],
        "title": entry["title"],
-        "community": Community.default_community.id,
        "authors": [],
-        "topics": set([]),
-        # 'rating': 0,
-        # 'ratings': [],
-        "createdAt": [],
+        "topics": set([])
    }
    topics_by_oid = storage["topics"]["by_oid"]
    users_by_oid = storage["users"]["by_oid"]
@@ -177,20 +171,24 @@ async def migrate(entry, storage):
    # add author as TopicFollower
    with local_session() as session:
        for tpc in r['topics']:
-            tf = session.query(
-                TopicFollower
-            ).where(
-                TopicFollower.follower == userslug
-            ).filter(
-                TopicFollower.topic == tpc
-            ).first()
-            if not tf:
-                tf = TopicFollower.create(
-                    topic=tpc,
-                    follower=userslug,
-                    auto=True
-                )
-                session.add(tf)
+            try:
+                tf = session.query(
+                    TopicFollower
+                ).where(
+                    TopicFollower.follower == userslug
+                ).filter(
+                    TopicFollower.topic == tpc
+                ).first()
+                if not tf:
+                    tf = TopicFollower.create(
+                        topic=tpc,
+                        follower=userslug,
+                        auto=True
+                    )
+                    session.add(tf)
+            except IntegrityError:
+                print('[migration.shout] skipped by topic ' + tpc)
+                return

    entry["topics"] = r["topics"]
    entry["cover"] = r["cover"]
@@ -205,7 +203,6 @@ async def migrate(entry, storage):
    user = None
    del shout_dict["topics"]
    with local_session() as session:
-        # c = session.query(Community).all().pop()
        if not user and userslug:
            user = session.query(User).filter(User.slug == userslug).first()
        if not user and userdata:
--- a/migration/tables/replacements.json
+++ b/migration/tables/replacements.json
@@ -200,7 +200,6 @@
    "ecology": "ecology",
    "economics": "economics",
    "eda": "food",
-    "editing": "editing",
    "editorial-statements": "editorial-statements",
    "eduard-limonov": "eduard-limonov",
    "education": "education",
@@ -597,7 +596,6 @@
    "r-b": "rnb",
    "rasizm": "racism",
    "realizm": "realism",
-    "redaktura": "editorial",
    "refleksiya": "reflection",
    "reggi": "reggae",
    "religion": "religion",
--- a/migration/tables/topics.py
+++ b/migration/tables/topics.py
@@ -1,6 +1,6 @@
 from base.orm import local_session
 from migration.extract import extract_md, html2text
-from orm import Topic, Community
+from orm import Topic


 def migrate(entry):
@@ -8,9 +8,7 @@ def migrate(entry):
    topic_dict = {
        "slug": entry["slug"],
        "oid": entry["_id"],
-        "title": entry["title"].replace("&nbsp;", " "),
-        "children": [],
-        "community": Community.default_community.slug,
+        "title": entry["title"].replace("&nbsp;", " ")
    }
    topic_dict["body"] = extract_md(html2text(body_orig), entry["_id"])
    with local_session() as session:
--- a/migration/tables/users.py
+++ b/migration/tables/users.py
@@ -36,6 +36,7 @@ def migrate(entry):
        )
        bio = BeautifulSoup(entry.get("profile").get("bio") or "", features="lxml").text
        if bio.startswith('<'):
+            print('[migration] bio! ' + bio)
            bio = BeautifulSoup(bio, features="lxml").text
        bio = bio.replace('\(', '(').replace('\)', ')')