nochecks
@@ -7,7 +7,6 @@ import sys
 from datetime import datetime

 import bs4
-from base.redis import redis
 from migration.tables.comments import migrate as migrateComment
 from migration.tables.comments import migrate_2stage as migrateComment_2stage
 from migration.tables.content_items import get_shout_slug
@@ -17,6 +16,7 @@ from migration.tables.users import migrate as migrateUser
 from migration.tables.users import migrate_2stage as migrateUser_2stage
 from orm.reaction import Reaction
 from settings import DB_URL
+from orm import init_tables

 # from export import export_email_subscriptions
 from .export import export_mdx, export_slug
@@ -84,6 +84,7 @@ async def shouts_handle(storage, args):
     discours_author = 0
     anonymous_author = 0
     pub_counter = 0
+    ignored = 0
     topics_dataset_bodies = []
     topics_dataset_tlist = []
     for entry in storage["shouts"]["data"]:
@@ -96,40 +97,44 @@ async def shouts_handle(storage, args):

         # migrate
         shout = await migrateShout(entry, storage)
-        storage["shouts"]["by_oid"][entry["_id"]] = shout
-        storage["shouts"]["by_slug"][shout["slug"]] = shout
-        # shouts.topics
-        if not shout["topics"]:
-            print("[migration] no topics!")
+        if shout:
+            storage["shouts"]["by_oid"][entry["_id"]] = shout
+            storage["shouts"]["by_slug"][shout["slug"]] = shout
+            # shouts.topics
+            if not shout["topics"]:
+                print("[migration] no topics!")

-        # with author
-        author: str = shout["authors"][0].dict()
-        if author["slug"] == "discours":
-            discours_author += 1
-        if author["slug"] == "anonymous":
-            anonymous_author += 1
-        # print('[migration] ' + shout['slug'] + ' with author ' + author)
+            # with author
+            author: str = shout["authors"][0].dict()
+            if author["slug"] == "discours":
+                discours_author += 1
+            if author["slug"] == "anonymous":
+                anonymous_author += 1
+            # print('[migration] ' + shout['slug'] + ' with author ' + author)

-        if entry.get("published"):
-            if "mdx" in args:
-                export_mdx(shout)
-            pub_counter += 1
+            if entry.get("published"):
+                if "mdx" in args:
+                    export_mdx(shout)
+                pub_counter += 1

-        # print main counter
-        counter += 1
-        line = str(counter + 1) + ": " + shout["slug"] + " @" + author["slug"]
-        print(line)
+            # print main counter
+            counter += 1
+            line = str(counter + 1) + ": " + shout["slug"] + " @" + author["slug"]
+            print(line)

-        b = bs4.BeautifulSoup(shout["body"], "html.parser")
-        texts = [shout["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
-        texts = texts + b.findAll(text=True)
-        topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
-        topics_dataset_tlist.append(shout["topics"])
+            b = bs4.BeautifulSoup(shout["body"], "html.parser")
+            texts = [shout["title"].lower().replace(r"[^а-яА-Яa-zA-Z]", "")]
+            texts = texts + b.findAll(text=True)
+            topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
+            topics_dataset_tlist.append(shout["topics"])
+        else:
+            ignored += 1

     # np.savetxt('topics_dataset.csv', (topics_dataset_bodies, topics_dataset_tlist), delimiter=',
     # ', fmt='%s')

     print("[migration] " + str(counter) + " content items were migrated")
+    print("[migration] " + str(ignored) + " content items were ignored")
     print("[migration] " + str(pub_counter) + " have been published")
     print("[migration] " + str(discours_author) + " authored by @discours")
     print("[migration] " + str(anonymous_author) + " authored by @anonymous")
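
The change above wraps the whole per-entry body in an "if shout:" guard, so entries for which migrateShout() returns nothing are counted in ignored instead of failing on shout["slug"]. A minimal, self-contained sketch of that guard pattern follows; migrate_one and the sample entries are placeholders, not the project's actual API.

    # Minimal sketch of the guard pattern: process an entry only when migration
    # returned a shout, otherwise count it as ignored.
    def run(entries, migrate_one):
        counter = 0
        ignored = 0
        for entry in entries:
            shout = migrate_one(entry)
            if shout:
                counter += 1
                print(str(counter) + ": " + shout["slug"])
            else:
                ignored += 1
        print(str(counter) + " migrated, " + str(ignored) + " ignored")

    run([{"slug": "first-post"}, None], lambda e: e)  # 1 migrated, 1 ignored
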
@@ -182,8 +187,6 @@ async def all_handle(storage, args):
     await users_handle(storage)
     await topics_handle(storage)
     print("[migration] users and topics are migrated")
-    await redis.connect()
-    print("[migration] redis connected")
     await shouts_handle(storage, args)
     print("[migration] migrating comments")
     await comments_handle(storage)
@@ -314,6 +317,7 @@ async def main():
         cmd = sys.argv[1]
         if type(cmd) == str:
             print("[migration] command: " + cmd)
+            init_tables()
             await handle_auto()
     else:
         print("[migration] usage: python server.py migrate")
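
The main() hunk appears to add an init_tables() call before handle_auto(), so the ORM tables exist before any handler writes to the database. A rough sketch of that dispatch shape, with init_tables() and handle_auto() reduced to stubs standing in for the project's real functions:

    import asyncio
    import sys


    def init_tables():
        print("[migration] tables initialized")  # stub for the real ORM setup


    async def handle_auto():
        print("[migration] running auto migration")  # stub for the real handler


    async def main():
        if len(sys.argv) > 1:
            cmd = sys.argv[1]
            print("[migration] command: " + cmd)
            init_tables()  # create tables before any handler touches the DB
            await handle_auto()
        else:
            print("[migration] usage: python server.py migrate")


    if __name__ == "__main__":
        asyncio.run(main())
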
@@ -3,10 +3,8 @@ import json
 from dateutil.parser import parse as date_parse
 from sqlalchemy.exc import IntegrityError
 from transliterate import translit

 from base.orm import local_session
 from migration.extract import prepare_html_body
-from orm.community import Community
 from orm.reaction import Reaction, ReactionKind
 from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower
 from orm.user import User
@@ -103,12 +101,8 @@ async def migrate(entry, storage):
     r = {
         "layout": type2layout[entry["type"]],
         "title": entry["title"],
-        "community": Community.default_community.id,
         "authors": [],
-        "topics": set([]),
-        # 'rating': 0,
-        # 'ratings': [],
-        "createdAt": [],
+        "topics": set([])
     }
     topics_by_oid = storage["topics"]["by_oid"]
     users_by_oid = storage["users"]["by_oid"]
@@ -177,20 +171,24 @@ async def migrate(entry, storage):
     # add author as TopicFollower
     with local_session() as session:
         for tpc in r['topics']:
-            tf = session.query(
-                TopicFollower
-            ).where(
-                TopicFollower.follower == userslug
-            ).filter(
-                TopicFollower.topic == tpc
-            ).first()
-            if not tf:
-                tf = TopicFollower.create(
-                    topic=tpc,
-                    follower=userslug,
-                    auto=True
-                )
-                session.add(tf)
+            try:
+                tf = session.query(
+                    TopicFollower
+                ).where(
+                    TopicFollower.follower == userslug
+                ).filter(
+                    TopicFollower.topic == tpc
+                ).first()
+                if not tf:
+                    tf = TopicFollower.create(
+                        topic=tpc,
+                        follower=userslug,
+                        auto=True
+                    )
+                    session.add(tf)
+            except IntegrityError:
+                print('[migration.shout] skipped by topic ' + tpc)
+                return

     entry["topics"] = r["topics"]
     entry["cover"] = r["cover"]
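
The hunk above wraps the TopicFollower lookup-and-create in try/except IntegrityError, so a duplicate (topic, follower) row makes the migration skip that shout instead of aborting the whole run. Below is a generic SQLAlchemy-style sketch of the same get-or-create pattern (SQLAlchemy 1.4+ assumed); the Follower model, its unique constraint and the in-memory SQLite engine are illustrative stand-ins, not the project's ORM.

    from sqlalchemy import Column, Integer, String, UniqueConstraint, create_engine
    from sqlalchemy.exc import IntegrityError
    from sqlalchemy.orm import Session, declarative_base

    Base = declarative_base()


    class Follower(Base):
        __tablename__ = "follower"
        id = Column(Integer, primary_key=True)
        topic = Column(String, nullable=False)
        follower = Column(String, nullable=False)
        __table_args__ = (UniqueConstraint("topic", "follower"),)


    def follow(session, topic, user):
        # get-or-create guarded by IntegrityError, as in the migration hunk
        try:
            exists = (
                session.query(Follower)
                .filter(Follower.topic == topic)
                .filter(Follower.follower == user)
                .first()
            )
            if not exists:
                session.add(Follower(topic=topic, follower=user))
                session.commit()
        except IntegrityError:
            # another writer created the same (topic, follower) pair first
            session.rollback()
            print("[migration.shout] skipped by topic " + topic)


    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)
    with Session(engine) as s:
        follow(s, "ecology", "user-1")
        follow(s, "ecology", "user-1")  # second call finds the row and does nothing
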
@@ -205,7 +203,6 @@ async def migrate(entry, storage):
     user = None
     del shout_dict["topics"]
     with local_session() as session:
-        # c = session.query(Community).all().pop()
         if not user and userslug:
             user = session.query(User).filter(User.slug == userslug).first()
         if not user and userdata:

@@ -200,7 +200,6 @@
 "ecology": "ecology",
 "economics": "economics",
 "eda": "food",
 "editing": "editing",
 "editorial-statements": "editorial-statements",
 "eduard-limonov": "eduard-limonov",
 "education": "education",
@@ -597,7 +596,6 @@
 "r-b": "rnb",
 "rasizm": "racism",
 "realizm": "realism",
 "redaktura": "editorial",
 "refleksiya": "reflection",
 "reggi": "reggae",
 "religion": "religion",
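
The two JSON hunks above edit a slug replacement table mapping transliterated Russian topic slugs to their English equivalents. A small sketch of how such a table could be applied when normalizing slugs; normalize_slug and the fall-back-to-original behaviour are assumptions for illustration, not the project's code.

    # Hypothetical lookup against a replacement table like the one edited above:
    # return the mapped slug when present, otherwise keep the raw slug.
    REPLACEMENTS = {
        "eda": "food",
        "rasizm": "racism",
        "redaktura": "editorial",
        "reggi": "reggae",
    }


    def normalize_slug(raw_slug, replacements=REPLACEMENTS):
        return replacements.get(raw_slug, raw_slug)


    print(normalize_slug("redaktura"))  # -> editorial
    print(normalize_slug("religion"))   # -> religion (no mapping, kept as-is)
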
@@ -1,6 +1,6 @@
 from base.orm import local_session
 from migration.extract import extract_md, html2text
-from orm import Topic, Community
+from orm import Topic


 def migrate(entry):
@@ -8,9 +8,7 @@ def migrate(entry):
     topic_dict = {
         "slug": entry["slug"],
         "oid": entry["_id"],
-        "title": entry["title"].replace(" ", " "),
-        "children": [],
-        "community": Community.default_community.slug,
+        "title": entry["title"].replace(" ", " ")
     }
     topic_dict["body"] = extract_md(html2text(body_orig), entry["_id"])
     with local_session() as session:

@@ -36,6 +36,7 @@ def migrate(entry):
     )
     bio = BeautifulSoup(entry.get("profile").get("bio") or "", features="lxml").text
     if bio.startswith('<'):
         print('[migration] bio! ' + bio)
         bio = BeautifulSoup(bio, features="lxml").text
     bio = bio.replace('\(', '(').replace('\)', ')')
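
The user-migration hunk parses the bio with BeautifulSoup, re-parses it when the result still starts with '<' (i.e. the first pass left markup-looking text), and finally unescapes \( and \). A compact sketch of that two-pass cleanup, assuming bs4 and lxml are installed; clean_bio is an illustrative helper name.

    from bs4 import BeautifulSoup


    def clean_bio(raw_html):
        # first pass: strip tags from the stored bio
        bio = BeautifulSoup(raw_html or "", features="lxml").text
        if bio.startswith("<"):
            # the result still looks like markup (e.g. escaped HTML), parse it again
            bio = BeautifulSoup(bio, features="lxml").text
        # unescape literal \( and \) sequences
        return bio.replace("\\(", "(").replace("\\)", ")")


    print(clean_bio("<p>пишет о музыке \\(и не только\\)</p>"))  # -> пишет о музыке (и не только)
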