2022-09-03 10:50:14 +00:00
|
|
|
|
""" cmd managed migration """
|
2023-10-26 21:07:35 +00:00
|
|
|
|
import asyncio
|
|
|
|
|
import gc
|
|
|
|
|
import json
|
|
|
|
|
import sys
|
2022-11-23 14:09:35 +00:00
|
|
|
|
from datetime import datetime, timezone
|
2023-10-26 21:07:35 +00:00
|
|
|
|
|
|
|
|
|
import bs4
|
|
|
|
|
|
2023-01-18 21:41:05 +00:00
|
|
|
|
from migration.export import export_mdx
|
2022-09-17 18:12:14 +00:00
|
|
|
|
from migration.tables.comments import migrate as migrateComment
|
|
|
|
|
from migration.tables.comments import migrate_2stage as migrateComment_2stage
|
2022-11-10 05:40:32 +00:00
|
|
|
|
from migration.tables.content_items import get_shout_slug
|
|
|
|
|
from migration.tables.content_items import migrate as migrateShout
|
2023-10-30 21:00:55 +00:00
|
|
|
|
|
|
|
|
|
# from migration.tables.remarks import migrate as migrateRemark
|
2022-09-17 18:12:14 +00:00
|
|
|
|
from migration.tables.topics import migrate as migrateTopic
|
2023-10-30 21:00:55 +00:00
|
|
|
|
from migration.tables.users import migrate as migrateUser
|
2022-09-17 18:12:14 +00:00
|
|
|
|
from migration.tables.users import migrate_2stage as migrateUser_2stage
|
2023-10-30 21:00:55 +00:00
|
|
|
|
from migration.tables.users import post_migrate as users_post_migrate
|
2022-11-19 11:35:34 +00:00
|
|
|
|
from orm import init_tables
|
2023-01-18 21:41:05 +00:00
|
|
|
|
from orm.reaction import Reaction
|
2022-08-11 09:59:35 +00:00
|
|
|
|
|
2022-11-23 14:09:35 +00:00
|
|
|
|
# Current UTC date stamp in YYYYMMDD form (e.g. "20231026").
TODAY = datetime.now(tz=timezone.utc).strftime("%Y%m%d")

# Legacy placeholder timestamp; presumably a fallback for records with no
# usable creation date — confirm at call sites.
OLD_DATE = "2016-03-05 22:22:00.350000"
|
2022-08-11 09:14:12 +00:00
|
|
|
|
|
|
|
|
|
|
2022-09-17 18:12:14 +00:00
|
|
|
|
async def users_handle(storage):
    """Migrate users before any other entity type."""
    oid_to_slug = {}
    migrated = 0
    print("[migration] migrating %d users" % (len(storage["users"]["data"])))
    for src in storage["users"]["data"]:
        oid = src["_id"]
        user = migrateUser(src)
        # keep the full record addressable by the legacy mongo id
        storage["users"]["by_oid"][oid] = user  # full
        # strip private fields before exposing the record by slug
        for private_field in ("password", "emailConfirmed", "username", "email"):
            del user[private_field]
        storage["users"]["by_slug"][user["slug"]] = user  # public
        oid_to_slug[user["oid"]] = user["slug"]
        migrated += 1
    # second pass resolves cross-user references through the oid -> slug map
    ce = 0
    for src in storage["users"]["data"]:
        ce += migrateUser_2stage(src, oid_to_slug)
    users_post_migrate()
|
2022-08-11 09:14:12 +00:00
|
|
|
|
|
|
|
|
|
|
2022-09-17 18:12:14 +00:00
|
|
|
|
async def topics_handle(storage):
    """Build topics out of the legacy categories and tags."""
    migrated_count = 0
    replacements = storage["replacements"]
    for raw in storage["topics"]["tags"] + storage["topics"]["cats"]:
        if raw["slug"] not in replacements:
            # slugs without a replacement entry are deliberately skipped
            print("[migration] topic " + raw["slug"] + " ignored")
            continue
        raw["slug"] = replacements[raw["slug"]]
        topic = migrateTopic(raw)
        storage["topics"]["by_oid"][raw["_id"]] = topic
        storage["topics"]["by_slug"][raw["slug"]] = topic
        migrated_count += 1
    # collapse renamed slugs: drop the old key and point its oid at the new record
    for oldslug, newslug in replacements.items():
        if oldslug != newslug and oldslug in storage["topics"]["by_slug"]:
            oid = storage["topics"]["by_slug"][oldslug]["_id"]
            del storage["topics"]["by_slug"][oldslug]
            storage["topics"]["by_oid"][oid] = storage["topics"]["by_slug"][newslug]
    print("[migration] " + str(migrated_count) + " topics migrated")
    print("[migration] " + str(len(storage["topics"]["by_oid"].values())) + " topics by oid")
    print("[migration] " + str(len(storage["topics"]["by_slug"].values())) + " topics by slug")
|
2022-08-11 09:14:12 +00:00
|
|
|
|
|
|
|
|
|
|
2022-08-18 06:12:46 +00:00
|
|
|
|
async def shouts_handle(storage, args):
    """Migrate content items one by one.

    Args:
        storage: in-memory migration storage dict (see data_load()).
        args: CLI argument list; "-" plus slugs restricts migration to those
            slugs, and "mdx" additionally exports published shouts as MDX.
    """
    import re  # local import: only needed for the dataset text cleanup below

    counter = 0
    discours_author = 0
    anonymous_author = 0
    pub_counter = 0
    ignored = 0
    topics_dataset_bodies = []
    topics_dataset_tlist = []
    for entry in storage["shouts"]["data"]:
        gc.collect()  # entries can be large; keep peak memory bounded
        # slug
        slug = get_shout_slug(entry)

        # single slug mode: skip everything not explicitly requested
        if "-" in args and slug not in args:
            continue

        # migrate
        shout_dict = await migrateShout(entry, storage)
        if shout_dict:
            storage["shouts"]["by_oid"][entry["_id"]] = shout_dict
            storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict
            # shouts.topics
            if not shout_dict["topics"]:
                print("[migration] no topics!")

            # with author (first author is treated as the primary one)
            author = shout_dict["authors"][0]
            if author["slug"] == "discours":
                discours_author += 1
            if author["slug"] == "anonymous":
                anonymous_author += 1
            # print('[migration] ' + shout['slug'] + ' with author ' + author)

            if entry.get("published"):
                if "mdx" in args:
                    export_mdx(shout_dict)
                pub_counter += 1

            # print main counter
            counter += 1
            # FIX: was `counter + 1` after the increment — reported count was
            # off by one relative to the totals printed at the end.
            print(
                "[migration] shouts_handle %d: %s @%s"
                % (counter, shout_dict["slug"], author["slug"])
            )

            b = bs4.BeautifulSoup(shout_dict["body"], "html.parser")
            # FIX: str.replace() takes literal strings, not regexes — the old
            # call was a no-op; re.sub actually strips non-letter characters.
            texts = [re.sub(r"[^а-яА-Яa-zA-Z]", "", shout_dict["title"].lower())]
            # find_all(string=True) replaces the deprecated findAll(text=True)
            texts = texts + b.find_all(string=True)
            topics_dataset_bodies.append(" ".join([x.strip().lower() for x in texts]))
            topics_dataset_tlist.append(shout_dict["topics"])
        else:
            ignored += 1

    # np.savetxt('topics_dataset.csv', (topics_dataset_bodies, topics_dataset_tlist), delimiter=',
    # ', fmt='%s')

    print("[migration] " + str(counter) + " content items were migrated")
    print("[migration] " + str(pub_counter) + " have been published")
    print("[migration] " + str(discours_author) + " authored by @discours")
    print("[migration] " + str(anonymous_author) + " authored by @anonymous")
    # FIX: the ignored counter was tracked but never reported
    print("[migration] " + str(ignored) + " content items ignored")
|
2022-08-11 09:14:12 +00:00
|
|
|
|
|
|
|
|
|
|
2023-10-30 21:00:55 +00:00
|
|
|
|
# async def remarks_handle(storage):
|
|
|
|
|
# print("[migration] comments")
|
|
|
|
|
# c = 0
|
|
|
|
|
# for entry_remark in storage["remarks"]["data"]:
|
|
|
|
|
# remark = await migrateRemark(entry_remark, storage)
|
|
|
|
|
# c += 1
|
|
|
|
|
# print("[migration] " + str(c) + " remarks migrated")
|
2023-01-17 06:19:12 +00:00
|
|
|
|
|
|
|
|
|
|
2022-08-18 06:12:46 +00:00
|
|
|
|
async def comments_handle(storage):
    """Migrate reactions (comments) in two stages."""
    print("[migration] comments")
    oid_to_id = {}
    skipped = 0
    missed_shouts = {}
    for legacy in storage["reactions"]["data"]:
        if legacy.get("deleted"):
            continue
        outcome = await migrateComment(legacy, storage)
        if isinstance(outcome, str):
            # a string outcome marks a shout that could not be resolved
            missed_shouts[outcome] = legacy
        elif isinstance(outcome, Reaction):
            data = outcome.dict()
            oid_to_id[data["oid"]] = data["id"]
        else:
            skipped += 1

    # second stage resolves cross-comment references through the oid -> id map
    for legacy in storage["reactions"]["data"]:
        migrateComment_2stage(legacy, oid_to_id)
    print("[migration] " + str(len(oid_to_id)) + " comments migrated")
    print("[migration] " + str(skipped) + " comments ignored")
    print("[migration] " + str(len(missed_shouts.keys())) + " commented shouts missed")
    dropped = 0
    for orphan in missed_shouts.values():
        # NOTE(review): each value is a single comment dict, so len() counts its
        # keys, not comments — preserved as-is; confirm the intended semantics.
        dropped += len(orphan)
    print("[migration] " + str(dropped) + " comments dropped")
|
2022-08-11 09:14:12 +00:00
|
|
|
|
|
|
|
|
|
|
2022-08-18 06:12:46 +00:00
|
|
|
|
async def all_handle(storage, args):
    """Run every migration stage in order: users, topics, shouts, comments."""
    print("[migration] handle everything")
    # users and topics go first; shouts and comments are migrated afterwards
    await users_handle(storage)
    await topics_handle(storage)
    print("[migration] users and topics are migrated")
    await shouts_handle(storage, args)
    # print("[migration] remarks...")
    # await remarks_handle(storage)
    print("[migration] migrating comments")
    await comments_handle(storage)
    # export_email_subscriptions()
    print("[migration] done!")
|
2022-08-11 09:14:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_json(path):
    """Parse a JSON file, ensuring the file handle is closed."""
    # FIX: the original used json.loads(open(path).read()) and never closed
    # the handle; a context manager closes it deterministically.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def data_load():
    """Load all legacy JSON dumps from migration/data into one storage dict.

    Returns:
        dict: nested mapping with per-entity "by_oid"/"by_slug" indexes and
        raw "data" lists consumed by the *_handle coroutines.
    """
    storage = {
        "content_items": {
            "by_oid": {},
            "by_slug": {},
        },
        "shouts": {"by_oid": {}, "by_slug": {}, "data": []},
        "reactions": {"by_oid": {}, "by_slug": {}, "by_content": {}, "data": []},
        "topics": {
            "by_oid": {},
            "by_slug": {},
            "cats": [],
            "tags": [],
        },
        "remarks": {"data": []},
        "users": {"by_oid": {}, "by_slug": {}, "data": []},
        "replacements": _load_json("migration/tables/replacements.json"),
    }
    # NOTE: the original wrapped everything below in a useless
    # `try: ... except Exception as e: raise e` — removed; behavior is identical.
    users_data = _load_json("migration/data/users.json")
    print("[migration.load] " + str(len(users_data)) + " users ")
    tags_data = _load_json("migration/data/tags.json")
    storage["topics"]["tags"] = tags_data
    print("[migration.load] " + str(len(tags_data)) + " tags ")
    cats_data = _load_json("migration/data/content_item_categories.json")
    storage["topics"]["cats"] = cats_data
    print("[migration.load] " + str(len(cats_data)) + " cats ")
    comments_data = _load_json("migration/data/comments.json")
    storage["reactions"]["data"] = comments_data
    print("[migration.load] " + str(len(comments_data)) + " comments ")
    content_data = _load_json("migration/data/content_items.json")
    storage["shouts"]["data"] = content_data
    print("[migration.load] " + str(len(content_data)) + " content items ")
    remarks_data = _load_json("migration/data/remarks.json")
    storage["remarks"]["data"] = remarks_data
    print("[migration.load] " + str(len(remarks_data)) + " remarks data ")

    # fill out storage
    for x in users_data:
        storage["users"]["by_oid"][x["_id"]] = x
        # storage['users']['by_slug'][x['slug']] = x
        # no user.slug yet
    print("[migration.load] " + str(len(storage["users"]["by_oid"].keys())) + " users by oid")
    for x in tags_data:
        storage["topics"]["by_oid"][x["_id"]] = x
        storage["topics"]["by_slug"][x["slug"]] = x
    for x in cats_data:
        storage["topics"]["by_oid"][x["_id"]] = x
        storage["topics"]["by_slug"][x["slug"]] = x
    print(
        "[migration.load] " + str(len(storage["topics"]["by_slug"].keys())) + " topics by slug"
    )
    for item in content_data:
        slug = get_shout_slug(item)
        storage["content_items"]["by_slug"][slug] = item
        storage["content_items"]["by_oid"][item["_id"]] = item
    print("[migration.load] " + str(len(content_data)) + " content items")
    for x in comments_data:
        storage["reactions"]["by_oid"][x["_id"]] = x
        cid = x["contentItem"]
        storage["reactions"]["by_content"][cid] = x
        ci = storage["content_items"]["by_oid"].get(cid, {})
        if "slug" in ci:
            storage["reactions"]["by_slug"][ci["slug"]] = x
    print(
        "[migration.load] "
        + str(len(storage["reactions"]["by_content"].keys()))
        + " with comments"
    )
    storage["users"]["data"] = users_data
    storage["topics"]["tags"] = tags_data
    storage["topics"]["cats"] = cats_data
    storage["shouts"]["data"] = content_data
    storage["reactions"]["data"] = comments_data
    return storage
|
2022-08-11 09:14:12 +00:00
|
|
|
|
|
|
|
|
|
|
2022-12-25 05:45:13 +00:00
|
|
|
|
async def handling_migration():
    """Create the database schema, load the dumps, and run all stages."""
    # schema must exist before any entity is written
    init_tables()
    # sys.argv is passed through so handlers can see slug filters / "mdx" flag
    await all_handle(data_load(), sys.argv)
|
2022-11-29 11:51:06 +00:00
|
|
|
|
|
|
|
|
|
|
2022-12-25 05:45:13 +00:00
|
|
|
|
def process():
    """Synchronous entry point: run the whole migration to completion."""
    # FIX: asyncio.get_event_loop() + run_until_complete is deprecated since
    # Python 3.10; asyncio.run creates and closes a fresh loop correctly.
    asyncio.run(handling_migration())
|
2022-12-25 05:45:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# script entry point: run the migration when executed directly
if __name__ == "__main__":
    process()
|