from base.orm import local_session from datetime import datetime, timezone from dateutil.parser import parse as date_parse from migration.extract import extract_html, extract_media from orm.reaction import Reaction, ReactionKind from orm.shout import Shout, ShoutReactionsFollower, ShoutTopic from orm.topic import Topic, TopicFollower from orm.user import User from services.stat.viewed import ViewedStorage from sqlalchemy.exc import IntegrityError from transliterate import translit import json import re OLD_DATE = "2016-03-05 22:22:00.350000" ts = datetime.now(tz=timezone.utc) type2layout = { "Article": "article", "Literature": "literature", "Music": "music", "Video": "video", "Image": "image", } anondict = {"slug": "anonymous", "id": 1, "name": "Аноним"} discours = {"slug": "discours", "id": 2, "name": "Дискурс"} def get_shout_slug(entry): slug = entry.get("slug", "") if not slug: for friend in entry.get("friendlySlugs", []): slug = friend.get("slug", "") if slug: break slug = re.sub("[^0-9a-zA-Z]+", "-", slug) return slug def create_author_from_app(app): user = None userdata = None # check if email is used if app["email"]: with local_session() as session: user = session.query(User).where(User.email == app["email"]).first() if not user: # print('[migration] app %r' % app) name = app.get("name") if name: slug = translit(name, "ru", reversed=True).lower() slug = re.sub("[^0-9a-zA-Z]+", "-", slug) print("[migration] created slug %s" % slug) # check if slug is used if slug: user = session.query(User).where(User.slug == slug).first() # get slug from email if user: slug = app["email"].split("@")[0] user = session.query(User).where(User.slug == slug).first() # one more try if user: slug += "-author" user = session.query(User).where(User.slug == slug).first() # create user with application data if not user: userdata = { "username": app["email"], "email": app["email"], "name": app.get("name", ""), "emailConfirmed": False, "slug": slug, "createdAt": ts, "lastSeen": ts, } # print('[migration] userdata %r' % userdata) user = User.create(**userdata) session.add(user) session.commit() userdata["id"] = user.id userdata = user.dict() return userdata else: raise Exception("app is not ok", app) async def create_shout(shout_dict): s = Shout.create(**shout_dict) author = s.authors[0] with local_session() as session: srf = ( session.query(ShoutReactionsFollower) .where(ShoutReactionsFollower.shout == s.id) .filter(ShoutReactionsFollower.follower == author.id) .first() ) if not srf: srf = ShoutReactionsFollower.create(shout=s.id, follower=author.id, auto=True) session.add(srf) session.commit() return s async def get_user(entry, storage): app = entry.get("application") userdata = None user_oid = None if app: userdata = create_author_from_app(app) else: user_oid = entry.get("createdBy") if user_oid == "0": userdata = discours elif user_oid: userdata = storage["users"]["by_oid"].get(user_oid) if not userdata: print("no userdata by oid, anonymous") userdata = anondict print(app) # cleanup slug if userdata: slug = userdata.get("slug", "") if slug: slug = re.sub("[^0-9a-zA-Z]+", "-", slug) userdata["slug"] = slug else: userdata = anondict user = await process_user(userdata, storage, user_oid) return user, user_oid async def migrate(entry, storage): author, user_oid = await get_user(entry, storage) r = { "layout": type2layout[entry["type"]], "title": entry["title"], "authors": [ author, ], "slug": get_shout_slug(entry), "cover": ( "https://images.discours.io/unsafe/" + entry["thumborId"] if entry.get("thumborId") else entry.get("image", {}).get("url") ), "visibility": "public" if entry.get("published") else "community", "publishedAt": date_parse(entry.get("publishedAt")) if entry.get("published") else None, "deletedAt": date_parse(entry.get("deletedAt")) if entry.get("deletedAt") else None, "createdAt": date_parse(entry.get("createdAt", OLD_DATE)), "updatedAt": date_parse(entry["updatedAt"]) if "updatedAt" in entry else ts, "createdBy": author.id, "topics": await add_topics_follower(entry, storage, author), "body": extract_html(entry, cleanup=True), } # main topic patch r["mainTopic"] = r["topics"][0] # published author auto-confirm if entry.get("published"): with local_session() as session: # update user.emailConfirmed if published author.emailConfirmed = True session.add(author) session.commit() # media media = extract_media(entry) r["media"] = json.dumps(media, ensure_ascii=True) if media else None # ----------------------------------- copy shout_dict = r.copy() del shout_dict["topics"] try: # save shout to db shout_dict["oid"] = entry.get("_id", "") shout = await create_shout(shout_dict) except IntegrityError as e: print("[migration] create_shout integrity error", e) shout = await resolve_create_shout(shout_dict) except Exception as e: raise Exception(e) # udpate data shout_dict = shout.dict() shout_dict["authors"] = [ author.dict(), ] # shout topics aftermath shout_dict["topics"] = await topics_aftermath(r, storage) # content_item ratings to reactions await content_ratings_to_reactions(entry, shout_dict["slug"]) # shout views await ViewedStorage.increment( shout_dict["slug"], amount=entry.get("views", 1), viewer="old-discours" ) # del shout_dict['ratings'] storage["shouts"]["by_oid"][entry["_id"]] = shout_dict storage["shouts"]["by_slug"][shout_dict["slug"]] = shout_dict return shout_dict async def add_topics_follower(entry, storage, user): topics = set([]) category = entry.get("category") topics_by_oid = storage["topics"]["by_oid"] oids = [ category, ] + entry.get("tags", []) for toid in oids: tslug = topics_by_oid.get(toid, {}).get("slug") if tslug: topics.add(tslug) ttt = list(topics) # add author as TopicFollower with local_session() as session: for tpcslug in topics: try: tpc = session.query(Topic).where(Topic.slug == tpcslug).first() if tpc: tf = ( session.query(TopicFollower) .where(TopicFollower.follower == user.id) .filter(TopicFollower.topic == tpc.id) .first() ) if not tf: tf = TopicFollower.create(topic=tpc.id, follower=user.id, auto=True) session.add(tf) session.commit() except IntegrityError: print("[migration.shout] hidden by topic " + tpc.slug) # main topic maintopic = storage["replacements"].get(topics_by_oid.get(category, {}).get("slug")) if maintopic in ttt: ttt.remove(maintopic) ttt.insert(0, maintopic) return ttt async def process_user(userdata, storage, oid): with local_session() as session: uid = userdata.get("id") # anonymous as if not uid: print(userdata) print("has no id field, set it @anonymous") userdata = anondict uid = 1 user = session.query(User).filter(User.id == uid).first() if not user: try: slug = userdata["slug"].lower().strip() slug = re.sub("[^0-9a-zA-Z]+", "-", slug) userdata["slug"] = slug user = User.create(**userdata) session.add(user) session.commit() except IntegrityError: print(f"[migration] user creating with slug {userdata['slug']}") print("[migration] from userdata") print(userdata) raise Exception("[migration] cannot create user in content_items.get_user()") if user.id == 946: print("[migration] ***************** ALPINA") if user.id == 2: print("[migration] +++++++++++++++++ DISCOURS") userdata["id"] = user.id userdata["createdAt"] = user.createdAt storage["users"]["by_slug"][userdata["slug"]] = userdata storage["users"]["by_oid"][oid] = userdata if not user: raise Exception("could not get a user") return user async def resolve_create_shout(shout_dict): with local_session() as session: s = session.query(Shout).filter(Shout.slug == shout_dict["slug"]).first() bump = False if s: if s.createdAt != shout_dict["createdAt"]: # create new with different slug shout_dict["slug"] += "-" + shout_dict["layout"] try: await create_shout(shout_dict) except IntegrityError as e: print(e) bump = True else: # update old for key in shout_dict: if key in s.__dict__: if s.__dict__[key] != shout_dict[key]: print("[migration] shout already exists, but differs in %s" % key) bump = True else: print("[migration] shout already exists, but lacks %s" % key) bump = True if bump: s.update(shout_dict) else: print("[migration] something went wrong with shout: \n%r" % shout_dict) raise Exception("") session.commit() return s async def topics_aftermath(entry, storage): r = [] for tpc in filter(lambda x: bool(x), entry["topics"]): oldslug = tpc newslug = storage["replacements"].get(oldslug, oldslug) if newslug: with local_session() as session: shout = session.query(Shout).where(Shout.slug == entry["slug"]).first() new_topic = session.query(Topic).where(Topic.slug == newslug).first() shout_topic_old = ( session.query(ShoutTopic) .join(Shout) .join(Topic) .filter(Shout.slug == entry["slug"]) .filter(Topic.slug == oldslug) .first() ) if shout_topic_old: shout_topic_old.update({"topic": new_topic.id}) else: shout_topic_new = ( session.query(ShoutTopic) .join(Shout) .join(Topic) .filter(Shout.slug == entry["slug"]) .filter(Topic.slug == newslug) .first() ) if not shout_topic_new: try: ShoutTopic.create(**{"shout": shout.id, "topic": new_topic.id}) except Exception: print("[migration] shout topic error: " + newslug) session.commit() if newslug not in r: r.append(newslug) else: print("[migration] ignored topic slug: \n%r" % tpc["slug"]) # raise Exception return r async def content_ratings_to_reactions(entry, slug): try: with local_session() as session: for content_rating in entry.get("ratings", []): rater = ( session.query(User).filter(User.oid == content_rating["createdBy"]).first() ) or User.default_user shout = session.query(Shout).where(Shout.slug == slug).first() cts = content_rating.get("createdAt") reaction_dict = { "createdAt": date_parse(cts) if cts else None, "kind": ReactionKind.LIKE if content_rating["value"] > 0 else ReactionKind.DISLIKE, "createdBy": rater.id, "shout": shout.id, } reaction = ( session.query(Reaction) .filter(Reaction.shout == reaction_dict["shout"]) .filter(Reaction.createdBy == reaction_dict["createdBy"]) .filter(Reaction.kind == reaction_dict["kind"]) .first() ) if reaction: k = ReactionKind.AGREE if content_rating["value"] > 0 else ReactionKind.DISAGREE reaction_dict["kind"] = k reaction.update(reaction_dict) session.add(reaction) else: rea = Reaction.create(**reaction_dict) session.add(rea) # shout_dict['ratings'].append(reaction_dict) session.commit() except Exception: print("[migration] content_item.ratings error: \n%r" % content_rating)