From 152c3362a000cffce0de74bb05c9613ceb8d3758 Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sat, 26 Nov 2022 01:35:42 +0300 Subject: [PATCH 01/20] fixes-for-inbox-auth-and-startup-faster --- auth/authenticate.py | 42 ++++++++------- auth/credentials.py | 6 ++- auth/jwtcodec.py | 2 +- base/redis.py | 10 ++-- main.py | 4 ++ migration/tables/users.py | 2 +- resolvers/__init__.py | 5 +- resolvers/auth.py | 4 +- resolvers/inbox/chats.py | 57 ++++++--------------- resolvers/inbox/load.py | 99 +++++++++++++++++++----------------- resolvers/inbox/unread.py | 2 +- schema.graphql | 9 ++-- services/stat/topicstat.py | 18 +++---- services/zine/shoutauthor.py | 46 +++++++++++++++++ 14 files changed, 168 insertions(+), 138 deletions(-) create mode 100644 services/zine/shoutauthor.py diff --git a/auth/authenticate.py b/auth/authenticate.py index 95695604..242d2793 100644 --- a/auth/authenticate.py +++ b/auth/authenticate.py @@ -9,7 +9,7 @@ from auth.credentials import AuthCredentials, AuthUser from services.auth.users import UserStorage from settings import SESSION_TOKEN_HEADER from auth.tokenstorage import SessionToken -from base.exceptions import InvalidToken +from base.exceptions import InvalidToken, OperationNotAllowed, Unauthorized class JWTAuthenticate(AuthenticationBackend): @@ -30,27 +30,26 @@ class JWTAuthenticate(AuthenticationBackend): try: if len(token.split('.')) > 1: payload = await SessionToken.verify(token) + if payload is None: + return AuthCredentials(scopes=[]), AuthUser(user_id=None) + user = await UserStorage.get_user(payload.user_id) + if not user: + return AuthCredentials(scopes=[]), AuthUser(user_id=None) + scopes = await user.get_permission() + return ( + AuthCredentials( + user_id=payload.user_id, + scopes=scopes, + logged_in=True + ), + user, + ) else: InvalidToken("please try again") except Exception as exc: print("[auth.authenticate] session token verify error") print(exc) - return AuthCredentials(scopes=[], error_message=str(exc)), AuthUser( - user_id=None - ) - - if payload is None: - return AuthCredentials(scopes=[]), AuthUser(user_id=None) - - user = await UserStorage.get_user(payload.user_id) - if not user: - return AuthCredentials(scopes=[]), AuthUser(user_id=None) - - scopes = await user.get_permission() - return ( - AuthCredentials(user_id=payload.user_id, scopes=scopes, logged_in=True), - user, - ) + return AuthCredentials(scopes=[], error_message=str(exc)), AuthUser(user_id=None) def login_required(func): @@ -58,10 +57,9 @@ def login_required(func): async def wrap(parent, info: GraphQLResolveInfo, *args, **kwargs): # print('[auth.authenticate] login required for %r with info %r' % (func, info)) # debug only auth: AuthCredentials = info.context["request"].auth - if auth and auth.user_id: - print(auth) # debug only + # print(auth) if not auth.logged_in: - return {"error": auth.error_message or "Please login"} + raise OperationNotAllowed(auth.error_message or "Please login") return await func(parent, info, *args, **kwargs) return wrap @@ -73,9 +71,9 @@ def permission_required(resource, operation, func): print('[auth.authenticate] permission_required for %r with info %r' % (func, info)) # debug only auth: AuthCredentials = info.context["request"].auth if not auth.logged_in: - return {"error": auth.error_message or "Please login"} + raise Unauthorized(auth.error_message or "Please login") - # TODO: add check permission logix + # TODO: add actual check permission logix here return await func(parent, info, *args, **kwargs) diff --git a/auth/credentials.py 
b/auth/credentials.py index 401ae420..15738d16 100644 --- a/auth/credentials.py +++ b/auth/credentials.py @@ -2,7 +2,7 @@ from typing import List, Optional, Text from pydantic import BaseModel -from base.exceptions import OperationNotAllowed +from base.exceptions import Unauthorized class Permission(BaseModel): @@ -17,11 +17,13 @@ class AuthCredentials(BaseModel): @property def is_admin(self): + # TODO: check admin logix return True async def permissions(self) -> List[Permission]: if self.user_id is None: - raise OperationNotAllowed("Please login first") + raise Unauthorized("Please login first") + # TODO: implement permissions logix return NotImplemented() diff --git a/auth/jwtcodec.py b/auth/jwtcodec.py index c2feacd3..387df057 100644 --- a/auth/jwtcodec.py +++ b/auth/jwtcodec.py @@ -34,7 +34,7 @@ class JWTCodec: issuer="discours" ) r = TokenPayload(**payload) - print('[auth.jwtcodec] debug payload %r' % r) + # print('[auth.jwtcodec] debug payload %r' % r) return r except jwt.InvalidIssuedAtError: print('[auth.jwtcodec] invalid issued at: %r' % r) diff --git a/base/redis.py b/base/redis.py index 7468af0a..ccc3758a 100644 --- a/base/redis.py +++ b/base/redis.py @@ -12,6 +12,7 @@ class RedisCache: if self._instance is not None: return self._instance = await from_url(self._uri, encoding="utf-8") + # print(self._instance) async def disconnect(self): if self._instance is None: @@ -23,10 +24,11 @@ class RedisCache: async def execute(self, command, *args, **kwargs): while not self._instance: await sleep(1) - try: - await self._instance.execute_command(command, *args, **kwargs) - except Exception: - pass + try: + print("[redis] " + command + ' ' + ' '.join(args)) + return await self._instance.execute_command(command, *args, **kwargs) + except Exception: + pass async def lrange(self, key, start, stop): return await self._instance.lrange(key, start, stop) diff --git a/main.py b/main.py index 3e1deaae..49746195 100644 --- a/main.py +++ b/main.py @@ -20,6 +20,7 @@ from services.stat.reacted import ReactedStorage from services.stat.topicstat import TopicStat from services.stat.viewed import ViewedStorage from services.zine.gittask import GitTask +from services.zine.shoutauthor import ShoutAuthorStorage from settings import DEV_SERVER_STATUS_FILE_NAME import_module("resolvers") @@ -39,11 +40,14 @@ async def start_up(): print(views_stat_task) reacted_storage_task = asyncio.create_task(ReactedStorage.worker()) print(reacted_storage_task) + shout_author_task = asyncio.create_task(ShoutAuthorStorage.worker()) + print(shout_author_task) topic_stat_task = asyncio.create_task(TopicStat.worker()) print(topic_stat_task) git_task = asyncio.create_task(GitTask.git_task_worker()) print(git_task) + async def dev_start_up(): if exists(DEV_SERVER_STATUS_FILE_NAME): return diff --git a/migration/tables/users.py b/migration/tables/users.py index fe9b7374..6db7a243 100644 --- a/migration/tables/users.py +++ b/migration/tables/users.py @@ -17,7 +17,7 @@ def migrate(entry): "username": email, "email": email, "createdAt": parse(entry["createdAt"]), - "emailConfirmed": bool(entry["emails"][0]["verified"]), + "emailConfirmed": ("@discours.io" in email) or bool(entry["emails"][0]["verified"]), "muted": False, # amnesty "bio": entry["profile"].get("bio", ""), "notifications": [], diff --git a/resolvers/__init__.py b/resolvers/__init__.py index fccca3a0..da8800a2 100644 --- a/resolvers/__init__.py +++ b/resolvers/__init__.py @@ -48,8 +48,8 @@ from resolvers.zine.load import ( from resolvers.inbox.chats import ( create_chat, 
delete_chat, - update_chat, - invite_to_chat + update_chat + ) from resolvers.inbox.messages import ( create_message, @@ -111,7 +111,6 @@ __all__ = [ # inbox "load_chats", "load_messages_by", - "invite_to_chat", "create_chat", "delete_chat", "update_chat", diff --git a/resolvers/auth.py b/resolvers/auth.py index d6f1d40b..317d2733 100644 --- a/resolvers/auth.py +++ b/resolvers/auth.py @@ -13,7 +13,7 @@ from auth.identity import Identity, Password from auth.jwtcodec import JWTCodec from auth.tokenstorage import TokenStorage from base.exceptions import (BaseHttpException, InvalidPassword, InvalidToken, - ObjectNotExist, OperationNotAllowed) + ObjectNotExist, OperationNotAllowed, Unauthorized) from base.orm import local_session from base.resolvers import mutation, query from orm import Role, User @@ -37,7 +37,7 @@ async def get_current_user(_, info): "news": await user_subscriptions(user.slug), } else: - raise OperationNotAllowed("No session token present in request, try to login") + raise Unauthorized("No session token present in request, try to login") @mutation.field("confirmEmail") diff --git a/resolvers/inbox/chats.py b/resolvers/inbox/chats.py index f4b1ca1e..e24acf0c 100644 --- a/resolvers/inbox/chats.py +++ b/resolvers/inbox/chats.py @@ -7,43 +7,6 @@ from base.redis import redis from base.resolvers import mutation -async def add_user_to_chat(user_slug: str, chat_id: str, chat=None): - for member in chat["users"]: - chats_ids = await redis.execute("GET", f"chats_by_user/{member}") - if chats_ids: - chats_ids = list(json.loads(chats_ids)) - else: - chats_ids = [] - if chat_id not in chats_ids: - chats_ids.append(chat_id) - await redis.execute("SET", f"chats_by_user/{member}", json.dumps(chats_ids)) - - -@mutation.field("inviteChat") -async def invite_to_chat(_, info, invited: str, chat_id: str): - ''' invite user with :slug to chat with :chat_id ''' - user = info.context["request"].user - chat = await redis.execute("GET", f"chats/{chat_id}") - if not chat: - return { - "error": "chat not exist" - } - chat = dict(json.loads(chat)) - if not chat['private'] and user.slug not in chat['admins']: - return { - "error": "only admins can invite to private chat", - "chat": chat - } - else: - chat["users"].append(invited) - await add_user_to_chat(user.slug, chat_id, chat) - await redis.execute("SET", f"chats/{chat_id}", json.dumps(chat)) - return { - "error": None, - "chat": chat - } - - @mutation.field("updateChat") @login_required async def update_chat(_, info, chat_new: dict): @@ -71,9 +34,8 @@ async def update_chat(_, info, chat_new: dict): "admins": chat_new.get("admins", chat["admins"]), "users": chat_new.get("users", chat["users"]) }) - await add_user_to_chat(user.slug, chat_id, chat) await redis.execute("SET", f"chats/{chat.id}", json.dumps(chat)) - await redis.execute("SET", f"chats/{chat.id}/next_message_id", 0) + await redis.execute("COMMIT") return { "error": None, @@ -97,11 +59,22 @@ async def create_chat(_, info, title="", members=[]): "users": members, "admins": [user.slug, ] } + # double creation protection + cids = await redis.execute("SMEMBERS", f"chats_by_user/{user.slug}") + for cid in cids: + c = await redis.execute("GET", F"chats/{cid.decode('utf-8')}") + isc = [x for x in c["users"] if x not in chat["users"]] + if isc == [] and chat["title"] == c["title"]: + return { + "error": "chat was created before", + "chat": chat + } - await add_user_to_chat(user.slug, chat_id, chat) + for m in members: + await redis.execute("SADD", f"chats_by_user/{m}", chat_id) await 
redis.execute("SET", f"chats/{chat_id}", json.dumps(chat)) await redis.execute("SET", f"chats/{chat_id}/next_message_id", str(0)) - + await redis.execute("COMMIT") return { "error": None, "chat": chat @@ -117,6 +90,8 @@ async def delete_chat(_, info, chat_id: str): chat = dict(json.loads(chat)) if user.slug in chat['admins']: await redis.execute("DEL", f"chats/{chat_id}") + await redis.execute("SREM", "chats_by_user/" + user, chat_id) + await redis.execute("COMMIT") else: return { "error": "chat not exist" diff --git a/resolvers/inbox/load.py b/resolvers/inbox/load.py index cd38690b..871b0700 100644 --- a/resolvers/inbox/load.py +++ b/resolvers/inbox/load.py @@ -5,52 +5,51 @@ from auth.authenticate import login_required from base.redis import redis from base.orm import local_session from base.resolvers import query +from base.exceptions import ObjectNotExist from orm.user import User from resolvers.zine.profile import followed_authors from .unread import get_unread_counter -async def load_messages(chatId: str, limit: int, offset: int): - ''' load :limit messages for :chatId with :offset ''' +async def load_messages(chat_id: str, limit: int, offset: int): + ''' load :limit messages for :chat_id with :offset ''' messages = [] message_ids = await redis.lrange( - f"chats/{chatId}/message_ids", 0 - offset - limit, 0 - offset + f"chats/{chat_id}/message_ids", offset + limit, offset ) if message_ids: message_keys = [ - f"chats/{chatId}/messages/{mid}" for mid in message_ids + f"chats/{chat_id}/messages/{mid}" for mid in message_ids ] messages = await redis.mget(*message_keys) messages = [json.loads(msg) for msg in messages] - return { - "messages": messages, - "error": None - } + return messages @query.field("loadChats") @login_required -async def load_chats(_, info, limit: int, offset: int): +async def load_chats(_, info, limit: int = 50, offset: int = 0): """ load :limit chats of current user with :offset """ user = info.context["request"].user - if user: - chats = await redis.execute("GET", f"chats_by_user/{user.slug}") - if chats: - chats = list(json.loads(chats))[offset:offset + limit] - if not chats: - chats = [] - for c in chats: - c['messages'] = await load_messages(c['id'], limit, offset) - c['unread'] = await get_unread_counter(c['id'], user.slug) - return { - "chats": chats, - "error": None - } - else: - return { - "error": "please login", - "chats": [] - } + print('[inbox] load user\'s chats') + cids = await redis.execute("SMEMBERS", "chats_by_user/" + user.slug) + if cids: + cids = list(cids)[offset:offset + limit] + if not cids: + print('[inbox.load] no chats were found') + cids = [] + chats = [] + for cid in cids: + c = await redis.execute("GET", "chats/" + cid.decode("utf-8")) + if c: + c = json.loads(c) + c['messages'] = await load_messages(cid, 50, 0) + c['unread'] = await get_unread_counter(cid, user.slug) + chats.append(c) + return { + "chats": chats, + "error": None + } @query.field("loadMessagesBy") @@ -58,28 +57,36 @@ async def load_chats(_, info, limit: int, offset: int): async def load_messages_by(_, info, by, limit: int = 50, offset: int = 0): ''' load :amolimitunt messages of :chat_id with :offset ''' user = info.context["request"].user - my_chats = await redis.execute("GET", f"chats_by_user/{user.slug}") - chat_id = by.get('chat') - if chat_id: - chat = await redis.execute("GET", f"chats/{chat_id}") + cids = await redis.execute("SMEMBERS", "chats_by_user/" + user.slug) + by_chat = by.get('chat') + messages = [] + if by_chat: + chat = await redis.execute("GET", 
f"chats/{by_chat}") if not chat: - return { - "error": "chat not exist" - } - messages = await load_messages(chat_id, limit, offset) - user_id = by.get('author') - if user_id: - chats = await redis.execute("GET", f"chats_by_user/{user_id}") - our_chats = list(set(chats) & set(my_chats)) - for c in our_chats: - messages += await load_messages(c, limit, offset) + raise ObjectNotExist("Chat not exists") + messages = await load_messages(by_chat, limit, offset) + by_author = by.get('author') + if by_author: + if not by_chat: + # all author's messages + by_author_cids = await redis.execute("SMEMBERS", f"chats_by_user/{by_author}") + for c in list(by_author_cids & cids): + messages += await load_messages(c, limit, offset) + else: + # author's messages in chat + messages = filter(lambda m: m["author"] == by_author, messages) body_like = by.get('body') if body_like: - for c in my_chats: - mmm = await load_messages(c, limit, offset) - for m in mmm: - if body_like in m["body"]: - messages.append(m) + if not by_chat: + # search in all messages in all user's chats + for c in list(cids): + mmm = await load_messages(c, limit, offset) + for m in mmm: + if body_like in m["body"]: + messages.append(m) + else: + # search in chat's messages + messages = filter(lambda m: body_like in m["body"], messages) days = by.get("days") if days: messages = filter( diff --git a/resolvers/inbox/unread.py b/resolvers/inbox/unread.py index 0075a877..d5d2e553 100644 --- a/resolvers/inbox/unread.py +++ b/resolvers/inbox/unread.py @@ -17,6 +17,6 @@ async def get_total_unread_counter(user_slug: str): if chats: chats = json.loads(chats) for chat_id in chats: - n = await get_unread_counter(chat_id, user_slug) + n = await get_unread_counter(chat_id.decode('utf-8'), user_slug) unread += n return unread diff --git a/schema.graphql b/schema.graphql index e1d30873..e08342ec 100644 --- a/schema.graphql +++ b/schema.graphql @@ -151,7 +151,6 @@ type Mutation { createChat(title: String, members: [String]!): Result! updateChat(chat: ChatInput!): Result! deleteChat(chatId: String!): Result! - inviteChat(chatId: String!, userslug: String!): Result! createMessage(chat: String!, body: String!, replyTo: String): Result! updateMessage(chatId: String!, id: Int!, body: String!): Result! @@ -515,13 +514,13 @@ type Message { type Chat { id: String! createdAt: Int! - createdBy: User! + createdBy: String! updatedAt: Int! title: String description: String - users: [User]! - admins: [User] - messages: [Message]! + users: [String]! 
+ admins: [String] + messages: [Message] unread: Int private: Boolean } diff --git a/services/stat/topicstat.py b/services/stat/topicstat.py index c95d0850..3a5689e6 100644 --- a/services/stat/topicstat.py +++ b/services/stat/topicstat.py @@ -3,7 +3,7 @@ import time from base.orm import local_session from orm.shout import Shout, ShoutTopic, ShoutAuthor from orm.topic import TopicFollower -from sqlalchemy.sql.expression import select +# from sqlalchemy.sql.expression import select class TopicStat: @@ -20,21 +20,19 @@ class TopicStat: start = time.time() self = TopicStat shout_topics = session.query(ShoutTopic, Shout).join(Shout).all() - all_shout_authors = session.query(ShoutAuthor).all() print("[stat.topics] %d links for shouts" % len(shout_topics)) for [shout_topic, shout] in shout_topics: tpc = shout_topic.topic - # shouts by topics - # shout = session.query(Shout).where(Shout.slug == shout_topic.shout).first() self.shouts_by_topic[tpc] = self.shouts_by_topic.get(tpc, dict()) self.shouts_by_topic[tpc][shout.slug] = shout - - # authors by topics - shout_authors = filter(lambda asa: asa.shout == shout.slug, all_shout_authors) - self.authors_by_topic[tpc] = self.authors_by_topic.get(tpc, dict()) - for sa in shout_authors: - self.authors_by_topic[tpc][sa.shout] = sa.caption + authors = session.query( + ShoutAuthor.user, ShoutAuthor.caption + ).filter( + ShoutAuthor.shout == shout.slug + ).all() + for a in authors: + self.authors_by_topic[tpc][a[0]] = a[1] self.followers_by_topic = {} followings = session.query(TopicFollower).all() diff --git a/services/zine/shoutauthor.py b/services/zine/shoutauthor.py new file mode 100644 index 00000000..477ff384 --- /dev/null +++ b/services/zine/shoutauthor.py @@ -0,0 +1,46 @@ +import asyncio +from base.orm import local_session +from orm.shout import ShoutAuthor, Shout + + +class ShoutAuthorStorage: + authors_by_shout = {} + lock = asyncio.Lock() + period = 30 * 60 # sec + + @staticmethod + async def load_captions(session): + self = ShoutAuthorStorage + sas = session.query(ShoutAuthor).join(Shout).all() + for sa in sas: + self.authors_by_shout[sa.shout] = self.authors_by_shout.get(sa.shout, []) + self.authors_by_shout[sa.shout].append([sa.user, sa.caption]) + print("[zine.authors] %d shouts indexed by authors" % len(self.authors_by_shout)) + + @staticmethod + async def get_authors(shout): + self = ShoutAuthorStorage + async with self.lock: + return self.authors_by_shout.get(shout, []) + + @staticmethod + async def get_author_caption(shout, author): + self = ShoutAuthorStorage + async with self.lock: + for a in self.authors_by_shout.get(shout, []): + if author in a: + return a[1] + return {"error": "author caption not found"} + + @staticmethod + async def worker(): + self = ShoutAuthorStorage + while True: + try: + with local_session() as session: + async with self.lock: + await self.load_captions(session) + print("[zine.authors] index by authors was updated") + except Exception as err: + print("[zine.authors] error indexing by author: %s" % (err)) + await asyncio.sleep(self.period) From dec9efc49bac8c9899dfa817503d1aefeadd596e Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sat, 26 Nov 2022 02:35:26 +0300 Subject: [PATCH 02/20] fix double prepend --- resolvers/inbox/chats.py | 14 ++++++++------ resolvers/zine/topics.py | 11 ++++++----- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/resolvers/inbox/chats.py b/resolvers/inbox/chats.py index e24acf0c..b11546d8 100644 --- a/resolvers/inbox/chats.py +++ b/resolvers/inbox/chats.py @@ -63,12 
+63,14 @@ async def create_chat(_, info, title="", members=[]): cids = await redis.execute("SMEMBERS", f"chats_by_user/{user.slug}") for cid in cids: c = await redis.execute("GET", F"chats/{cid.decode('utf-8')}") - isc = [x for x in c["users"] if x not in chat["users"]] - if isc == [] and chat["title"] == c["title"]: - return { - "error": "chat was created before", - "chat": chat - } + if c: + c = json.loads(c) + isc = [x for x in c["users"] if x not in chat["users"]] + if isc == [] and chat["title"] == c["title"]: + return { + "error": "chat was created before", + "chat": chat + } for m in members: await redis.execute("SADD", f"chats_by_user/{m}", chat_id) diff --git a/resolvers/zine/topics.py b/resolvers/zine/topics.py index 81db4f91..6edb3c09 100644 --- a/resolvers/zine/topics.py +++ b/resolvers/zine/topics.py @@ -98,11 +98,12 @@ async def topic_follow(user, slug): async def topic_unfollow(user, slug): with local_session() as session: sub = ( - session.query(TopicFollower) - .filter( - and_(TopicFollower.follower == user.slug, TopicFollower.topic == slug) - ) - .first() + session.query(TopicFollower).filter( + and_( + TopicFollower.follower == user.slug, + TopicFollower.topic == slug + ) + ).first() ) if not sub: raise Exception("[resolvers.topics] follower not exist") From 839485873a99d5c71e7bb71f076c86b616da3ceb Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sat, 26 Nov 2022 02:49:02 +0300 Subject: [PATCH 03/20] frontend url setting --- auth/oauth.py | 4 ++-- resolvers/auth.py | 4 ++-- settings.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/auth/oauth.py b/auth/oauth.py index 88950c8d..54b5f11a 100644 --- a/auth/oauth.py +++ b/auth/oauth.py @@ -2,7 +2,7 @@ from authlib.integrations.starlette_client import OAuth from starlette.responses import RedirectResponse from auth.identity import Identity from auth.tokenstorage import TokenStorage -from settings import OAUTH_CLIENTS +from settings import OAUTH_CLIENTS, FRONTEND_URL oauth = OAuth() @@ -84,6 +84,6 @@ async def oauth_authorize(request): } user = Identity.oauth(user_input) session_token = await TokenStorage.create_session(user) - response = RedirectResponse(url="https://new.discours.io/confirm") + response = RedirectResponse(url=FRONTEND_URL + "/confirm") response.set_cookie("token", session_token) return response diff --git a/resolvers/auth.py b/resolvers/auth.py index 317d2733..00a5ea86 100644 --- a/resolvers/auth.py +++ b/resolvers/auth.py @@ -18,7 +18,7 @@ from base.orm import local_session from base.resolvers import mutation, query from orm import Role, User from resolvers.zine.profile import user_subscriptions -from settings import SESSION_TOKEN_HEADER +from settings import SESSION_TOKEN_HEADER, FRONTEND_URL @mutation.field("getSession") @@ -75,7 +75,7 @@ async def confirm_email_handler(request): if "error" in res: raise BaseHttpException(res['error']) else: - response = RedirectResponse(url="https://new.discours.io") + response = RedirectResponse(url=FRONTEND_URL) response.set_cookie("token", res["token"]) # session token return response diff --git a/settings.py b/settings.py index ccfdbed0..d6c60a4b 100644 --- a/settings.py +++ b/settings.py @@ -22,7 +22,7 @@ for provider in OAUTH_PROVIDERS: "id": environ.get(provider + "_OAUTH_ID"), "key": environ.get(provider + "_OAUTH_KEY"), } - +FRONTEND_URL = environ.get("FRONTEND_URL") or "http://localhost:3000" SHOUTS_REPO = "content" SESSION_TOKEN_HEADER = "Authorization" From 228cdf21e91253d9d58b35d963cb9c0b3fba811b Mon Sep 17 00:00:00 2001 From: tonyrewin 
Date: Sat, 26 Nov 2022 03:55:45 +0300 Subject: [PATCH 04/20] morning-fixes --- resolvers/zine/load.py | 100 ++++++++++++++++++-------------------- resolvers/zine/profile.py | 24 +++++++-- resolvers/zine/topics.py | 7 +-- services/search.py | 8 +-- services/stat/reacted.py | 15 +----- 5 files changed, 74 insertions(+), 80 deletions(-) diff --git a/resolvers/zine/load.py b/resolvers/zine/load.py index fe13d431..b883036e 100644 --- a/resolvers/zine/load.py +++ b/resolvers/zine/load.py @@ -5,23 +5,32 @@ from sqlalchemy.sql.expression import desc, asc, select, case from base.orm import local_session from base.resolvers import query from orm import ViewedEntry -from orm.shout import Shout, ShoutAuthor +from orm.shout import Shout from orm.reaction import Reaction, ReactionKind -from services.stat.reacted import ReactedStorage +from services.zine.shoutauthor import ShoutAuthorStorage +from services.stat.viewed import ViewedStorage -def add_rating_column(q): - return q.join(Reaction).add_columns(sa.func.sum(case( - (Reaction.kind == ReactionKind.AGREE, 1), - (Reaction.kind == ReactionKind.DISAGREE, -1), - (Reaction.kind == ReactionKind.PROOF, 1), - (Reaction.kind == ReactionKind.DISPROOF, -1), - (Reaction.kind == ReactionKind.ACCEPT, 1), - (Reaction.kind == ReactionKind.REJECT, -1), - (Reaction.kind == ReactionKind.LIKE, 1), - (Reaction.kind == ReactionKind.DISLIKE, -1), - else_=0 - )).label('rating')) +def calc_reactions(q): + return q.join(Reaction).add_columns( + sa.func.sum(case( + (Reaction.kind == ReactionKind.AGREE, 1), + (Reaction.kind == ReactionKind.DISAGREE, -1), + (Reaction.kind == ReactionKind.PROOF, 1), + (Reaction.kind == ReactionKind.DISPROOF, -1), + (Reaction.kind == ReactionKind.ACCEPT, 1), + (Reaction.kind == ReactionKind.REJECT, -1), + (Reaction.kind == ReactionKind.LIKE, 1), + (Reaction.kind == ReactionKind.DISLIKE, -1), + else_=0) + ).label('rating'), + sa.func.sum( + case( + (Reaction.body.is_not(None), 1), + else_=0 + ) + ).label('commented') + ) def apply_filters(q, filters, user=None): @@ -50,30 +59,30 @@ def apply_filters(q, filters, user=None): async def load_shout(_, info, slug): with local_session() as session: q = select(Shout).options( - # TODO add cation joinedload(Shout.authors), joinedload(Shout.topics), ) - q = add_rating_column(q) + q = calc_reactions(q) q = q.filter( Shout.slug == slug ).filter( Shout.deletedAt.is_(None) ).group_by(Shout.id) - [shout, rating] = session.execute(q).unique().one() - - shout.stat = await ReactedStorage.get_shout_stat(shout.slug, rating) + [shout, rating, commented] = session.execute(q).unique().one() + for a in shout.authors: + a.caption = await ShoutAuthorStorage.get_author_caption(a.slug) + viewed = await ViewedStorage.get_shout(shout.slug) + shout.stat = { + "rating": rating, + "viewed": viewed, + "commented": commented, + # "reacted": reacted + } return shout -def map_result_item(result_item): - shout = result_item[0] - shout.rating = result_item[1] - return shout - - @query.field("loadShouts") async def load_shouts_by(_, info, options): """ @@ -104,7 +113,7 @@ async def load_shouts_by(_, info, options): ) user = info.context["request"].user q = apply_filters(q, options.get("filters"), user) - q = add_rating_column(q) + q = calc_reactions(q) o = options.get("order_by") if o: @@ -127,37 +136,22 @@ async def load_shouts_by(_, info, options): order_by_desc = True if options.get('order_by_desc') is None else options.get('order_by_desc') - with_author_captions = False if options.get('with_author_captions') is None else 
options.get('with_author_captions') - query_order_by = desc(order_by) if order_by_desc else asc(order_by) offset = options.get("offset", 0) limit = options.get("limit", 10) q = q.group_by(Shout.id).order_by(query_order_by).limit(limit).offset(offset) + shouts = [] with local_session() as session: - shouts = list(map(map_result_item, session.execute(q).unique())) - - for shout in shouts: - shout.stat = await ReactedStorage.get_shout_stat(shout.slug, shout.rating) - del shout.rating - - author_captions = {} - - if with_author_captions: - author_captions_result = session.query(ShoutAuthor).where( - ShoutAuthor.shout.in_(map(lambda s: s.slug, shouts))).all() - - for author_captions_result_item in author_captions_result: - if author_captions.get(author_captions_result_item.shout) is None: - author_captions[author_captions_result_item.shout] = {} - - author_captions[ - author_captions_result_item.shout - ][ - author_captions_result_item.user - ] = author_captions_result_item.caption - - for author in shout.authors: - author.caption = author_captions[shout.slug][author.slug] - + for [shout, rating, commented] in session.execute(q).unique(): + shout.stat = { + "rating": rating, + "viewed": await ViewedStorage.get_shout(shout.slug), + "commented": commented, + # "reacted": reacted + } + # NOTE: no need authors captions in arrays + # for author in shout.authors: + # author.caption = await ShoutAuthorStorage.get_author_caption(shout.slug, author.slug) + shouts.append(shout) return shouts diff --git a/resolvers/zine/profile.py b/resolvers/zine/profile.py index d0e9d3af..9768a443 100644 --- a/resolvers/zine/profile.py +++ b/resolvers/zine/profile.py @@ -10,7 +10,6 @@ from orm.reaction import Reaction from orm.shout import ShoutAuthor from orm.topic import Topic, TopicFollower from orm.user import AuthorFollower, Role, User, UserRating, UserRole -from services.stat.reacted import ReactedStorage from services.stat.topicstat import TopicStat # from .community import followed_communities @@ -23,13 +22,12 @@ async def user_subscriptions(slug: str): "unread": await get_total_unread_counter(slug), # unread inbox messages counter "topics": [t.slug for t in await followed_topics(slug)], # followed topics slugs "authors": [a.slug for a in await followed_authors(slug)], # followed authors slugs - "reactions": await ReactedStorage.get_shouts_by_author(slug), + "reactions": await followed_reactions(slug) # "communities": [c.slug for c in followed_communities(slug)], # communities } async def get_author_stat(slug): - # TODO: implement author stat with local_session() as session: return { "shouts": session.query(ShoutAuthor).where(ShoutAuthor.user == slug).count(), @@ -41,11 +39,29 @@ async def get_author_stat(slug): ).where( Reaction.createdBy == slug ).filter( - func.length(Reaction.body) > 0 + Reaction.body.is_not(None) ).count() } +# @query.field("userFollowedDiscussions") +@login_required +async def followed_discussions(_, info, slug) -> List[Topic]: + return await followed_reactions(slug) + + +async def followed_reactions(slug): + with local_session() as session: + user = session.query(User).where(User.slug == slug).first() + return session.query( + Reaction.shout + ).where( + Reaction.author == slug + ).filter( + Reaction.createdAt > user.lastSeen + ).all() + + @query.field("userFollowedTopics") @login_required async def get_followed_topics(_, info, slug) -> List[Topic]: diff --git a/resolvers/zine/topics.py b/resolvers/zine/topics.py index 6edb3c09..8a913627 100644 --- a/resolvers/zine/topics.py +++ 
b/resolvers/zine/topics.py @@ -6,7 +6,6 @@ from base.resolvers import mutation, query from orm import Shout from orm.topic import Topic, TopicFollower from services.zine.topics import TopicStorage -# from services.stat.reacted import ReactedStorage from services.stat.topicstat import TopicStat @@ -17,11 +16,7 @@ async def get_topic_stat(slug): return { "shouts": len(TopicStat.shouts_by_topic.get(slug, {}).keys()), "authors": len(TopicStat.authors_by_topic.get(slug, {}).keys()), - "followers": len(TopicStat.followers_by_topic.get(slug, {}).keys()), - # "viewed": await ViewedStorage.get_topic(slug), - # "reacted": len(await ReactedStorage.get_topic(slug)), - # "commented": len(await ReactedStorage.get_topic_comments(slug)), - # "rating": await ReactedStorage.get_topic_rating(slug) + "followers": len(TopicStat.followers_by_topic.get(slug, {}).keys()) } diff --git a/services/search.py b/services/search.py index 9c9481be..834e5bf7 100644 --- a/services/search.py +++ b/services/search.py @@ -20,11 +20,13 @@ class SearchService: cached = await redis.execute("GET", text) if not cached: async with SearchService.lock: - by = { + options = { "title": text, - "body": text + "body": text, + "limit": limit, + "offset": offset } - payload = await load_shouts_by(None, None, by, limit, offset) + payload = await load_shouts_by(None, None, options) await redis.execute("SET", text, json.dumps(payload)) return payload else: diff --git a/services/stat/reacted.py b/services/stat/reacted.py index ef5a2953..3ae5684e 100644 --- a/services/stat/reacted.py +++ b/services/stat/reacted.py @@ -3,7 +3,6 @@ import time from base.orm import local_session from orm.reaction import ReactionKind, Reaction from services.zine.topics import TopicStorage -from services.stat.viewed import ViewedStorage def kind_to_rate(kind) -> int: @@ -34,18 +33,6 @@ class ReactedStorage: lock = asyncio.Lock() modified_shouts = set([]) - @staticmethod - async def get_shout_stat(slug, rating): - viewed = int(await ViewedStorage.get_shout(slug)) - # print(viewed) - return { - "viewed": viewed, - "reacted": len(await ReactedStorage.get_shout(slug)), - "commented": len(await ReactedStorage.get_comments(slug)), - # "rating": await ReactedStorage.get_rating(slug), - "rating": rating - } - @staticmethod async def get_shout(shout_slug): self = ReactedStorage @@ -59,7 +46,7 @@ class ReactedStorage: return self.reacted["authors"].get(user_slug, []) @staticmethod - async def get_shouts_by_author(user_slug): + async def get_followed_reactions(user_slug): self = ReactedStorage async with self.lock: author_reactions = self.reacted["authors"].get(user_slug, []) From 2d090a776e7bd76725a681194089fe09227fbca2 Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sat, 26 Nov 2022 04:15:09 +0300 Subject: [PATCH 05/20] hotfixes --- resolvers/zine/load.py | 2 +- resolvers/zine/profile.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resolvers/zine/load.py b/resolvers/zine/load.py index b883036e..db40fd19 100644 --- a/resolvers/zine/load.py +++ b/resolvers/zine/load.py @@ -71,7 +71,7 @@ async def load_shout(_, info, slug): [shout, rating, commented] = session.execute(q).unique().one() for a in shout.authors: - a.caption = await ShoutAuthorStorage.get_author_caption(a.slug) + a.caption = await ShoutAuthorStorage.get_author_caption(shout.slug, a.slug) viewed = await ViewedStorage.get_shout(shout.slug) shout.stat = { "rating": rating, diff --git a/resolvers/zine/profile.py b/resolvers/zine/profile.py index 9768a443..a133c2af 100644 --- 
a/resolvers/zine/profile.py +++ b/resolvers/zine/profile.py @@ -56,7 +56,7 @@ async def followed_reactions(slug): return session.query( Reaction.shout ).where( - Reaction.author == slug + Reaction.createdBy == slug ).filter( Reaction.createdAt > user.lastSeen ).all() From d898466ccdc22d4f13f90c0ad5fb51ad280fc6c9 Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sat, 26 Nov 2022 04:44:40 +0300 Subject: [PATCH 06/20] all-white --- resolvers/zine/load.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/resolvers/zine/load.py b/resolvers/zine/load.py index db40fd19..ad8f8247 100644 --- a/resolvers/zine/load.py +++ b/resolvers/zine/load.py @@ -37,8 +37,11 @@ def apply_filters(q, filters, user=None): filters = {} if filters is None else filters if filters.get("reacted") and user: q.join(Reaction, Reaction.createdBy == user.slug) - if filters.get("visibility"): + v = filters.get("visibility") + if v == "public": q = q.filter(Shout.visibility == filters.get("visibility")) + if v == "community": + q = q.filter(Shout.visibility.in_(["public", "community"])) if filters.get("layout"): q = q.filter(Shout.layout == filters.get("layout")) if filters.get("author"): From bc09f414e09244efed67ae2816e17ba28fbdddea Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sat, 26 Nov 2022 10:46:06 +0300 Subject: [PATCH 07/20] fixed-storages --- services/stat/reacted.py | 31 ++++++++++++-------------- services/stat/topicstat.py | 6 ++--- services/stat/viewed.py | 35 +++++++++++++++-------------- services/zine/shoutauthor.py | 43 +++++++++++++++++++----------------- 4 files changed, 57 insertions(+), 58 deletions(-) diff --git a/services/stat/reacted.py b/services/stat/reacted.py index 3ae5684e..049da7ef 100644 --- a/services/stat/reacted.py +++ b/services/stat/reacted.py @@ -163,32 +163,29 @@ class ReactedStorage: @staticmethod async def recount_changed(session): - start = time.time() self = ReactedStorage - async with self.lock: - sss = list(self.modified_shouts) - c = 0 - for slug in sss: - siblings = session.query(Reaction).where(Reaction.shout == slug).all() - c += len(siblings) - await self.recount(siblings) + sss = list(self.modified_shouts) + c = 0 + for slug in sss: + siblings = session.query(Reaction).where(Reaction.shout == slug).all() + c += len(siblings) + await self.recount(siblings) - print("[stat.reacted] %d reactions recounted" % c) - print("[stat.reacted] %d shouts modified" % len(self.modified_shouts)) - print("[stat.reacted] %d topics" % len(self.reacted["topics"].values())) - print("[stat.reacted] %d authors" % len(self.reacted["authors"].values())) - print("[stat.reacted] %d replies" % len(self.reacted["reactions"])) - self.modified_shouts = set([]) - - end = time.time() - print("[stat.reacted] recount_changed took %fs " % (end - start)) + print("[stat.reacted] %d reactions recounted" % c) + print("[stat.reacted] %d shouts modified" % len(self.modified_shouts)) + print("[stat.reacted] %d topics" % len(self.reacted["topics"].values())) + print("[stat.reacted] %d authors" % len(self.reacted["authors"].values())) + print("[stat.reacted] %d replies" % len(self.reacted["reactions"])) + self.modified_shouts = set([]) @staticmethod async def worker(): while True: try: with local_session() as session: + ts = time.time() await ReactedStorage.recount_changed(session) + print("[stat.reacted] recount_changed took %fs " % (time.time() - ts)) except Exception as err: print("[stat.reacted] recount error %s" % (err)) await asyncio.sleep(ReactedStorage.period) diff --git a/services/stat/topicstat.py 
b/services/stat/topicstat.py index 3a5689e6..09ff7257 100644 --- a/services/stat/topicstat.py +++ b/services/stat/topicstat.py @@ -17,7 +17,6 @@ class TopicStat: @staticmethod async def load_stat(session): - start = time.time() self = TopicStat shout_topics = session.query(ShoutTopic, Shout).join(Shout).all() print("[stat.topics] %d links for shouts" % len(shout_topics)) @@ -43,9 +42,6 @@ class TopicStat: self.followers_by_topic[topic] = self.followers_by_topic.get(topic, dict()) self.followers_by_topic[topic][userslug] = userslug - end = time.time() - print("[stat.topics] load_stat took %fs " % (end - start)) - @staticmethod async def get_shouts(topic): self = TopicStat @@ -59,8 +55,10 @@ class TopicStat: while True: try: with local_session() as session: + ts = time.time() async with self.lock: await self.load_stat(session) + print("[stat.topicstat] load_stat took %fs " % (time.time() - ts)) except Exception as err: raise Exception(err) if first_run: diff --git a/services/stat/viewed.py b/services/stat/viewed.py index 8a49c707..e8e24340 100644 --- a/services/stat/viewed.py +++ b/services/stat/viewed.py @@ -86,24 +86,23 @@ class ViewedStorage: """ query all the pages from ackee sorted by views count """ start = time.time() self = ViewedStorage - async with self.lock: + try: + self.pages = await self.client.execute_async(load_pages) + self.pages = self.pages["domains"][0]["statistics"]["pages"] + print("[stat.viewed] ackee pages updated") + shouts = {} try: - self.pages = await self.client.execute_async(load_pages) - self.pages = self.pages["domains"][0]["statistics"]["pages"] - print("[stat.viewed] ackee pages updated") - shouts = {} - try: - for page in self.pages: - p = page["value"].split("?")[0] - slug = p.split('discours.io/')[-1] - shouts[slug] = page["count"] - for slug, v in shouts: - await ViewedStorage.increment(slug, v) - except Exception: - pass - print("[stat.viewed] %d pages collected " % len(shouts.keys())) - except Exception as e: - raise e + for page in self.pages: + p = page["value"].split("?")[0] + slug = p.split('discours.io/')[-1] + shouts[slug] = page["count"] + for slug, v in shouts: + await ViewedStorage.increment(slug, v) + except Exception: + pass + print("[stat.viewed] %d pages collected " % len(shouts.keys())) + except Exception as e: + raise e end = time.time() print("[stat.viewed] update_pages took %fs " % (end - start)) @@ -180,8 +179,10 @@ class ViewedStorage: async with self.lock: while True: try: + ts = time.time() await self.update_pages() failed = 0 + print("[stat.viewed] update_pages took %fs " % (time.time() - ts)) except Exception: failed += 1 print("[stat.viewed] update failed #%d, wait 10 seconds" % failed) diff --git a/services/zine/shoutauthor.py b/services/zine/shoutauthor.py index 477ff384..25505bc5 100644 --- a/services/zine/shoutauthor.py +++ b/services/zine/shoutauthor.py @@ -1,46 +1,49 @@ import asyncio +import time from base.orm import local_session -from orm.shout import ShoutAuthor, Shout +from orm.shout import ShoutAuthor class ShoutAuthorStorage: authors_by_shout = {} lock = asyncio.Lock() - period = 30 * 60 # sec + # period = 30 * 60 # sec @staticmethod async def load_captions(session): self = ShoutAuthorStorage - sas = session.query(ShoutAuthor).join(Shout).all() + sas = session.query(ShoutAuthor).all() for sa in sas: - self.authors_by_shout[sa.shout] = self.authors_by_shout.get(sa.shout, []) - self.authors_by_shout[sa.shout].append([sa.user, sa.caption]) + self.authors_by_shout[sa.shout] = self.authors_by_shout.get(sa.shout, {}) + 
self.authors_by_shout[sa.shout][sa.user] = sa.caption print("[zine.authors] %d shouts indexed by authors" % len(self.authors_by_shout)) - @staticmethod - async def get_authors(shout): - self = ShoutAuthorStorage - async with self.lock: - return self.authors_by_shout.get(shout, []) - @staticmethod async def get_author_caption(shout, author): self = ShoutAuthorStorage async with self.lock: - for a in self.authors_by_shout.get(shout, []): - if author in a: - return a[1] - return {"error": "author caption not found"} + return self.authors_by_shout.get(shout, {}).get(author) + + @staticmethod + async def set_author_caption(shout, author, caption): + self = ShoutAuthorStorage + async with self.lock: + self.authors_by_shout[shout] = self.authors_by_shout.get(shout, {}) + self.authors_by_shout[shout][author] = caption + return { + "error": None, + } @staticmethod async def worker(): self = ShoutAuthorStorage - while True: + async with self.lock: + # while True: try: with local_session() as session: - async with self.lock: - await self.load_captions(session) - print("[zine.authors] index by authors was updated") + ts = time.time() + await self.load_captions(session) + print("[zine.authors] load_captions took %fs " % (time.time() - ts)) except Exception as err: print("[zine.authors] error indexing by author: %s" % (err)) - await asyncio.sleep(self.period) + # await asyncio.sleep(self.period) From 11d498c47dacb250f2b5717867d56387723b9553 Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sat, 26 Nov 2022 17:02:20 +0300 Subject: [PATCH 08/20] createChat: reuse p2p chat --- resolvers/inbox/chats.py | 43 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/resolvers/inbox/chats.py b/resolvers/inbox/chats.py index b11546d8..d9ace3a0 100644 --- a/resolvers/inbox/chats.py +++ b/resolvers/inbox/chats.py @@ -47,30 +47,37 @@ async def update_chat(_, info, chat_new: dict): @login_required async def create_chat(_, info, title="", members=[]): user = info.context["request"].user - chat_id = str(uuid.uuid4()) + chat = {} if user.slug not in members: members.append(user.slug) + + # reuse chat craeted before if exists + if len(members) == 2 and title == "": + chats1 = await redis.execute("SMEMBERS", f"chats_by_user/{members[0].slug}") + chats2 = await redis.execute("SMEMBERS", f"chats_by_user/{members[1].slug}") + chat = None + for c in chats1.intersection(chats2): + chat = await redis.execute("GET", f"chats/{c.decode('utf-8')}") + if chat: + chat = json.loads(chat) + if chat.title == "": + break + if chat: + return { + "chat": chat, + "error": "existed" + } + + chat_id = str(uuid.uuid4()) chat = { - "title": title, - "createdAt": int(datetime.now(tz=timezone.utc).timestamp()), - "updatedAt": int(datetime.now(tz=timezone.utc).timestamp()), - "createdBy": user.slug, "id": chat_id, "users": members, - "admins": [user.slug, ] + "title": title, + "createdBy": user.slug, + "createdAt": int(datetime.now(tz=timezone.utc).timestamp()), + "updatedAt": int(datetime.now(tz=timezone.utc).timestamp()), + # "admins": [user.slug, ] } - # double creation protection - cids = await redis.execute("SMEMBERS", f"chats_by_user/{user.slug}") - for cid in cids: - c = await redis.execute("GET", F"chats/{cid.decode('utf-8')}") - if c: - c = json.loads(c) - isc = [x for x in c["users"] if x not in chat["users"]] - if isc == [] and chat["title"] == c["title"]: - return { - "error": "chat was created before", - "chat": chat - } for m in members: await redis.execute("SADD", f"chats_by_user/{m}", 
chat_id) From 9ca9859563df285b4eef132f87b3e74d1fbb58d6 Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sat, 26 Nov 2022 17:58:53 +0300 Subject: [PATCH 09/20] media normalized --- migration/export.py | 8 ++--- migration/extract.py | 79 +++++++++++++++++++++----------------------- schema.graphql | 2 +- 3 files changed, 42 insertions(+), 47 deletions(-) diff --git a/migration/export.py b/migration/export.py index 988ab35d..a9340dc5 100644 --- a/migration/export.py +++ b/migration/export.py @@ -4,7 +4,7 @@ from datetime import datetime, timezone import frontmatter -from .extract import extract_html, prepare_html_body +from .extract import extract_html, prepare_html_body, extract_media from .utils import DateTimeEncoder OLD_DATE = "2016-03-05 22:22:00.350000" @@ -50,11 +50,11 @@ def export_mdx(r): def export_body(shout, storage): entry = storage["content_items"]["by_oid"][shout["oid"]] if entry: - shout["body"], media = prepare_html_body(entry) # prepare_md_body(entry) - shout["media"] = media + shout["body"] = prepare_html_body(entry) # prepare_md_body(entry) + shout["media"] = extract_media(entry) export_mdx(shout) print("[export] html for %s" % shout["slug"]) - body, _media = extract_html(entry) + body = extract_html(entry) open(contentDir + shout["slug"] + ".html", "w").write(body) else: raise Exception("no content_items entry found") diff --git a/migration/extract.py b/migration/extract.py index 4ea44d04..62199dcf 100644 --- a/migration/extract.py +++ b/migration/extract.py @@ -3,8 +3,6 @@ import os import re import uuid -from .html2text import html2text - TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)" contentDir = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "..", "discoursio-web", "content" @@ -258,47 +256,44 @@ def extract_md(body, oid=""): return newbody -def prepare_md_body(entry): - # body modifications - body = "" - kind = entry.get("type") - addon = "" - if kind == "Video": - addon = "" - for m in entry.get("media", []): - if "youtubeId" in m: - addon += "\n" +def extract_media(entry): + ''' normalized media extraction method ''' + # media [ { title pic url body } ]} + kind = entry.get("layout") + media = [] + for m in entry.get("media", []): + # title + title = m.get("title", "").replace("\n", " ").replace(" ", " ") + artist = m.get("performer") or m.get("artist") + if artist: + title = artist + " - " + title + + # pic + url = m.get("fileUrl") or m.get("url", "") + pic = "" + if "thumborId" in m: + pic = cdn + "/unsafe/1600x/" + m["thumborId"] + + # url + if not url: + if kind == "Image": + url = pic + elif "youtubeId" in m: + url = "https://youtube.com/?watch=" + m["youtubeId"] elif "vimeoId" in m: - addon += "\n" + url = "https://vimeo.com/" + m["vimeoId"] else: print("[extract] media is not supported") - print(m) - body = "import VideoPlayer from '$/components/Article/VideoPlayer'\n\n" + addon + # body + body = m.get("body") or m.get("literatureBody") - elif kind == "Music": - addon = "" - for m in entry.get("media", []): - artist = m.get("performer") - trackname = "" - if artist: - trackname += artist + " - " - if "title" in m: - trackname += m.get("title", "") - addon += ( - '\n' - ) - body = "import AudioPlayer from '$/components/Article/AudioPlayer'\n\n" + addon - - body_orig, media = extract_html(entry) - if body_orig: - body += extract_md(html2text(body_orig), entry["_id"]) - if not body: - print("[extract] empty MDX body") - return body, media + media.append({ + "url": url, + "pic": pic, + "title": title, + "body": body + }) + return media def 
prepare_html_body(entry): @@ -339,11 +334,11 @@ def prepare_html_body(entry): addon += '">' body += addon - body, media = extract_html(entry) + body = extract_html(entry) # if body_orig: body += extract_md(html2text(body_orig), entry['_id']) if not body: print("[extract] empty HTML body") - return body, media + return body def extract_html(entry): @@ -403,4 +398,4 @@ def extract_html(entry): if not body_orig: print("[extract] empty HTML body") # body_html = str(BeautifulSoup(body_orig, features="html.parser")) - return body_orig, media + return body_orig diff --git a/schema.graphql b/schema.graphql index e08342ec..4b5987c3 100644 --- a/schema.graphql +++ b/schema.graphql @@ -439,7 +439,7 @@ type Shout { deletedBy: User publishedBy: User publishedAt: DateTime - media: String + media: String # json [ { title pic url body }, .. ] stat: Stat } From 9a4cd6ba063f34c80bd7917fd2f1f9956a38cc6c Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sat, 26 Nov 2022 18:19:45 +0300 Subject: [PATCH 10/20] normalize media fixed --- migration/__init__.py | 3 -- migration/export.py | 9 ++--- migration/extract.py | 59 +++---------------------------- migration/tables/content_items.py | 5 +-- migration/tables/topics.py | 3 +- 5 files changed, 15 insertions(+), 64 deletions(-) diff --git a/migration/__init__.py b/migration/__init__.py index 2d195e06..4a25931d 100644 --- a/migration/__init__.py +++ b/migration/__init__.py @@ -314,9 +314,6 @@ async def handle_auto(): async def main(): if len(sys.argv) > 1: - cmd = sys.argv[1] - if type(cmd) == str: - print("[migration] command: " + cmd) init_tables() await handle_auto() else: diff --git a/migration/export.py b/migration/export.py index a9340dc5..102cfb14 100644 --- a/migration/export.py +++ b/migration/export.py @@ -4,7 +4,7 @@ from datetime import datetime, timezone import frontmatter -from .extract import extract_html, prepare_html_body, extract_media +from .extract import extract_html, extract_media from .utils import DateTimeEncoder OLD_DATE = "2016-03-05 22:22:00.350000" @@ -50,11 +50,12 @@ def export_mdx(r): def export_body(shout, storage): entry = storage["content_items"]["by_oid"][shout["oid"]] if entry: - shout["body"] = prepare_html_body(entry) # prepare_md_body(entry) - shout["media"] = extract_media(entry) + body = extract_html(entry) + media = extract_media(entry) + shout["body"] = body # prepare_html_body(entry) # prepare_md_body(entry) + shout["media"] = media export_mdx(shout) print("[export] html for %s" % shout["slug"]) - body = extract_html(entry) open(contentDir + shout["slug"] + ".html", "w").write(body) else: raise Exception("no content_items entry found") diff --git a/migration/extract.py b/migration/extract.py index 62199dcf..ccadb7e2 100644 --- a/migration/extract.py +++ b/migration/extract.py @@ -3,6 +3,9 @@ import os import re import uuid +from bs4 import BeautifulSoup + + TOOLTIP_REGEX = r"(\/\/\/(.+)\/\/\/)" contentDir = os.path.join( os.path.dirname(os.path.realpath(__file__)), "..", "..", "discoursio-web", "content" @@ -343,59 +346,7 @@ def prepare_html_body(entry): def extract_html(entry): body_orig = (entry.get("body") or "").replace('\(', '(').replace('\)', ')') - media = entry.get("media", []) - kind = entry.get("type") or "" - print("[extract] kind: " + kind) - mbodies = set([]) - if media: - # print('[extract] media is found') - for m in media: - mbody = m.get("body", "") - addon = "" - if kind == "Literature": - mbody = m.get("literatureBody") or m.get("body", "") - elif kind == "Image": - cover = "" - if "thumborId" in entry: - 
cover = cdn + "/unsafe/1600x/" + entry["thumborId"] - if not cover: - if "image" in entry: - cover = entry["image"].get("url", "") - if "cloudinary" in cover: - cover = "" - # else: print('[extract] cover: ' + cover) - title = m.get("title", "").replace("\n", " ").replace(" ", " ") - u = m.get("thumborId") or cover or "" - if title: - addon += "
<h4>
" + title + "
</h4>
\n" - if not u.startswith("http"): - u = s3 + u - if not u: - print("[extract] no image url for " + str(m)) - if "cloudinary" in u: - u = "img/lost.svg" - if u != cover or (u == cover and media.index(m) == 0): - addon += '' + title + '\n' - if addon: - body_orig += addon - # print('[extract] item addon: ' + addon) - # if addon: print('[extract] addon: %s' % addon) - if mbody and mbody not in mbodies: - mbodies.add(mbody) - body_orig += mbody - if len(list(mbodies)) != len(media): - print( - "[extract] %d/%d media item bodies appended" - % (len(list(mbodies)), len(media)) - ) - # print('[extract] media items body: \n' + body_orig) - if not body_orig: - for up in entry.get("bodyHistory", []) or []: - body_orig = up.get("text", "") or "" - if body_orig: - print("[extract] got html body from history") - break if not body_orig: print("[extract] empty HTML body") - # body_html = str(BeautifulSoup(body_orig, features="html.parser")) - return body_orig + body_html = str(BeautifulSoup(body_orig, features="html.parser")) + return body_html diff --git a/migration/tables/content_items.py b/migration/tables/content_items.py index af5f99d5..1c33ea51 100644 --- a/migration/tables/content_items.py +++ b/migration/tables/content_items.py @@ -4,7 +4,7 @@ from dateutil.parser import parse as date_parse from sqlalchemy.exc import IntegrityError from transliterate import translit from base.orm import local_session -from migration.extract import prepare_html_body +from migration.extract import extract_html, extract_media from orm.reaction import Reaction, ReactionKind from orm.shout import Shout, ShoutTopic, ShoutReactionsFollower from orm.user import User @@ -195,7 +195,8 @@ async def migrate(entry, storage): entry["cover"] = r["cover"] # body - r["body"], media = prepare_html_body(entry) + r["body"] = extract_html(entry) + media = extract_media(entry) if media: r["media"] = json.dumps(media, ensure_ascii=True) # save shout to db diff --git a/migration/tables/topics.py b/migration/tables/topics.py index 4b563716..15fcf245 100644 --- a/migration/tables/topics.py +++ b/migration/tables/topics.py @@ -1,5 +1,6 @@ from base.orm import local_session -from migration.extract import extract_md, html2text +from migration.extract import extract_md +from migration.html2text import html2text from orm import Topic From 2adf4422242f502d002c0ba2deffd2e8ff897357 Mon Sep 17 00:00:00 2001 From: tonyrewin Date: Sun, 27 Nov 2022 11:19:38 +0300 Subject: [PATCH 11/20] migration-upgrade --- README.md | 10 +- migration/extract.py | 22 ++- migration/tables/content_items.py | 206 ++++++++++++++++------------- migration/tables/replacements.json | 1 + resolvers/zine/reactions.py | 14 +- 5 files changed, 143 insertions(+), 110 deletions(-) diff --git a/README.md b/README.md index c69428a9..a0b778b2 100644 --- a/README.md +++ b/README.md @@ -24,10 +24,8 @@ apt install redis nginx First, install Postgres. Then you'll need some data ``` - -psql -U postgres -> create database discoursio; -> \q +brew install postgres +createdb discoursio python server.py migrate ``` @@ -42,3 +40,7 @@ python3 server.py dev Put the header 'Authorization' with token from signIn query or registerUser mutation. 
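A minimal sketch of such a request (the endpoint URL, port, and the exact fields returned by `getSession` are assumptions; only the `Authorization` header name comes from `SESSION_TOKEN_HEADER` in settings.py):

```
import requests

# Assumed local GraphQL endpoint; adjust host, port and path to your dev setup.
API_URL = "http://localhost:8080"

# Session token value obtained from the signIn query or registerUser mutation.
token = "<session token>"

query = "mutation { getSession { error token } }"

resp = requests.post(
    API_URL,
    json={"query": query},
    headers={"Authorization": token},  # header name from SESSION_TOKEN_HEADER
)
print(resp.json())
```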
+# How to debug Ackee + +Set ACKEE_TOKEN var + diff --git a/migration/extract.py b/migration/extract.py index ccadb7e2..5dd7ccba 100644 --- a/migration/extract.py +++ b/migration/extract.py @@ -262,9 +262,12 @@ def extract_md(body, oid=""): def extract_media(entry): ''' normalized media extraction method ''' # media [ { title pic url body } ]} - kind = entry.get("layout") + kind = entry.get("type") + if not kind: + print(entry) + raise Exception("shout no layout") media = [] - for m in entry.get("media", []): + for m in entry.get("media") or []: # title title = m.get("title", "").replace("\n", " ").replace(" ", " ") artist = m.get("performer") or m.get("artist") @@ -274,7 +277,7 @@ def extract_media(entry): # pic url = m.get("fileUrl") or m.get("url", "") pic = "" - if "thumborId" in m: + if m.get("thumborId"): pic = cdn + "/unsafe/1600x/" + m["thumborId"] # url @@ -285,11 +288,8 @@ def extract_media(entry): url = "https://youtube.com/?watch=" + m["youtubeId"] elif "vimeoId" in m: url = "https://vimeo.com/" + m["vimeoId"] - else: - print("[extract] media is not supported") # body - body = m.get("body") or m.get("literatureBody") - + body = m.get("body") or m.get("literatureBody") or "" media.append({ "url": url, "pic": pic, @@ -306,7 +306,7 @@ def prepare_html_body(entry): addon = "" if kind == "Video": addon = "" - for m in entry.get("media", []): + for m in entry.get("media") or []: if "youtubeId" in m: addon += '