# core/services/viewed.py
import asyncio
2024-01-22 15:42:45 +00:00
import threading
2024-01-22 15:48:58 +00:00
from typing import Dict
2024-01-13 12:44:56 +00:00
from logging import Logger
import time
2024-01-22 16:17:39 +00:00
import json
2023-12-17 20:30:20 +00:00
from datetime import datetime, timedelta, timezone
2023-10-25 16:55:30 +00:00
from os import environ
2024-01-13 12:44:56 +00:00
import logging
2022-11-18 17:54:37 +00:00
from gql import Client, gql
2023-11-29 07:23:41 +00:00
from gql.transport.aiohttp import AIOHTTPTransport
2024-01-22 15:42:45 +00:00
from graphql import DocumentNode
2024-01-22 15:48:58 +00:00
from orm.author import Author
2022-11-29 12:36:46 +00:00
2024-01-22 15:48:58 +00:00
from orm.shout import Shout, ShoutAuthor, ShoutTopic
2023-11-22 18:23:15 +00:00
from orm.topic import Topic
2023-12-17 20:30:20 +00:00
from services.db import local_session
2022-11-21 22:23:16 +00:00
2024-01-22 19:21:41 +00:00
# Module-level logging setup.
# NOTE(review): basicConfig() at import time configures the root logger for the
# whole process, and the tab characters in the logger name look deliberate
# (log-column alignment) — confirm before normalizing either.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("\t[services.viewed]\t")
logger.setLevel(logging.DEBUG)
2023-10-05 18:46:18 +00:00
# Ackee query: per-domain aggregate facts (active visitors, views today/month/year).
# NOTE(review): not referenced anywhere in the visible code — get_facts()
# executes load_pages instead, which looks like a copy-paste bug there.
load_facts = gql(
    """ query getDomains {
    domains {
        id
        title
        facts {
            activeVisitors
            viewsToday
            viewsMonth
            viewsYear
        }
    } } """
)
2022-11-21 22:23:16 +00:00
2023-10-05 18:46:18 +00:00
# Ackee query: per-domain page statistics, sorted by view count (TOP).
# Consumed by ViewedStorage.update_pages(), which maps page URLs to shout slugs.
load_pages = gql(
    """ query getDomains {
    domains {
        title
        statistics {
            pages(sorting: TOP) {
                # id
                count
                # created
                value
            }
        }
    } } """
)
2023-10-05 20:18:06 +00:00
2024-01-22 15:42:45 +00:00
create_record_mutation_string = """
createRecord(domainId: $domainId, input: $input) {
payload {
id
}
}
"""
create_record_mutation = gql(f"mutation {{{create_record_mutation_string}}}")
2024-01-22 15:45:35 +00:00
schema_str = open("schemas/ackee.graphql").read()
2022-11-21 22:23:16 +00:00
token = environ.get("ACKEE_TOKEN", "")
2024-01-22 15:42:45 +00:00
domain_id = environ.get("ACKEE_DOMAIN_ID", "")
ackee_site = environ.get("ACKEE_SITE", "https://testing.discours.io/")
2022-11-18 17:54:37 +00:00
2022-11-21 22:23:16 +00:00
def create_client(headers=None, schema=None):
    """Build a gql Client for the Ackee API using an aiohttp transport.

    :param headers: optional dict of HTTP headers (e.g. Authorization).
    :param schema: optional GraphQL schema string for client-side validation.
    :return: a configured gql.Client instance.
    """
    ackee_transport = AIOHTTPTransport(url="https://ackee.discours.io/api", headers=headers)
    return Client(schema=schema, transport=ackee_transport)
2022-11-18 17:54:37 +00:00
class ViewedStorage:
    """Process-wide cache of page-view counters backed by the Ackee
    analytics service.

    All state lives on the class itself — methods use the
    ``self = ViewedStorage`` idiom instead of instances — and ``lock``
    guards every read/write of the shared dictionaries. ``init()`` must be
    awaited once at startup; it spawns ``worker()`` to refresh counters
    every ``period`` seconds.
    """

    lock = asyncio.Lock()  # guards all shared mutable state below
    views_by_shout = {}  # shout slug -> accumulated view counter
    shouts_by_topic = {}  # topic slug -> list of shout slugs
    shouts_by_author = {}  # author slug -> list of shout slugs
    views = None
    pages = None  # raw result of the last load_pages query
    domains = None
    facts = None  # raw result of the last get_facts() call
    period = 60 * 60  # every hour
    client: Client | None = None  # gql client; set by init() when a token exists
    auth_result = None
    disabled = False  # True when ACKEE_TOKEN is missing; worker() exits early

    @staticmethod
    async def init():
        """graphql client connection using permanent token

        Loads pre-counted views from disk and starts the background
        refresh task; disables the storage entirely when ACKEE_TOKEN
        is not set.
        """
        self = ViewedStorage
        async with self.lock:
            if token:
                self.client = create_client({"Authorization": f"Bearer {token}"}, schema=schema_str)
                logger.info(" * authorized permanently by ackee.discours.io: %s" % token)

                # Load pre-counted views from the JSON file
                self.load_precounted_views()

                # Fire-and-forget refresh loop; the task object is only logged,
                # so no reference is kept for cancellation.
                views_stat_task = asyncio.create_task(self.worker())
                logger.info(views_stat_task)
            else:
                logger.info(" * please set ACKEE_TOKEN")
                self.disabled = True

    @staticmethod
    def load_precounted_views():
        """Seed views_by_shout from /dump/views.json (slug -> count mapping,
        presumably — TODO confirm the dump format against its producer).
        Any failure (missing file, bad JSON) is logged and ignored."""
        self = ViewedStorage
        try:
            with open("/dump/views.json", "r") as file:
                precounted_views = json.load(file)
                self.views_by_shout.update(precounted_views)
                logger.info(f" * {len(precounted_views)} pre-counted shouts' views loaded successfully.")
        except Exception as e:
            logger.error(f"Error loading pre-counted views: {e}")

    @staticmethod
    async def update_pages():
        """query all the pages from ackee sorted by views count

        Maps each page URL to a shout slug and bumps its counter, then
        refreshes the topic/author groupings for that slug.
        NOTE(review): exceptions are printed and swallowed here, so the
        caller (worker) never sees a failure — its retry counter can
        never trigger. Confirm whether that is intended.
        """
        logger.info(" ⎧ updating ackee pages data ---")
        try:
            start = time.time()
            self = ViewedStorage
            async with self.lock:
                if self.client:
                    # Run the blocking gql execute in a thread so the event
                    # loop is not stalled while waiting on the HTTP round-trip.
                    self.pages = await asyncio.to_thread(self.client.execute, load_pages)
                    domains = self.pages.get("domains", [])
                    # logger.debug(f" | domains: {domains}")
                    for domain in domains:
                        pages = domain.get("statistics", {}).get("pages", [])
                        if pages:
                            # logger.debug(f" | pages: {pages}")
                            shouts = {}
                            for page in pages:
                                # Strip the query string, then take the path
                                # after "discours.io/" as the shout slug.
                                p = page["value"].split("?")[0]
                                slug = p.split("discours.io/")[-1]
                                shouts[slug] = page["count"]
                            for slug in shouts.keys():
                                # NOTE(review): increments by 1 per refresh
                                # instead of using shouts[slug] (the count just
                                # fetched) — looks like a bug, but adding the
                                # full count each hour would double-count;
                                # confirm the intended accumulation model.
                                self.views_by_shout[slug] = self.views_by_shout.get(slug, 0) + 1
                                self.update_topics(slug)
                            logger.info("%d pages collected " % len(shouts.keys()))
            end = time.time()
            logger.info(" ⎪ update_pages took %fs " % (end - start))
        except Exception:
            import traceback

            traceback.print_exc()

    @staticmethod
    async def get_facts():
        """Fetch and cache the latest Ackee stats, returning [] on failure.

        NOTE(review): executes load_pages, not the load_facts query defined
        at module level — almost certainly a copy-paste bug; confirm which
        payload callers expect before fixing.
        """
        self = ViewedStorage
        self.facts = []
        try:
            if self.client:
                async with self.lock:
                    self.facts = await asyncio.to_thread(self.client.execute, load_pages)
        except Exception as er:
            logger.error(f" - get_facts error: {er}")
        return self.facts or []

    @staticmethod
    async def get_shout(shout_slug) -> int:
        """getting shout views metric by slug"""
        self = ViewedStorage
        async with self.lock:
            return self.views_by_shout.get(shout_slug, 0)

    @staticmethod
    async def get_shout_media(shout_slug) -> int:
        """getting shout plays metric by slug

        NOTE(review): despite the docstring, this returns the same integer
        view counter as get_shout — no per-media breakdown is stored
        (the original annotation claimed Dict[str, int]; the returned
        value is an int). Confirm the intended contract with callers.
        """
        self = ViewedStorage
        async with self.lock:
            return self.views_by_shout.get(shout_slug, 0)

    @staticmethod
    async def get_topic(topic_slug) -> int:
        """getting topic views value summed"""
        self = ViewedStorage
        topic_views = 0
        async with self.lock:
            # Sum the counters of every shout currently grouped under the topic.
            for shout_slug in self.shouts_by_topic.get(topic_slug, []):
                topic_views += self.views_by_shout.get(shout_slug, 0)
        return topic_views

    @staticmethod
    async def get_author(author_slug) -> int:
        """getting author views value summed"""
        self = ViewedStorage
        author_views = 0
        async with self.lock:
            # Sum the counters of every shout currently grouped under the author.
            for shout_slug in self.shouts_by_author.get(author_slug, []):
                author_views += self.views_by_shout.get(shout_slug, 0)
        return author_views

    @staticmethod
    def update_topics(shout_slug):
        """Updates topics counters by shout slug

        Refreshes shouts_by_topic and shouts_by_author groupings for one
        shout from the database. Synchronous DB access — callers invoke it
        while holding the asyncio lock, which blocks the event loop for
        the duration of the queries.
        """
        self = ViewedStorage
        with local_session() as session:
            # Define a helper function to avoid code repetition
            def update_groups(dictionary, key, value):
                # Append value to the group, de-duplicating via a set round-trip.
                dictionary[key] = list(set(dictionary.get(key, []) + [value]))

            # Update topics and authors using the helper function
            for [_shout_topic, topic] in session.query(ShoutTopic, Topic).join(Topic).join(Shout).where(Shout.slug == shout_slug).all():
                update_groups(self.shouts_by_topic, topic.slug, shout_slug)

            for [_shout_topic, author] in session.query(ShoutAuthor, Author).join(Author).join(Shout).where(Shout.slug == shout_slug).all():
                update_groups(self.shouts_by_author, author.slug, shout_slug)

    @staticmethod
    async def increment(shout_slug):
        """the proper way to change counter

        Bumps the local counter immediately, then best-effort reports the
        view to Ackee (errors are logged, never raised to the caller).
        """
        resource = ackee_site + shout_slug
        self = ViewedStorage
        async with self.lock:
            self.views_by_shout[shout_slug] = self.views_by_shout.get(shout_slug, 0) + 1
            self.update_topics(shout_slug)

        variables = {"domainId": domain_id, "input": {"siteLocation": resource}}
        if self.client:
            try:
                # Blocking HTTP call offloaded to a thread.
                await asyncio.to_thread(self.client.execute, create_record_mutation, variables)
            except Exception as e:
                logger.error(f"Error during threaded execution: {e}")

    @staticmethod
    async def worker():
        """async task worker

        Periodic refresh loop: calls update_pages every `period` seconds,
        backing off 10s after a failure and giving up after 3 consecutive
        failures.
        NOTE(review): update_pages swallows its own exceptions, so this
        except branch — and therefore the retry/give-up logic — appears
        unreachable in practice; confirm before relying on it.
        """
        failed = 0
        self = ViewedStorage
        if self.disabled:
            return

        while True:
            try:
                logger.info(" - updating records...")
                await self.update_pages()
                failed = 0
            except Exception:
                failed += 1
                logger.info(" - update failed #%d, wait 10 seconds" % failed)
                if failed > 3:
                    logger.info(" - not trying to update anymore")
                    break
            if failed == 0:
                when = datetime.now(timezone.utc) + timedelta(seconds=self.period)
                t = format(when.astimezone().isoformat())
                logger.info(" ⎩ next update: %s" % (t.split("T")[0] + " " + t.split("T")[1].split(".")[0]))
                await asyncio.sleep(self.period)
            else:
                await asyncio.sleep(10)
                logger.info(" - trying to update data again")