core/services/viewed.py

225 lines
7.1 KiB
Python
Raw Normal View History

2022-11-18 17:54:37 +00:00
import asyncio
import time
2022-11-21 22:23:16 +00:00
from datetime import timedelta, timezone, datetime
2023-01-18 12:43:56 +00:00
from os import environ, path
2022-11-18 17:54:37 +00:00
from gql import Client, gql
2023-10-05 21:17:24 +00:00
from gql.transport.requests import RequestsHTTPTransport
2022-11-29 12:36:46 +00:00
2023-10-05 18:46:18 +00:00
from services.db import local_session
from orm import Topic
2022-11-29 12:36:46 +00:00
from orm.shout import ShoutTopic, Shout
2022-11-21 22:23:16 +00:00
2023-10-05 18:46:18 +00:00
# GraphQL document asking ackee for every domain's id, title and visitor facts
# (active visitors plus today / month / year view totals).
_LOAD_FACTS_QUERY = """ query getDomains {
domains {
id
title
facts {
activeVisitors
viewsToday
viewsMonth
viewsYear
}
} } """

load_facts = gql(_LOAD_FACTS_QUERY)
2022-11-21 22:23:16 +00:00
2023-10-05 18:46:18 +00:00
# GraphQL document asking ackee for per-page statistics of every domain,
# sorted with TOP; only `count` and `value` are requested (the `#` lines
# inside the query are GraphQL comments and are sent as-is).
_LOAD_PAGES_QUERY = """ query getDomains {
domains {
title
statistics {
pages(sorting: TOP) {
# id
count
# created
value
}
}
} } """

load_pages = gql(_LOAD_PAGES_QUERY)
2023-10-05 20:18:06 +00:00
2023-10-05 21:22:54 +00:00
# GraphQL schema text passed to the gql client in ViewedStorage.init().
# Read through a context manager so the file handle is closed immediately
# instead of lingering until garbage collection; the path is relative to the
# process working directory.
with open("schemas/ackee.graphql", encoding="utf-8") as schema_file:
    schema_str = schema_file.read()

# Permanent ackee API token; when it is empty ViewedStorage.init() marks the
# storage as disabled.
token = environ.get("ACKEE_TOKEN", "")
2022-11-18 17:54:37 +00:00
2022-11-21 22:23:16 +00:00
def create_client(headers=None, schema=None):
    """Build a gql ``Client`` bound to the ackee.discours.io API endpoint.

    ``headers`` is forwarded to the HTTP transport and ``schema`` to the
    client, both unchanged.
    """
    # NOTE(review): RequestsHTTPTransport is gql's synchronous transport while
    # the callers in this file use Client.execute_async -- confirm that the
    # installed gql version accepts this combination.
    transport = RequestsHTTPTransport(
        url="https://ackee.discours.io/api",
        headers=headers,
    )
    return Client(schema=schema, transport=transport)
2022-11-18 17:54:37 +00:00
class ViewedStorage:
    """Class-level storage of view counters synchronised from ackee.

    The class is never instantiated: every method is a ``@staticmethod`` that
    works on the class attributes below (``self = ViewedStorage``).
    """

    # guards client setup and the counter reads/writes done by the methods below
    lock = asyncio.Lock()
    # shout slug -> views counter
    by_shouts = {}
    # topic slug -> {shout slug -> views counter}
    by_topics = {}
    views = None
    # page statistics returned by the last successful update_pages() call
    pages = None
    domains = None
    period = 60 * 60  # every hour
    client = None
    auth_result = None
    # set by init() when ACKEE_TOKEN is empty; worker() then returns at once
    disabled = False

    @staticmethod
    async def init():
        """graphql client connection using permanent token"""
        self = ViewedStorage
        async with self.lock:
            if token:
                self.client = create_client(
                    {"Authorization": "Bearer %s" % str(token)}, schema=schema_str
                )
                # the token is a long-lived secret, so it is deliberately
                # not written to the log
                print("[stat] * authorized permanentely by ackee.discours.io")
            else:
                print("[stat] * please set ACKEE_TOKEN")
                self.disabled = True

    @staticmethod
    async def update_pages():
        """query all the pages from ackee sorted by views count

        Every page URL is reduced to the part after ``discours.io/`` (query
        string dropped) and stored through ``increment``.  A page that cannot
        be parsed or stored is skipped so that it does not prevent the
        remaining pages from being updated.  Errors of the GraphQL request
        itself propagate to the caller.
        """
        print("[stat] ⎧ updating ackee pages data ---")
        start = time.time()
        self = ViewedStorage
        result = await self.client.execute_async(load_pages)
        self.pages = result["domains"][0]["statistics"]["pages"]

        shouts = {}
        for page in self.pages:
            try:
                url = page["value"].split("?")[0]
                shouts[url.split("discours.io/")[-1]] = page["count"]
            except (KeyError, TypeError, AttributeError) as e:
                print("[stat] ⎪ skipping malformed page %r: %r" % (page, e))

        # best effort per slug: ackee also reports pages that are not shouts,
        # and one failing slug must not abort the whole run
        skipped = 0
        last_error = None
        for slug, count in shouts.items():
            try:
                await ViewedStorage.increment(slug, count)
            except Exception as e:
                skipped += 1
                last_error = e
        if skipped:
            print(
                "[stat] ⎪ %d pages not stored, last error: %r"
                % (skipped, last_error)
            )
        print("[stat] ⎪ %d pages collected " % len(shouts.keys()))

        end = time.time()
        print("[stat] ⎪ update_pages took %fs " % (end - start))

    @staticmethod
    async def get_facts():
        """run the ``load_facts`` query and return its result"""
        self = ViewedStorage
        async with self.lock:
            # await inside the lock: returning the bare coroutine would hand
            # callers an unfinished request after the lock is released
            return await self.client.execute_async(load_facts)

    # unused yet
    @staticmethod
    async def get_shout(shout_slug):
        """getting shout views metric by slug

        Returns the cached counter; when the cache has no (or a zero) value
        the shout is loaded from the database, cached, pushed into the topic
        counters and its value returned.  Exceptions raised by the query
        (``.one()`` is used, so a missing row is an error) propagate.
        """
        self = ViewedStorage
        async with self.lock:
            shout_views = self.by_shouts.get(shout_slug)
            if not shout_views:
                with local_session() as session:
                    shout = (
                        session.query(Shout).where(Shout.slug == shout_slug).one()
                    )
                    # keep the counters numeric even if the column is empty
                    shout_views = shout.views or 0
                    self.by_shouts[shout_slug] = shout_views
                    self.update_topics(session, shout_slug)
            return shout_views

    @staticmethod
    async def get_topic(topic_slug):
        """getting topic views value summed"""
        self = ViewedStorage
        async with self.lock:
            return sum(self.by_topics.get(topic_slug, {}).values())

    @staticmethod
    def update_topics(session, shout_slug):
        """updates topics counters by shout slug

        Copies ``by_shouts[shout_slug]`` into ``by_topics`` for every topic
        joined to the shout; the shout must already be present in
        ``by_shouts``.
        """
        self = ViewedStorage
        for [_shout_topic, topic] in (
            session.query(ShoutTopic, Topic)
            .join(Topic)
            .join(Shout)
            .where(Shout.slug == shout_slug)
            .all()
        ):
            by_shout = self.by_topics.setdefault(topic.slug, {})
            by_shout[shout_slug] = self.by_shouts[shout_slug]

    @staticmethod
    async def increment(shout_slug, amount=1, viewer="ackee"):
        """the only way to change views counter

        Writes ``amount`` into ``Shout.viewsOld`` when ``viewer`` is
        ``"old-discours"`` and into ``Shout.viewsAckee`` otherwise, commits,
        then updates the in-memory counters.  Exceptions raised by the query
        (``.one()`` is used) propagate to the caller.
        """
        self = ViewedStorage
        async with self.lock:
            # TODO optimize, currenty we execute 1 DB transaction per shout
            with local_session() as session:
                shout = session.query(Shout).where(Shout.slug == shout_slug).one()
                if viewer == "old-discours":
                    # this is needed for old db migration
                    if shout.viewsOld == amount:
                        print(f"viewsOld amount: {amount}")
                    else:
                        print(f"viewsOld amount changed: {shout.viewsOld} --> {amount}")
                        shout.viewsOld = amount
                else:
                    if shout.viewsAckee == amount:
                        print(f"viewsAckee amount: {amount}")
                    else:
                        print(
                            f"viewsAckee amount changed: {shout.viewsAckee} --> {amount}"
                        )
                        shout.viewsAckee = amount

                session.commit()
                # this part is currently unused
                # NOTE(review): the DB column is overwritten with `amount`
                # while this cache adds `amount` on every call -- confirm
                # which of the two semantics is intended before relying on it.
                self.by_shouts[shout_slug] = self.by_shouts.get(shout_slug, 0) + amount
                self.update_topics(session, shout_slug)

    @staticmethod
    async def worker():
        """async task worker

        Calls ``update_pages`` every ``period`` seconds.  After a failure it
        retries in 10 seconds and gives up after more than 3 consecutive
        failures.  Returns immediately when the storage is disabled.
        """
        failed = 0
        self = ViewedStorage
        if self.disabled:
            return
        print("[stat] worker started")
        while True:
            try:
                print("[stat] - updating views...")
                await self.update_pages()
                failed = 0
            except Exception:
                import traceback

                traceback.print_exc()
                failed += 1
                print("[stat] - update failed #%d, wait 10 seconds" % failed)
                if failed > 3:
                    print("[stat] - not trying to update anymore")
                    break
            if failed == 0:
                when = datetime.now(timezone.utc) + timedelta(seconds=self.period)
                # strftime instead of splitting the ISO string on ".": the
                # fraction is absent whenever microseconds happen to be 0
                print(
                    "[stat] ⎩ next update: %s"
                    % when.astimezone().strftime("%Y-%m-%d %H:%M:%S")
                )
                await asyncio.sleep(self.period)
            else:
                await asyncio.sleep(10)
                print("[stat] - trying to update data again")