style: readme with python granian server start

feat: with author sorting by shouts, followers and names
2025-05-25 20:51:39 +00:00 · 2025-05-25 17:30:12 -03:00
16 changed files with 305 additions and 1150 deletions
--- a/.gitea/workflows/main.yml
+++ b/.gitea/workflows/main.yml
@@ -29,16 +29,7 @@ jobs:
        if: github.ref == 'refs/heads/dev'
        uses: dokku/github-action@master
        with:
-          branch: 'main'
+          branch: 'dev'
          force: true
          git_remote_url: 'ssh://dokku@v2.discours.io:22/core'
          ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
-
-      - name: Push to dokku for staging branch
-        if: github.ref == 'refs/heads/staging'
-        uses: dokku/github-action@master
-        with:
-          branch: 'dev'
-          git_remote_url: 'ssh://dokku@staging.discours.io:22/core'
-          ssh_private_key: ${{ secrets.SSH_PRIVATE_KEY }}
-          git_push_flags: '--force'
--- a/.gitignore
+++ b/.gitignore
@@ -128,9 +128,6 @@ dmypy.json
 .idea
 temp.*

-# Debug
-DEBUG.log
-
 discours.key
 discours.crt
 discours.pem
@@ -165,4 +162,4 @@ views.json
 *.crt
 *cache.json
 .cursor
-.devcontainer/
+.devcontainer/devcontainer.json
--- a/1
+++ b/1
@@ -3,7 +3,6 @@ FROM python:slim
 RUN apt-get update && apt-get install -y \
    postgresql-client \
    curl \
-    build-essential \
    && rm -rf /var/lib/apt/lists/*

 WORKDIR /app
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ mkcert localhost
 Then, run the server:

 ```shell
-python server.py dev
+python -m granian main:app --interface asgi --host 0.0.0.0 --port 8000
 ```

 ### Useful Commands
--- a/cache/precache.py
+++ b/cache/precache.py
@@ -77,15 +77,11 @@ async def precache_topics_followers(topic_id: int, session):

 async def precache_data():
    logger.info("precaching...")
-    logger.debug("Entering precache_data")
    try:
        key = "authorizer_env"
-        logger.debug(f"Fetching existing hash for key '{key}' from Redis")
        # cache reset
        value = await redis.execute("HGETALL", key)
-        logger.debug(f"Fetched value for '{key}': {value}")
        await redis.execute("FLUSHDB")
-        logger.debug("Redis database flushed")
        logger.info("redis: FLUSHDB")

        # Преобразуем словарь в список аргументов для HSET
@@ -101,27 +97,21 @@ async def precache_data():
                await redis.execute("HSET", key, *value)
            logger.info(f"redis hash '{key}' was restored")

-        logger.info("Beginning topic precache phase")
        with local_session() as session:
            # topics
            q = select(Topic).where(Topic.community == 1)
            topics = get_with_stat(q)
-            logger.info(f"Found {len(topics)} topics to precache")
            for topic in topics:
                topic_dict = topic.dict() if hasattr(topic, "dict") else topic
-                logger.debug(f"Precaching topic id={topic_dict.get('id')}")
                await cache_topic(topic_dict)
-                logger.debug(f"Cached topic id={topic_dict.get('id')}")
                await asyncio.gather(
                    precache_topics_followers(topic_dict["id"], session),
                    precache_topics_authors(topic_dict["id"], session),
                )
-                logger.debug(f"Finished precaching followers and authors for topic id={topic_dict.get('id')}")
            logger.info(f"{len(topics)} topics and their followings precached")

            # authors
            authors = get_with_stat(select(Author).where(Author.user.is_not(None)))
-            logger.info(f"Found {len(authors)} authors to precache")
            logger.info(f"{len(authors)} authors found in database")
            for author in authors:
                if isinstance(author, Author):
@@ -129,12 +119,10 @@ async def precache_data():
                    author_id = profile.get("id")
                    user_id = profile.get("user", "").strip()
                    if author_id and user_id:
-                        logger.debug(f"Precaching author id={author_id}")
                        await cache_author(profile)
                        await asyncio.gather(
                            precache_authors_followers(author_id, session), precache_authors_follows(author_id, session)
                        )
-                        logger.debug(f"Finished precaching followers and follows for author id={author_id}")
                else:
                    logger.error(f"fail caching {author}")
            logger.info(f"{len(authors)} authors and their followings precached")
--- a/main.py
+++ b/main.py
@@ -17,7 +17,7 @@ from cache.revalidator import revalidation_manager
 from services.exception import ExceptionHandlerMiddleware
 from services.redis import redis
 from services.schema import create_all_tables, resolvers
-from services.search import search_service, initialize_search_index
+from services.search import search_service
 from services.viewed import ViewedStorage
 from services.webhook import WebhookEndpoint, create_webhook_endpoint
 from settings import DEV_SERVER_PID_FILE_NAME, MODE
@@ -34,79 +34,24 @@ async def start():
                f.write(str(os.getpid()))
    print(f"[main] process started in {MODE} mode")

-async def check_search_service():
-    """Check if search service is available and log result"""
-    info = await search_service.info()
-    if info.get("status") in ["error", "unavailable"]:
-        print(f"[WARNING] Search service unavailable: {info.get('message', 'unknown reason')}")
-    else:
-        print(f"[INFO] Search service is available: {info}")

-# Helper to run precache with timeout and catch errors
-async def precache_with_timeout():
-    try:
-        await asyncio.wait_for(precache_data(), timeout=60)
-    except asyncio.TimeoutError:
-        print("[precache] Precache timed out after 60 seconds")
-    except Exception as e:
-        print(f"[precache] Error during precache: {e}")
-
-
-# indexing DB data
-# async def indexing():
-#     from services.db import fetch_all_shouts
-#     all_shouts = await fetch_all_shouts()
-#     await initialize_search_index(all_shouts)
 async def lifespan(_app):
    try:
-        print("[lifespan] Starting application initialization")
        create_all_tables()
-        
-        # schedule precaching in background with timeout and error handling
-        asyncio.create_task(precache_with_timeout())
-        
        await asyncio.gather(
            redis.connect(),
+            precache_data(),
            ViewedStorage.init(),
            create_webhook_endpoint(),
-            check_search_service(),
+            search_service.info(),
            start(),
            revalidation_manager.start(),
        )
-        print("[lifespan] Basic initialization complete")
-
-        # Add a delay before starting the intensive search indexing
-        print("[lifespan] Waiting for system stabilization before search indexing...")
-        await asyncio.sleep(10)  # 10-second delay to let the system stabilize
-
-        # Start search indexing as a background task with lower priority
-        asyncio.create_task(initialize_search_index_background())
-
        yield
    finally:
-        print("[lifespan] Shutting down application services")
        tasks = [redis.disconnect(), ViewedStorage.stop(), revalidation_manager.stop()]
        await asyncio.gather(*tasks, return_exceptions=True)
-        print("[lifespan] Shutdown complete")

-# Initialize search index in the background
-async def initialize_search_index_background():
-    """Run search indexing as a background task with low priority"""
-    try:
-        print("[search] Starting background search indexing process")
-        from services.db import fetch_all_shouts
-        
-        # Get total count first (optional)
-        all_shouts = await fetch_all_shouts()
-        total_count = len(all_shouts) if all_shouts else 0
-        print(f"[search] Fetched {total_count} shouts for background indexing")
-        
-        # Start the indexing process with the fetched shouts
-        print("[search] Beginning background search index initialization...")
-        await initialize_search_index(all_shouts)
-        print("[search] Background search index initialization complete")
-    except Exception as e:
-        print(f"[search] Error in background search indexing: {str(e)}")

 # Создаем экземпляр GraphQL
 graphql_app = GraphQL(schema, debug=True)
--- a/orm/shout.py
+++ b/orm/shout.py
@@ -71,34 +71,6 @@ class ShoutAuthor(Base):
 class Shout(Base):
    """
    Публикация в системе.
-
-    Attributes:
-        body (str)
-        slug (str)
-        cover (str) : "Cover image url"
-        cover_caption (str) : "Cover image alt caption"
-        lead (str) 
-        title (str)
-        subtitle (str)
-        layout (str)
-        media (dict)
-        authors (list[Author])
-        topics (list[Topic])
-        reactions (list[Reaction])
-        lang (str)
-        version_of (int)
-        oid (str)
-        seo (str) : JSON
-        draft (int)
-        created_at (int)
-        updated_at (int)
-        published_at (int)
-        featured_at (int)
-        deleted_at (int)
-        created_by (int)
-        updated_by (int)
-        deleted_by (int)
-        community (int)
    """

    __tablename__ = "shout"
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,10 +13,6 @@ starlette
 gql
 ariadne
 granian
-
-# NLP and search
-httpx
-
 orjson
 pydantic
 trafilatura
--- a/resolvers/__init__.py
+++ b/resolvers/__init__.py
@@ -8,7 +8,6 @@ from resolvers.author import (  # search_authors,
    get_author_id,
    get_authors_all,
    load_authors_by,
-    load_authors_search,
    update_author,
 )
 from resolvers.community import get_communities_all, get_community
@@ -74,7 +73,6 @@ __all__ = [
    "get_author_follows_authors",
    "get_authors_all",
    "load_authors_by",
-    "load_authors_search",
    "update_author",
    ## "search_authors",
    # community
--- a/resolvers/author.py
+++ b/resolvers/author.py
@@ -20,7 +20,6 @@ from services.auth import login_required
 from services.db import local_session
 from services.redis import redis
 from services.schema import mutation, query
-from services.search import search_service
 from utils.logger import root_logger as logger

 DEFAULT_COMMUNITIES = [1]
@@ -78,18 +77,45 @@ async def get_authors_with_stats(limit=50, offset=0, by: Optional[str] = None):
            base_query = select(Author).where(Author.deleted_at.is_(None))

            # Применяем сортировку
+            
+            # vars for statistics sorting
+            stats_sort_field = None
+            stats_sort_direction = "desc"
+            
            if by:
                if isinstance(by, dict):
+                    logger.debug(f"Processing dict-based sorting: {by}")
                    # Обработка словаря параметров сортировки
-                    from sqlalchemy import asc, desc
+                    from sqlalchemy import asc, desc, func
+                    from orm.shout import ShoutAuthor
+                    from orm.author import AuthorFollower

-                    for field, direction in by.items():
-                        column = getattr(Author, field, None)
-                        if column:
-                            if direction.lower() == "desc":
+                    # Checking for order field in the dictionary
+                    if "order" in by:
+                        order_value = by["order"]
+                        logger.debug(f"Found order field with value: {order_value}")
+                        if order_value in ["shouts", "followers", "rating", "comments"]:
+                            stats_sort_field = order_value
+                            stats_sort_direction = "desc"  # По умолчанию убывающая сортировка для статистики
+                            logger.debug(f"Applying statistics-based sorting by: {stats_sort_field}")
+                        elif order_value == "name":
+                            # Sorting by name in ascending order
+                            base_query = base_query.order_by(asc(Author.name))
+                            logger.debug("Applying alphabetical sorting by name")
+                        else:
+                            # If order is not a stats field, treat it as a regular field
+                            column = getattr(Author, order_value, None)
+                            if column:
                                base_query = base_query.order_by(desc(column))
-                            else:
-                                base_query = base_query.order_by(column)
+                    else:
+                        # Regular sorting by fields
+                        for field, direction in by.items():
+                            column = getattr(Author, field, None)
+                            if column:
+                                if direction.lower() == "desc":
+                                    base_query = base_query.order_by(desc(column))
+                                else:
+                                    base_query = base_query.order_by(column)
                elif by == "new":
                    base_query = base_query.order_by(desc(Author.created_at))
                elif by == "active":
@@ -100,6 +126,55 @@ async def get_authors_with_stats(limit=50, offset=0, by: Optional[str] = None):
            else:
                base_query = base_query.order_by(desc(Author.created_at))

+            # If sorting by statistics, modify the query
+            if stats_sort_field == "shouts":
+                # Sorting by the number of shouts
+                from sqlalchemy import func, and_
+                from orm.shout import Shout, ShoutAuthor
+                
+                subquery = (
+                    select(
+                        ShoutAuthor.author,
+                        func.count(func.distinct(Shout.id)).label("shouts_count")
+                    )
+                    .select_from(ShoutAuthor)
+                    .join(Shout, ShoutAuthor.shout == Shout.id)
+                    .where(
+                        and_(
+                            Shout.deleted_at.is_(None),
+                            Shout.published_at.is_not(None)
+                        )
+                    )
+                    .group_by(ShoutAuthor.author)
+                    .subquery()
+                )
+                
+                base_query = (
+                    base_query
+                    .outerjoin(subquery, Author.id == subquery.c.author)
+                    .order_by(desc(func.coalesce(subquery.c.shouts_count, 0)))
+                )
+            elif stats_sort_field == "followers":
+                # Sorting by the number of followers
+                from sqlalchemy import func
+                from orm.author import AuthorFollower
+                
+                subquery = (
+                    select(
+                        AuthorFollower.author,
+                        func.count(func.distinct(AuthorFollower.follower)).label("followers_count")
+                    )
+                    .select_from(AuthorFollower)
+                    .group_by(AuthorFollower.author)
+                    .subquery()
+                )
+                
+                base_query = (
+                    base_query
+                    .outerjoin(subquery, Author.id == subquery.c.author)
+                    .order_by(desc(func.coalesce(subquery.c.followers_count, 0)))
+                )
+
            # Применяем лимит и смещение
            base_query = base_query.limit(limit).offset(offset)

@@ -302,46 +377,6 @@ async def load_authors_by(_, _info, by, limit, offset):
    return await get_authors_with_stats(limit, offset, by)


-@query.field("load_authors_search")
-async def load_authors_search(_, info, text: str, limit: int = 10, offset: int = 0):
-    """
-    Resolver for searching authors by text. Works with txt-ai search endpony.
-    Args:
-        text: Search text
-        limit: Maximum number of authors to return
-        offset: Offset for pagination
-    Returns:
-        list: List of authors matching the search criteria
-    """
-    
-    # Get author IDs from search engine (already sorted by relevance)
-    search_results = await search_service.search_authors(text, limit, offset)
-
-    if not search_results:
-        return []
-
-    author_ids = [result.get("id") for result in search_results if result.get("id")]
-    if not author_ids:
-        return []
-
-    # Fetch full author objects from DB
-    with local_session() as session:
-        # Simple query to get authors by IDs - no need for stats here
-        authors_query = select(Author).filter(Author.id.in_(author_ids))
-        db_authors = session.execute(authors_query).scalars().all()
-    
-    if not db_authors:
-        return []
-
-    # Create a dictionary for quick lookup
-    authors_dict = {str(author.id): author for author in db_authors}
-    
-    # Keep the order from search results (maintains the relevance sorting)
-    ordered_authors = [authors_dict[author_id] for author_id in author_ids if author_id in authors_dict]
-
-    return ordered_authors
-
-
 def get_author_id_from(slug="", user=None, author_id=None):
    try:
        author_id = None
--- a/resolvers/reader.py
+++ b/resolvers/reader.py
@@ -10,7 +10,7 @@ from orm.shout import Shout, ShoutAuthor, ShoutTopic
 from orm.topic import Topic
 from services.db import json_array_builder, json_builder, local_session
 from services.schema import query
-from services.search import search_text, get_search_count
+from services.search import search_text
 from services.viewed import ViewedStorage
 from utils.logger import root_logger as logger

@@ -187,10 +187,12 @@ def get_shouts_with_links(info, q, limit=20, offset=0):
    """
    shouts = []
    try:
+        # logger.info(f"Starting get_shouts_with_links with limit={limit}, offset={offset}")
        q = q.limit(limit).offset(offset)

        with local_session() as session:
            shouts_result = session.execute(q).all()
+            # logger.info(f"Got {len(shouts_result) if shouts_result else 0} shouts from query")

            if not shouts_result:
                logger.warning("No shouts found in query result")
@@ -201,6 +203,7 @@ def get_shouts_with_links(info, q, limit=20, offset=0):
                    shout = None
                    if hasattr(row, "Shout"):
                        shout = row.Shout
+                        # logger.debug(f"Processing shout#{shout.id} at index {idx}")
                    if shout:
                        shout_id = int(f"{shout.id}")
                        shout_dict = shout.dict()
@@ -228,16 +231,20 @@ def get_shouts_with_links(info, q, limit=20, offset=0):
                        topics = None
                        if has_field(info, "topics") and hasattr(row, "topics"):
                            topics = orjson.loads(row.topics) if isinstance(row.topics, str) else row.topics
+                            # logger.debug(f"Shout#{shout_id} topics: {topics}")
                            shout_dict["topics"] = topics

                        if has_field(info, "main_topic"):
                            main_topic = None
                            if hasattr(row, "main_topic"):
+                                # logger.debug(f"Raw main_topic for shout#{shout_id}: {row.main_topic}")
                                main_topic = (
                                    orjson.loads(row.main_topic) if isinstance(row.main_topic, str) else row.main_topic
                                )
+                                # logger.debug(f"Parsed main_topic for shout#{shout_id}: {main_topic}")

                            if not main_topic and topics and len(topics) > 0:
+                                # logger.info(f"No main_topic found for shout#{shout_id}, using first topic from list")
                                main_topic = {
                                    "id": topics[0]["id"],
                                    "title": topics[0]["title"],
@@ -245,8 +252,10 @@ def get_shouts_with_links(info, q, limit=20, offset=0):
                                    "is_main": True,
                                }
                            elif not main_topic:
+                                logger.warning(f"No main_topic and no topics found for shout#{shout_id}")
                                main_topic = {"id": 0, "title": "no topic", "slug": "notopic", "is_main": True}
                            shout_dict["main_topic"] = main_topic
+                            # logger.debug(f"Final main_topic for shout#{shout_id}: {main_topic}")

                        if has_field(info, "authors") and hasattr(row, "authors"):
                            shout_dict["authors"] = (
@@ -273,6 +282,7 @@ def get_shouts_with_links(info, q, limit=20, offset=0):
        logger.error(f"Fatal error in get_shouts_with_links: {e}", exc_info=True)
        raise
    finally:
+        logger.info(f"Returning {len(shouts)} shouts from get_shouts_with_links")
        return shouts


@@ -391,49 +401,33 @@ async def load_shouts_search(_, info, text, options):
    """
    limit = options.get("limit", 10)
    offset = options.get("offset", 0)
-    
    if isinstance(text, str) and len(text) > 2:
-        # Get search results with pagination
        results = await search_text(text, limit, offset)
-        
-        if not results:
-            logger.info(f"No search results found for '{text}'")
-            return []
-        
-        # Extract IDs in the order from the search engine
-        hits_ids = [str(sr.get("id")) for sr in results if sr.get("id")]
+        scores = {}
+        hits_ids = []
+        for sr in results:
+            shout_id = sr.get("id")
+            if shout_id:
+                shout_id = str(shout_id)
+                scores[shout_id] = sr.get("score")
+                hits_ids.append(shout_id)

-        # Query DB for only the IDs in the current page
-        q = query_with_stat(info)
+        q = (
+            query_with_stat(info)
+            if has_field(info, "stat")
+            else select(Shout).filter(and_(Shout.published_at.is_not(None), Shout.deleted_at.is_(None)))
+        )
        q = q.filter(Shout.id.in_(hits_ids))
-        q = apply_filters(q, options.get("filters", {}))
-
-        shouts = get_shouts_with_links(info, q, len(hits_ids), 0)
-
-        # Reorder shouts to match the order from hits_ids
-        shouts_dict = {str(shout['id']): shout for shout in shouts}
-        ordered_shouts = [shouts_dict[shout_id] for shout_id in hits_ids if shout_id in shouts_dict]
-
-        return ordered_shouts
+        q = apply_filters(q, options)
+        q = apply_sorting(q, options)
+        shouts = get_shouts_with_links(info, q, limit, offset)
+        for shout in shouts:
+            shout.score = scores[f"{shout.id}"]
+        shouts.sort(key=lambda x: x.score, reverse=True)
+        return shouts
    return []


-@query.field("get_search_results_count")
-async def get_search_results_count(_, info, text):
-    """
-    Returns the total count of search results for a search query.
-    
-    :param _: Root query object (unused)
-    :param info: GraphQL context information
-    :param text: Search query text
-    :return: Total count of results
-    """
-    if isinstance(text, str) and len(text) > 2:
-        count = await get_search_count(text)
-        return {"count": count}
-    return {"count": 0}
-
-
@query.field("load_shouts_unrated")
 async def load_shouts_unrated(_, info, options):
    """
--- a/schema/query.graphql
+++ b/schema/query.graphql
@@ -4,7 +4,7 @@ type Query {
  get_author_id(user: String!): Author
  get_authors_all: [Author]
  load_authors_by(by: AuthorsBy!, limit: Int, offset: Int): [Author]
-  load_authors_search(text: String!, limit: Int, offset: Int): [Author!] # Search for authors by name or bio
+  # search_authors(what: String!): [Author]

  # community
  get_community: Community
@@ -33,7 +33,6 @@ type Query {
  get_shout(slug: String, shout_id: Int): Shout
  load_shouts_by(options: LoadShoutsOptions): [Shout]
  load_shouts_search(text: String!, options: LoadShoutsOptions): [SearchResult]
-  get_search_results_count(text: String!): CountResult!
  load_shouts_bookmarked(options: LoadShoutsOptions): [Shout]

  # rating
--- a/schema/type.graphql
+++ b/schema/type.graphql
@@ -213,7 +213,6 @@ type CommonResult {
 }

 type SearchResult {
-    id: Int!
    slug: String!
    title: String!
    cover: String
@@ -281,7 +280,3 @@ type MyRateComment {
  my_rate: ReactionKind
 }

-type CountResult {
-  count: Int!
-}
-
--- a/services/db.py
+++ b/services/db.py
@@ -19,7 +19,7 @@ from sqlalchemy import (
    inspect,
    text,
 )
-from sqlalchemy.orm import Session, configure_mappers, declarative_base, joinedload
+from sqlalchemy.orm import Session, configure_mappers, declarative_base
 from sqlalchemy.sql.schema import Table

 from settings import DB_URL
@@ -259,32 +259,3 @@ def get_json_builder():

 # Используем их в коде
 json_builder, json_array_builder, json_cast = get_json_builder()
-
-# Fetch all shouts, with authors preloaded
-# This function is used for search indexing
-
-async def fetch_all_shouts(session=None):
-    """Fetch all published shouts for search indexing with authors preloaded"""
-    from orm.shout import Shout
-    
-    close_session = False
-    if session is None:
-        session = local_session()
-        close_session = True
-    
-    try:
-        # Fetch only published and non-deleted shouts with authors preloaded
-        query = session.query(Shout).options(
-            joinedload(Shout.authors)
-        ).filter(
-            Shout.published_at.is_not(None),
-            Shout.deleted_at.is_(None)
-        )
-        shouts = query.all()
-        return shouts
-    except Exception as e:
-        logger.error(f"Error fetching shouts for search indexing: {e}")
-        return []
-    finally:
-        if close_session:
-            session.close()
--- a/services/schema.py
+++ b/services/schema.py
@@ -29,19 +29,12 @@ async def request_graphql_data(gql, url=AUTH_URL, headers=None):
        async with httpx.AsyncClient() as client:
            response = await client.post(url, json=gql, headers=headers)
            if response.status_code == 200:
-                # Check if the response has content before parsing
-                if response.content and len(response.content.strip()) > 0:
-                    try:
-                        data = response.json()
-                        errors = data.get("errors")
-                        if errors:
-                            logger.error(f"{url} response: {data}")
-                        else:
-                            return data
-                    except Exception as json_err:
-                        logger.error(f"JSON decode error: {json_err}, Response content: {response.text[:100]}")
+                data = response.json()
+                errors = data.get("errors")
+                if errors:
+                    logger.error(f"{url} response: {data}")
                else:
-                    logger.error(f"{url}: Response is empty")
+                    return data
            else:
                logger.error(f"{url}: {response.status_code} {response.text}")
    except Exception as _e:
--- a/services/search.py
+++ b/services/search.py
Author	SHA1	Message	Date
Stepan Vladovskiy	804f900c38	style: readme with python granian server start [CI: all checks were successful; Deploy on push / deploy (push): successful in 6s]	2025-05-25 20:51:39 +00:00
Stepan Vladovskiy	b5dd690fbb	feat: with author sorting by shouts, followers and names [CI: all checks were successful; Deploy on push / deploy (push): successful in 6s]	2025-05-25 17:30:12 -03:00