# core/services/search.py

import json
import logging
import os
from opensearchpy import OpenSearch
from services.rediscache import redis

logger = logging.getLogger('[services.search] ')
logger.setLevel(logging.DEBUG)

ELASTIC_HOST = (
    os.environ.get('ELASTIC_HOST', 'localhost')
    .replace('https://', '')
    .replace('http://', '')
)
ELASTIC_USER = os.environ.get('ELASTIC_USER', '')
ELASTIC_PASSWORD = os.environ.get('ELASTIC_PASSWORD', '')
# cast to int so the hosts entry below always gets a numeric port
ELASTIC_PORT = int(os.environ.get('ELASTIC_PORT', 9200))
ELASTIC_AUTH = f'{ELASTIC_USER}:{ELASTIC_PASSWORD}' if ELASTIC_USER else ''
ELASTIC_URL = os.environ.get(
    'ELASTIC_URL', f'https://{ELASTIC_AUTH}@{ELASTIC_HOST}:{ELASTIC_PORT}'
)

REDIS_TTL = 86400  # 1 day in seconds
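
# Example (hypothetical) values for a local development setup:
#   ELASTIC_HOST=localhost ELASTIC_PORT=9200 ELASTIC_USER=admin ELASTIC_PASSWORD=admin
# would resolve ELASTIC_URL to 'https://admin:admin@localhost:9200'.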


class SearchService:
    def __init__(self, index_name='posts'):
        logger.info('initialized')
        self.index_name = index_name
        self.disabled = False
        try:
            self.client = OpenSearch(
                hosts=[{'host': ELASTIC_HOST, 'port': ELASTIC_PORT}],
                http_compress=True,
                http_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
                use_ssl=True,
                # self-signed dev certs: skip verification
                verify_certs=False,
                ssl_assert_hostname=False,
                ssl_show_warn=False,
                # ca_certs = ca_certs_path
            )
        except Exception as exc:
            logger.error(exc)
            self.disabled = True
        # only touch the index when the client was created successfully,
        # otherwise self.client is undefined and check_index would crash
        if not self.disabled:
            self.check_index()

    def info(self):
        logger.info(f'{self.client}')
        try:
            indices = self.client.indices.get_alias('*')
            logger.debug('List of indices:')
            for index in indices:
                logger.debug(f'- {index}')
        except Exception as e:
            logger.error(f'Error while listing indices: {e}')

    def delete_index(self):
        self.client.indices.delete(index=self.index_name, ignore_unavailable=True)

    def create_index(self):
        index_settings = {
            'settings': {
                'index': {
                    'number_of_shards': 1,
                    'auto_expand_replicas': '0-all',
                },
                'analysis': {
                    'analyzer': {
                        'ru': {
                            'tokenizer': 'standard',
                            'filter': ['lowercase', 'ru_stop', 'ru_stemmer'],
                        }
                    },
                    'filter': {
                        'ru_stemmer': {
                            'type': 'stemmer',
                            'language': 'russian',
                        },
                        'ru_stop': {
                            'type': 'stop',
                            'stopwords': '_russian_',
                        },
                    },
                },
            },
            'mappings': {
                'properties': {
                    'body': {'type': 'text', 'analyzer': 'ru'},
                    'text': {'type': 'text'},
                    'author': {'type': 'text'},
                }
            },
        }
        try:
            self.client.indices.create(index=self.index_name, body=index_settings)
            self.client.indices.close(index=self.index_name)
            self.client.indices.open(index=self.index_name)
        except Exception as error:
            logger.warning(error)
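
    # Rough illustration of the 'ru' analyzer above: input is lowercased,
    # Russian stop words are dropped, and the remaining tokens are stemmed
    # (e.g. 'Стеклянные дома' becomes approximately ['стеклянн', 'дом']),
    # so morphological variants of a word match the same documents.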

    def put_mapping(self):
        mapping = {
            'properties': {
                'body': {'type': 'text', 'analyzer': 'ru'},
                'text': {'type': 'text'},
                'author': {'type': 'text'},
            }
        }
        self.client.indices.put_mapping(index=self.index_name, body=mapping)
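
    # The mapping above mirrors the one declared in create_index; re-applying
    # an identical mapping is harmless, and this path also lets the mapping be
    # pushed to an index that already exists.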

    def check_index(self):
        if not self.client.indices.exists(index=self.index_name):
            logger.debug(f'Creating {self.index_name} index')
            self.create_index()
            self.put_mapping()
        else:
            # Verify the mapping and recreate the index if it has drifted.
            # get_mapping returns {index_name: {'mappings': {...}}}, so
            # compare the nested properties rather than the whole response.
            mapping = self.client.indices.get_mapping(index=self.index_name)
            properties = (
                mapping.get(self.index_name, {})
                .get('mappings', {})
                .get('properties', {})
            )
            expected_properties = {
                'body': {'type': 'text', 'analyzer': 'ru'},
                'text': {'type': 'text'},
                'author': {'type': 'text'},
            }
            if properties != expected_properties:
                logger.debug(
                    f'Recreating {self.index_name} index due to incorrect mapping'
                )
                self.recreate_index()

    def recreate_index(self):
        self.delete_index()
        self.check_index()

    def index_post(self, shout):
        id_ = str(shout.id)
        logger.debug(f'Indexing post id {id_}')
        self.client.index(index=self.index_name, id=id_, body=shout)
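
    # NOTE: index_post passes `shout` straight through as the request body;
    # opensearch-py json-encodes it, so a raw ORM instance may need to be
    # converted to a dict upstream.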

    def search_post(self, query, limit, offset):
        logger.debug(f'query: {query}')
        search_body = {
            # the _all meta field no longer exists in OpenSearch, so query
            # the mapped text fields explicitly instead
            'query': {
                'multi_match': {
                    'query': query,
                    'fields': ['body', 'text', 'author'],
                }
            },
        }
        search_response = self.client.search(
            index=self.index_name, body=search_body, size=limit, from_=offset
        )
        hits = search_response['hits']['hits']
        return [
            {
                **hit['_source'],
                'score': hit['_score'],
            }
            for hit in hits
        ]
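

# Search results are plain dicts (the hit's _source plus its relevance
# score), e.g. roughly [{'body': '...', 'author': '...', 'score': 3.2}, ...],
# which keeps them JSON-serializable for the Redis cache below.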
search = SearchService()


async def search_text(text: str, limit: int = 50, offset: int = 0):
    payload = []
    try:
        # Use a prefixed key to separate search results from other Redis data
        redis_key = f'search:{text}'
        if not search.disabled:
            # Query OpenSearch via SearchService.search_post
            payload = search.search_post(text, limit, offset)
            # Cache the serialized results in Redis with a TTL
            await redis.execute('SETEX', redis_key, REDIS_TTL, json.dumps(payload))
    except Exception as e:
        logger.error(f'Error during search: {e}')
    return payload
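

# A minimal smoke test, assuming OpenSearch and Redis are reachable with the
# configuration above; the query string is just an illustrative sample.
if __name__ == '__main__':
    import asyncio

    if not search.disabled:
        search.info()
        print(asyncio.run(search_text('культура', limit=5)))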