2022-11-17 19:53:58 +00:00
|
|
|
import json
|
2024-01-29 00:27:30 +00:00
|
|
|
import os
|
2024-01-29 02:56:28 +00:00
|
|
|
from multiprocessing import Manager
|
2023-12-17 20:30:20 +00:00
|
|
|
|
2024-01-29 02:00:54 +00:00
|
|
|
from opensearchpy import OpenSearch
|
2023-12-17 20:30:20 +00:00
|
|
|
|
2024-02-20 16:19:46 +00:00
|
|
|
from services.logger import root_logger as logger
|
2024-01-29 01:09:54 +00:00
|
|
|
from services.rediscache import redis
|
2022-10-04 00:32:29 +00:00
|
|
|
|
2024-02-21 07:27:16 +00:00
|
|
|
# Connection settings for the search backend, read from the environment.
# The "https://" scheme is stripped from the host value.
ELASTIC_HOST = os.environ.get("ELASTIC_HOST", "").replace("https://", "")
ELASTIC_USER = os.environ.get("ELASTIC_USER", "")
ELASTIC_PASSWORD = os.environ.get("ELASTIC_PASSWORD", "")
# A str when taken from the environment, the int 9200 otherwise.
ELASTIC_PORT = os.environ.get("ELASTIC_PORT", 9200)
# "user:password" when a user is configured, otherwise an empty string.
ELASTIC_AUTH = f"{ELASTIC_USER}:{ELASTIC_PASSWORD}" if ELASTIC_USER else ""
# Emit the "user:password@" userinfo part only when credentials exist, so the
# default URL never contains a dangling "@" (e.g. "https://@host:9200").
_auth_prefix = f"{ELASTIC_AUTH}@" if ELASTIC_AUTH else ""
ELASTIC_URL = os.environ.get(
    "ELASTIC_URL", f"https://{_auth_prefix}{ELASTIC_HOST}:{ELASTIC_PORT}"
)

REDIS_TTL = 86400  # 1 day in seconds
|
2024-01-29 00:27:30 +00:00
|
|
|
|
2024-01-29 02:56:28 +00:00
|
|
|
|
2024-01-29 08:09:10 +00:00
|
|
|
# Field mapping the search index is expected to have: both text fields are
# analyzed with the custom "ru" analyzer declared in the index settings below.
expected_mapping = {
    "properties": {
        "body": {"type": "text", "analyzer": "ru"},
        "title": {"type": "text", "analyzer": "ru"},
        # 'author': {'type': 'text'},
    }
}

# Token filters referenced by the "ru" analyzer.
_ru_token_filters = {
    "ru_stemmer": {"type": "stemmer", "language": "russian"},
    "ru_stop": {"type": "stop", "stopwords": "_russian_"},
}

# Standard tokenizer followed by lowercasing, stop-word removal and stemming.
_ru_analyzer = {
    "tokenizer": "standard",
    "filter": ["lowercase", "ru_stop", "ru_stemmer"],
}

# Full body passed to the index-creation call: settings plus mappings.
index_settings = {
    "settings": {
        "index": {
            "number_of_shards": 1,
            "auto_expand_replicas": "0-all",
        },
        "analysis": {
            "analyzer": {"ru": _ru_analyzer},
            "filter": _ru_token_filters,
        },
    },
    # Same object as `expected_mapping`, so the two can never drift apart.
    "mappings": expected_mapping,
}
|
2024-01-29 08:09:10 +00:00
|
|
|
|
|
|
|
|
2024-01-29 01:09:54 +00:00
|
|
|
class SearchService:
    """Wrapper around an OpenSearch client for indexing and searching posts.

    The client is only created when ``ELASTIC_HOST`` is configured. When it is
    not, or when connecting/index setup fails, ``self.client`` stays ``None``
    and every method becomes a no-op (``search`` returns an empty list).
    """

    def __init__(self, index_name="search_index"):
        """Connect to the search server and make sure the index is usable.

        Args:
            index_name: Name of the index all operations work on.
        """
        self.index_name = index_name
        self.manager = Manager()
        self.client = None

        # Use the manager to create the lock and the flag value.
        # The lock must be re-entrant: __init__ and recreate_index() hold it
        # while calling check_index(), which in turn calls create_index() /
        # recreate_index(), and those acquire it again. With a plain Lock the
        # nested non-blocking acquire always failed, so the index was never
        # created and the following put_mapping() hit a missing index.
        self.lock = self.manager.RLock()
        self.initialized_flag = self.manager.Value("i", 0)

        # Only initialize the instance if it's not already initialized.
        # NOTE(review): the flag is created just above and is never set in
        # this class, so this guard effectively only tests ELASTIC_HOST.
        if not self.initialized_flag.value and ELASTIC_HOST:
            try:
                # NOTE(review): certificate verification is disabled here;
                # confirm this is acceptable for the target deployment.
                self.client = OpenSearch(
                    hosts=[{"host": ELASTIC_HOST, "port": ELASTIC_PORT}],
                    http_compress=True,
                    http_auth=(ELASTIC_USER, ELASTIC_PASSWORD),
                    use_ssl=True,
                    verify_certs=False,
                    ssl_assert_hostname=False,
                    ssl_show_warn=False,
                    # ca_certs = ca_certs_path
                )
                logger.info(" Клиент OpenSearch.org подключен")
                if self.lock.acquire(blocking=False):
                    try:
                        self.check_index()
                    finally:
                        self.lock.release()
                else:
                    logger.debug(" проверка пропущена")
            except Exception as exc:
                # Any failure during setup disables search instead of
                # breaking the import of this module.
                logger.error(f" {exc}")
                self.client = None

    @staticmethod
    def _mapping_matches(response, index_name, expected):
        """Tell whether a ``get_mapping`` response satisfies ``expected``.

        Args:
            response: Mapping response keyed by index name, each value
                holding a ``"mappings"`` entry.
            index_name: Index whose entry is inspected.
            expected: Mapping dict with a ``"properties"`` entry.

        Returns:
            True when every expected property is present in the index with
            exactly the expected definition. Extra properties in the index
            are ignored, so additional fields do not count as a mismatch.
        """
        actual = response.get(index_name, {}).get("mappings", {}).get("properties", {})
        wanted = expected.get("properties", {})
        return all(actual.get(field) == spec for field, spec in wanted.items())

    def info(self):
        """Log whether the search client is connected."""
        if isinstance(self.client, OpenSearch):
            logger.info(" Поиск подключен")  # : {self.client.info()}')
        else:
            logger.info(" * Задайте переменные среды для подключения к серверу поиска")

    def delete_index(self):
        """Delete the index; a missing index is ignored."""
        if not self.client:
            return
        logger.debug(f" Удаляем индекс {self.index_name}")
        self.client.indices.delete(index=self.index_name, ignore_unavailable=True)

    def create_index(self):
        """Create the index with ``index_settings``, then close and reopen it.

        Skipped (with a debug message) when the lock cannot be acquired.
        """
        if not self.client:
            return
        if not self.lock.acquire(blocking=False):
            logger.debug(" ..")
            return
        try:
            logger.debug(f" Создаём новый индекс: {self.index_name} ")
            self.client.indices.create(index=self.index_name, body=index_settings)
            self.client.indices.close(index=self.index_name)
            self.client.indices.open(index=self.index_name)
        finally:
            self.lock.release()

    def put_mapping(self):
        """Apply ``expected_mapping`` to the index."""
        if not self.client:
            return
        logger.debug(f" Разметка индекации {self.index_name}")
        self.client.indices.put_mapping(index=self.index_name, body=expected_mapping)

    def check_index(self):
        """Create the index if it is missing, or recreate it on a mapping mismatch."""
        if not self.client:
            return
        if not self.client.indices.exists(index=self.index_name):
            self.create_index()
            self.put_mapping()
            return
        # The response wraps the mapping as {index: {"mappings": ...}}, so it
        # must be unwrapped before comparing; comparing the raw response to
        # expected_mapping was always unequal and forced a recreate each time.
        mapping = self.client.indices.get_mapping(index=self.index_name)
        if not self._mapping_matches(mapping, self.index_name, expected_mapping):
            self.recreate_index()

    def recreate_index(self):
        """Delete the index and build it again via ``check_index``.

        Skipped (with a debug message) when the lock cannot be acquired.
        """
        if not self.lock.acquire(blocking=False):
            logger.debug(" ..")
            return
        try:
            self.delete_index()
            self.check_index()
        finally:
            self.lock.release()

    def index(self, shout):
        """Index one post.

        Args:
            shout: Object exposing ``id`` and ``dict()``; the dict is stored
                as the document body under ``str(shout.id)``.
        """
        if not self.client:
            return
        id_ = str(shout.id)
        logger.debug(f" Индексируем пост {id_}")
        self.client.index(index=self.index_name, id=id_, body=shout.dict())

    async def search(self, text, limit, offset):
        """Search the index and cache the found documents in Redis.

        Args:
            text: Query text.
            limit: Maximum number of hits to request.
            offset: Number of hits to skip.

        Returns:
            A list of dicts, each the hit's ``_source`` plus a ``"score"``
            key; an empty list when no client is configured.
        """
        logger.debug(f" Ищем: {text}")
        if not self.client:
            return []

        # NOTE(review): the `_all` meta-field is not available in current
        # OpenSearch versions - confirm this query matches anything, or
        # switch to a multi_match over the mapped fields.
        search_body = {
            "query": {"match": {"_all": text}},
        }
        search_response = self.client.search(
            index=self.index_name, body=search_body, size=limit, from_=offset
        )
        results = [
            {**hit["_source"], "score": hit["_score"]}
            for hit in search_response["hits"]["hits"]
        ]

        # Use Redis as cache with TTL.
        # NOTE(review): the key ignores limit/offset, so different pages of
        # the same query overwrite each other - verify against the reader.
        redis_key = f"search:{text}"
        await redis.execute("SETEX", redis_key, REDIS_TTL, json.dumps(results))

        # Return what was found; previously the results were cached and then
        # discarded because an empty list was always returned.
        return results
|
2024-01-29 00:27:30 +00:00
|
|
|
|
2024-01-29 01:41:46 +00:00
|
|
|
|
2024-01-29 03:42:02 +00:00
|
|
|
# Module-level service instance used by search_text(). Constructing it runs
# SearchService.__init__, which tries to connect when ELASTIC_HOST is set.
search_service = SearchService()
|
2024-01-29 01:41:46 +00:00
|
|
|
|
|
|
|
|
|
|
|
async def search_text(text: str, limit: int = 50, offset: int = 0):
    """Run a text search through the module-level ``search_service``.

    Args:
        text: Query text.
        limit: Maximum number of hits to request.
        offset: Number of hits to skip.

    Returns:
        Whatever ``search_service.search`` returns, or an empty list when the
        service has no client.
    """
    if not search_service.client:
        return []
    return await search_service.search(text, limit, offset)
|