mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Merge pull request #710 from khoj-ai/add-run-with-process-lock-and-fix-edge-cases
Extract run with process lock logic into func. Use it to re-index content
This commit is contained in:
commit
c9a8abafa4
8 changed files with 116 additions and 61 deletions
|
@ -307,30 +307,22 @@ def configure_middleware(app):
|
||||||
app.add_middleware(SessionMiddleware, secret_key=os.environ.get("KHOJ_DJANGO_SECRET_KEY", "!secret"))
|
app.add_middleware(SessionMiddleware, secret_key=os.environ.get("KHOJ_DJANGO_SECRET_KEY", "!secret"))
|
||||||
|
|
||||||
|
|
||||||
@schedule.repeat(schedule.every(22).to(25).hours)
|
|
||||||
def update_content_index():
|
def update_content_index():
|
||||||
try:
|
for user in get_all_users():
|
||||||
if ProcessLockAdapters.is_process_locked(ProcessLock.Operation.UPDATE_EMBEDDINGS):
|
all_files = collect_files(user=user)
|
||||||
logger.info("🔒 Skipping update content index due to lock")
|
success = configure_content(all_files, user=user)
|
||||||
return
|
all_files = collect_files(user=None)
|
||||||
ProcessLockAdapters.set_process_lock(
|
success = configure_content(all_files, user=None)
|
||||||
ProcessLock.Operation.UPDATE_EMBEDDINGS, max_duration_in_seconds=60 * 60 * 2
|
if not success:
|
||||||
)
|
raise RuntimeError("Failed to update content index")
|
||||||
|
logger.info("📪 Content index updated via Scheduler")
|
||||||
|
|
||||||
with timer("📬 Updating content index via Scheduler"):
|
|
||||||
for user in get_all_users():
|
|
||||||
all_files = collect_files(user=user)
|
|
||||||
success = configure_content(all_files, user=user)
|
|
||||||
all_files = collect_files(user=None)
|
|
||||||
success = configure_content(all_files, user=None)
|
|
||||||
if not success:
|
|
||||||
raise RuntimeError("Failed to update content index")
|
|
||||||
|
|
||||||
logger.info("📪 Content index updated via Scheduler")
|
@schedule.repeat(schedule.every(22).to(25).hours)
|
||||||
|
def update_content_index_regularly():
|
||||||
ProcessLockAdapters.remove_process_lock(ProcessLock.Operation.UPDATE_EMBEDDINGS)
|
ProcessLockAdapters.run_with_lock(
|
||||||
except Exception as e:
|
update_content_index, ProcessLock.Operation.UPDATE_EMBEDDINGS, max_duration_in_seconds=60 * 60 * 2
|
||||||
logger.error(f"🚨 Error updating content index via Scheduler: {e}", exc_info=True)
|
)
|
||||||
|
|
||||||
|
|
||||||
def configure_search_types():
|
def configure_search_types():
|
||||||
|
|
|
@ -5,7 +5,7 @@ import secrets
|
||||||
import sys
|
import sys
|
||||||
from datetime import date, datetime, timedelta, timezone
|
from datetime import date, datetime, timedelta, timezone
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List, Optional, Type
|
from typing import Callable, List, Optional, Type
|
||||||
|
|
||||||
from asgiref.sync import sync_to_async
|
from asgiref.sync import sync_to_async
|
||||||
from django.contrib.sessions.backends.db import SessionStore
|
from django.contrib.sessions.backends.db import SessionStore
|
||||||
|
@ -46,7 +46,7 @@ from khoj.search_filter.file_filter import FileFilter
|
||||||
from khoj.search_filter.word_filter import WordFilter
|
from khoj.search_filter.word_filter import WordFilter
|
||||||
from khoj.utils import state
|
from khoj.utils import state
|
||||||
from khoj.utils.config import OfflineChatProcessorModel
|
from khoj.utils.config import OfflineChatProcessorModel
|
||||||
from khoj.utils.helpers import generate_random_name, is_none_or_empty
|
from khoj.utils.helpers import generate_random_name, is_none_or_empty, timer
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -421,6 +421,7 @@ class ProcessLockAdapters:
|
||||||
tz=timezone.utc
|
tz=timezone.utc
|
||||||
):
|
):
|
||||||
process_lock.delete()
|
process_lock.delete()
|
||||||
|
logger.info(f"🔓 Deleted stale {process_name} process lock on timeout")
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -428,6 +429,31 @@ class ProcessLockAdapters:
|
||||||
def remove_process_lock(process_name: str):
|
def remove_process_lock(process_name: str):
|
||||||
return ProcessLock.objects.filter(name=process_name).delete()
|
return ProcessLock.objects.filter(name=process_name).delete()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def run_with_lock(func: Callable, operation: ProcessLock.Operation, max_duration_in_seconds: int = 600):
|
||||||
|
# Exit early if process lock is already taken
|
||||||
|
if ProcessLockAdapters.is_process_locked(operation):
|
||||||
|
logger.info(f"🔒 Skip executing {func} as {operation} lock is already taken")
|
||||||
|
return
|
||||||
|
|
||||||
|
success = False
|
||||||
|
try:
|
||||||
|
# Set process lock
|
||||||
|
ProcessLockAdapters.set_process_lock(operation, max_duration_in_seconds)
|
||||||
|
logger.info(f"🔐 Locked {operation} to execute {func}")
|
||||||
|
|
||||||
|
# Execute Function
|
||||||
|
with timer(f"🔒 Run {func} with {operation} process lock", logger):
|
||||||
|
func()
|
||||||
|
success = True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"🚨 Error executing {func} with {operation} process lock: {e}", exc_info=True)
|
||||||
|
success = False
|
||||||
|
finally:
|
||||||
|
# Remove Process Lock
|
||||||
|
ProcessLockAdapters.remove_process_lock(operation)
|
||||||
|
logger.info(f"🔓 Unlocked {operation} process after executing {func} {'Succeeded' if success else 'Failed'}")
|
||||||
|
|
||||||
|
|
||||||
class ClientApplicationAdapters:
|
class ClientApplicationAdapters:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -53,17 +53,8 @@ class NotionContentConfig(ConfigBase):
|
||||||
token: str
|
token: str
|
||||||
|
|
||||||
|
|
||||||
class ImageContentConfig(ConfigBase):
|
|
||||||
input_directories: Optional[List[Path]] = None
|
|
||||||
input_filter: Optional[List[str]] = None
|
|
||||||
embeddings_file: Path
|
|
||||||
use_xmp_metadata: bool
|
|
||||||
batch_size: int
|
|
||||||
|
|
||||||
|
|
||||||
class ContentConfig(ConfigBase):
|
class ContentConfig(ConfigBase):
|
||||||
org: Optional[TextContentConfig] = None
|
org: Optional[TextContentConfig] = None
|
||||||
image: Optional[ImageContentConfig] = None
|
|
||||||
markdown: Optional[TextContentConfig] = None
|
markdown: Optional[TextContentConfig] = None
|
||||||
pdf: Optional[TextContentConfig] = None
|
pdf: Optional[TextContentConfig] = None
|
||||||
plaintext: Optional[TextContentConfig] = None
|
plaintext: Optional[TextContentConfig] = None
|
||||||
|
|
|
@ -30,16 +30,12 @@ from khoj.utils import fs_syncer, state
|
||||||
from khoj.utils.config import SearchModels
|
from khoj.utils.config import SearchModels
|
||||||
from khoj.utils.constants import web_directory
|
from khoj.utils.constants import web_directory
|
||||||
from khoj.utils.helpers import resolve_absolute_path
|
from khoj.utils.helpers import resolve_absolute_path
|
||||||
from khoj.utils.rawconfig import (
|
from khoj.utils.rawconfig import ContentConfig, ImageSearchConfig, SearchConfig
|
||||||
ContentConfig,
|
|
||||||
ImageContentConfig,
|
|
||||||
ImageSearchConfig,
|
|
||||||
SearchConfig,
|
|
||||||
)
|
|
||||||
from tests.helpers import (
|
from tests.helpers import (
|
||||||
ChatModelOptionsFactory,
|
ChatModelOptionsFactory,
|
||||||
OfflineChatProcessorConversationConfigFactory,
|
OfflineChatProcessorConversationConfigFactory,
|
||||||
OpenAIProcessorConversationConfigFactory,
|
OpenAIProcessorConversationConfigFactory,
|
||||||
|
ProcessLockFactory,
|
||||||
SubscriptionFactory,
|
SubscriptionFactory,
|
||||||
UserConversationProcessorConfigFactory,
|
UserConversationProcessorConfigFactory,
|
||||||
UserFactory,
|
UserFactory,
|
||||||
|
@ -211,6 +207,12 @@ def search_models(search_config: SearchConfig):
|
||||||
return search_models
|
return search_models
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db
|
||||||
|
@pytest.fixture
|
||||||
|
def default_process_lock():
|
||||||
|
return ProcessLockFactory()
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def anyio_backend():
|
def anyio_backend():
|
||||||
return "asyncio"
|
return "asyncio"
|
||||||
|
@ -223,13 +225,6 @@ def content_config(tmp_path_factory, search_models: SearchModels, default_user:
|
||||||
|
|
||||||
# Generate Image Embeddings from Test Images
|
# Generate Image Embeddings from Test Images
|
||||||
content_config = ContentConfig()
|
content_config = ContentConfig()
|
||||||
content_config.image = ImageContentConfig(
|
|
||||||
input_filter=None,
|
|
||||||
input_directories=["tests/data/images"],
|
|
||||||
embeddings_file=content_dir.joinpath("image_embeddings.pt"),
|
|
||||||
batch_size=1,
|
|
||||||
use_xmp_metadata=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
LocalOrgConfig.objects.create(
|
LocalOrgConfig.objects.create(
|
||||||
input_files=None,
|
input_files=None,
|
||||||
|
|
|
@ -11,6 +11,7 @@ from khoj.database.models import (
|
||||||
KhojUser,
|
KhojUser,
|
||||||
OfflineChatProcessorConversationConfig,
|
OfflineChatProcessorConversationConfig,
|
||||||
OpenAIProcessorConversationConfig,
|
OpenAIProcessorConversationConfig,
|
||||||
|
ProcessLock,
|
||||||
SearchModelConfig,
|
SearchModelConfig,
|
||||||
Subscription,
|
Subscription,
|
||||||
UserConversationConfig,
|
UserConversationConfig,
|
||||||
|
@ -93,3 +94,10 @@ class SubscriptionFactory(factory.django.DjangoModelFactory):
|
||||||
type = "standard"
|
type = "standard"
|
||||||
is_recurring = False
|
is_recurring = False
|
||||||
renewal_date = make_aware(datetime.strptime("2100-04-01", "%Y-%m-%d"))
|
renewal_date = make_aware(datetime.strptime("2100-04-01", "%Y-%m-%d"))
|
||||||
|
|
||||||
|
|
||||||
|
class ProcessLockFactory(factory.django.DjangoModelFactory):
|
||||||
|
class Meta:
|
||||||
|
model = ProcessLock
|
||||||
|
|
||||||
|
name = "test_lock"
|
||||||
|
|
|
@ -273,7 +273,7 @@ def test_get_api_config_types(client, sample_org_data, default_user: KhojUser):
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert response.json() == ["all", "org", "image", "plaintext"]
|
assert response.json() == ["all", "org", "plaintext"]
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
|
58
tests/test_db_lock.py
Normal file
58
tests/test_db_lock.py
Normal file
|
@ -0,0 +1,58 @@
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from khoj.database.adapters import ProcessLockAdapters
|
||||||
|
from khoj.database.models import ProcessLock
|
||||||
|
from tests.helpers import ProcessLockFactory
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db(transaction=True)
|
||||||
|
def test_process_lock(default_process_lock):
|
||||||
|
# Arrange
|
||||||
|
lock: ProcessLock = default_process_lock
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert True == ProcessLockAdapters.is_process_locked(lock.name)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db(transaction=True)
|
||||||
|
def test_expired_process_lock():
|
||||||
|
# Arrange
|
||||||
|
lock: ProcessLock = ProcessLockFactory(name="test_expired_lock", max_duration_in_seconds=2)
|
||||||
|
|
||||||
|
# Act
|
||||||
|
time.sleep(3)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert False == ProcessLockAdapters.is_process_locked(lock.name)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db(transaction=True)
|
||||||
|
def test_in_progress_lock(default_process_lock):
|
||||||
|
# Arrange
|
||||||
|
lock: ProcessLock = default_process_lock
|
||||||
|
|
||||||
|
# Act
|
||||||
|
ProcessLockAdapters.run_with_lock(lock.name, lambda: time.sleep(2))
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert True == ProcessLockAdapters.is_process_locked(lock.name)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db(transaction=True)
|
||||||
|
def test_run_with_completed():
|
||||||
|
# Arrange
|
||||||
|
ProcessLockAdapters.run_with_lock("test_run_with", lambda: time.sleep(2))
|
||||||
|
|
||||||
|
# Act
|
||||||
|
time.sleep(4)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert False == ProcessLockAdapters.is_process_locked("test_run_with")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.django_db(transaction=True)
|
||||||
|
def test_nonexistent_lock():
|
||||||
|
# Assert
|
||||||
|
assert False == ProcessLockAdapters.is_process_locked("nonexistent_lock")
|
|
@ -1,15 +0,0 @@
|
||||||
import pytest
|
|
||||||
|
|
||||||
from khoj.utils.rawconfig import ImageContentConfig, TextContentConfig
|
|
||||||
|
|
||||||
|
|
||||||
# Test
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
|
||||||
def test_input_filter_or_directories_required_in_image_content_config():
|
|
||||||
# Act
|
|
||||||
with pytest.raises(ValueError):
|
|
||||||
ImageContentConfig(
|
|
||||||
input_directories=None,
|
|
||||||
input_filter=None,
|
|
||||||
embeddings_file="note_embeddings.pt",
|
|
||||||
)
|
|
Loading…
Reference in a new issue