Automatically update search index hourly

- c535953 Update index automatically in non-GUI mode too - 701d92e Lock the index before updating it via API or Scheduler - 3b0783a Automate updating embeddings, search index on an hourly schedule Resolves #106
2024-11-28 01:45:07 +01:00 · 2023-01-02 00:37:59 +00:00 · 2023-01-02 00:37:59 +00:00 · fe1398401d
commit fe1398401d
parent a58c243bc0 c535953915
5 changed files with 33 additions and 2 deletions
--- a/setup.py
+++ b/setup.py
@ -41,6 +41,7 @@ setup(
        "dateparser == 1.1.1",
        "pyqt6 == 6.3.1",
        "defusedxml == 0.7.1",
        'schedule == 1.1.0',
    ],
    include_package_data=True,
    entry_points={"console_scripts": ["khoj = src.main:run"]},
--- a/src/configure.py
+++ b/src/configure.py
@ -3,6 +3,9 @@ import sys
 import logging
 import json
 # External Packages
 import schedule
 # Internal Packages
 from src.processor.ledger.beancount_to_jsonl import BeancountToJsonl
 from src.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
@ -31,12 +34,22 @@ def configure_server(args, required=False):
        state.config = args.config
    # Initialize the search model from Config
    state.search_index_lock.acquire()
    state.model = configure_search(state.model, state.config, args.regenerate)
    state.search_index_lock.release()
    # Initialize Processor from Config
    state.processor_config = configure_processor(args.config.processor)
@schedule.repeat(schedule.every(1).hour)
def update_search_index():
    """Refresh the search index; registered with `schedule` to run every hour.

    Calls configure_search with regenerate=False and stores the result in
    state.model while holding state.search_index_lock.
    """
    # Hold the lock through a context manager so it is released even when
    # configure_search raises. A bare acquire()/release() pair would leave the
    # lock held on error and block every later update.
    with state.search_index_lock:
        state.model = configure_search(state.model, state.config, regenerate=False)
    logger.info("Search Index updated via Scheduler")
 def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: SearchType = None):
    # Initialize Org Notes Search
    if (t == SearchType.Org or t == None) and config.content_type.org:
--- a/src/main.py
+++ b/src/main.py
@ -3,6 +3,7 @@ import os
 import signal
 import sys
 import logging
 import threading
 import warnings
 from platform import system
@ -16,6 +17,7 @@ from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
 from PyQt6 import QtWidgets
 from PyQt6.QtCore import QThread, QTimer
 import schedule
 # Internal Packages
 from src.configure import configure_server
@ -72,6 +74,8 @@ def run():
    logger.info("Starting Khoj...")
    if args.no_gui:
        # Setup task scheduler
        poll_task_scheduler()
        # Start Server
        configure_server(args, required=True)
        start_server(app, host=args.host, port=args.port, socket=args.socket)
@ -99,10 +103,10 @@ def run():
        # Setup Signal Handlers
        signal.signal(signal.SIGINT, sigint_handler)
-        # Invoke python Interpreter every 500ms to handle signals
+        # Invoke Python interpreter every 500ms to handle signals, run scheduled tasks
        timer = QTimer()
        timer.start(500)
-        timer.timeout.connect(lambda: None)
+        timer.timeout.connect(schedule.run_pending)
        # Start Application
        server.start()
@ -142,6 +146,13 @@ def start_server(app, host=None, port=None, socket=None):
        uvicorn.run(app, host=host, port=port)
 def poll_task_scheduler():
     """Run any pending scheduled jobs and re-arm a daemon timer to poll again in 60 seconds."""
     # Arm the next poll first, then run whatever jobs are currently due.
     next_poll = threading.Timer(60.0, poll_task_scheduler)
     next_poll.daemon = True
     next_poll.start()
     schedule.run_pending()
 class ServerThread(QThread):
    def __init__(self, app, host=None, port=None, socket=None):
        super(ServerThread, self).__init__()
--- a/src/routers/api.py
+++ b/src/routers/api.py
@ -125,5 +125,9 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Opti
@api.get('/update')
def update(t: Optional[SearchType] = None, force: Optional[bool] = False):
    """Update the search index on request and report success.

    Args:
        t: Search type forwarded to configure_search as `t`; None by default.
        force: Forwarded to configure_search as `regenerate`.

    Returns:
        A dict with 'status' and 'message' keys once the update completes.
    """
    # Hold the lock through a context manager so it is released even when
    # configure_search raises. Otherwise a failed update would leave the lock
    # held and block every later API or scheduler update.
    with state.search_index_lock:
        state.model = configure_search(state.model, state.config, regenerate=force, t=t)
    logger.info("Search Index updated via API call")
    return {'status': 'ok', 'message': 'index updated'}
--- a/src/utils/state.py
+++ b/src/utils/state.py
@ -1,4 +1,5 @@
 # Standard Packages
 import threading
 from packaging import version
 # External Packages
@ -20,6 +21,7 @@ host: str = None
 port: int = None
 cli_args: list[str] = None
 query_cache = LRU()
 search_index_lock = threading.Lock()
 if torch.cuda.is_available():
    # Use CUDA GPU