diff --git a/pyproject.toml b/pyproject.toml index 4c32a1d2..06b2da55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ dependencies = [ "apscheduler ~= 3.10.0", "pytz ~= 2024.1", "cron-descriptor == 1.4.3", + "django_apscheduler == 0.6.2", ] dynamic = ["version"] diff --git a/src/khoj/app/settings.py b/src/khoj/app/settings.py index 27be968e..2672f98d 100644 --- a/src/khoj/app/settings.py +++ b/src/khoj/app/settings.py @@ -77,6 +77,7 @@ INSTALLED_APPS = [ "django.contrib.messages", "django.contrib.staticfiles", "phonenumber_field", + "django_apscheduler", ] MIDDLEWARE = [ @@ -169,3 +170,20 @@ STATIC_URL = "/static/" # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" + + +# Format string for displaying run time timestamps in the Django admin site. The default +# just adds seconds to the standard Django format, which is useful for displaying the timestamps +# for jobs that are scheduled to run on intervals of less than one minute. +# +# See https://docs.djangoproject.com/en/dev/ref/settings/#datetime-format for format string +# syntax details. +APSCHEDULER_DATETIME_FORMAT = "N j, Y, f:s a" + +# Maximum run time allowed for jobs that are triggered manually via the Django admin site, which +# prevents admin site HTTP requests from timing out. +# +# Longer running jobs should probably be handed over to a background task processing library +# that supports multiple background worker processes instead (e.g. Dramatiq, Celery, Django-RQ, +# etc. See: https://djangopackages.org/grids/g/workers-queues-tasks/ for popular options). +APSCHEDULER_RUN_NOW_TIMEOUT = 240 # Seconds diff --git a/src/khoj/main.py b/src/khoj/main.py index 6ce30c7a..4a9593af 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -128,20 +128,16 @@ def run(should_start_server=True): poll_task_scheduler() # Setup Background Scheduler - from django.conf import settings as django_settings + from django_apscheduler.jobstores import DjangoJobStore - django_db = django_settings.DATABASES["default"] state.scheduler = BackgroundScheduler( { - "apscheduler.jobstores.default": { - "type": "sqlalchemy", - "url": f'postgresql://{django_db["USER"]}:{django_db["PASSWORD"]}@{django_db["HOST"]}:{django_db["PORT"]}/{django_db["NAME"]}', - }, "apscheduler.timezone": "UTC", "apscheduler.job_defaults.misfire_grace_time": "60", # Useful to run scheduled jobs even when worker delayed because it was busy or down "apscheduler.job_defaults.coalesce": "true", # Combine multiple jobs into one if they are scheduled at the same time } ) + state.scheduler.add_jobstore(DjangoJobStore(), "default") state.scheduler.start() # Start Server diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 2840a5cb..60fda057 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -25,6 +25,7 @@ import openai import pytz import requests from apscheduler.triggers.cron import CronTrigger +from asgiref.sync import sync_to_async from fastapi import Depends, Header, HTTPException, Request, UploadFile from PIL import Image from starlette.authentication import has_required_scope @@ -927,7 +928,7 @@ async def create_automation( ) query_id = hashlib.md5(f"{query_to_run}".encode("utf-8")).hexdigest() job_id = f"automation_{user.uuid}_{crontime_string}_{query_id}" - job = state.scheduler.add_job( + job = await sync_to_async(state.scheduler.add_job)( run_with_process_lock, trigger=trigger, args=(