Fix refactor bugs, CSRF token issues for use in production (#531)

* Add flags for samesite settings to enable django admin login
* Include tzdata in dependencies to work around Python package issues on Linux
* Use the DJANGO_DEBUG flag correctly
* Fix naming of the entry field when creating EntryDates objects
* Correctly retrieve openai config settings
* Fix date filter to use the embeddings name for the field
Parent: fe860aaf83
Commit: fb6ebd19fc

6 changed files with 38 additions and 14 deletions
```diff
@@ -72,6 +72,7 @@ dependencies = [
     "python-multipart == 0.0.6",
     "gunicorn == 21.2.0",
     "lxml == 4.9.3",
+    "tzdata == 2023.3",
 ]
 dynamic = ["version"]
```
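This hunk (likely Khoj's `pyproject.toml`, given the `dependencies` list) pins `tzdata`, matching the commit note about Python package issues on Linux: slim Linux images often ship without the OS zoneinfo database, and Python's `zoneinfo` module then needs the `tzdata` distribution as a fallback. A minimal sketch of the failure this avoids (the timezone name is an arbitrary example):

```python
# On a slim Linux image with no system tz database and no tzdata package,
# this raises zoneinfo.ZoneInfoNotFoundError; installing tzdata fixes it.
from zoneinfo import ZoneInfo

print(ZoneInfo("America/New_York"))
```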
```diff
@@ -24,10 +24,29 @@ BASE_DIR = Path(__file__).resolve().parent.parent.parent
 SECRET_KEY = os.getenv("DJANGO_SECRET_KEY")
 
 # SECURITY WARNING: don't run with debug turned on in production!
-DEBUG = True
+DEBUG = os.getenv("DJANGO_DEBUG", "False") == "True"
 
-ALLOWED_HOSTS = []
+ALLOWED_HOSTS = [".khoj.dev", "localhost", "127.0.0.1", "[::1]", "beta.khoj.dev"]
+
+CSRF_TRUSTED_ORIGINS = [
+    "https://app.khoj.dev",
+    "https://beta.khoj.dev",
+    "https://khoj.dev",
+    "https://*.khoj.dev",
+]
+
+COOKIE_SAMESITE = "None"
+if DEBUG:
+    SESSION_COOKIE_DOMAIN = "localhost"
+    CSRF_COOKIE_DOMAIN = "localhost"
+else:
+    SESSION_COOKIE_DOMAIN = "khoj.dev"
+    CSRF_COOKIE_DOMAIN = "khoj.dev"
+
+SESSION_COOKIE_SECURE = True
+CSRF_COOKIE_SECURE = True
+COOKIE_SAMESITE = "None"
+SESSION_COOKIE_SAMESITE = "None"
 
 # Application definition
```
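One detail of the settings change is worth calling out: `os.getenv` always returns a string, so the `== "True"` comparison enables debug mode only for that exact value, and anything else (unset, `"true"`, `"1"`) falls back to the safe default. A small sketch of the flag parsing, assuming the same convention:

```python
import os

def env_flag(name: str, default: str = "False") -> bool:
    # Only the exact string "True" turns the flag on; unset or any
    # other value ("true", "1", "yes") leaves it off, the safe default.
    return os.getenv(name, default) == "True"

os.environ["DJANGO_DEBUG"] = "true"
assert env_flag("DJANGO_DEBUG") is False  # comparison is case-sensitive

os.environ["DJANGO_DEBUG"] = "True"
assert env_flag("DJANGO_DEBUG") is True
```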
```diff
@@ -265,6 +265,10 @@ class ConversationAdapters:
 
     @staticmethod
     async def get_openai_chat():
         return await ChatModelOptions.objects.filter(model_type="openai").afirst()
 
+    @staticmethod
+    async def get_openai_chat_config():
+        return await OpenAIProcessorConversationConfig.objects.filter().afirst()
+
     @staticmethod
```
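`afirst()` is the async counterpart of `QuerySet.first()` (available since Django 4.1): it returns the first matching row, or `None` when no config exists, so callers should handle the missing-config case. A hedged usage sketch; the wrapper function and error handling below are illustrative, not part of the commit:

```python
# Hypothetical call site for the new adapter method; assumes
# ConversationAdapters from the diff above is importable.
async def require_openai_api_key() -> str:
    config = await ConversationAdapters.get_openai_chat_config()
    if config is None:
        # filter().afirst() yields None when no config row exists.
        raise RuntimeError("No OpenAI processor conversation config is set")
    return config.api_key
```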
```diff
@@ -340,11 +344,11 @@ class EntryAdapters:
         if min_date is not None:
             # Convert the min_date timestamp to yyyy-mm-dd format
             formatted_min_date = date.fromtimestamp(min_date).strftime("%Y-%m-%d")
-            q_filter_terms &= Q(entry_dates__date__gte=formatted_min_date)
+            q_filter_terms &= Q(embeddings_dates__date__gte=formatted_min_date)
         if max_date is not None:
             # Convert the max_date timestamp to yyyy-mm-dd format
             formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d")
-            q_filter_terms &= Q(entry_dates__date__lte=formatted_max_date)
+            q_filter_terms &= Q(embeddings_dates__date__lte=formatted_max_date)
 
         relevant_entries = Entry.objects.filter(user=user).filter(
             q_filter_terms,
```
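The fix swaps the lookup prefix from `entry_dates` to `embeddings_dates`, so the filter traverses the actual related name linking entries to their extracted dates. A standalone sketch of how the two range terms compose (lookup names taken from the diff; the timestamps are arbitrary examples):

```python
from datetime import date

from django.db.models import Q

min_date, max_date = 1672531200, 1675209600  # example POSIX timestamps

q_filter_terms = Q()
# Each term narrows the queryset; &= ANDs them together, so matching
# entries must have an associated date inside [min_date, max_date].
q_filter_terms &= Q(embeddings_dates__date__gte=date.fromtimestamp(min_date).strftime("%Y-%m-%d"))
q_filter_terms &= Q(embeddings_dates__date__lte=date.fromtimestamp(max_date).strftime("%Y-%m-%d"))
```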
```diff
@@ -121,12 +121,12 @@ class TextToEntries(ABC):
             batcher(entry_batches, batch_size), desc="Processing embeddings in batches"
         ):
             batch_embeddings_to_create = []
-            for entry_hash, embedding in entry_batch:
+            for entry_hash, new_entry in entry_batch:
                 entry = hash_to_current_entries[entry_hash]
                 batch_embeddings_to_create.append(
                     DbEntry(
                         user=user,
-                        embeddings=embedding,
+                        embeddings=new_entry,
                         raw=entry.raw,
                         compiled=entry.compiled,
                         heading=entry.heading[:1000],  # Truncate to max chars of field allowed
@@ -136,19 +136,19 @@ class TextToEntries(ABC):
                         corpus_id=entry.corpus_id,
                     )
                 )
-            new_embeddings = DbEntry.objects.bulk_create(batch_embeddings_to_create)
-            logger.debug(f"Created {len(new_embeddings)} new embeddings")
-            num_new_embeddings += len(new_embeddings)
+            new_entries = DbEntry.objects.bulk_create(batch_embeddings_to_create)
+            logger.debug(f"Created {len(new_entries)} new embeddings")
+            num_new_embeddings += len(new_entries)
 
             dates_to_create = []
             with timer("Create new date associations for new embeddings", logger):
-                for embedding in new_embeddings:
-                    dates = self.date_filter.extract_dates(embedding.raw)
+                for new_entry in new_entries:
+                    dates = self.date_filter.extract_dates(new_entry.raw)
                     for date in dates:
                         dates_to_create.append(
                             EntryDates(
                                 date=date,
-                                embeddings=embedding,
+                                entry=new_entry,
                             )
                         )
             new_dates = EntryDates.objects.bulk_create(dates_to_create)
```
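Both loops use the same two-phase pattern: build unsaved model instances in memory, then insert them with a single `bulk_create` query, whose return value feeds the next stage. A reduced sketch, assuming a PostgreSQL backend (where bulk-created rows come back with primary keys set); `raw_texts`, `user`, and `extract_dates` are hypothetical stand-ins:

```python
# Phase 1: insert all entries in one query instead of one INSERT per row.
entries_to_create = [DbEntry(user=user, raw=raw, compiled=raw) for raw in raw_texts]
new_entries = DbEntry.objects.bulk_create(entries_to_create)

# Phase 2: the returned instances carry primary keys, so they can serve as
# foreign-key targets for the follow-up EntryDates bulk insert.
dates_to_create = [
    EntryDates(date=d, entry=entry)
    for entry in new_entries
    for d in extract_dates(entry.raw)
]
EntryDates.objects.bulk_create(dates_to_create)
```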
```diff
@@ -670,8 +670,9 @@ async def extract_references_and_questions(
             defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False
         )
     elif await ConversationAdapters.has_openai_chat():
+        openai_chat_config = await ConversationAdapters.get_openai_chat_config()
         openai_chat = await ConversationAdapters.get_openai_chat()
-        api_key = openai_chat.api_key
+        api_key = openai_chat_config.api_key
         chat_model = openai_chat.chat_model
         inferred_queries = extract_questions(
             defiltered_query, model=chat_model, api_key=api_key, conversation_log=meta_log
```
```diff
@@ -10,7 +10,6 @@ from fastapi.templating import Jinja2Templates
 from starlette.authentication import requires
 from khoj.utils.rawconfig import (
-    TextContentConfig,
     FullConfig,
     GithubContentConfig,
     GithubRepoConfig,
     NotionContentConfig,
```