From fb6ebd19fc610d4fc74d0bed91a920871fef4555 Mon Sep 17 00:00:00 2001 From: sabaimran <65192171+sabaimran@users.noreply.github.com> Date: Thu, 2 Nov 2023 23:02:38 -0700 Subject: [PATCH] Fix refactor bugs, CSRF token issues for use in production (#531) Fix refactor bugs, CSRF token issues for use in production * Add flags for SameSite settings to enable Django admin login * Add tzdata to dependencies to work around Python package issues on Linux * Use DJANGO_DEBUG flag correctly * Fix naming of entry field when creating EntryDates objects * Correctly retrieve OpenAI config settings * Fix date filter to use the embeddings name for the field --- pyproject.toml | 1 + src/app/settings.py | 23 +++++++++++++++++++++-- src/database/adapters/__init__.py | 8 ++++++-- src/khoj/processor/text_to_entries.py | 16 ++++++++-------- src/khoj/routers/api.py | 3 ++- src/khoj/routers/web_client.py | 1 - 6 files changed, 38 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f4ae57f4..f52d70cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ dependencies = [ "python-multipart == 0.0.6", "gunicorn == 21.2.0", "lxml == 4.9.3", + "tzdata == 2023.3", ] dynamic = ["version"] diff --git a/src/app/settings.py b/src/app/settings.py index 9a8b427b..44d1d3d6 100644 --- a/src/app/settings.py +++ b/src/app/settings.py @@ -24,10 +24,29 @@ BASE_DIR = Path(__file__).resolve().parent.parent.parent SECRET_KEY = os.getenv("DJANGO_SECRET_KEY") # SECURITY WARNING: don't run with debug turned on in production! 
-DEBUG = True +DEBUG = os.getenv("DJANGO_DEBUG", "False") == "True" -ALLOWED_HOSTS = [] +ALLOWED_HOSTS = [".khoj.dev", "localhost", "127.0.0.1", "[::1]", "beta.khoj.dev"] +CSRF_TRUSTED_ORIGINS = [ + "https://app.khoj.dev", + "https://beta.khoj.dev", + "https://khoj.dev", + "https://*.khoj.dev", +] + +COOKIE_SAMESITE = "None" +if DEBUG: + SESSION_COOKIE_DOMAIN = "localhost" + CSRF_COOKIE_DOMAIN = "localhost" +else: + SESSION_COOKIE_DOMAIN = "khoj.dev" + CSRF_COOKIE_DOMAIN = "khoj.dev" + +SESSION_COOKIE_SECURE = True +CSRF_COOKIE_SECURE = True +COOKIE_SAMESITE = "None" +SESSION_COOKIE_SAMESITE = "None" # Application definition diff --git a/src/database/adapters/__init__.py b/src/database/adapters/__init__.py index 909a78e5..7fbc5287 100644 --- a/src/database/adapters/__init__.py +++ b/src/database/adapters/__init__.py @@ -265,6 +265,10 @@ class ConversationAdapters: @staticmethod async def get_openai_chat(): + return await ChatModelOptions.objects.filter(model_type="openai").afirst() + + @staticmethod + async def get_openai_chat_config(): return await OpenAIProcessorConversationConfig.objects.filter().afirst() @staticmethod @@ -340,11 +344,11 @@ class EntryAdapters: if min_date is not None: # Convert the min_date timestamp to yyyy-mm-dd format formatted_min_date = date.fromtimestamp(min_date).strftime("%Y-%m-%d") - q_filter_terms &= Q(entry_dates__date__gte=formatted_min_date) + q_filter_terms &= Q(embeddings_dates__date__gte=formatted_min_date) if max_date is not None: # Convert the max_date timestamp to yyyy-mm-dd format formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d") - q_filter_terms &= Q(entry_dates__date__lte=formatted_max_date) + q_filter_terms &= Q(embeddings_dates__date__lte=formatted_max_date) relevant_entries = Entry.objects.filter(user=user).filter( q_filter_terms, diff --git a/src/khoj/processor/text_to_entries.py b/src/khoj/processor/text_to_entries.py index 0477caa2..b121f1c4 100644 --- a/src/khoj/processor/text_to_entries.py +++ 
b/src/khoj/processor/text_to_entries.py @@ -121,12 +121,12 @@ class TextToEntries(ABC): batcher(entry_batches, batch_size), desc="Processing embeddings in batches" ): batch_embeddings_to_create = [] - for entry_hash, embedding in entry_batch: + for entry_hash, new_entry in entry_batch: entry = hash_to_current_entries[entry_hash] batch_embeddings_to_create.append( DbEntry( user=user, - embeddings=embedding, + embeddings=new_entry, raw=entry.raw, compiled=entry.compiled, heading=entry.heading[:1000], # Truncate to max chars of field allowed @@ -136,19 +136,19 @@ class TextToEntries(ABC): corpus_id=entry.corpus_id, ) ) - new_embeddings = DbEntry.objects.bulk_create(batch_embeddings_to_create) - logger.debug(f"Created {len(new_embeddings)} new embeddings") - num_new_embeddings += len(new_embeddings) + new_entries = DbEntry.objects.bulk_create(batch_embeddings_to_create) + logger.debug(f"Created {len(new_entries)} new embeddings") + num_new_embeddings += len(new_entries) dates_to_create = [] with timer("Create new date associations for new embeddings", logger): - for embedding in new_embeddings: - dates = self.date_filter.extract_dates(embedding.raw) + for new_entry in new_entries: + dates = self.date_filter.extract_dates(new_entry.raw) for date in dates: dates_to_create.append( EntryDates( date=date, - embeddings=embedding, + entry=new_entry, ) ) new_dates = EntryDates.objects.bulk_create(dates_to_create) diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 84b22035..b8a5350b 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -670,8 +670,9 @@ async def extract_references_and_questions( defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False ) elif await ConversationAdapters.has_openai_chat(): + openai_chat_config = await ConversationAdapters.get_openai_chat_config() openai_chat = await ConversationAdapters.get_openai_chat() - api_key = openai_chat.api_key + api_key = 
openai_chat_config.api_key chat_model = openai_chat.chat_model inferred_queries = extract_questions( defiltered_query, model=chat_model, api_key=api_key, conversation_log=meta_log diff --git a/src/khoj/routers/web_client.py b/src/khoj/routers/web_client.py index 8014c62d..1b76ca06 100644 --- a/src/khoj/routers/web_client.py +++ b/src/khoj/routers/web_client.py @@ -10,7 +10,6 @@ from fastapi.templating import Jinja2Templates from starlette.authentication import requires from khoj.utils.rawconfig import ( TextContentConfig, - FullConfig, GithubContentConfig, GithubRepoConfig, NotionContentConfig,