Fix refactor bugs, CSRF token issues for use in production (#531)

Fix refactor bugs, CSRF token issues for use in production
* Add flags for samesite settings to enable django admin login
* Include tzdata in dependencies to work around python package issues in linux
* Use DJANGO_DEBUG flag correctly
* Fix naming of entry field when creating EntryDate objects
* Correctly retrieve openai config settings
* Fix date filter to use the correct embeddings field name
This commit is contained in:
sabaimran 2023-11-02 23:02:38 -07:00 committed by GitHub
parent fe860aaf83
commit fb6ebd19fc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 38 additions and 14 deletions

View file

@ -72,6 +72,7 @@ dependencies = [
"python-multipart == 0.0.6", "python-multipart == 0.0.6",
"gunicorn == 21.2.0", "gunicorn == 21.2.0",
"lxml == 4.9.3", "lxml == 4.9.3",
"tzdata == 2023.3",
] ]
dynamic = ["version"] dynamic = ["version"]

View file

@ -24,10 +24,29 @@ BASE_DIR = Path(__file__).resolve().parent.parent.parent
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY") SECRET_KEY = os.getenv("DJANGO_SECRET_KEY")
# SECURITY WARNING: don't run with debug turned on in production! # SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True DEBUG = os.getenv("DJANGO_DEBUG", "False") == "True"
ALLOWED_HOSTS = [] ALLOWED_HOSTS = [".khoj.dev", "localhost", "127.0.0.1", "[::1]", "beta.khoj.dev"]
CSRF_TRUSTED_ORIGINS = [
"https://app.khoj.dev",
"https://beta.khoj.dev",
"https://khoj.dev",
"https://*.khoj.dev",
]
COOKIE_SAMESITE = "None"
if DEBUG:
SESSION_COOKIE_DOMAIN = "localhost"
CSRF_COOKIE_DOMAIN = "localhost"
else:
SESSION_COOKIE_DOMAIN = "khoj.dev"
CSRF_COOKIE_DOMAIN = "khoj.dev"
SESSION_COOKIE_SECURE = True
CSRF_COOKIE_SECURE = True
COOKIE_SAMESITE = "None"
SESSION_COOKIE_SAMESITE = "None"
# Application definition # Application definition

View file

@ -265,6 +265,10 @@ class ConversationAdapters:
@staticmethod @staticmethod
async def get_openai_chat(): async def get_openai_chat():
return await ChatModelOptions.objects.filter(model_type="openai").afirst()
@staticmethod
async def get_openai_chat_config():
return await OpenAIProcessorConversationConfig.objects.filter().afirst() return await OpenAIProcessorConversationConfig.objects.filter().afirst()
@staticmethod @staticmethod
@ -340,11 +344,11 @@ class EntryAdapters:
if min_date is not None: if min_date is not None:
# Convert the min_date timestamp to yyyy-mm-dd format # Convert the min_date timestamp to yyyy-mm-dd format
formatted_min_date = date.fromtimestamp(min_date).strftime("%Y-%m-%d") formatted_min_date = date.fromtimestamp(min_date).strftime("%Y-%m-%d")
q_filter_terms &= Q(entry_dates__date__gte=formatted_min_date) q_filter_terms &= Q(embeddings_dates__date__gte=formatted_min_date)
if max_date is not None: if max_date is not None:
# Convert the max_date timestamp to yyyy-mm-dd format # Convert the max_date timestamp to yyyy-mm-dd format
formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d") formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d")
q_filter_terms &= Q(entry_dates__date__lte=formatted_max_date) q_filter_terms &= Q(embeddings_dates__date__lte=formatted_max_date)
relevant_entries = Entry.objects.filter(user=user).filter( relevant_entries = Entry.objects.filter(user=user).filter(
q_filter_terms, q_filter_terms,

View file

@ -121,12 +121,12 @@ class TextToEntries(ABC):
batcher(entry_batches, batch_size), desc="Processing embeddings in batches" batcher(entry_batches, batch_size), desc="Processing embeddings in batches"
): ):
batch_embeddings_to_create = [] batch_embeddings_to_create = []
for entry_hash, embedding in entry_batch: for entry_hash, new_entry in entry_batch:
entry = hash_to_current_entries[entry_hash] entry = hash_to_current_entries[entry_hash]
batch_embeddings_to_create.append( batch_embeddings_to_create.append(
DbEntry( DbEntry(
user=user, user=user,
embeddings=embedding, embeddings=new_entry,
raw=entry.raw, raw=entry.raw,
compiled=entry.compiled, compiled=entry.compiled,
heading=entry.heading[:1000], # Truncate to max chars of field allowed heading=entry.heading[:1000], # Truncate to max chars of field allowed
@ -136,19 +136,19 @@ class TextToEntries(ABC):
corpus_id=entry.corpus_id, corpus_id=entry.corpus_id,
) )
) )
new_embeddings = DbEntry.objects.bulk_create(batch_embeddings_to_create) new_entries = DbEntry.objects.bulk_create(batch_embeddings_to_create)
logger.debug(f"Created {len(new_embeddings)} new embeddings") logger.debug(f"Created {len(new_entries)} new embeddings")
num_new_embeddings += len(new_embeddings) num_new_embeddings += len(new_entries)
dates_to_create = [] dates_to_create = []
with timer("Create new date associations for new embeddings", logger): with timer("Create new date associations for new embeddings", logger):
for embedding in new_embeddings: for new_entry in new_entries:
dates = self.date_filter.extract_dates(embedding.raw) dates = self.date_filter.extract_dates(new_entry.raw)
for date in dates: for date in dates:
dates_to_create.append( dates_to_create.append(
EntryDates( EntryDates(
date=date, date=date,
embeddings=embedding, entry=new_entry,
) )
) )
new_dates = EntryDates.objects.bulk_create(dates_to_create) new_dates = EntryDates.objects.bulk_create(dates_to_create)

View file

@ -670,8 +670,9 @@ async def extract_references_and_questions(
defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False
) )
elif await ConversationAdapters.has_openai_chat(): elif await ConversationAdapters.has_openai_chat():
openai_chat_config = await ConversationAdapters.get_openai_chat_config()
openai_chat = await ConversationAdapters.get_openai_chat() openai_chat = await ConversationAdapters.get_openai_chat()
api_key = openai_chat.api_key api_key = openai_chat_config.api_key
chat_model = openai_chat.chat_model chat_model = openai_chat.chat_model
inferred_queries = extract_questions( inferred_queries = extract_questions(
defiltered_query, model=chat_model, api_key=api_key, conversation_log=meta_log defiltered_query, model=chat_model, api_key=api_key, conversation_log=meta_log

View file

@ -10,7 +10,6 @@ from fastapi.templating import Jinja2Templates
from starlette.authentication import requires from starlette.authentication import requires
from khoj.utils.rawconfig import ( from khoj.utils.rawconfig import (
TextContentConfig, TextContentConfig,
FullConfig,
GithubContentConfig, GithubContentConfig,
GithubRepoConfig, GithubRepoConfig,
NotionContentConfig, NotionContentConfig,