mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Fix refactor bugs, CSRF token issues for use in production (#531)
Fix refactor bugs, CSRF token issues for use in production

* Add flags for the SameSite cookie settings to enable Django admin login
* Add tzdata to the dependencies to work around Python package issues on Linux
* Use the DJANGO_DEBUG flag correctly
* Fix the naming of the entry field when creating EntryDates objects
* Correctly retrieve the OpenAI config settings
* Fix the date filter to use the embeddings-based name for the dates field
This commit is contained in:
parent
fe860aaf83
commit
fb6ebd19fc
6 changed files with 38 additions and 14 deletions
|
@ -72,6 +72,7 @@ dependencies = [
|
||||||
"python-multipart == 0.0.6",
|
"python-multipart == 0.0.6",
|
||||||
"gunicorn == 21.2.0",
|
"gunicorn == 21.2.0",
|
||||||
"lxml == 4.9.3",
|
"lxml == 4.9.3",
|
||||||
|
"tzdata == 2023.3",
|
||||||
]
|
]
|
||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
|
|
||||||
|
|
|
@ -24,10 +24,29 @@ BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
||||||
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY")
|
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY")
|
||||||
|
|
||||||
# SECURITY WARNING: don't run with debug turned on in production!
|
# SECURITY WARNING: don't run with debug turned on in production!
|
||||||
DEBUG = True
|
DEBUG = os.getenv("DJANGO_DEBUG", "False") == "True"
|
||||||
|
|
||||||
ALLOWED_HOSTS = []
|
ALLOWED_HOSTS = [".khoj.dev", "localhost", "127.0.0.1", "[::1]", "beta.khoj.dev"]
|
||||||
|
|
||||||
|
CSRF_TRUSTED_ORIGINS = [
|
||||||
|
"https://app.khoj.dev",
|
||||||
|
"https://beta.khoj.dev",
|
||||||
|
"https://khoj.dev",
|
||||||
|
"https://*.khoj.dev",
|
||||||
|
]
|
||||||
|
|
||||||
|
COOKIE_SAMESITE = "None"
|
||||||
|
if DEBUG:
|
||||||
|
SESSION_COOKIE_DOMAIN = "localhost"
|
||||||
|
CSRF_COOKIE_DOMAIN = "localhost"
|
||||||
|
else:
|
||||||
|
SESSION_COOKIE_DOMAIN = "khoj.dev"
|
||||||
|
CSRF_COOKIE_DOMAIN = "khoj.dev"
|
||||||
|
|
||||||
|
SESSION_COOKIE_SECURE = True
|
||||||
|
CSRF_COOKIE_SECURE = True
|
||||||
|
COOKIE_SAMESITE = "None"
|
||||||
|
SESSION_COOKIE_SAMESITE = "None"
|
||||||
|
|
||||||
# Application definition
|
# Application definition
|
||||||
|
|
||||||
|
|
|
@ -265,6 +265,10 @@ class ConversationAdapters:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def get_openai_chat():
|
async def get_openai_chat():
|
||||||
|
return await ChatModelOptions.objects.filter(model_type="openai").afirst()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
async def get_openai_chat_config():
|
||||||
return await OpenAIProcessorConversationConfig.objects.filter().afirst()
|
return await OpenAIProcessorConversationConfig.objects.filter().afirst()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -340,11 +344,11 @@ class EntryAdapters:
|
||||||
if min_date is not None:
|
if min_date is not None:
|
||||||
# Convert the min_date timestamp to yyyy-mm-dd format
|
# Convert the min_date timestamp to yyyy-mm-dd format
|
||||||
formatted_min_date = date.fromtimestamp(min_date).strftime("%Y-%m-%d")
|
formatted_min_date = date.fromtimestamp(min_date).strftime("%Y-%m-%d")
|
||||||
q_filter_terms &= Q(entry_dates__date__gte=formatted_min_date)
|
q_filter_terms &= Q(embeddings_dates__date__gte=formatted_min_date)
|
||||||
if max_date is not None:
|
if max_date is not None:
|
||||||
# Convert the max_date timestamp to yyyy-mm-dd format
|
# Convert the max_date timestamp to yyyy-mm-dd format
|
||||||
formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d")
|
formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d")
|
||||||
q_filter_terms &= Q(entry_dates__date__lte=formatted_max_date)
|
q_filter_terms &= Q(embeddings_dates__date__lte=formatted_max_date)
|
||||||
|
|
||||||
relevant_entries = Entry.objects.filter(user=user).filter(
|
relevant_entries = Entry.objects.filter(user=user).filter(
|
||||||
q_filter_terms,
|
q_filter_terms,
|
||||||
|
|
|
@ -121,12 +121,12 @@ class TextToEntries(ABC):
|
||||||
batcher(entry_batches, batch_size), desc="Processing embeddings in batches"
|
batcher(entry_batches, batch_size), desc="Processing embeddings in batches"
|
||||||
):
|
):
|
||||||
batch_embeddings_to_create = []
|
batch_embeddings_to_create = []
|
||||||
for entry_hash, embedding in entry_batch:
|
for entry_hash, new_entry in entry_batch:
|
||||||
entry = hash_to_current_entries[entry_hash]
|
entry = hash_to_current_entries[entry_hash]
|
||||||
batch_embeddings_to_create.append(
|
batch_embeddings_to_create.append(
|
||||||
DbEntry(
|
DbEntry(
|
||||||
user=user,
|
user=user,
|
||||||
embeddings=embedding,
|
embeddings=new_entry,
|
||||||
raw=entry.raw,
|
raw=entry.raw,
|
||||||
compiled=entry.compiled,
|
compiled=entry.compiled,
|
||||||
heading=entry.heading[:1000], # Truncate to max chars of field allowed
|
heading=entry.heading[:1000], # Truncate to max chars of field allowed
|
||||||
|
@ -136,19 +136,19 @@ class TextToEntries(ABC):
|
||||||
corpus_id=entry.corpus_id,
|
corpus_id=entry.corpus_id,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
new_embeddings = DbEntry.objects.bulk_create(batch_embeddings_to_create)
|
new_entries = DbEntry.objects.bulk_create(batch_embeddings_to_create)
|
||||||
logger.debug(f"Created {len(new_embeddings)} new embeddings")
|
logger.debug(f"Created {len(new_entries)} new embeddings")
|
||||||
num_new_embeddings += len(new_embeddings)
|
num_new_embeddings += len(new_entries)
|
||||||
|
|
||||||
dates_to_create = []
|
dates_to_create = []
|
||||||
with timer("Create new date associations for new embeddings", logger):
|
with timer("Create new date associations for new embeddings", logger):
|
||||||
for embedding in new_embeddings:
|
for new_entry in new_entries:
|
||||||
dates = self.date_filter.extract_dates(embedding.raw)
|
dates = self.date_filter.extract_dates(new_entry.raw)
|
||||||
for date in dates:
|
for date in dates:
|
||||||
dates_to_create.append(
|
dates_to_create.append(
|
||||||
EntryDates(
|
EntryDates(
|
||||||
date=date,
|
date=date,
|
||||||
embeddings=embedding,
|
entry=new_entry,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
new_dates = EntryDates.objects.bulk_create(dates_to_create)
|
new_dates = EntryDates.objects.bulk_create(dates_to_create)
|
||||||
|
|
|
@ -670,8 +670,9 @@ async def extract_references_and_questions(
|
||||||
defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False
|
defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False
|
||||||
)
|
)
|
||||||
elif await ConversationAdapters.has_openai_chat():
|
elif await ConversationAdapters.has_openai_chat():
|
||||||
|
openai_chat_config = await ConversationAdapters.get_openai_chat_config()
|
||||||
openai_chat = await ConversationAdapters.get_openai_chat()
|
openai_chat = await ConversationAdapters.get_openai_chat()
|
||||||
api_key = openai_chat.api_key
|
api_key = openai_chat_config.api_key
|
||||||
chat_model = openai_chat.chat_model
|
chat_model = openai_chat.chat_model
|
||||||
inferred_queries = extract_questions(
|
inferred_queries = extract_questions(
|
||||||
defiltered_query, model=chat_model, api_key=api_key, conversation_log=meta_log
|
defiltered_query, model=chat_model, api_key=api_key, conversation_log=meta_log
|
||||||
|
|
|
@ -10,7 +10,6 @@ from fastapi.templating import Jinja2Templates
|
||||||
from starlette.authentication import requires
|
from starlette.authentication import requires
|
||||||
from khoj.utils.rawconfig import (
|
from khoj.utils.rawconfig import (
|
||||||
TextContentConfig,
|
TextContentConfig,
|
||||||
FullConfig,
|
|
||||||
GithubContentConfig,
|
GithubContentConfig,
|
||||||
GithubRepoConfig,
|
GithubRepoConfig,
|
||||||
NotionContentConfig,
|
NotionContentConfig,
|
||||||
|
|
Loading…
Reference in a new issue