Add default server configuration on first run in non-interactive mode

This should configure Khoj with decent default configurations via
Docker and avoid needing to configure Khoj via the admin page to start
using dockerized Khoj.

Update the default max prompt size set during Khoj initialization,
as online chat models are cheaper and offline chat models have larger
context windows now.
This commit is contained in:
Debanjum Singh Solanky 2024-09-18 19:19:27 -07:00
parent 020167c7cf
commit f177723711
4 changed files with 60 additions and 30 deletions

View file

@ -48,7 +48,7 @@ services:
# Replace the domain with your domain. Proceed with caution, especially if you are using anonymous mode. # Replace the domain with your domain. Proceed with caution, especially if you are using anonymous mode.
# - KHOJ_NO_HTTPS=True # - KHOJ_NO_HTTPS=True
# - KHOJ_DOMAIN=192.168.0.104 # - KHOJ_DOMAIN=192.168.0.104
command: --host="0.0.0.0" --port=42110 -vv --anonymous-mode command: --host="0.0.0.0" --port=42110 -vv --anonymous-mode --non-interactive
volumes: volumes:

View file

@ -131,7 +131,7 @@ def run(should_start_server=True):
logger.info(f"📦 Initializing DB:\n{db_migrate_output.getvalue().strip()}") logger.info(f"📦 Initializing DB:\n{db_migrate_output.getvalue().strip()}")
logger.debug(f"🌍 Initializing Web Client:\n{collectstatic_output.getvalue().strip()}") logger.debug(f"🌍 Initializing Web Client:\n{collectstatic_output.getvalue().strip()}")
initialization() initialization(not args.non_interactive)
# Create app directory, if it doesn't exist # Create app directory, if it doesn't exist
state.config_file.parent.mkdir(parents=True, exist_ok=True) state.config_file.parent.mkdir(parents=True, exist_ok=True)

View file

@ -50,6 +50,12 @@ def cli(args=None):
default=False, default=False,
help="Run Khoj in anonymous mode. This does not require any login for connecting users.", help="Run Khoj in anonymous mode. This does not require any login for connecting users.",
) )
parser.add_argument(
"--non-interactive",
action="store_true",
default=False,
help="Start Khoj in non-interactive mode. Assumes interactive shell unavailable for config. E.g when run via Docker.",
)
args, remaining_args = parser.parse_known_args(args) args, remaining_args = parser.parse_known_args(args)

View file

@ -15,11 +15,16 @@ from khoj.utils.constants import default_offline_chat_model, default_online_chat
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def initialization(): def initialization(interactive: bool = True):
def _create_admin_user(): def _create_admin_user():
logger.info( logger.info(
"👩‍✈️ Setting up admin user. These credentials will allow you to configure your server at /server/admin." "👩‍✈️ Setting up admin user. These credentials will allow you to configure your server at /server/admin."
) )
if not interactive and (not os.getenv("KHOJ_ADMIN_EMAIL") or not os.getenv("KHOJ_ADMIN_PASSWORD")):
logger.error(
"🚨 Admin user cannot be created. Please set the KHOJ_ADMIN_EMAIL, KHOJ_ADMIN_PASSWORD environment variables or start server in interactive mode."
)
exit(1)
email_addr = os.getenv("KHOJ_ADMIN_EMAIL") or input("Email: ") email_addr = os.getenv("KHOJ_ADMIN_EMAIL") or input("Email: ")
password = os.getenv("KHOJ_ADMIN_PASSWORD") or input("Password: ") password = os.getenv("KHOJ_ADMIN_PASSWORD") or input("Password: ")
admin_user = KhojUser.objects.create_superuser(email=email_addr, username=email_addr, password=password) admin_user = KhojUser.objects.create_superuser(email=email_addr, username=email_addr, password=password)
@ -27,23 +32,26 @@ def initialization():
def _create_chat_configuration(): def _create_chat_configuration():
logger.info( logger.info(
"🗣️ Configure chat models available to your server. You can always update these at /server/admin using the credentials of your admin account" "🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
) )
try: try:
use_offline_model = input("Use offline chat model? (y/n): ") use_offline_model = "y" if not interactive else input("Use offline chat model? (y/n): ")
if use_offline_model == "y": if use_offline_model == "y":
logger.info("🗣️ Setting up offline chat model") logger.info("🗣️ Setting up offline chat model")
offline_chat_model = input( if interactive:
f"Enter the offline chat model you want to use. See HuggingFace for available GGUF models (default: {default_offline_chat_model}): " offline_chat_model = input(
) f"Enter the offline chat model you want to use. See HuggingFace for available GGUF models (default: {default_offline_chat_model}): "
)
else:
offline_chat_model = ""
if offline_chat_model == "": if offline_chat_model == "":
ChatModelOptions.objects.create( ChatModelOptions.objects.create(
chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE
) )
else: else:
default_max_tokens = model_to_prompt_size.get(offline_chat_model, 2000) default_max_tokens = model_to_prompt_size.get(offline_chat_model, 4000)
max_tokens = input( max_tokens = input(
f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):" f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):"
) )
@ -66,40 +74,56 @@ def initialization():
except ModuleNotFoundError as e: except ModuleNotFoundError as e:
logger.warning("Offline models are not supported on this device.") logger.warning("Offline models are not supported on this device.")
use_openai_model = input("Use OpenAI models? (y/n): ") default_openai_api_key = os.getenv("OPENAI_API_KEY")
default_use_openai_model = {True: "y", False: "n"}[default_openai_api_key != None]
use_openai_model = default_use_openai_model if not interactive else input("Use OpenAI models? (y/n): ")
if use_openai_model == "y": if use_openai_model == "y":
logger.info("🗣️ Setting up your OpenAI configuration") logger.info("🗣️ Setting up your OpenAI configuration")
api_key = input("Enter your OpenAI API key: ") if interactive:
api_key = input(f"Enter your OpenAI API key (default: {default_openai_api_key}): ")
else:
api_key = default_openai_api_key
OpenAIProcessorConversationConfig.objects.create(api_key=api_key) OpenAIProcessorConversationConfig.objects.create(api_key=api_key)
openai_chat_model = input( if interactive:
f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): " openai_chat_model = input(
) f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): "
openai_chat_model = openai_chat_model or default_online_chat_model )
openai_chat_model = openai_chat_model or default_online_chat_model
default_max_tokens = model_to_prompt_size.get(openai_chat_model, 2000) else:
max_tokens = input( openai_chat_model = default_online_chat_model
f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): " default_max_tokens = model_to_prompt_size.get(openai_chat_model, 10000)
) if interactive:
max_tokens = max_tokens or default_max_tokens max_tokens = input(
f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): "
)
max_tokens = max_tokens or default_max_tokens
else:
max_tokens = default_max_tokens
ChatModelOptions.objects.create( ChatModelOptions.objects.create(
chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens
) )
default_speech2text_model = "whisper-1" default_speech2text_model = "whisper-1"
openai_speech2text_model = input( if interactive:
f"Enter the OpenAI speech to text model you want to use (default: {default_speech2text_model}): " openai_speech2text_model = input(
) f"Enter the OpenAI speech to text model you want to use (default: {default_speech2text_model}): "
openai_speech2text_model = openai_speech2text_model or default_speech2text_model )
openai_speech2text_model = openai_speech2text_model or default_speech2text_model
else:
openai_speech2text_model = default_speech2text_model
SpeechToTextModelOptions.objects.create( SpeechToTextModelOptions.objects.create(
model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI
) )
default_text_to_image_model = "dall-e-3" default_text_to_image_model = "dall-e-3"
openai_text_to_image_model = input( if interactive:
f"Enter the OpenAI text to image model you want to use (default: {default_text_to_image_model}): " openai_text_to_image_model = input(
) f"Enter the OpenAI text to image model you want to use (default: {default_text_to_image_model}): "
openai_text_to_image_model = openai_text_to_image_model or default_text_to_image_model )
openai_text_to_image_model = openai_text_to_image_model or default_text_to_image_model
else:
openai_text_to_image_model = default_text_to_image_model
TextToImageModelConfig.objects.create( TextToImageModelConfig.objects.create(
model_name=openai_text_to_image_model, model_type=TextToImageModelConfig.ModelType.OPENAI model_name=openai_text_to_image_model, model_type=TextToImageModelConfig.ModelType.OPENAI
) )
@ -107,7 +131,7 @@ def initialization():
if use_offline_model == "y" or use_openai_model == "y": if use_offline_model == "y" or use_openai_model == "y":
logger.info("🗣️ Chat model configuration complete") logger.info("🗣️ Chat model configuration complete")
use_offline_speech2text_model = input("Use offline speech to text model? (y/n): ") use_offline_speech2text_model = "n" if not interactive else input("Use offline speech to text model? (y/n): ")
if use_offline_speech2text_model == "y": if use_offline_speech2text_model == "y":
logger.info("🗣️ Setting up offline speech to text model") logger.info("🗣️ Setting up offline speech to text model")
# Delete any existing speech to text model options. There can only be one. # Delete any existing speech to text model options. There can only be one.