diff --git a/pyproject.toml b/pyproject.toml
index 63a50fac..42adf209 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,6 +75,7 @@ dependencies = [
     "tzdata == 2023.3",
     "rapidocr-onnxruntime == 1.3.8",
     "stripe == 7.3.0",
+    "openai-whisper >= 20231117",
 ]
 dynamic = ["version"]
 
diff --git a/src/khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py b/src/khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py
index c3e3c41d..37337791 100644
--- a/src/khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py
+++ b/src/khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py
@@ -1,4 +1,4 @@
-# Generated by Django 4.2.7 on 2023-11-26 09:37
+# Generated by Django 4.2.7 on 2023-11-26 13:54
 
 from django.db import migrations, models
 
@@ -15,11 +15,11 @@ class Migration(migrations.Migration):
                 ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                 ("created_at", models.DateTimeField(auto_now_add=True)),
                 ("updated_at", models.DateTimeField(auto_now=True)),
-                ("model_name", models.CharField(default="whisper-1", max_length=200)),
+                ("model_name", models.CharField(default="base", max_length=200)),
                 (
                     "model_type",
                     models.CharField(
-                        choices=[("openai", "Openai"), ("offline", "Offline")], default="openai", max_length=200
+                        choices=[("openai", "Openai"), ("offline", "Offline")], default="offline", max_length=200
                     ),
                 ),
             ],
diff --git a/src/khoj/database/models/__init__.py b/src/khoj/database/models/__init__.py
index 77478ef5..82348fbe 100644
--- a/src/khoj/database/models/__init__.py
+++ b/src/khoj/database/models/__init__.py
@@ -125,8 +125,8 @@ class SpeechToTextModelOptions(BaseModel):
         OPENAI = "openai"
         OFFLINE = "offline"
 
-    model_name = models.CharField(max_length=200, default="whisper-1")
-    model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OPENAI)
+    model_name = models.CharField(max_length=200, default="base")
+    model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OFFLINE)
 
 
 class ChatModelOptions(BaseModel):
diff --git a/src/khoj/processor/conversation/offline/whisper.py b/src/khoj/processor/conversation/offline/whisper.py
new file mode 100644
index 00000000..d22486a9
--- /dev/null
+++ b/src/khoj/processor/conversation/offline/whisper.py
@@ -0,0 +1,17 @@
+# External Packages
+from asgiref.sync import sync_to_async
+import whisper
+
+# Internal Packages
+from khoj.utils import state
+
+
+async def transcribe_audio_offline(audio_filename: str, model: str) -> str | None:
+    """
+    Transcribe audio file offline using Whisper
+    """
+    # Lazily load the Whisper model on first use, then reuse it across requests
+    if not state.whisper_model:
+        state.whisper_model = whisper.load_model(model)
+    response = await sync_to_async(state.whisper_model.transcribe)(audio_filename)
+    return response["text"]
diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 67b959a7..9f1b118e 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -31,6 +31,7 @@ from khoj.database.models import (
     NotionConfig,
 )
 from khoj.processor.conversation.offline.chat_model import extract_questions_offline
+from khoj.processor.conversation.offline.whisper import transcribe_audio_offline
 from khoj.processor.conversation.openai.gpt import extract_questions
 from khoj.processor.conversation.openai.whisper import transcribe_audio
 from khoj.processor.conversation.prompts import help_message, no_entries_found
@@ -605,13 +606,16 @@ async def transcribe(request: Request, common: CommonQueryParams, file: UploadFi
         # Send the audio data to the Whisper API
         speech_to_text_config = await ConversationAdapters.get_speech_to_text_config()
         openai_chat_config = await ConversationAdapters.get_openai_chat_config()
-        if not openai_chat_config or not speech_to_text_config:
+        if not speech_to_text_config:
             # If the user has not configured a speech to text model, return an unprocessable entity error
             status_code = 422
-        elif speech_to_text_config.model_type == ChatModelOptions.ModelType.OPENAI:
+        elif openai_chat_config and speech_to_text_config.model_type == ChatModelOptions.ModelType.OPENAI:
             api_key = openai_chat_config.api_key
             speech2text_model = speech_to_text_config.model_name
-            user_message = await transcribe_audio(model=speech2text_model, audio_file=audio_file, api_key=api_key)
+            user_message = await transcribe_audio(audio_file, model=speech2text_model, api_key=api_key)
+        elif speech_to_text_config.model_type == ChatModelOptions.ModelType.OFFLINE:
+            speech2text_model = speech_to_text_config.model_name
+            user_message = await transcribe_audio_offline(audio_filename, model=speech2text_model)
     finally:
         # Close and Delete the temporary audio file
         audio_file.close()
diff --git a/src/khoj/utils/initialization.py b/src/khoj/utils/initialization.py
index ee0454c4..313b18fc 100644
--- a/src/khoj/utils/initialization.py
+++ b/src/khoj/utils/initialization.py
@@ -74,10 +74,9 @@ def initialization():
         except ModuleNotFoundError as e:
             logger.warning("Offline models are not supported on this device.")
 
-        use_openai_model = input("Use OpenAI chat model? (y/n): ")
-
+        use_openai_model = input("Use OpenAI models? (y/n): ")
         if use_openai_model == "y":
-            logger.info("🗣️ Setting up OpenAI chat model")
+            logger.info("🗣️ Setting up your OpenAI configuration")
             api_key = input("Enter your OpenAI API key: ")
             OpenAIProcessorConversationConfig.objects.create(api_key=api_key)
 
@@ -104,7 +103,25 @@ def initialization():
                 model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI
             )
 
-        logger.info("🗣️ Chat model configuration complete")
+        if use_offline_model == "y" or use_openai_model == "y":
+            logger.info("🗣️ Chat model configuration complete")
+
+        use_offline_speech2text_model = input("Use offline speech to text model? (y/n): ")
+        if use_offline_speech2text_model == "y":
+            logger.info("🗣️ Setting up offline speech to text model")
+            # Delete any existing speech to text model options. There can only be one.
+            SpeechToTextModelOptions.objects.all().delete()
+
+            default_offline_speech2text_model = "base"
+            offline_speech2text_model = input(
+                f"Enter the Whisper model to use offline (default: {default_offline_speech2text_model}): "
+            )
+            offline_speech2text_model = offline_speech2text_model or default_offline_speech2text_model
+            SpeechToTextModelOptions.objects.create(
+                model_name=offline_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OFFLINE
+            )
+
+            logger.info(f"🗣️ Offline speech to text model configured to {offline_speech2text_model}")
 
     admin_user = KhojUser.objects.filter(is_staff=True).first()
     if admin_user is None:
diff --git a/src/khoj/utils/state.py b/src/khoj/utils/state.py
index 91f5f0ce..ce4d5804 100644
--- a/src/khoj/utils/state.py
+++ b/src/khoj/utils/state.py
@@ -21,6 +21,7 @@ embeddings_model: EmbeddingsModel = None
 cross_encoder_model: CrossEncoderModel = None
 content_index = ContentIndex()
 gpt4all_processor_config: GPT4AllProcessorModel = None
+whisper_model = None
 config_file: Path = None
 verbose: int = 0
 host: str = None
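
Note (not part of the patch): a minimal sketch of how the new offline transcription helper can be exercised directly, assuming the openai-whisper dependency added above is installed along with ffmpeg, which Whisper uses to decode audio. The "sample.webm" filename is a hypothetical stand-in for a recorded audio file.

import asyncio

from khoj.processor.conversation.offline.whisper import transcribe_audio_offline


async def main():
    # The first call lazily loads the requested Whisper model into khoj.utils.state;
    # subsequent calls reuse the cached model instead of reloading it from disk.
    transcript = await transcribe_audio_offline("sample.webm", model="base")
    print(transcript)


asyncio.run(main())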