From 10398f2c87582d9ffa12f84cee136688f6a4c784 Mon Sep 17 00:00:00 2001
From: sanj <67624670+iodrift@users.noreply.github.com>
Date: Fri, 31 Jan 2025 11:12:53 -0800
Subject: [PATCH] Auto-update: Fri Jan 31 11:12:53 PST 2025

---
 asr                                     | 43 +++++++++++++++++++++++++
 mlx_models/distil-medium.en/config.json | 13 ++++++++
 2 files changed, 56 insertions(+)
 create mode 100755 asr
 create mode 100644 mlx_models/distil-medium.en/config.json

diff --git a/asr b/asr
new file mode 100755
index 0000000..3f26c5e
--- /dev/null
+++ b/asr
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+
+import argparse
+import os
+import sys
+import tempfile
+import subprocess
+from lightning_whisper_mlx import LightningWhisperMLX
+
+def convert_to_mp3(input_path):
+    """Return an MP3 path for input_path, converting with ffmpeg into a new temp directory if needed.
+
+    Exits via sys.exit() with an error message when ffmpeg is missing or the conversion fails.
+    """
+    if input_path.lower().endswith(".mp3"):
+        return input_path  # No conversion needed
+    # The temporary directory is not removed here; cleanup is left to the caller.
+    output_path = os.path.join(tempfile.mkdtemp(), "converted.mp3")
+    try:
+        subprocess.run(["ffmpeg", "-y", "-i", input_path, "-q:a", "2", output_path],
+                       check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    except (subprocess.CalledProcessError, FileNotFoundError):  # non-zero exit, or ffmpeg not on PATH
+        sys.exit("Error: Failed to convert file to MP3. Ensure ffmpeg is installed.")
+    return output_path
+
+def main():
+    """Parse CLI arguments, transcribe or translate the audio file, and print the resulting text."""
+    import shutil  # local import: only needed to remove the temporary conversion directory
+    parser = argparse.ArgumentParser(description="Transcribe or translate audio using LightningWhisperMLX.")
+    parser.add_argument("file", help="Path to the audio file.")
+    parser.add_argument("--translate", action="store_true", help="Enable translation mode.")
+    args = parser.parse_args()
+    audio_path = convert_to_mp3(args.file)  # returns args.file unchanged when it is already an MP3
+    try:
+        whisper = LightningWhisperMLX(model="distil-medium", batch_size=12, quant=None)  # NOTE(review): this patch ships mlx_models/distil-medium.en -- confirm the model name
+        print(whisper.transcribe(audio_path=audio_path, task="translate" if args.translate else "transcribe")['text'])
+    finally:
+        if audio_path != args.file:  # a temporary MP3 was created; remove its directory even on failure
+            shutil.rmtree(os.path.dirname(audio_path), ignore_errors=True)
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not when imported
+    main()
+
diff --git a/mlx_models/distil-medium.en/config.json b/mlx_models/distil-medium.en/config.json
new file mode 100644
index 0000000..90b9b4e
--- /dev/null
+++ b/mlx_models/distil-medium.en/config.json
@@ -0,0 +1,13 @@
+{
+    "n_mels": 80,
+    "n_audio_ctx": 1500,
+    "n_audio_state": 1024,
+    "n_audio_head": 16,
+    "n_audio_layer": 24,
+    "n_vocab": 51864,
+    "n_text_ctx": 448,
+    "n_text_state": 1024,
+    "n_text_head": 16,
+    "n_text_layer": 2,
+    "model_type": "whisper"
+}
\ No newline at end of file