From 10398f2c87582d9ffa12f84cee136688f6a4c784 Mon Sep 17 00:00:00 2001
From: sanj <67624670+iodrift@users.noreply.github.com>
Date: Fri, 31 Jan 2025 11:12:53 -0800
Subject: [PATCH] Auto-update: Fri Jan 31 11:12:53 PST 2025

---
 asr                                     | 74 +++++++++++++++++++++++++
 mlx_models/distil-medium.en/config.json | 13 +++++
 2 files changed, 87 insertions(+)
 create mode 100755 asr
 create mode 100644 mlx_models/distil-medium.en/config.json

diff --git a/asr b/asr
new file mode 100755
index 0000000..3f26c5e
--- /dev/null
+++ b/asr
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""Transcribe or translate an audio file with LightningWhisperMLX."""
+
+import argparse
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+
+from lightning_whisper_mlx import LightningWhisperMLX
+
+
+def convert_to_mp3(input_path):
+    """Return the path of an MP3 rendition of ``input_path``.
+
+    Paths already ending in ``.mp3`` (case-insensitive) are returned unchanged.
+    Anything else is converted with ffmpeg into ``converted.mp3`` inside a new
+    temporary directory; the caller owns that directory and should delete it.
+
+    Exits the process with an error message if ffmpeg is missing or fails.
+    """
+    if input_path.lower().endswith(".mp3"):
+        return input_path  # No conversion needed
+
+    temp_dir = tempfile.mkdtemp()
+    output_path = os.path.join(temp_dir, "converted.mp3")
+
+    try:
+        subprocess.run(
+            ["ffmpeg", "-y", "-i", input_path, "-q:a", "2", output_path],
+            check=True,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+    except (OSError, subprocess.CalledProcessError):
+        # OSError covers a missing or non-executable ffmpeg binary, which
+        # subprocess.run reports before any exit status exists.
+        shutil.rmtree(temp_dir, ignore_errors=True)
+        sys.exit("Error: Failed to convert file to MP3. Ensure ffmpeg is installed.")
+
+    return output_path
+
+
+def main():
+    """Parse the command line, transcribe the given file and print the text."""
+    parser = argparse.ArgumentParser(
+        description="Transcribe or translate audio using LightningWhisperMLX."
+    )
+    parser.add_argument("file", help="Path to the audio file.")
+    parser.add_argument("--translate", action="store_true", help="Enable translation mode.")
+
+    args = parser.parse_args()
+
+    audio_path = convert_to_mp3(args.file)
+    task_mode = "translate" if args.translate else "transcribe"
+
+    try:
+        # NOTE(review): this commit also adds mlx_models/distil-medium.en/, so
+        # the model id may need the ".en" suffix; also confirm that
+        # transcribe() accepts a task= keyword.
+        whisper = LightningWhisperMLX(model="distil-medium", batch_size=12, quant=None)
+        result = whisper.transcribe(audio_path=audio_path, task=task_mode)["text"]
+    finally:
+        # convert_to_mp3 returns its argument unchanged when nothing was
+        # converted, so a differing path means a temp directory exists.
+        if audio_path != args.file:
+            shutil.rmtree(os.path.dirname(audio_path), ignore_errors=True)
+
+    print(result)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mlx_models/distil-medium.en/config.json b/mlx_models/distil-medium.en/config.json
new file mode 100644
index 0000000..90b9b4e
--- /dev/null
+++ b/mlx_models/distil-medium.en/config.json
@@ -0,0 +1,13 @@
+{
+    "n_mels": 80,
+    "n_audio_ctx": 1500,
+    "n_audio_state": 1024,
+    "n_audio_head": 16,
+    "n_audio_layer": 24,
+    "n_vocab": 51864,
+    "n_text_ctx": 448,
+    "n_text_state": 1024,
+    "n_text_head": 16,
+    "n_text_layer": 2,
+    "model_type": "whisper"
+}
\ No newline at end of file