Auto-update: Fri Jan 31 11:12:53 PST 2025

This commit is contained in:
sanj 2025-01-31 11:12:53 -08:00
parent 73bb6fe3d2
commit 10398f2c87
2 changed files with 56 additions and 0 deletions
asr
mlx_models/distil-medium.en

43
asr Executable file
View file

@ -0,0 +1,43 @@
#!/usr/bin/env python3
import argparse
import os
import sys
import tempfile
import subprocess
from lightning_whisper_mlx import LightningWhisperMLX
def convert_to_mp3(input_path):
    """Return a path to an MP3 version of ``input_path``.

    A path that already ends in ``.mp3`` (case-insensitive) is returned
    unchanged. Anything else is transcoded with ``ffmpeg`` into
    ``converted.mp3`` inside a fresh temporary directory; that directory is
    removed automatically when the interpreter exits.

    Args:
        input_path: Path to the source media file.

    Returns:
        ``input_path`` itself for MP3 input, otherwise the path of the
        converted file.

    Raises:
        SystemExit: If ``ffmpeg`` cannot be launched or exits with a
            non-zero status.
    """
    # Local imports keep this function self-contained with respect to the
    # file-level import list.
    import atexit
    import shutil

    if input_path.lower().endswith(".mp3"):
        return input_path  # No conversion needed

    temp_dir = tempfile.mkdtemp()
    # mkdtemp() leaves cleanup to the caller. The converted file must outlive
    # this function (the caller still has to read it), so defer removal to
    # interpreter exit instead of leaking one directory per run.
    atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)
    output_path = os.path.join(temp_dir, "converted.mp3")

    command = ["ffmpeg", "-y", "-i", input_path, "-q:a", "2", output_path]
    try:
        subprocess.run(
            command,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # A missing ffmpeg binary surfaces as FileNotFoundError, not
        # CalledProcessError; both must produce the same friendly message.
        sys.exit("Error: Failed to convert file to MP3. Ensure ffmpeg is installed.")
    return output_path
def main():
    """Parse the command line, transcribe the given audio file, and print the text.

    The positional ``file`` argument is converted to MP3 first when needed
    (via ``convert_to_mp3``). ``--translate`` switches the task passed to the
    model from ``"transcribe"`` to ``"translate"``.

    Raises:
        SystemExit: On invalid arguments, a missing input file, or a failed
            MP3 conversion.
    """
    parser = argparse.ArgumentParser(description="Transcribe or translate audio using LightningWhisperMLX.")
    parser.add_argument("file", help="Path to the audio file.")
    parser.add_argument("--translate", action="store_true", help="Enable translation mode.")
    args = parser.parse_args()

    # Fail early with an accurate message; otherwise a bad path is reported
    # later as an ffmpeg problem (or as an error from inside the model).
    if not os.path.exists(args.file):
        parser.error(f"audio file not found: {args.file}")

    audio_path = convert_to_mp3(args.file)
    task_mode = "translate" if args.translate else "transcribe"

    # NOTE(review): confirm "distil-medium" is a model name the installed
    # lightning_whisper_mlx accepts (upstream lists "distil-medium.en").
    whisper = LightningWhisperMLX(model="distil-medium", batch_size=12, quant=None)
    # NOTE(review): confirm transcribe() accepts a ``task`` keyword in the
    # installed lightning_whisper_mlx version.
    result = whisper.transcribe(audio_path=audio_path, task=task_mode)['text']
    print(result)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    # main() returns None, so this exits with status 0 exactly as a bare
    # call followed by normal interpreter shutdown would.
    raise SystemExit(main())

View file

@ -0,0 +1,13 @@
{
"n_mels": 80,
"n_audio_ctx": 1500,
"n_audio_state": 1024,
"n_audio_head": 16,
"n_audio_layer": 24,
"n_vocab": 51864,
"n_text_ctx": 448,
"n_text_state": 1024,
"n_text_head": 16,
"n_text_layer": 2,
"model_type": "whisper"
}