Auto-update: Fri Jan 31 11:12:53 PST 2025
This commit is contained in:
parent
73bb6fe3d2
commit
10398f2c87
2 changed files with 56 additions and 0 deletions
43
asr
Executable file
43
asr
Executable file
|
@ -0,0 +1,43 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import subprocess
|
||||
from lightning_whisper_mlx import LightningWhisperMLX
|
||||
|
||||
def convert_to_mp3(input_path):
    """Return a path to an MP3 version of ``input_path``, converting if needed.

    If ``input_path`` already ends in ``.mp3`` (case-insensitive) it is
    returned unchanged. Otherwise ``ffmpeg`` is run to write
    ``converted.mp3`` into a fresh temporary directory and that path is
    returned. The temporary directory is removed when the interpreter exits.

    Args:
        input_path: Path to the source audio file.

    Returns:
        ``input_path`` itself, or the path of the converted MP3.

    Raises:
        SystemExit: If ffmpeg exits with a non-zero status or the ``ffmpeg``
            executable cannot be found.
    """
    # Function-scope imports: nothing else in the script needs these modules.
    import atexit
    import shutil

    if input_path.lower().endswith(".mp3"):
        return input_path  # No conversion needed

    temp_dir = tempfile.mkdtemp()
    # mkdtemp() never deletes what it creates; the caller still needs the
    # converted file after we return, so defer cleanup to interpreter exit.
    atexit.register(shutil.rmtree, temp_dir, ignore_errors=True)
    output_path = os.path.join(temp_dir, "converted.mp3")

    try:
        subprocess.run(
            ["ffmpeg", "-y", "-i", input_path, "-q:a", "2", output_path],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # CalledProcessError: ffmpeg ran but failed (check=True).
        # FileNotFoundError: the ffmpeg executable is not on PATH.
        sys.exit("Error: Failed to convert file to MP3. Ensure ffmpeg is installed.")

    return output_path
|
||||
|
||||
def main():
    """Command-line entry point: transcribe (or translate) one audio file.

    Parses the positional ``file`` argument and the optional ``--translate``
    flag, passes the file through ``convert_to_mp3``, runs it through a
    ``LightningWhisperMLX`` model and prints the ``'text'`` field of the
    result to stdout.
    """
    cli = argparse.ArgumentParser(description="Transcribe or translate audio using LightningWhisperMLX.")
    cli.add_argument("file", help="Path to the audio file.")
    cli.add_argument("--translate", action="store_true", help="Enable translation mode.")
    options = cli.parse_args()

    mp3_path = convert_to_mp3(options.file)

    if options.translate:
        task = "translate"
    else:
        task = "transcribe"

    # NOTE(review): the model directory committed alongside this script is
    # "distil-medium.en"; confirm "distil-medium" is a name the installed
    # lightning_whisper_mlx accepts, and that transcribe() takes `task`.
    model = LightningWhisperMLX(model="distil-medium", batch_size=12, quant=None)

    output = model.transcribe(audio_path=mp3_path, task=task)
    print(output['text'])


if __name__ == "__main__":
    main()
|
||||
|
13
mlx_models/distil-medium.en/config.json
Normal file
13
mlx_models/distil-medium.en/config.json
Normal file
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"n_mels": 80,
|
||||
"n_audio_ctx": 1500,
|
||||
"n_audio_state": 1024,
|
||||
"n_audio_head": 16,
|
||||
"n_audio_layer": 24,
|
||||
"n_vocab": 51864,
|
||||
"n_text_ctx": 448,
|
||||
"n_text_state": 1024,
|
||||
"n_text_head": 16,
|
||||
"n_text_layer": 2,
|
||||
"model_type": "whisper"
|
||||
}
|
Loading…
Add table
Reference in a new issue