Auto-update: Sat Jan 18 19:49:07 PST 2025

This commit is contained in:
sanj 2025-01-18 19:49:07 -08:00
parent c221a2149a
commit df0b601ad7

114
llux.py
View file

@ -15,6 +15,7 @@ import os
import random import random
import tempfile import tempfile
import time import time
from openai import OpenAI
from typing import Optional, Dict, Any from typing import Optional, Dict, Any
import markdown import markdown
@ -68,6 +69,20 @@ class Llux:
self.diffusers_steps = diffusers_config["steps"] self.diffusers_steps = diffusers_config["steps"]
self.img_generation_confirmation = diffusers_config["img_generation_confirmation"] self.img_generation_confirmation = diffusers_config["img_generation_confirmation"]
# Text-to-speech configuration
# Example: reading from config["tts"] or you can just hard-code the base_url, model, etc.
tts_config = config.get("tts", {})
tts_url = tts_config.get("base_url")
tts_api_key = tts_config.get("api_key", "not-needed")
self.tts_model = tts_config.get("model", "kokoro")
self.tts_voice = tts_config.get("voice", "af_sky+af_bella") # single or multiple voicepacks
# Initialize TTS client
self.tts_client = OpenAI(
base_url=tts_url,
api_key=tts_api_key
)
# Create Matrix client # Create Matrix client
self.client = AsyncClient(self.server, self.username) self.client = AsyncClient(self.server, self.username)
self.join_time = datetime.datetime.now() self.join_time = datetime.datetime.now()
@ -190,6 +205,44 @@ class Llux:
}, },
) )
async def send_audio(self, channel: str, audio_path: str) -> None:
    """
    Upload an audio file and post it to a Matrix room as an ``m.audio`` message.

    The file is always advertised with the ``audio/mpeg`` MIME type, and the
    message body is the file's base name.

    Args:
        channel: ID of the Matrix room to post into.
        audio_path: Path of the audio file on disk.

    Returns:
        None. A failed upload is logged and the method returns without
        posting. Any exception is logged and reported to the room through
        ``send_message`` instead of being propagated.
    """
    try:
        filename = os.path.basename(audio_path)
        size_bytes = os.path.getsize(audio_path)

        with open(audio_path, "rb") as f:
            # upload() returns (response, decryption_keys). The second item
            # is NOT an error indicator: it is None for unencrypted uploads,
            # so failure has to be detected from the response object itself.
            upload_response, _decryption_keys = await self.client.upload(
                f,
                content_type="audio/mpeg",
                filename=filename,
                filesize=size_bytes,
            )

        # Only a successful upload response carries a content URI; an error
        # response does not, so its absence marks a failed upload.
        content_uri = getattr(upload_response, "content_uri", None)
        if not content_uri:
            self.logger.error(f"Failed to upload audio: {upload_response}")
            return

        self.logger.debug(f"Successfully uploaded audio, URI: {content_uri}")

        await self.client.room_send(
            room_id=channel,
            message_type="m.room.message",
            content={
                "msgtype": "m.audio",
                "url": content_uri,
                "body": filename,
                "info": {
                    "mimetype": "audio/mpeg",
                    "size": size_bytes,
                    # "duration" (in ms) may also be added here if known
                },
            },
        )
    except Exception as e:
        self.logger.error(f"Error sending audio: {e}", exc_info=True)
        await self.send_message(channel, f"Failed to send audio: {str(e)}")
async def send_image(self, channel: str, image_path: str) -> None: async def send_image(self, channel: str, image_path: str) -> None:
""" """
Send an image to a Matrix channel by uploading the file and then sending an m.image message. Send an image to a Matrix channel by uploading the file and then sending an m.image message.
@ -289,6 +342,65 @@ class Llux:
self.logger.error(f"Error downloading/processing image: {e}", exc_info=True) self.logger.error(f"Error downloading/processing image: {e}", exc_info=True)
return None return None
async def generate_tts(self, text: str) -> str:
    """
    Synthesize speech for ``text`` with the configured OpenAI-compatible TTS server.

    The audio is streamed into a fresh ``.mp3`` temporary file created inside
    ``self.temp_dir``. The caller owns the returned file and is responsible
    for deleting it.

    Args:
        text: The text to convert to speech.

    Returns:
        Path of the generated MP3 file.

    Raises:
        Exception: Whatever the TTS client raises is logged and re-raised.
            The temporary file is removed before the error propagates.
    """
    # Function-scope import so this block does not depend on the module's
    # import list, which is only partly visible here.
    import asyncio

    self.logger.info(f"Generating TTS for text: '{text}'")

    # Reserve a temp file name; only the path is needed, so close the handle.
    fd, path = tempfile.mkstemp(suffix=".mp3", dir=self.temp_dir)
    os.close(fd)

    def _synthesize() -> None:
        # The TTS client is synchronous; this request blocks until the whole
        # audio stream has been written to disk.
        with self.tts_client.audio.speech.with_streaming_response.create(
            model=self.tts_model,
            voice=self.tts_voice,
            input=text,
            response_format="mp3",
        ) as response:
            response.stream_to_file(path)

    try:
        # Run the blocking HTTP call in a worker thread so the event loop
        # (and with it the Matrix sync) stays responsive during synthesis.
        await asyncio.get_running_loop().run_in_executor(None, _synthesize)
    except Exception as e:
        self.logger.error(f"Error generating TTS audio: {e}", exc_info=True)
        # Do not leave an empty or partial temp file behind on failure.
        try:
            os.remove(path)
        except OSError:
            pass  # best effort: the file may already be gone
        raise

    self.logger.debug(f"TTS audio saved to {path}")
    return path
async def generate_and_send_tts(self, channel: str, text: str, user_sender: str) -> None:
    """
    Generate speech for ``text``, send it to the room, and record the event.

    Calls ``generate_tts()`` to produce an MP3 and ``send_audio()`` to upload
    and post it. When both calls return without raising, an assistant entry
    is added to the conversation history. The temporary MP3 is deleted
    afterwards in every case.

    Args:
        channel: ID of the Matrix room to post into.
        text: The text to speak. If empty or whitespace only, a usage hint
            is sent and nothing is generated.
        user_sender: Sender identifier, used to address the confirmation
            message and stored with the history entry.

    Returns:
        None. Errors from generation or sending are logged and reported to
        the room rather than raised.
    """
    if not text or not text.strip():
        # Nothing to synthesize; avoid a pointless request to the TTS server.
        await self.send_message(
            channel,
            f"{user_sender} Please provide some text, e.g. .tts hello world",
        )
        return

    audio_path = None
    try:
        # A small courtesy message to let the user know TTS is being prepared
        confirmation_message = (
            f"{user_sender} Generating TTS for: '{text}'. Please wait..."
        )
        await self.send_message(channel, confirmation_message)

        audio_path = await self.generate_tts(text)

        # Upload and send the audio file
        await self.send_audio(channel, audio_path)
    except Exception as e:
        err_msg = f"Error generating TTS: {str(e)}"
        self.logger.error(err_msg, exc_info=True)
        await self.send_message(channel, err_msg)
    else:
        # Record the TTS request in the conversation history. Only a text
        # note is stored, not the audio itself.
        await self.add_history(
            role="assistant",
            channel=channel,
            sender=user_sender,
            message=f"Generated TTS for text: {text}",
            image_path=None,  # no image is associated with a TTS entry
        )
    finally:
        # The MP3 is not referenced again once send_audio() has returned, so
        # delete it to keep temp files from accumulating.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError as cleanup_error:
                self.logger.debug(
                    f"Could not remove temporary TTS file {audio_path}: {cleanup_error}"
                )
async def add_history( async def add_history(
self, self,
role: str, role: str,
@ -656,6 +768,7 @@ class Llux:
- .reset / .stock : reset conversation to default or stock - .reset / .stock : reset conversation to default or stock
- .help : display help menus - .help : display help menus
- .img : generate and send an image - .img : generate and send an image
- .tts : generate and send audio
- .model / .clear : admin commands - .model / .clear : admin commands
""" """
self.logger.debug(f"Handling message: {message[0]} from {sender_display}") self.logger.debug(f"Handling message: {message[0]} from {sender_display}")
@ -671,6 +784,7 @@ class Llux:
".stock": lambda: self.reset(channel, sender, sender_display, stock=True), ".stock": lambda: self.reset(channel, sender, sender_display, stock=True),
".help": lambda: self.help_menu(channel, sender_display), ".help": lambda: self.help_menu(channel, sender_display),
".img": lambda: self.generate_and_send_image(channel, " ".join(message[1:]), sender), ".img": lambda: self.generate_and_send_image(channel, " ".join(message[1:]), sender),
".tts": lambda: self.generate_and_send_tts(channel, " ".join(message[1:]), sender),
} }
admin_commands = { admin_commands = {