Auto-update: Sat Jan 18 19:49:07 PST 2025

2025-01-18 19:49:07 -08:00 · 2025-01-18 19:49:07 -08:00 · df0b601ad7
commit df0b601ad7
parent c221a2149a
1 changed files with 114 additions and 0 deletions
--- a/llux.py
+++ b/llux.py
@ -15,6 +15,7 @@ import os
 import random
 import tempfile
 import time
 from openai import OpenAI
 from typing import Optional, Dict, Any
 import markdown
@ -68,6 +69,20 @@ class Llux:
        self.diffusers_steps = diffusers_config["steps"]
        self.img_generation_confirmation = diffusers_config["img_generation_confirmation"]
        # Text-to-speech configuration
        # Example: reading from config["tts"] or you can just hard-code the base_url, model, etc.
        tts_config = config.get("tts", {})
        tts_url = tts_config.get("base_url")
        tts_api_key = tts_config.get("api_key", "not-needed")
        self.tts_model = tts_config.get("model", "kokoro")
        self.tts_voice = tts_config.get("voice", "af_sky+af_bella")  # single or multiple voicepacks
        # Initialize TTS client
        self.tts_client = OpenAI(
            base_url=tts_url,
            api_key=tts_api_key
        )
        # Create Matrix client
        self.client = AsyncClient(self.server, self.username)
        self.join_time = datetime.datetime.now()
@ -190,6 +205,44 @@ class Llux:
            },
        )
    async def send_audio(self, channel: str, audio_path: str) -> None:
        """
        Upload and send an MP3 (or any audio) file to Matrix as m.audio.
        """
        try:
            filename = os.path.basename(audio_path)
            size_bytes = os.path.getsize(audio_path)
            with open(audio_path, "rb") as f:
                upload_response, upload_error = await self.client.upload(
                    f,
                    content_type="audio/mpeg",
                    filename=filename
                )
            if upload_error:
                self.logger.error(f"Failed to upload audio: {upload_error}")
                return
            self.logger.debug(f"Successfully uploaded audio, URI: {upload_response.content_uri}")
            await self.client.room_send(
                room_id=channel,
                message_type="m.room.message",
                content={
                    "msgtype": "m.audio",
                    "url": upload_response.content_uri,
                    "body": filename,
                    "info": {
                        "mimetype": "audio/mpeg",
                        "size": size_bytes,
                        # you can also add "duration" here in ms if you want
                    }
                }
            )
        except Exception as e:
            self.logger.error(f"Error sending audio: {e}", exc_info=True)
            await self.send_message(channel, f"Failed to send audio: {str(e)}")
    async def send_image(self, channel: str, image_path: str) -> None:
        """
        Send an image to a Matrix channel by uploading the file and then sending an m.image message.
@ -289,6 +342,65 @@ class Llux:
            self.logger.error(f"Error downloading/processing image: {e}", exc_info=True)
            return None
    async def generate_tts(self, text: str) -> str:
        """
        Generate an audio (MP3) file for the given text using an OpenAI-compatible TTS server.
        Returns the path to the MP3 file.
        """
        self.logger.info(f"Generating TTS for text: '{text}'")
        # Create a temporary file to store the result
        fd, path = tempfile.mkstemp(suffix=".mp3", dir=self.temp_dir)
        os.close(fd)  # We only need the path.
        try:
            # Write `text` to some temporary input file if needed, or just pass it directly
            # Here we just pass it directly to the TTS call.
            with self.tts_client.audio.speech.with_streaming_response.create(
                model=self.tts_model,
                voice=self.tts_voice,
                input=text,
                response_format="mp3"
            ) as response:
                response.stream_to_file(path)
            self.logger.debug(f"TTS audio saved to {path}")
            return path
        except Exception as e:
            self.logger.error(f"Error generating TTS audio: {e}", exc_info=True)
            raise e
    async def generate_and_send_tts(self, channel: str, text: str, user_sender: str) -> None:
        """
        Wrapper that calls generate_tts() and then uploads/sends the MP3 to Matrix.
        Also logs the event in conversation history.
        """
        try:
            # A small courtesy message to let user know TTS is being prepared
            confirmation_message = (
                f"{user_sender} Generating TTS for: '{text}'. Please wait..."
            )
            await self.send_message(channel, confirmation_message)
            audio_path = await self.generate_tts(text)
            # Upload and send the audio file
            await self.send_audio(channel, audio_path)
        except Exception as e:
            err_msg = f"Error generating TTS: {str(e)}"
            self.logger.error(err_msg, exc_info=True)
            await self.send_message(channel, err_msg)
        else:
            # Optionally store the TTS output in the conversation history
            await self.add_history(
                role="assistant",
                channel=channel,
                sender=user_sender,
                message=f"Generated TTS for text: {text}",
                image_path=None  # no image, but you could store an "audio_path" variant if you like
            )
    async def add_history(
        self,
        role: str,
@ -656,6 +768,7 @@ class Llux:
          - .reset / .stock        : reset conversation to default or stock
          - .help                  : display help menus
          - .img                   : generate and send an image
          - .tts                   : generate and send audio
          - .model / .clear        : admin commands
        """
        self.logger.debug(f"Handling message: {message[0]} from {sender_display}")
@ -671,6 +784,7 @@ class Llux:
            ".stock": lambda: self.reset(channel, sender, sender_display, stock=True),
            ".help": lambda: self.help_menu(channel, sender_display),
            ".img": lambda: self.generate_and_send_image(channel, " ".join(message[1:]), sender),
            ".tts": lambda: self.generate_and_send_tts(channel, " ".join(message[1:]), sender),
        }
        admin_commands = {