Auto-update: Sat Jan 18 19:49:07 PST 2025

2025-01-18 19:49:07 -08:00 · 2025-01-18 19:49:07 -08:00 · df0b601ad7
commit df0b601ad7
parent c221a2149a
1 changed files with 114 additions and 0 deletions
--- a/llux.py
+++ b/llux.py
@ -15,6 +15,7 @@ import os
 import random
 import tempfile
 import time
+from openai import OpenAI
 from typing import Optional, Dict, Any

 import markdown
@ -67,6 +68,20 @@ class Llux:
        self.diffusers_device = diffusers_config["device"]
        self.diffusers_steps = diffusers_config["steps"]
        self.img_generation_confirmation = diffusers_config["img_generation_confirmation"]
+        
+        # Text-to-speech configuration
+        # Example: reading from config["tts"] or you can just hard-code the base_url, model, etc.
+        tts_config = config.get("tts", {})
+        tts_url = tts_config.get("base_url")
+        tts_api_key = tts_config.get("api_key", "not-needed")
+        self.tts_model = tts_config.get("model", "kokoro")
+        self.tts_voice = tts_config.get("voice", "af_sky+af_bella")  # single or multiple voicepacks
+        
+        # Initialize TTS client
+        self.tts_client = OpenAI(
+            base_url=tts_url,
+            api_key=tts_api_key
+        )

        # Create Matrix client
        self.client = AsyncClient(self.server, self.username)
@ -189,6 +204,44 @@ class Llux:
                ),
            },
        )
+        
+    async def send_audio(self, channel: str, audio_path: str) -> None:
+        """
+        Upload and send an MP3 (or any audio) file to Matrix as m.audio.
+        """
+        try:
+            filename = os.path.basename(audio_path)
+            size_bytes = os.path.getsize(audio_path)
+            with open(audio_path, "rb") as f:
+                upload_response, upload_error = await self.client.upload(
+                    f,
+                    content_type="audio/mpeg",
+                    filename=filename
+                )
+            if upload_error:
+                self.logger.error(f"Failed to upload audio: {upload_error}")
+                return
+        
+            self.logger.debug(f"Successfully uploaded audio, URI: {upload_response.content_uri}")
+        
+            await self.client.room_send(
+                room_id=channel,
+                message_type="m.room.message",
+                content={
+                    "msgtype": "m.audio",
+                    "url": upload_response.content_uri,
+                    "body": filename,
+                    "info": {
+                        "mimetype": "audio/mpeg",
+                        "size": size_bytes,
+                        # you can also add "duration" here in ms if you want
+                    }
+                }
+            )
+        except Exception as e:
+            self.logger.error(f"Error sending audio: {e}", exc_info=True)
+            await self.send_message(channel, f"Failed to send audio: {str(e)}")
+

    async def send_image(self, channel: str, image_path: str) -> None:
        """
@ -288,6 +341,65 @@ class Llux:
        except Exception as e:
            self.logger.error(f"Error downloading/processing image: {e}", exc_info=True)
            return None
+            
+    async def generate_tts(self, text: str) -> str:
+        """
+        Generate an audio (MP3) file for the given text using an OpenAI-compatible TTS server.
+        Returns the path to the MP3 file.
+        """
+        self.logger.info(f"Generating TTS for text: '{text}'")
+    
+        # Create a temporary file to store the result
+        fd, path = tempfile.mkstemp(suffix=".mp3", dir=self.temp_dir)
+        os.close(fd)  # We only need the path.
+    
+        try:
+            # Write `text` to some temporary input file if needed, or just pass it directly
+            # Here we just pass it directly to the TTS call.
+            with self.tts_client.audio.speech.with_streaming_response.create(
+                model=self.tts_model,
+                voice=self.tts_voice,
+                input=text,
+                response_format="mp3"
+            ) as response:
+                response.stream_to_file(path)
+    
+            self.logger.debug(f"TTS audio saved to {path}")
+            return path
+        except Exception as e:
+            self.logger.error(f"Error generating TTS audio: {e}", exc_info=True)
+            raise e
+    
+    async def generate_and_send_tts(self, channel: str, text: str, user_sender: str) -> None:
+        """
+        Wrapper that calls generate_tts() and then uploads/sends the MP3 to Matrix.
+        Also logs the event in conversation history.
+        """
+        try:
+            # A small courtesy message to let user know TTS is being prepared
+            confirmation_message = (
+                f"{user_sender} Generating TTS for: '{text}'. Please wait..."
+            )
+            await self.send_message(channel, confirmation_message)
+    
+            audio_path = await self.generate_tts(text)
+    
+            # Upload and send the audio file
+            await self.send_audio(channel, audio_path)
+    
+        except Exception as e:
+            err_msg = f"Error generating TTS: {str(e)}"
+            self.logger.error(err_msg, exc_info=True)
+            await self.send_message(channel, err_msg)
+        else:
+            # Optionally store the TTS output in the conversation history
+            await self.add_history(
+                role="assistant",
+                channel=channel,
+                sender=user_sender,
+                message=f"Generated TTS for text: {text}",
+                image_path=None  # no image, but you could store an "audio_path" variant if you like
+            )

    async def add_history(
        self,
@ -656,6 +768,7 @@ class Llux:
          - .reset / .stock        : reset conversation to default or stock
          - .help                  : display help menus
          - .img                   : generate and send an image
+          - .tts                   : generate and send audio
          - .model / .clear        : admin commands
        """
        self.logger.debug(f"Handling message: {message[0]} from {sender_display}")
@ -671,6 +784,7 @@ class Llux:
            ".stock": lambda: self.reset(channel, sender, sender_display, stock=True),
            ".help": lambda: self.help_menu(channel, sender_display),
            ".img": lambda: self.generate_and_send_image(channel, " ".join(message[1:]), sender),
+            ".tts": lambda: self.generate_and_send_tts(channel, " ".join(message[1:]), sender),
        }

        admin_commands = {