// Provenance: introduced by the patch "Speak to Khoj from the Web client"
// against src/khoj/interface/web/chat.html, which also adds the
// microphone-solid.svg and stop-solid.svg icons referenced below.

// Recorder for the in-progress (or most recent) recording. Kept outside
// speechToText() so successive calls can toggle the same recorder.
let mediaRecorder;

/**
 * Toggle speech-to-text recording.
 *
 * First call (no recorder yet, or recorder inactive): requests microphone
 * access, starts recording audio as 'audio/webm' and switches the speak
 * button image to the "stop" icon.
 *
 * Next call (recorder is recording): stops the recorder and switches the
 * image back to the "microphone" icon. When the recorder fires its "stop"
 * event the microphone is released and the captured audio is POSTed to
 * /api/speak; the `text` field of the JSON reply is appended to the chat
 * input.
 *
 * Takes no arguments and returns nothing; failures are reported with
 * console.error.
 */
function speechToText() {
    const speakButtonImg = document.getElementById('speak-button-img');
    const chatInput = document.getElementById('chat-input');

    // Stop every track so the browser actually releases the microphone.
    const releaseStream = (stream) => {
        stream.getTracks().forEach((track) => track.stop());
    };

    // Upload the recorded audio and append the transcription to the chat input.
    const sendToServer = (audioBlob) => {
        const formData = new FormData();
        formData.append('file', audioBlob);

        fetch('/api/speak?client=web', { method: 'POST', body: formData })
            .then((response) => (response.ok ? response.json() : Promise.reject(response)))
            .then((data) => { chatInput.value += data.text; })
            .catch((err) => {
                // A rejected Response carries `status`; network errors do not.
                if (err.status === 422) {
                    console.error("Configure speech-to-text model on server.");
                } else {
                    console.error("Failed to transcribe audio");
                }
            });
    };

    // Wire up a recorder on the granted stream and start capturing.
    const handleRecording = (stream) => {
        const audioChunks = [];
        const recordingConfig = { mimeType: 'audio/webm' };

        try {
            mediaRecorder = new MediaRecorder(stream, recordingConfig);
        } catch (err) {
            // Do not keep the microphone open if the recorder cannot be created.
            releaseStream(stream);
            throw err;
        }

        mediaRecorder.addEventListener("dataavailable", function(event) {
            if (event.data.size > 0) audioChunks.push(event.data);
        });

        mediaRecorder.addEventListener("stop", function() {
            // Recording is over: free the microphone before uploading.
            releaseStream(stream);
            const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
            sendToServer(audioBlob);
        });

        mediaRecorder.start();
        speakButtonImg.src = '/static/assets/icons/stop-solid.svg';
        speakButtonImg.alt = 'Stop Speaking';
    };

    // Toggle recording
    if (!mediaRecorder || mediaRecorder.state === 'inactive') {
        // mediaDevices is missing in insecure contexts and older browsers.
        if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
            console.error("Audio recording is not supported in this browser or context.");
            return;
        }

        navigator.mediaDevices
            .getUserMedia({ audio: true })
            .then(handleRecording)
            .catch((e) => {
                console.error(e);
            });
    } else if (mediaRecorder.state === 'recording') {
        mediaRecorder.stop();
        speakButtonImg.src = '/static/assets/icons/microphone-solid.svg';
        speakButtonImg.alt = 'Speak';
    }
}
@@ -384,7 +434,12 @@ To get started, just start typing below. You can also type / to see a list of co