Speak to Khoj from the Web client

- Use icons to style speech to text recording state
This commit is contained in:
Debanjum Singh Solanky 2023-11-21 23:38:36 -08:00
parent cc77bc4076
commit 2951fc92d7
3 changed files with 120 additions and 5 deletions

View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 384 512"><!--! Font Awesome Pro 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. --><!-- Microphone glyph drawn as a single path on a 384x512 canvas; no fill is set, so the renderer's default fill applies. NOTE(review): presumably served as microphone-solid.svg for the speak button; confirm the file name. --><path d="M192 0C139 0 96 43 96 96V256c0 53 43 96 96 96s96-43 96-96V96c0-53-43-96-96-96zM64 216c0-13.3-10.7-24-24-24s-24 10.7-24 24v40c0 89.1 66.2 162.7 152 174.4V464H120c-13.3 0-24 10.7-24 24s10.7 24 24 24h72 72c13.3 0 24-10.7 24-24s-10.7-24-24-24H216V430.4c85.8-11.7 152-85.3 152-174.4V216c0-13.3-10.7-24-24-24s-24 10.7-24 24v40c0 70.7-57.3 128-128 128s-128-57.3-128-128V216z"/></svg>

After

Width:  |  Height:  |  Size: 616 B

View file

@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
viewBox="0 0 384 512"
version="1.1"
id="svg1"
sodipodi:docname="stop-solid.svg"
inkscape:version="1.3 (0e150ed, 2023-07-21)"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<!-- Stop icon: one rounded-square path on a 384x512 canvas, filled dark red (#aa0000). -->
<!-- The sodipodi:* and inkscape:* attributes and the namedview element below are Inkscape editor metadata (page colours, zoom, window geometry); they are not part of the drawn shape. -->
<defs
id="defs1" />
<sodipodi:namedview
id="namedview1"
pagecolor="#ffffff"
bordercolor="#000000"
borderopacity="0.25"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:zoom="0.4609375"
inkscape:cx="192"
inkscape:cy="256"
inkscape:window-width="1312"
inkscape:window-height="449"
inkscape:window-x="0"
inkscape:window-y="88"
inkscape:window-maximized="0"
inkscape:current-layer="svg1" />
<!--! Font Awesome Pro 6.4.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license (Commercial License) Copyright 2023 Fonticons, Inc. -->
<!-- Square spanning x 0..384, y 64..448, with corners rounded at radius 64. -->
<path
d="M0 128C0 92.7 28.7 64 64 64H320c35.3 0 64 28.7 64 64V384c0 35.3-28.7 64-64 64H64c-35.3 0-64-28.7-64-64V128z"
id="path1"
style="fill:#aa0000" />
</svg>

After

Width:  |  Height:  |  Size: 1.3 KiB

View file

@ -369,6 +369,56 @@ To get started, just start typing below. You can also type / to see a list of co
chat();
}
}
// Recorder for the current speech-to-text session; undefined until the first recording starts.
// Kept at script scope so successive speechToText() calls can toggle the same recorder.
let mediaRecorder;

/**
 * Toggle speech-to-text recording. Wired to the speak button's click handler.
 *
 * - No recording in progress: requests microphone access and starts recording
 *   as 'audio/webm', switching the button image to the stop icon.
 * - Recording in progress: stops the recorder. Once it has stopped, the microphone
 *   is released, the button image returns to the microphone icon, and the recorded
 *   audio is POSTed to /api/speak; the `text` field of the JSON response is appended
 *   to the chat input.
 *
 * Failures are reported with console.error only; nothing is thrown to the caller.
 */
function speechToText() {
    const speakButtonImg = document.getElementById('speak-button-img');
    const chatInput = document.getElementById('chat-input');

    const showIdleIcon = () => {
        speakButtonImg.src = '/static/assets/icons/microphone-solid.svg';
        speakButtonImg.alt = 'Speak';
    };

    const showRecordingIcon = () => {
        speakButtonImg.src = '/static/assets/icons/stop-solid.svg';
        speakButtonImg.alt = 'Stop Speaking';
    };

    // Stop every track so the browser releases the microphone (and clears its
    // "recording" indicator). Stopping the MediaRecorder alone does not do this.
    const releaseMicrophone = (stream) => {
        stream.getTracks().forEach((track) => track.stop());
    };

    // Upload the recorded audio and append the transcription to the chat input.
    const sendToServer = (audioBlob) => {
        const formData = new FormData();
        formData.append('file', audioBlob);

        fetch('/api/speak?client=web', { method: 'POST', body: formData })
            .then(response => response.ok ? response.json() : Promise.reject(response))
            .then(data => { chatInput.value += data.text; })
            // A rejected Response carries `status`; network errors do not, and fall through to the generic message.
            .catch(err => err.status === 422 ? console.error("Configure speech-to-text model on server.") : console.error("Failed to transcribe audio"));
    };

    // Start recording from the granted microphone stream.
    const handleRecording = (stream) => {
        // A second click while the permission prompt was still pending resolves a second
        // stream. Discard it rather than orphaning the recorder that is already running.
        if (mediaRecorder && mediaRecorder.state === 'recording') {
            releaseMicrophone(stream);
            return;
        }

        const audioChunks = [];
        const recordingConfig = { mimeType: 'audio/webm' };

        try {
            const recorder = new MediaRecorder(stream, recordingConfig);
            mediaRecorder = recorder;

            recorder.addEventListener("dataavailable", function(event) {
                if (event.data.size > 0) audioChunks.push(event.data);
            });

            recorder.addEventListener("stop", function() {
                // Runs however the recording ended, so the microphone and the
                // button state are always restored.
                releaseMicrophone(stream);
                showIdleIcon();

                // Nothing was captured; skip the upload.
                if (audioChunks.length === 0) return;

                const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
                sendToServer(audioBlob);
            });

            recorder.start();
        } catch (e) {
            // E.g. the browser does not support the requested mimeType. Release the
            // microphone before the error reaches the caller's catch handler.
            releaseMicrophone(stream);
            throw e;
        }

        showRecordingIcon();
    };

    // Toggle recording
    if (!mediaRecorder || mediaRecorder.state === 'inactive') {
        navigator.mediaDevices
            .getUserMedia({ audio: true })
            .then(handleRecording)
            .catch((e) => {
                console.error(e);
            });
    } else if (mediaRecorder.state === 'recording') {
        // The "stop" listener registered in handleRecording handles cleanup and upload.
        mediaRecorder.stop();
    }
}
</script>
<body>
<div id="khoj-empty-container" class="khoj-empty-container">
@ -384,7 +434,12 @@ To get started, just start typing below. You can also type / to see a list of co
<!-- Chat Footer -->
<div id="chat-footer">
<div id="chat-tooltip" style="display: none;"></div>
<textarea id="chat-input" class="option" oninput="onChatInput()" onkeydown=incrementalChat(event) autofocus="autofocus" placeholder="Type / to see a list of commands, or just type your questions and hit enter."></textarea>
<div id="input-row">
<textarea id="chat-input" class="option" oninput="onChatInput()" onkeydown=incrementalChat(event) autofocus="autofocus" placeholder="Type / to see a list of commands, or just type your questions and hit enter."></textarea>
<!-- Speak button: toggles speech-to-text recording. The image alt text is the button's accessible name and is updated by speechToText(). -->
<button id="speak-button" type="button" onclick="speechToText()">
    <img id="speak-button-img" src="/static/assets/icons/microphone-solid.svg" alt="Speak">
</button>
</div>
</div>
</body>
<script>
@ -580,15 +635,17 @@ To get started, just start typing below. You can also type / to see a list of co
#chat-footer {
padding: 0;
margin: 8px;
display: grid;
grid-template-columns: minmax(70px, 100%);
grid-column-gap: 10px;
grid-row-gap: 10px;
}
#chat-footer > * {
padding: 15px;
border-radius: 5px;
border: 1px solid var(--main-text-color);
#input-row {
display: grid;
grid-template-columns: auto 32px;
grid-column-gap: 10px;
grid-row-gap: 10px;
background: #f9fafc
}
.option:hover {
@ -609,6 +666,26 @@ To get started, just start typing below. You can also type / to see a list of co
#chat-input:focus {
outline: none !important;
}
/* Speak button: borderless, rounded, icon-only control next to the chat input. */
#speak-button {
    cursor: pointer;
    padding: 5px;
    border: none;
    border-radius: 5px;
    font-size: 14px;
    font-weight: 300;
    line-height: 1.5em;
    background: var(--background-color);
    /* Fade between the resting, hover and pressed backgrounds. */
    transition: background 0.3s ease-in-out;
}

/* Pointer feedback: hover first, then pressed, so :active wins while both match. */
#speak-button:hover {
    background: var(--primary-hover);
}
#speak-button:active {
    background: var(--primary-active);
}

/* Icon inside the speak button; only the width is fixed. */
#speak-button-img {
    width: 24px;
}
.option-enabled {
box-shadow: 0 0 12px rgb(119, 156, 46);