mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Speak to Khoj from the Obsidian client
- Add transcription button with mic icon - Collect audio recording on pressing mic - Process and send audio recording to server for transcription - Extract the functionality to flash status in chat input for reuse
This commit is contained in:
parent
cc9eae5d18
commit
877532a167
2 changed files with 99 additions and 10 deletions
|
@ -1,4 +1,4 @@
|
|||
import { App, Modal, request, setIcon } from 'obsidian';
|
||||
import { App, Modal, RequestUrlParam, request, requestUrl, setIcon } from 'obsidian';
|
||||
import { KhojSetting } from 'src/settings';
|
||||
import fetch from "node-fetch";
|
||||
|
||||
|
@ -51,6 +51,16 @@ export class KhojChatModal extends Modal {
|
|||
})
|
||||
chatInput.addEventListener('change', (event) => { this.result = (<HTMLInputElement>event.target).value });
|
||||
|
||||
let transcribe = inputRow.createEl("button", {
|
||||
text: "Transcribe",
|
||||
attr: {
|
||||
id: "khoj-transcribe",
|
||||
class: "khoj-transcribe khoj-input-row-button",
|
||||
},
|
||||
})
|
||||
transcribe.addEventListener('click', async (_) => { await this.speechToText() });
|
||||
setIcon(transcribe, "mic");
|
||||
|
||||
let clearChat = inputRow.createEl("button", {
|
||||
text: "Clear History",
|
||||
attr: {
|
||||
|
@ -205,9 +215,19 @@ export class KhojChatModal extends Modal {
|
|||
}
|
||||
}
|
||||
|
||||
async clearConversationHistory() {
|
||||
flashStatusInChatInput(message: string) {
|
||||
// Get chat input element and original placeholder
|
||||
let chatInput = <HTMLInputElement>this.contentEl.getElementsByClassName("khoj-chat-input")[0];
|
||||
let originalPlaceholder = chatInput.placeholder;
|
||||
// Set placeholder to message
|
||||
chatInput.placeholder = message;
|
||||
// Reset placeholder after 2 seconds
|
||||
setTimeout(() => {
|
||||
chatInput.placeholder = originalPlaceholder;
|
||||
}, 2000);
|
||||
}
|
||||
|
||||
async clearConversationHistory() {
|
||||
let chatBody = this.contentEl.getElementsByClassName("khoj-chat-body")[0];
|
||||
|
||||
let response = await request({
|
||||
|
@ -224,15 +244,84 @@ export class KhojChatModal extends Modal {
|
|||
// If conversation history is cleared successfully, clear chat logs from modal
|
||||
chatBody.innerHTML = "";
|
||||
await this.getChatHistory();
|
||||
chatInput.placeholder = result.message;
|
||||
this.flashStatusInChatInput(result.message);
|
||||
}
|
||||
} catch (err) {
|
||||
chatInput.placeholder = "Failed to clear conversation history";
|
||||
} finally {
|
||||
// Reset to original placeholder text after some time
|
||||
setTimeout(() => {
|
||||
chatInput.placeholder = originalPlaceholder;
|
||||
}, 2000);
|
||||
this.flashStatusInChatInput("Failed to clear conversation history");
|
||||
}
|
||||
}
|
||||
|
||||
mediaRecorder: MediaRecorder | undefined;
|
||||
async speechToText() {
|
||||
const transcribeButton = <HTMLButtonElement>this.contentEl.getElementsByClassName("khoj-transcribe")[0];
|
||||
const chatInput = <HTMLInputElement>this.contentEl.getElementsByClassName("khoj-chat-input")[0];
|
||||
|
||||
const generateRequestBody = async (audioBlob: Blob, boundary_string: string) => {
|
||||
const boundary = `------${boundary_string}`;
|
||||
const chunks: ArrayBuffer[] = [];
|
||||
|
||||
chunks.push(new TextEncoder().encode(`${boundary}\r\n`));
|
||||
chunks.push(new TextEncoder().encode(`Content-Disposition: form-data; name="file"; filename="blob"\r\nContent-Type: "application/octet-stream"\r\n\r\n`));
|
||||
chunks.push(await audioBlob.arrayBuffer());
|
||||
chunks.push(new TextEncoder().encode('\r\n'));
|
||||
|
||||
await Promise.all(chunks);
|
||||
chunks.push(new TextEncoder().encode(`${boundary}--\r\n`));
|
||||
return await new Blob(chunks).arrayBuffer();
|
||||
};
|
||||
|
||||
const sendToServer = async (audioBlob: Blob) => {
|
||||
const boundary_string = `Boundary${Math.random().toString(36).slice(2)}`;
|
||||
const requestBody = await generateRequestBody(audioBlob, boundary_string);
|
||||
|
||||
const response = await requestUrl({
|
||||
url: `${this.setting.khojUrl}/api/speak?client=obsidian`,
|
||||
method: 'POST',
|
||||
headers: { "Authorization": `Bearer ${this.setting.khojApiKey}` },
|
||||
contentType: `multipart/form-data; boundary=----${boundary_string}`,
|
||||
body: requestBody,
|
||||
});
|
||||
|
||||
// Parse response from Khoj backend
|
||||
if (response.status === 200) {
|
||||
console.log(response);
|
||||
chatInput.value += response.json.text;
|
||||
} else if (response.status === 422) {
|
||||
throw new Error("⛔️ Failed to transcribe audio");
|
||||
} else {
|
||||
throw new Error("⛔️ Configure speech-to-text model on server.");
|
||||
}
|
||||
};
|
||||
|
||||
const handleRecording = (stream: MediaStream) => {
|
||||
const audioChunks: Blob[] = [];
|
||||
const recordingConfig = { mimeType: 'audio/webm' };
|
||||
this.mediaRecorder = new MediaRecorder(stream, recordingConfig);
|
||||
|
||||
this.mediaRecorder.addEventListener("dataavailable", function(event) {
|
||||
if (event.data.size > 0) audioChunks.push(event.data);
|
||||
});
|
||||
|
||||
this.mediaRecorder.addEventListener("stop", async function() {
|
||||
const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
|
||||
await sendToServer(audioBlob);
|
||||
});
|
||||
|
||||
this.mediaRecorder.start();
|
||||
setIcon(transcribeButton, "mic-off");
|
||||
};
|
||||
|
||||
// Toggle recording
|
||||
if (!this.mediaRecorder || this.mediaRecorder.state === 'inactive') {
|
||||
navigator.mediaDevices
|
||||
.getUserMedia({ audio: true })
|
||||
.then(handleRecording)
|
||||
.catch((e) => {
|
||||
this.flashStatusInChatInput("⛔️ Failed to access microphone");
|
||||
});
|
||||
} else if (this.mediaRecorder.state === 'recording') {
|
||||
this.mediaRecorder.stop();
|
||||
setIcon(transcribeButton, "mic");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -112,7 +112,7 @@ If your plugin does not need CSS, delete this file.
|
|||
}
|
||||
.khoj-input-row {
|
||||
display: grid;
|
||||
grid-template-columns: auto 32px;
|
||||
grid-template-columns: auto 32px 32px;
|
||||
grid-column-gap: 10px;
|
||||
grid-row-gap: 10px;
|
||||
background: var(--background-primary);
|
||||
|
|
Loading…
Reference in a new issue