Transcribe only when keyboard shortcut or button pressed in Obsidian

- Transcribe on holding Ctrl+s keyboard shortcut
- Transcribe on holding the transcribe button pressed via mouse too
- Make the transcribe button robust to inadvertent touches by using timeout
- Do not transcribe, trigger auto-send on silences. Silence detection
  is super rudimentary, just blocks standard emanations by whisper
  when no speech
This commit is contained in:
Debanjum Singh Solanky 2024-07-07 13:19:57 +05:30
parent 0eb000c3ea
commit 3e0c882e27
2 changed files with 80 additions and 11 deletions

View file

@ -24,13 +24,13 @@ export class KhojChatView extends KhojPaneView {
setting: KhojSetting;
waitingForLocation: boolean;
location: Location;
keyPressTimeout: NodeJS.Timeout | null = null;
constructor(leaf: WorkspaceLeaf, setting: KhojSetting) {
super(leaf, setting);
// Register Modal Keybindings to send voice message
// Register chat view keybindings
this.scope = new Scope(this.app.scope);
this.scope.register(["Mod"], 's', async (event) => { await this.speechToText(event); });
this.waitingForLocation = true;
@ -124,6 +124,10 @@ export class KhojChatView extends KhojPaneView {
chatInput.addEventListener('input', (_) => { this.onChatInput() });
chatInput.addEventListener('keydown', (event) => { this.incrementalChat(event) });
// Add event listeners for long press keybinding
this.contentEl.addEventListener('keydown', this.handleKeyDown.bind(this));
this.contentEl.addEventListener('keyup', this.handleKeyUp.bind(this));
let transcribe = inputRow.createEl("button", {
text: "Transcribe",
attr: {
@ -131,7 +135,8 @@ export class KhojChatView extends KhojPaneView {
class: "khoj-transcribe khoj-input-row-button clickable-icon ",
},
})
transcribe.addEventListener('mousedown', async (event) => { await this.speechToText(event) });
transcribe.addEventListener('mousedown', (event) => { this.startSpeechToText(event) });
transcribe.addEventListener('mouseup', async (event) => { await this.stopSpeechToText(event) });
transcribe.addEventListener('touchstart', async (event) => { await this.speechToText(event) });
transcribe.addEventListener('touchend', async (event) => { await this.speechToText(event) });
transcribe.addEventListener('touchcancel', async (event) => { await this.speechToText(event) });
@ -165,6 +170,46 @@ export class KhojChatView extends KhojPaneView {
});
}
startSpeechToText(event: KeyboardEvent | MouseEvent | TouchEvent, timeout=200) {
if (!this.keyPressTimeout) {
this.keyPressTimeout = setTimeout(async () => {
// Reset auto send voice message timer, UI if running
if (this.sendMessageTimeout) {
// Stop the auto send voice message countdown timer UI
clearTimeout(this.sendMessageTimeout);
const sendButton = <HTMLButtonElement>this.contentEl.getElementsByClassName("khoj-chat-send")[0]
setIcon(sendButton, "arrow-up-circle")
let sendImg = <SVGElement>sendButton.getElementsByClassName("lucide-arrow-up-circle")[0]
sendImg.addEventListener('click', async (_) => { await this.chat() });
// Reset chat input value
const chatInput = <HTMLTextAreaElement>this.contentEl.getElementsByClassName("khoj-chat-input")[0];
chatInput.value = "";
}
// Start new voice message
await this.speechToText(event);
}, timeout);
}
}
async stopSpeechToText(event: KeyboardEvent | MouseEvent | TouchEvent) {
if (this.mediaRecorder) {
await this.speechToText(event);
}
if (this.keyPressTimeout) {
clearTimeout(this.keyPressTimeout);
this.keyPressTimeout = null;
}
}
handleKeyDown(event: KeyboardEvent) {
// Start speech to text if keyboard shortcut is pressed
if (event.key === 's' && event.getModifierState('Control')) this.startSpeechToText(event);
}
async handleKeyUp(event: KeyboardEvent) {
// Stop speech to text if keyboard shortcut is released
if (event.key === 's' && event.getModifierState('Control')) await this.stopSpeechToText(event);
}
processOnlineReferences(referenceSection: HTMLElement, onlineContext: any) {
let numOnlineReferences = 0;
for (let subquery in onlineContext) {
@ -993,9 +1038,19 @@ export class KhojChatView extends KhojPaneView {
});
// Parse response from Khoj backend
let noSpeechText: string[] = [
"Thanks for watching!",
"Thanks for watching.",
"Thank you for watching!",
"Thank you for watching.",
"You",
"Bye."
];
let noSpeech: boolean = false;
if (response.status === 200) {
console.log(response);
chatInput.value += response.json.text.trimStart();
noSpeech = noSpeechText.includes(response.json.text.trimStart());
if (!noSpeech) chatInput.value += response.json.text.trimStart();
this.autoResize();
} else if (response.status === 501) {
throw new Error("⛔️ Configure speech-to-text model on server.");
@ -1005,8 +1060,8 @@ export class KhojChatView extends KhojPaneView {
throw new Error("⛔️ Failed to transcribe audio.");
}
// Don't auto-send empty messages
if (chatInput.value.length === 0) return;
// Don't auto-send empty messages or when no speech is detected
if (chatInput.value.length === 0 || noSpeech) return;
// Show stop auto-send button. It stops auto-send when clicked
setIcon(sendButton, "stop-circle");
@ -1044,19 +1099,19 @@ export class KhojChatView extends KhojPaneView {
});
this.mediaRecorder.start();
setIcon(transcribeButton, "mic-off");
// setIcon(transcribeButton, "mic-off");
transcribeButton.classList.add("loading-encircle")
};
// Toggle recording
if (!this.mediaRecorder || this.mediaRecorder.state === 'inactive' || event.type === 'touchstart') {
if (!this.mediaRecorder || this.mediaRecorder.state === 'inactive' || event.type === 'touchstart' || event.type === 'mousedown' || event.type === 'keydown') {
navigator.mediaDevices
.getUserMedia({ audio: true })
?.then(handleRecording)
.catch((e) => {
this.flashStatusInChatInput("⛔️ Failed to access microphone");
});
} else if (this.mediaRecorder.state === 'recording' || event.type === 'touchend' || event.type === 'touchcancel') {
} else if (this.mediaRecorder?.state === 'recording' || event.type === 'touchend' || event.type === 'touchcancel' || event.type === 'mouseup' || event.type === 'keyup') {
this.mediaRecorder.stop();
this.mediaRecorder.stream.getTracks().forEach(track => track.stop());
this.mediaRecorder = undefined;

View file

@ -613,9 +613,23 @@ img.copy-icon {
margin-top: -16px;
margin-left: -16px;
border: 4px solid transparent;
border-top-color: var(--icon-color-active);
border-color: var(--icon-color-active);
border-radius: 50%;
animation: spin 1s linear infinite;
animation: pulse 3s ease-in-out infinite;
}
@keyframes pulse {
0% {
transform: scale(1);
opacity: 1;
}
50% {
transform: scale(1.2);
opacity: 0.2;
}
100% {
transform: scale(1);
opacity: 1;
}
}
@keyframes spin {