mirror of
https://github.com/khoj-ai/khoj.git
synced 2025-02-17 08:04:21 +00:00
Transcribe only when keyboard shortcut or button pressed in Obsidian
- Transcribe while the Ctrl+s keyboard shortcut is held - Also transcribe while the transcribe button is held down with the mouse - Make the transcribe button robust to inadvertent touches by using a timeout - Do not transcribe or trigger auto-send on silence. Silence detection is very rudimentary: it just blocks the standard phrases Whisper emits when there is no speech
This commit is contained in:
parent
0eb000c3ea
commit
3e0c882e27
2 changed files with 80 additions and 11 deletions
|
@ -24,13 +24,13 @@ export class KhojChatView extends KhojPaneView {
|
|||
setting: KhojSetting;
|
||||
waitingForLocation: boolean;
|
||||
location: Location;
|
||||
keyPressTimeout: NodeJS.Timeout | null = null;
|
||||
|
||||
constructor(leaf: WorkspaceLeaf, setting: KhojSetting) {
|
||||
super(leaf, setting);
|
||||
|
||||
// Register Modal Keybindings to send voice message
|
||||
// Register chat view keybindings
|
||||
this.scope = new Scope(this.app.scope);
|
||||
this.scope.register(["Mod"], 's', async (event) => { await this.speechToText(event); });
|
||||
|
||||
this.waitingForLocation = true;
|
||||
|
||||
|
@ -124,6 +124,10 @@ export class KhojChatView extends KhojPaneView {
|
|||
chatInput.addEventListener('input', (_) => { this.onChatInput() });
|
||||
chatInput.addEventListener('keydown', (event) => { this.incrementalChat(event) });
|
||||
|
||||
// Add event listeners for long press keybinding
|
||||
this.contentEl.addEventListener('keydown', this.handleKeyDown.bind(this));
|
||||
this.contentEl.addEventListener('keyup', this.handleKeyUp.bind(this));
|
||||
|
||||
let transcribe = inputRow.createEl("button", {
|
||||
text: "Transcribe",
|
||||
attr: {
|
||||
|
@ -131,7 +135,8 @@ export class KhojChatView extends KhojPaneView {
|
|||
class: "khoj-transcribe khoj-input-row-button clickable-icon ",
|
||||
},
|
||||
})
|
||||
transcribe.addEventListener('mousedown', async (event) => { await this.speechToText(event) });
|
||||
transcribe.addEventListener('mousedown', (event) => { this.startSpeechToText(event) });
|
||||
transcribe.addEventListener('mouseup', async (event) => { await this.stopSpeechToText(event) });
|
||||
transcribe.addEventListener('touchstart', async (event) => { await this.speechToText(event) });
|
||||
transcribe.addEventListener('touchend', async (event) => { await this.speechToText(event) });
|
||||
transcribe.addEventListener('touchcancel', async (event) => { await this.speechToText(event) });
|
||||
|
@ -165,6 +170,46 @@ export class KhojChatView extends KhojPaneView {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Schedule the start of a voice message after a short hold delay.
 *
 * Nothing is scheduled while a previous hold timer is still stored in
 * `keyPressTimeout`; the timer handle is left in place after it fires, so
 * repeated press events are ignored until the handle is cleared elsewhere.
 *
 * @param event - The key, mouse or touch event that initiated the hold. It is
 *   forwarded unchanged to `speechToText` once the delay elapses.
 * @param timeout - Delay in milliseconds before recording is started.
 */
startSpeechToText(event: KeyboardEvent | MouseEvent | TouchEvent, timeout=200) {
    // A hold is already being tracked; do not schedule a second one
    if (this.keyPressTimeout) return;

    const beginVoiceMessage = async () => {
        if (this.sendMessageTimeout) {
            // Cancel the pending auto-send of the previous voice message
            clearTimeout(this.sendMessageTimeout);

            // Restore the regular send icon and re-attach its click handler
            const sendBtn = this.contentEl.getElementsByClassName("khoj-chat-send")[0] as HTMLButtonElement;
            setIcon(sendBtn, "arrow-up-circle");
            const sendIcon = sendBtn.getElementsByClassName("lucide-arrow-up-circle")[0] as SVGElement;
            sendIcon.addEventListener('click', async () => { await this.chat(); });

            // Discard the text of the message whose auto-send was cancelled
            const inputEl = this.contentEl.getElementsByClassName("khoj-chat-input")[0] as HTMLTextAreaElement;
            inputEl.value = "";
        }

        // Begin the new voice message
        await this.speechToText(event);
    };

    this.keyPressTimeout = setTimeout(beginVoiceMessage, timeout);
}
|
||||
/**
 * End a hold-to-transcribe interaction.
 *
 * If a media recorder exists, the release event is forwarded to
 * `speechToText`. Afterwards any stored hold timer is cancelled and its handle
 * reset, which also covers a release that happens before the hold delay
 * elapsed.
 *
 * @param event - The key, mouse or touch event that ended the hold.
 */
async stopSpeechToText(event: KeyboardEvent | MouseEvent | TouchEvent) {
    // NOTE(review): if the release arrives after the hold timer fired but before
    // `mediaRecorder` has been assigned, nothing is forwarded here - confirm
    // whether recording can be left running in that case.
    if (this.mediaRecorder) await this.speechToText(event);

    // Nothing left to do when no hold timer is being tracked
    if (!this.keyPressTimeout) return;

    clearTimeout(this.keyPressTimeout);
    this.keyPressTimeout = null;
}
|
||||
|
||||
/**
 * Start speech to text when the Ctrl+s keyboard shortcut is pressed.
 *
 * The key is compared case-insensitively because `event.key` is reported as
 * "S" while Caps Lock is on, which would otherwise disable the shortcut.
 *
 * @param event - The keydown event received by the chat view.
 */
handleKeyDown(event: KeyboardEvent) {
    const pressedKey = event.key.toLowerCase();
    if (pressedKey === 's' && event.getModifierState('Control')) this.startSpeechToText(event);
}

/**
 * Stop speech to text when the Ctrl+s keyboard shortcut is released.
 *
 * The shortcut counts as released when either key of the chord goes up.
 * Checking only for "s released while Control is still down" misses the case
 * where Control is released first, which would leave the hold active.
 *
 * @param event - The keyup event received by the chat view.
 */
async handleKeyUp(event: KeyboardEvent) {
    const releasedKey = event.key.toLowerCase();

    // "s" released while Control is still held
    const releasedShortcutKey = releasedKey === 's' && event.getModifierState('Control');

    // Either key of the chord released while a hold is being tracked. Gating on
    // the hold timer keeps ordinary typing of "s" from being treated as a release.
    const releasedDuringHold = !!this.keyPressTimeout && (releasedKey === 's' || releasedKey === 'control');

    if (releasedShortcutKey || releasedDuringHold) await this.stopSpeechToText(event);
}
|
||||
|
||||
processOnlineReferences(referenceSection: HTMLElement, onlineContext: any) {
|
||||
let numOnlineReferences = 0;
|
||||
for (let subquery in onlineContext) {
|
||||
|
@ -993,9 +1038,19 @@ export class KhojChatView extends KhojPaneView {
|
|||
});
|
||||
|
||||
// Parse response from Khoj backend
|
||||
let noSpeechText: string[] = [
|
||||
"Thanks for watching!",
|
||||
"Thanks for watching.",
|
||||
"Thank you for watching!",
|
||||
"Thank you for watching.",
|
||||
"You",
|
||||
"Bye."
|
||||
];
|
||||
let noSpeech: boolean = false;
|
||||
if (response.status === 200) {
|
||||
console.log(response);
|
||||
chatInput.value += response.json.text.trimStart();
|
||||
noSpeech = noSpeechText.includes(response.json.text.trimStart());
|
||||
if (!noSpeech) chatInput.value += response.json.text.trimStart();
|
||||
this.autoResize();
|
||||
} else if (response.status === 501) {
|
||||
throw new Error("⛔️ Configure speech-to-text model on server.");
|
||||
|
@ -1005,8 +1060,8 @@ export class KhojChatView extends KhojPaneView {
|
|||
throw new Error("⛔️ Failed to transcribe audio.");
|
||||
}
|
||||
|
||||
// Don't auto-send empty messages
|
||||
if (chatInput.value.length === 0) return;
|
||||
// Don't auto-send empty messages or when no speech is detected
|
||||
if (chatInput.value.length === 0 || noSpeech) return;
|
||||
|
||||
// Show stop auto-send button. It stops auto-send when clicked
|
||||
setIcon(sendButton, "stop-circle");
|
||||
|
@ -1044,19 +1099,19 @@ export class KhojChatView extends KhojPaneView {
|
|||
});
|
||||
|
||||
this.mediaRecorder.start();
|
||||
setIcon(transcribeButton, "mic-off");
|
||||
// setIcon(transcribeButton, "mic-off");
|
||||
transcribeButton.classList.add("loading-encircle")
|
||||
};
|
||||
|
||||
// Toggle recording
|
||||
if (!this.mediaRecorder || this.mediaRecorder.state === 'inactive' || event.type === 'touchstart') {
|
||||
if (!this.mediaRecorder || this.mediaRecorder.state === 'inactive' || event.type === 'touchstart' || event.type === 'mousedown' || event.type === 'keydown') {
|
||||
navigator.mediaDevices
|
||||
.getUserMedia({ audio: true })
|
||||
?.then(handleRecording)
|
||||
.catch((e) => {
|
||||
this.flashStatusInChatInput("⛔️ Failed to access microphone");
|
||||
});
|
||||
} else if (this.mediaRecorder.state === 'recording' || event.type === 'touchend' || event.type === 'touchcancel') {
|
||||
} else if (this.mediaRecorder?.state === 'recording' || event.type === 'touchend' || event.type === 'touchcancel' || event.type === 'mouseup' || event.type === 'keyup') {
|
||||
this.mediaRecorder.stop();
|
||||
this.mediaRecorder.stream.getTracks().forEach(track => track.stop());
|
||||
this.mediaRecorder = undefined;
|
||||
|
|
|
@ -613,9 +613,23 @@ img.copy-icon {
|
|||
margin-top: -16px;
|
||||
margin-left: -16px;
|
||||
border: 4px solid transparent;
|
||||
border-top-color: var(--icon-color-active);
|
||||
border-color: var(--icon-color-active);
|
||||
border-radius: 50%;
|
||||
animation: spin 1s linear infinite;
|
||||
animation: pulse 3s ease-in-out infinite;
|
||||
}
|
||||
/* Pulse: scale up and fade out towards the midpoint, then return to the start state */
@keyframes pulse {
    0%, 100% {
        transform: scale(1);
        opacity: 1;
    }
    50% {
        transform: scale(1.2);
        opacity: 0.2;
    }
}
|
||||
|
||||
@keyframes spin {
|
||||
|
|
Loading…
Add table
Reference in a new issue