Extract defilter query into conversation utils for reuse

This commit is contained in:
Debanjum Singh Solanky 2024-10-10 00:27:27 -07:00
parent e69a8382f2
commit 2dc5804571
5 changed files with 16 additions and 9 deletions

View file

@ -14,6 +14,9 @@ from transformers import AutoTokenizer
from khoj.database.adapters import ConversationAdapters, ais_user_subscribed
from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser
from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.utils import state
from khoj.utils.helpers import is_none_or_empty, merge_dicts
@ -320,3 +323,11 @@ def reciprocal_conversation_to_chatml(message_pair):
def remove_json_codeblock(response: str) -> str:
    """Remove any markdown json codeblock formatting if present.

    Useful for non schema enforceable models, which may wrap their JSON
    output in a markdown code fence.

    Args:
        response: Raw model response, optionally wrapped in a json code fence.

    Returns:
        The response with a leading json fence marker and a trailing fence
        marker removed, and with surrounding whitespace stripped.
    """
    # Strip first: a trailing newline or leading space would otherwise stop
    # removeprefix/removesuffix from matching the fence markers.
    unfenced = response.strip().removeprefix("```json").removesuffix("```")
    # Drop the newlines that typically sit between the fence markers and the payload.
    return unfenced.strip()
def defilter_query(query: str, filters=None) -> str:
    """Remove any query filters in query.

    Args:
        query: Raw user query, possibly containing search filter terms.
        filters: Optional iterable of objects exposing a ``defilter(str)``
            method that returns the query with that filter's terms removed.
            When omitted, a fresh DateFilter, WordFilter and FileFilter are
            applied, in that order.

    Returns:
        The query after every filter's ``defilter`` has been applied in turn.
    """
    if filters is None:
        filters = (DateFilter(), WordFilter(), FileFilter())

    defiltered_query = query
    # Loop variable is not called `filter`, to avoid shadowing the builtin.
    for search_filter in filters:
        defiltered_query = search_filter.defilter(defiltered_query)
    return defiltered_query

View file

@ -42,6 +42,7 @@ from khoj.processor.conversation.offline.chat_model import extract_questions_off
from khoj.processor.conversation.offline.whisper import transcribe_audio_offline
from khoj.processor.conversation.openai.gpt import extract_questions
from khoj.processor.conversation.openai.whisper import transcribe_audio
from khoj.processor.conversation.utils import defilter_query
from khoj.routers.helpers import (
ApiUserRateLimiter,
ChatEvent,
@ -375,9 +376,7 @@ async def extract_references_and_questions(
return
# Extract filter terms from user message
defiltered_query = q
for filter in [DateFilter(), WordFilter(), FileFilter()]:
defiltered_query = filter.defilter(defiltered_query)
defiltered_query = defilter_query(q)
filters_in_query = q.replace(defiltered_query, "").strip()
conversation = await sync_to_async(ConversationAdapters.get_conversation_by_id)(conversation_id)

View file

@ -24,7 +24,7 @@ from khoj.database.adapters import (
)
from khoj.database.models import Agent, KhojUser
from khoj.processor.conversation.prompts import help_message, no_entries_found
from khoj.processor.conversation.utils import save_to_conversation_log
from khoj.processor.conversation.utils import defilter_query, save_to_conversation_log
from khoj.processor.image.generate import text_to_image
from khoj.processor.speech.text_to_speech import generate_text_to_speech
from khoj.processor.tools.online_search import read_webpages, search_online
@ -700,7 +700,7 @@ async def chat(
## Extract Document References
compiled_references: List[Any] = []
inferred_queries: List[Any] = []
defiltered_query: str = None
defiltered_query = defilter_query(q)
if conversation_commands == [ConversationCommand.Default] or is_automated_task:
async for research_result in execute_information_collection(

View file

@ -7,8 +7,6 @@ from math import inf
from typing import List, Tuple
import dateparser as dtparse
from dateparser.search import search_dates
from dateparser_data.settings import default_parsers
from dateutil.relativedelta import relativedelta
from khoj.search_filter.base_filter import BaseFilter

View file

@ -1,11 +1,10 @@
import fnmatch
import logging
import re
from collections import defaultdict
from typing import List
from khoj.search_filter.base_filter import BaseFilter
from khoj.utils.helpers import LRU, timer
from khoj.utils.helpers import LRU
logger = logging.getLogger(__name__)