Mirror of https://github.com/khoj-ai/khoj.git, synced 2024-12-18 18:47:11 +00:00
Allow research mode and other conversation commands in automations (#1011)
Major
---
Previously we couldn't enable research mode or use other slash commands in automated tasks. This change separates detecting whether a chat query was triggered by an automated task from the (other) conversation commands to run the query with. This unlocks enabling research mode in automations, alongside other variations like /image or /diagram.

Minor
---
- Clean up the code that determines data sources and output formats
- Have some of the example automations shown on the automations page run in research mode
parent 3b050a33bb
commit 63d2c6f35a
3 changed files with 41 additions and 40 deletions
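To illustrate the flow described in the commit message, here is a minimal, self-contained sketch of the new parsing order. The names ConversationCommand and get_conversation_command come from the diff below, but their bodies here are simplified stand-ins, not khoj's actual implementations.

```python
# Minimal sketch of the new parsing flow (simplified stand-ins for khoj's
# ConversationCommand enum and get_conversation_command helper).
from enum import Enum


class ConversationCommand(Enum):
    AutomatedTask = "automated_task"
    Research = "research"
    Image = "image"
    Default = "default"


def get_conversation_command(query: str) -> ConversationCommand:
    # Map a leading slash command to a conversation command.
    if query.startswith("/research"):
        return ConversationCommand.Research
    elif query.startswith("/image"):
        return ConversationCommand.Image
    return ConversationCommand.Default


q = "/automated_task /research Summarize the top 5 posts on Hacker News"

# Automated tasks are peeled off first, independently of other commands.
is_automated_task = q.startswith("/automated_task")
cmds_to_rate_limit = []
if is_automated_task:
    q = q.replace("/automated_task", "").lstrip()
    cmds_to_rate_limit.append(ConversationCommand.AutomatedTask)

# The remaining slash command is still visible, so /research now works.
conversation_commands = [get_conversation_command(query=q)]
assert conversation_commands == [ConversationCommand.Research]
```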
@@ -165,7 +165,7 @@ const suggestedAutomationsMetadata: AutomationsData[] = [
     {
         subject: "Weekly Newsletter",
         query_to_run:
-            "Compile a message including: 1. A recap of news from last week 2. An at-home workout I can do before work 3. A quote to inspire me for the week ahead",
+            "/research Compile a message including: 1. A recap of news from last week 2. An at-home workout I can do before work 3. A quote to inspire me for the week ahead",
         schedule: "9AM every Monday",
         next: "Next run at 9AM on Monday",
         crontime: "0 9 * * 1",

@@ -185,7 +185,7 @@ const suggestedAutomationsMetadata: AutomationsData[] = [
     {
         subject: "Front Page of Hacker News",
         query_to_run:
-            "Summarize the top 5 posts from https://news.ycombinator.com/best and share them with me, including links",
+            "/research Summarize the top 5 posts from https://news.ycombinator.com/best and share them with me, including links",
         schedule: "9PM on every Wednesday",
         next: "Next run at 9PM on Wednesday",
         crontime: "0 21 * * 3",

@@ -195,7 +195,7 @@ const suggestedAutomationsMetadata: AutomationsData[] = [
     {
         subject: "Market Summary",
         query_to_run:
-            "Get the market summary for today and share it with me. Focus on tech stocks and the S&P 500.",
+            "/research Get the market summary for today and share it with me. Focus on tech stocks and the S&P 500.",
         schedule: "9AM on every weekday",
         next: "Next run at 9AM on Monday",
         crontime: "0 9 * * *",

@@ -214,7 +214,7 @@ const suggestedAutomationsMetadata: AutomationsData[] = [
     {
         subject: "Round-up of research papers about AI in healthcare",
         query_to_run:
-            "Summarize the top 3 research papers about AI in healthcare that were published in the last week. Include links to the full papers.",
+            "/research Summarize the top 3 research papers about AI in healthcare that were published in the last week. Include links to the full papers.",
         schedule: "9AM every Friday",
         next: "Next run at 9AM on Friday",
         crontime: "0 9 * * 5",
@@ -724,7 +724,16 @@ async def chat(
             yield result
             return

-        conversation_commands = [get_conversation_command(query=q, any_references=True)]
+        # Automated tasks are handled before to allow mixing them with other conversation commands
+        cmds_to_rate_limit = []
+        is_automated_task = False
+        if q.startswith("/automated_task"):
+            is_automated_task = True
+            q = q.replace("/automated_task", "").lstrip()
+            cmds_to_rate_limit += [ConversationCommand.AutomatedTask]
+
+        # Extract conversation command from query
+        conversation_commands = [get_conversation_command(query=q)]

         conversation = await ConversationAdapters.aget_conversation_by_user(
             user,
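The hunk above is the heart of the change: the /automated_task marker is consumed before command detection, so a second slash command remains visible. A before/after sketch, with hypothetical old_parse and new_parse helpers standing in for the old and new code paths:

```python
# Sketch of the behavior difference (old_parse/new_parse are hypothetical
# helpers standing in for the old and new logic in chat()).
def old_parse(q: str) -> str:
    # Old flow: the first slash command wins; "/automated_task" masked the rest.
    if q.startswith("/automated_task"):
        return "automated_task"
    if q.startswith("/research"):
        return "research"
    return "default"


def new_parse(q: str) -> str:
    # New flow: strip the automation marker first, then detect the command.
    if q.startswith("/automated_task"):
        q = q.replace("/automated_task", "").lstrip()
    if q.startswith("/research"):
        return "research"
    return "default"


q = "/automated_task /research Compile my weekly newsletter"
assert old_parse(q) == "automated_task"  # /research was unreachable
assert new_parse(q) == "research"        # /research now takes effect
```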
@@ -757,11 +766,8 @@ async def chat(
         location = None
         if city or region or country or country_code:
             location = LocationData(city=city, region=region, country=country, country_code=country_code)

         user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

         meta_log = conversation.conversation_log
-        is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask]

         researched_results = ""
         online_results: Dict = dict()
@@ -778,7 +784,7 @@ async def chat(
         generated_excalidraw_diagram: str = None
         program_execution_context: List[str] = []

-        if conversation_commands == [ConversationCommand.Default] or is_automated_task:
+        if conversation_commands == [ConversationCommand.Default]:
             chosen_io = await aget_data_sources_and_output_format(
                 q,
                 meta_log,
@@ -799,7 +805,8 @@ async def chat(
             async for result in send_event(ChatEvent.STATUS, f"**Selected Tools:** {conversation_commands_str}"):
                 yield result

-        for cmd in conversation_commands:
+        cmds_to_rate_limit += conversation_commands
+        for cmd in cmds_to_rate_limit:
             try:
                 await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
                 q = q.replace(f"/{cmd.value}", "").strip()
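With automated tasks queued into cmds_to_rate_limit up front, the detected conversation commands are appended and every command is rate-limited in one pass. A sketch with a hypothetical in-memory limiter standing in for khoj's conversation_command_rate_limiter (which is async and keyed per request):

```python
# Hypothetical in-memory limiter; khoj's real limiter is async and per-user.
from collections import Counter


class SimpleCommandRateLimiter:
    def __init__(self, max_calls_per_window: int):
        self.max_calls = max_calls_per_window
        self.counts: Counter = Counter()

    def update_and_check_if_valid(self, cmd: str) -> None:
        # Count this invocation and reject once the window budget is spent.
        self.counts[cmd] += 1
        if self.counts[cmd] > self.max_calls:
            raise RuntimeError(f"Rate limit exceeded for /{cmd}")


limiter = SimpleCommandRateLimiter(max_calls_per_window=10)

# AutomatedTask was queued earlier; detected commands are appended here,
# so both the automation itself and e.g. /research count against limits.
cmds_to_rate_limit = ["automated_task"]
conversation_commands = ["research"]
cmds_to_rate_limit += conversation_commands

for cmd in cmds_to_rate_limit:
    limiter.update_and_check_if_valid(cmd)
```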
@@ -231,7 +231,7 @@ def get_next_url(request: Request) -> str:
     return urljoin(str(request.base_url).rstrip("/"), next_path)


-def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
+def get_conversation_command(query: str) -> ConversationCommand:
     if query.startswith("/notes"):
         return ConversationCommand.Notes
     elif query.startswith("/help"):
@@ -254,9 +254,6 @@ def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
         return ConversationCommand.Code
     elif query.startswith("/research"):
         return ConversationCommand.Research
-    # If no relevant notes found for the given query
-    elif not any_references:
-        return ConversationCommand.General
     else:
         return ConversationCommand.Default

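After this change get_conversation_command is a pure prefix dispatch: queries without a slash command always resolve to Default, and the reference-dependent General branch is gone. A trimmed sketch of the dispatch, using strings in place of ConversationCommand members (the real function handles many more commands):

```python
# Trimmed sketch of the simplified dispatch after dropping any_references.
def get_conversation_command(query: str) -> str:
    if query.startswith("/notes"):
        return "notes"
    elif query.startswith("/research"):
        return "research"
    else:
        # Previously this could return "general" when no references existed.
        return "default"


assert get_conversation_command("/research latest AI papers") == "research"
assert get_conversation_command("What did I write last week?") == "default"
```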
@@ -408,42 +405,39 @@ async def aget_data_sources_and_output_format(
         response = clean_json(response)
         response = json.loads(response)

-        selected_sources = [q.strip() for q in response.get("source", []) if q.strip()]
-        selected_output = response.get("output", "text").strip()  # Default to text output
+        chosen_sources = [s.strip() for s in response.get("source", []) if s.strip()]
+        chosen_output = response.get("output", "text").strip()  # Default to text output

-        if not isinstance(selected_sources, list) or not selected_sources or len(selected_sources) == 0:
+        if is_none_or_empty(chosen_sources) or not isinstance(chosen_sources, list):
             raise ValueError(
-                f"Invalid response for determining relevant tools: {selected_sources}. Raw Response: {response}"
+                f"Invalid response for determining relevant tools: {chosen_sources}. Raw Response: {response}"
             )

-        result: Dict = {"sources": [], "output": None if not is_task else ConversationCommand.AutomatedTask}
-        for selected_source in selected_sources:
-            # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
-            if (
-                selected_source in source_options.keys()
-                and isinstance(result["sources"], list)
-                and (len(agent_sources) == 0 or selected_source in agent_sources)
-            ):
-                # Check whether the tool exists as a valid ConversationCommand
-                result["sources"].append(ConversationCommand(selected_source))
+        output_mode = ConversationCommand.Text
+        # Verify selected output mode is enabled for the agent, as the LLM can sometimes get confused by the tool options.
+        if chosen_output in output_options.keys() and (len(agent_outputs) == 0 or chosen_output in agent_outputs):
+            # Ensure that the chosen output mode exists as a valid ConversationCommand
+            output_mode = ConversationCommand(chosen_output)

-        # Add a double check to verify it's in the agent list, because the LLM sometimes gets confused by the tool options.
-        if selected_output in output_options.keys() and (len(agent_outputs) == 0 or selected_output in agent_outputs):
-            # Check whether the tool exists as a valid ConversationCommand
-            result["output"] = ConversationCommand(selected_output)
+        data_sources = []
+        # Verify selected data sources are enabled for the agent, as the LLM can sometimes get confused by the tool options.
+        for chosen_source in chosen_sources:
+            # Ensure that the chosen data source exists as a valid ConversationCommand
+            if chosen_source in source_options.keys() and (len(agent_sources) == 0 or chosen_source in agent_sources):
+                data_sources.append(ConversationCommand(chosen_source))

-        if is_none_or_empty(result):
+        # Fallback to default sources if the inferred data sources are unset or invalid
+        if is_none_or_empty(data_sources):
             if len(agent_sources) == 0:
-                result = {"sources": [ConversationCommand.Default], "output": ConversationCommand.Text}
+                data_sources = [ConversationCommand.Default]
             else:
-                result = {"sources": [ConversationCommand.General], "output": ConversationCommand.Text}
+                data_sources = [ConversationCommand.General]
     except Exception as e:
         logger.error(f"Invalid response for determining relevant tools: {response}. Error: {e}", exc_info=True)
-        sources = agent_sources if len(agent_sources) > 0 else [ConversationCommand.Default]
-        output = agent_outputs[0] if len(agent_outputs) > 0 else ConversationCommand.Text
-        result = {"sources": sources, "output": output}
+        data_sources = agent_sources if len(agent_sources) > 0 else [ConversationCommand.Default]
+        output_mode = agent_outputs[0] if len(agent_outputs) > 0 else ConversationCommand.Text

-    return result
+    return {"sources": data_sources, "output": output_mode}


 async def infer_webpage_urls(
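The refactor above replaces the incrementally-built result dict with two locals, data_sources and output_mode, each validated against the agent's allowlists before falling back to defaults. A synchronous sketch of that selection logic, using plain strings in place of ConversationCommand members and hypothetical option sets:

```python
# Synchronous sketch of the refactored selection logic; the option sets and
# function name are illustrative, not khoj's actual values.
def select_sources_and_output(
    chosen_sources: list[str],
    chosen_output: str,
    agent_sources: list[str],
    agent_outputs: list[str],
    source_options: frozenset[str] = frozenset({"online", "notes", "webpage"}),
    output_options: frozenset[str] = frozenset({"text", "image", "diagram"}),
) -> dict:
    # Keep the chosen output only if it is a known option the agent allows.
    output_mode = "text"
    if chosen_output in output_options and (not agent_outputs or chosen_output in agent_outputs):
        output_mode = chosen_output

    # Keep each chosen source only if it is a known option the agent allows.
    data_sources = [
        s for s in chosen_sources
        if s in source_options and (not agent_sources or s in agent_sources)
    ]

    # Fall back to defaults when nothing valid survives the filtering.
    if not data_sources:
        data_sources = ["default"] if not agent_sources else ["general"]

    return {"sources": data_sources, "output": output_mode}


# The LLM picked an invalid source and an output the agent does not allow:
result = select_sources_and_output(["bogus"], "image", agent_sources=[], agent_outputs=["text"])
assert result == {"sources": ["default"], "output": "text"}
```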
@@ -1686,7 +1680,7 @@ def scheduled_chat(
     last_run_time = datetime.strptime(last_run_time, "%Y-%m-%d %I:%M %p %Z").replace(tzinfo=timezone.utc)

     # If the last run time was within the last 6 hours, don't run it again. This helps avoid multithreading issues and rate limits.
-    if (datetime.now(timezone.utc) - last_run_time).total_seconds() < 21600:
+    if (datetime.now(timezone.utc) - last_run_time).total_seconds() < 6 * 60 * 60:
         logger.info(f"Skipping scheduled chat {job_id} as the next run time is in the future.")
         return
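The scheduled_chat guard is unchanged in behavior: 21600 seconds is simply rewritten as 6 * 60 * 60 for readability. A small self-contained sketch of the guard:

```python
# Sketch of the scheduled-run guard: skip if the job already ran within the
# last six hours (21600 seconds == 6 * 60 * 60).
from datetime import datetime, timedelta, timezone


def should_skip(last_run_time: datetime) -> bool:
    return (datetime.now(timezone.utc) - last_run_time).total_seconds() < 6 * 60 * 60


recent = datetime.now(timezone.utc) - timedelta(hours=2)
stale = datetime.now(timezone.utc) - timedelta(hours=8)
assert should_skip(recent) is True   # ran 2h ago: skip to avoid duplicate runs
assert should_skip(stale) is False   # ran 8h ago: due to run again
```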