mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Merge branch 'master' of github.com:debanjum/semantic-search into saba/configui
This commit is contained in:
commit
3d4471e107
8 changed files with 234 additions and 4 deletions
|
@ -13,3 +13,4 @@ dependencies:
|
||||||
- pytest=6.*
|
- pytest=6.*
|
||||||
- pillow=8.*
|
- pillow=8.*
|
||||||
- torchvision=0.*
|
- torchvision=0.*
|
||||||
|
- openai=0.*
|
|
@ -30,3 +30,9 @@ search-type:
|
||||||
|
|
||||||
image:
|
image:
|
||||||
encoder: "clip-ViT-B-32"
|
encoder: "clip-ViT-B-32"
|
||||||
|
|
||||||
|
processor:
|
||||||
|
conversation:
|
||||||
|
openai-api-key: null
|
||||||
|
conversation-logfile: "tests/data/.conversation_logs.json"
|
||||||
|
conversation-history: null
|
57
src/main.py
57
src/main.py
|
@ -1,5 +1,6 @@
|
||||||
# Standard Packages
|
# Standard Packages
|
||||||
import sys
|
import sys
|
||||||
|
import json
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
# External Packages
|
# External Packages
|
||||||
|
@ -10,13 +11,15 @@ from fastapi.templating import Jinja2Templates
|
||||||
|
|
||||||
# Internal Packages
|
# Internal Packages
|
||||||
from src.search_type import asymmetric, symmetric_ledger, image_search
|
from src.search_type import asymmetric, symmetric_ledger, image_search
|
||||||
from src.utils.helpers import get_from_dict
|
from src.utils.helpers import get_absolute_path
|
||||||
from src.utils.cli import cli
|
from src.utils.cli import cli
|
||||||
from src.utils.config import SearchType, SearchModels, TextSearchConfig, ImageSearchConfig, SearchConfig
|
from src.utils.config import SearchType, SearchModels, TextSearchConfig, ImageSearchConfig, SearchConfig, ProcessorConfig, ConversationProcessorConfig
|
||||||
|
from src.processor.conversation.gpt import converse, message_to_prompt
|
||||||
|
|
||||||
# Application Global State
|
# Application Global State
|
||||||
model = SearchModels()
|
model = SearchModels()
|
||||||
search_config = SearchConfig()
|
search_config = SearchConfig()
|
||||||
|
processor_config = ProcessorConfig()
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
# app.mount("/views", StaticFiles(directory="./views"), name="views")
|
# app.mount("/views", StaticFiles(directory="./views"), name="views")
|
||||||
|
@ -92,6 +95,20 @@ def regenerate(t: Optional[SearchType] = None):
|
||||||
return {'status': 'ok', 'message': 'regeneration completed'}
|
return {'status': 'ok', 'message': 'regeneration completed'}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get('/chat')
def chat(q: str):
    """Answer the chat message `q` and remember the exchange.

    Reads the running conversation history from the global
    `processor_config`, asks `converse` for a reply, then stores the
    history extended with this question/answer pair.

    Returns a dict with 'status' set to 'ok' and the reply under 'response'.
    """
    conversation = processor_config.conversation

    # History as it stood before this message
    prior_history = conversation.conversation_history

    # Ask OpenAI GPT for a reply to the new message
    reply = converse(q, prior_history, api_key=conversation.openai_api_key)

    # Append this exchange so the next request sees it
    conversation.conversation_history = message_to_prompt(q, prior_history, reply)

    return {'status': 'ok', 'response': reply}
|
||||||
|
|
||||||
|
|
||||||
def initialize_search(config, regenerate, verbose):
|
def initialize_search(config, regenerate, verbose):
|
||||||
model = SearchModels()
|
model = SearchModels()
|
||||||
search_config = SearchConfig()
|
search_config = SearchConfig()
|
||||||
|
@ -119,6 +136,39 @@ def initialize_search(config, regenerate, verbose):
|
||||||
return model, search_config
|
return model, search_config
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_processor(config, verbose):
    """Build the processor configuration and load any saved conversation history.

    Args:
        config: Application configuration; handed to
            `ConversationProcessorConfig.create_from_dictionary` together with
            the key path ('processor', 'conversation').
        verbose: Verbosity flag forwarded to the conversation config.

    Returns:
        A `ProcessorConfig`. Its `conversation` attribute is left as None when
        the configuration has no conversation processor section.
    """
    processor_config = ProcessorConfig()

    # Initialize Conversation Processor
    processor_config.conversation = ConversationProcessorConfig.create_from_dictionary(config, ('processor', 'conversation'), verbose)

    # create_from_dictionary returns None when the section is missing;
    # there is no log file to load in that case.
    if processor_config.conversation is None:
        return processor_config

    # Load or Initialize Conversation History from Disk
    conversation_logfile = processor_config.conversation.conversation_logfile
    if processor_config.conversation.verbose:
        print('Loading conversation logs from disk...')

    if conversation_logfile.expanduser().absolute().is_file():
        # Read with the same encoding the shutdown handler writes with
        with open(get_absolute_path(conversation_logfile), 'r', encoding='utf-8') as f:
            processor_config.conversation.conversation_history = json.load(f).get('chat', '')
    else:
        processor_config.conversation.conversation_history = ''

    return processor_config
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event('shutdown')
def shutdown_event():
    """Persist the conversation history to the configured log file on shutdown.

    Writes a JSON object of the form {"chat": <history>} to the conversation
    log file. Does nothing when no conversation processor is configured.
    """
    conversation = processor_config.conversation

    # The module-level ProcessorConfig starts with conversation=None, so the
    # app can shut down without a processor ever having been initialized.
    if conversation is None:
        return

    if conversation.verbose:
        print('Saving conversation logs to disk...')

    # Save Conversation History to Disk
    conversation_logfile = get_absolute_path(conversation.conversation_logfile)
    with open(conversation_logfile, "w+", encoding='utf-8') as logfile:
        json.dump({"chat": conversation.conversation_history}, logfile)

    print('Conversation logs saved to disk.')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# Load config from CLI
|
# Load config from CLI
|
||||||
args = cli(sys.argv[1:])
|
args = cli(sys.argv[1:])
|
||||||
|
@ -126,6 +176,9 @@ if __name__ == '__main__':
|
||||||
# Initialize Search from Config
|
# Initialize Search from Config
|
||||||
model, search_config = initialize_search(args.config, args.regenerate, args.verbose)
|
model, search_config = initialize_search(args.config, args.regenerate, args.verbose)
|
||||||
|
|
||||||
|
# Initialize Processor from Config
|
||||||
|
processor_config = initialize_processor(args.config, args.verbose)
|
||||||
|
|
||||||
# Start Application Server
|
# Start Application Server
|
||||||
if args.socket:
|
if args.socket:
|
||||||
uvicorn.run(app, proxy_headers=True, uds=args.socket)
|
uvicorn.run(app, proxy_headers=True, uds=args.socket)
|
||||||
|
|
70
src/processor/conversation/gpt.py
Normal file
70
src/processor/conversation/gpt.py
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
# Standard Packages
|
||||||
|
import os
|
||||||
|
|
||||||
|
# External Packages
|
||||||
|
import openai
|
||||||
|
|
||||||
|
|
||||||
|
def understand(text, api_key=None, temperature=0.5, max_tokens=100):
    """
    Understand user input using OpenAI's GPT

    Appends `text` as a new "Q:" entry to a few-shot primer and asks the
    "davinci" completion engine for the matching "A:" line.

    Args:
        text: The chat message to extract information from.
        api_key: OpenAI API key; falls back to the OPENAI_API_KEY environment
            variable when not given.
        temperature: Sampling temperature passed to the completion request.
        max_tokens: Maximum number of tokens requested for the completion.

    Returns:
        The text of the first completion choice, with surrounding whitespace
        removed (same cleanup as `converse`).
    """
    # Initialize Variables
    openai.api_key = api_key or os.getenv("OPENAI_API_KEY")
    understand_primer="Extract information from each chat message\n\nremember(memory-type, data);\nmemory-type=[\"companion\", \"notes\", \"ledger\", \"image\", \"music\"]\nsearch(search-type, data);\nsearch-type=[\"google\", \"youtube\"]\ngenerate(activity);\nactivity=[\"paint\",\"write\", \"chat\"]\ntrigger-emotion(emotion);\nemotion=[\"happy\",\"confidence\",\"fear\",\"surprise\",\"sadness\",\"disgust\",\"anger\", \"curiosity\", \"calm\"]\n\nQ: How are you doing?\nA: activity(\"chat\"); trigger-emotion(\"surprise\")\nQ: Do you remember what I told you about my brother Antoine when we were at the beach?\nA: remember(\"notes\", \"Brother Antoine when we were at the beach\"); trigger-emotion(\"curiosity\");\nQ: what did we talk about last time?\nA: remember(\"notes\", \"talk last time\"); trigger-emotion(\"curiosity\");\nQ: Let's make some drawings!\nA: generate(\"paint\"); trigger-emotion(\"happy\");\nQ: Do you know anything about Lebanon?\nA: search(\"google\", \"lebanon\"); trigger-emotion(\"confidence\");\nQ: Find a video about a panda rolling in the grass\nA: search(\"youtube\",\"panda rolling in the grass\"); trigger-emotion(\"happy\"); \nQ: Tell me a scary story\nA: generate(\"write\" \"A story about some adventure\"); trigger-emotion(\"fear\");\nQ: What fiction book was I reading last week about AI starship?\nA: remember(\"notes\", \"read fiction book about AI starship last week\"); trigger-emotion(\"curiosity\");\nQ: How much did I spend at Subway for dinner last time?\nA: remember(\"ledger\", \"last Subway dinner\"); trigger-emotion(\"curiosity\");\nQ: I'm feeling sleepy\nA: activity(\"chat\"); trigger-emotion(\"calm\")\nQ: What was that popular Sri lankan song that Alex showed me recently?\nA: remember(\"music\", \"popular Sri lankan song that Alex showed recently\"); trigger-emotion(\"curiosity\"); \nQ: You're pretty funny!\nA: activity(\"chat\"); trigger-emotion(\"pride\")"

    # Setup Prompt with Understand Primer
    prompt = message_to_prompt(text, understand_primer, start_sequence="\nA:", restart_sequence="\nQ:")

    # Get Response from GPT
    # The newline stop sequence limits the completion to a single "A:" line.
    response = openai.Completion.create(
        engine="davinci",
        prompt=prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0.2,
        presence_penalty=0,
        stop=["\n"])

    # Extract, Clean Message from GPT's Response
    story = response['choices'][0]['text']
    return str(story).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def converse(text, conversation_history=None, api_key=None, temperature=0.9, max_tokens=150):
    """
    Converse with user using OpenAI's GPT

    Extends the conversation so far (or a default primer when there is none)
    with `text` as the next "Human:" turn and asks the "davinci" completion
    engine for the "AI:" reply.

    Args:
        text: The user's message.
        conversation_history: Prior conversation text; a built-in primer is
            used instead when this is empty or None.
        api_key: OpenAI API key; falls back to the OPENAI_API_KEY environment
            variable when not given.
        temperature: Sampling temperature passed to the completion request.
        max_tokens: Maximum number of tokens requested for the completion.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    # Initialize Variables
    openai.api_key = api_key or os.getenv("OPENAI_API_KEY")

    start_sequence = "\nAI:"
    restart_sequence = "\nHuman:"
    conversation_primer = f"The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly companion.\n{restart_sequence} Hello, who are you?{start_sequence} Hi, I am an AI conversational companion created by OpenAI. How can I help you today?"

    # Setup Prompt with Primer or Conversation History
    context = conversation_history if conversation_history else conversation_primer
    prompt = message_to_prompt(
        text,
        context,
        start_sequence=start_sequence,
        restart_sequence=restart_sequence)

    # Get Response from GPT
    completion_options = {
        'engine': "davinci",
        'prompt': prompt,
        'temperature': temperature,
        'max_tokens': max_tokens,
        'top_p': 1,
        'frequency_penalty': 0,
        'presence_penalty': 0.6,
        'stop': ["\n", " Human:", " AI:"],
    }
    response = openai.Completion.create(**completion_options)

    # Extract, Clean Message from GPT's Response
    first_choice = response['choices'][0]
    return str(first_choice['text']).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def message_to_prompt(user_message, conversation_history="", gpt_message=None, start_sequence="\nAI:", restart_sequence="\nHuman:"):
    """Create prompt for GPT from message

    Args:
        user_message: The user's latest message.
        conversation_history: Text the new turn is appended to. None is
            treated as an empty history.
        gpt_message: The model's reply to `user_message`, if already known.
            When truthy it is appended after `start_sequence`; otherwise the
            prompt ends at `start_sequence`, ready for completion.
        start_sequence: Marker that introduces the model's turn.
        restart_sequence: Marker that introduces the user's turn.

    Returns:
        The combined prompt string.
    """
    # Avoid formatting None as the literal text "None" at the top of the prompt
    history = "" if conversation_history is None else conversation_history

    prompt = f"{history}{restart_sequence} {user_message}{start_sequence}"
    if gpt_message:
        prompt = f"{prompt} {gpt_message}"
    return prompt
|
|
@ -80,6 +80,15 @@ default_config = {
|
||||||
'image':
|
'image':
|
||||||
{
|
{
|
||||||
'encoder': "clip-ViT-B-32"
|
'encoder': "clip-ViT-B-32"
|
||||||
}
|
},
|
||||||
|
},
|
||||||
|
'processor':
|
||||||
|
{
|
||||||
|
'conversation':
|
||||||
|
{
|
||||||
|
'openai-api-key': "",
|
||||||
|
'conversation-logfile': ".conversation_logs.json",
|
||||||
|
'conversation-history': ""
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,3 +93,27 @@ class SearchConfig():
|
||||||
ledger: TextSearchConfig = None
|
ledger: TextSearchConfig = None
|
||||||
music: TextSearchConfig = None
|
music: TextSearchConfig = None
|
||||||
image: ImageSearchConfig = None
|
image: ImageSearchConfig = None
|
||||||
|
|
||||||
|
|
||||||
|
class ConversationProcessorConfig():
    """Settings and state for the conversation processor."""

    def __init__(self, conversation_logfile, conversation_history, openai_api_key, verbose):
        """Store the conversation processor settings.

        Args:
            conversation_logfile: Location of the file conversation logs are kept in.
            conversation_history: Conversation text accumulated so far.
            openai_api_key: API key used for OpenAI requests.
            verbose: Verbosity flag for this processor.
        """
        self.openai_api_key = openai_api_key
        self.conversation_logfile = conversation_logfile
        self.conversation_history = conversation_history
        self.verbose = verbose

    # Declared static: the function takes no self/cls, so without the
    # decorator it would only work when called on the class, not an instance.
    @staticmethod
    def create_from_dictionary(config, key_tree, verbose):
        """Build a ConversationProcessorConfig from a nested configuration.

        Args:
            config: Configuration to look the conversation section up in.
            key_tree: Sequence of keys leading to the conversation section.
            verbose: Verbosity flag stored on the created config.

        Returns:
            A new ConversationProcessorConfig with an empty conversation
            history, or None when the section is missing or empty.
        """
        conversation_config = get_from_dict(config, *key_tree)
        if not conversation_config:
            return None

        return ConversationProcessorConfig(
            openai_api_key = conversation_config['openai-api-key'],
            conversation_history = '',
            conversation_logfile = Path(conversation_config['conversation-logfile']),
            verbose = verbose)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ProcessorConfig():
    """Container for the application's processor configurations."""
    # Conversation processor settings; None until one is assigned
    conversation: ConversationProcessorConfig = None
|
63
tests/test_chatbot.py
Normal file
63
tests/test_chatbot.py
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
# External Packages
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Internal Packages
|
||||||
|
from src.processor.conversation.gpt import converse, understand, message_to_prompt
|
||||||
|
|
||||||
|
# Input your OpenAI API key to run the tests below
|
||||||
|
api_key = None
|
||||||
|
|
||||||
|
|
||||||
|
# Test
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_message_to_understand_prompt():
    """message_to_prompt appends the new question as a Q:/A: pair after the primer."""
    # Setup
    question = "When did I last dine at Burger King?"
    understand_primer = "Extract information from each chat message\n\nremember(memory-type, data);\nmemory-type=[\"companion\", \"notes\", \"ledger\", \"image\", \"music\"]\nsearch(search-type, data);\nsearch-type=[\"google\", \"youtube\"]\ngenerate(activity);\nactivity=[\"paint\",\"write\", \"chat\"]\ntrigger-emotion(emotion);\nemotion=[\"happy\",\"confidence\",\"fear\",\"surprise\",\"sadness\",\"disgust\",\"anger\", \"curiosity\", \"calm\"]\n\nQ: How are you doing?\nA: activity(\"chat\"); trigger-emotion(\"surprise\")\nQ: Do you remember what I told you about my brother Antoine when we were at the beach?\nA: remember(\"notes\", \"Brother Antoine when we were at the beach\"); trigger-emotion(\"curiosity\");\nQ: what did we talk about last time?\nA: remember(\"notes\", \"talk last time\"); trigger-emotion(\"curiosity\");\nQ: Let's make some drawings!\nA: generate(\"paint\"); trigger-emotion(\"happy\");\nQ: Do you know anything about Lebanon?\nA: search(\"google\", \"lebanon\"); trigger-emotion(\"confidence\");\nQ: Find a video about a panda rolling in the grass\nA: search(\"youtube\",\"panda rolling in the grass\"); trigger-emotion(\"happy\"); \nQ: Tell me a scary story\nA: generate(\"write\" \"A story about some adventure\"); trigger-emotion(\"fear\");\nQ: What fiction book was I reading last week about AI starship?\nA: remember(\"notes\", \"read fiction book about AI starship last week\"); trigger-emotion(\"curiosity\");\nQ: How much did I spend at Subway for dinner last time?\nA: remember(\"ledger\", \"last Subway dinner\"); trigger-emotion(\"curiosity\");\nQ: I'm feeling sleepy\nA: activity(\"chat\"); trigger-emotion(\"calm\")\nQ: What was that popular Sri lankan song that Alex showed me recently?\nA: remember(\"music\", \"popular Sri lankan song that Alex showed recently\"); trigger-emotion(\"curiosity\"); \nQ: You're pretty funny!\nA: activity(\"chat\"); trigger-emotion(\"pride\")"
    # The expected prompt is the primer followed by the new Q:/A: pair
    expected_response = understand_primer + "\nQ: When did I last dine at Burger King?\nA:"

    # Act
    actual_response = message_to_prompt(
        question,
        understand_primer,
        start_sequence="\nA:",
        restart_sequence="\nQ:")

    # Assert
    assert actual_response == expected_response
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.skipif(api_key is None,
                    reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys")
def test_minimal_chat_with_gpt():
    """converse returns a non-empty reply when given only a message."""
    # Arrange
    question = "What will happen when the stars go out?"

    # Act
    reply = converse(question, api_key=api_key)

    # Assert
    assert len(reply) > 0
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.skipif(api_key is None,
                    reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys")
def test_chat_with_history():
    """converse can recall the user's name from the supplied conversation history."""
    # Arrange
    start_sequence="\nAI:"
    restart_sequence="\nHuman:"

    # History in which the user has already introduced themselves
    conversation_history = f"The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly companion.\n{restart_sequence} Hello, I am testatron. Who are you?{start_sequence} Hi, I am an AI conversational companion created by OpenAI. How can I help you today?"

    # Act
    reply = converse(
        "Can you tell me my name?",
        conversation_history=conversation_history,
        api_key=api_key,
        temperature=0,
        max_tokens=50)

    # Assert
    assert len(reply) > 0
    assert any(name in reply for name in ("Testatron", "testatron"))
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.skipif(api_key is None,
                    reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys")
def test_understand_message_using_gpt():
    """understand maps a question about past spending to a ledger memory lookup."""
    # Arrange
    question = "When did I last dine at Subway?"

    # Act
    extracted = understand(question, api_key=api_key)

    # Assert
    assert len(extracted) > 0
    assert "remember(\"ledger\", " in extracted
|
|
@ -1,3 +1,6 @@
|
||||||
|
# External Packages
|
||||||
|
import pytest
|
||||||
|
|
||||||
# Internal Packages
|
# Internal Packages
|
||||||
from src.main import model
|
from src.main import model
|
||||||
from src.search_type import image_search
|
from src.search_type import image_search
|
||||||
|
@ -17,6 +20,7 @@ def test_image_search_setup(search_config):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.skip(reason="results inconsistent currently")
|
||||||
def test_image_search(search_config):
|
def test_image_search(search_config):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.image_search = image_search.setup(search_config.image, regenerate=False)
|
model.image_search = image_search.setup(search_config.image, regenerate=False)
|
||||||
|
|
Loading…
Reference in a new issue