Merge pull request #534 from khoj-ai/features/code-config-cleanup

Small fixes and update config UI to manage indexed data
This commit is contained in:
sabaimran 2023-11-05 15:45:45 -08:00 committed by GitHub
commit 81a615d7dd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 538 additions and 800 deletions

View file

@ -61,7 +61,7 @@ jobs:
env:
DEBIAN_FRONTEND: noninteractive
run: |
apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0
apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
- name: ⬇️ Install Postgres
env:

View file

@ -4,7 +4,7 @@ FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
LABEL org.opencontainers.image.source https://github.com/khoj-ai/khoj
# Install System Dependencies
RUN apt update -y && apt -y install python3-pip git
RUN apt update -y && apt -y install python3-pip git libsqlite3-0 ffmpeg libsm6 libxext6
WORKDIR /app

View file

@ -73,6 +73,7 @@ dependencies = [
"gunicorn == 21.2.0",
"lxml == 4.9.3",
"tzdata == 2023.3",
"rapidocr-onnxruntime == 1.3.8"
]
dynamic = ["version"]

View file

@ -291,8 +291,11 @@ class EntryAdapters:
return deleted_count
@staticmethod
def delete_all_entries(user: KhojUser, file_type: str):
deleted_count, _ = Entry.objects.filter(user=user, file_type=file_type).delete()
def delete_all_entries(user: KhojUser, file_type: str = None):
if file_type is None:
deleted_count, _ = Entry.objects.filter(user=user).delete()
else:
deleted_count, _ = Entry.objects.filter(user=user, file_type=file_type).delete()
return deleted_count
@staticmethod
@ -314,6 +317,18 @@ class EntryAdapters:
async def user_has_entries(user: KhojUser):
return await Entry.objects.filter(user=user).aexists()
@staticmethod
async def adelete_entry_by_file(user: KhojUser, file_path: str):
    """Asynchronously delete the entries of `user` indexed from `file_path`.

    Returns the awaited result of the queryset's `adelete()` call.
    """
    entries_for_file = Entry.objects.filter(user=user, file_path=file_path)
    return await entries_for_file.adelete()
@staticmethod
def aget_all_filenames(user: KhojUser):
    """Return the distinct `file_path` values of the entries owned by `user`.

    NOTE: despite the `a` prefix this is a regular (non-async) function;
    nothing is awaited here, the `values_list` result is returned as-is.
    """
    user_entries = Entry.objects.filter(user=user)
    one_entry_per_file = user_entries.distinct("file_path")
    return one_entry_per_file.values_list("file_path", flat=True)
@staticmethod
async def adelete_all_entries(user: KhojUser):
    """Asynchronously delete every entry owned by `user`, whatever its file type.

    Returns the awaited result of the queryset's `adelete()` call.
    """
    all_user_entries = Entry.objects.filter(user=user)
    return await all_user_entries.adelete()
@staticmethod
def apply_filters(user: KhojUser, query: str, file_type_filter: str = None):
q_filter_terms = Q()

View file

@ -103,21 +103,6 @@ img.khoj-logo {
justify-self: center;
}
a.khoj-banner {
color: black;
text-decoration: none;
}
p.khoj-banner {
font-size: small;
margin: 0;
padding: 10px;
}
p#khoj-banner {
display: inline;
}
@media only screen and (max-width: 600px) {
div.khoj-header {
display: grid;

View file

@ -274,8 +274,9 @@
}
</script>
<body>
<div id="khoj-banner-container" class="khoj-banner-container">
<div id="khoj-empty-container" class="khoj-empty-container">
</div>
<!--Add Header Logo and Nav Pane-->
<div class="khoj-header">
<a class="khoj-logo" href="/">
@ -454,6 +455,11 @@
border-bottom: 1px dotted #475569;
}
div.khoj-empty-container {
padding: 0;
margin: 0;
}
@media (pointer: coarse), (hover: none) {
abbr[title] {
position: relative;
@ -490,12 +496,6 @@
margin: 4px;
grid-template-columns: auto;
}
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
}
@media only screen and (min-width: 600px) {
body {
@ -507,11 +507,6 @@
}
}
div.khoj-banner-container {
padding: 0px;
margin: 0px;
}
div#chat-tooltip {
text-align: left;
font-size: medium;
@ -533,23 +528,6 @@
text-align: center;
}
button#khoj-banner-submit,
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid #475569;
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
div.khoj-banner-container-hidden {
margin: 0px;
padding: 0px;
}
div.programmatic-output {
background-color: #f5f5f5;
border: 1px solid #ddd;

View file

@ -362,35 +362,6 @@
gap: 4px;
}
</style>
<script>
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
<script src="./renderer.js"></script>
</html>

View file

@ -436,14 +436,6 @@
max-width: 90%;
}
div.khoj-banner-container {
background: linear-gradient(-45deg, #FFC107, #FF9800, #FF5722, #FF9800, #FFC107);
background-size: 400% 400%;
animation: gradient 15s ease infinite;
text-align: center;
padding: 10px;
}
@keyframes gradient {
0% {
background-position: 0% 50%;
@ -460,57 +452,5 @@
text-align: center;
}
button#khoj-banner-submit,
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid #475569;
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
@media only screen and (max-width: 600px) {
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
}
</style>
<script>
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
</html>

View file

@ -159,24 +159,22 @@ def configure_middleware(app):
app.add_middleware(SessionMiddleware, secret_key=os.environ.get("KHOJ_DJANGO_SECRET_KEY", "!secret"))
if not state.demo:
@schedule.repeat(schedule.every(61).minutes)
def update_search_index():
try:
logger.info("📬 Updating content index via Scheduler")
for user in get_all_users():
all_files = collect_files(user=user)
state.content_index = configure_content(
state.content_index, state.config.content_type, all_files, state.search_models, user=user
)
all_files = collect_files(user=None)
@schedule.repeat(schedule.every(61).minutes)
def update_search_index():
try:
logger.info("📬 Updating content index via Scheduler")
for user in get_all_users():
all_files = collect_files(user=user)
state.content_index = configure_content(
state.content_index, state.config.content_type, all_files, state.search_models, user=None
state.content_index, state.config.content_type, all_files, state.search_models, user=user
)
logger.info("📪 Content index updated via Scheduler")
except Exception as e:
logger.error(f"🚨 Error updating content index via Scheduler: {e}", exc_info=True)
all_files = collect_files(user=None)
state.content_index = configure_content(
state.content_index, state.config.content_type, all_files, state.search_models, user=None
)
logger.info("📪 Content index updated via Scheduler")
except Exception as e:
logger.error(f"🚨 Error updating content index via Scheduler: {e}", exc_info=True)
def configure_search_types(config: FullConfig):

View file

@ -106,21 +106,6 @@ img.khoj-logo {
justify-self: center;
}
a.khoj-banner {
color: black;
text-decoration: none;
}
p.khoj-banner {
font-size: medium;
margin: 0;
padding: 10px;
}
p#khoj-banner {
display: inline;
}
/* Dropdown in navigation menu*/
#khoj-nav-menu-container {
display: flex;

View file

@ -53,10 +53,10 @@
justify-self: center;
}
.api-settings {
div.section-manage-files,
div.api-settings {
display: grid;
grid-template-columns: 1fr;
grid-template-rows: 1fr 1fr auto;
justify-items: start;
gap: 8px;
padding: 24px 24px;
@ -64,13 +64,23 @@
border: 1px solid rgb(229, 229, 229);
border-radius: 4px;
box-shadow: 0px 1px 3px 0px rgba(0,0,0,0.1),0px 1px 2px -1px rgba(0,0,0,0.8);
}
#api-settings-card-description {
}
div.section-manage-files {
width: 640px;
}
div.api-settings {
grid-template-rows: 1fr 1fr auto;
}
#api-settings-card-description {
margin: 8px 0 0 0;
}
#api-settings-keys-table {
}
#api-settings-keys-table {
margin-bottom: 16px;
}
}
div.instructions {
font-size: large;
@ -184,6 +194,37 @@
text-align: left;
}
button.remove-file-button:hover {
background-color: rgb(255 235 235);
border-radius: 3px;
border: none;
color: var(--flower);
padding: 4px;
cursor: pointer;
}
button.remove-file-button {
background-color: rgb(253 214 214);
border-radius: 3px;
border: none;
color: var(--flower);
padding: 4px;
}
div.file-element {
display: grid;
grid-template-columns: 1fr auto;
border: 1px solid rgb(229, 229, 229);
border-radius: 4px;
box-shadow: 0px 1px 3px 0px rgba(0,0,0,0.1),0px 1px 2px -1px rgba(0,0,0,0.8);
padding: 4px;
margin-bottom: 8px;
}
div.remove-button-container {
text-align: right;
}
button.card-button.happy {
color: var(--leaf);
}
@ -246,6 +287,11 @@
cursor: pointer;
}
a {
color: #3b82f6;
text-decoration: none;
}
@media screen and (max-width: 700px) {
.section-cards {
grid-template-columns: 1fr;
@ -255,7 +301,7 @@
body {
display: grid;
grid-template-columns: 1fr;
grid-template-rows: 1fr auto auto auto minmax(80px, 100%);
grid-template-rows: 1fr repeat(4, auto);
}
body > * {
grid-column: 1;
@ -281,9 +327,14 @@
grid-template-columns: auto;
}
div.section-manage-files,
div.api-settings {
width: auto;
}
div.finalize-buttons {
padding: 0;
}
}
</style>
</html>

View file

@ -165,7 +165,7 @@
// Send the chat message when Enter is pressed without Shift.
// Shift+Enter is left alone so the user can still insert a newline.
function incrementalChat(event) {
    if (!event.shiftKey && event.key === 'Enter') {
        // Use the handler's own `event` parameter; there is no `e` in scope here,
        // so calling e.preventDefault() would throw before chat() could run.
        event.preventDefault();
        chat();
    }
}
@ -261,17 +261,7 @@
}
</script>
<body>
<div id="khoj-banner-container" class="khoj-banner-container">
{% if demo %}
<!-- Banner linking to https://khoj.dev -->
<a class="khoj-banner" href="https://khoj.dev" target="_blank">
<p id="khoj-banner" class="khoj-banner">
Enroll in Khoj cloud to get your own assistant
</p>
</a>
<input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
<button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
{% endif %}
<div id="khoj-empty-container" class="khoj-empty-container">
</div>
<!--Add Header Logo and Nav Pane-->
@ -480,12 +470,6 @@
margin: 4px;
grid-template-columns: auto;
}
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
}
@media only screen and (min-width: 700px) {
body {
@ -497,14 +481,6 @@
}
}
div.khoj-banner-container {
background: linear-gradient(-45deg, #FFC107, #FF9800, #FF5722, #FF9800, #FFC107);
background-size: 400% 400%;
animation: gradient 15s ease infinite;
text-align: center;
padding: 10px;
}
div#chat-tooltip {
text-align: left;
font-size: medium;
@ -526,19 +502,7 @@
text-align: center;
}
button#khoj-banner-submit,
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid var(--main-text-color);
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
div.khoj-banner-container-hidden {
div.khoj-empty-container {
margin: 0px;
padding: 0px;
}
@ -558,39 +522,4 @@
white-space: pre-wrap;
}
</style>
<script>
if ("{{demo}}" === "False") {
document.getElementById("khoj-banner-container").classList.remove("khoj-banner-container");
document.getElementById("khoj-banner-container").classList.add("khoj-banner-container-hidden");
}
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
</html>

View file

@ -67,130 +67,6 @@
</div>
{% endif %}
</div>
<div class="card">
<div class="card-title-row">
<img class="card-icon" src="/static/assets/icons/markdown.svg" alt="markdown">
<h3 class="card-title">
Markdown
{% if current_model_state.markdown == True%}
<img id="configured-icon-markdown" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
{% endif %}
</h3>
</div>
<div class="card-description-row">
<p class="card-description">Set markdown files to index</p>
</div>
<div class="card-action-row">
<a class="card-button" href="/config/content_type/markdown">
{% if current_model_state.markdown %}
Update
{% else %}
Setup
{% endif %}
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
</a>
</div>
{% if current_model_state.markdown %}
<div id="clear-markdown" class="card-action-row">
<button class="card-button" onclick="clearContentType('markdown')">
Disable
</button>
</div>
{% endif %}
</div>
<div class="card">
<div class="card-title-row">
<img class="card-icon" src="/static/assets/icons/org.svg" alt="org">
<h3 class="card-title">
Org
{% if current_model_state.org == True %}
<img id="configured-icon-org" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
{% endif %}
</h3>
</div>
<div class="card-description-row">
<p class="card-description">Set org files to index</p>
</div>
<div class="card-action-row">
<a class="card-button" href="/config/content_type/org">
{% if current_model_state.org %}
Update
{% else %}
Setup
{% endif %}
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
</a>
</div>
{% if current_model_state.org %}
<div id="clear-org" class="card-action-row">
<button class="card-button" onclick="clearContentType('org')">
Disable
</button>
</div>
{% endif %}
</div>
<div class="card">
<div class="card-title-row">
<img class="card-icon" src="/static/assets/icons/pdf.svg" alt="PDF">
<h3 class="card-title">
PDF
{% if current_model_state.pdf == True %}
<img id="configured-icon-pdf" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
{% endif %}
</h3>
</div>
<div class="card-description-row">
<p class="card-description">Set PDF files to index</p>
</div>
<div class="card-action-row">
<a class="card-button" href="/config/content_type/pdf">
{% if current_model_state.pdf %}
Update
{% else %}
Setup
{% endif %}
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
</a>
</div>
{% if current_model_state.pdf %}
<div id="clear-pdf" class="card-action-row">
<button class="card-button" onclick="clearContentType('pdf')">
Disable
</button>
</div>
{% endif %}
</div>
<div class="card">
<div class="card-title-row">
<img class="card-icon" src="/static/assets/icons/plaintext.svg" alt="Plaintext">
<h3 class="card-title">
Plaintext
{% if current_model_state.plaintext == True %}
<img id="configured-icon-plaintext" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
{% endif %}
</h3>
</div>
<div class="card-description-row">
<p class="card-description">Set Plaintext files to index</p>
</div>
<div class="card-action-row">
<a class="card-button" href="/config/content_type/plaintext">
{% if current_model_state.plaintext %}
Update
{% else %}
Setup
{% endif %}
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
</a>
</div>
{% if current_model_state.plaintext %}
<div id="clear-plaintext" class="card-action-row">
<button class="card-button" onclick="clearContentType('plaintext')">
Disable
</button>
</div>
{% endif %}
</div>
</div>
</div>
<div class="section">
@ -246,6 +122,16 @@
</div>
</div>
</div>
<div class="section">
    <h2 class="section-title">Manage Data</h2>
    <div class="section-manage-files">
        <!-- Wrapper looked up by id from the page script to show/hide and handle clicks on "Remove All" -->
        <div id="delete-all-files" class="delete-all-files">
            <!-- The button gets its own id: ids must be unique within a document -->
            <button id="delete-all-files-button" type="submit" title="Delete all indexed files">🗑️ Remove All</button>
        </div>
        <!-- Filled in by the page script with one row per indexed file -->
        <div class="indexed-files">
        </div>
    </div>
</div>
<div class="section general-settings">
<div id="results-count" title="Number of items to show in search and use for chat response">
<label for="results-count-slider">Results Count: <span id="results-count-value">5</span></label>
@ -291,8 +177,8 @@
};
function clearContentType(content_type) {
fetch('/api/delete/config/data/content_type/' + content_type, {
method: 'POST',
fetch('/api/config/data/content_type/' + content_type, {
method: 'DELETE',
headers: {
'Content-Type': 'application/json',
}
@ -462,5 +348,84 @@
// List user's API keys on page load
listApiKeys();
// Ask the server to delete a single indexed file, then refresh the file list.
//   path: file path of the indexed file, as listed by getAllFilenames()
function removeFile(path) {
    // Encode the path so characters such as "&", "#", "+" or "?" in a file name
    // are sent as part of the filename value instead of breaking the query string
    fetch('/api/config/data/file?filename=' + encodeURIComponent(path), {
        method: 'DELETE',
        headers: {
            'Content-Type': 'application/json',
        }
    })
    .then(response => response.json())
    .then(data => {
        if (data.status == "ok") {
            getAllFilenames();
        }
    })
    .catch((error) => {
        // Surface network / parse failures instead of leaving the rejection unhandled
        console.error('Error:', error);
    });
}
// Get all currently indexed files and render them, each with its own remove button.
// When nothing is indexed, hide the "Remove All" control and show a hint instead.
function getAllFilenames() {
    fetch('/api/config/data/all')
        .then(response => response.json())
        .then(data => {
            var indexedFiles = document.getElementsByClassName("indexed-files")[0];
            // Start from an empty list so repeated calls do not duplicate rows
            indexedFiles.innerHTML = "";
            if (data.length == 0) {
                document.getElementById("delete-all-files").style.display = "none";
                indexedFiles.innerHTML = "<div>Use the <a href='https://download.khoj.dev'>Khoj Desktop client</a> to index files.</div>";
            } else {
                document.getElementById("delete-all-files").style.display = "block";
            }

            // `const` gives every iteration its own binding, so each click handler
            // below removes the file of its own row
            for (const filename of data) {
                let fileElement = document.createElement("div");
                fileElement.classList.add("file-element");

                let fileNameElement = document.createElement("div");
                fileNameElement.classList.add("content-name");
                // File names are data, not markup: set them as text so a name
                // containing "<" or "&" is displayed literally and never parsed as HTML
                fileNameElement.textContent = filename;
                fileElement.appendChild(fileNameElement);

                let buttonContainer = document.createElement("div");
                buttonContainer.classList.add("remove-button-container");

                let removeFileButton = document.createElement("button");
                removeFileButton.classList.add("remove-file-button");
                removeFileButton.innerHTML = "🗑️";
                removeFileButton.addEventListener("click", () => {
                    removeFile(filename);
                });
                buttonContainer.appendChild(removeFileButton);

                fileElement.appendChild(buttonContainer);
                indexedFiles.appendChild(fileElement);
            }
        })
        .catch((error) => {
            console.error('Error:', error);
        });
}
// Get all currently indexed files on page load
getAllFilenames();

// Delete every indexed file when the "Remove All" control is clicked,
// then re-render the (now empty) file list
let deleteAllFilesButton = document.getElementById("delete-all-files");
deleteAllFilesButton.addEventListener("click", function(event) {
    event.preventDefault();
    fetch('/api/config/data/all', {
        method: 'DELETE',
        headers: {
            'Content-Type': 'application/json',
        }
    })
    .then(response => response.json())
    .then(data => {
        if (data.status == "ok") {
            getAllFilenames();
        }
    })
    .catch((error) => {
        // Surface network / parse failures instead of leaving the rejection unhandled
        console.error('Error:', error);
    });
});
</script>
{% endblock %}

View file

@ -10,18 +10,6 @@
</head>
<body>
{% if demo %}
<!-- Banner linking to https://khoj.dev -->
<div class="khoj-banner-container">
<a class="khoj-banner" href="https://khoj.dev" target="_blank">
<p id="khoj-banner" class="khoj-banner">
Enroll in Khoj cloud to get your own assistant
</p>
</a>
<input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
<button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
</div>
{% endif %}
<div class="khoj-header"></div>
<!-- Login Modal -->
@ -106,19 +94,6 @@
justify-self: center;
}
button#khoj-banner-submit,
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid #475569;
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
div#login-modal {
display: grid;
grid-template-columns: 1fr;
@ -143,12 +118,6 @@
}
@media only screen and (max-width: 700px) {
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
div#login-modal {
margin-left: 10%;
margin-right: 10%;
@ -156,34 +125,5 @@
}
</style>
<script>
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
<script src="https://accounts.google.com/gsi/client" async defer></script>
</html>

View file

@ -270,19 +270,6 @@
</script>
<body>
{% if demo %}
<!-- Banner linking to https://khoj.dev -->
<div class="khoj-banner-container">
<a class="khoj-banner" href="https://khoj.dev" target="_blank">
<p id="khoj-banner" class="khoj-banner">
Enroll in Khoj cloud to get your own assistant
</p>
</a>
<input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
<button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
</div>
{% endif %}
<!--Add Header Logo and Nav Pane-->
{% import 'utils.html' as utils %}
{{ utils.heading_pane(user_photo, username) }}
@ -458,14 +445,6 @@
max-width: 90%;
}
div.khoj-banner-container {
background: linear-gradient(-45deg, #FFC107, #FF9800, #FF5722, #FF9800, #FFC107);
background-size: 400% 400%;
animation: gradient 15s ease infinite;
text-align: center;
padding: 10px;
}
@keyframes gradient {
0% {
background-position: 0% 50%;
@ -482,57 +461,6 @@
text-align: center;
}
button#khoj-banner-submit,
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid var(--main-text-color);
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
@media only screen and (max-width: 700px) {
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
}
</style>
<script>
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
</html>

View file

@ -119,7 +119,6 @@ def set_state(args):
state.verbose = args.verbose
state.host = args.host
state.port = args.port
state.demo = args.demo
state.anonymous_mode = args.anonymous_mode
state.khoj_version = version("khoj-assistant")
state.chat_on_gpu = args.chat_on_gpu

View file

@ -68,13 +68,17 @@ class PdfToEntries(TextToEntries):
with open(f"{tmp_file}", "wb") as f:
bytes = pdf_files[pdf_file]
f.write(bytes)
loader = PyMuPDFLoader(f"{tmp_file}")
pdf_entries_per_file = [page.page_content for page in loader.load()]
try:
loader = PyMuPDFLoader(f"{tmp_file}", extract_images=True)
pdf_entries_per_file = [page.page_content for page in loader.load()]
except ImportError:
loader = PyMuPDFLoader(f"{tmp_file}")
pdf_entries_per_file = [page.page_content for page in loader.load()]
entry_to_location_map += zip(pdf_entries_per_file, [pdf_file] * len(pdf_entries_per_file))
entries.extend(pdf_entries_per_file)
except Exception as e:
logger.warning(f"Unable to process file: {pdf_file}. This file will not be indexed.")
logger.warning(e)
logger.warning(e, exc_info=True)
finally:
if os.path.exists(f"{tmp_file}"):
os.remove(f"{tmp_file}")

View file

@ -45,7 +45,15 @@ from fastapi.requests import Request
from database import adapters
from database.adapters import EntryAdapters, ConversationAdapters
from database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig, KhojUser
from database.models import (
LocalMarkdownConfig,
LocalOrgConfig,
LocalPdfConfig,
LocalPlaintextConfig,
KhojUser,
GithubConfig,
NotionConfig,
)
# Initialize Router
@ -54,14 +62,10 @@ logger = logging.getLogger(__name__)
def map_config_to_object(content_type: str):
if content_type == "org":
return LocalOrgConfig
if content_type == "markdown":
return LocalMarkdownConfig
if content_type == "pdf":
return LocalPdfConfig
if content_type == "plaintext":
return LocalPlaintextConfig
if content_type == "github":
return GithubConfig
if content_type == "notion":
return NotionConfig
async def map_config_to_db(config: FullConfig, user: KhojUser):
@ -111,183 +115,220 @@ async def map_config_to_db(config: FullConfig, user: KhojUser):
)
# If it's a demo instance, prevent updating any of the configuration.
if not state.demo:
def _initialize_config():
if state.config is None:
state.config = FullConfig()
state.config.search_type = SearchConfig.parse_obj(constants.default_config["search-type"])
def _initialize_config():
if state.config is None:
state.config = FullConfig()
state.config.search_type = SearchConfig.parse_obj(constants.default_config["search-type"])
@api.get("/config/data", response_model=FullConfig)
@requires(["authenticated"])
def get_config_data(request: Request):
user = request.user.object
EntryAdapters.get_unique_file_types(user)
@api.get("/config/data", response_model=FullConfig)
@requires(["authenticated"])
def get_config_data(request: Request):
user = request.user.object
EntryAdapters.get_unique_file_types(user)
return state.config
return state.config
@api.post("/config/data")
@requires(["authenticated"])
async def set_config_data(
request: Request,
updated_config: FullConfig,
client: Optional[str] = None,
):
user = request.user.object
await map_config_to_db(updated_config, user)
configuration_update_metadata = {}
@api.post("/config/data")
@requires(["authenticated"])
async def set_config_data(
request: Request,
updated_config: FullConfig,
client: Optional[str] = None,
):
user = request.user.object
await map_config_to_db(updated_config, user)
enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user)
configuration_update_metadata = {}
if state.config.content_type is not None:
configuration_update_metadata["github"] = "github" in enabled_content
configuration_update_metadata["notion"] = "notion" in enabled_content
configuration_update_metadata["org"] = "org" in enabled_content
configuration_update_metadata["pdf"] = "pdf" in enabled_content
configuration_update_metadata["markdown"] = "markdown" in enabled_content
enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user)
if state.config.processor is not None:
configuration_update_metadata["conversation_processor"] = state.config.processor.conversation is not None
if state.config.content_type is not None:
configuration_update_metadata["github"] = "github" in enabled_content
configuration_update_metadata["notion"] = "notion" in enabled_content
configuration_update_metadata["org"] = "org" in enabled_content
configuration_update_metadata["pdf"] = "pdf" in enabled_content
configuration_update_metadata["markdown"] = "markdown" in enabled_content
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_config",
client=client,
metadata=configuration_update_metadata,
)
return state.config
if state.config.processor is not None:
configuration_update_metadata["conversation_processor"] = state.config.processor.conversation is not None
@api.post("/config/data/content_type/github", status_code=200)
@requires(["authenticated"])
async def set_content_config_github_data(
request: Request,
updated_config: Union[GithubContentConfig, None],
client: Optional[str] = None,
):
_initialize_config()
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_config",
client=client,
metadata=configuration_update_metadata,
)
return state.config
user = request.user.object
await adapters.set_user_github_config(
user=user,
pat_token=updated_config.pat_token,
repos=updated_config.repos,
)
@api.post("/config/data/content_type/github", status_code=200)
@requires(["authenticated"])
async def set_content_config_github_data(
request: Request,
updated_config: Union[GithubContentConfig, None],
client: Optional[str] = None,
):
_initialize_config()
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_content_config",
client=client,
metadata={"content_type": "github"},
)
user = request.user.object
return {"status": "ok"}
await adapters.set_user_github_config(
user=user,
pat_token=updated_config.pat_token,
repos=updated_config.repos,
)
@api.post("/config/data/content_type/notion", status_code=200)
@requires(["authenticated"])
async def set_content_config_notion_data(
request: Request,
updated_config: Union[NotionContentConfig, None],
client: Optional[str] = None,
):
_initialize_config()
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_content_config",
client=client,
metadata={"content_type": "github"},
)
user = request.user.object
return {"status": "ok"}
await adapters.set_notion_config(
user=user,
token=updated_config.token,
)
update_telemetry_state(
request=request,
telemetry_type="api",
api="set_content_config",
client=client,
metadata={"content_type": "notion"},
)
@api.post("/config/data/content_type/notion", status_code=200)
@requires(["authenticated"])
async def set_content_config_notion_data(
request: Request,
updated_config: Union[NotionContentConfig, None],
client: Optional[str] = None,
):
_initialize_config()
return {"status": "ok"}
user = request.user.object
# Served on both the POST "/delete/..." path and the DELETE path so that
# either way of calling this handler keeps working.
@api.post("/delete/config/data/content_type/{content_type}", status_code=200)
@api.delete("/config/data/content_type/{content_type}", status_code=200)
@requires(["authenticated"])
async def remove_content_config_data(
    request: Request,
    content_type: str,
    client: Optional[str] = None,
):
    """Delete the user's configuration and indexed entries for a content type.

    Args:
        request: Incoming request; the user is read from ``request.user.object``.
        content_type: Content type name, resolved via ``map_config_to_object``.
        client: Optional client identifier forwarded to telemetry.

    Returns:
        ``{"status": "ok"}`` after the config rows and entries are deleted.

    Raises:
        ValueError: If ``content_type`` does not map to a config model.
    """
    user = request.user.object

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_content_config",
        client=client,
        metadata={"content_type": content_type},
    )

    content_object = map_config_to_object(content_type)
    if content_object is None:
        raise ValueError(f"Invalid content type: {content_type}")

    # Remove the stored config first, then the entries of that file type.
    await content_object.objects.filter(user=user).adelete()
    await sync_to_async(EntryAdapters.delete_all_entries)(user, content_type)

    return {"status": "ok"}
@api.post("/config/data/content_type/{content_type}", status_code=200)
@requires(["authenticated"])
async def set_content_config_data(
    request: Request,
    content_type: str,
    updated_config: Union[TextContentConfig, None],
    client: Optional[str] = None,
):
    """Save the user's text content configuration for ``content_type``.

    Args:
        request: Incoming request; the user is read from ``request.user.object``.
        content_type: Content type name, resolved via ``map_config_to_object``.
        updated_config: New settings passed to ``adapters.set_text_content_config``.
        client: Optional client identifier forwarded to telemetry.

    Returns:
        ``{"status": "ok"}`` once the configuration has been saved.

    Raises:
        ValueError: If ``content_type`` does not map to a config model.
    """
    _initialize_config()

    user = request.user.object

    content_object = map_config_to_object(content_type)
    if content_object is None:
        raise ValueError(f"Invalid content type: {content_type}")

    # Only save here; deleting the config or entries belongs to the remove
    # handler and would undo the update that was just written.
    await adapters.set_text_content_config(user, content_object, updated_config)

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="set_content_config",
        client=client,
        metadata={"content_type": content_type},
    )

    return {"status": "ok"}
@api.delete("/config/data/file", status_code=200)
@requires(["authenticated"])
async def remove_file_data(
    request: Request,
    filename: str,
    client: Optional[str] = None,
):
    """Delete the authenticated user's indexed entries for a single file.

    Args:
        request: Incoming request; the user is read from ``request.user.object``.
        filename: File path whose entries are deleted via
            ``EntryAdapters.adelete_entry_by_file``.
        client: Optional client identifier forwarded to telemetry.

    Returns:
        ``{"status": "ok"}`` after the delete call completes.
    """
    user = request.user.object

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_file",
        client=client,
    )

    await EntryAdapters.adelete_entry_by_file(user, filename)

    return {"status": "ok"}
@api.get("/config/data/all", response_model=List[str])
@requires(["authenticated"])
async def get_all_filenames(
    request: Request,
    client: Optional[str] = None,
):
    """Return the file paths of everything indexed for the authenticated user.

    Telemetry for the call is recorded first; the filename query from
    ``EntryAdapters.aget_all_filenames`` is then turned into a list via
    ``sync_to_async`` before being returned.
    """
    current_user = request.user.object

    telemetry_fields = {
        "request": request,
        "telemetry_type": "api",
        "api": "get_all_filenames",
        "client": client,
    }
    update_telemetry_state(**telemetry_fields)

    to_list = sync_to_async(list)
    filenames = EntryAdapters.aget_all_filenames(current_user)
    return await to_list(filenames)
@api.delete("/config/data/all", status_code=200)
@requires(["authenticated"])
async def remove_all_config_data(
    request: Request,
    client: Optional[str] = None,
):
    """Delete every indexed entry belonging to the authenticated user.

    Records a ``delete_all_config`` telemetry event, then awaits
    ``EntryAdapters.adelete_all_entries`` for the requesting user.
    """
    current_user = request.user.object

    telemetry_fields = {
        "request": request,
        "telemetry_type": "api",
        "api": "delete_all_config",
        "client": client,
    }
    update_telemetry_state(**telemetry_fields)

    await EntryAdapters.adelete_all_entries(current_user)

    response = {"status": "ok"}
    return response
@api.post("/config/data/conversation/model", status_code=200)
@requires(["authenticated"])
async def update_chat_model(
    request: Request,
    id: str,
    client: Optional[str] = None,
):
    """Select the conversation (chat) model used for the authenticated user.

    ``id`` is converted to an integer and passed to
    ``ConversationAdapters.aset_user_conversation_processor``. Telemetry is
    recorded whether or not a matching model was found.

    Returns:
        ``{"status": "ok"}`` on success, or an error payload with the message
        ``"Model not found"`` when the adapter returns ``None``.
    """
    current_user = request.user.object
    model_id = int(id)
    applied_config = await ConversationAdapters.aset_user_conversation_processor(current_user, model_id)

    telemetry_fields = {
        "request": request,
        "telemetry_type": "api",
        "api": "set_conversation_chat_model",
        "client": client,
        "metadata": {"processor_conversation_type": "conversation"},
    }
    update_telemetry_state(**telemetry_fields)

    if applied_config is not None:
        return {"status": "ok"}

    return {"status": "error", "message": "Model not found"}
# Create Routes
@ -377,6 +418,7 @@ async def search(
SearchType.Github,
SearchType.Notion,
SearchType.Plaintext,
SearchType.Pdf,
]:
# query markdown notes
search_futures += [

View file

@ -38,7 +38,6 @@ def index(request: Request):
"chat.html",
context={
"request": request,
"demo": state.demo,
"username": user.username,
"user_photo": user_picture,
},
@ -55,7 +54,6 @@ def index_post(request: Request):
"chat.html",
context={
"request": request,
"demo": state.demo,
"username": user.username,
"user_photo": user_picture,
},
@ -72,7 +70,6 @@ def search_page(request: Request):
"search.html",
context={
"request": request,
"demo": state.demo,
"username": user.username,
"user_photo": user_picture,
},
@ -89,7 +86,6 @@ def chat_page(request: Request):
"chat.html",
context={
"request": request,
"demo": state.demo,
"username": user.username,
"user_photo": user_picture,
},
@ -107,7 +103,6 @@ def login_page(request: Request):
"login.html",
context={
"request": request,
"demo": state.demo,
"google_client_id": google_client_id,
"redirect_uri": redirect_uri,
},
@ -125,142 +120,139 @@ def map_config_to_object(content_type: str):
return LocalPlaintextConfig
@web_client.get("/config", response_class=HTMLResponse)
@requires(["authenticated"], redirect="login_page")
def config_page(request: Request):
    """Render the main configuration page for the authenticated user.

    The template context reports which content types currently have indexed
    entries, the available conversation models, and the user's selected one.
    Unauthenticated requests are redirected to ``login_page``.
    """
    user = request.user.object
    user_picture = request.session.get("user", {}).get("picture")
    enabled_content = set(EntryAdapters.get_unique_file_types(user).all())

    successfully_configured = {
        "pdf": ("pdf" in enabled_content),
        "markdown": ("markdown" in enabled_content),
        "org": ("org" in enabled_content),
        "image": False,
        "github": ("github" in enabled_content),
        "notion": ("notion" in enabled_content),
        "plaintext": ("plaintext" in enabled_content),
    }

    # Image status comes from the in-memory content index rather than from
    # the user's indexed entries.
    if state.content_index:
        successfully_configured.update(
            {
                "image": state.content_index.image is not None,
            }
        )

    conversation_options = ConversationAdapters.get_conversation_processor_options().all()
    all_conversation_options = [
        {"chat_model": conversation_option.chat_model, "id": conversation_option.id}
        for conversation_option in conversation_options
    ]

    selected_conversation_config = ConversationAdapters.get_conversation_config(user)

    return templates.TemplateResponse(
        "config.html",
        context={
            "request": request,
            "current_model_state": successfully_configured,
            "anonymous_mode": state.anonymous_mode,
            "username": user.username if user else None,
            "conversation_options": all_conversation_options,
            "selected_conversation_config": selected_conversation_config.id if selected_conversation_config else None,
            "user_photo": user_picture,
        },
    )
@web_client.get("/config/content_type/github", response_class=HTMLResponse)
@requires(["authenticated"], redirect="login_page")
def github_config_page(request: Request):
    """Render the GitHub content configuration form for the authenticated user.

    When the user already has a GitHub config, its token and repositories are
    serialized into the template context; otherwise an empty dict is passed.
    Unauthenticated requests are redirected to ``login_page``.
    """
    user = request.user.object
    user_picture = request.session.get("user", {}).get("picture")
    current_github_config = get_user_github_config(user)

    if current_github_config:
        raw_repos = current_github_config.githubrepoconfig.all()
        repos = [
            GithubRepoConfig(
                name=repo.name,
                owner=repo.owner,
                branch=repo.branch,
            )
            for repo in raw_repos
        ]
        current_config = GithubContentConfig(
            pat_token=current_github_config.pat_token,
            repos=repos,
        )
        # Round-trip through JSON so the template receives plain dicts/lists.
        current_config = json.loads(current_config.json())
    else:
        current_config = {}  # type: ignore

    return templates.TemplateResponse(
        "content_type_github_input.html",
        context={
            "request": request,
            "current_config": current_config,
            "username": user.username,
            "user_photo": user_picture,
        },
    )
@web_client.get("/config/content_type/notion", response_class=HTMLResponse)
@requires(["authenticated"], redirect="login_page")
def notion_config_page(request: Request):
    """Render the Notion content configuration form for the authenticated user.

    The stored Notion token (or an empty string when no config exists) is
    serialized into the template context. Unauthenticated requests are
    redirected to ``login_page``.
    """
    user = request.user.object
    session_user = request.session.get("user", {})
    user_picture = session_user.get("picture")

    stored_config = get_user_notion_config(user)
    token = stored_config.token if stored_config else ""
    serialized_config = NotionContentConfig(token=token).json()

    template_context = {
        "request": request,
        "current_config": json.loads(serialized_config),
        "username": user.username,
        "user_photo": user_picture,
    }
    return templates.TemplateResponse("content_type_notion_input.html", context=template_context)
@web_client.get("/config/content_type/{content_type}", response_class=HTMLResponse)
@requires(["authenticated"], redirect="login_page")
def content_config_page(request: Request, content_type: str):
    """Render the configuration form for a text content type.

    Content types outside ``VALID_TEXT_CONTENT_TYPES`` fall back to rendering
    ``config.html`` with only the request in context. Otherwise the user's
    stored config for that type is loaded, created if it does not exist yet,
    and serialized into the template context.
    Unauthenticated requests are redirected to ``login_page``.
    """
    if content_type not in VALID_TEXT_CONTENT_TYPES:
        return templates.TemplateResponse("config.html", context={"request": request})

    # Named config_model rather than ``object`` to avoid shadowing the builtin.
    config_model = map_config_to_object(content_type)
    user = request.user.object
    user_picture = request.session.get("user", {}).get("picture")
    config = config_model.objects.filter(user=user).first()
    if config is None:
        config = config_model.objects.create(user=user)

    current_config = TextContentConfig(
        input_files=config.input_files,
        input_filter=config.input_filter,
        index_heading_entries=config.index_heading_entries,
    )
    # Round-trip through JSON so the template receives plain dicts/lists.
    current_config = json.loads(current_config.json())

    return templates.TemplateResponse(
        "content_type_input.html",
        context={
            "request": request,
            "current_config": current_config,
            "content_type": content_type,
            "username": user.username,
            "user_photo": user_picture,
        },
    )

View file

@ -42,7 +42,6 @@ def cli(args=None):
parser.add_argument(
"--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
)
parser.add_argument("--demo", action="store_true", default=False, help="Run Khoj in demo mode")
parser.add_argument(
"--anonymous-mode",
action="store_true",

View file

@ -31,7 +31,6 @@ config_lock = threading.Lock()
chat_lock = threading.Lock()
SearchType = utils_config.SearchType
telemetry: List[Dict[str, str]] = []
demo: bool = False
khoj_version: str = None
device = get_device()
chat_on_gpu: bool = True

BIN
tests/data/pdf/ocr_samples.pdf vendored Normal file

Binary file not shown.

View file

@ -50,6 +50,23 @@ def test_multi_page_pdf_to_jsonl():
assert len(jsonl_data) == 6
def test_ocr_page_pdf_to_jsonl():
    "Extract an entry from the OCR sample PDF and check its text."
    # Arrange
    # Load the raw bytes of the OCR sample Pdf file
    with open("tests/data/pdf/ocr_samples.pdf", "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    pdf_files = {"tests/data/pdf/ocr_samples.pdf": pdf_bytes}

    # Act
    # Extract Entries from the Pdf file, then map each one to its source file
    extracted_entries, entry_to_file_map = PdfToEntries.extract_pdf_entries(pdf_files=pdf_files)
    entry_maps = PdfToEntries.convert_pdf_entries_to_maps(extracted_entries, entry_to_file_map)

    # Assert
    assert len(entry_maps) == 1
    assert "playing on a strip of marsh" in entry_maps[0].raw
def test_get_pdf_files(tmp_path):
"Ensure Pdf files specified via input-filter, input-files extracted"
# Arrange