Merge pull request #534 from khoj-ai/features/code-config-cleanup

Small fixes and update config UI to manage indexed data
This commit is contained in:
sabaimran 2023-11-05 15:45:45 -08:00 committed by GitHub
commit 81a615d7dd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 538 additions and 800 deletions

View file

@ -61,7 +61,7 @@ jobs:
env: env:
DEBIAN_FRONTEND: noninteractive DEBIAN_FRONTEND: noninteractive
run: | run: |
apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
- name: ⬇️ Install Postgres - name: ⬇️ Install Postgres
env: env:

View file

@ -4,7 +4,7 @@ FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
LABEL org.opencontainers.image.source https://github.com/khoj-ai/khoj LABEL org.opencontainers.image.source https://github.com/khoj-ai/khoj
# Install System Dependencies # Install System Dependencies
RUN apt update -y && apt -y install python3-pip git RUN apt update -y && apt -y install python3-pip git libsqlite3-0 ffmpeg libsm6 libxext6
WORKDIR /app WORKDIR /app

View file

@ -73,6 +73,7 @@ dependencies = [
"gunicorn == 21.2.0", "gunicorn == 21.2.0",
"lxml == 4.9.3", "lxml == 4.9.3",
"tzdata == 2023.3", "tzdata == 2023.3",
"rapidocr-onnxruntime == 1.3.8"
] ]
dynamic = ["version"] dynamic = ["version"]

View file

@ -291,7 +291,10 @@ class EntryAdapters:
return deleted_count return deleted_count
@staticmethod @staticmethod
def delete_all_entries(user: KhojUser, file_type: str): def delete_all_entries(user: KhojUser, file_type: str = None):
if file_type is None:
deleted_count, _ = Entry.objects.filter(user=user).delete()
else:
deleted_count, _ = Entry.objects.filter(user=user, file_type=file_type).delete() deleted_count, _ = Entry.objects.filter(user=user, file_type=file_type).delete()
return deleted_count return deleted_count
@ -314,6 +317,18 @@ class EntryAdapters:
async def user_has_entries(user: KhojUser): async def user_has_entries(user: KhojUser):
return await Entry.objects.filter(user=user).aexists() return await Entry.objects.filter(user=user).aexists()
@staticmethod
async def adelete_entry_by_file(user: KhojUser, file_path: str):
    """Asynchronously delete the entries a user has indexed from one file.

    Args:
        user: Owner of the entries; only rows matching this user are touched.
        file_path: Path the entries were indexed from; matched exactly.

    Returns:
        Whatever the queryset's ``adelete()`` call returns.
    """
    entries_for_file = Entry.objects.filter(user=user, file_path=file_path)
    return await entries_for_file.adelete()
@staticmethod
def aget_all_filenames(user: KhojUser):
    """Build a query for the distinct file paths a user has indexed.

    NOTE(review): despite the ``a`` prefix this is a regular (non-async)
    function; it only builds and returns the queryset expression below.

    Args:
        user: Owner of the entries to list.

    Returns:
        The ``values_list("file_path", flat=True)`` result over the user's
        entries, de-duplicated with ``distinct("file_path")``.
    """
    user_entries = Entry.objects.filter(user=user)
    one_entry_per_file = user_entries.distinct("file_path")
    return one_entry_per_file.values_list("file_path", flat=True)
@staticmethod
async def adelete_all_entries(user: KhojUser):
    """Asynchronously delete every entry that belongs to the given user.

    Args:
        user: Owner of the entries to remove.

    Returns:
        Whatever the queryset's ``adelete()`` call returns.
    """
    all_user_entries = Entry.objects.filter(user=user)
    return await all_user_entries.adelete()
@staticmethod @staticmethod
def apply_filters(user: KhojUser, query: str, file_type_filter: str = None): def apply_filters(user: KhojUser, query: str, file_type_filter: str = None):
q_filter_terms = Q() q_filter_terms = Q()

View file

@ -103,21 +103,6 @@ img.khoj-logo {
justify-self: center; justify-self: center;
} }
a.khoj-banner {
color: black;
text-decoration: none;
}
p.khoj-banner {
font-size: small;
margin: 0;
padding: 10px;
}
p#khoj-banner {
display: inline;
}
@media only screen and (max-width: 600px) { @media only screen and (max-width: 600px) {
div.khoj-header { div.khoj-header {
display: grid; display: grid;

View file

@ -274,8 +274,9 @@
} }
</script> </script>
<body> <body>
<div id="khoj-banner-container" class="khoj-banner-container"> <div id="khoj-empty-container" class="khoj-empty-container">
</div> </div>
<!--Add Header Logo and Nav Pane--> <!--Add Header Logo and Nav Pane-->
<div class="khoj-header"> <div class="khoj-header">
<a class="khoj-logo" href="/"> <a class="khoj-logo" href="/">
@ -454,6 +455,11 @@
border-bottom: 1px dotted #475569; border-bottom: 1px dotted #475569;
} }
div.khoj-empty-container {
padding: 0;
margin: 0;
}
@media (pointer: coarse), (hover: none) { @media (pointer: coarse), (hover: none) {
abbr[title] { abbr[title] {
position: relative; position: relative;
@ -490,12 +496,6 @@
margin: 4px; margin: 4px;
grid-template-columns: auto; grid-template-columns: auto;
} }
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
} }
@media only screen and (min-width: 600px) { @media only screen and (min-width: 600px) {
body { body {
@ -507,11 +507,6 @@
} }
} }
div.khoj-banner-container {
padding: 0px;
margin: 0px;
}
div#chat-tooltip { div#chat-tooltip {
text-align: left; text-align: left;
font-size: medium; font-size: medium;
@ -533,23 +528,6 @@
text-align: center; text-align: center;
} }
button#khoj-banner-submit,
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid #475569;
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
div.khoj-banner-container-hidden {
margin: 0px;
padding: 0px;
}
div.programmatic-output { div.programmatic-output {
background-color: #f5f5f5; background-color: #f5f5f5;
border: 1px solid #ddd; border: 1px solid #ddd;

View file

@ -362,35 +362,6 @@
gap: 4px; gap: 4px;
} }
</style> </style>
<script>
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
<script src="./renderer.js"></script> <script src="./renderer.js"></script>
</html> </html>

View file

@ -436,14 +436,6 @@
max-width: 90%; max-width: 90%;
} }
div.khoj-banner-container {
background: linear-gradient(-45deg, #FFC107, #FF9800, #FF5722, #FF9800, #FFC107);
background-size: 400% 400%;
animation: gradient 15s ease infinite;
text-align: center;
padding: 10px;
}
@keyframes gradient { @keyframes gradient {
0% { 0% {
background-position: 0% 50%; background-position: 0% 50%;
@ -460,57 +452,5 @@
text-align: center; text-align: center;
} }
button#khoj-banner-submit,
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid #475569;
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
@media only screen and (max-width: 600px) {
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
}
</style> </style>
<script>
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
</html> </html>

View file

@ -159,10 +159,8 @@ def configure_middleware(app):
app.add_middleware(SessionMiddleware, secret_key=os.environ.get("KHOJ_DJANGO_SECRET_KEY", "!secret")) app.add_middleware(SessionMiddleware, secret_key=os.environ.get("KHOJ_DJANGO_SECRET_KEY", "!secret"))
if not state.demo: @schedule.repeat(schedule.every(61).minutes)
def update_search_index():
@schedule.repeat(schedule.every(61).minutes)
def update_search_index():
try: try:
logger.info("📬 Updating content index via Scheduler") logger.info("📬 Updating content index via Scheduler")
for user in get_all_users(): for user in get_all_users():

View file

@ -106,21 +106,6 @@ img.khoj-logo {
justify-self: center; justify-self: center;
} }
a.khoj-banner {
color: black;
text-decoration: none;
}
p.khoj-banner {
font-size: medium;
margin: 0;
padding: 10px;
}
p#khoj-banner {
display: inline;
}
/* Dropdown in navigation menu*/ /* Dropdown in navigation menu*/
#khoj-nav-menu-container { #khoj-nav-menu-container {
display: flex; display: flex;

View file

@ -53,10 +53,10 @@
justify-self: center; justify-self: center;
} }
.api-settings { div.section-manage-files,
div.api-settings {
display: grid; display: grid;
grid-template-columns: 1fr; grid-template-columns: 1fr;
grid-template-rows: 1fr 1fr auto;
justify-items: start; justify-items: start;
gap: 8px; gap: 8px;
padding: 24px 24px; padding: 24px 24px;
@ -65,9 +65,19 @@
border-radius: 4px; border-radius: 4px;
box-shadow: 0px 1px 3px 0px rgba(0,0,0,0.1),0px 1px 2px -1px rgba(0,0,0,0.8); box-shadow: 0px 1px 3px 0px rgba(0,0,0,0.1),0px 1px 2px -1px rgba(0,0,0,0.8);
} }
div.section-manage-files {
width: 640px;
}
div.api-settings {
grid-template-rows: 1fr 1fr auto;
}
#api-settings-card-description { #api-settings-card-description {
margin: 8px 0 0 0; margin: 8px 0 0 0;
} }
#api-settings-keys-table { #api-settings-keys-table {
margin-bottom: 16px; margin-bottom: 16px;
} }
@ -184,6 +194,37 @@
text-align: left; text-align: left;
} }
/* Hover state of the per-file remove button: lighter background and a pointer cursor */
button.remove-file-button:hover {
background-color: rgb(255 235 235);
border-radius: 3px;
border: none;
color: var(--flower);
padding: 4px;
cursor: pointer;
}
/* Resting state of the per-file remove button */
button.remove-file-button {
background-color: rgb(253 214 214);
border-radius: 3px;
border: none;
color: var(--flower);
padding: 4px;
}
/* One row of the indexed files list: a flexible first column and an auto-sized second column, drawn as a bordered card */
div.file-element {
display: grid;
grid-template-columns: 1fr auto;
border: 1px solid rgb(229, 229, 229);
border-radius: 4px;
box-shadow: 0px 1px 3px 0px rgba(0,0,0,0.1),0px 1px 2px -1px rgba(0,0,0,0.8);
padding: 4px;
margin-bottom: 8px;
}
/* Right-align the contents of the remove button wrapper */
div.remove-button-container {
text-align: right;
}
button.card-button.happy { button.card-button.happy {
color: var(--leaf); color: var(--leaf);
} }
@ -246,6 +287,11 @@
cursor: pointer; cursor: pointer;
} }
a {
color: #3b82f6;
text-decoration: none;
}
@media screen and (max-width: 700px) { @media screen and (max-width: 700px) {
.section-cards { .section-cards {
grid-template-columns: 1fr; grid-template-columns: 1fr;
@ -255,7 +301,7 @@
body { body {
display: grid; display: grid;
grid-template-columns: 1fr; grid-template-columns: 1fr;
grid-template-rows: 1fr auto auto auto minmax(80px, 100%); grid-template-rows: 1fr repeat(4, auto);
} }
body > * { body > * {
grid-column: 1; grid-column: 1;
@ -281,9 +327,14 @@
grid-template-columns: auto; grid-template-columns: auto;
} }
div.section-manage-files,
div.api-settings { div.api-settings {
width: auto; width: auto;
} }
div.finalize-buttons {
padding: 0;
}
} }
</style> </style>
</html> </html>

View file

@ -165,7 +165,7 @@
function incrementalChat(event) { function incrementalChat(event) {
if (!event.shiftKey && event.key === 'Enter') { if (!event.shiftKey && event.key === 'Enter') {
e.preventDefault(); event.preventDefault();
chat(); chat();
} }
} }
@ -261,17 +261,7 @@
} }
</script> </script>
<body> <body>
<div id="khoj-banner-container" class="khoj-banner-container"> <div id="khoj-empty-container" class="khoj-empty-container">
{% if demo %}
<!-- Banner linking to https://khoj.dev -->
<a class="khoj-banner" href="https://khoj.dev" target="_blank">
<p id="khoj-banner" class="khoj-banner">
Enroll in Khoj cloud to get your own assistant
</p>
</a>
<input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
<button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
{% endif %}
</div> </div>
<!--Add Header Logo and Nav Pane--> <!--Add Header Logo and Nav Pane-->
@ -480,12 +470,6 @@
margin: 4px; margin: 4px;
grid-template-columns: auto; grid-template-columns: auto;
} }
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
} }
@media only screen and (min-width: 700px) { @media only screen and (min-width: 700px) {
body { body {
@ -497,14 +481,6 @@
} }
} }
div.khoj-banner-container {
background: linear-gradient(-45deg, #FFC107, #FF9800, #FF5722, #FF9800, #FFC107);
background-size: 400% 400%;
animation: gradient 15s ease infinite;
text-align: center;
padding: 10px;
}
div#chat-tooltip { div#chat-tooltip {
text-align: left; text-align: left;
font-size: medium; font-size: medium;
@ -526,19 +502,7 @@
text-align: center; text-align: center;
} }
button#khoj-banner-submit, div.khoj-empty-container {
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid var(--main-text-color);
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
div.khoj-banner-container-hidden {
margin: 0px; margin: 0px;
padding: 0px; padding: 0px;
} }
@ -558,39 +522,4 @@
white-space: pre-wrap; white-space: pre-wrap;
} }
</style> </style>
<script>
if ("{{demo}}" === "False") {
document.getElementById("khoj-banner-container").classList.remove("khoj-banner-container");
document.getElementById("khoj-banner-container").classList.add("khoj-banner-container-hidden");
}
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
</html> </html>

View file

@ -67,130 +67,6 @@
</div> </div>
{% endif %} {% endif %}
</div> </div>
<div class="card">
<div class="card-title-row">
<img class="card-icon" src="/static/assets/icons/markdown.svg" alt="markdown">
<h3 class="card-title">
Markdown
{% if current_model_state.markdown == True%}
<img id="configured-icon-markdown" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
{% endif %}
</h3>
</div>
<div class="card-description-row">
<p class="card-description">Set markdown files to index</p>
</div>
<div class="card-action-row">
<a class="card-button" href="/config/content_type/markdown">
{% if current_model_state.markdown %}
Update
{% else %}
Setup
{% endif %}
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
</a>
</div>
{% if current_model_state.markdown %}
<div id="clear-markdown" class="card-action-row">
<button class="card-button" onclick="clearContentType('markdown')">
Disable
</button>
</div>
{% endif %}
</div>
<div class="card">
<div class="card-title-row">
<img class="card-icon" src="/static/assets/icons/org.svg" alt="org">
<h3 class="card-title">
Org
{% if current_model_state.org == True %}
<img id="configured-icon-org" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
{% endif %}
</h3>
</div>
<div class="card-description-row">
<p class="card-description">Set org files to index</p>
</div>
<div class="card-action-row">
<a class="card-button" href="/config/content_type/org">
{% if current_model_state.org %}
Update
{% else %}
Setup
{% endif %}
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
</a>
</div>
{% if current_model_state.org %}
<div id="clear-org" class="card-action-row">
<button class="card-button" onclick="clearContentType('org')">
Disable
</button>
</div>
{% endif %}
</div>
<div class="card">
<div class="card-title-row">
<img class="card-icon" src="/static/assets/icons/pdf.svg" alt="PDF">
<h3 class="card-title">
PDF
{% if current_model_state.pdf == True %}
<img id="configured-icon-pdf" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
{% endif %}
</h3>
</div>
<div class="card-description-row">
<p class="card-description">Set PDF files to index</p>
</div>
<div class="card-action-row">
<a class="card-button" href="/config/content_type/pdf">
{% if current_model_state.pdf %}
Update
{% else %}
Setup
{% endif %}
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
</a>
</div>
{% if current_model_state.pdf %}
<div id="clear-pdf" class="card-action-row">
<button class="card-button" onclick="clearContentType('pdf')">
Disable
</button>
</div>
{% endif %}
</div>
<div class="card">
<div class="card-title-row">
<img class="card-icon" src="/static/assets/icons/plaintext.svg" alt="Plaintext">
<h3 class="card-title">
Plaintext
{% if current_model_state.plaintext == True %}
<img id="configured-icon-plaintext" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
{% endif %}
</h3>
</div>
<div class="card-description-row">
<p class="card-description">Set Plaintext files to index</p>
</div>
<div class="card-action-row">
<a class="card-button" href="/config/content_type/plaintext">
{% if current_model_state.plaintext %}
Update
{% else %}
Setup
{% endif %}
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
</a>
</div>
{% if current_model_state.plaintext %}
<div id="clear-plaintext" class="card-action-row">
<button class="card-button" onclick="clearContentType('plaintext')">
Disable
</button>
</div>
{% endif %}
</div>
</div> </div>
</div> </div>
<div class="section"> <div class="section">
@ -246,6 +122,16 @@
</div> </div>
</div> </div>
</div> </div>
<!-- Manage Data: lists the user's indexed files and offers a control to remove all of them -->
<div class="section">
    <h2 class="section-title">Manage Data</h2>
    <div class="section-manage-files">
        <!-- The page script looks up id "delete-all-files" to show/hide this wrapper and to attach the click handler.
             The id must be unique in the document, so the button inside carries its own id. -->
        <div id="delete-all-files" class="delete-all-files">
            <button id="delete-all-files-button" type="submit" title="Delete all indexed files">🗑️ Remove All</button>
        </div>
        <!-- Filled in by the page script with one row per indexed file -->
        <div class="indexed-files">
        </div>
    </div>
</div>
<div class="section general-settings"> <div class="section general-settings">
<div id="results-count" title="Number of items to show in search and use for chat response"> <div id="results-count" title="Number of items to show in search and use for chat response">
<label for="results-count-slider">Results Count: <span id="results-count-value">5</span></label> <label for="results-count-slider">Results Count: <span id="results-count-value">5</span></label>
@ -291,8 +177,8 @@
}; };
function clearContentType(content_type) { function clearContentType(content_type) {
fetch('/api/delete/config/data/content_type/' + content_type, { fetch('/api/config/data/content_type/' + content_type, {
method: 'POST', method: 'DELETE',
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
} }
@ -462,5 +348,84 @@
// List user's API keys on page load // List user's API keys on page load
listApiKeys(); listApiKeys();
// Ask the server to delete the indexed entries of a single file, then refresh the file list.
// path: file path as shown in the indexed files list.
function removeFile(path) {
    // Percent-encode the path: characters such as "&", "#", "+", "?" or spaces would
    // otherwise truncate or corrupt the "filename" query parameter.
    fetch('/api/config/data/file?filename=' + encodeURIComponent(path), {
        method: 'DELETE',
        headers: {
            'Content-Type': 'application/json',
        }
    })
    .then(response => response.json())
    .then(data => {
        // Only re-render the list once the server confirms the deletion
        if (data.status == "ok") {
            getAllFilenames();
        }
    })
    .catch((error) => {
        // Surface network or JSON parsing failures instead of leaving an unhandled rejection
        console.error('Error:', error);
    });
}
// Get all currently indexed files
// Fetches the list of file names from /api/config/data/all and rebuilds the
// "indexed-files" container: one row per file with a button that removes it.
// When the list is empty, the "delete-all-files" element is hidden and a hint is shown instead.
function getAllFilenames() {
    fetch('/api/config/data/all')
        .then(response => response.json())
        .then(data => {
            const indexedFiles = document.getElementsByClassName("indexed-files")[0];
            // Drop rows rendered by a previous call
            indexedFiles.innerHTML = "";

            if (data.length == 0) {
                document.getElementById("delete-all-files").style.display = "none";
                indexedFiles.innerHTML = "<div>Use the <a href='https://download.khoj.dev'>Khoj Desktop client</a> to index files.</div>";
            } else {
                document.getElementById("delete-all-files").style.display = "block";
            }

            for (const filename of data) {
                const fileElement = document.createElement("div");
                fileElement.classList.add("file-element");

                const fileNameElement = document.createElement("div");
                fileNameElement.classList.add("content-name");
                // File names come from the server; set them as text so any markup
                // characters in a name are displayed literally rather than parsed as HTML.
                fileNameElement.textContent = filename;
                fileElement.appendChild(fileNameElement);

                const buttonContainer = document.createElement("div");
                buttonContainer.classList.add("remove-button-container");

                const removeFileButton = document.createElement("button");
                removeFileButton.classList.add("remove-file-button");
                removeFileButton.innerHTML = "🗑️";
                // `filename` is block-scoped per iteration, so each handler removes its own file
                removeFileButton.addEventListener("click", () => {
                    removeFile(filename);
                });

                buttonContainer.appendChild(removeFileButton);
                fileElement.appendChild(buttonContainer);
                indexedFiles.appendChild(fileElement);
            }
        })
        .catch((error) => {
            console.error('Error:', error);
        });
}
// Get all currently indexed files on page load
getAllFilenames();
// Wire up the "Remove All" control: on click, ask the server to delete all of the
// user's indexed data and refresh the file list once it confirms.
let deleteAllFilesButton = document.getElementById("delete-all-files");
deleteAllFilesButton.addEventListener("click", function(event) {
    event.preventDefault();
    fetch('/api/config/data/all', {
        method: 'DELETE',
        headers: {
            'Content-Type': 'application/json',
        }
    })
    .then(response => response.json())
    .then(data => {
        if (data.status == "ok") {
            getAllFilenames();
        }
    })
    .catch((error) => {
        // Log network or JSON parsing failures, as getAllFilenames does, instead of leaving an unhandled rejection
        console.error('Error:', error);
    });
});
</script> </script>
{% endblock %} {% endblock %}

View file

@ -10,18 +10,6 @@
</head> </head>
<body> <body>
{% if demo %}
<!-- Banner linking to https://khoj.dev -->
<div class="khoj-banner-container">
<a class="khoj-banner" href="https://khoj.dev" target="_blank">
<p id="khoj-banner" class="khoj-banner">
Enroll in Khoj cloud to get your own assistant
</p>
</a>
<input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
<button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
</div>
{% endif %}
<div class="khoj-header"></div> <div class="khoj-header"></div>
<!-- Login Modal --> <!-- Login Modal -->
@ -106,19 +94,6 @@
justify-self: center; justify-self: center;
} }
button#khoj-banner-submit,
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid #475569;
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
div#login-modal { div#login-modal {
display: grid; display: grid;
grid-template-columns: 1fr; grid-template-columns: 1fr;
@ -143,12 +118,6 @@
} }
@media only screen and (max-width: 700px) { @media only screen and (max-width: 700px) {
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
div#login-modal { div#login-modal {
margin-left: 10%; margin-left: 10%;
margin-right: 10%; margin-right: 10%;
@ -156,34 +125,5 @@
} }
</style> </style>
<script>
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
<script src="https://accounts.google.com/gsi/client" async defer></script> <script src="https://accounts.google.com/gsi/client" async defer></script>
</html> </html>

View file

@ -270,19 +270,6 @@
</script> </script>
<body> <body>
{% if demo %}
<!-- Banner linking to https://khoj.dev -->
<div class="khoj-banner-container">
<a class="khoj-banner" href="https://khoj.dev" target="_blank">
<p id="khoj-banner" class="khoj-banner">
Enroll in Khoj cloud to get your own assistant
</p>
</a>
<input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
<button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
</div>
{% endif %}
<!--Add Header Logo and Nav Pane--> <!--Add Header Logo and Nav Pane-->
{% import 'utils.html' as utils %} {% import 'utils.html' as utils %}
{{ utils.heading_pane(user_photo, username) }} {{ utils.heading_pane(user_photo, username) }}
@ -458,14 +445,6 @@
max-width: 90%; max-width: 90%;
} }
div.khoj-banner-container {
background: linear-gradient(-45deg, #FFC107, #FF9800, #FF5722, #FF9800, #FFC107);
background-size: 400% 400%;
animation: gradient 15s ease infinite;
text-align: center;
padding: 10px;
}
@keyframes gradient { @keyframes gradient {
0% { 0% {
background-position: 0% 50%; background-position: 0% 50%;
@ -482,57 +461,6 @@
text-align: center; text-align: center;
} }
button#khoj-banner-submit,
input#khoj-banner-email {
padding: 10px;
border-radius: 5px;
border: 1px solid var(--main-text-color);
background: #f9fafc;
}
button#khoj-banner-submit:hover,
input#khoj-banner-email:hover {
box-shadow: 0 0 11px #aaa;
}
@media only screen and (max-width: 700px) {
a.khoj-banner {
display: block;
}
p.khoj-banner {
padding: 0;
}
}
</style> </style>
<script>
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email
}),
headers: {
"Content-Type": "application/json"
}
}).then(function(response) {
return response.json();
}).then(function(data) {
console.log(data);
if (data.user != null) {
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
document.getElementById("khoj-banner-submit").remove();
} else {
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
}
}).catch(function(error) {
console.log(error);
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
});
});
</script>
</html> </html>

View file

@ -119,7 +119,6 @@ def set_state(args):
state.verbose = args.verbose state.verbose = args.verbose
state.host = args.host state.host = args.host
state.port = args.port state.port = args.port
state.demo = args.demo
state.anonymous_mode = args.anonymous_mode state.anonymous_mode = args.anonymous_mode
state.khoj_version = version("khoj-assistant") state.khoj_version = version("khoj-assistant")
state.chat_on_gpu = args.chat_on_gpu state.chat_on_gpu = args.chat_on_gpu

View file

@ -68,13 +68,17 @@ class PdfToEntries(TextToEntries):
with open(f"{tmp_file}", "wb") as f: with open(f"{tmp_file}", "wb") as f:
bytes = pdf_files[pdf_file] bytes = pdf_files[pdf_file]
f.write(bytes) f.write(bytes)
try:
loader = PyMuPDFLoader(f"{tmp_file}", extract_images=True)
pdf_entries_per_file = [page.page_content for page in loader.load()]
except ImportError:
loader = PyMuPDFLoader(f"{tmp_file}") loader = PyMuPDFLoader(f"{tmp_file}")
pdf_entries_per_file = [page.page_content for page in loader.load()] pdf_entries_per_file = [page.page_content for page in loader.load()]
entry_to_location_map += zip(pdf_entries_per_file, [pdf_file] * len(pdf_entries_per_file)) entry_to_location_map += zip(pdf_entries_per_file, [pdf_file] * len(pdf_entries_per_file))
entries.extend(pdf_entries_per_file) entries.extend(pdf_entries_per_file)
except Exception as e: except Exception as e:
logger.warning(f"Unable to process file: {pdf_file}. This file will not be indexed.") logger.warning(f"Unable to process file: {pdf_file}. This file will not be indexed.")
logger.warning(e) logger.warning(e, exc_info=True)
finally: finally:
if os.path.exists(f"{tmp_file}"): if os.path.exists(f"{tmp_file}"):
os.remove(f"{tmp_file}") os.remove(f"{tmp_file}")

View file

@ -45,7 +45,15 @@ from fastapi.requests import Request
from database import adapters from database import adapters
from database.adapters import EntryAdapters, ConversationAdapters from database.adapters import EntryAdapters, ConversationAdapters
from database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig, KhojUser from database.models import (
LocalMarkdownConfig,
LocalOrgConfig,
LocalPdfConfig,
LocalPlaintextConfig,
KhojUser,
GithubConfig,
NotionConfig,
)
# Initialize Router # Initialize Router
@ -54,14 +62,10 @@ logger = logging.getLogger(__name__)
def map_config_to_object(content_type: str): def map_config_to_object(content_type: str):
if content_type == "org": if content_type == "github":
return LocalOrgConfig return GithubConfig
if content_type == "markdown": if content_type == "notion":
return LocalMarkdownConfig return NotionConfig
if content_type == "pdf":
return LocalPdfConfig
if content_type == "plaintext":
return LocalPlaintextConfig
async def map_config_to_db(config: FullConfig, user: KhojUser): async def map_config_to_db(config: FullConfig, user: KhojUser):
@ -111,29 +115,28 @@ async def map_config_to_db(config: FullConfig, user: KhojUser):
) )
# If it's a demo instance, prevent updating any of the configuration. def _initialize_config():
if not state.demo:
def _initialize_config():
if state.config is None: if state.config is None:
state.config = FullConfig() state.config = FullConfig()
state.config.search_type = SearchConfig.parse_obj(constants.default_config["search-type"]) state.config.search_type = SearchConfig.parse_obj(constants.default_config["search-type"])
@api.get("/config/data", response_model=FullConfig)
@requires(["authenticated"]) @api.get("/config/data", response_model=FullConfig)
def get_config_data(request: Request): @requires(["authenticated"])
def get_config_data(request: Request):
user = request.user.object user = request.user.object
EntryAdapters.get_unique_file_types(user) EntryAdapters.get_unique_file_types(user)
return state.config return state.config
@api.post("/config/data")
@requires(["authenticated"]) @api.post("/config/data")
async def set_config_data( @requires(["authenticated"])
async def set_config_data(
request: Request, request: Request,
updated_config: FullConfig, updated_config: FullConfig,
client: Optional[str] = None, client: Optional[str] = None,
): ):
user = request.user.object user = request.user.object
await map_config_to_db(updated_config, user) await map_config_to_db(updated_config, user)
@ -160,13 +163,14 @@ if not state.demo:
) )
return state.config return state.config
@api.post("/config/data/content_type/github", status_code=200)
@requires(["authenticated"]) @api.post("/config/data/content_type/github", status_code=200)
async def set_content_config_github_data( @requires(["authenticated"])
async def set_content_config_github_data(
request: Request, request: Request,
updated_config: Union[GithubContentConfig, None], updated_config: Union[GithubContentConfig, None],
client: Optional[str] = None, client: Optional[str] = None,
): ):
_initialize_config() _initialize_config()
user = request.user.object user = request.user.object
@ -187,13 +191,14 @@ if not state.demo:
return {"status": "ok"} return {"status": "ok"}
@api.post("/config/data/content_type/notion", status_code=200)
@requires(["authenticated"]) @api.post("/config/data/content_type/notion", status_code=200)
async def set_content_config_notion_data( @requires(["authenticated"])
async def set_content_config_notion_data(
request: Request, request: Request,
updated_config: Union[NotionContentConfig, None], updated_config: Union[NotionContentConfig, None],
client: Optional[str] = None, client: Optional[str] = None,
): ):
_initialize_config() _initialize_config()
user = request.user.object user = request.user.object
@ -213,13 +218,14 @@ if not state.demo:
return {"status": "ok"} return {"status": "ok"}
@api.post("/delete/config/data/content_type/{content_type}", status_code=200)
@requires(["authenticated"]) @api.delete("/config/data/content_type/{content_type}", status_code=200)
async def remove_content_config_data( @requires(["authenticated"])
async def remove_content_config_data(
request: Request, request: Request,
content_type: str, content_type: str,
client: Optional[str] = None, client: Optional[str] = None,
): ):
user = request.user.object user = request.user.object
update_telemetry_state( update_telemetry_state(
@ -240,38 +246,73 @@ if not state.demo:
enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user) enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user)
return {"status": "ok"} return {"status": "ok"}
@api.post("/config/data/content_type/{content_type}", status_code=200)
@requires(["authenticated"]) @api.delete("/config/data/file", status_code=200)
async def set_content_config_data( @requires(["authenticated"])
async def remove_file_data(
request: Request, request: Request,
content_type: str, filename: str,
updated_config: Union[TextContentConfig, None],
client: Optional[str] = None, client: Optional[str] = None,
): ):
_initialize_config()
user = request.user.object user = request.user.object
content_object = map_config_to_object(content_type)
await adapters.set_text_content_config(user, content_object, updated_config)
update_telemetry_state( update_telemetry_state(
request=request, request=request,
telemetry_type="api", telemetry_type="api",
api="set_content_config", api="delete_file",
client=client, client=client,
metadata={"content_type": content_type},
) )
await EntryAdapters.adelete_entry_by_file(user, filename)
return {"status": "ok"} return {"status": "ok"}
@api.post("/config/data/conversation/model", status_code=200)
@requires(["authenticated"]) @api.get("/config/data/all", response_model=List[str])
async def update_chat_model( @requires(["authenticated"])
async def get_all_filenames(
    request: Request,
    client: Optional[str] = None,
):
    """Return, as a list, what EntryAdapters.aget_all_filenames gives for the requesting user.

    The optional `client` value is only passed along to telemetry.
    """
    requesting_user = request.user.object

    # Record the API call before looking anything up
    telemetry_fields = {
        "request": request,
        "telemetry_type": "api",
        "api": "get_all_filenames",
        "client": client,
    }
    update_telemetry_state(**telemetry_fields)

    # Build the list through sync_to_async; presumably the adapter hands back
    # a lazy database query that must not be evaluated on the event loop (to confirm)
    collect_as_list = sync_to_async(list)
    return await collect_as_list(EntryAdapters.aget_all_filenames(requesting_user))
@api.delete("/config/data/all", status_code=200)
@requires(["authenticated"])
async def remove_all_config_data(
    request: Request,
    client: Optional[str] = None,
):
    """Delete the requesting user's entries via EntryAdapters.adelete_all_entries.

    The optional `client` value is only passed along to telemetry.
    Responds with {"status": "ok"} once the delete call has completed.
    """
    requesting_user = request.user.object

    # Record the API call before removing anything
    telemetry_fields = {
        "request": request,
        "telemetry_type": "api",
        "api": "delete_all_config",
        "client": client,
    }
    update_telemetry_state(**telemetry_fields)

    # No file type is passed, so the adapter is not asked to narrow the delete
    await EntryAdapters.adelete_all_entries(requesting_user)

    response = {"status": "ok"}
    return response
@api.post("/config/data/conversation/model", status_code=200)
@requires(["authenticated"])
async def update_chat_model(
request: Request, request: Request,
id: str, id: str,
client: Optional[str] = None, client: Optional[str] = None,
): ):
user = request.user.object user = request.user.object
new_config = await ConversationAdapters.aset_user_conversation_processor(user, int(id)) new_config = await ConversationAdapters.aset_user_conversation_processor(user, int(id))
@ -377,6 +418,7 @@ async def search(
SearchType.Github, SearchType.Github,
SearchType.Notion, SearchType.Notion,
SearchType.Plaintext, SearchType.Plaintext,
SearchType.Pdf,
]: ]:
# query markdown notes # query markdown notes
search_futures += [ search_futures += [

View file

@ -38,7 +38,6 @@ def index(request: Request):
"chat.html", "chat.html",
context={ context={
"request": request, "request": request,
"demo": state.demo,
"username": user.username, "username": user.username,
"user_photo": user_picture, "user_photo": user_picture,
}, },
@ -55,7 +54,6 @@ def index_post(request: Request):
"chat.html", "chat.html",
context={ context={
"request": request, "request": request,
"demo": state.demo,
"username": user.username, "username": user.username,
"user_photo": user_picture, "user_photo": user_picture,
}, },
@ -72,7 +70,6 @@ def search_page(request: Request):
"search.html", "search.html",
context={ context={
"request": request, "request": request,
"demo": state.demo,
"username": user.username, "username": user.username,
"user_photo": user_picture, "user_photo": user_picture,
}, },
@ -89,7 +86,6 @@ def chat_page(request: Request):
"chat.html", "chat.html",
context={ context={
"request": request, "request": request,
"demo": state.demo,
"username": user.username, "username": user.username,
"user_photo": user_picture, "user_photo": user_picture,
}, },
@ -107,7 +103,6 @@ def login_page(request: Request):
"login.html", "login.html",
context={ context={
"request": request, "request": request,
"demo": state.demo,
"google_client_id": google_client_id, "google_client_id": google_client_id,
"redirect_uri": redirect_uri, "redirect_uri": redirect_uri,
}, },
@ -125,11 +120,9 @@ def map_config_to_object(content_type: str):
return LocalPlaintextConfig return LocalPlaintextConfig
if not state.demo: @web_client.get("/config", response_class=HTMLResponse)
@requires(["authenticated"], redirect="login_page")
@web_client.get("/config", response_class=HTMLResponse) def config_page(request: Request):
@requires(["authenticated"], redirect="login_page")
def config_page(request: Request):
user = request.user.object user = request.user.object
user_picture = request.session.get("user", {}).get("picture") user_picture = request.session.get("user", {}).get("picture")
enabled_content = set(EntryAdapters.get_unique_file_types(user).all()) enabled_content = set(EntryAdapters.get_unique_file_types(user).all())
@ -154,9 +147,7 @@ if not state.demo:
conversation_options = ConversationAdapters.get_conversation_processor_options().all() conversation_options = ConversationAdapters.get_conversation_processor_options().all()
all_conversation_options = list() all_conversation_options = list()
for conversation_option in conversation_options: for conversation_option in conversation_options:
all_conversation_options.append( all_conversation_options.append({"chat_model": conversation_option.chat_model, "id": conversation_option.id})
{"chat_model": conversation_option.chat_model, "id": conversation_option.id}
)
selected_conversation_config = ConversationAdapters.get_conversation_config(user) selected_conversation_config = ConversationAdapters.get_conversation_config(user)
@ -168,16 +159,15 @@ if not state.demo:
"anonymous_mode": state.anonymous_mode, "anonymous_mode": state.anonymous_mode,
"username": user.username if user else None, "username": user.username if user else None,
"conversation_options": all_conversation_options, "conversation_options": all_conversation_options,
"selected_conversation_config": selected_conversation_config.id "selected_conversation_config": selected_conversation_config.id if selected_conversation_config else None,
if selected_conversation_config
else None,
"user_photo": user_picture, "user_photo": user_picture,
}, },
) )
@web_client.get("/config/content_type/github", response_class=HTMLResponse)
@requires(["authenticated"], redirect="login_page") @web_client.get("/config/content_type/github", response_class=HTMLResponse)
def github_config_page(request: Request): @requires(["authenticated"], redirect="login_page")
def github_config_page(request: Request):
user = request.user.object user = request.user.object
user_picture = request.session.get("user", {}).get("picture") user_picture = request.session.get("user", {}).get("picture")
current_github_config = get_user_github_config(user) current_github_config = get_user_github_config(user)
@ -211,9 +201,10 @@ if not state.demo:
}, },
) )
@web_client.get("/config/content_type/notion", response_class=HTMLResponse)
@requires(["authenticated"], redirect="login_page") @web_client.get("/config/content_type/notion", response_class=HTMLResponse)
def notion_config_page(request: Request): @requires(["authenticated"], redirect="login_page")
def notion_config_page(request: Request):
user = request.user.object user = request.user.object
user_picture = request.session.get("user", {}).get("picture") user_picture = request.session.get("user", {}).get("picture")
current_notion_config = get_user_notion_config(user) current_notion_config = get_user_notion_config(user)
@ -234,9 +225,10 @@ if not state.demo:
}, },
) )
@web_client.get("/config/content_type/{content_type}", response_class=HTMLResponse)
@requires(["authenticated"], redirect="login_page") @web_client.get("/config/content_type/{content_type}", response_class=HTMLResponse)
def content_config_page(request: Request, content_type: str): @requires(["authenticated"], redirect="login_page")
def content_config_page(request: Request, content_type: str):
if content_type not in VALID_TEXT_CONTENT_TYPES: if content_type not in VALID_TEXT_CONTENT_TYPES:
return templates.TemplateResponse("config.html", context={"request": request}) return templates.TemplateResponse("config.html", context={"request": request})

View file

@ -42,7 +42,6 @@ def cli(args=None):
parser.add_argument( parser.add_argument(
"--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model" "--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
) )
parser.add_argument("--demo", action="store_true", default=False, help="Run Khoj in demo mode")
parser.add_argument( parser.add_argument(
"--anonymous-mode", "--anonymous-mode",
action="store_true", action="store_true",

View file

@ -31,7 +31,6 @@ config_lock = threading.Lock()
chat_lock = threading.Lock() chat_lock = threading.Lock()
SearchType = utils_config.SearchType SearchType = utils_config.SearchType
telemetry: List[Dict[str, str]] = [] telemetry: List[Dict[str, str]] = []
demo: bool = False
khoj_version: str = None khoj_version: str = None
device = get_device() device = get_device()
chat_on_gpu: bool = True chat_on_gpu: bool = True

BIN
tests/data/pdf/ocr_samples.pdf vendored Normal file

Binary file not shown.

View file

@ -50,6 +50,23 @@ def test_multi_page_pdf_to_jsonl():
assert len(jsonl_data) == 6 assert len(jsonl_data) == 6
def test_ocr_page_pdf_to_jsonl():
    "Ensure the OCR sample PDF yields a single entry containing its expected text."
    # Arrange
    # Load the sample Pdf as bytes, keyed by its path
    sample_pdf = "tests/data/pdf/ocr_samples.pdf"
    with open(sample_pdf, "rb") as pdf_file:
        pdf_files = {sample_pdf: pdf_file.read()}

    # Act
    # Extract entries from the sample Pdf, then convert them to entry maps
    extracted_entries, entry_to_file_map = PdfToEntries.extract_pdf_entries(pdf_files=pdf_files)
    entry_maps = PdfToEntries.convert_pdf_entries_to_maps(extracted_entries, entry_to_file_map)

    # Assert
    assert len(entry_maps) == 1
    assert "playing on a strip of marsh" in entry_maps[0].raw
def test_get_pdf_files(tmp_path): def test_get_pdf_files(tmp_path):
"Ensure Pdf files specified via input-filter, input-files extracted" "Ensure Pdf files specified via input-filter, input-files extracted"
# Arrange # Arrange