mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Merge pull request #534 from khoj-ai/features/code-config-cleanup
Small fixes and update config UI to manage indexed data
This commit is contained in:
commit
81a615d7dd
26 changed files with 538 additions and 800 deletions
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
|
@ -61,7 +61,7 @@ jobs:
|
|||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
run: |
|
||||
apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0
|
||||
apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
|
||||
|
||||
- name: ⬇️ Install Postgres
|
||||
env:
|
||||
|
|
|
@ -4,7 +4,7 @@ FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
|
|||
LABEL org.opencontainers.image.source https://github.com/khoj-ai/khoj
|
||||
|
||||
# Install System Dependencies
|
||||
RUN apt update -y && apt -y install python3-pip git
|
||||
RUN apt update -y && apt -y install python3-pip git libsqlite3-0 ffmpeg libsm6 libxext6
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
|
|
@ -73,6 +73,7 @@ dependencies = [
|
|||
"gunicorn == 21.2.0",
|
||||
"lxml == 4.9.3",
|
||||
"tzdata == 2023.3",
|
||||
"rapidocr-onnxruntime == 1.3.8"
|
||||
]
|
||||
dynamic = ["version"]
|
||||
|
||||
|
|
|
@ -291,8 +291,11 @@ class EntryAdapters:
|
|||
return deleted_count
|
||||
|
||||
@staticmethod
|
||||
def delete_all_entries(user: KhojUser, file_type: str):
|
||||
deleted_count, _ = Entry.objects.filter(user=user, file_type=file_type).delete()
|
||||
def delete_all_entries(user: KhojUser, file_type: str = None):
|
||||
if file_type is None:
|
||||
deleted_count, _ = Entry.objects.filter(user=user).delete()
|
||||
else:
|
||||
deleted_count, _ = Entry.objects.filter(user=user, file_type=file_type).delete()
|
||||
return deleted_count
|
||||
|
||||
@staticmethod
|
||||
|
@ -314,6 +317,18 @@ class EntryAdapters:
|
|||
async def user_has_entries(user: KhojUser):
|
||||
return await Entry.objects.filter(user=user).aexists()
|
||||
|
||||
@staticmethod
async def adelete_entry_by_file(user: KhojUser, file_path: str):
    """Asynchronously delete the entries of ``user`` that belong to one file.

    Args:
        user: Owner whose entries are considered.
        file_path: Value compared against each entry's ``file_path`` field.

    Returns:
        The awaited result of the queryset's ``adelete()`` call.
    """
    entries_for_file = Entry.objects.filter(user=user, file_path=file_path)
    return await entries_for_file.adelete()
|
||||
|
||||
@staticmethod
def aget_all_filenames(user: KhojUser):
    """Return the distinct ``file_path`` values of the entries owned by ``user``.

    NOTE: despite the ``a`` prefix this is a regular (non-async) function; it
    only builds and returns the queryset and awaits nothing itself.

    Args:
        user: Owner whose entries are considered.

    Returns:
        A flat ``values_list`` queryset over ``file_path``.
    """
    user_entries = Entry.objects.filter(user=user)
    one_row_per_path = user_entries.distinct("file_path")
    return one_row_per_path.values_list("file_path", flat=True)
|
||||
|
||||
@staticmethod
async def adelete_all_entries(user: KhojUser):
    """Asynchronously delete every entry owned by ``user``.

    Args:
        user: Owner whose entries are removed.

    Returns:
        The awaited result of the queryset's ``adelete()`` call.
    """
    user_entries = Entry.objects.filter(user=user)
    return await user_entries.adelete()
|
||||
|
||||
@staticmethod
|
||||
def apply_filters(user: KhojUser, query: str, file_type_filter: str = None):
|
||||
q_filter_terms = Q()
|
||||
|
|
|
@ -103,21 +103,6 @@ img.khoj-logo {
|
|||
justify-self: center;
|
||||
}
|
||||
|
||||
a.khoj-banner {
|
||||
color: black;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
p.khoj-banner {
|
||||
font-size: small;
|
||||
margin: 0;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
p#khoj-banner {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
@media only screen and (max-width: 600px) {
|
||||
div.khoj-header {
|
||||
display: grid;
|
||||
|
|
|
@ -274,8 +274,9 @@
|
|||
}
|
||||
</script>
|
||||
<body>
|
||||
<div id="khoj-banner-container" class="khoj-banner-container">
|
||||
<div id="khoj-empty-container" class="khoj-empty-container">
|
||||
</div>
|
||||
|
||||
<!--Add Header Logo and Nav Pane-->
|
||||
<div class="khoj-header">
|
||||
<a class="khoj-logo" href="/">
|
||||
|
@ -454,6 +455,11 @@
|
|||
border-bottom: 1px dotted #475569;
|
||||
}
|
||||
|
||||
div.khoj-empty-container {
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
@media (pointer: coarse), (hover: none) {
|
||||
abbr[title] {
|
||||
position: relative;
|
||||
|
@ -490,12 +496,6 @@
|
|||
margin: 4px;
|
||||
grid-template-columns: auto;
|
||||
}
|
||||
a.khoj-banner {
|
||||
display: block;
|
||||
}
|
||||
p.khoj-banner {
|
||||
padding: 0;
|
||||
}
|
||||
}
|
||||
@media only screen and (min-width: 600px) {
|
||||
body {
|
||||
|
@ -507,11 +507,6 @@
|
|||
}
|
||||
}
|
||||
|
||||
div.khoj-banner-container {
|
||||
padding: 0px;
|
||||
margin: 0px;
|
||||
}
|
||||
|
||||
div#chat-tooltip {
|
||||
text-align: left;
|
||||
font-size: medium;
|
||||
|
@ -533,23 +528,6 @@
|
|||
text-align: center;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit,
|
||||
input#khoj-banner-email {
|
||||
padding: 10px;
|
||||
border-radius: 5px;
|
||||
border: 1px solid #475569;
|
||||
background: #f9fafc;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit:hover,
|
||||
input#khoj-banner-email:hover {
|
||||
box-shadow: 0 0 11px #aaa;
|
||||
}
|
||||
div.khoj-banner-container-hidden {
|
||||
margin: 0px;
|
||||
padding: 0px;
|
||||
}
|
||||
|
||||
div.programmatic-output {
|
||||
background-color: #f5f5f5;
|
||||
border: 1px solid #ddd;
|
||||
|
|
|
@ -362,35 +362,6 @@
|
|||
gap: 4px;
|
||||
}
|
||||
</style>
|
||||
<script>
|
||||
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
|
||||
khojBannerSubmit?.addEventListener("click", function(event) {
|
||||
event.preventDefault();
|
||||
var email = document.getElementById("khoj-banner-email").value;
|
||||
fetch("https://app.khoj.dev/beta/users/", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
email: email
|
||||
}),
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
}).then(function(response) {
|
||||
return response.json();
|
||||
}).then(function(data) {
|
||||
console.log(data);
|
||||
if (data.user != null) {
|
||||
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
|
||||
document.getElementById("khoj-banner-submit").remove();
|
||||
} else {
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
}
|
||||
}).catch(function(error) {
|
||||
console.log(error);
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
});
|
||||
});
|
||||
</script>
|
||||
<script src="./renderer.js"></script>
|
||||
|
||||
</html>
|
||||
|
|
|
@ -436,14 +436,6 @@
|
|||
max-width: 90%;
|
||||
}
|
||||
|
||||
div.khoj-banner-container {
|
||||
background: linear-gradient(-45deg, #FFC107, #FF9800, #FF5722, #FF9800, #FFC107);
|
||||
background-size: 400% 400%;
|
||||
animation: gradient 15s ease infinite;
|
||||
text-align: center;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
@keyframes gradient {
|
||||
0% {
|
||||
background-position: 0% 50%;
|
||||
|
@ -460,57 +452,5 @@
|
|||
text-align: center;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit,
|
||||
input#khoj-banner-email {
|
||||
padding: 10px;
|
||||
border-radius: 5px;
|
||||
border: 1px solid #475569;
|
||||
background: #f9fafc;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit:hover,
|
||||
input#khoj-banner-email:hover {
|
||||
box-shadow: 0 0 11px #aaa;
|
||||
}
|
||||
|
||||
@media only screen and (max-width: 600px) {
|
||||
a.khoj-banner {
|
||||
display: block;
|
||||
}
|
||||
p.khoj-banner {
|
||||
padding: 0;
|
||||
}
|
||||
}
|
||||
|
||||
</style>
|
||||
<script>
|
||||
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
|
||||
khojBannerSubmit?.addEventListener("click", function(event) {
|
||||
event.preventDefault();
|
||||
var email = document.getElementById("khoj-banner-email").value;
|
||||
fetch("https://app.khoj.dev/beta/users/", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
email: email
|
||||
}),
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
}).then(function(response) {
|
||||
return response.json();
|
||||
}).then(function(data) {
|
||||
console.log(data);
|
||||
if (data.user != null) {
|
||||
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
|
||||
document.getElementById("khoj-banner-submit").remove();
|
||||
} else {
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
}
|
||||
}).catch(function(error) {
|
||||
console.log(error);
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
});
|
||||
});
|
||||
</script>
|
||||
|
||||
</html>
|
||||
|
|
|
@ -159,24 +159,22 @@ def configure_middleware(app):
|
|||
app.add_middleware(SessionMiddleware, secret_key=os.environ.get("KHOJ_DJANGO_SECRET_KEY", "!secret"))
|
||||
|
||||
|
||||
if not state.demo:
|
||||
|
||||
@schedule.repeat(schedule.every(61).minutes)
|
||||
def update_search_index():
|
||||
try:
|
||||
logger.info("📬 Updating content index via Scheduler")
|
||||
for user in get_all_users():
|
||||
all_files = collect_files(user=user)
|
||||
state.content_index = configure_content(
|
||||
state.content_index, state.config.content_type, all_files, state.search_models, user=user
|
||||
)
|
||||
all_files = collect_files(user=None)
|
||||
@schedule.repeat(schedule.every(61).minutes)
|
||||
def update_search_index():
|
||||
try:
|
||||
logger.info("📬 Updating content index via Scheduler")
|
||||
for user in get_all_users():
|
||||
all_files = collect_files(user=user)
|
||||
state.content_index = configure_content(
|
||||
state.content_index, state.config.content_type, all_files, state.search_models, user=None
|
||||
state.content_index, state.config.content_type, all_files, state.search_models, user=user
|
||||
)
|
||||
logger.info("📪 Content index updated via Scheduler")
|
||||
except Exception as e:
|
||||
logger.error(f"🚨 Error updating content index via Scheduler: {e}", exc_info=True)
|
||||
all_files = collect_files(user=None)
|
||||
state.content_index = configure_content(
|
||||
state.content_index, state.config.content_type, all_files, state.search_models, user=None
|
||||
)
|
||||
logger.info("📪 Content index updated via Scheduler")
|
||||
except Exception as e:
|
||||
logger.error(f"🚨 Error updating content index via Scheduler: {e}", exc_info=True)
|
||||
|
||||
|
||||
def configure_search_types(config: FullConfig):
|
||||
|
|
|
@ -106,21 +106,6 @@ img.khoj-logo {
|
|||
justify-self: center;
|
||||
}
|
||||
|
||||
a.khoj-banner {
|
||||
color: black;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
p.khoj-banner {
|
||||
font-size: medium;
|
||||
margin: 0;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
p#khoj-banner {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
/* Dropdown in navigation menu*/
|
||||
#khoj-nav-menu-container {
|
||||
display: flex;
|
||||
|
|
|
@ -53,10 +53,10 @@
|
|||
justify-self: center;
|
||||
}
|
||||
|
||||
.api-settings {
|
||||
div.section-manage-files,
|
||||
div.api-settings {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr;
|
||||
grid-template-rows: 1fr 1fr auto;
|
||||
justify-items: start;
|
||||
gap: 8px;
|
||||
padding: 24px 24px;
|
||||
|
@ -64,13 +64,23 @@
|
|||
border: 1px solid rgb(229, 229, 229);
|
||||
border-radius: 4px;
|
||||
box-shadow: 0px 1px 3px 0px rgba(0,0,0,0.1),0px 1px 2px -1px rgba(0,0,0,0.8);
|
||||
}
|
||||
#api-settings-card-description {
|
||||
}
|
||||
|
||||
div.section-manage-files {
|
||||
width: 640px;
|
||||
}
|
||||
|
||||
div.api-settings {
|
||||
grid-template-rows: 1fr 1fr auto;
|
||||
}
|
||||
|
||||
#api-settings-card-description {
|
||||
margin: 8px 0 0 0;
|
||||
}
|
||||
#api-settings-keys-table {
|
||||
}
|
||||
|
||||
#api-settings-keys-table {
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
}
|
||||
|
||||
div.instructions {
|
||||
font-size: large;
|
||||
|
@ -184,6 +194,37 @@
|
|||
text-align: left;
|
||||
}
|
||||
|
||||
button.remove-file-button:hover {
|
||||
background-color: rgb(255 235 235);
|
||||
border-radius: 3px;
|
||||
border: none;
|
||||
color: var(--flower);
|
||||
padding: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
button.remove-file-button {
|
||||
background-color: rgb(253 214 214);
|
||||
border-radius: 3px;
|
||||
border: none;
|
||||
color: var(--flower);
|
||||
padding: 4px;
|
||||
}
|
||||
|
||||
div.file-element {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr auto;
|
||||
border: 1px solid rgb(229, 229, 229);
|
||||
border-radius: 4px;
|
||||
box-shadow: 0px 1px 3px 0px rgba(0,0,0,0.1),0px 1px 2px -1px rgba(0,0,0,0.8);
|
||||
padding: 4px;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
div.remove-button-container {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
button.card-button.happy {
|
||||
color: var(--leaf);
|
||||
}
|
||||
|
@ -246,6 +287,11 @@
|
|||
cursor: pointer;
|
||||
}
|
||||
|
||||
a {
|
||||
color: #3b82f6;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
@media screen and (max-width: 700px) {
|
||||
.section-cards {
|
||||
grid-template-columns: 1fr;
|
||||
|
@ -255,7 +301,7 @@
|
|||
body {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr;
|
||||
grid-template-rows: 1fr auto auto auto minmax(80px, 100%);
|
||||
grid-template-rows: 1fr repeat(4, auto);
|
||||
}
|
||||
body > * {
|
||||
grid-column: 1;
|
||||
|
@ -281,9 +327,14 @@
|
|||
grid-template-columns: auto;
|
||||
}
|
||||
|
||||
div.section-manage-files,
|
||||
div.api-settings {
|
||||
width: auto;
|
||||
}
|
||||
|
||||
div.finalize-buttons {
|
||||
padding: 0;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</html>
|
||||
|
|
|
@ -165,7 +165,7 @@
|
|||
|
||||
function incrementalChat(event) {
|
||||
if (!event.shiftKey && event.key === 'Enter') {
|
||||
e.preventDefault();
|
||||
event.preventDefault();
|
||||
chat();
|
||||
}
|
||||
}
|
||||
|
@ -261,17 +261,7 @@
|
|||
}
|
||||
</script>
|
||||
<body>
|
||||
<div id="khoj-banner-container" class="khoj-banner-container">
|
||||
{% if demo %}
|
||||
<!-- Banner linking to https://khoj.dev -->
|
||||
<a class="khoj-banner" href="https://khoj.dev" target="_blank">
|
||||
<p id="khoj-banner" class="khoj-banner">
|
||||
Enroll in Khoj cloud to get your own assistant
|
||||
</p>
|
||||
</a>
|
||||
<input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
|
||||
<button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
|
||||
{% endif %}
|
||||
<div id="khoj-empty-container" class="khoj-empty-container">
|
||||
</div>
|
||||
|
||||
<!--Add Header Logo and Nav Pane-->
|
||||
|
@ -480,12 +470,6 @@
|
|||
margin: 4px;
|
||||
grid-template-columns: auto;
|
||||
}
|
||||
a.khoj-banner {
|
||||
display: block;
|
||||
}
|
||||
p.khoj-banner {
|
||||
padding: 0;
|
||||
}
|
||||
}
|
||||
@media only screen and (min-width: 700px) {
|
||||
body {
|
||||
|
@ -497,14 +481,6 @@
|
|||
}
|
||||
}
|
||||
|
||||
div.khoj-banner-container {
|
||||
background: linear-gradient(-45deg, #FFC107, #FF9800, #FF5722, #FF9800, #FFC107);
|
||||
background-size: 400% 400%;
|
||||
animation: gradient 15s ease infinite;
|
||||
text-align: center;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
div#chat-tooltip {
|
||||
text-align: left;
|
||||
font-size: medium;
|
||||
|
@ -526,19 +502,7 @@
|
|||
text-align: center;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit,
|
||||
input#khoj-banner-email {
|
||||
padding: 10px;
|
||||
border-radius: 5px;
|
||||
border: 1px solid var(--main-text-color);
|
||||
background: #f9fafc;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit:hover,
|
||||
input#khoj-banner-email:hover {
|
||||
box-shadow: 0 0 11px #aaa;
|
||||
}
|
||||
div.khoj-banner-container-hidden {
|
||||
div.khoj-empty-container {
|
||||
margin: 0px;
|
||||
padding: 0px;
|
||||
}
|
||||
|
@ -558,39 +522,4 @@
|
|||
white-space: pre-wrap;
|
||||
}
|
||||
</style>
|
||||
<script>
|
||||
if ("{{demo}}" === "False") {
|
||||
document.getElementById("khoj-banner-container").classList.remove("khoj-banner-container");
|
||||
document.getElementById("khoj-banner-container").classList.add("khoj-banner-container-hidden");
|
||||
}
|
||||
|
||||
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
|
||||
|
||||
khojBannerSubmit?.addEventListener("click", function(event) {
|
||||
event.preventDefault();
|
||||
var email = document.getElementById("khoj-banner-email").value;
|
||||
fetch("https://app.khoj.dev/beta/users/", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
email: email
|
||||
}),
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
}).then(function(response) {
|
||||
return response.json();
|
||||
}).then(function(data) {
|
||||
console.log(data);
|
||||
if (data.user != null) {
|
||||
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
|
||||
document.getElementById("khoj-banner-submit").remove();
|
||||
} else {
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
}
|
||||
}).catch(function(error) {
|
||||
console.log(error);
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
});
|
||||
});
|
||||
</script>
|
||||
</html>
|
||||
|
|
|
@ -67,130 +67,6 @@
|
|||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="card">
|
||||
<div class="card-title-row">
|
||||
<img class="card-icon" src="/static/assets/icons/markdown.svg" alt="markdown">
|
||||
<h3 class="card-title">
|
||||
Markdown
|
||||
{% if current_model_state.markdown == True%}
|
||||
<img id="configured-icon-markdown" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
|
||||
{% endif %}
|
||||
</h3>
|
||||
</div>
|
||||
<div class="card-description-row">
|
||||
<p class="card-description">Set markdown files to index</p>
|
||||
</div>
|
||||
<div class="card-action-row">
|
||||
<a class="card-button" href="/config/content_type/markdown">
|
||||
{% if current_model_state.markdown %}
|
||||
Update
|
||||
{% else %}
|
||||
Setup
|
||||
{% endif %}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
|
||||
</a>
|
||||
</div>
|
||||
{% if current_model_state.markdown %}
|
||||
<div id="clear-markdown" class="card-action-row">
|
||||
<button class="card-button" onclick="clearContentType('markdown')">
|
||||
Disable
|
||||
</button>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="card">
|
||||
<div class="card-title-row">
|
||||
<img class="card-icon" src="/static/assets/icons/org.svg" alt="org">
|
||||
<h3 class="card-title">
|
||||
Org
|
||||
{% if current_model_state.org == True %}
|
||||
<img id="configured-icon-org" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
|
||||
{% endif %}
|
||||
</h3>
|
||||
</div>
|
||||
<div class="card-description-row">
|
||||
<p class="card-description">Set org files to index</p>
|
||||
</div>
|
||||
<div class="card-action-row">
|
||||
<a class="card-button" href="/config/content_type/org">
|
||||
{% if current_model_state.org %}
|
||||
Update
|
||||
{% else %}
|
||||
Setup
|
||||
{% endif %}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
|
||||
</a>
|
||||
</div>
|
||||
{% if current_model_state.org %}
|
||||
<div id="clear-org" class="card-action-row">
|
||||
<button class="card-button" onclick="clearContentType('org')">
|
||||
Disable
|
||||
</button>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="card">
|
||||
<div class="card-title-row">
|
||||
<img class="card-icon" src="/static/assets/icons/pdf.svg" alt="PDF">
|
||||
<h3 class="card-title">
|
||||
PDF
|
||||
{% if current_model_state.pdf == True %}
|
||||
<img id="configured-icon-pdf" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
|
||||
{% endif %}
|
||||
</h3>
|
||||
</div>
|
||||
<div class="card-description-row">
|
||||
<p class="card-description">Set PDF files to index</p>
|
||||
</div>
|
||||
<div class="card-action-row">
|
||||
<a class="card-button" href="/config/content_type/pdf">
|
||||
{% if current_model_state.pdf %}
|
||||
Update
|
||||
{% else %}
|
||||
Setup
|
||||
{% endif %}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
|
||||
</a>
|
||||
</div>
|
||||
{% if current_model_state.pdf %}
|
||||
<div id="clear-pdf" class="card-action-row">
|
||||
<button class="card-button" onclick="clearContentType('pdf')">
|
||||
Disable
|
||||
</button>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="card">
|
||||
<div class="card-title-row">
|
||||
<img class="card-icon" src="/static/assets/icons/plaintext.svg" alt="Plaintext">
|
||||
<h3 class="card-title">
|
||||
Plaintext
|
||||
{% if current_model_state.plaintext == True %}
|
||||
<img id="configured-icon-plaintext" class="configured-icon" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
|
||||
{% endif %}
|
||||
</h3>
|
||||
</div>
|
||||
<div class="card-description-row">
|
||||
<p class="card-description">Set Plaintext files to index</p>
|
||||
</div>
|
||||
<div class="card-action-row">
|
||||
<a class="card-button" href="/config/content_type/plaintext">
|
||||
{% if current_model_state.plaintext %}
|
||||
Update
|
||||
{% else %}
|
||||
Setup
|
||||
{% endif %}
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
|
||||
</a>
|
||||
</div>
|
||||
{% if current_model_state.plaintext %}
|
||||
<div id="clear-plaintext" class="card-action-row">
|
||||
<button class="card-button" onclick="clearContentType('plaintext')">
|
||||
Disable
|
||||
</button>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section">
|
||||
|
@ -246,6 +122,16 @@
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="section">
    <h2 class="section-title">Manage Data</h2>
    <div class="section-manage-files">
        <!-- Bulk-delete control. The page script looks this element up by id to
             attach the click handler and to show/hide it, so the id must be
             unique in the document: it lives on the wrapper only. -->
        <div id="delete-all-files" class="delete-all-files">
            <button type="submit" title="Delete all indexed files">🗑️ Remove All</button>
        </div>
        <!-- Filled in at runtime by getAllFilenames() with one row per indexed file. -->
        <div class="indexed-files">
        </div>
    </div>
</div>
|
||||
<div class="section general-settings">
|
||||
<div id="results-count" title="Number of items to show in search and use for chat response">
|
||||
<label for="results-count-slider">Results Count: <span id="results-count-value">5</span></label>
|
||||
|
@ -291,8 +177,8 @@
|
|||
};
|
||||
|
||||
function clearContentType(content_type) {
|
||||
fetch('/api/delete/config/data/content_type/' + content_type, {
|
||||
method: 'POST',
|
||||
fetch('/api/config/data/content_type/' + content_type, {
|
||||
method: 'DELETE',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
|
@ -462,5 +348,84 @@
|
|||
// List user's API keys on page load
|
||||
listApiKeys();
|
||||
|
||||
// Ask the server to delete the indexed entries of a single file, then refresh
// the file list if the server reports success.
//   path: file path exactly as returned by the file listing endpoint.
function removeFile(path) {
    // Percent-encode the path: file names may contain `&`, `#`, `?`, `+` or
    // spaces, which would otherwise truncate or corrupt the `filename`
    // query parameter.
    fetch('/api/config/data/file?filename=' + encodeURIComponent(path), {
        method: 'DELETE',
        headers: {
            'Content-Type': 'application/json',
        }
    })
    .then(response => response.json())
    .then(data => {
        if (data.status == "ok") {
            getAllFilenames();
        }
    })
    .catch((error) => {
        // Surface network / JSON parsing failures instead of leaving an
        // unhandled promise rejection.
        console.error('Error:', error);
    });
}
|
||||
|
||||
// Get all currently indexed files
|
||||
// Fetch the list of indexed file paths and rebuild the "indexed-files" list:
// one row per file with a remove button. When nothing is indexed, hide the
// delete-all control and show a hint pointing at the desktop client instead.
function getAllFilenames() {
    fetch('/api/config/data/all')
        .then(response => response.json())
        .then(data => {
            const indexedFiles = document.getElementsByClassName("indexed-files")[0];
            indexedFiles.innerHTML = "";

            if (data.length == 0) {
                document.getElementById("delete-all-files").style.display = "none";
                indexedFiles.innerHTML = "<div>Use the <a href='https://download.khoj.dev'>Khoj Desktop client</a> to index files.</div>";
            } else {
                document.getElementById("delete-all-files").style.display = "block";
            }

            // `const` gives every iteration its own binding, so the click
            // handler below captures the right filename without an IIFE.
            for (const filename of data) {
                const fileElement = document.createElement("div");
                fileElement.classList.add("file-element");

                const fileNameElement = document.createElement("div");
                fileNameElement.classList.add("content-name");
                // File names are user-controlled data: insert them as text,
                // never as markup, so a crafted name cannot inject HTML.
                fileNameElement.textContent = filename;
                fileElement.appendChild(fileNameElement);

                const buttonContainer = document.createElement("div");
                buttonContainer.classList.add("remove-button-container");

                const removeFileButton = document.createElement("button");
                removeFileButton.classList.add("remove-file-button");
                removeFileButton.innerHTML = "🗑️";
                removeFileButton.addEventListener("click", () => {
                    removeFile(filename);
                });

                buttonContainer.appendChild(removeFileButton);
                fileElement.appendChild(buttonContainer);
                indexedFiles.appendChild(fileElement);
            }
        })
        .catch((error) => {
            console.error('Error:', error);
        });
}
|
||||
|
||||
// Get all currently indexed files on page load
|
||||
getAllFilenames();
|
||||
|
||||
// Wire up the delete-all control: on click, ask the server to delete all
// indexed data and refresh the file list if the server reports success.
let deleteAllFilesButton = document.getElementById("delete-all-files");
deleteAllFilesButton.addEventListener("click", function(event) {
    event.preventDefault();
    fetch('/api/config/data/all', {
        method: 'DELETE',
        headers: {
            'Content-Type': 'application/json',
        }
    })
    .then(response => response.json())
    .then(data => {
        if (data.status == "ok") {
            getAllFilenames();
        }
    })
    .catch((error) => {
        // Surface network / JSON parsing failures instead of leaving an
        // unhandled promise rejection.
        console.error('Error:', error);
    });
});
|
||||
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
|
|
@ -10,18 +10,6 @@
|
|||
</head>
|
||||
|
||||
<body>
|
||||
{% if demo %}
|
||||
<!-- Banner linking to https://khoj.dev -->
|
||||
<div class="khoj-banner-container">
|
||||
<a class="khoj-banner" href="https://khoj.dev" target="_blank">
|
||||
<p id="khoj-banner" class="khoj-banner">
|
||||
Enroll in Khoj cloud to get your own assistant
|
||||
</p>
|
||||
</a>
|
||||
<input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
|
||||
<button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="khoj-header"></div>
|
||||
|
||||
<!-- Login Modal -->
|
||||
|
@ -106,19 +94,6 @@
|
|||
justify-self: center;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit,
|
||||
input#khoj-banner-email {
|
||||
padding: 10px;
|
||||
border-radius: 5px;
|
||||
border: 1px solid #475569;
|
||||
background: #f9fafc;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit:hover,
|
||||
input#khoj-banner-email:hover {
|
||||
box-shadow: 0 0 11px #aaa;
|
||||
}
|
||||
|
||||
div#login-modal {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr;
|
||||
|
@ -143,12 +118,6 @@
|
|||
}
|
||||
|
||||
@media only screen and (max-width: 700px) {
|
||||
a.khoj-banner {
|
||||
display: block;
|
||||
}
|
||||
p.khoj-banner {
|
||||
padding: 0;
|
||||
}
|
||||
div#login-modal {
|
||||
margin-left: 10%;
|
||||
margin-right: 10%;
|
||||
|
@ -156,34 +125,5 @@
|
|||
}
|
||||
|
||||
</style>
|
||||
<script>
|
||||
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
|
||||
khojBannerSubmit?.addEventListener("click", function(event) {
|
||||
event.preventDefault();
|
||||
var email = document.getElementById("khoj-banner-email").value;
|
||||
fetch("https://app.khoj.dev/beta/users/", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
email: email
|
||||
}),
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
}).then(function(response) {
|
||||
return response.json();
|
||||
}).then(function(data) {
|
||||
console.log(data);
|
||||
if (data.user != null) {
|
||||
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
|
||||
document.getElementById("khoj-banner-submit").remove();
|
||||
} else {
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
}
|
||||
}).catch(function(error) {
|
||||
console.log(error);
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
});
|
||||
});
|
||||
</script>
|
||||
<script src="https://accounts.google.com/gsi/client" async defer></script>
|
||||
</html>
|
||||
|
|
|
@ -270,19 +270,6 @@
|
|||
</script>
|
||||
|
||||
<body>
|
||||
{% if demo %}
|
||||
<!-- Banner linking to https://khoj.dev -->
|
||||
<div class="khoj-banner-container">
|
||||
<a class="khoj-banner" href="https://khoj.dev" target="_blank">
|
||||
<p id="khoj-banner" class="khoj-banner">
|
||||
Enroll in Khoj cloud to get your own assistant
|
||||
</p>
|
||||
</a>
|
||||
<input type="text" id="khoj-banner-email" placeholder="email" class="khoj-banner-email"></input>
|
||||
<button id="khoj-banner-submit" class="khoj-banner-button">Submit</button>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!--Add Header Logo and Nav Pane-->
|
||||
{% import 'utils.html' as utils %}
|
||||
{{ utils.heading_pane(user_photo, username) }}
|
||||
|
@ -458,14 +445,6 @@
|
|||
max-width: 90%;
|
||||
}
|
||||
|
||||
div.khoj-banner-container {
|
||||
background: linear-gradient(-45deg, #FFC107, #FF9800, #FF5722, #FF9800, #FFC107);
|
||||
background-size: 400% 400%;
|
||||
animation: gradient 15s ease infinite;
|
||||
text-align: center;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
@keyframes gradient {
|
||||
0% {
|
||||
background-position: 0% 50%;
|
||||
|
@ -482,57 +461,6 @@
|
|||
text-align: center;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit,
|
||||
input#khoj-banner-email {
|
||||
padding: 10px;
|
||||
border-radius: 5px;
|
||||
border: 1px solid var(--main-text-color);
|
||||
background: #f9fafc;
|
||||
}
|
||||
|
||||
button#khoj-banner-submit:hover,
|
||||
input#khoj-banner-email:hover {
|
||||
box-shadow: 0 0 11px #aaa;
|
||||
}
|
||||
|
||||
@media only screen and (max-width: 700px) {
|
||||
a.khoj-banner {
|
||||
display: block;
|
||||
}
|
||||
p.khoj-banner {
|
||||
padding: 0;
|
||||
}
|
||||
}
|
||||
|
||||
</style>
|
||||
<script>
|
||||
var khojBannerSubmit = document.getElementById("khoj-banner-submit");
|
||||
khojBannerSubmit?.addEventListener("click", function(event) {
|
||||
event.preventDefault();
|
||||
var email = document.getElementById("khoj-banner-email").value;
|
||||
fetch("https://app.khoj.dev/beta/users/", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
email: email
|
||||
}),
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
}).then(function(response) {
|
||||
return response.json();
|
||||
}).then(function(data) {
|
||||
console.log(data);
|
||||
if (data.user != null) {
|
||||
document.getElementById("khoj-banner").innerHTML = "Thanks for signing up. We'll be in touch soon! 🚀";
|
||||
document.getElementById("khoj-banner-submit").remove();
|
||||
} else {
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
}
|
||||
}).catch(function(error) {
|
||||
console.log(error);
|
||||
document.getElementById("khoj-banner").innerHTML = "There was an error signing up. Please contact team@khoj.dev";
|
||||
});
|
||||
});
|
||||
</script>
|
||||
|
||||
</html>
|
||||
|
|
|
@ -119,7 +119,6 @@ def set_state(args):
|
|||
state.verbose = args.verbose
|
||||
state.host = args.host
|
||||
state.port = args.port
|
||||
state.demo = args.demo
|
||||
state.anonymous_mode = args.anonymous_mode
|
||||
state.khoj_version = version("khoj-assistant")
|
||||
state.chat_on_gpu = args.chat_on_gpu
|
||||
|
|
|
@ -68,13 +68,17 @@ class PdfToEntries(TextToEntries):
|
|||
with open(f"{tmp_file}", "wb") as f:
|
||||
bytes = pdf_files[pdf_file]
|
||||
f.write(bytes)
|
||||
loader = PyMuPDFLoader(f"{tmp_file}")
|
||||
pdf_entries_per_file = [page.page_content for page in loader.load()]
|
||||
try:
|
||||
loader = PyMuPDFLoader(f"{tmp_file}", extract_images=True)
|
||||
pdf_entries_per_file = [page.page_content for page in loader.load()]
|
||||
except ImportError:
|
||||
loader = PyMuPDFLoader(f"{tmp_file}")
|
||||
pdf_entries_per_file = [page.page_content for page in loader.load()]
|
||||
entry_to_location_map += zip(pdf_entries_per_file, [pdf_file] * len(pdf_entries_per_file))
|
||||
entries.extend(pdf_entries_per_file)
|
||||
except Exception as e:
|
||||
logger.warning(f"Unable to process file: {pdf_file}. This file will not be indexed.")
|
||||
logger.warning(e)
|
||||
logger.warning(e, exc_info=True)
|
||||
finally:
|
||||
if os.path.exists(f"{tmp_file}"):
|
||||
os.remove(f"{tmp_file}")
|
||||
|
|
|
@ -45,7 +45,15 @@ from fastapi.requests import Request
|
|||
|
||||
from database import adapters
|
||||
from database.adapters import EntryAdapters, ConversationAdapters
|
||||
from database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig, KhojUser
|
||||
from database.models import (
|
||||
LocalMarkdownConfig,
|
||||
LocalOrgConfig,
|
||||
LocalPdfConfig,
|
||||
LocalPlaintextConfig,
|
||||
KhojUser,
|
||||
GithubConfig,
|
||||
NotionConfig,
|
||||
)
|
||||
|
||||
|
||||
# Initialize Router
|
||||
|
@ -54,14 +62,10 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
|
||||
def map_config_to_object(content_type: str):
|
||||
if content_type == "org":
|
||||
return LocalOrgConfig
|
||||
if content_type == "markdown":
|
||||
return LocalMarkdownConfig
|
||||
if content_type == "pdf":
|
||||
return LocalPdfConfig
|
||||
if content_type == "plaintext":
|
||||
return LocalPlaintextConfig
|
||||
if content_type == "github":
|
||||
return GithubConfig
|
||||
if content_type == "notion":
|
||||
return NotionConfig
|
||||
|
||||
|
||||
async def map_config_to_db(config: FullConfig, user: KhojUser):
|
||||
|
@ -111,183 +115,220 @@ async def map_config_to_db(config: FullConfig, user: KhojUser):
|
|||
)
|
||||
|
||||
|
||||
# If it's a demo instance, prevent updating any of the configuration.
|
||||
if not state.demo:
|
||||
def _initialize_config():
|
||||
if state.config is None:
|
||||
state.config = FullConfig()
|
||||
state.config.search_type = SearchConfig.parse_obj(constants.default_config["search-type"])
|
||||
|
||||
def _initialize_config():
|
||||
if state.config is None:
|
||||
state.config = FullConfig()
|
||||
state.config.search_type = SearchConfig.parse_obj(constants.default_config["search-type"])
|
||||
|
||||
@api.get("/config/data", response_model=FullConfig)
|
||||
@requires(["authenticated"])
|
||||
def get_config_data(request: Request):
|
||||
user = request.user.object
|
||||
EntryAdapters.get_unique_file_types(user)
|
||||
@api.get("/config/data", response_model=FullConfig)
|
||||
@requires(["authenticated"])
|
||||
def get_config_data(request: Request):
|
||||
user = request.user.object
|
||||
EntryAdapters.get_unique_file_types(user)
|
||||
|
||||
return state.config
|
||||
return state.config
|
||||
|
||||
@api.post("/config/data")
|
||||
@requires(["authenticated"])
|
||||
async def set_config_data(
|
||||
request: Request,
|
||||
updated_config: FullConfig,
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
user = request.user.object
|
||||
await map_config_to_db(updated_config, user)
|
||||
|
||||
configuration_update_metadata = {}
|
||||
@api.post("/config/data")
|
||||
@requires(["authenticated"])
|
||||
async def set_config_data(
|
||||
request: Request,
|
||||
updated_config: FullConfig,
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
user = request.user.object
|
||||
await map_config_to_db(updated_config, user)
|
||||
|
||||
enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user)
|
||||
configuration_update_metadata = {}
|
||||
|
||||
if state.config.content_type is not None:
|
||||
configuration_update_metadata["github"] = "github" in enabled_content
|
||||
configuration_update_metadata["notion"] = "notion" in enabled_content
|
||||
configuration_update_metadata["org"] = "org" in enabled_content
|
||||
configuration_update_metadata["pdf"] = "pdf" in enabled_content
|
||||
configuration_update_metadata["markdown"] = "markdown" in enabled_content
|
||||
enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user)
|
||||
|
||||
if state.config.processor is not None:
|
||||
configuration_update_metadata["conversation_processor"] = state.config.processor.conversation is not None
|
||||
if state.config.content_type is not None:
|
||||
configuration_update_metadata["github"] = "github" in enabled_content
|
||||
configuration_update_metadata["notion"] = "notion" in enabled_content
|
||||
configuration_update_metadata["org"] = "org" in enabled_content
|
||||
configuration_update_metadata["pdf"] = "pdf" in enabled_content
|
||||
configuration_update_metadata["markdown"] = "markdown" in enabled_content
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_config",
|
||||
client=client,
|
||||
metadata=configuration_update_metadata,
|
||||
)
|
||||
return state.config
|
||||
if state.config.processor is not None:
|
||||
configuration_update_metadata["conversation_processor"] = state.config.processor.conversation is not None
|
||||
|
||||
@api.post("/config/data/content_type/github", status_code=200)
|
||||
@requires(["authenticated"])
|
||||
async def set_content_config_github_data(
|
||||
request: Request,
|
||||
updated_config: Union[GithubContentConfig, None],
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
_initialize_config()
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_config",
|
||||
client=client,
|
||||
metadata=configuration_update_metadata,
|
||||
)
|
||||
return state.config
|
||||
|
||||
user = request.user.object
|
||||
|
||||
await adapters.set_user_github_config(
|
||||
user=user,
|
||||
pat_token=updated_config.pat_token,
|
||||
repos=updated_config.repos,
|
||||
)
|
||||
@api.post("/config/data/content_type/github", status_code=200)
|
||||
@requires(["authenticated"])
|
||||
async def set_content_config_github_data(
|
||||
request: Request,
|
||||
updated_config: Union[GithubContentConfig, None],
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
_initialize_config()
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": "github"},
|
||||
)
|
||||
user = request.user.object
|
||||
|
||||
return {"status": "ok"}
|
||||
await adapters.set_user_github_config(
|
||||
user=user,
|
||||
pat_token=updated_config.pat_token,
|
||||
repos=updated_config.repos,
|
||||
)
|
||||
|
||||
@api.post("/config/data/content_type/notion", status_code=200)
|
||||
@requires(["authenticated"])
|
||||
async def set_content_config_notion_data(
|
||||
request: Request,
|
||||
updated_config: Union[NotionContentConfig, None],
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
_initialize_config()
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": "github"},
|
||||
)
|
||||
|
||||
user = request.user.object
|
||||
return {"status": "ok"}
|
||||
|
||||
await adapters.set_notion_config(
|
||||
user=user,
|
||||
token=updated_config.token,
|
||||
)
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": "notion"},
|
||||
)
|
||||
@api.post("/config/data/content_type/notion", status_code=200)
|
||||
@requires(["authenticated"])
|
||||
async def set_content_config_notion_data(
|
||||
request: Request,
|
||||
updated_config: Union[NotionContentConfig, None],
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
_initialize_config()
|
||||
|
||||
return {"status": "ok"}
|
||||
user = request.user.object
|
||||
|
||||
@api.post("/delete/config/data/content_type/{content_type}", status_code=200)
|
||||
@requires(["authenticated"])
|
||||
async def remove_content_config_data(
|
||||
request: Request,
|
||||
content_type: str,
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
user = request.user.object
|
||||
await adapters.set_notion_config(
|
||||
user=user,
|
||||
token=updated_config.token,
|
||||
)
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="delete_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": content_type},
|
||||
)
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": "notion"},
|
||||
)
|
||||
|
||||
content_object = map_config_to_object(content_type)
|
||||
if content_object is None:
|
||||
raise ValueError(f"Invalid content type: {content_type}")
|
||||
return {"status": "ok"}
|
||||
|
||||
await content_object.objects.filter(user=user).adelete()
|
||||
await sync_to_async(EntryAdapters.delete_all_entries)(user, content_type)
|
||||
|
||||
enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user)
|
||||
return {"status": "ok"}
|
||||
@api.delete("/config/data/content_type/{content_type}", status_code=200)
|
||||
@requires(["authenticated"])
|
||||
async def remove_content_config_data(
|
||||
request: Request,
|
||||
content_type: str,
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
user = request.user.object
|
||||
|
||||
@api.post("/config/data/content_type/{content_type}", status_code=200)
|
||||
@requires(["authenticated"])
|
||||
async def set_content_config_data(
|
||||
request: Request,
|
||||
content_type: str,
|
||||
updated_config: Union[TextContentConfig, None],
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
_initialize_config()
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="delete_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": content_type},
|
||||
)
|
||||
|
||||
user = request.user.object
|
||||
content_object = map_config_to_object(content_type)
|
||||
if content_object is None:
|
||||
raise ValueError(f"Invalid content type: {content_type}")
|
||||
|
||||
content_object = map_config_to_object(content_type)
|
||||
await adapters.set_text_content_config(user, content_object, updated_config)
|
||||
await content_object.objects.filter(user=user).adelete()
|
||||
await sync_to_async(EntryAdapters.delete_all_entries)(user, content_type)
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_content_config",
|
||||
client=client,
|
||||
metadata={"content_type": content_type},
|
||||
)
|
||||
enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user)
|
||||
return {"status": "ok"}
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
@api.post("/config/data/conversation/model", status_code=200)
|
||||
@requires(["authenticated"])
|
||||
async def update_chat_model(
|
||||
request: Request,
|
||||
id: str,
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
user = request.user.object
|
||||
@api.delete("/config/data/file", status_code=200)
|
||||
@requires(["authenticated"])
|
||||
async def remove_file_data(
|
||||
request: Request,
|
||||
filename: str,
|
||||
client: Optional[str] = None,
|
||||
):
|
||||
user = request.user.object
|
||||
|
||||
new_config = await ConversationAdapters.aset_user_conversation_processor(user, int(id))
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="delete_file",
|
||||
client=client,
|
||||
)
|
||||
|
||||
update_telemetry_state(
|
||||
request=request,
|
||||
telemetry_type="api",
|
||||
api="set_conversation_chat_model",
|
||||
client=client,
|
||||
metadata={"processor_conversation_type": "conversation"},
|
||||
)
|
||||
await EntryAdapters.adelete_entry_by_file(user, filename)
|
||||
|
||||
if new_config is None:
|
||||
return {"status": "error", "message": "Model not found"}
|
||||
return {"status": "ok"}
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
@api.get("/config/data/all", response_model=List[str])
@requires(["authenticated"])
async def get_all_filenames(
    request: Request,
    client: Optional[str] = None,
):
    """Return the result of `EntryAdapters.aget_all_filenames` for the requesting user as a list.

    The call is recorded in telemetry under the api name "get_all_filenames".
    """
    requesting_user = request.user.object

    update_telemetry_state(request=request, telemetry_type="api", api="get_all_filenames", client=client)

    # Materialise the adapter result with list() via sync_to_async rather than calling list() inline.
    to_list_async = sync_to_async(list)
    filenames = EntryAdapters.aget_all_filenames(requesting_user)
    return await to_list_async(filenames)
|
||||
|
||||
|
||||
@api.delete("/config/data/all", status_code=200)
@requires(["authenticated"])
async def remove_all_config_data(
    request: Request,
    client: Optional[str] = None,
):
    """Delete the requesting user's entries via `EntryAdapters.adelete_all_entries`.

    The call is recorded in telemetry under the api name "delete_all_config"
    before the deletion runs. Returns `{"status": "ok"}` once the deletion
    has been awaited.
    """
    requesting_user = request.user.object

    update_telemetry_state(request=request, telemetry_type="api", api="delete_all_config", client=client)

    await EntryAdapters.adelete_all_entries(requesting_user)

    response = {"status": "ok"}
    return response
|
||||
|
||||
|
||||
@api.post("/config/data/conversation/model", status_code=200)
@requires(["authenticated"])
async def update_chat_model(
    request: Request,
    id: str,
    client: Optional[str] = None,
):
    """Select the chat model the requesting user converses with.

    Args:
        request: Incoming request; the user is read from `request.user.object`.
        id: Identifier of the chat model option, passed as a string and
            converted to an integer before the lookup.
        client: Optional client name forwarded to telemetry.

    Returns:
        `{"status": "ok"}` when the adapter returns a configuration, otherwise
        `{"status": "error", "message": "Model not found"}`.
    """
    user = request.user.object

    try:
        model_id = int(id)
    except ValueError:
        # A non-numeric id cannot identify a model. Report it like an unknown
        # id instead of letting the ValueError escape as a server error.
        new_config = None
    else:
        new_config = await ConversationAdapters.aset_user_conversation_processor(user, model_id)

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="set_conversation_chat_model",
        client=client,
        metadata={"processor_conversation_type": "conversation"},
    )

    if new_config is None:
        return {"status": "error", "message": "Model not found"}

    return {"status": "ok"}
|
||||
|
||||
|
||||
# Create Routes
|
||||
|
@ -377,6 +418,7 @@ async def search(
|
|||
SearchType.Github,
|
||||
SearchType.Notion,
|
||||
SearchType.Plaintext,
|
||||
SearchType.Pdf,
|
||||
]:
|
||||
# query markdown notes
|
||||
search_futures += [
|
||||
|
|
|
@ -38,7 +38,6 @@ def index(request: Request):
|
|||
"chat.html",
|
||||
context={
|
||||
"request": request,
|
||||
"demo": state.demo,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
|
@ -55,7 +54,6 @@ def index_post(request: Request):
|
|||
"chat.html",
|
||||
context={
|
||||
"request": request,
|
||||
"demo": state.demo,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
|
@ -72,7 +70,6 @@ def search_page(request: Request):
|
|||
"search.html",
|
||||
context={
|
||||
"request": request,
|
||||
"demo": state.demo,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
|
@ -89,7 +86,6 @@ def chat_page(request: Request):
|
|||
"chat.html",
|
||||
context={
|
||||
"request": request,
|
||||
"demo": state.demo,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
|
@ -107,7 +103,6 @@ def login_page(request: Request):
|
|||
"login.html",
|
||||
context={
|
||||
"request": request,
|
||||
"demo": state.demo,
|
||||
"google_client_id": google_client_id,
|
||||
"redirect_uri": redirect_uri,
|
||||
},
|
||||
|
@ -125,142 +120,139 @@ def map_config_to_object(content_type: str):
|
|||
return LocalPlaintextConfig
|
||||
|
||||
|
||||
if not state.demo:
|
||||
@web_client.get("/config", response_class=HTMLResponse)
|
||||
@requires(["authenticated"], redirect="login_page")
|
||||
def config_page(request: Request):
|
||||
user = request.user.object
|
||||
user_picture = request.session.get("user", {}).get("picture")
|
||||
enabled_content = set(EntryAdapters.get_unique_file_types(user).all())
|
||||
|
||||
@web_client.get("/config", response_class=HTMLResponse)
|
||||
@requires(["authenticated"], redirect="login_page")
|
||||
def config_page(request: Request):
|
||||
user = request.user.object
|
||||
user_picture = request.session.get("user", {}).get("picture")
|
||||
enabled_content = set(EntryAdapters.get_unique_file_types(user).all())
|
||||
successfully_configured = {
|
||||
"pdf": ("pdf" in enabled_content),
|
||||
"markdown": ("markdown" in enabled_content),
|
||||
"org": ("org" in enabled_content),
|
||||
"image": False,
|
||||
"github": ("github" in enabled_content),
|
||||
"notion": ("notion" in enabled_content),
|
||||
"plaintext": ("plaintext" in enabled_content),
|
||||
}
|
||||
|
||||
successfully_configured = {
|
||||
"pdf": ("pdf" in enabled_content),
|
||||
"markdown": ("markdown" in enabled_content),
|
||||
"org": ("org" in enabled_content),
|
||||
"image": False,
|
||||
"github": ("github" in enabled_content),
|
||||
"notion": ("notion" in enabled_content),
|
||||
"plaintext": ("plaintext" in enabled_content),
|
||||
}
|
||||
|
||||
if state.content_index:
|
||||
successfully_configured.update(
|
||||
{
|
||||
"image": state.content_index.image is not None,
|
||||
}
|
||||
)
|
||||
|
||||
conversation_options = ConversationAdapters.get_conversation_processor_options().all()
|
||||
all_conversation_options = list()
|
||||
for conversation_option in conversation_options:
|
||||
all_conversation_options.append(
|
||||
{"chat_model": conversation_option.chat_model, "id": conversation_option.id}
|
||||
)
|
||||
|
||||
selected_conversation_config = ConversationAdapters.get_conversation_config(user)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
"config.html",
|
||||
context={
|
||||
"request": request,
|
||||
"current_model_state": successfully_configured,
|
||||
"anonymous_mode": state.anonymous_mode,
|
||||
"username": user.username if user else None,
|
||||
"conversation_options": all_conversation_options,
|
||||
"selected_conversation_config": selected_conversation_config.id
|
||||
if selected_conversation_config
|
||||
else None,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
if state.content_index:
|
||||
successfully_configured.update(
|
||||
{
|
||||
"image": state.content_index.image is not None,
|
||||
}
|
||||
)
|
||||
|
||||
@web_client.get("/config/content_type/github", response_class=HTMLResponse)
|
||||
@requires(["authenticated"], redirect="login_page")
|
||||
def github_config_page(request: Request):
|
||||
user = request.user.object
|
||||
user_picture = request.session.get("user", {}).get("picture")
|
||||
current_github_config = get_user_github_config(user)
|
||||
conversation_options = ConversationAdapters.get_conversation_processor_options().all()
|
||||
all_conversation_options = list()
|
||||
for conversation_option in conversation_options:
|
||||
all_conversation_options.append({"chat_model": conversation_option.chat_model, "id": conversation_option.id})
|
||||
|
||||
if current_github_config:
|
||||
raw_repos = current_github_config.githubrepoconfig.all()
|
||||
repos = []
|
||||
for repo in raw_repos:
|
||||
repos.append(
|
||||
GithubRepoConfig(
|
||||
name=repo.name,
|
||||
owner=repo.owner,
|
||||
branch=repo.branch,
|
||||
)
|
||||
selected_conversation_config = ConversationAdapters.get_conversation_config(user)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
"config.html",
|
||||
context={
|
||||
"request": request,
|
||||
"current_model_state": successfully_configured,
|
||||
"anonymous_mode": state.anonymous_mode,
|
||||
"username": user.username if user else None,
|
||||
"conversation_options": all_conversation_options,
|
||||
"selected_conversation_config": selected_conversation_config.id if selected_conversation_config else None,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@web_client.get("/config/content_type/github", response_class=HTMLResponse)
|
||||
@requires(["authenticated"], redirect="login_page")
|
||||
def github_config_page(request: Request):
|
||||
user = request.user.object
|
||||
user_picture = request.session.get("user", {}).get("picture")
|
||||
current_github_config = get_user_github_config(user)
|
||||
|
||||
if current_github_config:
|
||||
raw_repos = current_github_config.githubrepoconfig.all()
|
||||
repos = []
|
||||
for repo in raw_repos:
|
||||
repos.append(
|
||||
GithubRepoConfig(
|
||||
name=repo.name,
|
||||
owner=repo.owner,
|
||||
branch=repo.branch,
|
||||
)
|
||||
current_config = GithubContentConfig(
|
||||
pat_token=current_github_config.pat_token,
|
||||
repos=repos,
|
||||
)
|
||||
current_config = json.loads(current_config.json())
|
||||
else:
|
||||
current_config = {} # type: ignore
|
||||
|
||||
return templates.TemplateResponse(
|
||||
"content_type_github_input.html",
|
||||
context={
|
||||
"request": request,
|
||||
"current_config": current_config,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
)
|
||||
|
||||
@web_client.get("/config/content_type/notion", response_class=HTMLResponse)
|
||||
@requires(["authenticated"], redirect="login_page")
|
||||
def notion_config_page(request: Request):
|
||||
user = request.user.object
|
||||
user_picture = request.session.get("user", {}).get("picture")
|
||||
current_notion_config = get_user_notion_config(user)
|
||||
|
||||
current_config = NotionContentConfig(
|
||||
token=current_notion_config.token if current_notion_config else "",
|
||||
)
|
||||
|
||||
current_config = json.loads(current_config.json())
|
||||
|
||||
return templates.TemplateResponse(
|
||||
"content_type_notion_input.html",
|
||||
context={
|
||||
"request": request,
|
||||
"current_config": current_config,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
)
|
||||
|
||||
@web_client.get("/config/content_type/{content_type}", response_class=HTMLResponse)
|
||||
@requires(["authenticated"], redirect="login_page")
|
||||
def content_config_page(request: Request, content_type: str):
|
||||
if content_type not in VALID_TEXT_CONTENT_TYPES:
|
||||
return templates.TemplateResponse("config.html", context={"request": request})
|
||||
|
||||
object = map_config_to_object(content_type)
|
||||
user = request.user.object
|
||||
user_picture = request.session.get("user", {}).get("picture")
|
||||
config = object.objects.filter(user=user).first()
|
||||
if config == None:
|
||||
config = object.objects.create(user=user)
|
||||
|
||||
current_config = TextContentConfig(
|
||||
input_files=config.input_files,
|
||||
input_filter=config.input_filter,
|
||||
index_heading_entries=config.index_heading_entries,
|
||||
current_config = GithubContentConfig(
|
||||
pat_token=current_github_config.pat_token,
|
||||
repos=repos,
|
||||
)
|
||||
current_config = json.loads(current_config.json())
|
||||
else:
|
||||
current_config = {} # type: ignore
|
||||
|
||||
return templates.TemplateResponse(
|
||||
"content_type_input.html",
|
||||
context={
|
||||
"request": request,
|
||||
"current_config": current_config,
|
||||
"content_type": content_type,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
)
|
||||
return templates.TemplateResponse(
|
||||
"content_type_github_input.html",
|
||||
context={
|
||||
"request": request,
|
||||
"current_config": current_config,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@web_client.get("/config/content_type/notion", response_class=HTMLResponse)
|
||||
@requires(["authenticated"], redirect="login_page")
|
||||
def notion_config_page(request: Request):
|
||||
user = request.user.object
|
||||
user_picture = request.session.get("user", {}).get("picture")
|
||||
current_notion_config = get_user_notion_config(user)
|
||||
|
||||
current_config = NotionContentConfig(
|
||||
token=current_notion_config.token if current_notion_config else "",
|
||||
)
|
||||
|
||||
current_config = json.loads(current_config.json())
|
||||
|
||||
return templates.TemplateResponse(
|
||||
"content_type_notion_input.html",
|
||||
context={
|
||||
"request": request,
|
||||
"current_config": current_config,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@web_client.get("/config/content_type/{content_type}", response_class=HTMLResponse)
|
||||
@requires(["authenticated"], redirect="login_page")
|
||||
def content_config_page(request: Request, content_type: str):
|
||||
if content_type not in VALID_TEXT_CONTENT_TYPES:
|
||||
return templates.TemplateResponse("config.html", context={"request": request})
|
||||
|
||||
object = map_config_to_object(content_type)
|
||||
user = request.user.object
|
||||
user_picture = request.session.get("user", {}).get("picture")
|
||||
config = object.objects.filter(user=user).first()
|
||||
if config == None:
|
||||
config = object.objects.create(user=user)
|
||||
|
||||
current_config = TextContentConfig(
|
||||
input_files=config.input_files,
|
||||
input_filter=config.input_filter,
|
||||
index_heading_entries=config.index_heading_entries,
|
||||
)
|
||||
current_config = json.loads(current_config.json())
|
||||
|
||||
return templates.TemplateResponse(
|
||||
"content_type_input.html",
|
||||
context={
|
||||
"request": request,
|
||||
"current_config": current_config,
|
||||
"content_type": content_type,
|
||||
"username": user.username,
|
||||
"user_photo": user_picture,
|
||||
},
|
||||
)
|
||||
|
|
|
@ -42,7 +42,6 @@ def cli(args=None):
|
|||
parser.add_argument(
|
||||
"--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
|
||||
)
|
||||
parser.add_argument("--demo", action="store_true", default=False, help="Run Khoj in demo mode")
|
||||
parser.add_argument(
|
||||
"--anonymous-mode",
|
||||
action="store_true",
|
||||
|
|
|
@ -31,7 +31,6 @@ config_lock = threading.Lock()
|
|||
chat_lock = threading.Lock()
|
||||
SearchType = utils_config.SearchType
|
||||
telemetry: List[Dict[str, str]] = []
|
||||
demo: bool = False
|
||||
khoj_version: str = None
|
||||
device = get_device()
|
||||
chat_on_gpu: bool = True
|
||||
|
|
BIN
tests/data/pdf/ocr_samples.pdf
vendored
Normal file
BIN
tests/data/pdf/ocr_samples.pdf
vendored
Normal file
Binary file not shown.
|
@ -50,6 +50,23 @@ def test_multi_page_pdf_to_jsonl():
|
|||
assert len(jsonl_data) == 6
|
||||
|
||||
|
||||
def test_ocr_page_pdf_to_jsonl():
    "Extract entries from the OCR sample PDF and check the expected text is present."
    # Arrange
    # Load the sample PDF as raw bytes, keyed by its path
    pdf_path = "tests/data/pdf/ocr_samples.pdf"
    with open(pdf_path, "rb") as pdf_file:
        pdf_files = {pdf_path: pdf_file.read()}

    # Act
    # Extract entries from the PDF, then map each entry to its source file
    raw_entries, entry_to_file_map = PdfToEntries.extract_pdf_entries(pdf_files=pdf_files)
    entries = PdfToEntries.convert_pdf_entries_to_maps(raw_entries, entry_to_file_map)

    # Assert
    assert len(entries) == 1
    assert "playing on a strip of marsh" in entries[0].raw
|
||||
|
||||
|
||||
def test_get_pdf_files(tmp_path):
|
||||
"Ensure Pdf files specified via input-filter, input-files extracted"
|
||||
# Arrange
|
Loading…
Reference in a new issue