From 425496844b136fa15421d9abae52299afe8e599a Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 4 May 2024 13:26:04 +0530 Subject: [PATCH 1/3] Rename assets URL from Khoj S3 bucket to assets.khoj.dev Serve Khoj assets from the Khoj domain --- documentation/docs/clients/web.md | 2 +- documentation/docs/data-sources/share_your_data.md | 2 +- documentation/docusaurus.config.js | 2 +- src/interface/desktop/search.html | 2 +- src/khoj/database/adapters/__init__.py | 2 +- src/khoj/interface/web/chat.html | 2 +- src/khoj/interface/web/login.html | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/documentation/docs/clients/web.md b/documentation/docs/clients/web.md index dc583d71..0d6def28 100644 --- a/documentation/docs/clients/web.md +++ b/documentation/docs/clients/web.md @@ -25,7 +25,7 @@ You can upload documents to Khoj from the web interface, one at a time. This is 1. You can drag and drop the document into the chat window. 2. Or click the paperclip icon in the chat window and select the document from your file system. -![demo of dragging and dropping a file](https://khoj-web-bucket.s3.amazonaws.com/drag_drop_file.gif) +![demo of dragging and dropping a file](https://assets.khoj.dev/drag_drop_file.gif) ### Install on Phone You can optionally install Khoj as a [Progressive Web App (PWA)](https://web.dev/learn/pwa/installation). This makes it quick and easy to access Khoj on your phone. diff --git a/documentation/docs/data-sources/share_your_data.md b/documentation/docs/data-sources/share_your_data.md index 2299b7d0..86e0d0dc 100644 --- a/documentation/docs/data-sources/share_your_data.md +++ b/documentation/docs/data-sources/share_your_data.md @@ -13,4 +13,4 @@ There are several ways you can get started with sharing your data with the Khoj - Setup the sync options for either [Obsidian](/clients/obsidian) or [Emacs](/clients/emacs) to automatically sync your documents with Khoj. 
This is best if you are already using these tools and want to leverage Khoj's AI capabilities. - Configure your [Notion](/data-sources/notion_integration) or [Github](/data-sources/github_integration) to sync with Khoj. By providing your credentials, you can keep the data synced in the background. -![demo of dragging and dropping a file](https://khoj-web-bucket.s3.amazonaws.com/drag_drop_file.gif) +![demo of dragging and dropping a file](https://assets.khoj.dev/drag_drop_file.gif) diff --git a/documentation/docusaurus.config.js b/documentation/docusaurus.config.js index 526e8295..ed594bbf 100644 --- a/documentation/docusaurus.config.js +++ b/documentation/docusaurus.config.js @@ -80,7 +80,7 @@ const config = { {name: 'og:type', content: 'website'}, {name: 'og:site_name', content: 'Khoj Documentation'}, {name: 'og:description', content: 'Quickly get started with using or self-hosting Khoj'}, - {name: 'og:image', content: 'https://khoj-web-bucket.s3.amazonaws.com/link_preview_docs.png'}, + {name: 'og:image', content: 'https://assets.khoj.dev/link_preview_docs.png'}, {name: 'og:url', content: 'https://docs.khoj.dev'}, {name: 'keywords', content: 'khoj, khoj ai, chatgpt, open ai, open source, productivity'} ], diff --git a/src/interface/desktop/search.html b/src/interface/desktop/search.html index 36a81d9b..1f2133fe 100644 --- a/src/interface/desktop/search.html +++ b/src/interface/desktop/search.html @@ -2,7 +2,7 @@ - + Khoj - Search diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index fac2ca27..a783e8be 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -485,7 +485,7 @@ class ClientApplicationAdapters: class AgentAdapters: DEFAULT_AGENT_NAME = "Khoj" - DEFAULT_AGENT_AVATAR = "https://khoj-web-bucket.s3.amazonaws.com/lamp-128.png" + DEFAULT_AGENT_AVATAR = "https://assets.khoj.dev/lamp-128.png" DEFAULT_AGENT_SLUG = "khoj" @staticmethod diff --git 
a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index bcffc254..a0e7f628 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -3,7 +3,7 @@ - + Khoj - Chat diff --git a/src/khoj/interface/web/login.html b/src/khoj/interface/web/login.html index 91ef6007..98bfbd23 100644 --- a/src/khoj/interface/web/login.html +++ b/src/khoj/interface/web/login.html @@ -7,7 +7,7 @@ - + From 80cbaca9359e1d325b25f2dca5b63e76129a98ac Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 4 May 2024 17:36:39 +0530 Subject: [PATCH 2/3] Serve generated images from Khoj domain instead of directly from S3 Use CNAME to forward requests from the khoj subdomain to the equivalent S3 bucket --- .../serve_generated_images_from_server.py | 60 +++++++++++++++++++ src/khoj/routers/storage.py | 5 +- 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 src/khoj/database/management/commands/serve_generated_images_from_server.py diff --git a/src/khoj/database/management/commands/serve_generated_images_from_server.py b/src/khoj/database/management/commands/serve_generated_images_from_server.py new file mode 100644 index 00000000..0ed19128 --- /dev/null +++ b/src/khoj/database/management/commands/serve_generated_images_from_server.py @@ -0,0 +1,60 @@ +from django.core.management.base import BaseCommand + +from khoj.database.models import Conversation +from khoj.utils.helpers import ImageIntentType, is_none_or_empty + + +class Command(BaseCommand): + help = "Serve Khoj generated images from a different URL." 
+ + def add_arguments(self, parser): + # Pass Source URL + parser.add_argument( + "--source", + action="store", + help="URL from which generated images are currently served.", + ) + # Pass Destination URL + parser.add_argument("--destination", action="store", help="URL to serve generated image from going forward.") + + # Add a new argument 'reverse' to the command + parser.add_argument( + "--reverse", + action="store_true", + help="Revert to serve generated images from source instead of destination URL.", + ) + + def handle(self, *args, **options): + updated_count = 0 + if not options.get("source") or not options.get("destination"): + self.stdout.write( + self.style.ERROR( + "AWS_IMAGE_UPLOAD_BUCKET environment variable or --source, --destination args needs to be set." + ) + ) + return + + destination = options["source"] if options["reverse"] else options["destination"] + source = options["destination"] if options["reverse"] else options["source"] + for conversation in Conversation.objects.all(): + conversation_updated = False + for chat in conversation.conversation_log.get("chat", []): + if ( + chat.get("by", "") == "khoj" + and not is_none_or_empty(chat.get("message")) + and chat.get("message", "").startswith(source) + and chat.get("intent", {}).get("type", "") == ImageIntentType.TEXT_TO_IMAGE2.value + ): + if chat.get("message", "").endswith(".webp"): + # Convert source url to destination url + chat["message"] = chat["message"].replace(source, destination) + conversation_updated = True + updated_count += 1 + + if conversation_updated: + print("Save the updated conversation") + conversation.save() + + if updated_count > 0: + success = f"Successfully converted {updated_count} image URLs from {source} to {destination}.".strip() + self.stdout.write(self.style.SUCCESS(success)) diff --git a/src/khoj/routers/storage.py b/src/khoj/routers/storage.py index 9a5d448f..8d7b08e5 100644 --- a/src/khoj/routers/storage.py +++ b/src/khoj/routers/storage.py @@ -6,6 +6,9 @@ 
logger = logging.getLogger(__name__) AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY") AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY") +# S3 supports serving assets via your domain. Khoj expects this to be used in production. To enable it: +# 1. Your bucket name for images should be of the form sub.domain.tld. For example, generated.khoj.dev +# 2. Add a CNAME entry to your domain's DNS records pointing to the S3 bucket endpoint. For example, CNAME generated.khoj.dev generated.khoj.dev.s3.amazonaws.com AWS_UPLOAD_IMAGE_BUCKET_NAME = os.getenv("AWS_IMAGE_UPLOAD_BUCKET") aws_enabled = AWS_ACCESS_KEY is not None and AWS_SECRET_KEY is not None and AWS_UPLOAD_IMAGE_BUCKET_NAME is not None @@ -25,7 +28,7 @@ def upload_image(image: bytes, user_id: uuid.UUID): image_key = f"{user_id}/{uuid.uuid4()}.webp" try: s3_client.put_object(Bucket=AWS_UPLOAD_IMAGE_BUCKET_NAME, Key=image_key, Body=image, ACL="public-read") - url = f"https://{AWS_UPLOAD_IMAGE_BUCKET_NAME}.s3.amazonaws.com/{image_key}" + url = f"https://{AWS_UPLOAD_IMAGE_BUCKET_NAME}/{image_key}" return url except Exception as e: logger.error(f"Failed to upload image to S3: {e}") From 7823ef09dc24d314702285afa8f30629a1df3b7f Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 29 May 2024 17:50:07 +0530 Subject: [PATCH 3/3] Simplify conditional code. 
Improve logs to track conversion progress --- ...server.py => change_generated_images_url.py} | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) rename src/khoj/database/management/commands/{serve_generated_images_from_server.py => change_generated_images_url.py} (76%) diff --git a/src/khoj/database/management/commands/serve_generated_images_from_server.py b/src/khoj/database/management/commands/change_generated_images_url.py similarity index 76% rename from src/khoj/database/management/commands/serve_generated_images_from_server.py rename to src/khoj/database/management/commands/change_generated_images_url.py index 0ed19128..a3d6c2f5 100644 --- a/src/khoj/database/management/commands/serve_generated_images_from_server.py +++ b/src/khoj/database/management/commands/change_generated_images_url.py @@ -1,4 +1,5 @@ from django.core.management.base import BaseCommand +from tqdm import tqdm from khoj.database.models import Conversation from khoj.utils.helpers import ImageIntentType, is_none_or_empty @@ -29,7 +30,7 @@ class Command(BaseCommand): if not options.get("source") or not options.get("destination"): self.stdout.write( self.style.ERROR( - "AWS_IMAGE_UPLOAD_BUCKET environment variable or --source, --destination args needs to be set." + "Set --source, --destination args to migrate serving images from source to destination URL." 
) ) return @@ -38,21 +39,21 @@ class Command(BaseCommand): source = options["destination"] if options["reverse"] else options["source"] for conversation in Conversation.objects.all(): conversation_updated = False - for chat in conversation.conversation_log.get("chat", []): + for chat in tqdm(conversation.conversation_log.get("chat", []), desc="Processing Conversations"): if ( chat.get("by", "") == "khoj" and not is_none_or_empty(chat.get("message")) and chat.get("message", "").startswith(source) and chat.get("intent", {}).get("type", "") == ImageIntentType.TEXT_TO_IMAGE2.value + and chat.get("message", "").endswith(".webp") ): - if chat.get("message", "").endswith(".webp"): - # Convert source url to destination url - chat["message"] = chat["message"].replace(source, destination) - conversation_updated = True - updated_count += 1 + # Convert source url to destination url + chat["message"] = chat["message"].replace(source, destination) + conversation_updated = True + updated_count += 1 if conversation_updated: - print("Save the updated conversation") + print(f"Save the updated conversation {conversation.id} to the database.") conversation.save() if updated_count > 0: