mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-12-02 20:03:01 +01:00
Batch send of index files
This commit is contained in:
parent
4395ed8065
commit
192cd53003
1 changed file with 70 additions and 23 deletions
|
@ -416,10 +416,47 @@ Auto invokes setup steps on calling main entrypoint."
|
||||||
(files-to-index (or file-paths
|
(files-to-index (or file-paths
|
||||||
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.\\(org\\|md\\|markdown\\|pdf\\|txt\\|rst\\|xml\\|htm\\|html\\)$")) content-directories) content-files)))
|
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.\\(org\\|md\\|markdown\\|pdf\\|txt\\|rst\\|xml\\|htm\\|html\\)$")) content-directories) content-files)))
|
||||||
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
|
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
|
||||||
|
(delete-files (khoj--get-delete-file-list khoj--indexed-files files-to-index))
|
||||||
(inhibit-message t)
|
(inhibit-message t)
|
||||||
(message-log-max nil))
|
(message-log-max nil)
|
||||||
(let ((url-request-method "POST")
|
(batch-size 50))
|
||||||
(url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary))
|
(dolist (files (khoj--split-file-list files-to-index batch-size))
|
||||||
|
(khoj--send-index-update-request (khoj--render-update-files-as-request-body files boundary) boundary content-type type-query force))
|
||||||
|
(when delete-files
|
||||||
|
(khoj--send-index-update-request (khoj--render-delete-files-as-request-body delete-files boundary) boundary content-type type-query force))
|
||||||
|
(setq khoj--indexed-files files-to-index)))
|
||||||
|
|
||||||
|
(defun khoj--get-delete-file-list (indexed-files upload-files)
  "Return the entries of INDEXED-FILES that are not in UPLOAD-FILES.
INDEXED-FILES and UPLOAD-FILES are lists of file names, compared with
`member' (i.e. `equal').  The returned list holds the files that were
indexed before but are no longer part of the upload set; it is built
with `push', so its order is the reverse of INDEXED-FILES.
Return nil when there is nothing to delete."
  ;; Note the doubled parentheses: a single-level `(delete-files '())'
  ;; binding list would also bind a stray variable named `quote'.
  (let ((delete-files '()))
    (dolist (indexed-file indexed-files)
      (unless (member indexed-file upload-files)
        (push indexed-file delete-files)))
    delete-files))
|
||||||
|
|
||||||
|
(defun khoj--split-file-list (file-list size)
  "Split FILE-LIST into consecutive subgroups of at most SIZE files each.
Return a list of lists.  Every element of FILE-LIST appears exactly
once, and both the subgroups and the files inside them keep the order
of FILE-LIST.  Only the last subgroup may hold fewer than SIZE files.
Return nil when FILE-LIST is empty.  If SIZE is not a positive number
the whole FILE-LIST is returned as a single subgroup."
  (let ((subgroups '())
        (current-group '())
        (current-size 0))
    (dolist (file file-list)
      ;; Always add the file first, so the element that fills up a
      ;; group is never dropped.
      (push file current-group)
      (setq current-size (1+ current-size))
      ;; Group is full: store it in original order and start a new one.
      (when (= current-size size)
        (push (nreverse current-group) subgroups)
        (setq current-group '()
              current-size 0)))
    ;; Add the last, partially filled group if it's not empty.
    (when current-group
      (push (nreverse current-group) subgroups))
    ;; Groups were accumulated with `push'; reverse to restore the
    ;; original order of FILE-LIST.
    (nreverse subgroups)))
|
||||||
|
|
||||||
|
(defun khoj--send-index-update-request (body boundary &optional content-type type-query force)
|
||||||
|
"Send `BODY' request to khoj server. 'TYPE-QUERY' is appended to the URL.
|
||||||
|
Use `BOUNDARY' to add headder conte
|
||||||
|
nt-type."
|
||||||
|
(let ((url-request-method "POST")
|
||||||
|
(url-request-data body)
|
||||||
(url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
|
(url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
|
||||||
("Authorization" . ,(format "Bearer %s" khoj-api-key)))))
|
("Authorization" . ,(format "Bearer %s" khoj-api-key)))))
|
||||||
(with-current-buffer
|
(with-current-buffer
|
||||||
|
@ -437,10 +474,9 @@ Auto invokes setup steps on calling main entrypoint."
|
||||||
(if content-type (format "%s " content-type) "all")
|
(if content-type (format "%s " content-type) "all")
|
||||||
(string-trim (format "%s %s" (nth 1 (nth 1 status)) (nth 2 (nth 1 status))))
|
(string-trim (format "%s %s" (nth 1 (nth 1 status)) (nth 2 (nth 1 status))))
|
||||||
(if (> (- (point-max) (point)) 0) (format ". Response: %s" (string-trim (buffer-substring-no-properties (point) (point-max)))) ""))))))
|
(if (> (- (point-max) (point)) 0) (format ". Response: %s" (string-trim (buffer-substring-no-properties (point) (point-max)))) ""))))))
|
||||||
nil t t)))
|
nil t t))))
|
||||||
(setq khoj--indexed-files files-to-index)))
|
|
||||||
|
|
||||||
(defun khoj--render-files-as-request-body (files-to-index previously-indexed-files boundary)
|
(defun khoj--render-update-files-as-request-body (files-to-index boundary)
|
||||||
"Render `FILES-TO-INDEX', `PREVIOUSLY-INDEXED-FILES' as multi-part form body.
|
"Render `FILES-TO-INDEX', `PREVIOUSLY-INDEXED-FILES' as multi-part form body.
|
||||||
Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request."
|
Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request."
|
||||||
(with-temp-buffer
|
(with-temp-buffer
|
||||||
|
@ -448,32 +484,43 @@ Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request.
|
||||||
(insert "\n")
|
(insert "\n")
|
||||||
(dolist (file-to-index files-to-index)
|
(dolist (file-to-index files-to-index)
|
||||||
;; find file content-type. Choose from org, markdown, pdf, plaintext
|
;; find file content-type. Choose from org, markdown, pdf, plaintext
|
||||||
(let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
|
(let ((content-type (khoj--filename-to-mime-type file-to-index))
|
||||||
((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
|
(file-name (encode-coding-string file-to-index 'utf-8)))
|
||||||
((string-match "\\.pdf$" file-to-index) "application/pdf")
|
|
||||||
(t "text/plain"))))
|
|
||||||
(insert (format "--%s\r\n" boundary))
|
(insert (format "--%s\r\n" boundary))
|
||||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
|
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-name))
|
||||||
(insert (format "Content-Type: %s\r\n\r\n" content-type))
|
(insert (format "Content-Type: %s\r\n\r\n" content-type))
|
||||||
(insert (with-temp-buffer
|
(insert (with-temp-buffer
|
||||||
(insert-file-contents-literally file-to-index)
|
(insert-file-contents-literally file-to-index)
|
||||||
(buffer-string)))
|
(buffer-string)))
|
||||||
(insert "\r\n")))
|
(insert "\r\n")))
|
||||||
(dolist (file-to-index previously-indexed-files)
|
|
||||||
(when (not (member file-to-index files-to-index))
|
|
||||||
;; find file content-type. Choose from org, markdown, pdf, plaintext
|
|
||||||
(let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
|
|
||||||
((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
|
|
||||||
((string-match "\\.pdf$" file-to-index) "application/pdf")
|
|
||||||
(t "text/plain"))))
|
|
||||||
(insert (format "--%s\r\n" boundary))
|
|
||||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
|
|
||||||
(insert "Content-Type: text/org\r\n\r\n")
|
|
||||||
(insert "")
|
|
||||||
(insert "\r\n"))))
|
|
||||||
(insert (format "--%s--\r\n" boundary))
|
(insert (format "--%s--\r\n" boundary))
|
||||||
(buffer-string)))
|
(buffer-string)))
|
||||||
|
|
||||||
|
(defun khoj--render-delete-files-as-request-body (delete-files boundary)
  "Render `DELETE-FILES' as multi-part form body.
Each file name in DELETE-FILES becomes one form part named \"files\"
carrying the file's name and MIME type but an empty body.
NOTE(review): presumably the Khoj server treats a part with an empty
body as a request to drop that file from the index -- confirm against
the server.
Use `BOUNDARY' to separate files.  This is sent to Khoj server as a POST request.
Return the request body as a unibyte string."
  (with-temp-buffer
    ;; Build the body as raw bytes; file names are UTF-8 encoded below.
    (set-buffer-multibyte nil)
    (insert "\n")
    (dolist (file-to-index delete-files)
      (let ((content-type (khoj--filename-to-mime-type file-to-index))
            (file-name (encode-coding-string file-to-index 'utf-8)))
        (insert (format "--%s\r\n" boundary))
        (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-name))
        ;; The part headers end with a blank line; the MIME type must be
        ;; formatted into the header, not inserted after it.
        (insert (format "Content-Type: %s\r\n\r\n" content-type))
        ;; Empty part body, followed by the line break that ends the part.
        (insert "\r\n")))
    ;; Closing delimiter of the multipart body.
    (insert (format "--%s--\r\n" boundary))
    (buffer-string)))
|
||||||
|
|
||||||
|
(defun khoj--filename-to-mime-type (file-name)
  "Return the MIME type string to use for FILE-NAME.
The type is chosen from the file name extension: \"text/org\" for .org,
\"text/markdown\" for .md and .markdown, \"application/pdf\" for .pdf.
Any other name maps to \"text/plain\"."
  ;; Ordered table of (REGEXP . MIME-TYPE); the first regexp that
  ;; matches FILE-NAME wins.
  (let ((mime-types-by-regexp '(("\\.org$" . "text/org")
                                ("\\.\\(md\\|markdown\\)$" . "text/markdown")
                                ("\\.pdf$" . "application/pdf"))))
    (or (assoc-default file-name mime-types-by-regexp #'string-match)
        "text/plain")))
|
||||||
|
|
||||||
;; Cancel any running indexing timer, first
|
;; Cancel any running indexing timer, first
|
||||||
(when khoj--index-timer
|
(when khoj--index-timer
|
||||||
(cancel-timer khoj--index-timer))
|
(cancel-timer khoj--index-timer))
|
||||||
|
|
Loading…
Reference in a new issue