mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-30 19:03:01 +01:00
Batch send of index files
This commit is contained in:
parent
4395ed8065
commit
192cd53003
1 changed files with 70 additions and 23 deletions
|
@ -416,10 +416,47 @@ Auto invokes setup steps on calling main entrypoint."
|
|||
(files-to-index (or file-paths
|
||||
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.\\(org\\|md\\|markdown\\|pdf\\|txt\\|rst\\|xml\\|htm\\|html\\)$")) content-directories) content-files)))
|
||||
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
|
||||
(delete-files (khoj--get-delete-file-list khoj--indexed-files files-to-index))
|
||||
(inhibit-message t)
|
||||
(message-log-max nil))
|
||||
(message-log-max nil)
|
||||
(batch-size 50))
|
||||
(dolist (files (khoj--split-file-list files-to-index batch-size))
|
||||
(khoj--send-index-update-request (khoj--render-update-files-as-request-body files boundary) boundary content-type type-query force))
|
||||
(when delete-files
|
||||
(khoj--send-index-update-request (khoj--render-delete-files-as-request-body delete-files boundary) boundary content-type type-query force))
|
||||
(setq khoj--indexed-files files-to-index)))
|
||||
|
||||
(defun khoj--get-delete-file-list (indexed-files upload-files)
|
||||
"Get delete file list. when `INDEXED-FILES' no longer in `UPLOAD-FILES'.
|
||||
delete them. return delete-file-list."
|
||||
(let (delete-files '())
|
||||
(dolist (indexed-file indexed-files)
|
||||
(when (not (member indexed-file upload-files))
|
||||
(push indexed-file delete-files)))
|
||||
delete-files))
|
||||
|
||||
(defun khoj--split-file-list (file-list size)
|
||||
"Split `FILE-LIST' into subgroups of `SIZE' files each."
|
||||
(let ((subgroups '())
|
||||
(current-group '()))
|
||||
(dolist (file file-list)
|
||||
(if (= (length current-group) size)
|
||||
;; If the current group has size files, start a new group
|
||||
(progn
|
||||
(push current-group subgroups)
|
||||
(setq current-group '()))
|
||||
(push file current-group)))
|
||||
;; Add the last group if it's not empty
|
||||
(when current-group
|
||||
(push (nreverse current-group) subgroups))
|
||||
(nreverse subgroups))) ; Reverse to maintain the original order of file-list
|
||||
|
||||
(defun khoj--send-index-update-request (body boundary &optional content-type type-query force)
|
||||
"Send `BODY' request to khoj server. 'TYPE-QUERY' is appended to the URL.
|
||||
Use `BOUNDARY' to add headder conte
|
||||
nt-type."
|
||||
(let ((url-request-method "POST")
|
||||
(url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary))
|
||||
(url-request-data body)
|
||||
(url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
|
||||
("Authorization" . ,(format "Bearer %s" khoj-api-key)))))
|
||||
(with-current-buffer
|
||||
|
@ -437,10 +474,9 @@ Auto invokes setup steps on calling main entrypoint."
|
|||
(if content-type (format "%s " content-type) "all")
|
||||
(string-trim (format "%s %s" (nth 1 (nth 1 status)) (nth 2 (nth 1 status))))
|
||||
(if (> (- (point-max) (point)) 0) (format ". Response: %s" (string-trim (buffer-substring-no-properties (point) (point-max)))) ""))))))
|
||||
nil t t)))
|
||||
(setq khoj--indexed-files files-to-index)))
|
||||
nil t t))))
|
||||
|
||||
(defun khoj--render-files-as-request-body (files-to-index previously-indexed-files boundary)
|
||||
(defun khoj--render-update-files-as-request-body (files-to-index boundary)
|
||||
"Render `FILES-TO-INDEX', `PREVIOUSLY-INDEXED-FILES' as multi-part form body.
|
||||
Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request."
|
||||
(with-temp-buffer
|
||||
|
@ -448,32 +484,43 @@ Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request.
|
|||
(insert "\n")
|
||||
(dolist (file-to-index files-to-index)
|
||||
;; find file content-type. Choose from org, markdown, pdf, plaintext
|
||||
(let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
|
||||
((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
|
||||
((string-match "\\.pdf$" file-to-index) "application/pdf")
|
||||
(t "text/plain"))))
|
||||
(let ((content-type (khoj--filename-to-mime-type file-to-index))
|
||||
(file-name (encode-coding-string file-to-index 'utf-8)))
|
||||
(insert (format "--%s\r\n" boundary))
|
||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
|
||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-name))
|
||||
(insert (format "Content-Type: %s\r\n\r\n" content-type))
|
||||
(insert (with-temp-buffer
|
||||
(insert-file-contents-literally file-to-index)
|
||||
(buffer-string)))
|
||||
(insert "\r\n")))
|
||||
(dolist (file-to-index previously-indexed-files)
|
||||
(when (not (member file-to-index files-to-index))
|
||||
;; find file content-type. Choose from org, markdown, pdf, plaintext
|
||||
(let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
|
||||
((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
|
||||
((string-match "\\.pdf$" file-to-index) "application/pdf")
|
||||
(t "text/plain"))))
|
||||
(insert (format "--%s\r\n" boundary))
|
||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
|
||||
(insert "Content-Type: text/org\r\n\r\n")
|
||||
(insert "")
|
||||
(insert "\r\n"))))
|
||||
(insert (format "--%s--\r\n" boundary))
|
||||
(buffer-string)))
|
||||
|
||||
(defun khoj--render-delete-files-as-request-body (delete-files boundary)
|
||||
"Render `DELETE-FILES' as multi-part form body.
|
||||
Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request."
|
||||
(with-temp-buffer
|
||||
(set-buffer-multibyte nil)
|
||||
(insert "\n")
|
||||
(debug delete-files)
|
||||
(dolist (file-to-index delete-files)
|
||||
(let ((content-type (khoj--filename-to-mime-type file-to-index))
|
||||
(file-name (encode-coding-string file-to-index 'utf-8)))
|
||||
(insert (format "--%s\r\n" boundary))
|
||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-name))
|
||||
(insert "Content-Type: %s\r\n\r\n" content-type)
|
||||
(insert "")
|
||||
(insert "\r\n")))
|
||||
(insert (format "--%s--\r\n" boundary))
|
||||
(buffer-string)))
|
||||
|
||||
(defun khoj--filename-to-mime-type (file-name)
|
||||
"`FILE-NAME' to mimeType."
|
||||
(cond ((string-match "\\.org$" file-name) "text/org")
|
||||
((string-match "\\.\\(md\\|markdown\\)$" file-name) "text/markdown")
|
||||
((string-match "\\.pdf$" file-name) "application/pdf")
|
||||
(t "text/plain")))
|
||||
|
||||
;; Cancel any running indexing timer, first
|
||||
(when khoj--index-timer
|
||||
(cancel-timer khoj--index-timer))
|
||||
|
|
Loading…
Reference in a new issue