mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Add separate unit test for GitHub. It will only run if a PAT token is set
This commit is contained in:
parent
3a61919344
commit
751edfefe5
3 changed files with 29 additions and 1 deletions
|
@ -328,6 +328,11 @@ Add your OpenAI API to Khoj by using either of the two options below:
|
|||
1. [Setup your OpenAI API key in Khoj](#set-your-openai-api-key-in-khoj)
|
||||
2. Interact with them from the [Khoj Swagger docs](http://localhost:8000/docs)[^2]
|
||||
|
||||
### Use a Github Repository as a source
|
||||
Note that this plugin is currently *only* indexing Markdown files. It will ignore all other files in the repository. This is because Khoj, as it stands, is a semantic search engine. Eventually, we hope to get to a state where you can search for any file in your repository and even explain code.
|
||||
|
||||
1. Get a [personal access token (PAT)](https://docs.github.com/en/github/authenticating-to-github/keeping-your-account-and-data-secure/creating-a-personal-access-token) with the `repo` and `read:org` scopes, using the classic token flow.
|
||||
2. Configure your settings to include the `owner` and `repo_name`. The `owner` will be the organization name if the repo is in an organization. The `repo_name` will be the name of the repository. Optionally, you can also supply a branch name. If no branch name is supplied, the `master` branch will be used.
|
||||
|
||||
## Performance
|
||||
|
||||
|
@ -458,7 +463,7 @@ conda activate khoj
|
|||
|
||||
#### Before Creating PR
|
||||
|
||||
1. Run Tests
|
||||
1. Run Tests. If you get an error complaining about a missing `fast_tokenizer_file`, follow the solution [in this Github issue](https://github.com/UKPLab/sentence-transformers/issues/1659).
|
||||
```shell
|
||||
pytest
|
||||
```
|
||||
|
|
|
@ -16,6 +16,7 @@ from khoj.utils.rawconfig import (
|
|||
ConversationProcessorConfig,
|
||||
ProcessorConfig,
|
||||
TextContentConfig,
|
||||
GithubContentConfig,
|
||||
ImageContentConfig,
|
||||
SearchConfig,
|
||||
TextSearchConfig,
|
||||
|
@ -89,6 +90,15 @@ def content_config(tmp_path_factory, search_config: SearchConfig):
|
|||
)
|
||||
}
|
||||
|
||||
content_config.github = GithubContentConfig(
|
||||
pat_token=os.getenv("GITHUB_PAT_TOKEN"),
|
||||
repo_name="lantern",
|
||||
repo_owner="khoj-ai",
|
||||
repo_branch="master",
|
||||
compressed_jsonl=content_dir.joinpath("github.jsonl.gz"),
|
||||
embeddings_file=content_dir.joinpath("github_embeddings.pt"),
|
||||
)
|
||||
|
||||
filters = [DateFilter(), WordFilter(), FileFilter()]
|
||||
text_search.setup(
|
||||
JsonlToJsonl, content_config.plugins["plugin1"], search_config.asymmetric, regenerate=False, filters=filters
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# System Packages
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
# External Packages
|
||||
import pytest
|
||||
|
@ -10,6 +11,7 @@ from khoj.utils.state import model
|
|||
from khoj.search_type import text_search
|
||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
|
||||
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
|
||||
from khoj.processor.github.github_to_jsonl import GithubToJsonl
|
||||
|
||||
|
||||
# Test
|
||||
|
@ -170,3 +172,14 @@ def test_incremental_update(content_config: ContentConfig, search_config: Search
|
|||
# Cleanup
|
||||
# reset input_files in config to empty list
|
||||
content_config.org.input_files = []
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@pytest.mark.skipif(os.getenv("GITHUB_PAT_TOKEN") is None, reason="GITHUB_PAT_TOKEN not set")
|
||||
def test_asymmetric_setup_github(content_config: ContentConfig, search_config: SearchConfig):
|
||||
# Act
|
||||
# Regenerate GitHub embeddings during asymmetric setup
|
||||
github_model = text_search.setup(GithubToJsonl, content_config.github, search_config.asymmetric, regenerate=True)
|
||||
|
||||
# Assert
|
||||
assert len(github_model.entries) > 1
|
||||
|
|
Loading…
Reference in a new issue