mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Add separate unit test for github. Will only run if a PAT token is set
This commit is contained in:
parent
3a61919344
commit
751edfefe5
3 changed files with 29 additions and 1 deletions
|
@ -328,6 +328,11 @@ Add your OpenAI API to Khoj by using either of the two options below:
|
||||||
1. [Setup your OpenAI API key in Khoj](#set-your-openai-api-key-in-khoj)
|
1. [Setup your OpenAI API key in Khoj](#set-your-openai-api-key-in-khoj)
|
||||||
2. Interact with them from the [Khoj Swagger docs](http://localhost:8000/docs)[^2]
|
2. Interact with them from the [Khoj Swagger docs](http://localhost:8000/docs)[^2]
|
||||||
|
|
||||||
|
### Use a GitHub Repository as a source
|
||||||
|
Note that this plugin is currently *only* indexing Markdown files. It will ignore all other files in the repository. This is because Khoj, as it stands, is a semantic search engine. Eventually, we hope to get to a state where you can search for any file in your repository and even explain code.
|
||||||
|
|
||||||
|
1. Get a [personal access token (PAT)](https://docs.github.com/en/github/authenticating-to-github/keeping-your-account-and-data-secure/creating-a-personal-access-token) with `repo` and `read:org` scopes in the classic flow.
|
||||||
|
2. Configure your settings to include the `owner` and `repo_name`. The `owner` will be the organization name if the repo is in an organization. The `repo_name` will be the name of the repository. Optionally, you can also supply a branch name. If no branch name is supplied, the `master` branch will be used.
|
||||||
|
|
||||||
## Performance
|
## Performance
|
||||||
|
|
||||||
|
@ -458,7 +463,7 @@ conda activate khoj
|
||||||
|
|
||||||
#### Before Creating PR
|
#### Before Creating PR
|
||||||
|
|
||||||
1. Run Tests
|
1. Run Tests. If you get an error complaining about a missing `fast_tokenizer_file`, follow the solution [in this GitHub issue](https://github.com/UKPLab/sentence-transformers/issues/1659).
|
||||||
```shell
|
```shell
|
||||||
pytest
|
pytest
|
||||||
```
|
```
|
||||||
|
|
|
@ -16,6 +16,7 @@ from khoj.utils.rawconfig import (
|
||||||
ConversationProcessorConfig,
|
ConversationProcessorConfig,
|
||||||
ProcessorConfig,
|
ProcessorConfig,
|
||||||
TextContentConfig,
|
TextContentConfig,
|
||||||
|
GithubContentConfig,
|
||||||
ImageContentConfig,
|
ImageContentConfig,
|
||||||
SearchConfig,
|
SearchConfig,
|
||||||
TextSearchConfig,
|
TextSearchConfig,
|
||||||
|
@ -89,6 +90,15 @@ def content_config(tmp_path_factory, search_config: SearchConfig):
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
content_config.github = GithubContentConfig(
|
||||||
|
pat_token=os.getenv("GITHUB_PAT_TOKEN"),
|
||||||
|
repo_name="lantern",
|
||||||
|
repo_owner="khoj-ai",
|
||||||
|
repo_branch="master",
|
||||||
|
compressed_jsonl=content_dir.joinpath("github.jsonl.gz"),
|
||||||
|
embeddings_file=content_dir.joinpath("github_embeddings.pt"),
|
||||||
|
)
|
||||||
|
|
||||||
filters = [DateFilter(), WordFilter(), FileFilter()]
|
filters = [DateFilter(), WordFilter(), FileFilter()]
|
||||||
text_search.setup(
|
text_search.setup(
|
||||||
JsonlToJsonl, content_config.plugins["plugin1"], search_config.asymmetric, regenerate=False, filters=filters
|
JsonlToJsonl, content_config.plugins["plugin1"], search_config.asymmetric, regenerate=False, filters=filters
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# System Packages
|
# System Packages
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import os
|
||||||
|
|
||||||
# External Packages
|
# External Packages
|
||||||
import pytest
|
import pytest
|
||||||
|
@ -10,6 +11,7 @@ from khoj.utils.state import model
|
||||||
from khoj.search_type import text_search
|
from khoj.search_type import text_search
|
||||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
|
from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
|
||||||
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
|
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
|
||||||
|
from khoj.processor.github.github_to_jsonl import GithubToJsonl
|
||||||
|
|
||||||
|
|
||||||
# Test
|
# Test
|
||||||
|
@ -170,3 +172,14 @@ def test_incremental_update(content_config: ContentConfig, search_config: Search
|
||||||
# Cleanup
|
# Cleanup
|
||||||
# reset input_files in config to empty list
|
# reset input_files in config to empty list
|
||||||
content_config.org.input_files = []
|
content_config.org.input_files = []
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.skipif(os.getenv("GITHUB_PAT_TOKEN") is None, reason="GITHUB_PAT_TOKEN not set")
|
||||||
|
def test_asymmetric_setup_github(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
|
# Act
|
||||||
|
# Regenerate notes embeddings during asymmetric setup
|
||||||
|
github_model = text_search.setup(GithubToJsonl, content_config.github, search_config.asymmetric, regenerate=True)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert len(github_model.entries) > 1
|
||||||
|
|
Loading…
Reference in a new issue