mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Fix github workflow to start Khoj, connect to PG and upload results
- Do not trigger tests to run in ci on update to evals
This commit is contained in:
parent
7c0fd71bfd
commit
a2ccf6f59f
3 changed files with 47 additions and 6 deletions
34
.github/workflows/run_evals.yml
vendored
34
.github/workflows/run_evals.yml
vendored
|
@@ -45,9 +45,14 @@ jobs:
|
|||
env:
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_DB: postgres
|
||||
ports:
|
||||
- 5432:5432
|
||||
options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
|
||||
options: >-
|
||||
--health-cmd pg_isready
|
||||
--health-interval 10s
|
||||
--health-timeout 5s
|
||||
--health-retries 5
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
@@ -57,7 +62,7 @@ jobs:
|
|||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.10
|
||||
python-version: '3.10'
|
||||
|
||||
- name: Get App Version
|
||||
id: hatch
|
||||
|
@@ -88,7 +93,9 @@ jobs:
|
|||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
|
||||
OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
|
||||
POSTGRES_HOST: postgres
|
||||
KHOJ_ADMIN_EMAIL: khoj
|
||||
KHOJ_ADMIN_PASSWORD: khoj
|
||||
POSTGRES_HOST: localhost
|
||||
POSTGRES_PORT: 5432
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
|
@@ -119,4 +126,23 @@ jobs:
|
|||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: eval-results-${{ steps.hatch.outputs.version }}-${{ matrix.khoj_mode }}-${{ matrix.dataset }}
|
||||
path: "*_evaluation_results_*.csv"
|
||||
path: |
|
||||
*_evaluation_results_*.csv
|
||||
*_evaluation_summary_*.txt
|
||||
|
||||
- name: Display Results
|
||||
if: always()
|
||||
run: |
|
||||
# Read and display summary
|
||||
echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
|
||||
echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- Chat Model: Gemini 1.5 Flash 002" >> $GITHUB_STEP_SUMMARY
|
||||
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
|
||||
tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
# Display in logs too
|
||||
echo "===== EVALUATION RESULTS ====="
|
||||
cat *_evaluation_summary_*.txt
|
||||
|
|
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
|
@@ -5,6 +5,7 @@ on:
|
|||
paths:
|
||||
- src/khoj/**
|
||||
- tests/**
|
||||
- '!tests/evals/**'
|
||||
- config/**
|
||||
- pyproject.toml
|
||||
- .pre-commit-config.yml
|
||||
|
@@ -15,6 +16,7 @@ on:
|
|||
paths:
|
||||
- src/khoj/**
|
||||
- tests/**
|
||||
- '!tests/evals/**'
|
||||
- config/**
|
||||
- pyproject.toml
|
||||
- .pre-commit-config.yml
|
||||
|
|
|
@@ -286,10 +286,23 @@ def main():
|
|||
logger.info(f"\nOverall Accuracy: {colored_accuracy}")
|
||||
logger.info(f"\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}")
|
||||
|
||||
# Save results
|
||||
# Save summary to file
|
||||
sample_type = f"Sampling Type: {SAMPLE_SIZE} samples." if SAMPLE_SIZE else "Whole dataset."
|
||||
sample_type += " Randomized." if RANDOMIZE else ""
|
||||
summary = (
|
||||
f"Overall Accuracy: {accuracy:.2%}\n\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}\n\n{sample_type}\n"
|
||||
)
|
||||
summary_file = args.output.replace(".csv", ".txt") if args.output else None
|
||||
summary_file = (
|
||||
summary_file or f"{args.dataset}_evaluation_summary_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
|
||||
)
|
||||
with open(summary_file, "w") as f:
|
||||
f.write(summary)
|
||||
|
||||
# Save raw results to file
|
||||
output_file = args.output or f"{args.dataset}_evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
|
||||
df.to_csv(output_file, index=False)
|
||||
logger.info(f"Results saved to {output_file}")
|
||||
logger.info(f"Results saved to {summary_file}, {output_file}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in a new issue