Fix github workflow to start Khoj, connect to PG and upload results

- Do not trigger tests to run in CI on updates to evals
Debanjum 2024-11-18 02:26:25 -08:00
parent 7c0fd71bfd
commit a2ccf6f59f
3 changed files with 47 additions and 6 deletions

View file

@@ -45,9 +45,14 @@ jobs:
         env:
           POSTGRES_PASSWORD: postgres
           POSTGRES_USER: postgres
+          POSTGRES_DB: postgres
         ports:
           - 5432:5432
-        options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
 
     steps:
       - uses: actions/checkout@v3
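
The folded "options: >-" block is only a readability reflow of the same docker create health-check flags: Docker polls pg_isready every 10s with a 5s timeout, up to 5 retries, before the service is considered healthy. For illustration, a rough Python equivalent of that wait pattern (a hypothetical helper, not part of this workflow):

import socket
import time

def wait_for_postgres(host="localhost", port=5432, retries=5, interval=10.0, timeout=5.0):
    """Poll the Postgres port until it accepts connections, mirroring the
    --health-interval / --health-timeout / --health-retries flags above."""
    for attempt in range(1, retries + 1):
        try:
            with socket.create_connection((host, port), timeout=timeout):
                return True
        except OSError:
            if attempt < retries:
                time.sleep(interval)
    return False
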
@@ -57,7 +62,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: 3.10
+          python-version: '3.10'
 
       - name: Get App Version
         id: hatch
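
Quoting the Python version matters because an unquoted 3.10 is a YAML float and collapses to 3.1, which setup-python would then try to resolve as Python 3.1 instead of 3.10. A quick check of the difference, assuming PyYAML is installed:

import yaml

print(yaml.safe_load("python-version: 3.10"))    # {'python-version': 3.1}   -- float, minor version lost
print(yaml.safe_load("python-version: '3.10'"))  # {'python-version': '3.10'} -- string, preserved
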
@@ -88,7 +93,9 @@ jobs:
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
           SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
           OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
-          POSTGRES_HOST: postgres
+          KHOJ_ADMIN_EMAIL: khoj
+          KHOJ_ADMIN_PASSWORD: khoj
+          POSTGRES_HOST: localhost
           POSTGRES_PORT: 5432
           POSTGRES_USER: postgres
           POSTGRES_PASSWORD: postgres
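
Since this job runs directly on the runner rather than inside a container, the Postgres service is reachable through its published port on localhost, not via the postgres service hostname, and the KHOJ_ADMIN_EMAIL / KHOJ_ADMIN_PASSWORD values let the Khoj server create its admin user without prompting. A minimal sketch of assembling the connection settings from these variables (the build_dsn helper is hypothetical, not Khoj code):

import os

def build_dsn() -> str:
    """Assemble a Postgres DSN from the env vars exported by this workflow step."""
    host = os.environ.get("POSTGRES_HOST", "localhost")
    port = os.environ.get("POSTGRES_PORT", "5432")
    user = os.environ.get("POSTGRES_USER", "postgres")
    password = os.environ.get("POSTGRES_PASSWORD", "postgres")
    db = os.environ.get("POSTGRES_DB", "postgres")
    return f"postgresql://{user}:{password}@{host}:{port}/{db}"

print(build_dsn())  # postgresql://postgres:postgres@localhost:5432/postgres
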
@@ -119,4 +126,23 @@ jobs:
         uses: actions/upload-artifact@v3
         with:
           name: eval-results-${{ steps.hatch.outputs.version }}-${{ matrix.khoj_mode }}-${{ matrix.dataset }}
-          path: "*_evaluation_results_*.csv"
+          path: |
+            *_evaluation_results_*.csv
+            *_evaluation_summary_*.txt
+
+      - name: Display Results
+        if: always()
+        run: |
+          # Read and display summary
+          echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
+          echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
+          echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
+          echo "- Chat Model: Gemini 1.5 Flash 002" >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+          tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+
+          # Display in logs too
+          echo "===== EVALUATION RESULTS ====="
+          cat *_evaluation_summary_*.txt
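
The Display Results step relies on the summary file layout written by the eval script change further down: the first line is a one-line headline (bolded via head -n 1) and the remaining lines are detail rows wrapped in a fenced block via tail -n +2. A small Python sketch of that split, using made-up summary values purely for illustration:

summary_text = (
    "Overall Accuracy: 62.00%\n"
    "\n"
    "Accuracy by Reasoning Type:\n"
    "multi-hop    0.58\n"
    "temporal     0.66\n"
)

lines = summary_text.splitlines()
headline, details = lines[0], "\n".join(lines[1:])
print(f"**{headline}**")  # what `head -n 1` feeds into the step summary
print(details)            # what `tail -n +2` wraps in a fenced block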

View file

@@ -5,6 +5,7 @@ on:
     paths:
       - src/khoj/**
       - tests/**
+      - '!tests/evals/**'
       - config/**
       - pyproject.toml
       - .pre-commit-config.yml
@@ -15,6 +16,7 @@ on:
     paths:
       - src/khoj/**
       - tests/**
+      - '!tests/evals/**'
       - config/**
       - pyproject.toml
       - .pre-commit-config.yml
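
The added '!tests/evals/**' entry keeps eval-only changes from triggering the test workflow: GitHub evaluates the paths patterns in order and the last matching pattern wins, so files under tests/evals/ match tests/** but are then excluded by the negation. A rough simulation of that last-match-wins rule (not GitHub's actual matcher, just to show the effect):

from fnmatch import fnmatch

# Patterns as they appear in the workflow's `paths` filter.
PATTERNS = ["src/khoj/**", "tests/**", "!tests/evals/**", "config/**", "pyproject.toml"]

def triggers(changed_path: str) -> bool:
    """Last matching pattern decides; a leading '!' negates."""
    decision = False
    for pattern in PATTERNS:
        negated = pattern.startswith("!")
        if fnmatch(changed_path, pattern.lstrip("!")):
            decision = not negated
    return decision

print(triggers("tests/test_client.py"))  # True  -> workflow runs
print(triggers("tests/evals/eval.py"))   # False -> eval-only changes skip the tests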

View file

@@ -286,10 +286,23 @@ def main():
     logger.info(f"\nOverall Accuracy: {colored_accuracy}")
     logger.info(f"\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}")
 
-    # Save results
+    # Save summary to file
+    sample_type = f"Sampling Type: {SAMPLE_SIZE} samples." if SAMPLE_SIZE else "Whole dataset."
+    sample_type += " Randomized." if RANDOMIZE else ""
+    summary = (
+        f"Overall Accuracy: {accuracy:.2%}\n\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}\n\n{sample_type}\n"
+    )
+    summary_file = args.output.replace(".csv", ".txt") if args.output else None
+    summary_file = (
+        summary_file or f"{args.dataset}_evaluation_summary_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
+    )
+    with open(summary_file, "w") as f:
+        f.write(summary)
+
+    # Save raw results to file
     output_file = args.output or f"{args.dataset}_evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
     df.to_csv(output_file, index=False)
-    logger.info(f"Results saved to {output_file}")
+    logger.info(f"Results saved to {summary_file}, {output_file}")
 
 
 if __name__ == "__main__":
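
One detail in the new save logic: when --output results.csv is passed, the summary is written alongside it as results.txt; otherwise both files fall back to timestamped names that the workflow's *_evaluation_results_*.csv and *_evaluation_summary_*.txt globs pick up. A standalone sketch of that naming rule (hypothetical values, not part of the script):

from datetime import datetime

def result_paths(dataset: str, output: str | None) -> tuple[str, str]:
    """Mirror how the eval script derives its CSV and summary file names."""
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    csv_file = output or f"{dataset}_evaluation_results_{stamp}.csv"
    txt_file = output.replace(".csv", ".txt") if output else f"{dataset}_evaluation_summary_{stamp}.txt"
    return csv_file, txt_file

print(result_paths("frames", "results.csv"))  # ('results.csv', 'results.txt')
print(result_paths("frames", None))           # timestamped *_evaluation_results_*.csv / *_evaluation_summary_*.txt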