Mirror of https://github.com/khoj-ai/khoj.git, synced 2024-11-27 17:35:07 +01:00
Fix github workflow to start Khoj, connect to PG and upload results
- Do not trigger tests to run in ci on update to evals

commit a2ccf6f59f (parent 7c0fd71bfd)
3 changed files with 47 additions and 6 deletions
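In short: the eval workflow gains a fully configured Postgres service and Khoj admin credentials so the server can start inside the runner, uploads the new summary .txt files alongside the result CSVs, and surfaces the summary on the run's step summary page; test.yml stops triggering on changes that only touch tests/evals/.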
.github/workflows/run_evals.yml (34 changes)
@@ -45,9 +45,14 @@ jobs:
         env:
           POSTGRES_PASSWORD: postgres
           POSTGRES_USER: postgres
+          POSTGRES_DB: postgres
         ports:
           - 5432:5432
-        options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5

     steps:
       - uses: actions/checkout@v3
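Adding POSTGRES_DB tells the postgres image which database to create on startup, and the health-check flags are now written as a YAML folded block scalar (the ">-" indicator): YAML folds the indented lines back into the single options string Docker expects, while each pg_isready flag stays on its own readable line. The job only proceeds once the health check reports the database ready, so Khoj can connect on its first attempt.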
@@ -57,7 +62,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: 3.10
+          python-version: '3.10'

       - name: Get App Version
         id: hatch
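Quoting the Python version matters because a bare 3.10 is parsed by YAML as the float 3.1, so setup-python would be asked for Python 3.1 instead of 3.10. A quick check with PyYAML (assumed installed purely for this illustration) shows the difference:

    import yaml  # PyYAML, assumed available for this sketch

    # Unquoted, YAML reads the value as a float and the trailing zero is lost.
    print(yaml.safe_load("python-version: 3.10"))    # {'python-version': 3.1}
    # Quoted, the value survives as the intended string.
    print(yaml.safe_load("python-version: '3.10'"))  # {'python-version': '3.10'}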
@@ -88,7 +93,9 @@ jobs:
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
           SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
           OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
-          POSTGRES_HOST: postgres
+          KHOJ_ADMIN_EMAIL: khoj
+          KHOJ_ADMIN_PASSWORD: khoj
+          POSTGRES_HOST: localhost
           POSTGRES_PORT: 5432
           POSTGRES_USER: postgres
           POSTGRES_PASSWORD: postgres
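Because the job runs directly on the runner rather than inside a container, the Postgres service is only reachable through the port mapped onto localhost, so POSTGRES_HOST switches from the service hostname to localhost. The KHOJ_ADMIN_EMAIL and KHOJ_ADMIN_PASSWORD values presumably let the Khoj server create its admin user without an interactive prompt. As a rough sketch of how these job-level variables translate into a database connection (the DSN format here is an assumption for illustration, not code from the repository):

    import os

    # Hypothetical sketch: assemble a libpq-style connection string from the
    # POSTGRES_* variables defined in the workflow step above.
    dsn = (
        f"postgresql://{os.environ['POSTGRES_USER']}:{os.environ['POSTGRES_PASSWORD']}"
        f"@{os.environ['POSTGRES_HOST']}:{os.environ['POSTGRES_PORT']}/postgres"
    )
    print(dsn)  # postgresql://postgres:postgres@localhost:5432/postgres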
@@ -119,4 +126,23 @@ jobs:
         uses: actions/upload-artifact@v3
         with:
           name: eval-results-${{ steps.hatch.outputs.version }}-${{ matrix.khoj_mode }}-${{ matrix.dataset }}
-          path: "*_evaluation_results_*.csv"
+          path: |
+            *_evaluation_results_*.csv
+            *_evaluation_summary_*.txt
+
+      - name: Display Results
+        if: always()
+        run: |
+          # Read and display summary
+          echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
+          echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
+          echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
+          echo "- Chat Model: Gemini 1.5 Flash 002" >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+          tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+
+          # Display in logs too
+          echo "===== EVALUATION RESULTS ====="
+          cat *_evaluation_summary_*.txt
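The new Display Results step leans on GITHUB_STEP_SUMMARY, an environment variable that points at a file; any Markdown appended to that file is rendered on the workflow run's summary page, which is why the step wraps the tail of the summary in a fenced code block. A minimal Python equivalent of the append, assuming the *_evaluation_summary_*.txt files already exist in the working directory:

    import glob
    import os

    # Sketch: append each eval summary to the job's step summary page,
    # mirroring the shell step in the workflow above.
    step_summary = os.environ["GITHUB_STEP_SUMMARY"]  # file path set by the runner
    for path in glob.glob("*_evaluation_summary_*.txt"):
        with open(path) as src, open(step_summary, "a") as dst:
            dst.write("## Evaluation Summary\n```\n" + src.read() + "\n```\n")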
.github/workflows/test.yml (2 changes)
@@ -5,6 +5,7 @@ on:
     paths:
       - src/khoj/**
       - tests/**
+      - '!tests/evals/**'
       - config/**
       - pyproject.toml
       - .pre-commit-config.yml
@@ -15,6 +16,7 @@ on:
     paths:
       - src/khoj/**
       - tests/**
+      - '!tests/evals/**'
       - config/**
       - pyproject.toml
       - .pre-commit-config.yml
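The leading exclamation mark makes '!tests/evals/**' a negative path filter: after the broad tests/** include, changes that only touch the eval harness are subtracted again, so editing the evals no longer triggers the test workflow on either push or pull_request. The pattern is quoted because an unquoted value starting with ! would be read as a YAML tag.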
Third changed file (path not shown in this view):

@@ -286,10 +286,23 @@ def main():
     logger.info(f"\nOverall Accuracy: {colored_accuracy}")
     logger.info(f"\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}")

-    # Save results
+    # Save summary to file
+    sample_type = f"Sampling Type: {SAMPLE_SIZE} samples." if SAMPLE_SIZE else "Whole dataset."
+    sample_type += " Randomized." if RANDOMIZE else ""
+    summary = (
+        f"Overall Accuracy: {accuracy:.2%}\n\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}\n\n{sample_type}\n"
+    )
+    summary_file = args.output.replace(".csv", ".txt") if args.output else None
+    summary_file = (
+        summary_file or f"{args.dataset}_evaluation_summary_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
+    )
+    with open(summary_file, "w") as f:
+        f.write(summary)
+
+    # Save raw results to file
     output_file = args.output or f"{args.dataset}_evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
     df.to_csv(output_file, index=False)
-    logger.info(f"Results saved to {output_file}")
+    logger.info(f"Results saved to {summary_file}, {output_file}")


 if __name__ == "__main__":
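The summary file reuses the --output name with a .txt extension when one is given and otherwise falls back to a timestamped per-dataset default, mirroring how the results CSV is named. A standalone sketch of that naming rule (the function name and the example dataset value are illustrative, not identifiers from the script):

    from datetime import datetime

    def summary_path(output, dataset):
        # Derive the summary .txt name from --output when provided,
        # else fall back to a timestamped per-dataset default.
        if output:
            return output.replace(".csv", ".txt")
        return f"{dataset}_evaluation_summary_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"

    print(summary_path("frames_run.csv", "frames"))  # frames_run.txt
    print(summary_path(None, "frames"))              # frames_evaluation_summary_<timestamp>.txt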