Fix github workflow to start Khoj, connect to PG and upload results

- Do not trigger tests to run in CI on updates to evals
Debanjum 2024-11-18 02:26:25 -08:00
parent 7c0fd71bfd
commit a2ccf6f59f
3 changed files with 47 additions and 6 deletions

View file

@@ -45,9 +45,14 @@ jobs:
         env:
           POSTGRES_PASSWORD: postgres
           POSTGRES_USER: postgres
+          POSTGRES_DB: postgres
         ports:
           - 5432:5432
-        options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
 
     steps:
       - uses: actions/checkout@v3
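
The folded "options: >-" block is only a readability reflow of the same docker create health-check flags: Docker polls pg_isready every 10s with a 5s timeout, up to 5 retries, before the service is considered healthy. For illustration, a rough Python equivalent of that wait pattern (a hypothetical helper, not part of this workflow):

import socket
import time

def wait_for_postgres(host="localhost", port=5432, retries=5, interval=10.0, timeout=5.0):
    """Poll the Postgres port until it accepts connections, mirroring the
    --health-interval / --health-timeout / --health-retries flags above."""
    for attempt in range(1, retries + 1):
        try:
            with socket.create_connection((host, port), timeout=timeout):
                return True
        except OSError:
            if attempt < retries:
                time.sleep(interval)
    return False
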
@@ -57,7 +62,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: 3.10
+          python-version: '3.10'
 
       - name: Get App Version
         id: hatch
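
Quoting the Python version matters because an unquoted 3.10 is a YAML float and collapses to 3.1, which setup-python would then try to resolve as Python 3.1 instead of 3.10. A quick check of the difference, assuming PyYAML is installed:

import yaml

print(yaml.safe_load("python-version: 3.10"))    # {'python-version': 3.1}   -- float, minor version lost
print(yaml.safe_load("python-version: '3.10'"))  # {'python-version': '3.10'} -- string, preserved
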
@@ -88,7 +93,9 @@ jobs:
           GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
           SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
           OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
-          POSTGRES_HOST: postgres
+          KHOJ_ADMIN_EMAIL: khoj
+          KHOJ_ADMIN_PASSWORD: khoj
+          POSTGRES_HOST: localhost
           POSTGRES_PORT: 5432
           POSTGRES_USER: postgres
           POSTGRES_PASSWORD: postgres
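
Since this job runs directly on the runner rather than inside a container, the Postgres service is reachable through its published port on localhost, not via the postgres service hostname, and the KHOJ_ADMIN_EMAIL / KHOJ_ADMIN_PASSWORD values let the Khoj server create its admin user without prompting. A minimal sketch of assembling the connection settings from these variables (the build_dsn helper is hypothetical, not Khoj code):

import os

def build_dsn() -> str:
    """Assemble a Postgres DSN from the env vars exported by this workflow step."""
    host = os.environ.get("POSTGRES_HOST", "localhost")
    port = os.environ.get("POSTGRES_PORT", "5432")
    user = os.environ.get("POSTGRES_USER", "postgres")
    password = os.environ.get("POSTGRES_PASSWORD", "postgres")
    db = os.environ.get("POSTGRES_DB", "postgres")
    return f"postgresql://{user}:{password}@{host}:{port}/{db}"

print(build_dsn())  # postgresql://postgres:postgres@localhost:5432/postgres
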
@@ -119,4 +126,23 @@ jobs:
         uses: actions/upload-artifact@v3
         with:
           name: eval-results-${{ steps.hatch.outputs.version }}-${{ matrix.khoj_mode }}-${{ matrix.dataset }}
-          path: "*_evaluation_results_*.csv"
+          path: |
+            *_evaluation_results_*.csv
+            *_evaluation_summary_*.txt
+
+      - name: Display Results
+        if: always()
+        run: |
+          # Read and display summary
+          echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
+          echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
+          echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
+          echo "- Chat Model: Gemini 1.5 Flash 002" >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+          tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
+          echo "" >> $GITHUB_STEP_SUMMARY
+          echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
+
+          # Display in logs too
+          echo "===== EVALUATION RESULTS ====="
+          cat *_evaluation_summary_*.txt
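
The Display Results step relies on the summary file layout written by the eval script change further down: the first line is a one-line headline (bolded via head -n 1) and the remaining lines are detail rows wrapped in a fenced block via tail -n +2. A small Python sketch of that split, using made-up summary values purely for illustration:

summary_text = (
    "Overall Accuracy: 62.00%\n"
    "\n"
    "Accuracy by Reasoning Type:\n"
    "multi-hop    0.58\n"
    "temporal     0.66\n"
)

lines = summary_text.splitlines()
headline, details = lines[0], "\n".join(lines[1:])
print(f"**{headline}**")  # what `head -n 1` feeds into the step summary
print(details)            # what `tail -n +2` wraps in a fenced block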

View file

@@ -5,6 +5,7 @@ on:
     paths:
       - src/khoj/**
       - tests/**
+      - '!tests/evals/**'
       - config/**
       - pyproject.toml
       - .pre-commit-config.yml
@@ -15,6 +16,7 @@ on:
     paths:
       - src/khoj/**
       - tests/**
+      - '!tests/evals/**'
       - config/**
       - pyproject.toml
       - .pre-commit-config.yml
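
The added '!tests/evals/**' entry keeps eval-only changes from triggering the test workflow: GitHub evaluates the paths patterns in order and the last matching pattern wins, so files under tests/evals/ match tests/** but are then excluded by the negation. A rough simulation of that last-match-wins rule (not GitHub's actual matcher, just to show the effect):

from fnmatch import fnmatch

# Patterns as they appear in the workflow's `paths` filter.
PATTERNS = ["src/khoj/**", "tests/**", "!tests/evals/**", "config/**", "pyproject.toml"]

def triggers(changed_path: str) -> bool:
    """Last matching pattern decides; a leading '!' negates."""
    decision = False
    for pattern in PATTERNS:
        negated = pattern.startswith("!")
        if fnmatch(changed_path, pattern.lstrip("!")):
            decision = not negated
    return decision

print(triggers("tests/test_client.py"))  # True  -> workflow runs
print(triggers("tests/evals/eval.py"))   # False -> eval-only changes skip the tests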

View file

@@ -286,10 +286,23 @@ def main():
     logger.info(f"\nOverall Accuracy: {colored_accuracy}")
     logger.info(f"\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}")
 
-    # Save results
+    # Save summary to file
+    sample_type = f"Sampling Type: {SAMPLE_SIZE} samples." if SAMPLE_SIZE else "Whole dataset."
+    sample_type += " Randomized." if RANDOMIZE else ""
+    summary = (
+        f"Overall Accuracy: {accuracy:.2%}\n\nAccuracy by Reasoning Type:\n{reasoning_type_accuracy}\n\n{sample_type}\n"
+    )
+    summary_file = args.output.replace(".csv", ".txt") if args.output else None
+    summary_file = (
+        summary_file or f"{args.dataset}_evaluation_summary_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
+    )
+    with open(summary_file, "w") as f:
+        f.write(summary)
+
+    # Save raw results to file
     output_file = args.output or f"{args.dataset}_evaluation_results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
     df.to_csv(output_file, index=False)
-    logger.info(f"Results saved to {output_file}")
+    logger.info(f"Results saved to {summary_file}, {output_file}")
 
 
 if __name__ == "__main__":
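
One detail in the new save logic: when --output results.csv is passed, the summary is written alongside it as results.txt; otherwise both files fall back to timestamped names that the workflow's *_evaluation_results_*.csv and *_evaluation_summary_*.txt globs pick up. A standalone sketch of that naming rule (hypothetical values, not part of the script):

from datetime import datetime

def result_paths(dataset: str, output: str | None) -> tuple[str, str]:
    """Mirror how the eval script derives its CSV and summary file names."""
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    csv_file = output or f"{dataset}_evaluation_results_{stamp}.csv"
    txt_file = output.replace(".csv", ".txt") if output else f"{dataset}_evaluation_summary_{stamp}.txt"
    return csv_file, txt_file

print(result_paths("frames", "results.csv"))  # ('results.csv', 'results.txt')
print(result_paths("frames", None))           # timestamped *_evaluation_results_*.csv / *_evaluation_summary_*.txt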