Show correct index of example currently being processed in frames eval

Previously the batch start index wasn't passed to the worker, so all batches
started in parallel reported the same example index while processing.

This change doesn't affect the evaluation itself, only the index shown
for the example currently being evaluated.
Debanjum 2024-11-08 15:46:44 -08:00
parent 84a8088c2b
commit f967bdf702


@@ -101,10 +101,10 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> Dic
         return {"decision": "FALSE", "explanation": f"Evaluation failed: {str(e)}"}
 
 
-def process_batch(batch, counter, results, dataset_length):
-    for prompt, answer, reasoning_type in batch:
-        counter += 1
-        logger.info(f"Processing example: {counter}/{dataset_length}")
+def process_batch(batch, batch_start, results, dataset_length):
+    for idx, (prompt, answer, reasoning_type) in enumerate(batch):
+        current_index = batch_start + idx
+        logger.info(f"Processing example: {current_index}/{dataset_length}")
 
         # Trigger research mode if enabled
         prompt = f"/{KHOJ_MODE} {prompt}" if KHOJ_MODE else prompt
@@ -122,7 +122,7 @@ def process_batch(batch, counter, results, dataset_length):
         # Store results
         results.append(
             {
-                "index": counter,
+                "index": current_index,
                 "prompt": prompt,
                 "ground_truth": answer,
                 "agent_response": agent_response,
@@ -169,12 +169,13 @@ def main():
     with concurrent.futures.ThreadPoolExecutor() as executor:
         futures = []
         for i in range(0, dataset_length, BATCH_SIZE):
+            batch_start = i
            batch = zip(
                 dataset["Prompt"][i : i + BATCH_SIZE],
                 dataset["Answer"][i : i + BATCH_SIZE],
                 dataset["reasoning_types"][i : i + BATCH_SIZE],
             )
-            futures.append(executor.submit(process_batch, batch, counter, results, dataset_length))
+            futures.append(executor.submit(process_batch, batch, batch_start, results, dataset_length))
 
         # Wait for all futures to complete
         concurrent.futures.wait(futures)
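
For reference, a minimal standalone sketch of the indexing pattern this change adopts: each worker receives its batch's start offset and derives the global example index from it, instead of incrementing a counter that every thread received as the same value. The dataset, BATCH_SIZE, and print() stand in for the project's data, config, and logger; this is not the actual eval script.

import concurrent.futures

BATCH_SIZE = 4  # hypothetical batch size for this sketch
dataset = [f"prompt-{n}" for n in range(10)]  # stand-in for the eval dataset
dataset_length = len(dataset)


def process_batch(batch, batch_start, results, dataset_length):
    for idx, prompt in enumerate(batch):
        # Global index = offset of this batch in the dataset + offset within the batch.
        current_index = batch_start + idx
        print(f"Processing example: {current_index}/{dataset_length}")
        results.append({"index": current_index, "prompt": prompt})


results = []
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = []
    for i in range(0, dataset_length, BATCH_SIZE):
        batch = dataset[i : i + BATCH_SIZE]
        # Pass the batch's start index so each worker reports its own slice
        # (0-3, 4-7, 8-9) instead of every worker reporting identical numbers.
        futures.append(executor.submit(process_batch, batch, i, results, dataset_length))
    concurrent.futures.wait(futures)

print(sorted(item["index"] for item in results))  # [0, 1, 2, ..., 9]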