eval_run(test_files: list[str] = typer.Argument(..., help='List of paths to json/jsonl files containing test cases'), backend: str = typer.Option('ollama', '--backend', '-b', help='Generation backend'), model: str = typer.Option(None, '--model', help='Generation model name'), max_gen_tokens: int = typer.Option(256, '--max-gen-tokens', help='Max tokens to generate for responses'), judge_backend: str = typer.Option(None, '--judge-backend', '-jb', help='Judge backend'), judge_model: str = typer.Option(None, '--judge-model', help='Judge model name'), max_judge_tokens: int = typer.Option(256, '--max-judge-tokens', help="Max tokens for the judge model's judgement."), output_path: str = typer.Option('eval_results', '--output-path', '-o', help='Output path for results'), output_format: str = typer.Option('json', '--output-format', help='Either json or jsonl format for results'), continue_on_error: bool = typer.Option(True, '--continue-on-error'))