# Define LLM Judge Evaluator checking for Actionable Responses
from phoenix.evals import create_classifier, LLM
from phoenix.experiments.types import EvaluationResult
# Define Prompt Template
# Classification rubric for the LLM judge: given a user query and the agent's
# reply, the judge must output exactly one of the two labels listed at the end.
# The {input.query} / {output} placeholders are filled by the Phoenix evaluator
# from the payload passed to `evaluate` (see call_actionability_judge below) —
# note {input.query} implies the "input" value is an object/dict with a `query`
# field; TODO confirm against the dataset schema used in run_experiment.
# NOTE: the labels "correct"/"incorrect" must match the keys of the `choices`
# mapping given to create_classifier, which converts them to 1.0 / 0.0 scores.
support_response_actionability_judge = """
You are evaluating a customer support agent's response.
Determine whether the response is ACTIONABLE and helps resolve the user's issue.
Mark the response as CORRECT if it:
- Directly addresses the user's specific question
- Provides concrete steps, guidance, or information
- Clearly routes the user toward a solution
Mark the response as INCORRECT if it:
- Is generic, vague, or non-specific
- Avoids answering the question
- Provides no clear next steps
- Deflects with phrases like "contact support" without guidance
User Query:
{input.query}
Agent Response:
{output}
Return only one label: "correct" or "incorrect".
"""
# Create Evaluator
# Build the LLM-backed classifier once at module load. The `choices` mapping
# ties each permitted label to a numeric score, so downstream aggregation can
# average actionability as a 0..1 metric.
actionability_judge = create_classifier(
    llm=LLM(provider="openai", model="gpt-5"),
    name="actionability-judge",
    prompt_template=support_response_actionability_judge,
    choices={"correct": 1.0, "incorrect": 0.0},
)
def call_actionability_judge(input, output):
    """Adapt the actionability judge to the callable interface run_experiment expects.

    run_experiment takes a plain function rather than an evaluator object, so
    this wrapper forwards the example's input and the task output to the judge
    and repackages the first result as an EvaluationResult.

    NOTE(review): the parameter names `input`/`output` shadow builtins, but they
    are presumably matched by keyword by run_experiment — do not rename.

    Args:
        input: The dataset example input (expected to expose a `query` field,
            per the prompt template's {input.query} placeholder — TODO confirm).
        output: The task/agent response being judged.

    Returns:
        EvaluationResult carrying the judge's score, label, and explanation.
    """
    payload = {"input": input, "output": output}
    first = actionability_judge.evaluate(payload)[0]
    return EvaluationResult(
        score=first.score,
        label=first.label,
        explanation=first.explanation,
    )