Evaluations
Answer Relevancy Metric
Evaluates the relevancy of an answer given a specific input.
from agiflow_eval import AnswerRelevancyMetric, LLMTestCase
metric = AnswerRelevancyMetric(metadata=metadata, model=model)
test_case = LLMTestCase(input="input text", actual_output="actual output text")
score = await metric.a_measure(test_case)
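All of the snippets in this section assume that `model` (the evaluation LLM wrapper) and `metadata` (the evaluation metadata) have already been configured elsewhere in your agiflow_eval setup, and that a_measure is awaited from inside an async function. A minimal end-to-end sketch under those assumptions, using a hypothetical helper named evaluate_answer_relevancy:

import asyncio
from agiflow_eval import AnswerRelevancyMetric, LLMTestCase

async def evaluate_answer_relevancy(model, metadata):
    # `model` and `metadata` are placeholders for objects produced by your own
    # agiflow_eval configuration; they are not constructed here.
    metric = AnswerRelevancyMetric(metadata=metadata, model=model)
    test_case = LLMTestCase(input="input text", actual_output="actual output text")
    # a_measure is a coroutine, so it must be awaited inside an async function.
    return await metric.a_measure(test_case)

# score = asyncio.run(evaluate_answer_relevancy(model, metadata))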
Bias Metric
Measures the presence of bias in the model's output.
from agiflow_eval import BiasMetric, LLMTestCase
metric = BiasMetric(metadata=metadata, model=model)
test_case = LLMTestCase(input="input text", actual_output="actual output text")
score = await metric.a_measure(test_case)
Contextual Relevancy Metric
Assesses the relevancy of the model's output with respect to the supplied retrieval context.
from agiflow_eval import ContextualRelevancyMetric, LLMTestCase
metric = ContextualRelevancyMetric(metadata=metadata, model=model)
test_case = LLMTestCase(
    input="input text",
    actual_output="actual output text",
    retrieval_context="retrieval context text"
)
score = await metric.a_measure(test_case)
Faithfulness Metric
Measures how faithful the model's output is to the supplied retrieval context.
from agiflow_eval import FaithfulnessMetric, LLMTestCase
metric = FaithfulnessMetric(metadata=metadata, model=model)
test_case = LLMTestCase(
    input="input text",
    actual_output="actual output text",
    retrieval_context="retrieval context text"
)
score = await metric.a_measure(test_case)
Hallucination Metric
Measures the degree of hallucination in the model's output relative to the provided context. Note that this metric takes a context field rather than a retrieval_context.
from agiflow_eval import HallucinationMetric, LLMTestCase
metric = HallucinationMetric(metadata=metadata, model=model)
test_case = LLMTestCase(
    input="input text",
    actual_output="actual output text",
    context="context text"
)
score = await metric.a_measure(test_case)
Toxicity Metric
Evaluates the toxicity level of the model's output.
from agiflow_eval import ToxicityMetric, LLMTestCase
metric = ToxicityMetric(metadata=metadata, model=model)
test_case = LLMTestCase(
    input="input text",
    actual_output="actual output text"
)
score = await metric.a_measure(test_case)
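Because every metric exposes the same async a_measure interface, several metrics can be scored against one test case concurrently. The sketch below again assumes `model` and `metadata` come from your own configuration; run_all_metrics is a hypothetical helper name:

import asyncio
from agiflow_eval import (
    AnswerRelevancyMetric,
    BiasMetric,
    ToxicityMetric,
    LLMTestCase,
)

async def run_all_metrics(model, metadata):
    # These three metrics only need input/actual_output, so they share one test case.
    test_case = LLMTestCase(input="input text", actual_output="actual output text")
    metrics = [
        AnswerRelevancyMetric(metadata=metadata, model=model),
        BiasMetric(metadata=metadata, model=model),
        ToxicityMetric(metadata=metadata, model=model),
    ]
    # a_measure is a coroutine, so the evaluations can run concurrently.
    scores = await asyncio.gather(*(m.a_measure(test_case) for m in metrics))
    return dict(zip(["answer_relevancy", "bias", "toxicity"], scores))

# score_map = asyncio.run(run_all_metrics(model, metadata))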