Agiflow Eval
Evaluations

agiflow_eval ships a set of LLM evaluation metrics. Each metric is constructed with a metadata object and a model, takes an LLMTestCase describing the interaction to evaluate, and returns a score from its asynchronous a_measure method.

Answer Relevancy Metric

Evaluates how relevant the model's answer is to the given input.

from agiflow_eval import AnswerRelevancyMetric, LLMTestCase

# metadata and model are supplied by your application's agiflow_eval setup.
metric = AnswerRelevancyMetric(metadata=metadata, model=model)
test_case = LLMTestCase(input="input text", actual_output="actual output text")
# a_measure is a coroutine, so it must be awaited from async code.
score = await metric.a_measure(test_case)
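
Because a_measure is a coroutine, a standalone script needs an async entry point. The sketch below shows one way to run the metric end to end with asyncio.run; the metadata and model placeholders stand in for whatever objects your application already passes to agiflow_eval, since their construction is not covered here, and the question and answer strings are illustrative only.

import asyncio

from agiflow_eval import AnswerRelevancyMetric, LLMTestCase

async def main():
    # Placeholders: replace with the metadata and model objects your
    # application already uses with agiflow_eval.
    metadata = ...
    model = ...

    metric = AnswerRelevancyMetric(metadata=metadata, model=model)
    test_case = LLMTestCase(
        input="What is the capital of France?",
        actual_output="Paris is the capital of France.",
    )
    # Await the coroutine inside the async function and print the score.
    score = await metric.a_measure(test_case)
    print(score)

asyncio.run(main())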

Bias Metric

Measures the presence of bias in the model's output.

from agiflow_eval import BiasMetric, LLMTestCase
 
metric = BiasMetric(metadata=metadata, model=model)
test_case = LLMTestCase(input="input text", actual_output="actual output text")
score = await metric.a_measure(test_case)

Contextual Relevancy Metric

Assesses the relevancy of the model's output given the retrieval context.

from agiflow_eval import ContextualRelevancyMetric, LLMTestCase
 
metric = ContextualRelevancyMetric(metadata=metadata, model=model)
test_case = LLMTestCase(
  input="input text",
  actual_output="actual output text",
  retrieval_context="retrieval context text"
)
score = await metric.a_measure(test_case)

Faithfulness Metric

Determines how faithful the model's output is to the given input and retrieval context.

from agiflow_eval import FaithfulnessMetric, LLMTestCase
 
metric = FaithfulnessMetric(metadata=metadata, model=model)
test_case = LLMTestCase(
  input="input text",
  actual_output="actual output text",
  retrieval_context="retrieval context text"
)
score = await metric.a_measure(test_case)
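
The faithfulness and contextual relevancy metrics accept the same LLMTestCase fields, so one retrieval-augmented interaction can be scored by both. The helper below is a hedged sketch rather than documented API: the function name and parameters are hypothetical, the caller is assumed to supply the metadata and model objects, and running the two a_measure coroutines concurrently with asyncio.gather is an assumption that the metrics are independent of each other.

import asyncio

from agiflow_eval import ContextualRelevancyMetric, FaithfulnessMetric, LLMTestCase

async def evaluate_rag_answer(metadata, model, query, answer, retrieved):
    # Build one test case and score it with both metrics.
    test_case = LLMTestCase(
        input=query,
        actual_output=answer,
        retrieval_context=retrieved,
    )
    faithfulness = FaithfulnessMetric(metadata=metadata, model=model)
    relevancy = ContextualRelevancyMetric(metadata=metadata, model=model)

    # Both a_measure calls are coroutines, so they can be awaited together.
    faithfulness_score, relevancy_score = await asyncio.gather(
        faithfulness.a_measure(test_case),
        relevancy.a_measure(test_case),
    )
    return faithfulness_score, relevancy_score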

Hallucination Metric

Measures the degree of hallucination in the model's output relative to the provided context.

from agiflow_eval import HallucinationMetric, LLMTestCase
 
metric = HallucinationMetric(metadata=metadata, model=model)
test_case = LLMTestCase(
  input="input text",
  actual_output="actual output text",
  context="context text"
)
score = await metric.a_measure(test_case)

Toxicity Metric

Evaluates the toxicity level of the model's output.

from agiflow_eval import ToxicityMetric, LLMTestCase
 
metric = ToxicityMetric(metadata=metadata, model=model)
test_case = LLMTestCase(
  input="input text",
  actual_output="actual output text"
)
score = await metric.a_measure(test_case)
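
Bias and toxicity checks are often run over many outputs at once. The loop below is an illustrative sketch rather than documented behaviour: the helper name is hypothetical, and it assumes a metric instance can be reused across test cases; if that assumption does not hold, construct a new metric for each case.

from agiflow_eval import BiasMetric, LLMTestCase, ToxicityMetric

async def screen_outputs(outputs, metadata, model):
    # outputs: iterable of (input_text, actual_output_text) pairs.
    bias = BiasMetric(metadata=metadata, model=model)
    toxicity = ToxicityMetric(metadata=metadata, model=model)

    results = []
    for input_text, output_text in outputs:
        test_case = LLMTestCase(input=input_text, actual_output=output_text)
        # Score each output for bias and toxicity in turn.
        bias_score = await bias.a_measure(test_case)
        toxicity_score = await toxicity.a_measure(test_case)
        results.append({"bias": bias_score, "toxicity": toxicity_score})
    return results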