Guardrails#

ragbits.guardrails.base.Guardrail #

Bases: ABC

Abstract class representing a guardrail

verify abstractmethod async #

verify(input_to_verify: Prompt | str) -> GuardrailVerificationResult

Verifies whether the provided input meets certain criteria

PARAMETERS:
    input_to_verify (Prompt | str): prompt or output of the model to check

RETURNS:
    GuardrailVerificationResult: verification result

Source code in packages/ragbits-guardrails/src/ragbits/guardrails/base.py
@abstractmethod
async def verify(self, input_to_verify: Prompt | str) -> GuardrailVerificationResult:
    """
    Verifies whether the provided input meets certain criteria

    Args:
        input_to_verify: prompt or output of the model to check

    Returns:
        verification result
    """

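Custom guardrails subclass Guardrail and implement verify. The following is a minimal sketch of a hypothetical keyword-blocking guardrail; the KeywordGuardrail name and the ragbits.core.prompt import path are assumptions made for illustration, while Guardrail and GuardrailVerificationResult are the classes documented here.

from ragbits.core.prompt import Prompt
from ragbits.guardrails.base import Guardrail, GuardrailVerificationResult


class KeywordGuardrail(Guardrail):
    """Hypothetical guardrail that fails when a banned keyword appears in the input."""

    def __init__(self, banned_keywords: list[str]):
        self._banned_keywords = [keyword.lower() for keyword in banned_keywords]

    async def verify(self, input_to_verify: Prompt | str) -> GuardrailVerificationResult:
        # Reduce both accepted input types to plain text before checking.
        text = input_to_verify if isinstance(input_to_verify, str) else input_to_verify.rendered_user_prompt
        found = [keyword for keyword in self._banned_keywords if keyword in text.lower()]
        return GuardrailVerificationResult(
            guardrail_name=self.__class__.__name__,
            succeeded=not found,
            fail_reason=None if not found else f"Banned keywords found: {', '.join(found)}",
        )
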
ragbits.guardrails.base.GuardrailManager #

GuardrailManager(guardrails: list[Guardrail])

Class responsible for running guardrails

Source code in packages/ragbits-guardrails/src/ragbits/guardrails/base.py
def __init__(self, guardrails: list[Guardrail]):
    self._guardrails = guardrails

verify async #

verify(input_to_verify: Prompt | str) -> list[GuardrailVerificationResult]

Verifies whether the provided input meets certain criteria

PARAMETERS:
    input_to_verify (Prompt | str): prompt or output of the model to check

RETURNS:
    list[GuardrailVerificationResult]: list of verification results

Source code in packages/ragbits-guardrails/src/ragbits/guardrails/base.py
async def verify(self, input_to_verify: Prompt | str) -> list[GuardrailVerificationResult]:
    """
    Verifies whether the provided input meets certain criteria

    Args:
        input_to_verify: prompt or output of the model to check

    Returns:
        list of verification results
    """
    return [await guardrail.verify(input_to_verify) for guardrail in self._guardrails]

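A short usage sketch for the manager: it fans the input out to every configured guardrail and collects the individual results. The example assumes an OPENAI_API_KEY is available in the environment for the OpenAI-based guardrail; any other Guardrail implementation (such as the KeywordGuardrail sketch above) can be added to the same list.

import asyncio

from ragbits.guardrails.base import GuardrailManager
from ragbits.guardrails.openai_moderation import OpenAIModerationGuardrail


async def main() -> None:
    # The manager accepts any mix of Guardrail implementations.
    manager = GuardrailManager([OpenAIModerationGuardrail()])
    results = await manager.verify("User message to check before it reaches the model.")
    for result in results:
        print(result.guardrail_name, result.succeeded, result.fail_reason)


asyncio.run(main())
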
ragbits.guardrails.base.GuardrailVerificationResult #

Bases: BaseModel

Class representing the result of a guardrail verification

guardrail_name instance-attribute #

guardrail_name: str

succeeded instance-attribute #

succeeded: bool

fail_reason instance-attribute #

fail_reason: str | None

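GuardrailVerificationResult is a plain Pydantic model, so callers simply read its fields. The helper below is an illustrative sketch, not part of the package, showing one way to collapse a list of results (for example from GuardrailManager.verify) into a single pass/fail decision:

from ragbits.guardrails.base import GuardrailVerificationResult


def raise_on_failure(results: list[GuardrailVerificationResult]) -> None:
    # Gather every guardrail that rejected the input and surface all reasons at once.
    failures = [result for result in results if not result.succeeded]
    if failures:
        reasons = "; ".join(f"{result.guardrail_name}: {result.fail_reason}" for result in failures)
        raise ValueError(f"Guardrail verification failed: {reasons}")
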
ragbits.guardrails.openai_moderation.OpenAIModerationGuardrail #

OpenAIModerationGuardrail(moderation_model: str = 'omni-moderation-latest')

Bases: Guardrail

Guardrail based on the OpenAI moderation API

Source code in packages/ragbits-guardrails/src/ragbits/guardrails/openai_moderation.py
def __init__(self, moderation_model: str = "omni-moderation-latest"):
    self._openai_client = AsyncOpenAI()
    self._moderation_model = moderation_model

verify async #

verify(input_to_verify: Prompt | str) -> GuardrailVerificationResult

Verifies whether the provided input meets certain criteria

PARAMETERS:
    input_to_verify (Prompt | str): prompt or output of the model to check

RETURNS:
    GuardrailVerificationResult: verification result

Source code in packages/ragbits-guardrails/src/ragbits/guardrails/openai_moderation.py
async def verify(self, input_to_verify: Prompt | str) -> GuardrailVerificationResult:
    """
    Verifies whether the provided input meets certain criteria

    Args:
        input_to_verify: prompt or output of the model to check

    Returns:
        verification result
    """
    if isinstance(input_to_verify, Prompt):
        inputs = [{"type": "text", "text": input_to_verify.rendered_user_prompt}]
        if input_to_verify.rendered_system_prompt is not None:
            inputs.append({"type": "text", "text": input_to_verify.rendered_system_prompt})
        if images := input_to_verify.images:
            inputs.extend(
                [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64.b64encode(im).decode('utf-8')}"},  # type: ignore
                    }
                    for im in images
                ]
            )
    else:
        inputs = [{"type": "text", "text": input_to_verify}]
    response = await self._openai_client.moderations.create(model=self._moderation_model, input=inputs)  # type: ignore

    fail_reasons = [result for result in response.results if result.flagged]
    return GuardrailVerificationResult(
        guardrail_name=self.__class__.__name__,
        succeeded=len(fail_reasons) == 0,
        fail_reason=None if len(fail_reasons) == 0 else str(fail_reasons),
    )
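
A minimal end-to-end sketch of using the moderation guardrail directly, assuming an OPENAI_API_KEY is available in the environment (AsyncOpenAI reads it automatically); the sample text and print handling are purely illustrative.

import asyncio

from ragbits.guardrails.openai_moderation import OpenAIModerationGuardrail


async def main() -> None:
    guardrail = OpenAIModerationGuardrail()  # uses the "omni-moderation-latest" model by default
    result = await guardrail.verify("Some user-provided text to check.")
    if not result.succeeded:
        print(f"Flagged by {result.guardrail_name}: {result.fail_reason}")


asyncio.run(main())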