Skip to content

Evaluate#

ragbits.evaluate.evaluator.Evaluator #

Evaluator(batch_size: int = 10, num_retries: int = 3, backoff_multiplier: int = 1, backoff_max: int = 60)

Bases: WithConstructionConfig

Evaluator class.

Initialize the Evaluator instance.

PARAMETER DESCRIPTION
batch_size

batch size for the evaluation pipeline inference.

TYPE: int DEFAULT: 10

num_retries

The number of retries per evaluation pipeline inference error.

TYPE: int DEFAULT: 3

backoff_multiplier

The base delay multiplier for exponential backoff (in seconds).

TYPE: int DEFAULT: 1

backoff_max

The maximum allowed delay (in seconds) between retries.

TYPE: int DEFAULT: 60

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py
def __init__(
    self,
    batch_size: int = 10,
    num_retries: int = 3,
    backoff_multiplier: int = 1,
    backoff_max: int = 60,
) -> None:
    """
    Store the evaluation settings on the new Evaluator instance.

    Args:
        batch_size: Batch size for the evaluation pipeline inference.
        num_retries: Number of retries per evaluation pipeline inference error.
        backoff_multiplier: Base delay multiplier for exponential backoff (in seconds).
        backoff_max: Maximum allowed delay (in seconds) between retries.
    """
    # The constructor only records the settings; no validation or work happens here.
    self.batch_size, self.num_retries = batch_size, num_retries
    self.backoff_multiplier, self.backoff_max = backoff_multiplier, backoff_max

default_module class-attribute #

default_module: ModuleType | None = None

configuration_key class-attribute #

configuration_key: str

batch_size instance-attribute #

batch_size = batch_size

num_retries instance-attribute #

num_retries = num_retries

backoff_multiplier instance-attribute #

backoff_multiplier = backoff_multiplier

backoff_max instance-attribute #

backoff_max = backoff_max

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance from a construction config, resolving the concrete class from ``config.type``.

    The resolved class may be a subclass of the class this method is called on.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    subclass = import_by_path(config.type, cls.default_module)

    if issubclass(subclass, cls):
        # Construction itself is delegated to the resolved class's own from_config.
        return subclass.from_config(config.config)

    raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is driven to completion with ``asyncio.run``. If an event loop is already
    running in the calling thread, the factory is executed on a temporary worker thread with its own
    event loop and the caller blocks until it finishes.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not a subclass of the current class.
    """
    # Imported locally so the fix does not depend on the file's top-level imports.
    from concurrent.futures import ThreadPoolExecutor

    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop is running in this thread, so a fresh one can be started here.
            obj = asyncio.run(factory())
        else:
            # A loop is already running in this thread. asyncio.run() would refuse to start, and blocking
            # on a future scheduled onto this same loop could never complete, so the coroutine is run
            # on a worker thread that owns its own event loop.
            # NOTE(review): the factory therefore runs on a different loop than the caller's - confirm
            # that factories do not rely on loop-bound resources created by the caller.
            with ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Accepts optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance from the first available preference source.

    Sources are tried in this order: the YAML override (only if it has an entry under the class's
    configuration key), the factory override, the preferred factory from ``config``, and finally
    the preferred instance configuration from ``config``.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to the YAML file containing the instance configuration.

    Raises:
        NoPreferredConfigError: If none of the sources above provides a factory or configuration.
            (Previously documented as InvalidConfigError - presumably its parent class; verify.)
    """
    if yaml_path_override:
        yaml_preferences = get_config_from_yaml(yaml_path_override)
        yaml_entry = yaml_preferences.get(cls.configuration_key)
        # A YAML file without an entry for this component falls through to the next source.
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_instance_config:
        validated = ObjectConstructionConfig.model_validate(project_instance_config)
        return cls.subclass_from_config(validated)

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    return cls(**config)

run_from_config async classmethod #

run_from_config(config: dict) -> EvaluatorResult

Run the evaluation based on configuration.

PARAMETER DESCRIPTION
config

Evaluation config.

TYPE: dict

RETURNS DESCRIPTION
EvaluatorResult

The evaluation results.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py
@classmethod
async def run_from_config(cls, config: dict) -> EvaluatorResult:
    """
    Build the evaluation components from a raw config and run the evaluation.

    Args:
        config: Evaluation config.

    Returns:
        The evaluation results.
    """
    validated = EvaluatorConfig.model_validate(config)
    evaluation = EvaluationConfig.model_validate(validated.evaluation)

    # Components are constructed in order: pipeline, dataloader, metric set.
    components = {
        "pipeline": EvaluationPipeline.subclass_from_config(evaluation.pipeline),
        "dataloader": DataLoader.subclass_from_config(evaluation.dataloader),
        "metricset": MetricSet.from_config(evaluation.metrics),
    }

    # A missing evaluator section means the evaluator is built with its default arguments.
    evaluator = cls.from_config(validated.evaluator or {})
    return await evaluator.compute(**components)

compute async #

compute(pipeline: EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT], dataloader: DataLoader[EvaluationDataT], metricset: MetricSet[EvaluationResultT]) -> EvaluatorResult[EvaluationResultT]

Compute the evaluation results for the given pipeline and data.

PARAMETER DESCRIPTION
pipeline

The pipeline to be evaluated.

TYPE: EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT]

dataloader

The dataloader to load the data.

TYPE: DataLoader[EvaluationDataT]

metricset

The metrics to be computed.

TYPE: MetricSet[EvaluationResultT]

RETURNS DESCRIPTION
EvaluatorResult[EvaluationResultT]

The evaluation results.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py
async def compute(
    self,
    pipeline: EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT],
    dataloader: DataLoader[EvaluationDataT],
    metricset: MetricSet[EvaluationResultT],
) -> EvaluatorResult[EvaluationResultT]:
    """
    Prepare the pipeline, run it over the loaded data and compute the metrics on its results.

    Args:
        pipeline: The pipeline to be evaluated.
        dataloader: The dataloader to load the data.
        metricset: The metrics to be computed.

    Returns:
        The evaluation results.
    """
    await pipeline.prepare()
    loaded_data = await dataloader.load()

    pipeline_outcome = await self._call_pipeline(pipeline, loaded_data)
    results, errors, time_perf = pipeline_outcome

    # Metrics are computed from the pipeline results only; errors are passed through unchanged.
    computed_metrics = await metricset.compute(results)

    return EvaluatorResult(metrics=computed_metrics, results=results, errors=errors, time_perf=time_perf)

ragbits.evaluate.optimizer.Optimizer #

Optimizer(direction: str = 'maximize', n_trials: int = 10, max_retries_for_trial: int = 1)

Bases: WithConstructionConfig

Optimizer class.

Initialize the pipeline optimizer.

PARAMETER DESCRIPTION
direction

Direction of optimization.

TYPE: str DEFAULT: 'maximize'

n_trials

The number of trials for each process.

TYPE: int DEFAULT: 10

max_retries_for_trial

The number of retries for a single process.

TYPE: int DEFAULT: 1

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py
def __init__(self, direction: str = "maximize", n_trials: int = 10, max_retries_for_trial: int = 1) -> None:
    """
    Store the optimization settings and create an empty choices cache.

    Args:
        direction: Direction of optimization.
        n_trials: The number of trials for each process.
        max_retries_for_trial: The number of retries for a single process.
    """
    # workaround for optuna not allowing different choices for different trials
    # TODO check how optuna handles parallelism. discuss if we want to have parallel studies
    self._choices_cache: dict[str, list] = {}

    self.direction, self.n_trials = direction, n_trials
    self.max_retries_for_trial = max_retries_for_trial

default_module class-attribute #

default_module: ModuleType | None = None

configuration_key class-attribute #

configuration_key: str

direction instance-attribute #

direction = direction

n_trials instance-attribute #

n_trials = n_trials

max_retries_for_trial instance-attribute #

max_retries_for_trial = max_retries_for_trial

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance from a construction config, resolving the concrete class from ``config.type``.

    The resolved class may be a subclass of the class this method is called on.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    subclass = import_by_path(config.type, cls.default_module)

    if issubclass(subclass, cls):
        # Construction itself is delegated to the resolved class's own from_config.
        return subclass.from_config(config.config)

    raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is driven to completion with ``asyncio.run``. If an event loop is already
    running in the calling thread, the factory is executed on a temporary worker thread with its own
    event loop and the caller blocks until it finishes.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not a subclass of the current class.
    """
    # Imported locally so the fix does not depend on the file's top-level imports.
    from concurrent.futures import ThreadPoolExecutor

    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop is running in this thread, so a fresh one can be started here.
            obj = asyncio.run(factory())
        else:
            # A loop is already running in this thread. asyncio.run() would refuse to start, and blocking
            # on a future scheduled onto this same loop could never complete, so the coroutine is run
            # on a worker thread that owns its own event loop.
            # NOTE(review): the factory therefore runs on a different loop than the caller's - confirm
            # that factories do not rely on loop-bound resources created by the caller.
            with ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Accepts optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance from the first available preference source.

    Sources are tried in this order: the YAML override (only if it has an entry under the class's
    configuration key), the factory override, the preferred factory from ``config``, and finally
    the preferred instance configuration from ``config``.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to the YAML file containing the instance configuration.

    Raises:
        NoPreferredConfigError: If none of the sources above provides a factory or configuration.
            (Previously documented as InvalidConfigError - presumably its parent class; verify.)
    """
    if yaml_path_override:
        yaml_preferences = get_config_from_yaml(yaml_path_override)
        yaml_entry = yaml_preferences.get(cls.configuration_key)
        # A YAML file without an entry for this component falls through to the next source.
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_instance_config:
        validated = ObjectConstructionConfig.model_validate(project_instance_config)
        return cls.subclass_from_config(validated)

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    return cls(**config)

run_from_config classmethod #

run_from_config(config: dict) -> list[tuple[dict, float, dict[str, float]]]

Run the optimization process configured with a config object.

PARAMETER DESCRIPTION
config

Optimizer config.

TYPE: dict

RETURNS DESCRIPTION
list[tuple[dict, float, dict[str, float]]]

List of tested configs with associated scores and metrics.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py
@classmethod
def run_from_config(cls, config: dict) -> list[tuple[dict, float, dict[str, float]]]:
    """
    Build the optimization components from a raw config and run the optimization.

    Args:
        config: Optimizer config.

    Returns:
        List of tested configs with associated scores and metrics.
    """
    optimizer_config = OptimizerConfig.model_validate(config)
    evaluator_config = EvaluatorConfig.model_validate(optimizer_config.evaluator)
    evaluation = evaluator_config.evaluation

    dataloader: DataLoader = DataLoader.subclass_from_config(evaluation.dataloader)
    metricset: MetricSet = MetricSet.from_config(evaluation.metrics)

    # The pipeline is passed on as a class plus a plain-dict config rather than as an instance.
    pipeline_class = import_by_path(evaluation.pipeline.type)
    pipeline_config = dict(evaluation.pipeline.config)

    # The Neptune callback is only set up when the config enables it.
    if optimizer_config.neptune_callback:
        callbacks = [setup_optuna_neptune_callback()]
    else:
        callbacks = []

    # A missing optimizer section means the optimizer is built with its default arguments.
    optimizer = cls.from_config(optimizer_config.optimizer or {})
    tested_configs = optimizer.optimize(
        pipeline_class=pipeline_class,
        pipeline_config=pipeline_config,
        metricset=metricset,
        dataloader=dataloader,
        callbacks=callbacks,
    )
    return tested_configs

optimize #

optimize(pipeline_class: type[EvaluationPipeline], pipeline_config: dict, dataloader: DataLoader, metricset: MetricSet, callbacks: list[Callable] | None = None) -> list[tuple[dict, float, dict[str, float]]]

Run the optimization process for given parameters.

PARAMETER DESCRIPTION
pipeline_class

Pipeline to be optimized.

TYPE: type[EvaluationPipeline]

pipeline_config

Configuration defining the optimization process.

TYPE: dict

dataloader

Data loader.

TYPE: DataLoader

metricset

Metrics to be optimized.

TYPE: MetricSet

callbacks

Experiment callbacks.

TYPE: list[Callable] | None DEFAULT: None

RETURNS DESCRIPTION
list[tuple[dict, float, dict[str, float]]]

List of tested configs with associated scores and metrics.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py
def optimize(
    self,
    pipeline_class: type[EvaluationPipeline],
    pipeline_config: dict,
    dataloader: DataLoader,
    metricset: MetricSet,
    callbacks: list[Callable] | None = None,
) -> list[tuple[dict, float, dict[str, float]]]:
    """
    Run an optuna study over the pipeline configuration and collect the outcome of every trial.

    Args:
        pipeline_class: Pipeline to be optimized.
        pipeline_config: Configuration defining the optimization process.
        dataloader: Data loader.
        metricset: Metrics to be optimized.
        callbacks: Experiment callbacks.

    Returns:
        List of tested configs with associated scores and metrics, ordered by score: descending
        when the direction is "maximize", ascending otherwise.
    """

    def run_trial(trial: Trial) -> float:
        # Bind the fixed optimization inputs so optuna only has to supply the trial.
        return self._objective(
            trial=trial,
            pipeline_class=pipeline_class,
            pipeline_config=pipeline_config,
            dataloader=dataloader,
            metricset=metricset,
        )

    study = optuna.create_study(direction=self.direction)
    study.optimize(func=run_trial, n_trials=self.n_trials, callbacks=callbacks)

    # Each trial's config, score and metrics are read back from its user attributes.
    tested = []
    for finished_trial in study.get_trials():
        attrs = finished_trial.user_attrs
        tested.append((attrs["config"], attrs["score"], attrs["metrics"]))

    maximizing = self.direction == "maximize"

    def by_score(entry: tuple) -> float:
        # Negating the score puts the highest score first when maximizing.
        score = entry[1]
        return -score if maximizing else score

    tested.sort(key=by_score)
    return tested