Evaluate#

ragbits.evaluate.evaluator.Evaluator #

Evaluator(batch_size: int = 10, num_retries: int = 3, backoff_multiplier: int = 1, backoff_max: int = 60)

Bases: WithConstructionConfig

Evaluator class.

Initialize the Evaluator instance.

PARAMETER	DESCRIPTION
`batch_size`	batch size for the evaluation pipeline inference. TYPE: `int` DEFAULT: `10`
`num_retries`	The number of retries per evaluation pipeline inference error. TYPE: `int` DEFAULT: `3`
`backoff_multiplier`	The base delay multiplier for exponential backoff (in seconds). TYPE: `int` DEFAULT: `1`
`backoff_max`	The maximum allowed delay (in seconds) between retries. TYPE: `int` DEFAULT: `60`

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py

def __init__(
    self,
    batch_size: int = 10,
    num_retries: int = 3,
    backoff_multiplier: int = 1,
    backoff_max: int = 60,
) -> None:
    """
    Store the batching and retry settings used by the evaluator.

    Args:
        batch_size: Batch size for the evaluation pipeline inference.
        num_retries: How many times an evaluation pipeline inference is retried after an error.
        backoff_multiplier: Base delay multiplier (in seconds) for the exponential backoff between retries.
        backoff_max: Upper bound (in seconds) on the delay between retries.
    """
    # Inference batching.
    self.batch_size = batch_size
    # Retry policy: attempt count plus the exponential backoff parameters.
    self.num_retries, self.backoff_multiplier, self.backoff_max = num_retries, backoff_multiplier, backoff_max

default_module `class-attribute` #

default_module: ModuleType | None = None

configuration_key `class-attribute` #

configuration_key: str

batch_size `instance-attribute` #

batch_size = batch_size

num_retries `instance-attribute` #

num_retries = num_retries

backoff_multiplier `instance-attribute` #

backoff_multiplier = backoff_multiplier

backoff_max `instance-attribute` #

backoff_max = backoff_max

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance of this class, or of one of its subclasses, from a construction config.

    The class named by `config.type` is resolved with `import_by_path` (which also receives
    `cls.default_module`) and is then instantiated through its own `from_config`.

    Args:
        config: A model containing configuration details for the class. Its `type` names the
            class to build and its `config` is handed to that class's `from_config`.

    Returns:
        The instance produced by the resolved class's `from_config`.

    Raises:
        InvalidConfigError: The resolved class is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is always driven to completion before this method returns:

    * when no event loop is running in the calling thread, it is executed with `asyncio.run`;
    * when an event loop is already running in the calling thread, it is executed with
      `asyncio.run` inside a short-lived worker thread. Blocking on the running loop from its
      own thread could never complete, so the factory gets a separate loop instead.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not a subclass of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        try:
            # Only used as a probe: raises RuntimeError when this thread has no running loop.
            asyncio.get_running_loop()
        except RuntimeError:
            obj = asyncio.run(factory())
        else:
            # Local import keeps this method self-contained.
            from concurrent.futures import ThreadPoolExecutor

            # The coroutine is created inside the worker so that it is both created and awaited
            # on the worker's own event loop. NOTE: the factory therefore cannot rely on
            # resources bound to the caller's running loop.
            with ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	The path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`InvalidConfigError`	If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    Sources are tried in this order and the first one that yields a value wins:

    1. the entry for `cls.configuration_key` in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the factory registered in `config.component_preference_factories`,
    4. the configuration registered in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to the YAML file containing the Ragstack instance
            configuration. If the file has no entry for this class, the remaining sources
            are still consulted.

    Returns:
        The instance built from the first matching source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
    """
    if yaml_path_override:
        yaml_preferences = get_config_from_yaml(yaml_path_override)
        yaml_entry = yaml_preferences.get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config `classmethod` #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER	DESCRIPTION
`config`	A dictionary containing configuration details for the class. TYPE: `dict`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    return cls(**config)

run_from_config `async` `classmethod` #

run_from_config(config: dict) -> EvaluatorResult

Run the evaluation based on configuration.

PARAMETER	DESCRIPTION
`config`	Evaluation config. TYPE: `dict`

RETURNS	DESCRIPTION
`EvaluatorResult`	The evaluation results.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py

@classmethod
async def run_from_config(cls, config: dict) -> EvaluatorResult:
    """
    Build every evaluation component from a raw config and run the evaluation.

    Args:
        config: Evaluation config. It is validated as an `EvaluatorConfig`, and its
            `evaluation` section as an `EvaluationConfig`.

    Returns:
        The evaluation results.
    """
    top_level = EvaluatorConfig.model_validate(config)
    evaluation = EvaluationConfig.model_validate(top_level.evaluation)

    # Components are created in this order: pipeline, dataloader, metric set, then the evaluator itself.
    components = {
        "pipeline": EvaluationPipeline.subclass_from_config(evaluation.pipeline),
        "dataloader": DataLoader.subclass_from_config(evaluation.dataloader),
        "metricset": MetricSet.from_config(evaluation.metrics),
    }

    # A missing evaluator section falls back to the constructor defaults.
    instance = cls.from_config(top_level.evaluator or {})
    return await instance.compute(**components)

compute `async` #

compute(pipeline: EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT], dataloader: DataLoader[EvaluationDataT], metricset: MetricSet[EvaluationResultT]) -> EvaluatorResult[EvaluationResultT]

Compute the evaluation results for the given pipeline and data.

PARAMETER	DESCRIPTION
`pipeline`	The pipeline to be evaluated. TYPE: `EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT]`
`dataloader`	The dataloader to load the data. TYPE: `DataLoader[EvaluationDataT]`
`metricset`	The metrics to be computed. TYPE: `MetricSet[EvaluationResultT]`

RETURNS	DESCRIPTION
`EvaluatorResult[EvaluationResultT]`	The evaluation results.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/evaluator.py

async def compute(
    self,
    pipeline: EvaluationPipeline[EvaluationTargetT, EvaluationDataT, EvaluationResultT],
    dataloader: DataLoader[EvaluationDataT],
    metricset: MetricSet[EvaluationResultT],
) -> EvaluatorResult[EvaluationResultT]:
    """
    Evaluate a pipeline on a dataset and compute metrics over its outputs.

    The pipeline is prepared first, the dataset is loaded, the pipeline is called on the
    dataset, and the metrics are computed from the pipeline results.

    Args:
        pipeline: The pipeline to be evaluated.
        dataloader: The dataloader to load the data.
        metricset: The metrics to be computed.

    Returns:
        The evaluation results: metrics, pipeline results, errors and time performance.
    """
    await pipeline.prepare()

    loaded_data = await dataloader.load()
    outputs, failures, timing = await self._call_pipeline(pipeline, loaded_data)
    computed_metrics = await metricset.compute(outputs)

    return EvaluatorResult(
        metrics=computed_metrics,
        results=outputs,
        errors=failures,
        time_perf=timing,
    )

ragbits.evaluate.optimizer.Optimizer #

Optimizer(direction: str = 'maximize', n_trials: int = 10, max_retries_for_trial: int = 1)

Bases: WithConstructionConfig

Optimizer class.

Initialize the pipeline optimizer.

PARAMETER	DESCRIPTION
`direction`	Direction of optimization. TYPE: `str` DEFAULT: `'maximize'`
`n_trials`	The number of trials for each process. TYPE: `int` DEFAULT: `10`
`max_retries_for_trial`	The number of retries for a single process. TYPE: `int` DEFAULT: `1`

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py

def __init__(self, direction: str = "maximize", n_trials: int = 10, max_retries_for_trial: int = 1) -> None:
    """
    Store the settings of the pipeline optimizer.

    Args:
        direction: Direction of optimization.
        n_trials: The number of trials for each process.
        max_retries_for_trial: The number of retries for a single process.
    """
    self.direction, self.n_trials = direction, n_trials
    self.max_retries_for_trial = max_retries_for_trial

    # Workaround for optuna not allowing different choices for different trials.
    # TODO check how optuna handles parallelism. discuss if we want to have parallel studies
    self._choices_cache: dict[str, list] = {}

default_module `class-attribute` #

default_module: ModuleType | None = None

configuration_key `class-attribute` #

configuration_key: str

direction `instance-attribute` #

direction = direction

n_trials `instance-attribute` #

n_trials = n_trials

max_retries_for_trial `instance-attribute` #

max_retries_for_trial = max_retries_for_trial

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance of this class, or of one of its subclasses, from a construction config.

    The class named by `config.type` is resolved with `import_by_path` (which also receives
    `cls.default_module`) and is then instantiated through its own `from_config`.

    Args:
        config: A model containing configuration details for the class. Its `type` names the
            class to build and its `config` is handed to that class's `from_config`.

    Returns:
        The instance produced by the resolved class's `from_config`.

    Raises:
        InvalidConfigError: The resolved class is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is always driven to completion before this method returns:

    * when no event loop is running in the calling thread, it is executed with `asyncio.run`;
    * when an event loop is already running in the calling thread, it is executed with
      `asyncio.run` inside a short-lived worker thread. Blocking on the running loop from its
      own thread could never complete, so the factory gets a separate loop instead.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not a subclass of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        try:
            # Only used as a probe: raises RuntimeError when this thread has no running loop.
            asyncio.get_running_loop()
        except RuntimeError:
            obj = asyncio.run(factory())
        else:
            # Local import keeps this method self-contained.
            from concurrent.futures import ThreadPoolExecutor

            # The coroutine is created inside the worker so that it is both created and awaited
            # on the worker's own event loop. NOTE: the factory therefore cannot rely on
            # resources bound to the caller's running loop.
            with ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	The path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`InvalidConfigError`	If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    Sources are tried in this order and the first one that yields a value wins:

    1. the entry for `cls.configuration_key` in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the factory registered in `config.component_preference_factories`,
    4. the configuration registered in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to the YAML file containing the Ragstack instance
            configuration. If the file has no entry for this class, the remaining sources
            are still consulted.

    Returns:
        The instance built from the first matching source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
    """
    if yaml_path_override:
        yaml_preferences = get_config_from_yaml(yaml_path_override)
        yaml_entry = yaml_preferences.get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config `classmethod` #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER	DESCRIPTION
`config`	A dictionary containing configuration details for the class. TYPE: `dict`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    return cls(**config)

run_from_config `classmethod` #

run_from_config(config: dict) -> list[tuple[dict, float, dict[str, float]]]

Run the optimization process configured with a config object.

PARAMETER	DESCRIPTION
`config`	Optimizer config. TYPE: `dict`

RETURNS	DESCRIPTION
`list[tuple[dict, float, dict[str, float]]]`	List of tested configs with associated scores and metrics.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py

@classmethod
def run_from_config(cls, config: dict) -> list[tuple[dict, float, dict[str, float]]]:
    """
    Build the optimizer and its inputs from a raw config and run the optimization.

    Args:
        config: Optimizer config. It is validated as an `OptimizerConfig`, and its
            `evaluator` section as an `EvaluatorConfig`.

    Returns:
        List of tested configs with associated scores and metrics.
    """
    parsed = OptimizerConfig.model_validate(config)
    evaluation = EvaluatorConfig.model_validate(parsed.evaluator).evaluation

    loader: DataLoader = DataLoader.subclass_from_config(evaluation.dataloader)
    metrics: MetricSet = MetricSet.from_config(evaluation.metrics)

    # The pipeline is passed on as a class plus a plain config dict, not as an instance.
    target_class = import_by_path(evaluation.pipeline.type)
    target_config = dict(evaluation.pipeline.config)

    # The Neptune callback is only set up when the config asks for it.
    study_callbacks = []
    if parsed.neptune_callback:
        study_callbacks.append(setup_optuna_neptune_callback())

    # A missing optimizer section falls back to the constructor defaults.
    instance = cls.from_config(parsed.optimizer or {})
    return instance.optimize(
        pipeline_class=target_class,
        pipeline_config=target_config,
        metricset=metrics,
        dataloader=loader,
        callbacks=study_callbacks,
    )

optimize #

optimize(pipeline_class: type[EvaluationPipeline], pipeline_config: dict, dataloader: DataLoader, metricset: MetricSet, callbacks: list[Callable] | None = None) -> list[tuple[dict, float, dict[str, float]]]

Run the optimization process for given parameters.

PARAMETER	DESCRIPTION
`pipeline_class`	Pipeline to be optimized. TYPE: `type[EvaluationPipeline]`
`pipeline_config`	Configuration defining the optimization process. TYPE: `dict`
`dataloader`	Data loader. TYPE: `DataLoader`
`metricset`	Metrics to be optimized. TYPE: `MetricSet`
`callbacks`	Experiment callbacks. TYPE: `list[Callable] \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[tuple[dict, float, dict[str, float]]]`	List of tested configs with associated scores and metrics.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/optimizer.py

def optimize(
    self,
    pipeline_class: type[EvaluationPipeline],
    pipeline_config: dict,
    dataloader: DataLoader,
    metricset: MetricSet,
    callbacks: list[Callable] | None = None,
) -> list[tuple[dict, float, dict[str, float]]]:
    """
    Run an optuna study over the pipeline configuration and report every trial.

    Args:
        pipeline_class: Pipeline to be optimized.
        pipeline_config: Configuration defining the optimization process.
        dataloader: Data loader.
        metricset: Metrics to be optimized.
        callbacks: Experiment callbacks.

    Returns:
        List of tested configs with associated scores and metrics, ordered by score:
        highest first when `direction` is "maximize", lowest first otherwise.
    """

    def run_trial(trial: Trial) -> float:
        # Bind the fixed inputs so optuna only has to supply the trial.
        return self._objective(
            trial=trial,
            pipeline_class=pipeline_class,
            pipeline_config=pipeline_config,
            dataloader=dataloader,
            metricset=metricset,
        )

    study = optuna.create_study(direction=self.direction)
    study.optimize(func=run_trial, n_trials=self.n_trials, callbacks=callbacks)

    # Each trial is read back through its "config", "score" and "metrics" user attributes.
    tested = []
    for finished in study.get_trials():
        attrs = finished.user_attrs
        tested.append((attrs["config"], attrs["score"], attrs["metrics"]))

    # Negating the score turns the ascending sort into a descending one when maximizing.
    sign = -1 if self.direction == "maximize" else 1
    return sorted(tested, key=lambda row: sign * row[1])

Evaluate#

ragbits.evaluate.evaluator.Evaluator #

default_module class-attribute #

configuration_key class-attribute #

batch_size instance-attribute #

num_retries instance-attribute #

backoff_multiplier instance-attribute #

backoff_max instance-attribute #

subclass_from_config classmethod #

subclass_from_factory classmethod #

preferred_subclass classmethod #

from_config classmethod #

run_from_config async classmethod #

compute async #

ragbits.evaluate.optimizer.Optimizer #

default_module class-attribute #

configuration_key class-attribute #

direction instance-attribute #

n_trials instance-attribute #

max_retries_for_trial instance-attribute #

subclass_from_config classmethod #

subclass_from_factory classmethod #

preferred_subclass classmethod #

from_config classmethod #

run_from_config classmethod #

optimize #

default_module `class-attribute` #

configuration_key `class-attribute` #

batch_size `instance-attribute` #

num_retries `instance-attribute` #

backoff_multiplier `instance-attribute` #

backoff_max `instance-attribute` #

subclass_from_config `classmethod` #

subclass_from_factory `classmethod` #

preferred_subclass `classmethod` #

from_config `classmethod` #

run_from_config `async` `classmethod` #

compute `async` #

default_module `class-attribute` #

configuration_key `class-attribute` #

direction `instance-attribute` #

n_trials `instance-attribute` #

max_retries_for_trial `instance-attribute` #

subclass_from_config `classmethod` #

subclass_from_factory `classmethod` #

preferred_subclass `classmethod` #

from_config `classmethod` #

run_from_config `classmethod` #