Skip to content

Data Loaders#

ragbits.evaluate.dataloaders.base.DataLoader #

DataLoader(source: Source, *, split: str = 'data', required_keys: set[str] | None = None)

Bases: WithConstructionConfig, Generic[EvaluationDataT], ABC

Evaluation data loader.

Initialize the data loader.

PARAMETER DESCRIPTION
source

The source to load the evaluation data from.

TYPE: Source

split

The split to load the data from. Split is fixed for data loaders to "data", but you can slice it using the Hugging Face API.

TYPE: str DEFAULT: 'data'

required_keys

The required columns for the evaluation data.

TYPE: set[str] | None DEFAULT: None

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/base.py
def __init__(self, source: Source, *, split: str = "data", required_keys: set[str] | None = None) -> None:
    """
    Set up the loader with its data source, split selector and required columns.

    Args:
        source: Where the evaluation data is fetched from.
        split: Which split to load. Data loaders always expose a single split named "data",
            but it can be sliced with the
            [Hugging Face API](https://huggingface.co/docs/datasets/v1.11.0/splits.html#slicing-api).
        required_keys: Column names the evaluation data must contain. `None` or an empty
            set is stored as a fresh empty set.
    """
    # Normalise a missing value so the attribute is always a set.
    self.required_keys = required_keys if required_keys else set()
    self.split = split
    self.source = source

default_module class-attribute #

default_module: ModuleType | None = dataloaders

configuration_key class-attribute #

configuration_key: str = 'dataloader'

source instance-attribute #

source = source

split instance-attribute #

split = split

required_keys instance-attribute #

required_keys = required_keys or set()

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Initializes the class with the provided configuration. May return a subclass of the class,
    if requested by the configuration.

    The target is resolved with `import_by_path(config.type, cls.default_module)` and then
    instantiated through its own `from_config`, which receives `config.config`.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found, the resolved object is not a class,
            or it is not a subclass of the current class.
    """
    subclass = import_by_path(config.type, cls.default_module)
    # issubclass() raises TypeError when its first argument is not a class (for example
    # when the path points at a function), so check for a class first and report the
    # problem as a configuration error.
    if not (isinstance(subclass, type) and issubclass(subclass, cls)):
        raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

    return subclass.from_config(config.config)

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates an instance using the provided factory function. May return an instance of a subclass,
    if the factory produces one. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is run to completion before this method returns. If an event loop
    is already running in the calling thread, the factory is executed on a separate event loop
    in a worker thread and the calling thread blocks until it finishes.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object returned by the factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not an instance of the current class.
    """
    import concurrent.futures

    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so one can be started here.
            obj = asyncio.run(factory())
        else:
            # A loop is already running in this thread. Scheduling the coroutine on it and
            # blocking on the result from the same thread could never complete, so run the
            # factory on its own event loop in a worker thread instead.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    The candidates below are tried in order and the first one that is available wins:

    1. the entry stored under `cls.configuration_key` in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the factory registered under `cls.configuration_key` in `config.component_preference_factories`,
    4. the configuration stored under `cls.configuration_key` in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function in the format of
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with component configuration.

    Returns:
        The instance built from the first candidate that is available.

    Raises:
        NoPreferredConfigError: If none of the candidates provides a factory or configuration.
    """
    key = cls.configuration_key

    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(key)
        # A YAML file without an entry for this component falls through to the next candidate.
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(key)
    if project_instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Create an instance of DataLoader from a configuration dictionary.

PARAMETER DESCRIPTION
config

A dictionary containing configuration settings for the data loader.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the data loader class initialized with the provided configuration.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/base.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Create an instance of `DataLoader` from a configuration dictionary.

    The `source` entry is validated through `DataLoaderConfig` and replaced by the `Source`
    instance built from it before the configuration is handed to the parent `from_config`.
    The dictionary passed by the caller is not modified.

    Args:
        config: A dictionary containing configuration settings for the data loader.

    Returns:
        An instance of the data loader class initialized with the provided configuration.
    """
    dataloader_config = DataLoaderConfig.model_validate(config)
    # Build a new dict rather than assigning into `config`, so the caller's configuration
    # keeps its original `source` entry and can be reused.
    resolved_config = {**config, "source": Source.subclass_from_config(dataloader_config.source)}
    return super().from_config(resolved_config)

load async #

load() -> Iterable[EvaluationDataT]

Load the data.

RETURNS DESCRIPTION
Iterable[EvaluationDataT]

The loaded evaluation data.

RAISES DESCRIPTION
DataLoaderIncorrectFormatDataError

If evaluation dataset is incorrectly formatted.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/base.py
async def load(self) -> Iterable[EvaluationDataT]:
    """
    Fetch the source file, load it with `load_dataset` and map its rows to evaluation data.

    The fetched file is registered as the only split, named "data", and `self.split`
    selects what is read from it.

    Returns:
        The result of `self.map` applied to the loaded rows.

    Raises:
        DataLoaderIncorrectFormatDataError: If the dataset lacks any of the required columns.
    """
    fetched_path = await self.source.fetch()
    loaded = load_dataset(
        path=str(fetched_path.parent),
        data_files={"data": str(fetched_path.name)},
        split=self.split,
    )
    # Map the rows only when every required column is present in the dataset.
    if self.required_keys.issubset(loaded.features):
        return await self.map(loaded.to_list())

    raise DataLoaderIncorrectFormatDataError(
        required_features=list(self.required_keys),
        data_path=fetched_path,
    )

map abstractmethod async #

map(dataset: Iterable[dict]) -> Iterable[EvaluationDataT]

Map the dataset to the evaluation data.

PARAMETER DESCRIPTION
dataset

The dataset to map.

TYPE: Iterable[dict]

RETURNS DESCRIPTION
Iterable[EvaluationDataT]

The evaluation data.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/base.py
@abstractmethod
async def map(self, dataset: Iterable[dict]) -> Iterable[EvaluationDataT]:
    """
    Convert raw dataset rows into evaluation data.

    Concrete data loaders implement this to turn each row into their own data type.

    Args:
        dataset: The rows to convert, one dict per row.

    Returns:
        The evaluation data built from the rows.
    """

ragbits.evaluate.dataloaders.document_search.DocumentSearchDataLoader #

DocumentSearchDataLoader(source: Source, *, split: str = 'data', question_key: str = 'question', document_ids_key: str = 'document_ids', passages_key: str = 'passages', page_numbers_key: str = 'page_numbers')

Bases: DataLoader[DocumentSearchData]

Document search evaluation data loader.

The source used for this data loader should point to a file that can be loaded by Hugging Face.

Initialize the document search data loader.

PARAMETER DESCRIPTION
source

The source to load the data from.

TYPE: Source

split

The split to load the data from. Split is fixed for data loaders to "data", but you can slice it using the Hugging Face API.

TYPE: str DEFAULT: 'data'

question_key

The dataset column name that contains the question.

TYPE: str DEFAULT: 'question'

document_ids_key

The dataset column name that contains the document ids. Document ids are optional.

TYPE: str DEFAULT: 'document_ids'

passages_key

The dataset column name that contains the passages. Passages are optional.

TYPE: str DEFAULT: 'passages'

page_numbers_key

The dataset column name that contains the page numbers. Page numbers are optional.

TYPE: str DEFAULT: 'page_numbers'

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/document_search.py
def __init__(
    self,
    source: Source,
    *,
    split: str = "data",
    question_key: str = "question",
    document_ids_key: str = "document_ids",
    passages_key: str = "passages",
    page_numbers_key: str = "page_numbers",
) -> None:
    """
    Configure the loader with the column names of a document search dataset.

    Only the question column is registered as required.

    Args:
        source: The source to load the data from.
        split: Which split to load. Data loaders always expose a single split named "data",
            but it can be sliced with the
            [Hugging Face API](https://huggingface.co/docs/datasets/v1.11.0/splits.html#slicing-api).
        question_key: Name of the column holding the question.
        document_ids_key: Name of the column holding the document ids (optional column).
        passages_key: Name of the column holding the passages (optional column).
        page_numbers_key: Name of the column holding the page numbers (optional column).
    """
    # The question column is the only one the dataset must contain.
    mandatory_columns = {question_key}
    super().__init__(source=source, split=split, required_keys=mandatory_columns)
    self.page_numbers_key = page_numbers_key
    self.passages_key = passages_key
    self.document_ids_key = document_ids_key
    self.question_key = question_key

default_module class-attribute #

default_module: ModuleType | None = dataloaders

configuration_key class-attribute #

configuration_key: str = 'dataloader'

source instance-attribute #

source = source

split instance-attribute #

split = split

required_keys instance-attribute #

required_keys = required_keys or set()

question_key instance-attribute #

question_key = question_key

document_ids_key instance-attribute #

document_ids_key = document_ids_key

passages_key instance-attribute #

passages_key = passages_key

page_numbers_key instance-attribute #

page_numbers_key = page_numbers_key

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Initializes the class with the provided configuration. May return a subclass of the class,
    if requested by the configuration.

    The target is resolved with `import_by_path(config.type, cls.default_module)` and then
    instantiated through its own `from_config`, which receives `config.config`.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found, the resolved object is not a class,
            or it is not a subclass of the current class.
    """
    subclass = import_by_path(config.type, cls.default_module)
    # issubclass() raises TypeError when its first argument is not a class (for example
    # when the path points at a function), so check for a class first and report the
    # problem as a configuration error.
    if not (isinstance(subclass, type) and issubclass(subclass, cls)):
        raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

    return subclass.from_config(config.config)

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates an instance using the provided factory function. May return an instance of a subclass,
    if the factory produces one. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is run to completion before this method returns. If an event loop
    is already running in the calling thread, the factory is executed on a separate event loop
    in a worker thread and the calling thread blocks until it finishes.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object returned by the factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not an instance of the current class.
    """
    import concurrent.futures

    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so one can be started here.
            obj = asyncio.run(factory())
        else:
            # A loop is already running in this thread. Scheduling the coroutine on it and
            # blocking on the result from the same thread could never complete, so run the
            # factory on its own event loop in a worker thread instead.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    The candidates below are tried in order and the first one that is available wins:

    1. the entry stored under `cls.configuration_key` in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the factory registered under `cls.configuration_key` in `config.component_preference_factories`,
    4. the configuration stored under `cls.configuration_key` in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function in the format of
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with component configuration.

    Returns:
        The instance built from the first candidate that is available.

    Raises:
        NoPreferredConfigError: If none of the candidates provides a factory or configuration.
    """
    key = cls.configuration_key

    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(key)
        # A YAML file without an entry for this component falls through to the next candidate.
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(key)
    if project_instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Create an instance of DataLoader from a configuration dictionary.

PARAMETER DESCRIPTION
config

A dictionary containing configuration settings for the data loader.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the data loader class initialized with the provided configuration.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/base.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Create an instance of `DataLoader` from a configuration dictionary.

    The `source` entry is validated through `DataLoaderConfig` and replaced by the `Source`
    instance built from it before the configuration is handed to the parent `from_config`.
    The dictionary passed by the caller is not modified.

    Args:
        config: A dictionary containing configuration settings for the data loader.

    Returns:
        An instance of the data loader class initialized with the provided configuration.
    """
    dataloader_config = DataLoaderConfig.model_validate(config)
    # Build a new dict rather than assigning into `config`, so the caller's configuration
    # keeps its original `source` entry and can be reused.
    resolved_config = {**config, "source": Source.subclass_from_config(dataloader_config.source)}
    return super().from_config(resolved_config)

load async #

load() -> Iterable[EvaluationDataT]

Load the data.

RETURNS DESCRIPTION
Iterable[EvaluationDataT]

The loaded evaluation data.

RAISES DESCRIPTION
DataLoaderIncorrectFormatDataError

If evaluation dataset is incorrectly formatted.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/base.py
async def load(self) -> Iterable[EvaluationDataT]:
    """
    Fetch the source file, load it with `load_dataset` and map its rows to evaluation data.

    The fetched file is registered as the only split, named "data", and `self.split`
    selects what is read from it.

    Returns:
        The result of `self.map` applied to the loaded rows.

    Raises:
        DataLoaderIncorrectFormatDataError: If the dataset lacks any of the required columns.
    """
    fetched_path = await self.source.fetch()
    loaded = load_dataset(
        path=str(fetched_path.parent),
        data_files={"data": str(fetched_path.name)},
        split=self.split,
    )
    # Map the rows only when every required column is present in the dataset.
    if self.required_keys.issubset(loaded.features):
        return await self.map(loaded.to_list())

    raise DataLoaderIncorrectFormatDataError(
        required_features=list(self.required_keys),
        data_path=fetched_path,
    )

map async #

map(dataset: Iterable[dict]) -> Iterable[DocumentSearchData]

Map the dataset to the document search data schema.

PARAMETER DESCRIPTION
dataset

The dataset to map.

TYPE: Iterable[dict]

RETURNS DESCRIPTION
Iterable[DocumentSearchData]

The document search data.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/document_search.py
async def map(self, dataset: Iterable[dict]) -> Iterable[DocumentSearchData]:
    """
    Convert raw dataset rows into `DocumentSearchData` entries.

    Values are read with `dict.get`: a row without the question column yields an empty
    question, and a row without a reference column yields `None` for that field.

    Args:
        dataset: The rows to convert, one dict per row.

    Returns:
        A list with one `DocumentSearchData` per row, in the order the rows were given.
    """
    question_column = self.question_key
    ids_column = self.document_ids_key
    passages_column = self.passages_key
    pages_column = self.page_numbers_key

    return [
        DocumentSearchData(
            question=row.get(question_column, ""),
            reference_document_ids=row.get(ids_column),
            reference_passages=row.get(passages_column),
            reference_page_numbers=row.get(pages_column),
        )
        for row in dataset
    ]

ragbits.evaluate.dataloaders.question_answer.QuestionAnswerDataLoader #

QuestionAnswerDataLoader(source: Source, *, split: str = 'data', question_key: str = 'question', answer_key: str = 'answer', context_key: str = 'context')

Bases: DataLoader[QuestionAnswerData]

Question answer evaluation data loader.

The source used for this data loader should point to a file that can be loaded by Hugging Face.

Initialize the question answer data loader.

PARAMETER DESCRIPTION
source

The source to load the data from.

TYPE: Source

split

The split to load the data from.

TYPE: str DEFAULT: 'data'

question_key

The dataset column name that contains the question.

TYPE: str DEFAULT: 'question'

answer_key

The dataset column name that contains the answer.

TYPE: str DEFAULT: 'answer'

context_key

The dataset column name that contains the context. Context is optional.

TYPE: str DEFAULT: 'context'

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/question_answer.py
def __init__(
    self,
    source: Source,
    *,
    split: str = "data",
    question_key: str = "question",
    answer_key: str = "answer",
    context_key: str = "context",
) -> None:
    """
    Configure the loader with the column names of a question answer dataset.

    The question and answer columns are registered as required.

    Args:
        source: The source to load the data from.
        split: The split to load the data from.
        question_key: Name of the column holding the question.
        answer_key: Name of the column holding the answer.
        context_key: Name of the column holding the context (optional column).
    """
    # Both the question and the answer column must be present in the dataset.
    mandatory_columns = {question_key, answer_key}
    super().__init__(source=source, split=split, required_keys=mandatory_columns)
    self.context_key = context_key
    self.answer_key = answer_key
    self.question_key = question_key

default_module class-attribute #

default_module: ModuleType | None = dataloaders

configuration_key class-attribute #

configuration_key: str = 'dataloader'

source instance-attribute #

source = source

split instance-attribute #

split = split

required_keys instance-attribute #

required_keys = required_keys or set()

question_key instance-attribute #

question_key = question_key

answer_key instance-attribute #

answer_key = answer_key

context_key instance-attribute #

context_key = context_key

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Initializes the class with the provided configuration. May return a subclass of the class,
    if requested by the configuration.

    The target is resolved with `import_by_path(config.type, cls.default_module)` and then
    instantiated through its own `from_config`, which receives `config.config`.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found, the resolved object is not a class,
            or it is not a subclass of the current class.
    """
    subclass = import_by_path(config.type, cls.default_module)
    # issubclass() raises TypeError when its first argument is not a class (for example
    # when the path points at a function), so check for a class first and report the
    # problem as a configuration error.
    if not (isinstance(subclass, type) and issubclass(subclass, cls)):
        raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

    return subclass.from_config(config.config)

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates an instance using the provided factory function. May return an instance of a subclass,
    if the factory produces one. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is run to completion before this method returns. If an event loop
    is already running in the calling thread, the factory is executed on a separate event loop
    in a worker thread and the calling thread blocks until it finishes.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object returned by the factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not an instance of the current class.
    """
    import concurrent.futures

    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so one can be started here.
            obj = asyncio.run(factory())
        else:
            # A loop is already running in this thread. Scheduling the coroutine on it and
            # blocking on the result from the same thread could never complete, so run the
            # factory on its own event loop in a worker thread instead.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    The candidates below are tried in order and the first one that is available wins:

    1. the entry stored under `cls.configuration_key` in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the factory registered under `cls.configuration_key` in `config.component_preference_factories`,
    4. the configuration stored under `cls.configuration_key` in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function in the format of
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with component configuration.

    Returns:
        The instance built from the first candidate that is available.

    Raises:
        NoPreferredConfigError: If none of the candidates provides a factory or configuration.
    """
    key = cls.configuration_key

    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(key)
        # A YAML file without an entry for this component falls through to the next candidate.
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(key)
    if project_instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Create an instance of DataLoader from a configuration dictionary.

PARAMETER DESCRIPTION
config

A dictionary containing configuration settings for the data loader.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the data loader class initialized with the provided configuration.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/base.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Create an instance of `DataLoader` from a configuration dictionary.

    The `source` entry is validated through `DataLoaderConfig` and replaced by the `Source`
    instance built from it before the configuration is handed to the parent `from_config`.
    The dictionary passed by the caller is not modified.

    Args:
        config: A dictionary containing configuration settings for the data loader.

    Returns:
        An instance of the data loader class initialized with the provided configuration.
    """
    dataloader_config = DataLoaderConfig.model_validate(config)
    # Build a new dict rather than assigning into `config`, so the caller's configuration
    # keeps its original `source` entry and can be reused.
    resolved_config = {**config, "source": Source.subclass_from_config(dataloader_config.source)}
    return super().from_config(resolved_config)

load async #

load() -> Iterable[EvaluationDataT]

Load the data.

RETURNS DESCRIPTION
Iterable[EvaluationDataT]

The loaded evaluation data.

RAISES DESCRIPTION
DataLoaderIncorrectFormatDataError

If evaluation dataset is incorrectly formatted.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/base.py
async def load(self) -> Iterable[EvaluationDataT]:
    """
    Fetch the source file, load it with `load_dataset` and map its rows to evaluation data.

    The fetched file is registered as the only split, named "data", and `self.split`
    selects what is read from it.

    Returns:
        The result of `self.map` applied to the loaded rows.

    Raises:
        DataLoaderIncorrectFormatDataError: If the dataset lacks any of the required columns.
    """
    fetched_path = await self.source.fetch()
    loaded = load_dataset(
        path=str(fetched_path.parent),
        data_files={"data": str(fetched_path.name)},
        split=self.split,
    )
    # Map the rows only when every required column is present in the dataset.
    if self.required_keys.issubset(loaded.features):
        return await self.map(loaded.to_list())

    raise DataLoaderIncorrectFormatDataError(
        required_features=list(self.required_keys),
        data_path=fetched_path,
    )

map async #

map(dataset: Iterable[dict]) -> Iterable[QuestionAnswerData]

Map the dataset to the question answer data schema.

PARAMETER DESCRIPTION
dataset

The dataset to map.

TYPE: Iterable[dict]

RETURNS DESCRIPTION
Iterable[QuestionAnswerData]

The question answer data.

Source code in packages/ragbits-evaluate/src/ragbits/evaluate/dataloaders/question_answer.py
async def map(self, dataset: Iterable[dict]) -> Iterable[QuestionAnswerData]:
    """
    Convert raw dataset rows into `QuestionAnswerData` entries.

    Values are read with `dict.get`: a row without the question or answer column yields an
    empty string for that field, and a row without the context column yields `None`.

    Args:
        dataset: The rows to convert, one dict per row.

    Returns:
        A list with one `QuestionAnswerData` per row, in the order the rows were given.
    """
    question_column = self.question_key
    answer_column = self.answer_key
    context_column = self.context_key

    return [
        QuestionAnswerData(
            question=row.get(question_column, ""),
            reference_answer=row.get(answer_column, ""),
            reference_context=row.get(context_column),
        )
        for row in dataset
    ]