Skip to content

Element Enrichers#

ragbits.document_search.ingestion.enrichers.router.ElementEnricherRouter #

ElementEnricherRouter(enrichers: Mapping[type[Element], ElementEnricher] | None = None)

Bases: WithConstructionConfig

The class responsible for routing the element to the correct enricher based on the element type.

Initialize the ElementEnricherRouter instance.

PARAMETER DESCRIPTION
enrichers

The mapping of element types and their enrichers. To override default enrichers.

TYPE: Mapping[type[Element], ElementEnricher] | None DEFAULT: None

Source code in packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py
def __init__(
    self,
    enrichers: Mapping[type[Element], ElementEnricher] | None = None,
) -> None:
    """
    Set up the router with its element-type-to-enricher mapping.

    Args:
        enrichers: Optional mapping from element types to enrichers. When it is non-empty, it is
            layered on top of the default enrichers, so its entries win for element types
            present in both.
    """
    if enrichers:
        # User-supplied entries are unpacked last so they override the defaults.
        self._enrichers = {**_DEFAULT_ENRICHERS, **enrichers}
    else:
        # No overrides given: the defaults mapping is used as-is (no copy is made).
        self._enrichers = _DEFAULT_ENRICHERS

default_module class-attribute #

default_module: ModuleType | None = None

configuration_key class-attribute #

configuration_key: str = 'enricher_router'

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance of the class named in the configuration.

    The type in `config.type` is resolved with `import_by_path` (relative to `cls.default_module`),
    checked to be a subclass of the current class, and then constructed through its own
    `from_config`.

    Args:
        config: Construction config providing the `type` to resolve and the `config` handed to
            the resolved class's `from_config`.

    Returns:
        The object produced by the resolved class's `from_config`.

    Raises:
        InvalidConfigError: If the resolved class is not a subclass of the current class.
            Errors raised by `import_by_path` while resolving the type propagate unchanged.
    """
    subclass = import_by_path(config.type, cls.default_module)
    if issubclass(subclass, cls):
        return subclass.from_config(config.config)

    raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Build an instance by calling a factory function located by its import path.

    The factory is resolved with `import_by_path` (relative to `cls.default_module`) and called
    without arguments; its result is returned only if it is an instance of the current class.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object returned by the factory.

    Raises:
        InvalidConfigError: If the object returned by the factory is not an instance of the
            current class. Errors raised by `import_by_path` propagate unchanged.
    """
    factory = import_by_path(factory_path, cls.default_module)
    obj = factory()
    if isinstance(obj, cls):
        return obj

    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance from the project's component preferences.

    Sources are tried in the following order and the first one that applies wins:

    1. the entry stored under `cls.configuration_key` in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the factory registered under `cls.configuration_key` in `config.component_preference_factories`,
    4. the configuration registered under `cls.configuration_key` in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with preferences. If the file has no truthy entry
            for `cls.configuration_key`, the lookup falls through to the remaining sources.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
            Errors from `subclass_from_config` / `subclass_from_factory` propagate unchanged.
    """
    if yaml_path_override:
        preferences = get_config_from_yaml(yaml_path_override)
        type_config = preferences.get(cls.configuration_key)
        if type_config:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(type_config))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    preferred_factory = config.component_preference_factories.get(cls.configuration_key)
    if preferred_factory:
        return cls.subclass_from_factory(preferred_factory)

    preferred_config = config.preferred_instances_config.get(cls.configuration_key)
    if preferred_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(preferred_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict[str, ObjectConstructionConfig]) -> Self

Initialize the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict[str, ObjectConstructionConfig]

RETURNS DESCRIPTION
Self

The ElementEnricherRouter.

RAISES DESCRIPTION
InvalidConfigError

If any of the provided enrichers cannot be initialized.

Source code in packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py
@classmethod
def from_config(cls, config: dict[str, ObjectConstructionConfig]) -> Self:
    """
    Build the router from a mapping of element type paths to enricher configurations.

    Args:
        config: Dictionary whose keys are element type paths (resolved with `import_by_path`
            against the `element` module) and whose values are the construction configs of
            the enrichers to use for those types.

    Returns:
        The ElementEnricherRouter.

    Raises:
        InvalidConfigError: If any of the provided enrichers cannot be initialized.
    """
    enrichers: dict[type[Element], ElementEnricher] = {}
    for type_path, enricher_config in config.items():
        # Resolve the key first, then build the enricher, one config entry at a time.
        resolved_type = import_by_path(type_path, element)
        enrichers[resolved_type] = ElementEnricher.subclass_from_config(enricher_config)

    return super().from_config({"enrichers": enrichers})

get #

get(element_type: type[Element]) -> ElementEnricher

Get the enricher for the element.

PARAMETER DESCRIPTION
element_type

The element type.

TYPE: type[Element]

RETURNS DESCRIPTION
ElementEnricher

The enricher for processing the element.

RAISES DESCRIPTION
EnricherNotFoundError

If no enricher is found for the element type.

Source code in packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/router.py
def get(self, element_type: type[Element]) -> ElementEnricher:
    """
    Look up the enricher registered for the given element type.

    Args:
        element_type: The element type.

    Returns:
        The enricher for processing the element.

    Raises:
        EnricherNotFoundError: If no enricher is found for the element type.
    """
    candidate = self._enrichers.get(element_type)

    # A missing entry yields None, which fails the isinstance check just like a wrong-typed value.
    if not isinstance(candidate, ElementEnricher):
        raise EnricherNotFoundError(element_type)

    return candidate

ragbits.document_search.ingestion.enrichers.base.ElementEnricher #

Bases: Generic[ElementT], WithConstructionConfig, ABC

Base class for element enrichers, responsible for providing additional information about elements.

Enrichers operate on raw elements and are used to fill in missing fields that could not be filled in during parsing. They usually deal with summarizing text or describing images.

default_module class-attribute #

default_module: ModuleType | None = enrichers

configuration_key class-attribute #

configuration_key: str = 'enricher'

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance of the class named in the configuration.

    The type in `config.type` is resolved with `import_by_path` (relative to `cls.default_module`),
    checked to be a subclass of the current class, and then constructed through its own
    `from_config`.

    Args:
        config: Construction config providing the `type` to resolve and the `config` handed to
            the resolved class's `from_config`.

    Returns:
        The object produced by the resolved class's `from_config`.

    Raises:
        InvalidConfigError: If the resolved class is not a subclass of the current class.
            Errors raised by `import_by_path` while resolving the type propagate unchanged.
    """
    subclass = import_by_path(config.type, cls.default_module)
    if issubclass(subclass, cls):
        return subclass.from_config(config.config)

    raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Build an instance by calling a factory function located by its import path.

    The factory is resolved with `import_by_path` (relative to `cls.default_module`) and called
    without arguments; its result is returned only if it is an instance of the current class.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object returned by the factory.

    Raises:
        InvalidConfigError: If the object returned by the factory is not an instance of the
            current class. Errors raised by `import_by_path` propagate unchanged.
    """
    factory = import_by_path(factory_path, cls.default_module)
    obj = factory()
    if isinstance(obj, cls):
        return obj

    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance from the project's component preferences.

    Sources are tried in the following order and the first one that applies wins:

    1. the entry stored under `cls.configuration_key` in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the factory registered under `cls.configuration_key` in `config.component_preference_factories`,
    4. the configuration registered under `cls.configuration_key` in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with preferences. If the file has no truthy entry
            for `cls.configuration_key`, the lookup falls through to the remaining sources.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
            Errors from `subclass_from_config` / `subclass_from_factory` propagate unchanged.
    """
    if yaml_path_override:
        preferences = get_config_from_yaml(yaml_path_override)
        type_config = preferences.get(cls.configuration_key)
        if type_config:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(type_config))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    preferred_factory = config.component_preference_factories.get(cls.configuration_key)
    if preferred_factory:
        return cls.subclass_from_factory(preferred_factory)

    preferred_config = config.preferred_instances_config.get(cls.configuration_key)
    if preferred_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(preferred_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    return cls(**config)

enrich abstractmethod async #

enrich(elements: list[ElementT]) -> list[ElementT]

Enrich elements.

PARAMETER DESCRIPTION
elements

The elements to be enriched.

TYPE: list[ElementT]

RETURNS DESCRIPTION
list[ElementT]

The list of enriched elements.

RAISES DESCRIPTION
EnricherError

If the enrichment of the elements failed.

Source code in packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py
@abstractmethod
async def enrich(self, elements: list[ElementT]) -> list[ElementT]:
    """
    Enrich elements.

    This is an abstract coroutine without a default implementation; concrete enrichers
    must override it.

    Args:
        elements: The elements to be enriched.

    Returns:
        The list of enriched elements.

    Raises:
        EnricherError: If the enrichment of the elements failed.
    """

validate_element_type classmethod #

validate_element_type(element_type: type[Element]) -> None

Check if the enricher supports the element type.

PARAMETER DESCRIPTION
element_type

The element type to validate against the enricher.

TYPE: type[Element]

RAISES DESCRIPTION
EnricherElementNotSupportedError

If the element type is not supported.

Source code in packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py
@classmethod
def validate_element_type(cls, element_type: type[Element]) -> None:
    """
    Verify that this enricher is declared for the given element type.

    The supported type is read from the first type argument of the class's first original
    base (e.g. the `X` in `ElementEnricher[X]`), and the given type must match it exactly.

    Args:
        element_type: The element type to validate against the enricher.

    Raises:
        EnricherElementNotSupportedError: If the element type is not supported.
    """
    # First generic argument of the first declared base is the element type this enricher handles.
    supported_type = cls.__orig_bases__[0].__args__[0]  # type: ignore
    if element_type != supported_type:
        raise EnricherElementNotSupportedError(enricher_name=cls.__name__, element_type=element_type)

ragbits.document_search.ingestion.enrichers.image.ImageElementEnricher #

ImageElementEnricher(llm: LLM | None = None, prompt: type[Prompt[ImageDescriberInput, ImageDescriberOutput]] | None = None)

Bases: ElementEnricher[ImageElement]

Enricher that describes image elements using LLM.

Initialize the ImageElementEnricher instance.

PARAMETER DESCRIPTION
llm

The language model to use for describing images.

TYPE: LLM | None DEFAULT: None

prompt

The prompt class to use.

TYPE: type[Prompt[ImageDescriberInput, ImageDescriberOutput]] | None DEFAULT: None

Source code in packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py
def __init__(
    self,
    llm: LLM | None = None,
    prompt: type[Prompt[ImageDescriberInput, ImageDescriberOutput]] | None = None,
) -> None:
    """
    Set up the image enricher with its LLM and prompt class.

    Args:
        llm: The language model to use for describing images. When not provided, the preferred
            LLM of type `LLMType.VISION` is used.
        prompt: The prompt class to use. When not provided, `ImageDescriberPrompt` is used.
    """
    # Fallbacks apply to any falsy argument, not only to None.
    self._llm = llm if llm else get_preferred_llm(llm_type=LLMType.VISION)
    self._prompt = prompt if prompt else ImageDescriberPrompt

default_module class-attribute #

default_module: ModuleType | None = enrichers

configuration_key class-attribute #

configuration_key: str = 'enricher'

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance of the class named in the configuration.

    The type in `config.type` is resolved with `import_by_path` (relative to `cls.default_module`),
    checked to be a subclass of the current class, and then constructed through its own
    `from_config`.

    Args:
        config: Construction config providing the `type` to resolve and the `config` handed to
            the resolved class's `from_config`.

    Returns:
        The object produced by the resolved class's `from_config`.

    Raises:
        InvalidConfigError: If the resolved class is not a subclass of the current class.
            Errors raised by `import_by_path` while resolving the type propagate unchanged.
    """
    subclass = import_by_path(config.type, cls.default_module)
    if issubclass(subclass, cls):
        return subclass.from_config(config.config)

    raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Build an instance by calling a factory function located by its import path.

    The factory is resolved with `import_by_path` (relative to `cls.default_module`) and called
    without arguments; its result is returned only if it is an instance of the current class.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object returned by the factory.

    Raises:
        InvalidConfigError: If the object returned by the factory is not an instance of the
            current class. Errors raised by `import_by_path` propagate unchanged.
    """
    factory = import_by_path(factory_path, cls.default_module)
    obj = factory()
    if isinstance(obj, cls):
        return obj

    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance from the project's component preferences.

    Sources are tried in the following order and the first one that applies wins:

    1. the entry stored under `cls.configuration_key` in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the factory registered under `cls.configuration_key` in `config.component_preference_factories`,
    4. the configuration registered under `cls.configuration_key` in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with preferences. If the file has no truthy entry
            for `cls.configuration_key`, the lookup falls through to the remaining sources.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
            Errors from `subclass_from_config` / `subclass_from_factory` propagate unchanged.
    """
    if yaml_path_override:
        preferences = get_config_from_yaml(yaml_path_override)
        type_config = preferences.get(cls.configuration_key)
        if type_config:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(type_config))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    preferred_factory = config.component_preference_factories.get(cls.configuration_key)
    if preferred_factory:
        return cls.subclass_from_factory(preferred_factory)

    preferred_config = config.preferred_instances_config.get(cls.configuration_key)
    if preferred_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(preferred_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

validate_element_type classmethod #

validate_element_type(element_type: type[Element]) -> None

Check if the enricher supports the element type.

PARAMETER DESCRIPTION
element_type

The element type to validate against the enricher.

TYPE: type[Element]

RAISES DESCRIPTION
EnricherElementNotSupportedError

If the element type is not supported.

Source code in packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/base.py
@classmethod
def validate_element_type(cls, element_type: type[Element]) -> None:
    """
    Verify that this enricher is declared for the given element type.

    The supported type is read from the first type argument of the class's first original
    base (e.g. the `X` in `ElementEnricher[X]`), and the given type must match it exactly.

    Args:
        element_type: The element type to validate against the enricher.

    Raises:
        EnricherElementNotSupportedError: If the element type is not supported.
    """
    # First generic argument of the first declared base is the element type this enricher handles.
    supported_type = cls.__orig_bases__[0].__args__[0]  # type: ignore
    if element_type != supported_type:
        raise EnricherElementNotSupportedError(enricher_name=cls.__name__, element_type=element_type)

enrich async #

enrich(elements: list[ImageElement]) -> list[ImageElement]

Enrich image elements with additional description of the image.

PARAMETER DESCRIPTION
elements

The elements to be enriched.

TYPE: list[ImageElement]

RETURNS DESCRIPTION
list[ImageElement]

The list of enriched elements.

RAISES DESCRIPTION
EnricherElementNotSupportedError

If the element type is not supported.

LLMError

If LLM generation fails.

Source code in packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py
async def enrich(self, elements: list[ImageElement]) -> list[ImageElement]:
    """
    Produce copies of the image elements with an LLM-generated description.

    Every element is first validated and described, one after another in input order; only
    after all descriptions are available are the new elements created. The new elements keep
    the original document metadata, image bytes and OCR text.

    Args:
        elements: The elements to be enriched.

    Returns:
        The list of enriched elements.

    Raises:
        EnricherElementNotSupportedError: If the element type is not supported.
        LLMError: If LLM generation fails.
    """

    async def describe(element):  # noqa: ANN001, ANN202
        # Reject unsupported element types before spending an LLM call on them.
        self.validate_element_type(type(element))
        prompt = self._prompt(ImageDescriberInput(image=element.image_bytes))
        return await self._llm.generate(prompt)

    # Awaited inside the comprehension, so the LLM calls run sequentially, not concurrently.
    responses = [await describe(element) for element in elements]

    enriched = []
    for element, response in zip(elements, responses, strict=True):
        enriched.append(
            ImageElement(
                document_meta=element.document_meta,
                description=response.description,
                image_bytes=element.image_bytes,
                ocr_extracted_text=element.ocr_extracted_text,
            )
        )
    return enriched

from_config classmethod #

from_config(config: dict) -> ImageElementEnricher

Create an ImageElementEnricher instance from a configuration dictionary.

PARAMETER DESCRIPTION
config

The dictionary containing the configuration settings.

TYPE: dict

RETURNS DESCRIPTION
ImageElementEnricher

The initialized instance of ImageElementEnricher.

RAISES DESCRIPTION
ValidationError

If the configuration doesn't follow the expected format.

InvalidConfigError

If llm or prompt can't be found or are not the correct type.

Source code in packages/ragbits-document-search/src/ragbits/document_search/ingestion/enrichers/image.py
@classmethod
def from_config(cls, config: dict) -> "ImageElementEnricher":
    """
    Create an `ImageElementEnricher` instance from a configuration dictionary.

    The optional `llm` entry is validated as an `ObjectConstructionConfig` and turned into an
    LLM instance; the optional `prompt` entry is an import path resolved to the prompt class.
    Missing entries are passed on as `None`. The caller's dictionary is not modified, so the
    same configuration can safely be reused for further calls.

    Args:
        config: The dictionary containing the configuration settings.

    Returns:
        The initialized instance of `ImageElementEnricher`.

    Raises:
        ValidationError: If the configuration doesn't follow the expected format.
        InvalidConfigError: If llm or prompt can't be found or are not the correct type.
    """
    # Work on a shallow copy: writing the resolved objects (or None) back into the caller's
    # dict would make a second call with the same dict see already-resolved values.
    resolved = dict(config)
    resolved["llm"] = (
        LLM.subclass_from_config(ObjectConstructionConfig.model_validate(config["llm"]))
        if "llm" in config
        else None
    )
    resolved["prompt"] = import_by_path(config["prompt"]) if "prompt" in config else None
    return super().from_config(resolved)