Embedders#

ragbits.core.embeddings.base.Embedder #

Embedder(default_options: OptionsT | None = None)

Bases: ConfigurableComponent[EmbedderOptionsT], ABC

Abstract class that defines a common interface for both sparse and dense embedding models.

Constructs a new ConfigurableComponent instance.

PARAMETER	DESCRIPTION
`default_options`	The default options for the component. TYPE: `OptionsT \| None` DEFAULT: `None`

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

def __init__(self, default_options: OptionsT | None = None) -> None:
    """
    Sets up the component with its default options.

    Args:
        default_options: Options to use by default. When a falsy value (such as None)
            is given, a fresh instance of the class's `options_cls` is created instead.
    """
    if default_options:
        chosen_options = default_options
    else:
        # Nothing usable was supplied: fall back to the options type declared on the class.
        chosen_options = self.options_cls()
    self.default_options: OptionsT = chosen_options

default_options `instance-attribute` #

default_options: OptionsT = default_options or options_cls()

options_cls `instance-attribute` #

options_cls: type[EmbedderOptionsT]

default_module `class-attribute` `instance-attribute` #

default_module: ClassVar = embeddings

configuration_key `class-attribute` `instance-attribute` #

configuration_key: ClassVar = 'embedder'

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Builds an instance from a construction config, possibly of a subclass of this class.

    The class named by `config.type` is resolved relative to `cls.default_module` and then
    instantiated through its own `from_config` using `config.config`.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is always driven to completion before this method returns.
    When the calling thread is already running an event loop, the factory is executed on
    a fresh event loop in a helper thread: waiting on the current loop from its own thread
    could never finish, because that thread would be blocked by the wait itself.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not an instance of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        # Guard only the loop lookup, so a RuntimeError raised by the factory itself
        # propagates instead of being mistaken for "no running loop".
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop in this thread: the coroutine can be run directly.
            obj = asyncio.run(factory())
        else:
            # A loop is running in this thread. Run the factory on its own loop in a worker
            # thread and wait for it. NOTE: the factory therefore does not share the caller's loop.
            import concurrent.futures

            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	The path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`NoPreferredConfigError`	If no preferred factory or configuration can be found for the component.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Creates an instance according to the project's component preferences.

    Sources are consulted in this order and the first one that yields a result wins:
    1. the entry for this class's configuration key in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the preferred factory registered in `config` for this class's configuration key,
    4. the preferred instance configuration registered in `config` for that key.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function in the format of
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with component configuration.

    Returns:
        The instance created from the first matching source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
    """
    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config `classmethod` #

from_config(config: dict[str, Any]) -> Self

Initializes the class with the provided configuration.

PARAMETER	DESCRIPTION
`config`	A dictionary containing configuration details for the class. TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    default_options = config.pop("default_options", None)
    options = cls.options_cls(**default_options) if default_options else None
    return cls(**config, default_options=options)

embed_text `abstractmethod` `async` #

embed_text(data: list[str], options: EmbedderOptionsT | None = None) -> list[list[float]] | list[SparseVector]

Creates embeddings for the given strings.

PARAMETER	DESCRIPTION
`data`	List of strings to get embeddings for. TYPE: `list[str]`
`options`	Additional settings used by the Embedder model. TYPE: `EmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]] \| list[SparseVector]`	List of embeddings for the given strings.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

@abstractmethod
async def embed_text(
    self, data: list[str], options: EmbedderOptionsT | None = None
) -> list[list[float]] | list[SparseVector]:
    """
    Creates embeddings for the given strings.

    This method is abstract; concrete embedders must provide the implementation.

    Args:
        data: List of strings to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of embeddings for the given strings, either dense (lists of floats)
        or sparse (SparseVector objects).
    """

get_vector_size `abstractmethod` `async` #

get_vector_size() -> VectorSize

Get information about the vector size/dimensions returned by this embedder.

RETURNS	DESCRIPTION
`VectorSize`	VectorSize object containing dimension information and whether vectors are sparse.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

@abstractmethod
async def get_vector_size(self) -> VectorSize:
    """
    Get information about the vector size/dimensions returned by this embedder.

    This method is abstract; concrete embedders must provide the implementation.

    Returns:
        VectorSize object containing dimension information and whether vectors are sparse.
    """

image_support #

image_support() -> bool

Check if the model supports image embeddings.

RETURNS	DESCRIPTION
`bool`	True if the model supports image embeddings, False otherwise.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

def image_support(self) -> bool:  # noqa: PLR6301
    """
    Tell whether this embedder can produce embeddings for images.

    This implementation always answers False.

    Returns:
        True if the model supports image embeddings, False otherwise.
    """
    return False

embed_image `async` #

embed_image(images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]] | list[SparseVector]

Creates embeddings for the given images.

PARAMETER	DESCRIPTION
`images`	List of images to get embeddings for. TYPE: `list[bytes]`
`options`	Additional settings used by the Embedder model. TYPE: `EmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]] \| list[SparseVector]`	List of embeddings for the given images.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

async def embed_image(
    self, images: list[bytes], options: EmbedderOptionsT | None = None
) -> list[list[float]] | list[SparseVector]:
    """
    Creates embeddings for the given images.

    This implementation does not embed anything and always raises.

    Args:
        images: List of images to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of embeddings for the given images.

    Raises:
        NotImplementedError: Always, in this implementation.
    """
    message = "Image embeddings are not supported by this model."
    raise NotImplementedError(message)

ragbits.core.embeddings.dense.DenseEmbedder #

DenseEmbedder(default_options: OptionsT | None = None)

Bases: Embedder[EmbedderOptionsT], ABC

Abstract client for communication with dense embedding models.

Constructs a new ConfigurableComponent instance.

PARAMETER	DESCRIPTION
`default_options`	The default options for the component. TYPE: `OptionsT \| None` DEFAULT: `None`

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

def __init__(self, default_options: OptionsT | None = None) -> None:
    """
    Sets up the component with its default options.

    Args:
        default_options: Options to use by default. When a falsy value (such as None)
            is given, a fresh instance of the class's `options_cls` is created instead.
    """
    if default_options:
        chosen_options = default_options
    else:
        # Nothing usable was supplied: fall back to the options type declared on the class.
        chosen_options = self.options_cls()
    self.default_options: OptionsT = chosen_options

default_module `class-attribute` `instance-attribute` #

default_module: ClassVar = embeddings

configuration_key `class-attribute` `instance-attribute` #

configuration_key: ClassVar = 'embedder'

options_cls `instance-attribute` #

options_cls: type[EmbedderOptionsT]

default_options `instance-attribute` #

default_options: OptionsT = default_options or options_cls()

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Builds an instance from a construction config, possibly of a subclass of this class.

    The class named by `config.type` is resolved relative to `cls.default_module` and then
    instantiated through its own `from_config` using `config.config`.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is always driven to completion before this method returns.
    When the calling thread is already running an event loop, the factory is executed on
    a fresh event loop in a helper thread: waiting on the current loop from its own thread
    could never finish, because that thread would be blocked by the wait itself.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not an instance of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        # Guard only the loop lookup, so a RuntimeError raised by the factory itself
        # propagates instead of being mistaken for "no running loop".
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop in this thread: the coroutine can be run directly.
            obj = asyncio.run(factory())
        else:
            # A loop is running in this thread. Run the factory on its own loop in a worker
            # thread and wait for it. NOTE: the factory therefore does not share the caller's loop.
            import concurrent.futures

            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	The path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`NoPreferredConfigError`	If no preferred factory or configuration can be found for the component.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Creates an instance according to the project's component preferences.

    Sources are consulted in this order and the first one that yields a result wins:
    1. the entry for this class's configuration key in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the preferred factory registered in `config` for this class's configuration key,
    4. the preferred instance configuration registered in `config` for that key.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function in the format of
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with component configuration.

    Returns:
        The instance created from the first matching source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
    """
    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config `classmethod` #

from_config(config: dict[str, Any]) -> Self

Initializes the class with the provided configuration.

PARAMETER	DESCRIPTION
`config`	A dictionary containing configuration details for the class. TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    default_options = config.pop("default_options", None)
    options = cls.options_cls(**default_options) if default_options else None
    return cls(**config, default_options=options)

image_support #

image_support() -> bool

Check if the model supports image embeddings.

RETURNS	DESCRIPTION
`bool`	True if the model supports image embeddings, False otherwise.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

def image_support(self) -> bool:  # noqa: PLR6301
    """
    Tell whether this embedder can produce embeddings for images.

    This implementation always answers False.

    Returns:
        True if the model supports image embeddings, False otherwise.
    """
    return False

embed_text `abstractmethod` `async` #

embed_text(data: list[str], options: EmbedderOptionsT | None = None) -> list[list[float]]

Creates embeddings for the given strings.

PARAMETER	DESCRIPTION
`data`	List of strings to get embeddings for. TYPE: `list[str]`
`options`	Additional settings used by the Embedder model. TYPE: `EmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]]`	List of embeddings for the given strings.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/base.py

@abstractmethod
async def embed_text(self, data: list[str], options: EmbedderOptionsT | None = None) -> list[list[float]]:
    """
    Creates dense embeddings for the given strings.

    This method is abstract; concrete dense embedders must provide the implementation.

    Args:
        data: List of strings to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of embeddings for the given strings, each one a list of floats.
    """

get_vector_size `abstractmethod` `async` #

get_vector_size() -> VectorSize

Get information about the dense vector size/dimensions returned by this embedder.

RETURNS	DESCRIPTION
`VectorSize`	VectorSize object with is_sparse=False and the embedding dimension.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/base.py

@abstractmethod
async def get_vector_size(self) -> VectorSize:
    """
    Get information about the dense vector size/dimensions returned by this embedder.

    This method is abstract; concrete dense embedders must provide the implementation.

    Returns:
        VectorSize object with is_sparse=False and the embedding dimension.
    """

embed_image `async` #

embed_image(images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]

Creates embeddings for the given images.

PARAMETER	DESCRIPTION
`images`	List of images to get embeddings for. TYPE: `list[bytes]`
`options`	Additional settings used by the Embedder model. TYPE: `EmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]]`	List of embeddings for the given images.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/base.py

async def embed_image(self, images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]:
    """
    Creates embeddings for the given images.

    This implementation does not embed anything and always raises.

    Args:
        images: List of images to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of embeddings for the given images.

    Raises:
        NotImplementedError: Always, in this implementation.
    """
    message = "Image embeddings are not supported by this model."
    raise NotImplementedError(message)

ragbits.core.embeddings.dense.local.LocalEmbedder #

LocalEmbedder(model_name: str, default_options: LocalEmbedderOptions | None = None, **model_kwargs: Any)

Bases: DenseEmbedder[LocalEmbedderOptions]

Class for interaction with any encoder available in HuggingFace.

Note: The local implementation is not intended for production use. Use it only for experiments and evaluation.

Constructs a new local embedder instance.

PARAMETER	DESCRIPTION
`model_name`	Name of the model to use. TYPE: `str`
`default_options`	Default options for the embedding model. TYPE: `LocalEmbedderOptions \| None` DEFAULT: `None`
`model_kwargs`	Additional arguments to pass to the SentenceTransformer. TYPE: `Any` DEFAULT: `{}`

RAISES	DESCRIPTION
`ImportError`	If the 'local' extra requirements are not installed.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/local.py

def __init__(
    self,
    model_name: str,
    default_options: LocalEmbedderOptions | None = None,
    **model_kwargs: Any,  # noqa: ANN401
) -> None:
    """
    Constructs a new local embedder backed by a SentenceTransformer model.

    Args:
        model_name: Name of the model to use.
        default_options: Default options for the embedding model.
        model_kwargs: Additional arguments to pass to the SentenceTransformer.

    Raises:
        ImportError: If the 'local' extra requirements are not installed.
    """
    # Fail before any other work when the optional dependencies are unavailable.
    if not HAS_LOCAL_EMBEDDINGS:
        raise ImportError("You need to install the 'local' extra requirements to use local embeddings models")

    super().__init__(default_options=default_options)

    self.model_name = model_name
    # The model is instantiated at construction time and kept on the instance.
    self.model = SentenceTransformer(model_name, **model_kwargs)

default_module `class-attribute` `instance-attribute` #

default_module: ClassVar = embeddings

configuration_key `class-attribute` `instance-attribute` #

configuration_key: ClassVar = 'embedder'

default_options `instance-attribute` #

default_options: OptionsT = default_options or options_cls()

options_cls `class-attribute` `instance-attribute` #

options_cls = LocalEmbedderOptions

model_name `instance-attribute` #

model_name = model_name

model `instance-attribute` #

model = SentenceTransformer(model_name, **model_kwargs)

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Builds an instance from a construction config, possibly of a subclass of this class.

    The class named by `config.type` is resolved relative to `cls.default_module` and then
    instantiated through its own `from_config` using `config.config`.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is always driven to completion before this method returns.
    When the calling thread is already running an event loop, the factory is executed on
    a fresh event loop in a helper thread: waiting on the current loop from its own thread
    could never finish, because that thread would be blocked by the wait itself.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not an instance of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        # Guard only the loop lookup, so a RuntimeError raised by the factory itself
        # propagates instead of being mistaken for "no running loop".
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop in this thread: the coroutine can be run directly.
            obj = asyncio.run(factory())
        else:
            # A loop is running in this thread. Run the factory on its own loop in a worker
            # thread and wait for it. NOTE: the factory therefore does not share the caller's loop.
            import concurrent.futures

            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	The path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`NoPreferredConfigError`	If no preferred factory or configuration can be found for the component.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Creates an instance according to the project's component preferences.

    Sources are consulted in this order and the first one that yields a result wins:
    1. the entry for this class's configuration key in the YAML file at `yaml_path_override`,
    2. the factory at `factory_path_override`,
    3. the preferred factory registered in `config` for this class's configuration key,
    4. the preferred instance configuration registered in `config` for that key.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function in the format of
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with component configuration.

    Returns:
        The instance created from the first matching source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
    """
    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_instance_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config `classmethod` #

from_config(config: dict[str, Any]) -> Self

Initializes the class with the provided configuration.

PARAMETER	DESCRIPTION
`config`	A dictionary containing configuration details for the class. TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    default_options = config.pop("default_options", None)
    options = cls.options_cls(**default_options) if default_options else None
    return cls(**config, default_options=options)

image_support #

image_support() -> bool

Check if the model supports image embeddings.

RETURNS	DESCRIPTION
`bool`	True if the model supports image embeddings, False otherwise.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

def image_support(self) -> bool:  # noqa: PLR6301
    """
    Tell whether this embedder can produce embeddings for images.

    This implementation always answers False.

    Returns:
        True if the model supports image embeddings, False otherwise.
    """
    return False

embed_image `async` #

embed_image(images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]

Creates embeddings for the given images.

PARAMETER	DESCRIPTION
`images`	List of images to get embeddings for. TYPE: `list[bytes]`
`options`	Additional settings used by the Embedder model. TYPE: `EmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]]`	List of embeddings for the given images.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/base.py

async def embed_image(self, images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]:
    """
    Produce embeddings for a batch of images.

    This implementation never embeds anything; it always raises.

    Args:
        images: List of images to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of embeddings for the given images.

    Raises:
        NotImplementedError: Always raised by this implementation.
    """
    message = "Image embeddings are not supported by this model."
    raise NotImplementedError(message)

get_vector_size `async` #

get_vector_size() -> VectorSize

Get the vector size for this local SentenceTransformer model.

RETURNS	DESCRIPTION
`VectorSize`	VectorSize object with the model's embedding dimension.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/local.py

async def get_vector_size(self) -> VectorSize:
    """
    Determine the dense vector size produced by this embedder's model.

    Returns:
        VectorSize object with the model's embedding dimension.
    """
    size = self.model.get_sentence_embedding_dimension()
    if size is not None:
        return VectorSize(size=size, is_sparse=False)

    # The model did not report a dimension, so measure one real embedding instead.
    probe = await self.embed_text(["sample"])
    return VectorSize(size=len(probe[0]), is_sparse=False)

embed_text `async` #

embed_text(data: list[str], options: LocalEmbedderOptions | None = None) -> list[list[float]]

Calls the appropriate encoder endpoint with the given data and options.

PARAMETER	DESCRIPTION
`data`	List of strings to get embeddings for. TYPE: `list[str]`
`options`	Additional options to pass to the embedding model. TYPE: `LocalEmbedderOptions \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]]`	List of embeddings for the given strings.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/local.py

async def embed_text(self, data: list[str], options: LocalEmbedderOptions | None = None) -> list[list[float]]:
    """
    Encode the given strings with the local model and return the embeddings.

    Args:
        data: List of strings to get embeddings for.
        options: Additional options to pass to the embedding model; when given,
            they are merged on top of the default options.

    Returns:
        List of embeddings for the given strings.
    """
    if options:
        effective = self.default_options | options
    else:
        effective = self.default_options

    with trace(
        data=data,
        model_name=self.model_name,
        model_obj=repr(self.model),
        options=effective.dict(),
    ) as outputs:
        encoded = self.model.encode(data, **effective.encode_kwargs)
        outputs.embeddings = encoded.tolist()
    return outputs.embeddings

ragbits.core.embeddings.dense.litellm.LiteLLMEmbedder #

LiteLLMEmbedder(model_name: str = 'text-embedding-3-small', default_options: LiteLLMEmbedderOptions | None = None, *, api_base: str | None = None, base_url: str | None = None, api_key: str | None = None, api_version: str | None = None, router: Router | None = None)

Bases: DenseEmbedder[LiteLLMEmbedderOptions]

Client for creating text embeddings using LiteLLM API.

Constructs the LiteLLMEmbedder.

PARAMETER	DESCRIPTION
`model_name`	Name of the LiteLLM supported model to be used. Default is "text-embedding-3-small". TYPE: `str` DEFAULT: `'text-embedding-3-small'`
`default_options`	Default options to pass to the LiteLLM API. TYPE: `LiteLLMEmbedderOptions \| None` DEFAULT: `None`
`api_base`	The API endpoint you want to call the model with. TYPE: `str \| None` DEFAULT: `None`
`base_url`	Alias for api_base. If both are provided, api_base takes precedence. TYPE: `str \| None` DEFAULT: `None`
`api_key`	API key to be used. If not specified, an environment variable will be used, for more information, follow the instructions for your specific vendor in the LiteLLM documentation. TYPE: `str \| None` DEFAULT: `None`
`api_version`	The API version for the call. TYPE: `str \| None` DEFAULT: `None`
`router`	Router to be used to route requests to different models. TYPE: `Router \| None` DEFAULT: `None`

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/litellm.py

def __init__(
    self,
    model_name: str = "text-embedding-3-small",
    default_options: LiteLLMEmbedderOptions | None = None,
    *,
    api_base: str | None = None,
    base_url: str | None = None,  # Alias for api_base
    api_key: str | None = None,
    api_version: str | None = None,
    router: litellm.Router | None = None,
) -> None:
    """
    Set up an embedder that talks to the LiteLLM API.

    Args:
        model_name: Name of the LiteLLM supported model to be used
            (see https://docs.litellm.ai/docs/embedding/supported_embedding).
            Default is "text-embedding-3-small".
        default_options: Default options to pass to the LiteLLM API.
        api_base: The API endpoint you want to call the model with.
        base_url: Alias for api_base. If both are provided, api_base takes precedence.
        api_key: API key to be used. If not specified, an environment variable will be used;
            for more information, follow the instructions for your specific vendor in the
            LiteLLM documentation (https://docs.litellm.ai/docs/embedding/supported_embedding).
        api_version: The API version for the call.
        router: Router to be used to route requests (https://docs.litellm.ai/docs/routing)
            to different models.
    """
    super().__init__(default_options=default_options)

    # Connection settings; base_url is only consulted when api_base is falsy.
    self.api_base = api_base if api_base else base_url
    self.api_key = api_key
    self.api_version = api_version

    # Model selection and optional request routing.
    self.model_name = model_name
    self.router = router

default_module `class-attribute` `instance-attribute` #

default_module: ClassVar = embeddings

configuration_key `class-attribute` `instance-attribute` #

configuration_key: ClassVar = 'embedder'

default_options `instance-attribute` #

default_options: OptionsT = default_options or options_cls()

options_cls `class-attribute` `instance-attribute` #

options_cls = LiteLLMEmbedderOptions

model_name `instance-attribute` #

model_name = model_name

api_base `instance-attribute` #

api_base = api_base or base_url

api_key `instance-attribute` #

api_key = api_key

api_version `instance-attribute` #

api_version = api_version

router `instance-attribute` #

router = router

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance from a construction config, resolving the concrete class
    named by the config. The resolved class may be a subclass of this one.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The resolved class is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not an instance of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop is running in this thread, so we can drive the coroutine here.
            obj = asyncio.run(factory())
        else:
            # A loop is already running in this thread. Waiting synchronously for a
            # coroutine scheduled on that same loop would block it forever, so the
            # factory is executed on a fresh event loop in a short-lived worker thread.
            # NOTE: the factory therefore runs on a different loop than the caller's.
            from concurrent.futures import ThreadPoolExecutor

            with ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	A string representing the path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`InvalidConfigError`	If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance from the first available source, checked in this order:
    the YAML override, the factory override, the project's preferred factory,
    and finally the project's preferred instance configuration.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: Path to the YAML file containing the instance configuration.

    Returns:
        An instance created from the first source that provides one.

    Raises:
        NoPreferredConfigError: If none of the sources yields a factory or configuration.
    """
    key = cls.configuration_key

    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_config = config.preferred_instances_config.get(key)
    if project_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {key}")

image_support #

image_support() -> bool

Check if the model supports image embeddings.

RETURNS	DESCRIPTION
`bool`	True if the model supports image embeddings, False otherwise.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

def image_support(self) -> bool:  # noqa: PLR6301
    """
    Report whether this embedder can produce image embeddings.

    Returns:
        Always False in this implementation.
    """
    supports_images = False
    return supports_images

embed_image `async` #

embed_image(images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]

Creates embeddings for the given images.

PARAMETER	DESCRIPTION
`images`	List of images to get embeddings for. TYPE: `list[bytes]`
`options`	Additional settings used by the Embedder model. TYPE: `EmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]]`	List of embeddings for the given images.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/base.py

async def embed_image(self, images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]:
    """
    Produce embeddings for a batch of images.

    This implementation never embeds anything; it always raises.

    Args:
        images: List of images to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of embeddings for the given images.

    Raises:
        NotImplementedError: Always raised by this implementation.
    """
    message = "Image embeddings are not supported by this model."
    raise NotImplementedError(message)

get_vector_size `async` #

get_vector_size() -> VectorSize

Get the vector size for this LiteLLM model.

If dimensions are specified in default options, use that value. Otherwise, embed a sample text to determine the dimension.

RETURNS	DESCRIPTION
`VectorSize`	VectorSize object with the model's embedding dimension.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/litellm.py

async def get_vector_size(self) -> VectorSize:
    """
    Determine the dense vector size for this LiteLLM model.

    A ``dimensions`` value set in the default options is used directly.
    Without one, a sample text is embedded and its length is measured.

    Returns:
        VectorSize object with the model's embedding dimension.
    """
    defaults = self.default_options
    if defaults:
        configured = defaults.dimensions
        # Both "not given" and an explicit None mean no dimension was configured.
        if configured is not NOT_GIVEN and configured is not None:
            return VectorSize(size=cast(int, configured), is_sparse=False)

    # No explicit dimension available: measure a real embedding.
    probe = await self.embed_text(["sample"])
    return VectorSize(size=len(probe[0]), is_sparse=False)

embed_text `async` #

embed_text(data: list[str], options: LiteLLMEmbedderOptions | None = None) -> list[list[float]]

Creates embeddings for the given strings.

PARAMETER	DESCRIPTION
`data`	List of strings to get embeddings for. TYPE: `list[str]`
`options`	Additional options to pass to the Lite LLM API. TYPE: `LiteLLMEmbedderOptions \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]]`	List of embeddings for the given strings.

RAISES	DESCRIPTION
`EmbeddingConnectionError`	If there is a connection error with the embedding API.
`EmbeddingEmptyResponseError`	If the embedding API returns an empty response.
`EmbeddingStatusError`	If the embedding API returns an error status code.
`EmbeddingResponseError`	If the embedding API response is invalid.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/litellm.py

async def embed_text(self, data: list[str], options: LiteLLMEmbedderOptions | None = None) -> list[list[float]]:
    """
    Request embeddings for the given strings through LiteLLM.

    Args:
        data: List of strings to get embeddings for.
        options: Additional options to pass to the Lite LLM API; when given,
            they are merged on top of the default options.

    Returns:
        List of embeddings for the given strings.

    Raises:
        EmbeddingConnectionError: If there is a connection error with the embedding API.
        EmbeddingEmptyResponseError: If the embedding API returns an empty response.
        EmbeddingStatusError: If the embedding API returns an error status code.
        EmbeddingResponseError: If the embedding API response is invalid.
    """
    if options:
        effective = self.default_options | options
    else:
        effective = self.default_options

    with trace(
        data=data,
        model=self.model_name,
        api_base=self.api_base,
        api_version=self.api_version,
        options=effective.dict(),
    ) as outputs:
        try:
            # A configured router takes priority over the plain litellm module.
            client = self.router or litellm
            result = await client.aembedding(
                input=data,
                model=self.model_name,
                api_base=self.api_base,
                api_key=self.api_key,
                api_version=self.api_version,
                **effective.dict(),
            )
        except litellm.openai.APIConnectionError as err:
            raise EmbeddingConnectionError() from err
        except litellm.openai.APIStatusError as err:
            raise EmbeddingStatusError(err.message, err.status_code) from err
        except litellm.openai.APIResponseValidationError as err:
            raise EmbeddingResponseError() from err

        if not result.data:
            raise EmbeddingEmptyResponseError()

        outputs.embeddings = [item["embedding"] for item in result.data]

        # Token usage is recorded on the trace only when the response reports it.
        usage = result.usage
        if usage:
            outputs.completion_tokens = usage.completion_tokens
            outputs.prompt_tokens = usage.prompt_tokens
            outputs.total_tokens = usage.total_tokens

    return outputs.embeddings

from_config `classmethod` #

from_config(config: dict[str, Any]) -> Self

Creates and returns a LiteLLMEmbedder instance.

PARAMETER	DESCRIPTION
`config`	A configuration object containing the configuration for initializing the LiteLLMEmbedder instance. TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`LiteLLMEmbedder`	An initialized LiteLLMEmbedder instance. TYPE: `Self`

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/litellm.py

@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Creates and returns a LiteLLMEmbedder instance.

    A ``router`` entry is turned into a ``litellm.router.Router`` built from it as the
    model list, and ``base_url`` is renamed to ``api_base`` when ``api_base`` is absent.
    The passed dictionary itself is not modified.

    Args:
        config: A configuration object containing the configuration for initializing the LiteLLMEmbedder instance.

    Returns:
        LiteLLMEmbedder: An initialized LiteLLMEmbedder instance.
    """
    # Shallow-copy first: rewriting the caller's dictionary in place would replace its
    # router model list with a Router object, breaking a second call with the same config.
    config = dict(config)

    if "router" in config:
        config["router"] = litellm.router.Router(model_list=config["router"])

    # Map base_url to api_base if present
    if "base_url" in config and "api_base" not in config:
        config["api_base"] = config.pop("base_url")

    return super().from_config(config)

ragbits.core.embeddings.dense.fastembed.FastEmbedEmbedder #

FastEmbedEmbedder(model_name: str, use_gpu: bool = False, default_options: FastEmbedOptions | None = None)

Bases: DenseEmbedder[FastEmbedOptions]

Class for creating dense text embeddings using FastEmbed library. For more information, see the FastEmbed GitHub.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/fastembed.py

def __init__(self, model_name: str, use_gpu: bool = False, default_options: FastEmbedOptions | None = None):
    """
    Set up the embedder and load the FastEmbed text embedding model.

    Args:
        model_name: Name of the model passed to TextEmbedding.
        use_gpu: When true, the model is created with the CUDA execution provider.
        default_options: Default options for the embedder.
    """
    super().__init__(default_options=default_options)
    self.model_name = model_name
    self.use_gpu = use_gpu

    # The providers argument is only passed when GPU execution was requested.
    provider_kwargs = {"providers": ["CUDAExecutionProvider"]} if use_gpu else {}
    self._model = TextEmbedding(model_name=model_name, **provider_kwargs)

default_module `class-attribute` `instance-attribute` #

default_module: ClassVar = embeddings

configuration_key `class-attribute` `instance-attribute` #

configuration_key: ClassVar = 'embedder'

default_options `instance-attribute` #

default_options: OptionsT = default_options or options_cls()

options_cls `class-attribute` `instance-attribute` #

options_cls = FastEmbedOptions

model_name `instance-attribute` #

model_name = model_name

use_gpu `instance-attribute` #

use_gpu = use_gpu

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance from a construction config, resolving the concrete class
    named by the config. The resolved class may be a subclass of this one.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The resolved class is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not an instance of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop is running in this thread, so we can drive the coroutine here.
            obj = asyncio.run(factory())
        else:
            # A loop is already running in this thread. Waiting synchronously for a
            # coroutine scheduled on that same loop would block it forever, so the
            # factory is executed on a fresh event loop in a short-lived worker thread.
            # NOTE: the factory therefore runs on a different loop than the caller's.
            from concurrent.futures import ThreadPoolExecutor

            with ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	A string representing the path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`InvalidConfigError`	If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance from the first available source, checked in this order:
    the YAML override, the factory override, the project's preferred factory,
    and finally the project's preferred instance configuration.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: Path to the YAML file containing the instance configuration.

    Returns:
        An instance created from the first source that provides one.

    Raises:
        NoPreferredConfigError: If none of the sources yields a factory or configuration.
    """
    key = cls.configuration_key

    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_config = config.preferred_instances_config.get(key)
    if project_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {key}")

from_config `classmethod` #

from_config(config: dict[str, Any]) -> Self

Initializes the class with the provided configuration.

PARAMETER	DESCRIPTION
`config`	A dictionary containing configuration details for the class. TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    default_options = config.pop("default_options", None)
    options = cls.options_cls(**default_options) if default_options else None
    return cls(**config, default_options=options)

image_support #

image_support() -> bool

Check if the model supports image embeddings.

RETURNS	DESCRIPTION
`bool`	True if the model supports image embeddings, False otherwise.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

def image_support(self) -> bool:  # noqa: PLR6301
    """
    Report whether this embedder can produce image embeddings.

    Returns:
        Always False in this implementation.
    """
    supports_images = False
    return supports_images

embed_image `async` #

embed_image(images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]

Creates embeddings for the given images.

PARAMETER	DESCRIPTION
`images`	List of images to get embeddings for. TYPE: `list[bytes]`
`options`	Additional settings used by the Embedder model. TYPE: `EmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]]`	List of embeddings for the given images.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/base.py

async def embed_image(self, images: list[bytes], options: EmbedderOptionsT | None = None) -> list[list[float]]:
    """
    Produce embeddings for a batch of images.

    This implementation never embeds anything; it always raises.

    Args:
        images: List of images to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of embeddings for the given images.

    Raises:
        NotImplementedError: Always raised by this implementation.
    """
    message = "Image embeddings are not supported by this model."
    raise NotImplementedError(message)

get_vector_size `async` #

get_vector_size() -> VectorSize

Get the vector size for this FastEmbed model.

RETURNS	DESCRIPTION
`VectorSize`	VectorSize object with the model's embedding dimension.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/fastembed.py

async def get_vector_size(self) -> VectorSize:
    """
    Determine the dense vector size for this FastEmbed model.

    The dimension is taken from the model's supported-models listing when an
    entry for this model name carries a "dim" field; otherwise a sample text
    is embedded and its length is measured.

    Returns:
        VectorSize object with the model's embedding dimension.
    """
    # Find the first listing entry that describes this model, if any.
    matching = None
    for entry in self._model.list_supported_models():
        if entry["model"] == self.model_name:
            matching = entry
            break

    if matching and "dim" in matching:
        return VectorSize(size=matching["dim"], is_sparse=False)

    # Metadata is unavailable: fall back to measuring a real embedding.
    probe = await self.embed_text(["sample"])
    return VectorSize(size=len(probe[0]), is_sparse=False)

embed_text `async` #

embed_text(data: list[str], options: EmbedderOptionsT | None = None) -> list[list[float]]

Embeds a list of strings into a list of embeddings.

PARAMETER	DESCRIPTION
`data`	List of strings to get embeddings for. TYPE: `list[str]`
`options`	Additional options to pass to the embedding model. TYPE: `EmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[list[float]]`	List of embeddings for the given strings.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/dense/fastembed.py

async def embed_text(self, data: list[str], options: EmbedderOptionsT | None = None) -> list[list[float]]:
    """
    Embed the given strings with the FastEmbed model.

    Args:
        data: List of strings to get embeddings for.
        options: Additional options to pass to the embedding model; when given,
            they are merged on top of the default options.

    Returns:
        List of embeddings for the given strings.
    """
    if options:
        effective = self.default_options | options
    else:
        effective = self.default_options

    with trace(
        data=data, model_name=self.model_name, model_obj=repr(self._model), options=effective.dict()
    ) as outputs:
        # Convert every component to a plain Python float.
        vectors = [list(map(float, row)) for row in self._model.embed(data, **effective.dict())]
        outputs.embeddings = vectors
    return vectors

ragbits.core.embeddings.sparse.base.SparseEmbedder #

SparseEmbedder(default_options: OptionsT | None = None)

Bases: Embedder[SparseEmbedderOptionsT], ABC

Sparse embedding interface

Constructs a new ConfigurableComponent instance.

PARAMETER	DESCRIPTION
`default_options`	The default options for the component. TYPE: `OptionsT \| None` DEFAULT: `None`

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

def __init__(self, default_options: OptionsT | None = None) -> None:
    """
    Set up the component with its default options.

    Args:
        default_options: The default options for the component. When falsy,
            a fresh instance of ``options_cls`` is created instead.
    """
    if default_options:
        chosen = default_options
    else:
        chosen = self.options_cls()
    self.default_options: OptionsT = chosen

default_module `class-attribute` `instance-attribute` #

default_module: ClassVar = embeddings

configuration_key `class-attribute` `instance-attribute` #

configuration_key: ClassVar = 'embedder'

options_cls `instance-attribute` #

options_cls: type[EmbedderOptionsT]

default_options `instance-attribute` #

default_options: OptionsT = default_options or options_cls()

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance of the class named in the configuration.

    The type referenced by ``config.type`` is imported (relative to ``default_module``),
    verified to be a subclass of the current class, and then constructed through its
    own ``from_config`` with ``config.config``.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    target_cls = import_by_path(config.type, cls.default_module)
    if issubclass(target_cls, cls):
        return target_cls.from_config(config.config)

    raise InvalidConfigError(f"{target_cls} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is awaited to completion before this method returns. If the calling
    thread already runs an event loop, the coroutine is executed in a fresh event loop on a
    worker thread, because blocking on the current loop from its own thread would deadlock.
    Note that the calling thread (and its loop) is blocked until the factory finishes.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not a subclass of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        # Only the loop probe is guarded, so a RuntimeError raised by the factory itself
        # propagates instead of silently triggering a second run of the factory.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: a new one can be started right here.
            obj = asyncio.run(factory())
        else:
            import concurrent.futures  # local import keeps this block self-contained

            # run_coroutine_threadsafe() needs a coroutine object and must not be waited on
            # from the loop's own thread, so drive the coroutine on a separate thread instead.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	A string representing the path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`InvalidConfigError`	If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    Sources are consulted in this order, and the first one that applies wins:

    1. ``yaml_path_override`` - if the YAML file has an entry under ``configuration_key``;
    2. ``factory_path_override``;
    3. the factory registered in ``config.component_preference_factories``;
    4. the configuration registered in ``config.preferred_instances_config``.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function in the format of
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with the instance configuration.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration
            for ``configuration_key``.
    """
    if yaml_path_override:
        yaml_preferences = get_config_from_yaml(yaml_path_override)
        yaml_entry = yaml_preferences.get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config `classmethod` #

from_config(config: dict[str, Any]) -> Self

Initializes the class with the provided configuration.

PARAMETER	DESCRIPTION
`config`	A dictionary containing configuration details for the class. TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Initializes the class with the provided configuration.

    The optional ``"default_options"`` entry is converted into an ``options_cls`` instance;
    every other entry is forwarded to the constructor as a keyword argument. The ``config``
    dictionary passed by the caller is not modified, so it can safely be reused.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    # Work on a shallow copy: popping from the caller's dict would silently drop
    # "default_options" for any later use of the same configuration.
    init_kwargs = dict(config)
    raw_options = init_kwargs.pop("default_options", None)
    options = cls.options_cls(**raw_options) if raw_options else None
    return cls(**init_kwargs, default_options=options)

image_support #

image_support() -> bool

Check if the model supports image embeddings.

RETURNS	DESCRIPTION
`bool`	True if the model supports image embeddings, False otherwise.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

def image_support(self) -> bool:  # noqa: PLR6301
    """
    Report whether this embedder can produce image embeddings.

    This implementation always answers ``False``.

    Returns:
        True if the model supports image embeddings, False otherwise.
    """
    supports_images = False
    return supports_images

embed_text `abstractmethod` `async` #

embed_text(texts: list[str], options: SparseEmbedderOptionsT | None = None) -> list[SparseVector]

Transforms a list of texts into sparse vectors.

PARAMETER	DESCRIPTION
`texts`	list of input texts. TYPE: `list[str]`
`options`	optional embedding options TYPE: `SparseEmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[SparseVector]`	list of sparse embeddings.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/base.py

@abstractmethod
async def embed_text(self, texts: list[str], options: SparseEmbedderOptionsT | None = None) -> list[SparseVector]:
    """
    Transforms a list of texts into sparse vectors.

    This is an abstract coroutine: it has no implementation here and must be
    provided by concrete sparse embedders.

    Args:
        texts: List of input texts to embed.
        options: Optional embedding options.

    Returns:
        List of sparse embeddings.
    """

get_vector_size `abstractmethod` `async` #

get_vector_size() -> VectorSize

Get information about the sparse vector size/dimensions returned by this embedder.

RETURNS	DESCRIPTION
`VectorSize`	VectorSize object with is_sparse=True and the vocabulary size.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/base.py

@abstractmethod
async def get_vector_size(self) -> VectorSize:
    """
    Get information about the sparse vector size/dimensions returned by this embedder.

    This is an abstract coroutine: it has no implementation here and must be
    provided by concrete sparse embedders.

    Returns:
        VectorSize object with is_sparse=True and the vocabulary size.
    """

embed_image `async` #

embed_image(images: list[bytes], options: SparseEmbedderOptionsT | None = None) -> list[SparseVector]

Creates embeddings for the given images.

PARAMETER	DESCRIPTION
`images`	List of images to get embeddings for. TYPE: `list[bytes]`
`options`	Additional settings used by the Embedder model. TYPE: `SparseEmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[SparseVector]`	List of sparse embeddings for the given images.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/base.py

async def embed_image(
    self, images: list[bytes], options: SparseEmbedderOptionsT | None = None
) -> list[SparseVector]:
    """
    Create sparse embeddings for the given images.

    This implementation does not support images and always raises.

    Args:
        images: List of images to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of sparse embeddings for the given images.

    Raises:
        NotImplementedError: Always, since image embeddings are not supported here.
    """
    message = "Image embeddings are not supported by this model."
    raise NotImplementedError(message)

ragbits.core.embeddings.sparse.fastembed.FastEmbedSparseEmbedder #

FastEmbedSparseEmbedder(model_name: str, use_gpu: bool = False, default_options: FastEmbedOptions | None = None)

Bases: SparseEmbedder[FastEmbedOptions]

Class for creating sparse text embeddings using FastEmbed library. For more information, see the FastEmbed GitHub.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/fastembed.py

def __init__(self, model_name: str, use_gpu: bool = False, default_options: FastEmbedOptions | None = None):
    """
    Create the embedder and load the underlying FastEmbed sparse model.

    Args:
        model_name: Name of the FastEmbed sparse model to load.
        use_gpu: When True, the model is created with the "CUDAExecutionProvider" provider.
        default_options: Default options for the embedder.
    """
    super().__init__(default_options=default_options)
    self.model_name = model_name
    self.use_gpu = use_gpu

    # The providers argument is only passed when GPU execution was requested.
    provider_kwargs = {"providers": ["CUDAExecutionProvider"]} if use_gpu else {}
    self._model = SparseTextEmbedding(model_name=model_name, **provider_kwargs)

default_module `class-attribute` `instance-attribute` #

default_module: ClassVar = embeddings

configuration_key `class-attribute` `instance-attribute` #

configuration_key: ClassVar = 'embedder'

default_options `instance-attribute` #

default_options: OptionsT = default_options or options_cls()

options_cls `class-attribute` `instance-attribute` #

options_cls = FastEmbedOptions

model_name `instance-attribute` #

model_name = model_name

use_gpu `instance-attribute` #

use_gpu = use_gpu

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance of the class named in the configuration.

    The type referenced by ``config.type`` is imported (relative to ``default_module``),
    verified to be a subclass of the current class, and then constructed through its
    own ``from_config`` with ``config.config``.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    target_cls = import_by_path(config.type, cls.default_module)
    if issubclass(target_cls, cls):
        return target_cls.from_config(config.config)

    raise InvalidConfigError(f"{target_cls} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is awaited to completion before this method returns. If the calling
    thread already runs an event loop, the coroutine is executed in a fresh event loop on a
    worker thread, because blocking on the current loop from its own thread would deadlock.
    Note that the calling thread (and its loop) is blocked until the factory finishes.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not a subclass of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        # Only the loop probe is guarded, so a RuntimeError raised by the factory itself
        # propagates instead of silently triggering a second run of the factory.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: a new one can be started right here.
            obj = asyncio.run(factory())
        else:
            import concurrent.futures  # local import keeps this block self-contained

            # run_coroutine_threadsafe() needs a coroutine object and must not be waited on
            # from the loop's own thread, so drive the coroutine on a separate thread instead.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	A string representing the path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`InvalidConfigError`	If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    Sources are consulted in this order, and the first one that applies wins:

    1. ``yaml_path_override`` - if the YAML file has an entry under ``configuration_key``;
    2. ``factory_path_override``;
    3. the factory registered in ``config.component_preference_factories``;
    4. the configuration registered in ``config.preferred_instances_config``.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function in the format of
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with the instance configuration.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration
            for ``configuration_key``.
    """
    if yaml_path_override:
        yaml_preferences = get_config_from_yaml(yaml_path_override)
        yaml_entry = yaml_preferences.get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config `classmethod` #

from_config(config: dict[str, Any]) -> Self

Initializes the class with the provided configuration.

PARAMETER	DESCRIPTION
`config`	A dictionary containing configuration details for the class. TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Initializes the class with the provided configuration.

    The optional ``"default_options"`` entry is converted into an ``options_cls`` instance;
    every other entry is forwarded to the constructor as a keyword argument. The ``config``
    dictionary passed by the caller is not modified, so it can safely be reused.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    # Work on a shallow copy: popping from the caller's dict would silently drop
    # "default_options" for any later use of the same configuration.
    init_kwargs = dict(config)
    raw_options = init_kwargs.pop("default_options", None)
    options = cls.options_cls(**raw_options) if raw_options else None
    return cls(**init_kwargs, default_options=options)

image_support #

image_support() -> bool

Check if the model supports image embeddings.

RETURNS	DESCRIPTION
`bool`	True if the model supports image embeddings, False otherwise.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

def image_support(self) -> bool:  # noqa: PLR6301
    """
    Report whether this embedder can produce image embeddings.

    This implementation always answers ``False``.

    Returns:
        True if the model supports image embeddings, False otherwise.
    """
    supports_images = False
    return supports_images

embed_image `async` #

embed_image(images: list[bytes], options: SparseEmbedderOptionsT | None = None) -> list[SparseVector]

Creates embeddings for the given images.

PARAMETER	DESCRIPTION
`images`	List of images to get embeddings for. TYPE: `list[bytes]`
`options`	Additional settings used by the Embedder model. TYPE: `SparseEmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[SparseVector]`	List of sparse embeddings for the given images.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/base.py

async def embed_image(
    self, images: list[bytes], options: SparseEmbedderOptionsT | None = None
) -> list[SparseVector]:
    """
    Create sparse embeddings for the given images.

    This implementation does not support images and always raises.

    Args:
        images: List of images to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of sparse embeddings for the given images.

    Raises:
        NotImplementedError: Always, since image embeddings are not supported here.
    """
    message = "Image embeddings are not supported by this model."
    raise NotImplementedError(message)

get_vector_size `async` #

get_vector_size() -> VectorSize

Get the vector size for this FastEmbed sparse model.

For sparse models, this returns the vocabulary size.

RETURNS	DESCRIPTION
`VectorSize`	VectorSize object with is_sparse=True and the vocabulary size.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/fastembed.py

async def get_vector_size(self) -> VectorSize:
    """
    Get the vector size for this FastEmbed sparse model.

    For sparse models, this returns the vocabulary size. The size is taken from the
    "vocab_size" entry of the model's record in FastEmbed's supported-models list when
    available. Otherwise it is estimated from a sample embedding as the highest token
    index plus one, or 30000 if the sample produces no indices.

    Returns:
        VectorSize object with is_sparse=True and the vocabulary size.
    """
    # Find this model's record in FastEmbed's list of supported models.
    model_info = None
    for candidate in self._model.list_supported_models():
        if candidate["model"] == self.model_name:
            model_info = candidate
            break

    if model_info and "vocab_size" in model_info:
        return VectorSize(size=model_info["vocab_size"], is_sparse=True)

    # No declared vocabulary size: estimate it from the indices of a sample embedding.
    probe = await self.embed_text(["sample text with various tokens"])
    if probe and probe[0].indices:
        vocab_size = max(probe[0].indices) + 1
    else:
        vocab_size = 30000

    return VectorSize(size=vocab_size, is_sparse=True)

embed_text `async` #

embed_text(data: list[str], options: EmbedderOptionsT | None = None) -> list[SparseVector]

Embeds a list of strings into a list of sparse embeddings.

PARAMETER	DESCRIPTION
`data`	List of strings to get embeddings for. TYPE: `list[str]`
`options`	Additional options to pass to the embedding model. TYPE: `EmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[SparseVector]`	List of embeddings for the given strings.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/fastembed.py

async def embed_text(self, data: list[str], options: EmbedderOptionsT | None = None) -> list[SparseVector]:
    """
    Embed a list of strings into sparse vectors using the FastEmbed model.

    The call is wrapped in a trace that records the inputs, the model and the
    effective options, and receives the resulting embeddings.

    Args:
        data: List of strings to get embeddings for.
        options: Additional options to pass to the embedding model. When given, they
            are merged over the embedder's default options.

    Returns:
        List of embeddings for the given strings.
    """
    if options:
        merged_options = self.default_options | options
    else:
        merged_options = self.default_options

    with trace(
        data=data, model_name=self.model_name, model_obj=repr(self._model), options=merged_options.dict()
    ) as outputs:
        sparse_vectors = []
        for result in self._model.embed(data, **merged_options.dict()):
            # Convert the model output into plain Python floats and ints.
            values = [float(value) for value in result.values]
            indices = [int(index) for index in result.indices]
            sparse_vectors.append(SparseVector(values=values, indices=indices))
        outputs.embeddings = sparse_vectors
    return outputs.embeddings

ragbits.core.embeddings.sparse.bag_of_tokens.BagOfTokens #

BagOfTokens(model_name: str | None = None, encoding_name: str | None = None, default_options: BagOfTokensOptions | None = None)

Bases: SparseEmbedder[BagOfTokensOptions]

BagOfTokens implementation of the sparse Embedder interface.

Initialize the BagOfTokens embedder.

PARAMETER	DESCRIPTION
`model_name`	Name of the model to use for tokenization (e.g., "gpt-4o"). TYPE: `str \| None` DEFAULT: `None`
`encoding_name`	Name of the encoding to use for tokenization. TYPE: `str \| None` DEFAULT: `None`
`default_options`	Default options for the embedder. TYPE: `BagOfTokensOptions \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`ValueError`	If both model_name and encoding_name are provided. (If neither is provided, the tokenizer for "gpt-4o" is used by default.)

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/bag_of_tokens.py

def __init__(
    self,
    model_name: str | None = None,
    encoding_name: str | None = None,
    default_options: BagOfTokensOptions | None = None,
) -> None:
    """
    Initialize the BagOfTokens embedder.

    The tokenizer is selected either by encoding name or by model name. When neither
    is given, the tokenizer for the "gpt-4o" model is used.

    Args:
        model_name: Name of the model to use for tokenization (e.g., "gpt-4o").
        encoding_name: Name of the encoding to use for tokenization.
        default_options: Default options for the embedder.

    Raises:
        ValueError: If both model_name and encoding_name are provided.
    """
    super().__init__(default_options=default_options)

    if encoding_name and model_name:
        raise ValueError("Please specify only one of encoding_name or model_name")

    if encoding_name:
        self._encoder = tiktoken.get_encoding(encoding_name=encoding_name)
    else:
        # Default to gpt-4o if neither an encoding nor a model is specified.
        self._encoder = tiktoken.encoding_for_model(model_name=model_name or "gpt-4o")

default_module `class-attribute` `instance-attribute` #

default_module: ClassVar = embeddings

configuration_key `class-attribute` `instance-attribute` #

configuration_key: ClassVar = 'embedder'

default_options `instance-attribute` #

default_options: OptionsT = default_options or options_cls()

options_cls `class-attribute` `instance-attribute` #

options_cls = BagOfTokensOptions

subclass_from_config `classmethod` #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER	DESCRIPTION
`config`	A model containing configuration details for the class. TYPE: `ObjectConstructionConfig`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

RAISES	DESCRIPTION
`InvalidConfigError`	The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance of the class named in the configuration.

    The type referenced by ``config.type`` is imported (relative to ``default_module``),
    verified to be a subclass of the current class, and then constructed through its
    own ``from_config`` with ``config.config``.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    target_cls = import_by_path(config.type, cls.default_module)
    if issubclass(target_cls, cls):
        return target_cls.from_config(config.config)

    raise InvalidConfigError(f"{target_cls} is not a subclass of {cls}")

subclass_from_factory `classmethod` #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory. Supports both synchronous and asynchronous factory functions.

PARAMETER	DESCRIPTION
`factory_path`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided factory function.

RAISES	DESCRIPTION
`InvalidConfigError`	The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Creates the class using the provided factory function. May return a subclass of the class,
    if requested by the factory. Supports both synchronous and asynchronous factory functions.

    An asynchronous factory is awaited to completion before this method returns. If the calling
    thread already runs an event loop, the coroutine is executed in a fresh event loop on a
    worker thread, because blocking on the current loop from its own thread would deadlock.
    Note that the calling thread (and its loop) is blocked until the factory finishes.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        An instance of the class initialized with the provided factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object returned
            is not a subclass of the current class.
    """
    factory = import_by_path(factory_path, cls.default_module)

    if asyncio.iscoroutinefunction(factory):
        # Only the loop probe is guarded, so a RuntimeError raised by the factory itself
        # propagates instead of silently triggering a second run of the factory.
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: a new one can be started right here.
            obj = asyncio.run(factory())
        else:
            import concurrent.futures  # local import keeps this block self-contained

            # run_coroutine_threadsafe() needs a coroutine object and must not be waited on
            # from the loop's own thread, so drive the coroutine on a separate thread instead.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                obj = executor.submit(lambda: asyncio.run(factory())).result()
    else:
        obj = factory()

    if not isinstance(obj, cls):
        raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

    return obj

preferred_subclass `classmethod` #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER	DESCRIPTION
`config`	The CoreConfig instance containing preferred factory and configuration details. TYPE: `CoreConfig`
`factory_path_override`	A string representing the path to the factory function in the format of "module.submodule:factory_name". TYPE: `str \| None` DEFAULT: `None`
`yaml_path_override`	A string representing the path to the YAML file containing the Ragstack instance configuration. TYPE: `Path \| None` DEFAULT: `None`

RAISES	DESCRIPTION
`InvalidConfigError`	If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    Sources are consulted in this order, and the first one that applies wins:

    1. ``yaml_path_override`` - if the YAML file has an entry under ``configuration_key``;
    2. ``factory_path_override``;
    3. the factory registered in ``config.component_preference_factories``;
    4. the configuration registered in ``config.preferred_instances_config``.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function in the format of
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with the instance configuration.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration
            for ``configuration_key``.
    """
    if yaml_path_override:
        yaml_preferences = get_config_from_yaml(yaml_path_override)
        yaml_entry = yaml_preferences.get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config `classmethod` #

from_config(config: dict[str, Any]) -> Self

Initializes the class with the provided configuration.

PARAMETER	DESCRIPTION
`config`	A dictionary containing configuration details for the class. TYPE: `dict[str, Any]`

RETURNS	DESCRIPTION
`Self`	An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py

@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Initializes the class with the provided configuration.

    The optional ``"default_options"`` entry is converted into an ``options_cls`` instance;
    every other entry is forwarded to the constructor as a keyword argument. The ``config``
    dictionary passed by the caller is not modified, so it can safely be reused.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    # Work on a shallow copy: popping from the caller's dict would silently drop
    # "default_options" for any later use of the same configuration.
    init_kwargs = dict(config)
    raw_options = init_kwargs.pop("default_options", None)
    options = cls.options_cls(**raw_options) if raw_options else None
    return cls(**init_kwargs, default_options=options)

image_support #

image_support() -> bool

Check if the model supports image embeddings.

RETURNS	DESCRIPTION
`bool`	True if the model supports image embeddings, False otherwise.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/base.py

def image_support(self) -> bool:  # noqa: PLR6301
    """
    Report whether this embedder can produce image embeddings.

    This implementation always answers ``False``.

    Returns:
        True if the model supports image embeddings, False otherwise.
    """
    supports_images = False
    return supports_images

embed_image `async` #

embed_image(images: list[bytes], options: SparseEmbedderOptionsT | None = None) -> list[SparseVector]

Creates embeddings for the given images.

PARAMETER	DESCRIPTION
`images`	List of images to get embeddings for. TYPE: `list[bytes]`
`options`	Additional settings used by the Embedder model. TYPE: `SparseEmbedderOptionsT \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[SparseVector]`	List of sparse embeddings for the given images.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/base.py

async def embed_image(
    self,
    images: list[bytes],
    options: SparseEmbedderOptionsT | None = None,
) -> list[SparseVector]:
    """
    Produce embeddings for the supplied images.

    This implementation does not embed anything: it raises unconditionally.

    Args:
        images: List of images to get embeddings for.
        options: Additional settings used by the Embedder model.

    Returns:
        List of sparse embeddings for the given images.

    Raises:
        NotImplementedError: Always, as image embeddings are not supported here.
    """
    message = "Image embeddings are not supported by this model."
    raise NotImplementedError(message)

get_vector_size `async` #

get_vector_size() -> VectorSize

Get the vector size for this BagOfTokens model.

For BagOfTokens, this returns the tokenizer vocabulary size.

RETURNS	DESCRIPTION
`VectorSize`	VectorSize object with is_sparse=True and the vocabulary size.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/bag_of_tokens.py

async def get_vector_size(self) -> VectorSize:
    """
    Report the size of the vectors produced by this BagOfTokens model.

    The size is taken from the encoder's ``n_vocab`` attribute, i.e. the tokenizer
    vocabulary size.

    Returns:
        VectorSize object with is_sparse=True and the vocabulary size.
    """
    return VectorSize(size=self._encoder.n_vocab, is_sparse=True)

embed_text `async` #

embed_text(texts: list[str], options: BagOfTokensOptions | None = None) -> list[SparseVector]

Transforms a list of texts into sparse vectors using bag-of-tokens representation.

PARAMETER	DESCRIPTION
`texts`	List of input texts. TYPE: `list[str]`
`options`	Optional embedding options. TYPE: `BagOfTokensOptions \| None` DEFAULT: `None`

RETURNS	DESCRIPTION
`list[SparseVector]`	list of SparseVector instances.

Source code in packages/ragbits-core/src/ragbits/core/embeddings/sparse/bag_of_tokens.py

async def embed_text(self, texts: list[str], options: BagOfTokensOptions | None = None) -> list[SparseVector]:
    """
    Convert each input text into a sparse bag-of-tokens vector.

    Every text is tokenized with the encoder and the occurrences of each token are counted.
    Tokens whose count is below ``min_token_count`` (when that option is set) are dropped;
    the remaining token ids become the vector indices and their counts the values.

    Args:
        texts: list of input texts.
        options: optional embedding options, merged on top of the default options.

    Returns:
        list of SparseVector instances, one per input text.
    """
    embeddings = []
    if options:
        merged_options = self.default_options | options
    else:
        merged_options = self.default_options

    with trace(data=texts, options=merged_options.dict()) as outputs:
        # An unset (falsy) threshold means "keep every token".
        threshold = merged_options.min_token_count or float("-inf")
        for text in texts:
            occurrences = Counter(self._encoder.encode(text))
            kept = [(token, float(count)) for token, count in occurrences.items() if count >= threshold]
            embeddings.append(
                SparseVector(
                    indices=[token for token, _ in kept],
                    values=[weight for _, weight in kept],
                )
            )
        outputs.embeddings = embeddings
    return embeddings

Embedders#

ragbits.core.embeddings.base.Embedder #

default_options instance-attribute #

options_cls instance-attribute #

default_module class-attribute instance-attribute #

configuration_key class-attribute instance-attribute #

subclass_from_config classmethod #

subclass_from_factory classmethod #

preferred_subclass classmethod #

from_config classmethod #

embed_text abstractmethod async #

get_vector_size abstractmethod async #

image_support #

embed_image async #

ragbits.core.embeddings.dense.DenseEmbedder #

default_module class-attribute instance-attribute #

configuration_key class-attribute instance-attribute #

options_cls instance-attribute #

default_options instance-attribute #

subclass_from_config classmethod #

subclass_from_factory classmethod #

preferred_subclass classmethod #

from_config classmethod #

image_support #

embed_text abstractmethod async #

get_vector_size abstractmethod async #

embed_image async #

ragbits.core.embeddings.dense.local.LocalEmbedder #

default_module class-attribute instance-attribute #

configuration_key class-attribute instance-attribute #

default_options instance-attribute #

options_cls class-attribute instance-attribute #

model_name instance-attribute #

model instance-attribute #

subclass_from_config classmethod #

subclass_from_factory classmethod #

preferred_subclass classmethod #

from_config classmethod #

image_support #

embed_image async #

get_vector_size async #

embed_text async #

ragbits.core.embeddings.dense.litellm.LiteLLMEmbedder #

default_module class-attribute instance-attribute #

configuration_key class-attribute instance-attribute #

default_options instance-attribute #

options_cls class-attribute instance-attribute #

model_name instance-attribute #

api_base instance-attribute #

api_key instance-attribute #

api_version instance-attribute #

router instance-attribute #

subclass_from_config classmethod #

subclass_from_factory classmethod #

preferred_subclass classmethod #

image_support #

embed_image async #

get_vector_size async #

embed_text async #

from_config classmethod #

ragbits.core.embeddings.dense.fastembed.FastEmbedEmbedder #

default_module class-attribute instance-attribute #

configuration_key class-attribute instance-attribute #

default_options instance-attribute #

options_cls class-attribute instance-attribute #

model_name instance-attribute #

use_gpu instance-attribute #

subclass_from_config classmethod #

subclass_from_factory classmethod #

preferred_subclass classmethod #

from_config classmethod #

image_support #

embed_image async #

get_vector_size async #

embed_text async #

ragbits.core.embeddings.sparse.base.SparseEmbedder #

default_module class-attribute instance-attribute #

configuration_key class-attribute instance-attribute #

options_cls instance-attribute #

default_options instance-attribute #

default_options `instance-attribute` #

options_cls `instance-attribute` #

default_module `class-attribute` `instance-attribute` #

configuration_key `class-attribute` `instance-attribute` #

subclass_from_config `classmethod` #

subclass_from_factory `classmethod` #

preferred_subclass `classmethod` #

from_config `classmethod` #

embed_text `abstractmethod` `async` #

get_vector_size `abstractmethod` `async` #

embed_image `async` #

default_module `class-attribute` `instance-attribute` #

configuration_key `class-attribute` `instance-attribute` #

options_cls `instance-attribute` #

default_options `instance-attribute` #

subclass_from_config `classmethod` #

subclass_from_factory `classmethod` #

preferred_subclass `classmethod` #

from_config `classmethod` #

embed_text `abstractmethod` `async` #

get_vector_size `abstractmethod` `async` #

embed_image `async` #

default_module `class-attribute` `instance-attribute` #

configuration_key `class-attribute` `instance-attribute` #

default_options `instance-attribute` #

options_cls `class-attribute` `instance-attribute` #

model_name `instance-attribute` #

model `instance-attribute` #

subclass_from_config `classmethod` #

subclass_from_factory `classmethod` #

preferred_subclass `classmethod` #

from_config `classmethod` #

embed_image `async` #

get_vector_size `async` #

embed_text `async` #

default_module `class-attribute` `instance-attribute` #

configuration_key `class-attribute` `instance-attribute` #

default_options `instance-attribute` #

options_cls `class-attribute` `instance-attribute` #

model_name `instance-attribute` #

api_base `instance-attribute` #

api_key `instance-attribute` #

api_version `instance-attribute` #

router `instance-attribute` #

subclass_from_config `classmethod` #

subclass_from_factory `classmethod` #

preferred_subclass `classmethod` #

embed_image `async` #

get_vector_size `async` #

embed_text `async` #

from_config `classmethod` #

default_module `class-attribute` `instance-attribute` #

configuration_key `class-attribute` `instance-attribute` #

default_options `instance-attribute` #

options_cls `class-attribute` `instance-attribute` #

model_name `instance-attribute` #

use_gpu `instance-attribute` #

subclass_from_config `classmethod` #

subclass_from_factory `classmethod` #

preferred_subclass `classmethod` #

from_config `classmethod` #

embed_image `async` #

get_vector_size `async` #

embed_text `async` #

default_module `class-attribute` `instance-attribute` #

configuration_key `class-attribute` `instance-attribute` #

options_cls `instance-attribute` #

default_options `instance-attribute` #

subclass_from_config `classmethod` #

subclass_from_factory `classmethod` #

preferred_subclass `classmethod` #

from_config `classmethod` #

embed_text `abstractmethod` `async` #

get_vector_size `abstractmethod` `async` #

embed_image `async` #

default_module `class-attribute` `instance-attribute` #

configuration_key `class-attribute` `instance-attribute` #

default_options `instance-attribute` #

options_cls `class-attribute` `instance-attribute` #

model_name `instance-attribute` #