Skip to content

Vector Stores#

ragbits.core.vector_stores.base.VectorStoreEntry #

Bases: BaseModel

An object representing a vector database entry.

id instance-attribute #

id: str

key instance-attribute #

key: str

vector instance-attribute #

vector: list[float]

metadata instance-attribute #

metadata: dict

ragbits.core.vector_stores.base.VectorStoreOptions #

Bases: Options

An object representing the options for the vector store.

model_config class-attribute instance-attribute #

model_config = ConfigDict(extra='allow', arbitrary_types_allowed=True)

k class-attribute instance-attribute #

k: int = 5

max_distance class-attribute instance-attribute #

max_distance: float | None = None

dict #

dict() -> dict[str, Any]

Creates a dictionary representation of the Options instance. If a value is None, it will be replaced with a provider-specific not-given sentinel.

RETURNS DESCRIPTION
dict[str, Any]

A dictionary representation of the Options instance.

Source code in packages/ragbits-core/src/ragbits/core/options.py
def dict(self) -> dict[str, Any]:  # type: ignore # mypy complains about overriding BaseModel.dict
    """
    Dumps the Options instance to a plain dictionary.

    Every value that is None or a NotGiven instance is swapped for this
    instance's `_not_given` sentinel; all other values are returned as dumped.

    Returns:
        A dictionary representation of the Options instance.
    """

    def _substitute(value: Any) -> Any:
        # Unset values are reported with the provider-specific sentinel.
        if value is None or isinstance(value, NotGiven):
            return self._not_given
        return value

    dumped = self.model_dump()
    return {name: _substitute(value) for name, value in dumped.items()}

ragbits.core.vector_stores.base.VectorStore #

VectorStore(default_options: VectorStoreOptionsT | None = None, metadata_store: MetadataStore | None = None)

Bases: ConfigurableComponent[VectorStoreOptionsT], ABC

A base class for Vector Store implementations, which store vectors and retrieve them using a similarity function.

Constructs a new VectorStore instance.

PARAMETER DESCRIPTION
default_options

The default options for querying the vector store.

TYPE: VectorStoreOptionsT | None DEFAULT: None

metadata_store

The metadata store to use.

TYPE: MetadataStore | None DEFAULT: None

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
def __init__(
    self,
    default_options: VectorStoreOptionsT | None = None,
    metadata_store: MetadataStore | None = None,
) -> None:
    """
    Constructs a new VectorStore instance.

    Args:
        default_options: The default options for querying the vector store.
            Forwarded unchanged to the parent class constructor.
        metadata_store: The metadata store to use. Kept on the instance as
            `_metadata_store`; may be None.
    """
    super().__init__(default_options=default_options)
    self._metadata_store = metadata_store

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls instance-attribute #

options_cls: type[VectorStoreOptionsT]

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

subclass_from_config classmethod #

subclass_from_config(config: ObjectContructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectContructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectContructionConfig) -> Self:
    """
    Initializes the class with the provided configuration. May return a subclass of the class,
    if requested by the configuration.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The object referenced by the configuration is not a class,
            or is not a subclass of the current class.
    """
    subclass = import_by_path(config.type, cls.default_module)
    # issubclass() raises TypeError for non-class objects (e.g. a path pointing at a function),
    # so check for a class first and report both cases as the documented InvalidConfigError.
    if not (isinstance(subclass, type) and issubclass(subclass, cls)):
        raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

    return subclass.from_config(config.config)

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Builds an instance by calling the factory function found at the given path.
    The factory may return an instance of a subclass of this class.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object returned by the factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object it returned
            is not an instance of the current class.
    """
    instance = import_by_path(factory_path, cls.default_module)()
    if isinstance(instance, cls):
        return instance
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

subclass_from_defaults classmethod #

subclass_from_defaults(defaults: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the default configuration file and the default factory function. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
defaults

The CoreConfig instance containing default factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

A string representing the path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_defaults(
    cls, defaults: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Builds an instance from the first available source, checked in this order:

    1. the `cls.configuration_key` section of the YAML file at `yaml_path_override`,
    2. the factory function at `factory_path_override`,
    3. the default factory registered in `defaults` under `cls.configuration_key`,
    4. the default instance configuration registered in `defaults` under `cls.configuration_key`.

    Args:
        defaults: The CoreConfig instance containing default factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to the YAML file containing the instance configuration.

    Returns:
        An instance of the class (or of one of its subclasses) built from the first source found.

    Raises:
        NoDefaultConfigError: If none of the sources above provides a factory or configuration.
    """
    if yaml_path_override:
        yaml_section = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        if yaml_section:
            return cls.subclass_from_config(ObjectContructionConfig.model_validate(yaml_section))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    factory_path = defaults.default_factories.get(cls.configuration_key)
    if factory_path:
        return cls.subclass_from_factory(factory_path)

    instance_config = defaults.default_instances_config.get(cls.configuration_key)
    if instance_config:
        return cls.subclass_from_config(ObjectContructionConfig.model_validate(instance_config))

    raise NoDefaultConfigError(f"Could not find default factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
ValidationError

The metadata_store configuration doesn't follow the expected format.

InvalidConfigError

The metadata_store class can't be found or is not the correct type.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        ValidationError: The metadata_store configuration doesn't follow the expected format.
        InvalidConfigError: The metadata_store class can't be found or is not the correct type.
    """
    default_options = config.pop("default_options", None)
    options = cls.options_cls(**default_options) if default_options else None

    store_config = config.pop("metadata_store", None)
    store = (
        MetadataStore.subclass_from_config(ObjectContructionConfig.model_validate(store_config))
        if store_config
        else None
    )

    return cls(**config, default_options=options, metadata_store=store)

store abstractmethod async #

store(entries: list[VectorStoreEntry]) -> None

Store entries in the vector store.

PARAMETER DESCRIPTION
entries

The entries to store.

TYPE: list[VectorStoreEntry]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@abstractmethod
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Store entries in the vector store.

    Abstract coroutine: it has no implementation here and must be provided by subclasses.

    Args:
        entries: The entries to store.
    """

retrieve abstractmethod async #

retrieve(vector: list[float], options: VectorStoreOptionsT | None = None) -> list[VectorStoreEntry]

Retrieve entries from the vector store.

PARAMETER DESCRIPTION
vector

The vector to search for.

TYPE: list[float]

options

The options for querying the vector store.

TYPE: VectorStoreOptionsT | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@abstractmethod
async def retrieve(self, vector: list[float], options: VectorStoreOptionsT | None = None) -> list[VectorStoreEntry]:
    """
    Retrieve entries from the vector store.

    Abstract coroutine: it has no implementation here and must be provided by subclasses.

    Args:
        vector: The vector to search for.
        options: The options for querying the vector store. May be None.

    Returns:
        The entries.
    """

remove abstractmethod async #

remove(ids: list[str]) -> None

Remove entries from the vector store.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[str]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@abstractmethod
async def remove(self, ids: list[str]) -> None:
    """
    Remove entries from the vector store.

    Abstract coroutine: it has no implementation here and must be provided by subclasses.

    Args:
        ids: The list of entries' IDs to remove.
    """

list abstractmethod async #

list(where: WhereQuery | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector store. The entries can be filtered, limited and offset.

PARAMETER DESCRIPTION
where

The filter dictionary - the keys are the field names and the values are the values to filter by. Not specifying the key means no filtering.

TYPE: WhereQuery | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@abstractmethod
async def list(
    self, where: WhereQuery | None = None, limit: int | None = None, offset: int = 0
) -> list[VectorStoreEntry]:
    """
    List entries from the vector store. The entries can be filtered, limited and offset.

    Abstract coroutine: it has no implementation here and must be provided by subclasses.

    Args:
        where: The filter dictionary - the keys are the field names and the values are the values to filter by.
            Not specifying the key means no filtering.
        limit: The maximum number of entries to return.
        offset: The number of entries to skip.

    Returns:
        The entries.
    """

ragbits.core.vector_stores.in_memory.InMemoryVectorStore #

InMemoryVectorStore(default_options: VectorStoreOptions | None = None, metadata_store: MetadataStore | None = None)

Bases: VectorStore[VectorStoreOptions]

A simple in-memory implementation of Vector Store, storing vectors in memory.

Constructs a new InMemoryVectorStore instance.

PARAMETER DESCRIPTION
default_options

The default options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

metadata_store

The metadata store to use.

TYPE: MetadataStore | None DEFAULT: None

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
def __init__(
    self,
    default_options: VectorStoreOptions | None = None,
    metadata_store: MetadataStore | None = None,
) -> None:
    """
    Constructs a new InMemoryVectorStore instance.

    The store starts out empty.

    Args:
        default_options: The default options for querying the vector store.
        metadata_store: The metadata store to use.
    """
    super().__init__(default_options=default_options, metadata_store=metadata_store)
    # In-memory storage of the entries, as a plain dictionary.
    self._storage: dict[str, VectorStoreEntry] = {}

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls class-attribute instance-attribute #

options_cls = VectorStoreOptions

subclass_from_config classmethod #

subclass_from_config(config: ObjectContructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectContructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectContructionConfig) -> Self:
    """
    Initializes the class with the provided configuration. May return a subclass of the class,
    if requested by the configuration.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The object referenced by the configuration is not a class,
            or is not a subclass of the current class.
    """
    subclass = import_by_path(config.type, cls.default_module)
    # issubclass() raises TypeError for non-class objects (e.g. a path pointing at a function),
    # so check for a class first and report both cases as the documented InvalidConfigError.
    if not (isinstance(subclass, type) and issubclass(subclass, cls)):
        raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

    return subclass.from_config(config.config)

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Builds an instance by calling the factory function found at the given path.
    The factory may return an instance of a subclass of this class.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object returned by the factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object it returned
            is not an instance of the current class.
    """
    instance = import_by_path(factory_path, cls.default_module)()
    if isinstance(instance, cls):
        return instance
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

subclass_from_defaults classmethod #

subclass_from_defaults(defaults: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the default configuration file and the default factory function. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
defaults

The CoreConfig instance containing default factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

A string representing the path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_defaults(
    cls, defaults: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Builds an instance from the first available source, checked in this order:

    1. the `cls.configuration_key` section of the YAML file at `yaml_path_override`,
    2. the factory function at `factory_path_override`,
    3. the default factory registered in `defaults` under `cls.configuration_key`,
    4. the default instance configuration registered in `defaults` under `cls.configuration_key`.

    Args:
        defaults: The CoreConfig instance containing default factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to the YAML file containing the instance configuration.

    Returns:
        An instance of the class (or of one of its subclasses) built from the first source found.

    Raises:
        NoDefaultConfigError: If none of the sources above provides a factory or configuration.
    """
    if yaml_path_override:
        yaml_section = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        if yaml_section:
            return cls.subclass_from_config(ObjectContructionConfig.model_validate(yaml_section))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    factory_path = defaults.default_factories.get(cls.configuration_key)
    if factory_path:
        return cls.subclass_from_factory(factory_path)

    instance_config = defaults.default_instances_config.get(cls.configuration_key)
    if instance_config:
        return cls.subclass_from_config(ObjectContructionConfig.model_validate(instance_config))

    raise NoDefaultConfigError(f"Could not find default factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> InMemoryVectorStore

Creates and returns an instance of the InMemoryVectorStore class from the given configuration.

PARAMETER DESCRIPTION
config

A dictionary containing the configuration for initializing the InMemoryVectorStore instance.

TYPE: dict

RETURNS DESCRIPTION
InMemoryVectorStore

An initialized instance of the InMemoryVectorStore class.

RAISES DESCRIPTION
ValidationError

The metadata_store configuration doesn't follow the expected format.

InvalidConfigError

The metadata_store class can't be found or is not the correct type.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
@classmethod
def from_config(cls, config: dict) -> "InMemoryVectorStore":
    """
    Builds an InMemoryVectorStore from a configuration dictionary.

    Only the "metadata_store" and "default_options" keys are read. When "default_options"
    is absent, VectorStoreOptions is constructed with no arguments.

    Args:
        config: A dictionary containing the configuration for initializing the InMemoryVectorStore instance.

    Returns:
        An initialized instance of the InMemoryVectorStore class.

    Raises:
        ValidationError: The metadata_store configuration doesn't follow the expected format.
        InvalidConfigError: The metadata_store class can't be found or is not the correct type.
    """
    if "metadata_store" in config:
        store_config = ObjectContructionConfig.model_validate(config["metadata_store"])
        metadata_store = MetadataStore.subclass_from_config(store_config)
    else:
        metadata_store = None

    options = VectorStoreOptions(**config.get("default_options", {}))
    return cls(default_options=options, metadata_store=metadata_store)

store async #

store(entries: list[VectorStoreEntry]) -> None

Store entries in the vector store.

PARAMETER DESCRIPTION
entries

The entries to store.

TYPE: list[VectorStoreEntry]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
@traceable
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Saves the given entries in memory, keyed by their `id`.

    An entry whose `id` is already present replaces the stored one.

    Args:
        entries: The entries to store.
    """
    self._storage.update((entry.id, entry) for entry in entries)

retrieve async #

retrieve(vector: list[float], options: VectorStoreOptions | None = None) -> list[VectorStoreEntry]

Retrieve entries from the vector store.

PARAMETER DESCRIPTION
vector

The vector to search for.

TYPE: list[float]

options

The options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
@traceable
async def retrieve(self, vector: list[float], options: VectorStoreOptions | None = None) -> list[VectorStoreEntry]:
    """
    Retrieve entries from the vector store.

    Every stored entry is scored by the Euclidean distance between its vector and `vector`.
    The `k` closest entries are selected, and of those only the ones within `max_distance`
    (when it is set) are returned, ordered from closest to farthest.

    Args:
        vector: The vector to search for.
        options: The options for querying the vector store. When given, they are merged
            over the store's default options.

    Returns:
        The entries.
    """
    merged_options = (self.default_options | options) if options else self.default_options
    # The query vector is the same for every entry, so convert it to an array only once.
    query = np.array(vector)
    scored = sorted(
        ((entry, float(np.linalg.norm(np.array(entry.vector) - query))) for entry in self._storage.values()),
        key=lambda pair: pair[1],
    )
    return [
        entry
        for entry, distance in scored[: merged_options.k]
        if merged_options.max_distance is None or distance <= merged_options.max_distance
    ]

remove async #

remove(ids: list[str]) -> None

Remove entries from the vector store.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[str]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
@traceable
async def remove(self, ids: list[str]) -> None:
    """
    Deletes the entries with the given IDs from the in-memory storage.

    IDs are processed in order; an ID that is not stored raises KeyError, and
    entries deleted before that point stay deleted.

    Args:
        ids: The list of entries' IDs to remove.
    """
    for entry_id in ids:
        del self._storage[entry_id]

list async #

list(where: WhereQuery | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector store. The entries can be filtered, limited and offset.

PARAMETER DESCRIPTION
where

The filter dictionary - the keys are the field names and the values are the values to filter by. Not specifying the key means no filtering.

TYPE: WhereQuery | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
@traceable
async def list(
    self, where: WhereQuery | None = None, limit: int | None = None, offset: int = 0
) -> list[VectorStoreEntry]:
    """
    List entries from the vector store. The entries can be filtered, limited and offset.

    Filtering is applied first, then the offset, then the limit.

    Args:
        where: The filter dictionary - the keys are the field names and the values are the values to filter by.
            An entry matches when its metadata holds an equal value for every key.
            Not specifying the key means no filtering.
        limit: The maximum number of entries to return. None means no limit;
            0 returns an empty list.
        offset: The number of entries to skip.

    Returns:
        The entries.
    """
    entries = iter(self._storage.values())

    if where:
        entries = (
            entry for entry in entries if all(entry.metadata.get(key) == value for key, value in where.items())
        )

    if offset:
        entries = islice(entries, offset, None)

    # Compare against None explicitly: a plain truthiness test would treat limit=0
    # as "no limit" and return every entry instead of none.
    if limit is not None:
        entries = islice(entries, limit)

    return list(entries)

ragbits.core.vector_stores.chroma.ChromaVectorStore #

ChromaVectorStore(client: ClientAPI, index_name: str, distance_method: Literal['l2', 'ip', 'cosine'] = 'cosine', default_options: VectorStoreOptions | None = None, metadata_store: MetadataStore | None = None)

Bases: VectorStore[VectorStoreOptions]

Vector store implementation using Chroma.

Constructs a new ChromaVectorStore instance.

PARAMETER DESCRIPTION
client

The ChromaDB client.

TYPE: ClientAPI

index_name

The name of the index.

TYPE: str

distance_method

The distance method to use.

TYPE: Literal['l2', 'ip', 'cosine'] DEFAULT: 'cosine'

default_options

The default options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

metadata_store

The metadata store to use. If None, the metadata will be stored in ChromaDB.

TYPE: MetadataStore | None DEFAULT: None

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
def __init__(
    self,
    client: ClientAPI,
    index_name: str,
    distance_method: Literal["l2", "ip", "cosine"] = "cosine",
    default_options: VectorStoreOptions | None = None,
    metadata_store: MetadataStore | None = None,
) -> None:
    """
    Constructs a new ChromaVectorStore instance and gets or creates its collection.

    Args:
        client: The ChromaDB client.
        index_name: The name of the index. Used as the collection name.
        distance_method: The distance method to use. Passed to the collection
            as its "hnsw:space" metadata.
        default_options: The default options for querying the vector store.
        metadata_store: The metadata store to use. If None, the metadata will be stored in ChromaDB.
    """
    super().__init__(default_options=default_options, metadata_store=metadata_store)
    self._client, self._index_name, self._distance_method = client, index_name, distance_method
    self._collection = client.get_or_create_collection(
        name=index_name,
        metadata={"hnsw:space": distance_method},
    )

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls class-attribute instance-attribute #

options_cls = VectorStoreOptions

subclass_from_config classmethod #

subclass_from_config(config: ObjectContructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectContructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectContructionConfig) -> Self:
    """
    Initializes the class with the provided configuration. May return a subclass of the class,
    if requested by the configuration.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The object referenced by the configuration is not a class,
            or is not a subclass of the current class.
    """
    subclass = import_by_path(config.type, cls.default_module)
    # issubclass() raises TypeError for non-class objects (e.g. a path pointing at a function),
    # so check for a class first and report both cases as the documented InvalidConfigError.
    if not (isinstance(subclass, type) and issubclass(subclass, cls)):
        raise InvalidConfigError(f"{subclass} is not a subclass of {cls}")

    return subclass.from_config(config.config)

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not an instance of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Builds an instance by calling the factory function found at the given path.
    The factory may return an instance of a subclass of this class.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object returned by the factory function.

    Raises:
        InvalidConfigError: The factory can't be found or the object it returned
            is not an instance of the current class.
    """
    instance = import_by_path(factory_path, cls.default_module)()
    if isinstance(instance, cls):
        return instance
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

subclass_from_defaults classmethod #

subclass_from_defaults(defaults: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the default configuration file and the default factory function. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
defaults

The CoreConfig instance containing default factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

A string representing the path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_defaults(
    cls, defaults: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Builds an instance from the first available source, checked in this order:

    1. the `cls.configuration_key` section of the YAML file at `yaml_path_override`,
    2. the factory function at `factory_path_override`,
    3. the default factory registered in `defaults` under `cls.configuration_key`,
    4. the default instance configuration registered in `defaults` under `cls.configuration_key`.

    Args:
        defaults: The CoreConfig instance containing default factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to the YAML file containing the instance configuration.

    Returns:
        An instance of the class (or of one of its subclasses) built from the first source found.

    Raises:
        NoDefaultConfigError: If none of the sources above provides a factory or configuration.
    """
    if yaml_path_override:
        yaml_section = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        if yaml_section:
            return cls.subclass_from_config(ObjectContructionConfig.model_validate(yaml_section))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    factory_path = defaults.default_factories.get(cls.configuration_key)
    if factory_path:
        return cls.subclass_from_factory(factory_path)

    instance_config = defaults.default_instances_config.get(cls.configuration_key)
    if instance_config:
        return cls.subclass_from_config(ObjectContructionConfig.model_validate(instance_config))

    raise NoDefaultConfigError(f"Could not find default factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
ValidationError

The client or metadata_store configuration doesn't follow the expected format.

InvalidConfigError

The client or metadata_store class can't be found or is not the correct type.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    The "client" entry of the configuration is validated, its type is resolved
    relative to the `chromadb` module, and the client is instantiated with the
    configured keyword arguments before delegating to the parent implementation.
    The dictionary passed by the caller is not modified.

    Args:
        config: A dictionary containing configuration details for the class.
            Must contain a "client" entry describing the client to construct.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        KeyError: The configuration has no "client" entry.
        ValidationError: The client or metadata_store configuration doesn't follow the expected format.
        InvalidConfigError: The client or metadata_store class can't be found or is not the correct type.
    """
    client_options = ObjectContructionConfig.model_validate(config["client"])
    client_cls = import_by_path(client_options.type, chromadb)
    # Build a new dict rather than assigning into `config`: overwriting the caller's
    # "client" entry with a live client object would make the same configuration
    # unusable for a second call (validation would then see the object, not its config).
    resolved_config = {**config, "client": client_cls(**client_options.config)}
    return super().from_config(resolved_config)

store async #

store(entries: list[VectorStoreEntry]) -> None

Stores entries in the ChromaDB collection.

PARAMETER DESCRIPTION
entries

The entries to store.

TYPE: list[VectorStoreEntry]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
@traceable
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Adds the given entries to the ChromaDB collection.

    Each entry's metadata is flattened first. Without a metadata store the flattened
    metadata is written to ChromaDB directly; with one, the flattened metadata is handed
    to the metadata store and whatever it returns is passed on to ChromaDB instead.

    Args:
        entries: The entries to store. Nothing happens when the list is empty.
    """
    if not entries:
        return

    ids = []
    documents = []
    embeddings = []
    flat_metadatas = []
    for entry in entries:
        ids.append(entry.id)
        documents.append(entry.key)
        embeddings.append(entry.vector)
        flat_metadatas.append(self._flatten_metadata(entry.metadata))

    if self._metadata_store is None:
        stored_metadatas = flat_metadatas
    else:
        stored_metadatas = await self._metadata_store.store(ids, flat_metadatas)  # type: ignore

    self._collection.add(ids=ids, embeddings=embeddings, metadatas=stored_metadatas, documents=documents)  # type: ignore

retrieve async #

retrieve(vector: list[float], options: VectorStoreOptions | None = None) -> list[VectorStoreEntry]

Retrieves entries from the ChromaDB collection.

PARAMETER DESCRIPTION
vector

The vector to query.

TYPE: list[float]

options

The options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreEntry]

The retrieved entries.

RAISES DESCRIPTION
MetadataNotFoundError

If the metadata is not found.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
@traceable
async def retrieve(self, vector: list[float], options: VectorStoreOptions | None = None) -> list[VectorStoreEntry]:
    """
    Queries the ChromaDB collection for the entries closest to the given vector.

    Args:
        vector: The vector to query.
        options: Query options; when given, they are merged over the default options.

    Returns:
        The retrieved entries, excluding those further away than `max_distance` when it is set.

    Raises:
        MetadataNotFoundError: If the metadata is not found.
    """
    effective_options = self.default_options
    if options:
        effective_options = self.default_options | options

    response = self._collection.query(
        query_embeddings=vector,
        n_results=effective_options.k,
        include=["metadatas", "embeddings", "distances", "documents"],
    )

    # Every field of the response is a list of batches (one batch per query).
    id_batches = response.get("ids") or []
    embedding_batches = response.get("embeddings") or []
    distance_batches = response.get("distances") or []
    document_batches = response.get("documents") or []

    # Metadata is wrapped into a single batch, either flattened from the response
    # or fetched from the metadata store.
    if self._metadata_store is None:
        metadata_batches = [
            [item for group in response.get("metadatas", []) for item in group]  # type: ignore
        ]
    else:
        metadata_batches = [await self._metadata_store.get(*id_batches)]

    entries = []
    for batch in zip(
        id_batches, metadata_batches, embedding_batches, distance_batches, document_batches, strict=True
    ):
        for entry_id, metadata, embedding, distance, document in zip(*batch, strict=True):
            if effective_options.max_distance is None or distance <= effective_options.max_distance:
                entries.append(
                    VectorStoreEntry(
                        id=entry_id,
                        key=document,
                        vector=list(embedding),
                        metadata=unflatten_dict(metadata) if metadata else {},  # type: ignore
                    )
                )
    return entries

remove async #

remove(ids: list[str]) -> None

Remove entries from the vector store.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[str]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
@traceable
async def remove(self, ids: list[str]) -> None:
    """
    Deletes the entries with the given IDs from the ChromaDB collection.

    Args:
        ids: IDs of the entries to delete.
    """
    collection = self._collection
    collection.delete(ids=ids)

list async #

list(where: WhereQuery | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector store. The entries can be filtered, limited and offset.

PARAMETER DESCRIPTION
where

The filter dictionary - the keys are the field names and the values are the values to filter by. Not specifying the key means no filtering.

TYPE: WhereQuery | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

RAISES DESCRIPTION
MetadataNotFoundError

If the metadata is not found.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
@traceable
async def list(
    self, where: WhereQuery | None = None, limit: int | None = None, offset: int = 0
) -> list[VectorStoreEntry]:
    """
    Lists entries stored in the ChromaDB collection, optionally filtered and paginated.

    Args:
        where: The filter dictionary - keys are field names, values are the values to match.
            An empty or missing filter means no filtering.
        limit: The maximum number of entries to return.
        offset: The number of entries to skip.

    Returns:
        The entries.

    Raises:
        MetadataNotFoundError: If the metadata is not found.
    """
    # Copy `where` into a plain dict to match chromadb's Where type
    chroma_filter: chromadb.Where | None = dict(where) if where else None

    fetched = self._collection.get(
        where=chroma_filter,
        limit=limit,
        offset=offset,
        include=["metadatas", "embeddings", "documents"],
    )

    ids = fetched.get("ids") or []
    embeddings = fetched.get("embeddings") or []
    documents = fetched.get("documents") or []
    # Metadata comes from the response itself unless a metadata store is configured.
    if self._metadata_store is None:
        metadatas = fetched.get("metadatas") or []
    else:
        metadatas = await self._metadata_store.get(ids)

    entries = []
    for entry_id, metadata, embedding, document in zip(ids, metadatas, embeddings, documents, strict=True):
        entries.append(
            VectorStoreEntry(
                id=entry_id,
                key=document,
                vector=list(embedding),
                metadata=unflatten_dict(metadata) if metadata else {},  # type: ignore
            )
        )
    return entries

ragbits.core.vector_stores.qdrant.QdrantVectorStore #

QdrantVectorStore(client: AsyncQdrantClient, index_name: str, distance_method: Distance = Distance.COSINE, default_options: VectorStoreOptions | None = None, metadata_store: MetadataStore | None = None)

Bases: VectorStore[VectorStoreOptions]

Vector store implementation using Qdrant.

Constructs a new QdrantVectorStore instance.

PARAMETER DESCRIPTION
client

An instance of the Qdrant client.

TYPE: AsyncQdrantClient

index_name

The name of the index.

TYPE: str

distance_method

The distance metric to use when creating the collection.

TYPE: Distance DEFAULT: COSINE

default_options

The default options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

metadata_store

The metadata store to use. If None, the metadata will be stored in Qdrant.

TYPE: MetadataStore | None DEFAULT: None

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
def __init__(
    self,
    client: AsyncQdrantClient,
    index_name: str,
    distance_method: Distance = Distance.COSINE,
    default_options: VectorStoreOptions | None = None,
    metadata_store: MetadataStore | None = None,
) -> None:
    """
    Creates a QdrantVectorStore bound to a client and an index name.

    Args:
        client: An instance of the Qdrant client.
        index_name: The name of the index.
        distance_method: The distance metric to use when creating the collection.
        default_options: The default options for querying the vector store.
        metadata_store: The metadata store to use. If None, the metadata will be stored in Qdrant.
    """
    # Query options and the metadata store are handled by the base class.
    super().__init__(default_options=default_options, metadata_store=metadata_store)

    # Qdrant-specific settings.
    self._distance_method = distance_method
    self._index_name = index_name
    self._client = client

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls class-attribute instance-attribute #

options_cls = VectorStoreOptions

subclass_from_config classmethod #

subclass_from_config(config: ObjectContructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectContructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectContructionConfig) -> Self:
    """
    Resolves the class named in the configuration and builds an instance of it.
    The resolved class may be a subclass of the current class.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the resolved class, created through its `from_config` method.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    resolved_cls = import_by_path(config.type, cls.default_module)
    if issubclass(resolved_cls, cls):
        return resolved_cls.from_config(config.config)
    raise InvalidConfigError(f"{resolved_cls} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Builds an instance by calling a factory function located through its import path.
    The factory is free to return an instance of a subclass of this class.

    Args:
        factory_path: Import path of the factory function, written as
            "module.submodule:factory_name".

    Returns:
        The object produced by the factory.

    Raises:
        InvalidConfigError: The factory can't be found or the object it returns
            is not an instance of the current class.
    """
    # Resolve the factory relative to the class' default module and call it with no arguments.
    produced = import_by_path(factory_path, cls.default_module)()
    if isinstance(produced, cls):
        return produced
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

subclass_from_defaults classmethod #

subclass_from_defaults(defaults: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the default configuration file and the default factory function. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
defaults

The CoreConfig instance containing default factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_defaults(
    cls, defaults: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Creates an instance from the first available source, checked in this order:
    the YAML override, the factory override, the default factory, the default configuration.

    Args:
        defaults: The CoreConfig instance holding the default factories and default instance configurations.
        factory_path_override: Import path of a factory function, written as
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with the instance configuration. It is only used
            when the file contains an entry under this class' configuration key.

    Returns:
        The instance built from the first source that provides one.

    Raises:
        NoDefaultConfigError: If none of the sources yields a factory or a configuration.
    """
    if yaml_path_override:
        yaml_section = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        if yaml_section:
            return cls.subclass_from_config(ObjectContructionConfig.model_validate(yaml_section))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    factory_path = defaults.default_factories.get(cls.configuration_key)
    if factory_path:
        return cls.subclass_from_factory(factory_path)

    instance_config = defaults.default_instances_config.get(cls.configuration_key)
    if instance_config:
        return cls.subclass_from_config(ObjectContructionConfig.model_validate(instance_config))

    raise NoDefaultConfigError(f"Could not find default factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
ValidationError

The client or metadata_store configuration doesn't follow the expected format.

InvalidConfigError

The client or metadata_store class can't be found or is not the correct type.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    The "client" entry of the configuration is validated, its type is resolved
    relative to the `qdrant_client` module, and the client is instantiated with the
    configured keyword arguments before delegating to the parent implementation.
    The dictionary passed by the caller is not modified.

    Args:
        config: A dictionary containing configuration details for the class.
            Must contain a "client" entry describing the client to construct.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        KeyError: The configuration has no "client" entry.
        ValidationError: The client or metadata_store configuration doesn't follow the expected format.
        InvalidConfigError: The client or metadata_store class can't be found or is not the correct type.
    """
    client_options = ObjectContructionConfig.model_validate(config["client"])
    client_cls = import_by_path(client_options.type, qdrant_client)
    # Build a new dict rather than assigning into `config`: overwriting the caller's
    # "client" entry with a live client object would make the same configuration
    # unusable for a second call (validation would then see the object, not its config).
    resolved_config = {**config, "client": client_cls(**client_options.config)}
    return super().from_config(resolved_config)

store async #

store(entries: list[VectorStoreEntry]) -> None

Stores vector entries in the Qdrant collection.

PARAMETER DESCRIPTION
entries

List of VectorStoreEntry objects to store

TYPE: list[VectorStoreEntry]

RAISES DESCRIPTION
QdrantException

If upload to collection fails.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
@traceable
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Uploads vector entries to the Qdrant collection.

    The collection is created when it does not exist yet, sized from the first entry's
    vector length and using the configured distance method. Without a metadata store,
    each entry's metadata is JSON-encoded into the point payload under "metadata".

    Args:
        entries: List of VectorStoreEntry objects to store. Nothing happens when it is empty.

    Raises:
        QdrantException: If upload to collection fails.
    """
    if not entries:
        return

    collection_missing = not await self._client.collection_exists(self._index_name)
    if collection_missing:
        await self._client.create_collection(
            collection_name=self._index_name,
            vectors_config=VectorParams(size=len(entries[0].vector), distance=self._distance_method),
        )

    ids = []
    embeddings = []
    payloads = []
    raw_metadatas = []
    for entry in entries:
        ids.append(entry.id)
        embeddings.append(entry.vector)
        payloads.append({"document": entry.key})
        raw_metadatas.append(entry.metadata)

    if self._metadata_store is None:
        extra_payloads = [{"metadata": json.dumps(item, default=str)} for item in raw_metadatas]
    else:
        extra_payloads = await self._metadata_store.store(ids, raw_metadatas)  # type: ignore

    # Merge the extra fields into the payloads only when there is something to merge.
    if extra_payloads is not None:
        payloads = [{**base, **extra} for base, extra in zip(payloads, extra_payloads, strict=True)]

    self._client.upload_collection(
        collection_name=self._index_name,
        vectors=embeddings,
        payload=payloads,
        ids=ids,
        wait=True,
    )

retrieve async #

retrieve(vector: list[float], options: VectorStoreOptions | None = None) -> list[VectorStoreEntry]

Retrieves entries from the Qdrant collection based on vector similarity.

PARAMETER DESCRIPTION
vector

The vector to query.

TYPE: list[float]

options

The options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreEntry]

The retrieved entries.

RAISES DESCRIPTION
MetadataNotFoundError

If metadata cannot be retrieved

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
@traceable
async def retrieve(self, vector: list[float], options: VectorStoreOptions | None = None) -> list[VectorStoreEntry]:
    """
    Retrieves entries from the Qdrant collection based on vector similarity.

    Args:
        vector: The vector to query.
        options: The options for querying the vector store. When given, they are merged
            over the default options. `k` limits the number of results and `max_distance`,
            when set, is translated into a Qdrant score threshold of `1 - max_distance`.

    Returns:
        The retrieved entries.

    Raises:
        MetadataNotFoundError: If metadata cannot be retrieved
    """
    merged_options = (self.default_options | options) if options else self.default_options

    # Compare against None explicitly: a max_distance of 0.0 is a legitimate (strictest)
    # limit and must not be mistaken for "no limit", which a truthiness test would do.
    # NOTE(review): `1 - max_distance` presumably assumes a similarity score where
    # distance = 1 - score (e.g. cosine) - confirm for other distance methods.
    max_distance = merged_options.max_distance
    score_threshold = None if max_distance is None else 1 - max_distance

    results = await self._client.query_points(
        collection_name=self._index_name,
        query=vector,
        limit=merged_options.k,
        score_threshold=score_threshold,
        with_payload=True,
        with_vectors=True,
    )

    ids = [point.id for point in results.points]
    vectors = [point.vector for point in results.points]
    documents = [point.payload["document"] for point in results.points]  # type: ignore
    # Metadata is JSON-decoded from the point payload unless a metadata store is configured.
    metadatas = (
        [json.loads(point.payload["metadata"]) for point in results.points]  # type: ignore
        if self._metadata_store is None
        else await self._metadata_store.get(ids)  # type: ignore
    )

    return [
        VectorStoreEntry(
            id=str(id),
            key=document,
            vector=vector,  # type: ignore
            metadata=metadata,
        )
        for id, document, vector, metadata in zip(ids, documents, vectors, metadatas, strict=True)
    ]

remove async #

remove(ids: list[str]) -> None

Remove entries from the vector store.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[str]

RAISES DESCRIPTION
ValueError

If collection named self._index_name is not present in the vector store.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
@traceable
async def remove(self, ids: list[str]) -> None:
    """
    Deletes the points with the given IDs from the Qdrant collection.

    Args:
        ids: The list of entries' IDs to remove.

    Raises:
        ValueError: If collection named `self._index_name` is not present in the vector store.
    """
    # The cast only widens the static type to what PointIdsList expects.
    selector = models.PointIdsList(points=typing.cast(list[int | str], ids))
    await self._client.delete(collection_name=self._index_name, points_selector=selector)

list async #

list(where: Filter | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector store. The entries can be filtered, limited and offset.

PARAMETER DESCRIPTION
where

Conditions for filtering results. Reference: https://qdrant.tech/documentation/concepts/filtering

TYPE: Filter | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

RAISES DESCRIPTION
MetadataNotFoundError

If the metadata is not found.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
@traceable
async def list(  # type: ignore
    self,
    where: Filter | None = None,  # type: ignore
    limit: int | None = None,
    offset: int = 0,
) -> list[VectorStoreEntry]:
    """
    Lists entries stored in the Qdrant collection, optionally filtered and paginated.
    Returns an empty list when the collection does not exist.

    Args:
        where: Conditions for filtering results.
            Reference: https://qdrant.tech/documentation/concepts/filtering
        limit: The maximum number of entries to return. When not given (or zero),
            the number of points in the collection is used instead.
        offset: The number of entries to skip.

    Returns:
        The entries.

    Raises:
        MetadataNotFoundError: If the metadata is not found.
    """
    if not await self._client.collection_exists(collection_name=self._index_name):
        return []

    if not limit:
        limit = (await self._client.count(collection_name=self._index_name)).count

    results = await self._client.query_points(
        collection_name=self._index_name,
        query_filter=where,
        limit=limit,
        offset=offset,
        with_payload=True,
        with_vectors=True,
    )

    points = results.points
    ids = [point.id for point in points]
    vectors = [point.vector for point in points]
    documents = [point.payload["document"] for point in points]  # type: ignore
    # Metadata is JSON-decoded from the point payload unless a metadata store is configured.
    if self._metadata_store is None:
        metadatas = [json.loads(point.payload["metadata"]) for point in points]  # type: ignore
    else:
        metadatas = await self._metadata_store.get(ids)  # type: ignore

    entries = []
    for point_id, document, point_vector, metadata in zip(ids, documents, vectors, metadatas, strict=True):
        entries.append(
            VectorStoreEntry(
                id=str(point_id),
                key=document,
                vector=point_vector,  # type: ignore
                metadata=metadata,
            )
        )
    return entries