Skip to content

Vector Stores#

ragbits.core.vector_stores.base.VectorStoreEntry #

Bases: BaseModel

An object representing a vector database entry. Contains text and/or image for embedding + metadata.

id instance-attribute #

id: UUID

text class-attribute instance-attribute #

text: str | None = None

image_bytes class-attribute instance-attribute #

image_bytes: SerializableBytes | None = None

metadata class-attribute instance-attribute #

metadata: dict = {}

text_or_image_required #

text_or_image_required() -> Self

Validates that either text or image_bytes are provided.

RAISES DESCRIPTION
ValueError

If neither text nor image_bytes are provided.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@pydantic.model_validator(mode="after")
def text_or_image_required(self) -> Self:
    """
    Ensure the entry carries something to embed.

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: If both text and image_bytes are missing or empty.
    """
    # Either field being truthy is enough; empty values count as "not provided".
    has_content = self.text or self.image_bytes
    if has_content:
        return self
    raise ValueError("Either text or image_bytes must be provided.")

ragbits.core.vector_stores.base.VectorStoreOptions #

Bases: Options

An object representing the options for the vector store.

model_config class-attribute instance-attribute #

model_config = ConfigDict(extra='allow', arbitrary_types_allowed=True)

k class-attribute instance-attribute #

k: int = 5

max_distance class-attribute instance-attribute #

max_distance: float | None = None

dict #

dict() -> dict[str, Any]

Creates a dictionary representation of the Options instance. If a value is None, it will be replaced with a provider-specific not-given sentinel.

RETURNS DESCRIPTION
dict[str, Any]

A dictionary representation of the Options instance.

Source code in packages/ragbits-core/src/ragbits/core/options.py
def dict(self) -> dict[str, Any]:  # type: ignore # mypy complains about overriding BaseModel.dict
    """
    Build a plain dictionary from the Options instance.

    Every value that is None (or already a NotGiven marker) is swapped for this
    instance's ``_not_given`` sentinel; all other values are passed through untouched.

    Returns:
        A dictionary representation of the Options instance.
    """
    converted = {}
    for name, val in self.model_dump().items():
        if val is None or isinstance(val, NotGiven):
            converted[name] = self._not_given
        else:
            converted[name] = val
    return converted

ragbits.core.vector_stores.base.VectorStore #

VectorStore(default_options: OptionsT | None = None)

Bases: ConfigurableComponent[VectorStoreOptionsT], ABC

An abstract base class for vector stores, which store vectors and retrieve them by similarity.

Constructs a new ConfigurableComponent instance.

PARAMETER DESCRIPTION
default_options

The default options for the component.

TYPE: OptionsT | None DEFAULT: None

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
def __init__(self, default_options: OptionsT | None = None) -> None:
    """
    Set up the component with its default options.

    Args:
        default_options: Options to use as the component's defaults. When omitted (or falsy),
            a fresh instance of ``options_cls`` is created instead.
    """
    if not default_options:
        default_options = self.options_cls()
    self.default_options: OptionsT = default_options

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls instance-attribute #

options_cls: type[VectorStoreOptionsT]

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance from a construction config, possibly of a subclass.

    The class named by ``config.type`` is resolved (relative to ``default_module``)
    and must be this class or one of its subclasses.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance created by the resolved class's ``from_config``.

    Raises:
        InvalidConfigError: The resolved class is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Build an instance by calling a factory function, possibly yielding a subclass instance.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object produced by the factory.

    Raises:
        InvalidConfigError: The object returned by the factory is not an instance
            of the current class.
    """
    # The factory is called with no arguments.
    produced = import_by_path(factory_path, cls.default_module)()
    if isinstance(produced, cls):
        return produced
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
NoPreferredConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    Sources are tried in this order, and the first one that applies wins:
    the YAML override (if it has an entry for this component), the factory override,
    the preferred factory from ``config``, the preferred instance config from ``config``.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to a YAML file with component preferences.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
    """
    key = cls.configuration_key

    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(key)
        # A YAML file without an entry for this component falls through to the next source.
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    factory_path = config.component_preference_factories.get(key)
    if factory_path:
        return cls.subclass_from_factory(factory_path)

    instance_config = config.preferred_instances_config.get(key)
    if instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict[str, Any]) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict[str, Any]

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Initializes the class with the provided configuration.

    The optional "default_options" entry is converted into an ``options_cls`` instance;
    every other entry is forwarded to the constructor as a keyword argument.

    Args:
        config: A dictionary containing configuration details for the class.
            It is not modified.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    # Work on a shallow copy so the caller's dictionary keeps its "default_options" entry.
    init_kwargs = dict(config)
    default_options = init_kwargs.pop("default_options", None)
    options = cls.options_cls(**default_options) if default_options else None
    return cls(**init_kwargs, default_options=options)

store abstractmethod async #

store(entries: list[VectorStoreEntry]) -> None

Store entries in the vector store.

PARAMETER DESCRIPTION
entries

The entries to store.

TYPE: list[VectorStoreEntry]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@abstractmethod
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Store entries in the vector store.

    This is an abstract coroutine with no implementation here; each concrete
    vector store must provide its own.

    Args:
        entries: The entries to store.
    """

retrieve abstractmethod async #

retrieve(text: str, options: VectorStoreOptionsT | None = None) -> list[VectorStoreResult]

Retrieve entries from the vector store most similar to the provided text.

PARAMETER DESCRIPTION
text

The text to query the vector store with.

TYPE: str

options

The options for querying the vector store.

TYPE: VectorStoreOptionsT | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreResult]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@abstractmethod
async def retrieve(
    self,
    text: str,
    options: VectorStoreOptionsT | None = None,
) -> list[VectorStoreResult]:
    """
    Retrieve entries from the vector store most similar to the provided text.

    This is an abstract coroutine with no implementation here; each concrete
    vector store must provide its own.

    Args:
        text: The text to query the vector store with.
        options: The options for querying the vector store. May be omitted.

    Returns:
        The retrieved results.
    """

remove abstractmethod async #

remove(ids: list[UUID]) -> None

Remove entries from the vector store.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[UUID]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@abstractmethod
async def remove(self, ids: list[UUID]) -> None:
    """
    Remove entries from the vector store.

    This is an abstract coroutine with no implementation here; each concrete
    vector store must provide its own.

    Args:
        ids: The list of entries' IDs to remove.
    """

list abstractmethod async #

list(where: WhereQuery | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector store. The entries can be filtered, limited and offset.

PARAMETER DESCRIPTION
where

The filter dictionary - the keys are the field names and the values are the values to filter by. Not specifying the key means no filtering.

TYPE: WhereQuery | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@abstractmethod
async def list(
    self, where: WhereQuery | None = None, limit: int | None = None, offset: int = 0
) -> list[VectorStoreEntry]:
    """
    List entries from the vector store. The entries can be filtered, limited and offset.

    This is an abstract coroutine with no implementation here; each concrete
    vector store must provide its own.

    Args:
        where: The filter dictionary - the keys are the field names and the values are the values to filter by.
            Not specifying the key means no filtering.
        limit: The maximum number of entries to return. May be omitted.
        offset: The number of entries to skip.

    Returns:
        The entries.
    """

ragbits.core.vector_stores.hybrid.HybridSearchVectorStore #

HybridSearchVectorStore(*vector_stores: VectorStore, retrieval_strategy: HybridRetrivalStrategy | None = None)

Bases: VectorStore

A vector store that takes multiple vector store objects and proxies requests to them, returning the union of results.

Constructs a new HybridSearchVectorStore instance.

PARAMETER DESCRIPTION
vector_stores

The vector stores to proxy requests to.

TYPE: VectorStore DEFAULT: ()

retrieval_strategy

The retrieval strategy to use when combining results, uses OrderedHybridRetrivalStrategy by default.

TYPE: HybridRetrivalStrategy | None DEFAULT: None

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/hybrid.py
def __init__(self, *vector_stores: VectorStore, retrieval_strategy: HybridRetrivalStrategy | None = None) -> None:
    """
    Create a hybrid store on top of the given vector stores.

    Args:
        vector_stores: The vector stores to proxy requests to.
        retrieval_strategy: How results from the individual stores are combined.
            When not given, an OrderedHybridRetrivalStrategy is created.
    """
    if not retrieval_strategy:
        retrieval_strategy = OrderedHybridRetrivalStrategy()
    self.retrieval_strategy = retrieval_strategy
    self.vector_stores = vector_stores

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls class-attribute instance-attribute #

options_cls = VectorStoreOptions

vector_stores instance-attribute #

vector_stores = vector_stores

retrieval_strategy instance-attribute #

retrieval_strategy = retrieval_strategy or OrderedHybridRetrivalStrategy()

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance from a construction config, possibly of a subclass.

    The class named by ``config.type`` is resolved (relative to ``default_module``)
    and must be this class or one of its subclasses.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance created by the resolved class's ``from_config``.

    Raises:
        InvalidConfigError: The resolved class is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Build an instance by calling a factory function, possibly yielding a subclass instance.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object produced by the factory.

    Raises:
        InvalidConfigError: The object returned by the factory is not an instance
            of the current class.
    """
    # The factory is called with no arguments.
    produced = import_by_path(factory_path, cls.default_module)()
    if isinstance(produced, cls):
        return produced
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
NoPreferredConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    Sources are tried in this order, and the first one that applies wins:
    the YAML override (if it has an entry for this component), the factory override,
    the preferred factory from ``config``, the preferred instance config from ``config``.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to a YAML file with component preferences.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
    """
    key = cls.configuration_key

    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(key)
        # A YAML file without an entry for this component falls through to the next source.
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    factory_path = config.component_preference_factories.get(key)
    if factory_path:
        return cls.subclass_from_factory(factory_path)

    instance_config = config.preferred_instances_config.get(key)
    if instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict[str, Any]) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict[str, Any]

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def from_config(cls, config: dict[str, Any]) -> Self:
    """
    Initializes the class with the provided configuration.

    The optional "default_options" entry is converted into an ``options_cls`` instance;
    every other entry is forwarded to the constructor as a keyword argument.

    Args:
        config: A dictionary containing configuration details for the class.
            It is not modified.

    Returns:
        An instance of the class initialized with the provided configuration.
    """
    # Work on a shallow copy so the caller's dictionary keeps its "default_options" entry.
    init_kwargs = dict(config)
    default_options = init_kwargs.pop("default_options", None)
    options = cls.options_cls(**default_options) if default_options else None
    return cls(**init_kwargs, default_options=options)

store async #

store(entries: list[VectorStoreEntry]) -> None

Store entries in the vector stores.

Sends entries to all vector stores to be stored, although individual vector stores are free to implement their own logic regarding which entries to store. For example, some vector stores may only store entries with specific type of content (images, text, etc.).

PARAMETER DESCRIPTION
entries

The entries to store.

TYPE: list[VectorStoreEntry]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/hybrid.py
@traceable
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Store entries in every underlying vector store.

    The same entries are handed to each store; a store is free to decide which of them
    it actually keeps (for example, only entries with a specific type of content such
    as images or text).

    Args:
        entries: The entries to store.
    """
    # One store() call per underlying vector store, awaited together.
    pending = [backend.store(entries) for backend in self.vector_stores]
    await asyncio.gather(*pending)

retrieve async #

retrieve(text: str, options: VectorStoreOptions | None = None) -> list[VectorStoreResult]

Retrieve entries from the vector stores most similar to the provided text. The results are combined using the retrieval strategy provided in the constructor.

PARAMETER DESCRIPTION
text

The text to query the vector store with.

TYPE: str

options

The options for querying the vector stores.

TYPE: VectorStoreOptions | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreResult]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/hybrid.py
@traceable
async def retrieve(
    self,
    text: str,
    options: VectorStoreOptions | None = None,
) -> list[VectorStoreResult]:
    """
    Query every underlying vector store with the given text and merge the answers.

    The per-store result lists are combined by the retrieval strategy that was
    provided in the constructor.

    Args:
        text: The text to query the vector store with.
        options: The options for querying the vector stores.

    Returns:
        The combined results.
    """
    # The same text and options are forwarded to each store.
    pending = [backend.retrieve(text, options) for backend in self.vector_stores]
    per_store_results = await asyncio.gather(*pending)
    return self.retrieval_strategy.join(per_store_results)

remove async #

remove(ids: list[UUID]) -> None

Remove entries from all vector stores.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[UUID]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/hybrid.py
@traceable
async def remove(self, ids: list[UUID]) -> None:
    """
    Remove the given entries from every underlying vector store.

    Args:
        ids: The list of entries' IDs to remove.
    """
    # One remove() call per underlying vector store, awaited together.
    pending = [backend.remove(ids) for backend in self.vector_stores]
    await asyncio.gather(*pending)

list async #

list(where: WhereQuery | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector stores. The entries can be filtered, limited and offset. Vector stores are queried in the order they were provided in the constructor.

PARAMETER DESCRIPTION
where

The filter dictionary - the keys are the field names and the values are the values to filter by. Not specifying the key means no filtering.

TYPE: WhereQuery | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/hybrid.py
@traceable
async def list(
    self, where: WhereQuery | None = None, limit: int | None = None, offset: int = 0
) -> list[VectorStoreEntry]:
    """
    List entries from the underlying vector stores, de-duplicated by entry ID.

    Stores are queried one after another, in the order they were passed to the
    constructor. Querying stops early once enough entries were collected to
    satisfy ``offset + limit``.

    Args:
        where: The filter dictionary - the keys are the field names and the values are the values to filter by.
            Not specifying the key means no filtering.
        limit: The maximum number of entries to return.
        offset: The number of entries to skip.

    Returns:
        The entries.
    """
    merged: dict[UUID, VectorStoreEntry] = {}
    for backend in self.vector_stores:
        # With a limit set, stop as soon as the requested window is already covered.
        if limit is not None and len(merged) >= offset + limit:
            break

        # Only the filter is forwarded; limit and offset are applied after merging.
        for entry in await backend.list(where):
            # An entry seen again replaces the stored value but keeps its original position.
            merged[entry.id] = entry

    window_end = None if limit is None else offset + limit
    return [*merged.values()][offset:window_end]

ragbits.core.vector_stores.in_memory.InMemoryVectorStore #

InMemoryVectorStore(embedder: Embedder, embedding_type: EmbeddingType = EmbeddingType.TEXT, default_options: VectorStoreOptions | None = None)

Bases: VectorStoreWithExternalEmbedder[VectorStoreOptions]

A simple in-memory implementation of Vector Store, storing vectors in memory.

Constructs a new InMemoryVectorStore instance.

PARAMETER DESCRIPTION
default_options

The default options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

embedder

The embedder to use for converting entries to vectors.

TYPE: Embedder

embedding_type

Which part of the entry to embed, either text or image. The other part will be ignored.

TYPE: EmbeddingType DEFAULT: TEXT

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
def __init__(
    self,
    embedder: Embedder,
    embedding_type: EmbeddingType = EmbeddingType.TEXT,
    default_options: VectorStoreOptions | None = None,
) -> None:
    """
    Create an empty in-memory vector store.

    Args:
        embedder: The embedder to use for converting entries to vectors.
        embedding_type: Which part of the entry to embed, either text or image. The other part will be ignored.
        default_options: The default options for querying the vector store.
    """
    # Options, embedder and embedding type are handled by the parent class.
    super().__init__(default_options=default_options, embedder=embedder, embedding_type=embedding_type)

    # Both mappings start empty and are keyed by UUID.
    self._embeddings: dict[UUID, list[float]] = {}
    self._entries: dict[UUID, VectorStoreEntry] = {}

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls class-attribute instance-attribute #

options_cls = VectorStoreOptions

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Build an instance from a construction config, possibly of a subclass.

    The class named by ``config.type`` is resolved (relative to ``default_module``)
    and must be this class or one of its subclasses.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance created by the resolved class's ``from_config``.

    Raises:
        InvalidConfigError: The resolved class is not a subclass of the current class.
    """
    resolved = import_by_path(config.type, cls.default_module)
    if issubclass(resolved, cls):
        return resolved.from_config(config.config)

    raise InvalidConfigError(f"{resolved} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Build an instance by calling a factory function, possibly yielding a subclass instance.

    Args:
        factory_path: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".

    Returns:
        The object produced by the factory.

    Raises:
        InvalidConfigError: The object returned by the factory is not an instance
            of the current class.
    """
    # The factory is called with no arguments.
    produced = import_by_path(factory_path, cls.default_module)()
    if isinstance(produced, cls):
        return produced
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

The path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
NoPreferredConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Create an instance according to the project's component preferences.

    Sources are tried in this order, and the first one that applies wins:
    the YAML override (if it has an entry for this component), the factory override,
    the preferred factory from ``config``, the preferred instance config from ``config``.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: A string representing the path to the factory function
            in the format of "module.submodule:factory_name".
        yaml_path_override: The path to a YAML file with component preferences.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or configuration.
    """
    key = cls.configuration_key

    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(key)
        # A YAML file without an entry for this component falls through to the next source.
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    factory_path = config.component_preference_factories.get(key)
    if factory_path:
        return cls.subclass_from_factory(factory_path)

    instance_config = config.preferred_instances_config.get(key)
    if instance_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(instance_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    The "default_options" entry (optional) is turned into an instance of `cls.options_cls`,
    the "embedder" entry (required) is turned into an Embedder, and every remaining entry
    is forwarded to the constructor as a keyword argument.

    Args:
        config: A dictionary containing configuration details for the class.
            It is not modified.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        KeyError: If `config` has no "embedder" entry.
    """
    # Work on a shallow copy: popping from the caller's dictionary would strip its
    # "default_options" and "embedder" entries and make the same config unusable a second time.
    remaining = dict(config)

    default_options = remaining.pop("default_options", None)
    options = cls.options_cls(**default_options) if default_options else None

    embedder_config = remaining.pop("embedder")
    embedder: Embedder = Embedder.subclass_from_config(ObjectConstructionConfig.model_validate(embedder_config))

    return cls(**remaining, default_options=options, embedder=embedder)

store async #

store(entries: list[VectorStoreEntry]) -> None

Store entries in the vector store.

PARAMETER DESCRIPTION
entries

The entries to store.

TYPE: list[VectorStoreEntry]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Embeds the given entries and keeps them in memory.

    Only entries for which an embedding was created are stored.

    Args:
        entries: The entries to store.
    """
    with trace(
        entries=entries,
        embedder=repr(self._embedder),
        embedding_type=self._embedding_type,
    ) as outputs:
        new_embeddings = await self._create_embeddings(entries)
        self._embeddings.update(new_embeddings)
        for entry in entries:
            if entry.id in new_embeddings:
                self._entries[entry.id] = entry
        # The trace records the complete store contents, not only the newly added items.
        outputs.embeddings = self._embeddings
        outputs.entries = self._entries

retrieve async #

retrieve(text: str, options: VectorStoreOptions | None = None) -> list[VectorStoreResult]

Retrieve entries from the vector store most similar to the provided text.

PARAMETER DESCRIPTION
text

The text to query the vector store with.

TYPE: str

options

The options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreResult]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
async def retrieve(
    self,
    text: str,
    options: VectorStoreOptions | None = None,
) -> list[VectorStoreResult]:
    """
    Retrieve entries from the vector store most similar to the provided text.

    Every stored vector is compared with the embedded query using `np.linalg.norm`
    of their difference, and that distance becomes the result's score.

    Args:
        text: The text to query the vector store with.
        options: The options for querying the vector store. When given, they are
            combined with the store's default options.

    Returns:
        At most `k` results ordered by ascending score, limited to those whose score
        does not exceed `max_distance` when that option is set.
    """
    merged_options = (self.default_options | options) if options else self.default_options
    with trace(
        text=text,
        options=merged_options.dict(),
        embedder=repr(self._embedder),
        embedding_type=self._embedding_type,
    ) as outputs:
        # The query is embedded as a one-element batch. Convert the result to an array once,
        # instead of rebuilding the same array for every stored vector inside the loop.
        query_array = np.array(await self._embedder.embed_text([text]))
        results: list[VectorStoreResult] = []

        for entry_id, vector in self._embeddings.items():
            distance = float(np.linalg.norm(np.array(vector) - query_array))
            result = VectorStoreResult(entry=self._entries[entry_id], vector=vector, score=distance)
            if merged_options.max_distance is None or result.score <= merged_options.max_distance:
                results.append(result)

        outputs.results = sorted(results, key=lambda r: r.score)[: merged_options.k]
        return outputs.results

remove async #

remove(ids: list[UUID]) -> None

Remove entries from the vector store.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[UUID]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
@traceable
async def remove(self, ids: list[UUID]) -> None:
    """
    Deletes the entries with the given IDs, together with their embeddings.

    Args:
        ids: The list of entries' IDs to remove.
    """
    for entry_id in ids:
        # `del` propagates the lookup error for an ID that is not stored.
        del self._entries[entry_id]
        del self._embeddings[entry_id]

list async #

list(where: WhereQuery | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector store. The entries can be filtered, limited and offset.

PARAMETER DESCRIPTION
where

The filter dictionary - the keys are the field names and the values are the values to filter by. Not specifying the key means no filtering.

TYPE: WhereQuery | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/in_memory.py
@traceable
async def list(
    self, where: WhereQuery | None = None, limit: int | None = None, offset: int = 0
) -> list[VectorStoreEntry]:
    """
    List entries from the vector store. The entries can be filtered, limited and offset.

    Only entries that have an embedding are listed. Filtering is applied first,
    then the offset, then the limit.

    Args:
        where: The filter dictionary - the keys are the field names and the values are the values to filter by.
            An entry matches when its metadata holds an equal value for every key.
            Not specifying the key means no filtering.
        limit: The maximum number of entries to return. `None` means no limit;
            `0` returns an empty list.
        offset: The number of entries to skip.

    Returns:
        The entries.
    """
    entries = (entry for entry in self._entries.values() if entry.id in self._embeddings)

    if where:
        entries = (
            entry for entry in entries if all(entry.metadata.get(key) == value for key, value in where.items())
        )

    if offset:
        entries = islice(entries, offset, None)

    # Compare against None rather than relying on truthiness: `limit=0` asks for
    # zero entries and must not be treated as "no limit".
    if limit is not None:
        entries = islice(entries, limit)

    return list(entries)

ragbits.core.vector_stores.chroma.ChromaVectorStore #

ChromaVectorStore(client: ClientAPI, index_name: str, embedder: Embedder, embedding_type: EmbeddingType = EmbeddingType.TEXT, distance_method: Literal['l2', 'ip', 'cosine'] = 'cosine', default_options: VectorStoreOptions | None = None)

Bases: VectorStoreWithExternalEmbedder[VectorStoreOptions]

Vector store implementation using Chroma.

Constructs a new ChromaVectorStore instance.

PARAMETER DESCRIPTION
client

The ChromaDB client.

TYPE: ClientAPI

index_name

The name of the index.

TYPE: str

embedder

The embedder to use for converting entries to vectors.

TYPE: Embedder

embedding_type

Which part of the entry to embed, either text or image. The other part will be ignored.

TYPE: EmbeddingType DEFAULT: TEXT

distance_method

The distance method to use.

TYPE: Literal['l2', 'ip', 'cosine'] DEFAULT: 'cosine'

default_options

The default options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
def __init__(
    self,
    client: ClientAPI,
    index_name: str,
    embedder: Embedder,
    embedding_type: EmbeddingType = EmbeddingType.TEXT,
    distance_method: Literal["l2", "ip", "cosine"] = "cosine",
    default_options: VectorStoreOptions | None = None,
) -> None:
    """
    Creates a ChromaVectorStore and gets or creates its backing collection.

    Args:
        client: The ChromaDB client.
        index_name: The name of the index; used as the collection name.
        embedder: The embedder to use for converting entries to vectors.
        embedding_type: Which part of the entry to embed, either text or image. The other part will be ignored.
        distance_method: The distance method to use; passed to the collection as its "hnsw:space" metadata.
        default_options: The default options for querying the vector store.
    """
    super().__init__(
        default_options=default_options,
        embedder=embedder,
        embedding_type=embedding_type,
    )
    self._client = client
    self._index_name = index_name
    self._distance_method = distance_method
    # The collection is resolved eagerly, so constructing the store already talks to the client.
    self._collection = client.get_or_create_collection(
        name=index_name,
        metadata={"hnsw:space": distance_method},
    )

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls class-attribute instance-attribute #

options_cls = VectorStoreOptions

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Builds an instance from a construction config, possibly of a subclass named by that config.

    Args:
        config: A model whose `type` names the class to instantiate and whose `config`
            is handed to that class's `from_config`.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: If the resolved class is not a subclass of the current class.
            A class that cannot be located is presumably reported by `import_by_path` - verify.
    """
    target_cls = import_by_path(config.type, cls.default_module)
    if issubclass(target_cls, cls):
        return target_cls.from_config(config.config)

    raise InvalidConfigError(f"{target_cls} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Builds an instance by calling a factory function located through its import path.

    The factory chooses the concrete type, so the result may be an instance of a subclass.

    Args:
        factory_path: Import path of the factory function, written as
            "module.submodule:factory_name". It is resolved together with `cls.default_module`.

    Returns:
        The object produced by calling the factory without arguments.

    Raises:
        InvalidConfigError: If the object produced by the factory is not an instance of this class.
            A factory that cannot be located is presumably reported by `import_by_path` - verify.
    """
    instance = import_by_path(factory_path, cls.default_module)()
    if isinstance(instance, cls):
        return instance
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

A string representing the path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Creates an instance according to the project's component preferences.

    The sources are consulted in the following order and the first applicable one wins:

    1. `yaml_path_override`, if the YAML file has an entry under `cls.configuration_key`,
    2. `factory_path_override`,
    3. the factory registered in `config.component_preference_factories`,
    4. the configuration registered in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance holding the preferred factories and instance configurations.
        factory_path_override: Import path of a factory function, written as
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with the instance configuration.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or a configuration.
    """
    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    registered_factory = config.component_preference_factories.get(cls.configuration_key)
    if registered_factory:
        return cls.subclass_from_factory(registered_factory)

    registered_config = config.preferred_instances_config.get(cls.configuration_key)
    if registered_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(registered_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    The "client" entry is validated as an ObjectConstructionConfig, its `type` is
    resolved relative to the `chromadb` module and instantiated with its `config`
    as keyword arguments. The remaining construction is delegated to the parent class.

    Args:
        config: A dictionary containing configuration details for the class.
            It must contain a "client" entry. The dictionary is not modified.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        KeyError: If `config` has no "client" entry.
    """
    client_options = ObjectConstructionConfig.model_validate(config["client"])
    client_cls = import_by_path(client_options.type, chromadb)
    # Build a new dictionary instead of assigning into `config`: overwriting the caller's
    # "client" entry with a live client object would make the same config unusable a second time.
    resolved_config = {**config, "client": client_cls(**client_options.config)}
    return super().from_config(resolved_config)

store async #

store(entries: list[VectorStoreEntry]) -> None

Stores entries in the ChromaDB collection.

If an entry contains both text and an image, only one of them is embedded; which one is determined by the store's embedding_type (text by default).

PARAMETER DESCRIPTION
entries

The entries to store.

TYPE: list[VectorStoreEntry]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Stores entries in the ChromaDB collection.

    Entries for which no embedding was created are skipped. Which part of an entry
    is embedded is presumably governed by the store's embedding type - verify against
    `_create_embeddings`.

    Each stored record consists of the entry's ID as a string, its embedding, its text
    (an empty string when absent) and its flattened metadata, extended with an "__image"
    key holding the hex-encoded image bytes (or None).

    Args:
        entries: The entries to store.
    """
    with trace(
        entries=entries,
        index_name=self._index_name,
        collection=self._collection,
        distance_method=self._distance_method,
        embedder=repr(self._embedder),
        embedding_type=self._embedding_type,
    ):
        if not entries:
            return

        ids: list[str] = []
        documents: list[str] = []
        metadatas: list[Mapping] = []
        embeddings: list[Sequence[float]] = []

        raw_embeddings = await self._create_embeddings(entries)
        for entry in entries:
            if not raw_embeddings.get(entry.id):
                continue

            embeddings.append(raw_embeddings[entry.id])
            ids.append(str(entry.id))
            documents.append(entry.text or "")
            metadatas.append(
                self._flatten_metadata(
                    {
                        **entry.metadata,
                        "__image": entry.image_bytes.hex() if entry.image_bytes else None,
                    }
                )
            )

        # Every entry may have been skipped above (no embedding created). Do not call
        # `add` with empty lists in that case - Chroma is expected to reject an empty batch.
        if not ids:
            return

        self._collection.add(
            ids=ids,
            embeddings=embeddings,
            metadatas=metadatas,
            documents=documents,
        )

retrieve async #

retrieve(text: str, options: VectorStoreOptions | None = None) -> list[VectorStoreResult]

Retrieves entries from the ChromaDB collection.

PARAMETER DESCRIPTION
text

The text to query the vector store with.

TYPE: str

options

The options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreResult]

The retrieved entries.

RAISES DESCRIPTION
MetadataNotFoundError

If the metadata is not found.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
async def retrieve(
    self,
    text: str,
    options: VectorStoreOptions | None = None,
) -> list[VectorStoreResult]:
    """
    Queries the ChromaDB collection for the entries closest to the given text.

    Args:
        text: The text to query the vector store with.
        options: The options for querying the vector store. When given, they are
            combined with the store's default options.

    Returns:
        The retrieved entries, limited to those whose distance does not exceed
        `max_distance` when that option is set.

    Raises:
        MetadataNotFoundError: If the metadata is not found (not raised directly in this
            method; presumably surfaced by a helper - TODO confirm).
    """
    merged_options = (self.default_options | options) if options else self.default_options
    with trace(
        text=text,
        options=merged_options.dict(),
        index_name=self._index_name,
        collection=self._collection,
        distance_method=self._distance_method,
        embedder=repr(self._embedder),
        embedding_type=self._embedding_type,
    ) as outputs:
        # The embedder works on batches; the single query is the first element of the result.
        embedded_batch = await self._embedder.embed_text([text])
        query_vector = embedded_batch[0]

        results = self._collection.query(
            query_embeddings=query_vector,
            n_results=merged_options.k,
            include=[
                types.IncludeEnum.metadatas,
                types.IncludeEnum.embeddings,
                types.IncludeEnum.distances,
                types.IncludeEnum.documents,
            ],
        )

        # Chroma answers with one batch per query; collapse the batches into flat lists.
        flat_ids = [entry_id for id_batch in results.get("ids", []) for entry_id in id_batch]
        flat_distances = [dist for dist_batch in results.get("distances") or [] for dist in dist_batch]
        flat_documents = [doc for doc_batch in results.get("documents") or [] for doc in doc_batch]
        flat_embeddings = [emb for emb_batch in results.get("embeddings") or [] for emb in emb_batch]

        metadatas: Sequence = [dict(meta) for meta_batch in results.get("metadatas") or [] for meta in meta_batch]

        # Remove the `# type: ignore` comment when https://github.com/deepsense-ai/ragbits/pull/379/files resolved
        unflattened_metadatas: list[dict] = [unflatten_dict(meta) if meta else {} for meta in metadatas]  # type: ignore[misc]

        # The image travels inside the metadata under "__image"; take it out before building entries.
        images: list[bytes | None] = [meta.pop("__image", None) for meta in unflattened_metadatas]

        matches: list[VectorStoreResult] = []
        for entry_id, metadata, distance, document, image, vector in zip(
            flat_ids, unflattened_metadatas, flat_distances, flat_documents, images, flat_embeddings, strict=True
        ):
            if merged_options.max_distance is None or distance <= merged_options.max_distance:
                matches.append(
                    VectorStoreResult(
                        score=distance,
                        vector=vector,
                        entry=VectorStoreEntry(
                            id=entry_id,
                            text=document,
                            image_bytes=image,
                            metadata=metadata,
                        ),
                    )
                )

        outputs.results = matches
        return outputs.results

remove async #

remove(ids: list[UUID]) -> None

Remove entries from the vector store.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[UUID]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
async def remove(self, ids: list[UUID]) -> None:
    """
    Deletes the entries with the given IDs from the collection.

    Args:
        ids: The list of entries' IDs to remove.
    """
    with trace(ids=ids, collection=self._collection, index_name=self._index_name):
        # The collection is addressed with string IDs, so the UUIDs are converted first.
        string_ids = list(map(str, ids))
        self._collection.delete(ids=string_ids)

list async #

list(where: WhereQuery | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector store. The entries can be filtered, limited and offset.

PARAMETER DESCRIPTION
where

The filter dictionary - the keys are the field names and the values are the values to filter by. Not specifying the key means no filtering.

TYPE: WhereQuery | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

RAISES DESCRIPTION
MetadataNotFoundError

If the metadata is not found.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/chroma.py
async def list(
    self, where: WhereQuery | None = None, limit: int | None = None, offset: int = 0
) -> list[VectorStoreEntry]:
    """
    Lists entries of the collection, optionally filtered, limited and offset.

    Args:
        where: The filter dictionary - the keys are the field names and the values are the values to filter by.
            Not specifying the key means no filtering.
        limit: The maximum number of entries to return.
        offset: The number of entries to skip.

    Returns:
        The entries.

    Raises:
        MetadataNotFoundError: If the metadata is not found (not raised directly in this
            method; presumably surfaced by a helper - TODO confirm).
    """
    with trace(
        where=where, collection=self._collection, index_name=self._index_name, limit=limit, offset=offset
    ) as outputs:
        # Cast `where` to chromadb's Where type
        where_chroma: chromadb.Where | None = dict(where) if where else None

        fetched = self._collection.get(
            where=where_chroma,
            limit=limit,
            offset=offset,
            include=[types.IncludeEnum.metadatas, types.IncludeEnum.documents],
        )

        raw_ids = fetched.get("ids") or []
        raw_documents = fetched.get("documents") or []
        raw_metadatas: Sequence = fetched.get("metadatas") or []

        # Remove the `# type: ignore` comment when https://github.com/deepsense-ai/ragbits/pull/379/files resolved
        unflattened_metadatas: list[dict] = [unflatten_dict(meta) if meta else {} for meta in raw_metadatas]  # type: ignore[misc]

        # The image travels inside the metadata under "__image"; take it out before building entries.
        images: list[bytes | None] = [meta.pop("__image", None) for meta in unflattened_metadatas]

        listed: list[VectorStoreEntry] = []
        for raw_id, metadata, document, image in zip(
            raw_ids, unflattened_metadatas, raw_documents, images, strict=True
        ):
            listed.append(
                VectorStoreEntry(
                    id=UUID(raw_id),
                    text=document,
                    metadata=metadata,
                    image_bytes=image,
                )
            )

        outputs.results = listed
        return outputs.results

ragbits.core.vector_stores.qdrant.QdrantVectorStore #

QdrantVectorStore(client: AsyncQdrantClient, index_name: str, embedder: Embedder, embedding_type: EmbeddingType = EmbeddingType.TEXT, distance_method: Distance = Distance.COSINE, default_options: VectorStoreOptions | None = None)

Bases: VectorStoreWithExternalEmbedder[VectorStoreOptions]

Vector store implementation using Qdrant.

Constructs a new QdrantVectorStore instance.

PARAMETER DESCRIPTION
client

An instance of the Qdrant client.

TYPE: AsyncQdrantClient

index_name

The name of the index.

TYPE: str

embedder

The embedder to use for converting entries to vectors.

TYPE: Embedder

embedding_type

Which part of the entry to embed, either text or image. The other part will be ignored.

TYPE: EmbeddingType DEFAULT: TEXT

distance_method

The distance metric to use when creating the collection.

TYPE: Distance DEFAULT: COSINE

default_options

The default options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
def __init__(
    self,
    client: AsyncQdrantClient,
    index_name: str,
    embedder: Embedder,
    embedding_type: EmbeddingType = EmbeddingType.TEXT,
    distance_method: Distance = Distance.COSINE,
    default_options: VectorStoreOptions | None = None,
) -> None:
    """
    Creates a QdrantVectorStore; the arguments are only recorded, the client is not called here.

    Args:
        client: An instance of the Qdrant client.
        index_name: The name of the index.
        embedder: The embedder to use for converting entries to vectors.
        embedding_type: Which part of the entry to embed, either text or image. The other part will be ignored.
        distance_method: The distance metric to use when creating the collection.
        default_options: The default options for querying the vector store.
    """
    super().__init__(
        embedder=embedder,
        embedding_type=embedding_type,
        default_options=default_options,
    )
    self._distance_method = distance_method
    self._index_name = index_name
    self._client = client

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls class-attribute instance-attribute #

options_cls = VectorStoreOptions

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Builds an instance from a construction config, possibly of a subclass named by that config.

    Args:
        config: A model whose `type` names the class to instantiate and whose `config`
            is handed to that class's `from_config`.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: If the resolved class is not a subclass of the current class.
            A class that cannot be located is presumably reported by `import_by_path` - verify.
    """
    target_cls = import_by_path(config.type, cls.default_module)
    if issubclass(target_cls, cls):
        return target_cls.from_config(config.config)

    raise InvalidConfigError(f"{target_cls} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Builds an instance by calling a factory function located through its import path.

    The factory chooses the concrete type, so the result may be an instance of a subclass.

    Args:
        factory_path: Import path of the factory function, written as
            "module.submodule:factory_name". It is resolved together with `cls.default_module`.

    Returns:
        The object produced by calling the factory without arguments.

    Raises:
        InvalidConfigError: If the object produced by the factory is not an instance of this class.
            A factory that cannot be located is presumably reported by `import_by_path` - verify.
    """
    instance = import_by_path(factory_path, cls.default_module)()
    if isinstance(instance, cls):
        return instance
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Takes optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

A string representing the path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Creates an instance according to the project's component preferences.

    The sources are consulted in the following order and the first applicable one wins:

    1. `yaml_path_override`, if the YAML file has an entry under `cls.configuration_key`,
    2. `factory_path_override`,
    3. the factory registered in `config.component_preference_factories`,
    4. the configuration registered in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance holding the preferred factories and instance configurations.
        factory_path_override: Import path of a factory function, written as
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with the instance configuration.

    Returns:
        The instance built from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or a configuration.
    """
    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    registered_factory = config.component_preference_factories.get(cls.configuration_key)
    if registered_factory:
        return cls.subclass_from_factory(registered_factory)

    registered_config = config.preferred_instances_config.get(cls.configuration_key)
    if registered_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(registered_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    The "client" entry of the configuration describes the Qdrant client to construct
    (its type and constructor arguments). An optional "limits" entry among the client
    arguments is converted to an `httpx.Limits` object before the client is created.

    The dictionary passed in is left untouched: the constructed client is placed in a new
    dictionary that is forwarded to the parent implementation.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        KeyError: If the configuration has no "client" entry.
    """
    client_options = ObjectConstructionConfig.model_validate(config["client"])
    client_cls = import_by_path(client_options.type, qdrant_client)

    # Copy the client arguments so that swapping in the Limits object stays local.
    client_kwargs = dict(client_options.config)
    if "limits" in client_kwargs:
        client_kwargs["limits"] = httpx.Limits(**client_kwargs["limits"])

    # Build a fresh dict instead of assigning into `config`, so the caller's
    # configuration can be reused for another from_config call.
    return super().from_config({**config, "client": client_cls(**client_kwargs)})

store async #

store(entries: list[VectorStoreEntry]) -> None

Stores vector entries in the Qdrant collection.

PARAMETER DESCRIPTION
entries

List of VectorStoreEntry objects to store

TYPE: list[VectorStoreEntry]

RAISES DESCRIPTION
QdrantException

If upload to collection fails.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Embeds the given entries and uploads them to the Qdrant collection.

    The collection is created first when it does not exist yet; its vector size is taken
    from the length of the first computed embedding. Entries for which no embedding was
    produced are skipped.

    Args:
        entries: List of VectorStoreEntry objects to store

    Raises:
        QdrantException: If upload to collection fails.
    """
    with trace(
        entries=entries,
        index_name=self._index_name,
        distance_method=self._distance_method,
        embedder=repr(self._embedder),
        embedding_type=self._embedding_type,
    ):
        if not entries:
            return

        embeddings: dict = await self._create_embeddings(entries)

        collection_missing = not await self._client.collection_exists(self._index_name)
        if collection_missing:
            first_vector = next(iter(embeddings.values()))
            await self._client.create_collection(
                collection_name=self._index_name,
                vectors_config=VectorParams(size=len(first_vector), distance=self._distance_method),
            )

        def _points():  # noqa: ANN202
            # Lazily yields one point per entry that has an embedding.
            for entry in entries:
                if entry.id in embeddings:
                    yield models.PointStruct(
                        id=str(entry.id),
                        vector=embeddings[entry.id],
                        payload=entry.model_dump(exclude_none=True),
                    )

        # NOTE(review): called without `await`, as before - presumably the client exposes
        # upload_points as a regular (non-coroutine) method; confirm against the client API.
        self._client.upload_points(
            collection_name=self._index_name,
            points=_points(),
            wait=True,
        )

retrieve async #

retrieve(text: str, options: VectorStoreOptionsT | None = None) -> list[VectorStoreResult]

Retrieves entries from the Qdrant collection based on vector similarity.

PARAMETER DESCRIPTION
text

The text to query the vector store with.

TYPE: str

options

The options for querying the vector store.

TYPE: VectorStoreOptionsT | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreResult]

The retrieved entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
async def retrieve(self, text: str, options: VectorStoreOptionsT | None = None) -> list[VectorStoreResult]:
    """
    Retrieves entries from the Qdrant collection based on vector similarity.

    The query text is embedded with the store's embedder and the resulting vector is used
    to query the collection. When `max_distance` is set in the effective options, it is
    translated into a score threshold of `1 - max_distance`.

    Args:
        text: The text to query the vector store with.
        options: The options for querying the vector store. They are merged over the
            store's default options.

    Returns:
        The retrieved entries.
    """
    merged_options = (self.default_options | options) if options else self.default_options

    # Compare with None explicitly: a max_distance of 0.0 is a legitimate (strictest) limit
    # and must not be mistaken for "no limit", which a plain truthiness test would do.
    max_distance = merged_options.max_distance
    score_threshold = 1 - max_distance if max_distance is not None else None

    with trace(
        text=text,
        options=merged_options,
        index_name=self._index_name,
        distance_method=self._distance_method,
        embedder=repr(self._embedder),
        embedding_type=self._embedding_type,
    ) as outputs:
        query_vector = (await self._embedder.embed_text([text]))[0]

        query_results = await self._client.query_points(
            collection_name=self._index_name,
            query=query_vector,
            limit=merged_options.k,
            score_threshold=score_threshold,
            with_payload=True,
            with_vectors=True,
        )

        # Each point's payload is validated back into a VectorStoreEntry.
        outputs.results = [
            VectorStoreResult(
                entry=VectorStoreEntry.model_validate(point.payload),
                score=point.score,
                vector=cast(list[float], point.vector),
            )
            for point in query_results.points
        ]

        return outputs.results

remove async #

remove(ids: list[UUID]) -> None

Remove entries from the vector store.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[UUID]

RAISES DESCRIPTION
ValueError

If collection named self._index_name is not present in the vector store.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
async def remove(self, ids: list[UUID]) -> None:
    """
    Deletes the points with the given IDs from the collection.

    A KeyError raised during the deletion is suppressed, so removing an entry
    that is already gone is not treated as a failure.

    Args:
        ids: The list of entries' IDs to remove.

    Raises:
        ValueError: If collection named `self._index_name` is not present in the vector store
            (not raised directly by this method - presumably comes from the client; TODO confirm).
    """
    # Qdrant point IDs are passed as strings.
    selector = models.PointIdsList(points=[str(entry_id) for entry_id in ids])

    # it's ok if a point already doesn't exist, hence the suppressed KeyError
    with trace(ids=ids, index_name=self._index_name), contextlib.suppress(KeyError):
        await self._client.delete(
            collection_name=self._index_name,
            points_selector=selector,
        )

list async #

list(where: WhereQuery | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector store. The entries can be filtered, limited and offset.

PARAMETER DESCRIPTION
where

Conditions for filtering results. Reference: https://qdrant.tech/documentation/concepts/filtering

TYPE: WhereQuery | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

RAISES DESCRIPTION
MetadataNotFoundError

If the metadata is not found.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/qdrant.py
async def list(
    self,
    where: WhereQuery | None = None,
    limit: int | None = None,
    offset: int = 0,
) -> list[VectorStoreEntry]:
    """
    Lists entries stored in the collection, optionally filtered, limited and offset.

    An empty list is returned when the collection does not exist. When no limit is given
    (or it is 0), the number of points in the collection is used as the limit.

    Args:
        where: Conditions for filtering results.
            Reference: https://qdrant.tech/documentation/concepts/filtering
        limit: The maximum number of entries to return.
        offset: The number of entries to skip.

    Returns:
        The entries.

    Raises:
        MetadataNotFoundError: If the metadata is not found (not raised directly here -
            presumably by the filter construction; TODO confirm).
    """
    with trace(where=where, index_name=self._index_name, limit=limit, offset=offset) as outputs:
        if not await self._client.collection_exists(collection_name=self._index_name):
            return []

        if not limit:
            # Fall back to the collection size so that every point can be returned.
            count_result = await self._client.count(collection_name=self._index_name)
            limit = count_result.count

        qdrant_filter = None
        if where:
            qdrant_filter = self._create_qdrant_filter(where)

        response = await self._client.query_points(
            collection_name=self._index_name,
            query_filter=qdrant_filter,
            limit=limit,
            offset=offset,
            with_payload=True,
            with_vectors=True,
        )

        listed = []
        for point in response.points:
            listed.append(VectorStoreEntry.model_validate(point.payload))
        outputs.results = listed

        return outputs.results

ragbits.core.vector_stores.pgvector.PgVectorStore #

PgVectorStore(client: Pool, table_name: str, vector_size: int, embedder: Embedder, embedding_type: EmbeddingType = EmbeddingType.TEXT, distance_method: str = 'cosine', hnsw_params: dict | None = None, default_options: VectorStoreOptions | None = None)

Bases: VectorStoreWithExternalEmbedder[VectorStoreOptions]

Vector store implementation using pgvector (https://github.com/pgvector/pgvector).

Currently, it doesn't support image embeddings when storing and retrieving entries. This will be added in the future.

Constructs a new PgVectorStore instance.

PARAMETER DESCRIPTION
client

The pgVector database connection pool.

TYPE: Pool

table_name

The name of the table.

TYPE: str

vector_size

The size of the vectors.

TYPE: int

embedder

The embedder to use for converting entries to vectors.

TYPE: Embedder

embedding_type

Which part of the entry to embed, either text or image. The other part will be ignored.

TYPE: EmbeddingType DEFAULT: TEXT

distance_method

The distance method to use.

TYPE: str DEFAULT: 'cosine'

hnsw_params

The parameters for the HNSW index. If None, the default parameters will be used.

TYPE: dict | None DEFAULT: None

default_options

The default options for querying the vector store.

TYPE: VectorStoreOptions | None DEFAULT: None

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/pgvector.py
def __init__(
    self,
    client: asyncpg.Pool,
    table_name: str,
    vector_size: int,
    embedder: Embedder,
    embedding_type: EmbeddingType = EmbeddingType.TEXT,
    distance_method: str = "cosine",
    hnsw_params: dict | None = None,
    default_options: VectorStoreOptions | None = None,
) -> None:
    """
    Constructs a new PgVectorStore instance.

    Args:
        client: The pgVector database connection pool.
        table_name: The name of the table. Must consist only of letters, digits and
            underscores, and must not start with a digit.
        vector_size: The size of the vectors.
        embedder: The embedder to use for converting entries to vectors.
        embedding_type: Which part of the entry to embed, either text or image. The other part will be ignored.
        distance_method: The distance method to use.
        hnsw_params: The parameters for the HNSW index. If None, the default parameters
            (`m=4`, `ef_construction=10`) will be used.
        default_options: The default options for querying the vector store.

    Raises:
        ValueError: If the table name is not a valid identifier, if the vector size is not
            a positive integer, or if `hnsw_params` is not a dictionary holding positive
            integer "m" and "ef_construction" values.
    """
    super().__init__(
        default_options=default_options,
        embedder=embedder,
        embedding_type=embedding_type,
    )

    # The table name is validated here because it is later interpolated into SQL text.
    # fullmatch is used rather than match with "$": "$" also matches right before a
    # trailing newline, which would let a name such as "items\n" slip through.
    if not re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", table_name):
        raise ValueError(f"Invalid table name: {table_name}")
    if not isinstance(vector_size, int) or vector_size <= 0:
        raise ValueError("Vector size must be a positive integer.")

    if hnsw_params is None:
        hnsw_params = {"m": 4, "ef_construction": 10}
    elif not isinstance(hnsw_params, dict):
        raise ValueError("hnsw_params must be a dictionary.")
    elif "m" not in hnsw_params or "ef_construction" not in hnsw_params:
        raise ValueError("hnsw_params must contain 'm' and 'ef_construction' keys.")
    elif not isinstance(hnsw_params["m"], int) or hnsw_params["m"] <= 0:
        raise ValueError("m must be a positive integer.")
    elif not isinstance(hnsw_params["ef_construction"], int) or hnsw_params["ef_construction"] <= 0:
        raise ValueError("ef_construction must be a positive integer.")

    self._client = client
    self._table_name = table_name
    self._vector_size = vector_size
    self._distance_method = distance_method
    self._hnsw_params = hnsw_params

default_module class-attribute instance-attribute #

default_module: ClassVar = vector_stores

configuration_key class-attribute instance-attribute #

configuration_key: ClassVar = 'vector_store'

default_options instance-attribute #

default_options: OptionsT = default_options or options_cls()

options_cls class-attribute instance-attribute #

options_cls = VectorStoreOptions

subclass_from_config classmethod #

subclass_from_config(config: ObjectConstructionConfig) -> Self

Initializes the class with the provided configuration. May return a subclass of the class, if requested by the configuration.

PARAMETER DESCRIPTION
config

A model containing configuration details for the class.

TYPE: ObjectConstructionConfig

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

RAISES DESCRIPTION
InvalidConfigError

The class can't be found or is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_config(cls, config: ObjectConstructionConfig) -> Self:
    """
    Builds an instance of the type named in the configuration.

    The type in `config.type` is imported and must be this class or one of its
    subclasses; construction is then delegated to that type's `from_config`, which
    receives `config.config`.

    Args:
        config: A model containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        InvalidConfigError: The class can't be found or is not a subclass of the current class.
    """
    target_cls = import_by_path(config.type, cls.default_module)
    if issubclass(target_cls, cls):
        return target_cls.from_config(config.config)

    raise InvalidConfigError(f"{target_cls} is not a subclass of {cls}")

subclass_from_factory classmethod #

subclass_from_factory(factory_path: str) -> Self

Creates the class using the provided factory function. May return a subclass of the class, if requested by the factory.

PARAMETER DESCRIPTION
factory_path

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided factory function.

RAISES DESCRIPTION
InvalidConfigError

The factory can't be found or the object returned is not a subclass of the current class.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def subclass_from_factory(cls, factory_path: str) -> Self:
    """
    Builds an instance by calling a factory function located through its import path.

    The factory decides which concrete type to build, so the result may be an instance
    of a subclass of this class.

    Args:
        factory_path: Path to the factory function, written as
            "module.submodule:factory_name".

    Returns:
        The object produced by the factory.

    Raises:
        InvalidConfigError: The factory can't be found or the object it returned
            is not an instance of the current class.
    """
    # cls.default_module is handed to import_by_path together with the path;
    # the imported factory is then called without arguments.
    instance = import_by_path(factory_path, cls.default_module)()
    if isinstance(instance, cls):
        return instance
    raise InvalidConfigError(f"The object returned by factory {factory_path} is not an instance of {cls}")

preferred_subclass classmethod #

preferred_subclass(config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None) -> Self

Tries to create an instance by looking at the project's component preferences, either from YAML or from the factory. Accepts optional overrides for both, which take higher precedence.

PARAMETER DESCRIPTION
config

The CoreConfig instance containing preferred factory and configuration details.

TYPE: CoreConfig

factory_path_override

A string representing the path to the factory function in the format of "module.submodule:factory_name".

TYPE: str | None DEFAULT: None

yaml_path_override

A string representing the path to the YAML file containing the Ragstack instance configuration.

TYPE: Path | None DEFAULT: None

RAISES DESCRIPTION
InvalidConfigError

If the default factory or configuration can't be found.

Source code in packages/ragbits-core/src/ragbits/core/utils/config_handling.py
@classmethod
def preferred_subclass(
    cls, config: CoreConfig, factory_path_override: str | None = None, yaml_path_override: Path | None = None
) -> Self:
    """
    Builds an instance according to the project's component preferences.

    The sources below are consulted in order and the first one that applies wins:

    1. The YAML file at `yaml_path_override`, when it has a non-empty entry under `cls.configuration_key`.
    2. The factory at `factory_path_override`.
    3. The factory registered under `cls.configuration_key` in `config.component_preference_factories`.
    4. The configuration registered under `cls.configuration_key` in `config.preferred_instances_config`.

    Args:
        config: The CoreConfig instance containing preferred factory and configuration details.
        factory_path_override: Path to a factory function, written as
            "module.submodule:factory_name".
        yaml_path_override: Path to a YAML file with the instance configuration.

    Returns:
        The instance created from the first applicable source.

    Raises:
        NoPreferredConfigError: If none of the sources provides a factory or a configuration.
    """
    if yaml_path_override:
        yaml_entry = get_config_from_yaml(yaml_path_override).get(cls.configuration_key)
        # A YAML file without an entry for this component falls through to the next source.
        if yaml_entry:
            return cls.subclass_from_config(ObjectConstructionConfig.model_validate(yaml_entry))

    if factory_path_override:
        return cls.subclass_from_factory(factory_path_override)

    project_factory = config.component_preference_factories.get(cls.configuration_key)
    if project_factory:
        return cls.subclass_from_factory(project_factory)

    project_config = config.preferred_instances_config.get(cls.configuration_key)
    if project_config:
        return cls.subclass_from_config(ObjectConstructionConfig.model_validate(project_config))

    raise NoPreferredConfigError(f"Could not find preferred factory or configuration for {cls.configuration_key}")

from_config classmethod #

from_config(config: dict) -> Self

Initializes the class with the provided configuration.

PARAMETER DESCRIPTION
config

A dictionary containing configuration details for the class.

TYPE: dict

RETURNS DESCRIPTION
Self

An instance of the class initialized with the provided configuration.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/base.py
@classmethod
def from_config(cls, config: dict) -> Self:
    """
    Initializes the class with the provided configuration.

    Two entries get special treatment: "default_options" (optional) is turned into an
    instance of `cls.options_cls`, and "embedder" (required) is turned into an Embedder
    built from its own construction config. All remaining entries are passed to the class
    constructor as keyword arguments.

    The dictionary passed in is not modified.

    Args:
        config: A dictionary containing configuration details for the class.

    Returns:
        An instance of the class initialized with the provided configuration.

    Raises:
        KeyError: If the configuration has no "embedder" entry.
    """
    # Pop from a shallow copy so the caller's dictionary keeps its keys and can be reused.
    remaining = dict(config)

    default_options = remaining.pop("default_options", None)
    options = cls.options_cls(**default_options) if default_options else None

    embedder_config = remaining.pop("embedder")
    embedder: Embedder = Embedder.subclass_from_config(ObjectConstructionConfig.model_validate(embedder_config))

    return cls(**remaining, default_options=options, embedder=embedder)

create_table async #

create_table() -> None

Create a pgVector table with an HNSW index for given similarity.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/pgvector.py
async def create_table(self) -> None:
    """
    Creates the pgVector table together with its HNSW index, unless the table already exists.

    The vector extension is enabled first (if it is not yet). Table and index are created
    inside a single transaction; a failure is reported on stdout and re-raised.
    """
    with trace(
        table_name=self._table_name,
        distance_method=self._distance_method,
        vector_size=self._vector_size,
        hnsw_index_parameters=self._hnsw_params,
    ):
        enable_extension_sql = "CREATE EXTENSION IF NOT EXISTS vector;"
        table_exists_sql = """
                SELECT EXISTS (
                SELECT FROM information_schema.tables
                WHERE table_name = $1
            ); """
        # First element of the DISTANCE_OPS entry for the configured distance method;
        # it is placed in the index definition below.
        distance_op = DISTANCE_OPS[self._distance_method][0]

        # The interpolated values (_table_name, _vector_size, _hnsw_params) have all been
        # validated in the class constructor, so building the statements with f-strings is safe.
        create_table_sql = f"""
        CREATE TABLE {self._table_name}
        (id UUID, key TEXT, vector VECTOR({self._vector_size}), metadata JSONB);
        """
        create_index_sql = f"""
                CREATE INDEX {self._table_name + "_hnsw_idx"} ON {self._table_name}
                USING hnsw (vector {distance_op})
                WITH (m = {self._hnsw_params["m"]}, ef_construction = {self._hnsw_params["ef_construction"]});
                """

        async with self._client.acquire() as connection:
            await connection.execute(enable_extension_sql)

            if await connection.fetchval(table_exists_sql, self._table_name):
                print("Table already exists!")
                return

            try:
                # Table and index are created atomically.
                async with connection.transaction():
                    await connection.execute(create_table_sql)
                    await connection.execute(create_index_sql)

                print("Table and index created!")
            except Exception as exc:
                print(f"Failed to create table and index: {exc}")
                raise

store async #

store(entries: list[VectorStoreEntry]) -> None

Stores entries in the pgVector collection.

PARAMETER DESCRIPTION
entries

The entries to store.

TYPE: list[VectorStoreEntry]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/pgvector.py
async def store(self, entries: list[VectorStoreEntry]) -> None:
    """
    Embeds the given entries and inserts them into the pgVector table.

    Entries for which no embedding was produced are skipped. If the table does not exist,
    it is created and the whole operation is retried; when creating the table fails,
    the failure is printed and nothing is stored.

    Args:
        entries: The entries to store.
    """
    if not entries:
        return
    # _table_name has been validated in the class constructor, and it is a valid table name.
    insert_sql = f"""
    INSERT INTO {self._table_name} (id, key, vector, metadata)
    VALUES ($1, $2, $3, $4)
    """  # noqa S608
    with trace(
        table_name=self._table_name,
        entries=entries,
        vector_size=self._vector_size,
        embedder=repr(self._embedder),
        embedding_type=self._embedding_type,
    ):
        embeddings = await self._create_embeddings(entries)

        try:
            async with self._client.acquire() as connection:
                for entry in entries:
                    if entry.id in embeddings:
                        # The vector is sent in its string form, the metadata as JSON text.
                        await connection.execute(
                            insert_sql,
                            str(entry.id),
                            entry.text,
                            str(embeddings[entry.id]),
                            json.dumps(entry.metadata, default=pydantic_encoder),
                        )
        except asyncpg.exceptions.UndefinedTableError:
            print(f"Table {self._table_name} does not exist. Creating the table.")
            try:
                await self.create_table()
            except Exception as exc:
                print(f"Failed to handle missing table: {exc}")
            else:
                # The table exists now, so run the whole store operation again.
                print("Table created successfully. Inserting entries...")
                await self.store(entries)

remove async #

remove(ids: list[UUID]) -> None

Remove entries from the vector store.

PARAMETER DESCRIPTION
ids

The list of entries' IDs to remove.

TYPE: list[UUID]

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/pgvector.py
async def remove(self, ids: list[UUID]) -> None:
    """
    Deletes the rows with the given IDs from the table.

    Nothing is executed when the list of IDs is empty. A missing table is reported
    on stdout and otherwise ignored.

    Args:
        ids: The list of entries' IDs to remove.
    """
    if not ids:
        print("No IDs provided, nothing to remove")
        return
    # _table_name has been validated in the class constructor, and it is a valid table name.
    delete_sql = f"""
    DELETE FROM {self._table_name}
    WHERE id = ANY($1)
    """  # noqa S608
    with trace(table_name=self._table_name, ids=ids):
        try:
            async with self._client.acquire() as connection:
                await connection.execute(delete_sql, ids)
        except asyncpg.exceptions.UndefinedTableError:
            print(f"Table {self._table_name} does not exist.")

retrieve async #

retrieve(text: str, options: VectorStoreOptionsT | None = None) -> list[VectorStoreResult]

Retrieves entries from the pgVector collection.

PARAMETER DESCRIPTION
text

The text to query the vector store with.

TYPE: str

options

The options for querying the vector store.

TYPE: VectorStoreOptionsT | None DEFAULT: None

RETURNS DESCRIPTION
list[VectorStoreResult]

The retrieved entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/pgvector.py
async def retrieve(
    self,
    text: str,
    options: VectorStoreOptionsT | None = None,
) -> list[VectorStoreResult]:
    """
    Retrieves entries from the pgVector collection.

    The query text is embedded with the store's embedder and the resulting vector is used
    to build the retrieval query. Each result's score is taken from the "distance" column
    of the returned record. A missing table is reported on stdout and yields an empty list.

    Args:
        text: The text to query the vector store with.
        options: The options for querying the vector store. They are merged over the
            store's default options.

    Returns:
        The retrieved entries.
    """
    # Merged once and used both for tracing and for building the query.
    query_options = (self.default_options | options) if options else self.default_options
    with trace(
        text=text,
        table_name=self._table_name,
        query_options=query_options,
        vector_size=self._vector_size,
        distance_method=self._distance_method,
        embedder=repr(self._embedder),
        embedding_type=self._embedding_type,
    ) as outputs:
        vector = (await self._embedder.embed_text([text]))[0]

        retrieve_query, values = self._create_retrieve_query(vector, query_options)

        try:
            async with self._client.acquire() as conn:
                results = await conn.fetch(retrieve_query, *values)

            # "metadata" and "vector" arrive as JSON text and are decoded here.
            outputs.results = [
                VectorStoreResult(
                    entry=VectorStoreEntry(
                        id=record["id"],
                        text=record["key"],
                        metadata=json.loads(record["metadata"]),
                    ),
                    vector=json.loads(record["vector"]),
                    score=record["distance"],
                )
                for record in results
            ]

        except asyncpg.exceptions.UndefinedTableError:
            print(f"Table {self._table_name} does not exist.")
            outputs.results = []
        return outputs.results

list async #

list(where: WhereQuery | None = None, limit: int | None = None, offset: int = 0) -> list[VectorStoreEntry]

List entries from the vector store. The entries can be filtered, limited and offset.

PARAMETER DESCRIPTION
where

The filter dictionary - the keys are the field names and the values are the values to filter by. Not specifying the key means no filtering.

TYPE: WhereQuery | None DEFAULT: None

limit

The maximum number of entries to return.

TYPE: int | None DEFAULT: None

offset

The number of entries to skip.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
list[VectorStoreEntry]

The entries.

Source code in packages/ragbits-core/src/ragbits/core/vector_stores/pgvector.py
async def list(
    self, where: WhereQuery | None = None, limit: int | None = None, offset: int = 0
) -> list[VectorStoreEntry]:
    """
    Lists entries stored in the table, optionally filtered, limited and offset.

    Args:
        where: The filter dictionary - the keys are the field names and the values are the values to filter by.
            Not specifying the key means no filtering.
        limit: The maximum number of entries to return.
        offset: The number of entries to skip.

    Returns:
        The entries.
    """
    with trace(table=self._table_name, query=where, limit=limit, offset=offset) as outputs:
        # Build the SQL text and its positional arguments, then fetch the matching records.
        sql, params = self._create_list_query(where, limit, offset)
        fetched = await self._fetch_records(sql, params)
        outputs.listed_entries = fetched
        return outputs.listed_entries