Skip to content

Documents and Elements#

ragbits.document_search.documents.document.Document #

Bases: BaseModel

An object representing a document which is downloaded and stored locally.

local_path instance-attribute #

local_path: Path

metadata instance-attribute #

metadata: DocumentMeta

from_document_meta classmethod #

from_document_meta(document_meta: DocumentMeta, local_path: Path) -> Document

Create a document from document metadata. Depending on the document type, a different object is returned.

PARAMETER DESCRIPTION
document_meta

The document metadata.

TYPE: DocumentMeta

local_path

The local path to the document.

TYPE: Path

RETURNS DESCRIPTION
Document

The document.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/document.py
@classmethod
def from_document_meta(cls, document_meta: DocumentMeta, local_path: Path) -> "Document":
    """
    Build the document object that matches the type declared in the metadata.

    When the metadata's document type is ``DocumentType.MD`` or
    ``DocumentType.TXT`` a ``TextDocument`` is returned; for every other
    type an instance of the class this method was called on is returned.

    Args:
        document_meta: The document metadata.
        local_path: The local path to the document.

    Returns:
        The document.
    """
    # Pick the concrete class first, then construct it in a single place.
    is_text = document_meta.document_type in [DocumentType.MD, DocumentType.TXT]
    document_cls = TextDocument if is_text else cls
    return document_cls(local_path=local_path, metadata=document_meta)

ragbits.document_search.documents.element.Element #

Bases: BaseModel, ABC

An object representing an element in a document.

element_type instance-attribute #

element_type: str

document_meta instance-attribute #

document_meta: DocumentMeta

location class-attribute instance-attribute #

location: ElementLocation | None = None

id property #

id: str

Get the ID of the element. The ID is primarily used as a key in the vector store. The current representation is a UUID5 hash of various element metadata, including its contents and the location it was sourced from.

RETURNS DESCRIPTION
str

The ID in the form of a UUID5 hash.

key property #

key: str

Get the representation of the element for embedding.

RETURNS DESCRIPTION
str

The representation for embedding.

text_representation abstractmethod property #

text_representation: str

Get the text representation of the element.

RETURNS DESCRIPTION
str

The text representation.

from_vector_db_entry classmethod #

from_vector_db_entry(db_entry: VectorStoreEntry) -> Element

Create an element from a vector database entry.

PARAMETER DESCRIPTION
db_entry

The vector database entry.

TYPE: VectorStoreEntry

RETURNS DESCRIPTION
Element

The element.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
@classmethod
def from_vector_db_entry(cls, db_entry: VectorStoreEntry) -> "Element":
    """
    Rebuild an element from the metadata stored in a vector database entry.

    The ``element_type`` value of the entry metadata is used to look up the
    class in the element registry; that class is then instantiated with the
    whole metadata mapping as keyword arguments.

    Args:
        db_entry: The vector database entry.

    Returns:
        The element.
    """
    stored_fields = db_entry.metadata
    registered_cls = Element._elements_registry[stored_fields["element_type"]]
    return registered_cls(**stored_fields)

to_vector_db_entry #

to_vector_db_entry(vector: list[float]) -> VectorStoreEntry

Create a vector database entry from the element.

PARAMETER DESCRIPTION
vector

The vector.

TYPE: list[float]

RETURNS DESCRIPTION
VectorStoreEntry

The vector database entry

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
def to_vector_db_entry(self, vector: list[float]) -> VectorStoreEntry:
    """
    Wrap this element and its embedding vector in a vector database entry.

    The entry carries the element's ``id`` and ``key``, the given vector, and
    the model dump of the element (without ``id`` and ``key``) as metadata.

    Args:
        vector: The vector.

    Returns:
        The vector database entry.
    """
    entry_id = self.id
    entry_key = self.key
    # "id" and "key" are passed as dedicated entry fields, so they are left out of the metadata.
    payload = self.model_dump(exclude={"id", "key"})
    return VectorStoreEntry(id=entry_id, key=entry_key, vector=vector, metadata=payload)

ragbits.document_search.documents.sources.Source #

Bases: BaseModel, ABC

An object representing a source.

id abstractmethod property #

id: str

Get the source ID.

RETURNS DESCRIPTION
str

The source ID.

class_identifier classmethod #

class_identifier() -> str

Get an identifier for the source type.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py
@classmethod
def class_identifier(cls) -> str:
    """
    Return the identifier of the source type.

    The identifier is the class name converted with ``to_snake``.
    """
    class_name = cls.__name__
    return to_snake(class_name)

source_type #

source_type() -> str

Pydantic field based on the class identifier.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py
@computed_field
def source_type(self) -> str:
    """
    Pydantic computed field whose value is the class identifier of this source.
    """
    identifier = self.class_identifier()
    return identifier

fetch abstractmethod async #

fetch() -> Path

Load the source.

RETURNS DESCRIPTION
Path

The path to the source.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/sources.py
@abstractmethod
async def fetch(self) -> Path:
    """
    Load the source.

    This is an abstract coroutine: it has no implementation here and must be
    provided by each concrete source class.

    Returns:
        The path to the source.
    """