Skip to content

Elements#

ragbits.document_search.documents.element.Element #

Bases: BaseModel, ABC

An object representing an element in a document.

element_type instance-attribute #

element_type: str

document_meta instance-attribute #

document_meta: DocumentMeta

location class-attribute instance-attribute #

location: ElementLocation | None = None

id property #

id: str

Retrieve the ID of the element, primarily used to represent the element's data.

RETURNS DESCRIPTION
str

string representing element

TYPE: str

key property #

key: str | None

Get the representation of the element for embedding.

RETURNS DESCRIPTION
str | None

The representation for embedding.

text_representation abstractmethod property #

text_representation: str | None

Get the text representation of the element.

RETURNS DESCRIPTION
str | None

The text representation.

image_representation property #

image_representation: bytes | None

Get the image representation of the element.

RETURNS DESCRIPTION
bytes | None

The image representation.

get_id_components #

get_id_components() -> dict[str, str]

Creates a dictionary of key-value pairs of the element's ID components.

RETURNS DESCRIPTION
dict

a dictionary

TYPE: dict[str, str]

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
def get_id_components(self) -> dict[str, str]:
    """
    Collect the fields that together identify this element.

    Returns:
        A dictionary with the keys "meta", "type", "key", "text" and "location".
        "meta" and "type" hold the document metadata ID and the element type
        unchanged; the other three values are passed through `str()`, so a
        value of `None` is stored as the string "None".
    """
    meta_id = self.document_meta.id
    type_name = self.element_type
    key_repr = str(self.key)
    text_repr = str(self.text_representation)
    location_repr = str(self.location)
    return {
        "meta": meta_id,
        "type": type_name,
        "key": key_repr,
        "text": text_repr,
        "location": location_repr,
    }

from_vector_db_entry classmethod #

from_vector_db_entry(db_entry: VectorStoreEntry) -> Element

Create an element from a vector database entry.

PARAMETER DESCRIPTION
db_entry

The vector database entry.

TYPE: VectorStoreEntry

RETURNS DESCRIPTION
Element

The element.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
@classmethod
def from_vector_db_entry(cls, db_entry: VectorStoreEntry) -> "Element":
    """
    Create an element from a vector database entry.

    The concrete element class is looked up in `Element._elements_registry`
    using the "element_type" value stored in the entry's metadata, and is then
    instantiated with the metadata as keyword arguments. The "embedding_type"
    key, if present, is left out of those arguments.

    The metadata of `db_entry` itself is not modified, so the same entry can be
    passed again or inspected afterwards.

    Args:
        db_entry: The vector database entry.

    Returns:
        The element.

    Raises:
        KeyError: Propagated from the lookups when the metadata has no
            "element_type" key or the registry has no entry for that type.
    """
    element_type = db_entry.metadata["element_type"]
    element_cls = Element._elements_registry[element_type]
    # Build a filtered copy rather than `del`-ing from db_entry.metadata:
    # deleting in place would silently alter the caller's entry.
    init_kwargs = {name: value for name, value in db_entry.metadata.items() if name != "embedding_type"}
    return element_cls(**init_kwargs)

to_vector_db_entry #

to_vector_db_entry() -> VectorStoreEntry

Create a vector database entry from the element.

RETURNS DESCRIPTION
VectorStoreEntry

The vector database entry

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
def to_vector_db_entry(self) -> VectorStoreEntry:
    """
    Convert this element into a vector database entry.

    The entry ID is a UUIDv5 derived from the element's `id` within the OID
    namespace. The metadata is the model dump without the "id" and "key"
    fields, with the document source ID written into
    `metadata["document_meta"]["source"]["id"]`.

    Returns:
        The vector database entry, carrying the element's key as text, its
        image representation as image bytes, and the metadata described above.
    """
    # The element ID is currently the only component of the joined name.
    entry_id = uuid.uuid5(uuid.NAMESPACE_OID, ";".join([self.id]))

    payload = self.model_dump(exclude={"id", "key"})
    # The source ID is taken from the live object; presumably the dump does not
    # carry it on its own - TODO confirm.
    payload["document_meta"]["source"]["id"] = self.document_meta.source.id

    return VectorStoreEntry(
        id=entry_id,
        text=self.key,
        image_bytes=self.image_representation,
        metadata=payload,
    )

ragbits.document_search.documents.element.TextElement #

Bases: Element

An object representing a text element in a document.

document_meta instance-attribute #

document_meta: DocumentMeta

location class-attribute instance-attribute #

location: ElementLocation | None = None

id property #

id: str

Retrieve the ID of the element, primarily used to represent the element's data.

RETURNS DESCRIPTION
str

string representing element

TYPE: str

key property #

key: str | None

Get the representation of the element for embedding.

RETURNS DESCRIPTION
str | None

The representation for embedding.

image_representation property #

image_representation: bytes | None

Get the image representation of the element.

RETURNS DESCRIPTION
bytes | None

The image representation.

element_type class-attribute instance-attribute #

element_type: str = 'text'

content instance-attribute #

content: str

text_representation property #

text_representation: str

Get the text representation of the element.

RETURNS DESCRIPTION
str

The text representation.

get_id_components #

get_id_components() -> dict[str, str]

Creates a dictionary of key-value pairs of the element's ID components.

RETURNS DESCRIPTION
dict

a dictionary

TYPE: dict[str, str]

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
def get_id_components(self) -> dict[str, str]:
    """
    Collect the fields that together identify this element.

    Returns:
        A dictionary with the keys "meta", "type", "key", "text" and "location".
        "meta" and "type" hold the document metadata ID and the element type
        unchanged; the other three values are passed through `str()`, so a
        value of `None` is stored as the string "None".
    """
    meta_id = self.document_meta.id
    type_name = self.element_type
    key_repr = str(self.key)
    text_repr = str(self.text_representation)
    location_repr = str(self.location)
    return {
        "meta": meta_id,
        "type": type_name,
        "key": key_repr,
        "text": text_repr,
        "location": location_repr,
    }

from_vector_db_entry classmethod #

from_vector_db_entry(db_entry: VectorStoreEntry) -> Element

Create an element from a vector database entry.

PARAMETER DESCRIPTION
db_entry

The vector database entry.

TYPE: VectorStoreEntry

RETURNS DESCRIPTION
Element

The element.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
@classmethod
def from_vector_db_entry(cls, db_entry: VectorStoreEntry) -> "Element":
    """
    Create an element from a vector database entry.

    The concrete element class is looked up in `Element._elements_registry`
    using the "element_type" value stored in the entry's metadata, and is then
    instantiated with the metadata as keyword arguments. The "embedding_type"
    key, if present, is left out of those arguments.

    The metadata of `db_entry` itself is not modified, so the same entry can be
    passed again or inspected afterwards.

    Args:
        db_entry: The vector database entry.

    Returns:
        The element.

    Raises:
        KeyError: Propagated from the lookups when the metadata has no
            "element_type" key or the registry has no entry for that type.
    """
    element_type = db_entry.metadata["element_type"]
    element_cls = Element._elements_registry[element_type]
    # Build a filtered copy rather than `del`-ing from db_entry.metadata:
    # deleting in place would silently alter the caller's entry.
    init_kwargs = {name: value for name, value in db_entry.metadata.items() if name != "embedding_type"}
    return element_cls(**init_kwargs)

to_vector_db_entry #

to_vector_db_entry() -> VectorStoreEntry

Create a vector database entry from the element.

RETURNS DESCRIPTION
VectorStoreEntry

The vector database entry

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
def to_vector_db_entry(self) -> VectorStoreEntry:
    """
    Convert this element into a vector database entry.

    The entry ID is a UUIDv5 derived from the element's `id` within the OID
    namespace. The metadata is the model dump without the "id" and "key"
    fields, with the document source ID written into
    `metadata["document_meta"]["source"]["id"]`.

    Returns:
        The vector database entry, carrying the element's key as text, its
        image representation as image bytes, and the metadata described above.
    """
    # The element ID is currently the only component of the joined name.
    entry_id = uuid.uuid5(uuid.NAMESPACE_OID, ";".join([self.id]))

    payload = self.model_dump(exclude={"id", "key"})
    # The source ID is taken from the live object; presumably the dump does not
    # carry it on its own - TODO confirm.
    payload["document_meta"]["source"]["id"] = self.document_meta.source.id

    return VectorStoreEntry(
        id=entry_id,
        text=self.key,
        image_bytes=self.image_representation,
        metadata=payload,
    )

ragbits.document_search.documents.element.ImageElement #

Bases: Element

An object representing an image element in a document.

document_meta instance-attribute #

document_meta: DocumentMeta

location class-attribute instance-attribute #

location: ElementLocation | None = None

id property #

id: str

Retrieve the ID of the element, primarily used to represent the element's data.

RETURNS DESCRIPTION
str

string representing element

TYPE: str

key property #

key: str | None

Get the representation of the element for embedding.

RETURNS DESCRIPTION
str | None

The representation for embedding.

element_type class-attribute instance-attribute #

element_type: str = 'image'

image_bytes instance-attribute #

image_bytes: SerializableBytes

description class-attribute instance-attribute #

description: str | None = None

ocr_extracted_text class-attribute instance-attribute #

ocr_extracted_text: str | None = None

text_representation property #

text_representation: str | None

Get the text representation of the element.

RETURNS DESCRIPTION
str | None

The text representation.

image_representation property #

image_representation: bytes

Get the image representation of the element.

RETURNS DESCRIPTION
bytes

The image representation.

from_vector_db_entry classmethod #

from_vector_db_entry(db_entry: VectorStoreEntry) -> Element

Create an element from a vector database entry.

PARAMETER DESCRIPTION
db_entry

The vector database entry.

TYPE: VectorStoreEntry

RETURNS DESCRIPTION
Element

The element.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
@classmethod
def from_vector_db_entry(cls, db_entry: VectorStoreEntry) -> "Element":
    """
    Create an element from a vector database entry.

    The concrete element class is looked up in `Element._elements_registry`
    using the "element_type" value stored in the entry's metadata, and is then
    instantiated with the metadata as keyword arguments. The "embedding_type"
    key, if present, is left out of those arguments.

    The metadata of `db_entry` itself is not modified, so the same entry can be
    passed again or inspected afterwards.

    Args:
        db_entry: The vector database entry.

    Returns:
        The element.

    Raises:
        KeyError: Propagated from the lookups when the metadata has no
            "element_type" key or the registry has no entry for that type.
    """
    element_type = db_entry.metadata["element_type"]
    element_cls = Element._elements_registry[element_type]
    # Build a filtered copy rather than `del`-ing from db_entry.metadata:
    # deleting in place would silently alter the caller's entry.
    init_kwargs = {name: value for name, value in db_entry.metadata.items() if name != "embedding_type"}
    return element_cls(**init_kwargs)

to_vector_db_entry #

to_vector_db_entry() -> VectorStoreEntry

Create a vector database entry from the element.

RETURNS DESCRIPTION
VectorStoreEntry

The vector database entry

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
def to_vector_db_entry(self) -> VectorStoreEntry:
    """
    Convert this element into a vector database entry.

    The entry ID is a UUIDv5 derived from the element's `id` within the OID
    namespace. The metadata is the model dump without the "id" and "key"
    fields, with the document source ID written into
    `metadata["document_meta"]["source"]["id"]`.

    Returns:
        The vector database entry, carrying the element's key as text, its
        image representation as image bytes, and the metadata described above.
    """
    # The element ID is currently the only component of the joined name.
    entry_id = uuid.uuid5(uuid.NAMESPACE_OID, ";".join([self.id]))

    payload = self.model_dump(exclude={"id", "key"})
    # The source ID is taken from the live object; presumably the dump does not
    # carry it on its own - TODO confirm.
    payload["document_meta"]["source"]["id"] = self.document_meta.source.id

    return VectorStoreEntry(
        id=entry_id,
        text=self.key,
        image_bytes=self.image_representation,
        metadata=payload,
    )

get_id_components #

get_id_components() -> dict[str, str]

Creates a dictionary of key-value pairs of the element's ID components.

RETURNS DESCRIPTION
dict

a dictionary

TYPE: dict[str, str]

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/element.py
def get_id_components(self) -> dict[str, str]:
    """
    Extend the parent's ID components with a hash of the image content.

    Returns:
        The dictionary returned by the parent implementation, with an added
        "image_hash" key holding the hexadecimal SHA-256 digest of
        `image_bytes`.
    """
    components = super().get_id_components()
    image_digest = hashlib.sha256(self.image_bytes).hexdigest()
    components["image_hash"] = image_digest
    return components