Skip to content

Documents#

ragbits.document_search.documents.document.Document #

Bases: BaseModel

An object representing a document which is downloaded and stored locally.

local_path instance-attribute #

local_path: Path

metadata instance-attribute #

metadata: DocumentMeta

from_document_meta classmethod #

from_document_meta(document_meta: DocumentMeta, local_path: Path) -> Document

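Create a document from document metadata. The returned object depends on the document type: MD and TXT metadata produce a TextDocument, while any other type produces an instance of the class the method is called on.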

PARAMETER DESCRIPTION
document_meta

The document metadata.

TYPE: DocumentMeta

local_path

The local path to the document.

TYPE: Path

RETURNS DESCRIPTION
Document

The document.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/document.py
@classmethod
def from_document_meta(cls, document_meta: DocumentMeta, local_path: Path) -> "Document":
    """
    Build the document object that matches the type declared in the metadata.

    Metadata whose type is MD or TXT yields a `TextDocument`; any other
    document type yields an instance of the class this method is called on.

    Args:
        document_meta: The document metadata.
        local_path: The local path to the document.

    Returns:
        The document wrapping the given path and metadata.
    """
    text_types = (DocumentType.MD, DocumentType.TXT)
    # Guard clause: anything that is not a text type is handled by the calling class.
    if document_meta.document_type not in text_types:
        return cls(local_path=local_path, metadata=document_meta)
    return TextDocument(local_path=local_path, metadata=document_meta)

ragbits.document_search.documents.document.TextDocument #

Bases: Document

An object representing a text document.

local_path instance-attribute #

local_path: Path

metadata instance-attribute #

metadata: DocumentMeta

content property #

content: str

Get the content of the document.

RETURNS DESCRIPTION
str

The content of the document.

from_document_meta classmethod #

from_document_meta(document_meta: DocumentMeta, local_path: Path) -> Document

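Create a document from document metadata. The returned object depends on the document type: MD and TXT metadata produce a TextDocument, while any other type produces an instance of the class the method is called on.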

PARAMETER DESCRIPTION
document_meta

The document metadata.

TYPE: DocumentMeta

local_path

The local path to the document.

TYPE: Path

RETURNS DESCRIPTION
Document

The document.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/document.py
@classmethod
def from_document_meta(cls, document_meta: DocumentMeta, local_path: Path) -> "Document":
    """
    Build the document object that matches the type declared in the metadata.

    Metadata whose type is MD or TXT yields a `TextDocument`; any other
    document type yields an instance of the class this method is called on.

    Args:
        document_meta: The document metadata.
        local_path: The local path to the document.

    Returns:
        The document wrapping the given path and metadata.
    """
    text_types = (DocumentType.MD, DocumentType.TXT)
    # Guard clause: anything that is not a text type is handled by the calling class.
    if document_meta.document_type not in text_types:
        return cls(local_path=local_path, metadata=document_meta)
    return TextDocument(local_path=local_path, metadata=document_meta)

ragbits.document_search.documents.document.DocumentMeta #

Bases: BaseModel

An object representing document metadata.

document_type instance-attribute #

document_type: DocumentType

source instance-attribute #

source: Source

id property #

id: str

Get the document ID.

RETURNS DESCRIPTION
str

The document ID.

fetch async #

fetch() -> Document

Fetches the document from its source (potentially remote) and creates an object for interacting with it. The type of the returned object depends on the document type.

RETURNS DESCRIPTION
Document

The document.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/document.py
async def fetch(self) -> "Document":
    """
    Fetch the document from its source (potentially remote) and wrap it in a document object.

    The source is awaited to obtain a local path, which is then passed together with this
    metadata to `Document.from_document_meta`; that call decides the concrete type returned.

    Returns:
        The document.
    """
    fetched_path = await self.source.fetch()
    return Document.from_document_meta(document_meta=self, local_path=fetched_path)

create_text_document_from_literal classmethod #

create_text_document_from_literal(content: str) -> DocumentMeta

Create a text document from literal content. This method is deprecated; use from_literal() instead.

PARAMETER DESCRIPTION
content

The content of the document.

TYPE: str

RETURNS DESCRIPTION
DocumentMeta

The document metadata.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/document.py
@classmethod
@deprecated("Use from_literal() instead")
def create_text_document_from_literal(cls, content: str) -> "DocumentMeta":
    """
    Deprecated alias of from_literal(); the call is forwarded to it unchanged.

    Args:
        content: The content of the document.

    Returns:
        The document metadata produced by from_literal().
    """
    document_meta = cls.from_literal(content)
    return document_meta

from_literal classmethod #

from_literal(content: str) -> DocumentMeta

Create a text document from literal content.

PARAMETER DESCRIPTION
content

The content of the document.

TYPE: str

RETURNS DESCRIPTION
DocumentMeta

The document metadata.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/document.py
@classmethod
def from_literal(cls, content: str) -> "DocumentMeta":
    """
    Create TXT document metadata backed by a temporary file holding the given content.

    The content is encoded to bytes and written to a named temporary file. The file is
    created with ``delete=False``, so it is not removed when it is closed; the returned
    metadata refers to it through a `LocalFileSource`.

    Args:
        content: The content of the document.

    Returns:
        The document metadata.
    """
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        handle.write(content.encode())

    # The handle is closed here, but its name is still available and the file still exists.
    literal_path = Path(handle.name)
    return cls(
        document_type=DocumentType.TXT,
        source=LocalFileSource(path=literal_path),
    )

from_local_path classmethod #

from_local_path(local_path: Path) -> DocumentMeta

Create document metadata from a local path.

PARAMETER DESCRIPTION
local_path

The local path to the document.

TYPE: Path

RETURNS DESCRIPTION
DocumentMeta

The document metadata.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/document.py
@classmethod
def from_local_path(cls, local_path: Path) -> "DocumentMeta":
    """
    Create document metadata for a file at a local path.

    The document type is obtained by passing the path to `_infer_document_type`, and the
    path itself is wrapped in a `LocalFileSource`.

    Args:
        local_path: The local path to the document.

    Returns:
        The document metadata.
    """
    inferred_type = cls._infer_document_type(local_path)
    file_source = LocalFileSource(path=local_path)
    return cls(document_type=inferred_type, source=file_source)

from_source async classmethod #

from_source(source: Source) -> DocumentMeta

Create document metadata from a source.

PARAMETER DESCRIPTION
source

The source from which the document is fetched.

TYPE: Source

RETURNS DESCRIPTION
DocumentMeta

The document metadata.

Source code in packages/ragbits-document-search/src/ragbits/document_search/documents/document.py
@classmethod
async def from_source(cls, source: Source) -> "DocumentMeta":
    """
    Create document metadata from a source.

    The source is fetched first; the path it returns is passed to `_infer_document_type`
    to determine the document type stored in the metadata.

    Args:
        source: The source from which the document is fetched.

    Returns:
        The document metadata.
    """
    fetched_path = await source.fetch()
    inferred_type = cls._infer_document_type(fetched_path)
    return cls(document_type=inferred_type, source=source)

ragbits.document_search.documents.document.DocumentType #

Bases: str, Enum

Document types that can be parsed.

MD class-attribute instance-attribute #

MD = 'md'

TXT class-attribute instance-attribute #

TXT = 'txt'

PDF class-attribute instance-attribute #

PDF = 'pdf'

CSV class-attribute instance-attribute #

CSV = 'csv'

DOC class-attribute instance-attribute #

DOC = 'doc'

DOCX class-attribute instance-attribute #

DOCX = 'docx'

HTML class-attribute instance-attribute #

HTML = 'html'

EPUB class-attribute instance-attribute #

EPUB = 'epub'

XLSX class-attribute instance-attribute #

XLSX = 'xlsx'

XLS class-attribute instance-attribute #

XLS = 'xls'

ORG class-attribute instance-attribute #

ORG = 'org'

ODT class-attribute instance-attribute #

ODT = 'odt'

PPT class-attribute instance-attribute #

PPT = 'ppt'

PPTX class-attribute instance-attribute #

PPTX = 'pptx'

RST class-attribute instance-attribute #

RST = 'rst'

RTF class-attribute instance-attribute #

RTF = 'rtf'

TSV class-attribute instance-attribute #

TSV = 'tsv'

JSON class-attribute instance-attribute #

JSON = 'json'

JSONL class-attribute instance-attribute #

JSONL = 'jsonl'

XML class-attribute instance-attribute #

XML = 'xml'

JPG class-attribute instance-attribute #

JPG = 'jpg'

PNG class-attribute instance-attribute #

PNG = 'png'

UNKNOWN class-attribute instance-attribute #

UNKNOWN = 'unknown'