LLMs#

ragbits.core.llms.LLM #

LLM(model_name: str, default_options: LLMOptions | None = None)

Bases: Generic[LLMClientOptions], ABC

Abstract class for interaction with Large Language Models.

Constructs a new LLM instance.

PARAMETER DESCRIPTION
model_name

Name of the model to be used.

TYPE: str

default_options

Default options to be used.

TYPE: LLMOptions | None DEFAULT: None

RAISES DESCRIPTION
TypeError

If the subclass is missing the '_options_cls' attribute.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
def __init__(self, model_name: str, default_options: LLMOptions | None = None) -> None:
    """
    Constructs a new LLM instance.

    Args:
        model_name: Name of the model to be used.
        default_options: Default options to be used.

    Raises:
        TypeError: If the subclass is missing the '_options_cls' attribute.
    """
    self.model_name = model_name
    self.default_options = default_options or self._options_cls()

model_name instance-attribute #

model_name = model_name

default_options instance-attribute #

default_options = default_options or _options_cls()

client abstractmethod cached property #

client: LLMClient

Client for the LLM.

count_tokens #

count_tokens(prompt: BasePrompt) -> int

Counts tokens in the prompt.

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation and response parsing configuration.

TYPE: BasePrompt

RETURNS DESCRIPTION
int

Number of tokens in the prompt.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
def count_tokens(self, prompt: BasePrompt) -> int:  # noqa: PLR6301
    """
    Counts tokens in the prompt.

    Args:
        prompt: Formatted prompt template with conversation and response parsing configuration.

    Returns:
        Number of tokens in the prompt.
    """
    return sum(len(message["content"]) for message in prompt.chat)
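
Note that this base implementation only approximates the token count by summing the character length of each message's content; concrete subclasses such as LocalLLM and LiteLLM below override it with model-aware tokenizers. A minimal illustration of the logic, using a stand-in object rather than a real ragbits prompt (the SimpleNamespace is purely illustrative):

from types import SimpleNamespace

# Stand-in with the same `chat` shape the base implementation reads;
# not a real ragbits prompt.
fake_prompt = SimpleNamespace(chat=[
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "Hi"},
])

# Mirrors the base-class logic above: character count, not a true token count.
print(sum(len(message["content"]) for message in fake_prompt.chat))  # 16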

generate_raw async #

generate_raw(prompt: BasePrompt, *, options: LLMOptions | None = None) -> str

Prepares and sends a prompt to the LLM and returns the raw response (without parsing).

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation.

TYPE: BasePrompt

options

Options to use for the LLM client.

TYPE: LLMOptions | None DEFAULT: None

RETURNS DESCRIPTION
str

Raw text response from LLM.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
async def generate_raw(
    self,
    prompt: BasePrompt,
    *,
    options: LLMOptions | None = None,
) -> str:
    """
    Prepares and sends a prompt to the LLM and returns the raw response (without parsing).

    Args:
        prompt: Formatted prompt template with conversation.
        options: Options to use for the LLM client.

    Returns:
        Raw text response from LLM.
    """
    options = (self.default_options | options) if options else self.default_options
    response = await self.client.call(
        conversation=self._format_chat_for_llm(prompt),
        options=options,
        json_mode=prompt.json_mode,
        output_schema=prompt.output_schema(),
    )

    return response
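
Per-call options are merged over default_options with the `|` operator (per-call values take precedence), and the prompt's chat is handed to the concrete client. A usage sketch, assuming the LiteLLM backend documented below and a Prompt subclass from ragbits.core.prompt (the Prompt import path, its generic signature, and the Jinja-templated fields are assumptions, not taken from this page):

import asyncio

from pydantic import BaseModel

from ragbits.core.llms.litellm import LiteLLM
from ragbits.core.prompt import Prompt  # assumed location of the Prompt base class


class Question(BaseModel):
    text: str


class QuestionPrompt(Prompt[Question, str]):
    system_prompt = "Answer in a single sentence."
    user_prompt = "{{ text }}"


async def main() -> None:
    llm = LiteLLM("gpt-3.5-turbo")
    raw = await llm.generate_raw(QuestionPrompt(Question(text="What is RAG?")))
    print(raw)  # unparsed text, exactly as returned by the model


asyncio.run(main())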

generate async #

generate(prompt: BasePrompt, *, options: LLMOptions | None = None) -> OutputT

Prepares and sends a prompt to the LLM and returns the response parsed to the output type of the prompt (if available).

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation and optional response parsing configuration.

TYPE: BasePrompt

options

Options to use for the LLM client.

TYPE: LLMOptions | None DEFAULT: None

RETURNS DESCRIPTION
OutputT

Response from the LLM, parsed to the prompt's output type when a parser is available.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
async def generate(
    self,
    prompt: BasePrompt,
    *,
    options: LLMOptions | None = None,
) -> OutputT:
    """
    Prepares and sends a prompt to the LLM and returns response parsed to the
    output type of the prompt (if available).

    Args:
        prompt: Formatted prompt template with conversation and optional response parsing configuration.
        options: Options to use for the LLM client.

    Returns:
        Text response from LLM.
    """
    response = await self.generate_raw(prompt, options=options)

    if isinstance(prompt, BasePromptWithParser):
        return prompt.parse_response(response)

    return cast(OutputT, response)
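
If the prompt implements BasePromptWithParser, the raw response is run through prompt.parse_response; otherwise the raw string is returned cast to OutputT. Continuing the sketch above, a prompt with a structured output type (that Prompt parses JSON into a Pydantic output model is an assumption based on the json_mode and output_schema hooks shown on this page, not on documentation of Prompt itself):

class Answer(BaseModel):
    answer: str
    confidence: float


class StructuredQuestionPrompt(Prompt[Question, Answer]):
    system_prompt = "Answer as JSON with 'answer' and 'confidence' fields."
    user_prompt = "{{ text }}"


async def ask() -> Answer:
    llm = LiteLLM("gpt-4o", use_structured_output=True)
    # Parsed to `Answer` if the prompt provides a parser; otherwise the raw string is returned.
    return await llm.generate(StructuredQuestionPrompt(Question(text="What is RAG?")))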

generate_streaming async #

generate_streaming(prompt: BasePrompt, *, options: LLMOptions | None = None) -> AsyncGenerator[str, None]

Prepares and sends a prompt to the LLM and streams the results.

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation.

TYPE: BasePrompt

options

Options to use for the LLM client.

TYPE: LLMOptions | None DEFAULT: None

RETURNS DESCRIPTION
AsyncGenerator[str, None]

Response stream from LLM.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
async def generate_streaming(
    self,
    prompt: BasePrompt,
    *,
    options: LLMOptions | None = None,
) -> AsyncGenerator[str, None]:
    """
    Prepares and sends a prompt to the LLM and streams the results.

    Args:
        prompt: Formatted prompt template with conversation.
        options: Options to use for the LLM client.

    Returns:
        Response stream from LLM.
    """
    options = (self.default_options | options) if options else self.default_options
    response = await self.client.call_streaming(
        conversation=self._format_chat_for_llm(prompt),
        options=options,
        json_mode=prompt.json_mode,
        output_schema=prompt.output_schema(),
    )
    async for text_piece in response:
        yield text_piece
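
Because generate_streaming is an async generator, it is consumed with `async for` rather than awaited directly. Continuing the sketch above:

async def stream_answer() -> None:
    llm = LiteLLM("gpt-3.5-turbo")
    prompt = QuestionPrompt(Question(text="What is RAG?"))
    async for chunk in llm.generate_streaming(prompt):
        print(chunk, end="", flush=True)  # emit text pieces as they arrive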

ragbits.core.llms.local.LocalLLM #

LocalLLM(model_name: str, default_options: LocalLLMOptions | None = None, *, api_key: str | None = None)

Bases: LLM[LocalLLMOptions]

Class for interaction with any LLM available on Hugging Face.

Constructs a new local LLM instance.

PARAMETER DESCRIPTION
model_name

Name of the model to use. This should be a causal language model (CausalLM) checkpoint.

TYPE: str

default_options

Default options for the LLM.

TYPE: LocalLLMOptions | None DEFAULT: None

api_key

The API key for Hugging Face authentication.

TYPE: str | None DEFAULT: None

RAISES DESCRIPTION
ImportError

If the 'local' extra requirements are not installed.

Source code in packages/ragbits-core/src/ragbits/core/llms/local.py
def __init__(
    self,
    model_name: str,
    default_options: LocalLLMOptions | None = None,
    *,
    api_key: str | None = None,
) -> None:
    """
    Constructs a new local LLM instance.

    Args:
        model_name: Name of the model to use. This should be a model from the CausalLM class.
        default_options: Default options for the LLM.
        api_key: The API key for Hugging Face authentication.

    Raises:
        ImportError: If the 'local' extra requirements are not installed.
    """
    if not HAS_LOCAL_LLM:
        raise ImportError("You need to install the 'local' extra requirements to use local LLM models")

    super().__init__(model_name, default_options)
    self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_key)
    self.api_key = api_key
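
Construction loads the Hugging Face tokenizer (AutoTokenizer.from_pretrained), so it needs network access or a local cache, and the 'local' extra must be installed. A minimal sketch (the extra name comes from the ImportError message above; the pip invocation, checkpoint name, and placeholder token are assumptions):

# pip install "ragbits-core[local]"   # assumed install command for the 'local' extra
from ragbits.core.llms.local import LocalLLM

llm = LocalLLM(
    "mistralai/Mistral-7B-Instruct-v0.2",  # any causal-LM checkpoint on the Hub
    api_key="hf_...",                      # only needed for gated or private models
)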

model_name instance-attribute #

model_name = model_name

default_options instance-attribute #

default_options = default_options or _options_cls()

tokenizer instance-attribute #

tokenizer = from_pretrained(model_name, token=api_key)

api_key instance-attribute #

api_key = api_key

client cached property #

client: LocalLLMClient

Client for the LLM.

RETURNS DESCRIPTION
LocalLLMClient

The client used to interact with the LLM.

generate_raw async #

generate_raw(prompt: BasePrompt, *, options: LLMOptions | None = None) -> str

Prepares and sends a prompt to the LLM and returns the raw response (without parsing).

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation.

TYPE: BasePrompt

options

Options to use for the LLM client.

TYPE: LLMOptions | None DEFAULT: None

RETURNS DESCRIPTION
str

Raw text response from LLM.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
async def generate_raw(
    self,
    prompt: BasePrompt,
    *,
    options: LLMOptions | None = None,
) -> str:
    """
    Prepares and sends a prompt to the LLM and returns the raw response (without parsing).

    Args:
        prompt: Formatted prompt template with conversation.
        options: Options to use for the LLM client.

    Returns:
        Raw text response from LLM.
    """
    options = (self.default_options | options) if options else self.default_options
    response = await self.client.call(
        conversation=self._format_chat_for_llm(prompt),
        options=options,
        json_mode=prompt.json_mode,
        output_schema=prompt.output_schema(),
    )

    return response

generate async #

generate(prompt: BasePrompt, *, options: LLMOptions | None = None) -> OutputT

Prepares and sends a prompt to the LLM and returns the response parsed to the output type of the prompt (if available).

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation and optional response parsing configuration.

TYPE: BasePrompt

options

Options to use for the LLM client.

TYPE: LLMOptions | None DEFAULT: None

RETURNS DESCRIPTION
OutputT

Response from the LLM, parsed to the prompt's output type when a parser is available.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
async def generate(
    self,
    prompt: BasePrompt,
    *,
    options: LLMOptions | None = None,
) -> OutputT:
    """
    Prepares and sends a prompt to the LLM and returns response parsed to the
    output type of the prompt (if available).

    Args:
        prompt: Formatted prompt template with conversation and optional response parsing configuration.
        options: Options to use for the LLM client.

    Returns:
        Text response from LLM.
    """
    response = await self.generate_raw(prompt, options=options)

    if isinstance(prompt, BasePromptWithParser):
        return prompt.parse_response(response)

    return cast(OutputT, response)

generate_streaming async #

generate_streaming(prompt: BasePrompt, *, options: LLMOptions | None = None) -> AsyncGenerator[str, None]

Prepares and sends a prompt to the LLM and streams the results.

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation.

TYPE: BasePrompt

options

Options to use for the LLM client.

TYPE: LLMOptions | None DEFAULT: None

RETURNS DESCRIPTION
AsyncGenerator[str, None]

Response stream from LLM.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
async def generate_streaming(
    self,
    prompt: BasePrompt,
    *,
    options: LLMOptions | None = None,
) -> AsyncGenerator[str, None]:
    """
    Prepares and sends a prompt to the LLM and streams the results.

    Args:
        prompt: Formatted prompt template with conversation.
        options: Options to use for the LLM client.

    Returns:
        Response stream from LLM.
    """
    options = (self.default_options | options) if options else self.default_options
    response = await self.client.call_streaming(
        conversation=self._format_chat_for_llm(prompt),
        options=options,
        json_mode=prompt.json_mode,
        output_schema=prompt.output_schema(),
    )
    async for text_piece in response:
        yield text_piece

count_tokens #

count_tokens(prompt: BasePrompt) -> int

Counts tokens in the messages.

PARAMETER DESCRIPTION
prompt

Messages to count tokens for.

TYPE: BasePrompt

RETURNS DESCRIPTION
int

Number of tokens in the messages.

Source code in packages/ragbits-core/src/ragbits/core/llms/local.py
def count_tokens(self, prompt: BasePrompt) -> int:
    """
    Counts tokens in the messages.

    Args:
        prompt: Messages to count tokens for.

    Returns:
        Number of tokens in the messages.
    """
    input_ids = self.tokenizer.apply_chat_template(prompt.chat)
    return len(input_ids)
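
Unlike the character-based approximation in the base class, this applies the tokenizer's chat template, so the result is an actual token count for the configured model. Continuing the sketches above:

# `llm` is the LocalLLM from the previous sketch; QuestionPrompt and Question are
# defined in the generate_raw sketch earlier on this page.
prompt = QuestionPrompt(Question(text="What is RAG?"))
print(llm.count_tokens(prompt))  # token count under the model's chat template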

ragbits.core.llms.litellm.LiteLLM #

LiteLLM(model_name: str = 'gpt-3.5-turbo', default_options: LiteLLMOptions | None = None, *, base_url: str | None = None, api_key: str | None = None, api_version: str | None = None, use_structured_output: bool = False)

Bases: LLM[LiteLLMOptions]

Class for interaction with any LLM supported by the LiteLLM API.

Constructs a new LiteLLM instance.

PARAMETER DESCRIPTION
model_name

Name of the LiteLLM-supported model to be used. Default is "gpt-3.5-turbo".

TYPE: str DEFAULT: 'gpt-3.5-turbo'

default_options

Default options to be used.

TYPE: LiteLLMOptions | None DEFAULT: None

base_url

Base URL of the LLM API.

TYPE: str | None DEFAULT: None

api_key

API key to be used. If not specified, an environment variable will be used; for more information, follow the instructions for your specific vendor in the LiteLLM documentation.

TYPE: str | None DEFAULT: None

api_version

API version to be used. If not specified, the default version will be used.

TYPE: str | None DEFAULT: None

use_structured_output

Whether to request a structured output from the model. Default is False. Can only be combined with models that support structured output.

TYPE: bool DEFAULT: False

RAISES DESCRIPTION
ImportError

If the 'litellm' extra requirements are not installed.

Source code in packages/ragbits-core/src/ragbits/core/llms/litellm.py
def __init__(
    self,
    model_name: str = "gpt-3.5-turbo",
    default_options: LiteLLMOptions | None = None,
    *,
    base_url: str | None = None,
    api_key: str | None = None,
    api_version: str | None = None,
    use_structured_output: bool = False,
) -> None:
    """
    Constructs a new LiteLLM instance.

    Args:
        model_name: Name of the [LiteLLM supported model](https://docs.litellm.ai/docs/providers) to be used.\
            Default is "gpt-3.5-turbo".
        default_options: Default options to be used.
        base_url: Base URL of the LLM API.
        api_key: API key to be used. If not specified, an environment variable will be used,
            for more information, follow the instructions for your specific vendor in the\
            [LiteLLM documentation](https://docs.litellm.ai/docs/providers).
        api_version: API version to be used. If not specified, the default version will be used.
        use_structured_output: Whether to request a
            [structured output](https://docs.litellm.ai/docs/completion/json_mode#pass-in-json_schema)
            from the model. Default is False. Can only be combined with models that support structured output.

    Raises:
        ImportError: If the 'litellm' extra requirements are not installed.
    """
    if not HAS_LITELLM:
        raise ImportError("You need to install the 'litellm' extra requirements to use LiteLLM models")

    super().__init__(model_name, default_options)
    self.base_url = base_url
    self.api_key = api_key
    self.api_version = api_version
    self.use_structured_output = use_structured_output
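
Requires the 'litellm' extra. base_url, api_key and api_version are passed through to the underlying client, which makes it straightforward to target Azure OpenAI deployments or other OpenAI-compatible gateways. A minimal sketch (the pip invocation, endpoint, key, and model names are placeholders, not real values):

# pip install "ragbits-core[litellm]"   # assumed install command for the 'litellm' extra
from ragbits.core.llms.litellm import LiteLLM

# Default public model; the API key is read from the vendor's environment variable.
llm = LiteLLM()

# An OpenAI-compatible gateway with structured output enabled:
gateway_llm = LiteLLM(
    "gpt-4o",
    base_url="https://llm-gateway.example.com/v1",
    api_key="sk-...",            # placeholder; omit to fall back to the env variable
    use_structured_output=True,  # only for models that support structured output
)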

model_name instance-attribute #

model_name = model_name

default_options instance-attribute #

default_options = default_options or _options_cls()

base_url instance-attribute #

base_url = base_url

api_key instance-attribute #

api_key = api_key

api_version instance-attribute #

api_version = api_version

use_structured_output instance-attribute #

use_structured_output = use_structured_output

client cached property #

client: LiteLLMClient

Client for the LLM.

generate_raw async #

generate_raw(prompt: BasePrompt, *, options: LLMOptions | None = None) -> str

Prepares and sends a prompt to the LLM and returns the raw response (without parsing).

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation.

TYPE: BasePrompt

options

Options to use for the LLM client.

TYPE: LLMOptions | None DEFAULT: None

RETURNS DESCRIPTION
str

Raw text response from LLM.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
async def generate_raw(
    self,
    prompt: BasePrompt,
    *,
    options: LLMOptions | None = None,
) -> str:
    """
    Prepares and sends a prompt to the LLM and returns the raw response (without parsing).

    Args:
        prompt: Formatted prompt template with conversation.
        options: Options to use for the LLM client.

    Returns:
        Raw text response from LLM.
    """
    options = (self.default_options | options) if options else self.default_options
    response = await self.client.call(
        conversation=self._format_chat_for_llm(prompt),
        options=options,
        json_mode=prompt.json_mode,
        output_schema=prompt.output_schema(),
    )

    return response

generate async #

generate(prompt: BasePrompt, *, options: LLMOptions | None = None) -> OutputT

Prepares and sends a prompt to the LLM and returns the response parsed to the output type of the prompt (if available).

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation and optional response parsing configuration.

TYPE: BasePrompt

options

Options to use for the LLM client.

TYPE: LLMOptions | None DEFAULT: None

RETURNS DESCRIPTION
OutputT

Response from the LLM, parsed to the prompt's output type when a parser is available.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
async def generate(
    self,
    prompt: BasePrompt,
    *,
    options: LLMOptions | None = None,
) -> OutputT:
    """
    Prepares and sends a prompt to the LLM and returns response parsed to the
    output type of the prompt (if available).

    Args:
        prompt: Formatted prompt template with conversation and optional response parsing configuration.
        options: Options to use for the LLM client.

    Returns:
        Text response from LLM.
    """
    response = await self.generate_raw(prompt, options=options)

    if isinstance(prompt, BasePromptWithParser):
        return prompt.parse_response(response)

    return cast(OutputT, response)

generate_streaming async #

generate_streaming(prompt: BasePrompt, *, options: LLMOptions | None = None) -> AsyncGenerator[str, None]

Prepares and sends a prompt to the LLM and streams the results.

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation.

TYPE: BasePrompt

options

Options to use for the LLM client.

TYPE: LLMOptions | None DEFAULT: None

RETURNS DESCRIPTION
AsyncGenerator[str, None]

Response stream from LLM.

Source code in packages/ragbits-core/src/ragbits/core/llms/base.py
async def generate_streaming(
    self,
    prompt: BasePrompt,
    *,
    options: LLMOptions | None = None,
) -> AsyncGenerator[str, None]:
    """
    Prepares and sends a prompt to the LLM and streams the results.

    Args:
        prompt: Formatted prompt template with conversation.
        options: Options to use for the LLM client.

    Returns:
        Response stream from LLM.
    """
    options = (self.default_options | options) if options else self.default_options
    response = await self.client.call_streaming(
        conversation=self._format_chat_for_llm(prompt),
        options=options,
        json_mode=prompt.json_mode,
        output_schema=prompt.output_schema(),
    )
    async for text_piece in response:
        yield text_piece

count_tokens #

count_tokens(prompt: BasePrompt) -> int

Counts tokens in the prompt.

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation and response parsing configuration.

TYPE: BasePrompt

RETURNS DESCRIPTION
int

Number of tokens in the prompt.

Source code in packages/ragbits-core/src/ragbits/core/llms/litellm.py
def count_tokens(self, prompt: BasePrompt) -> int:
    """
    Counts tokens in the prompt.

    Args:
        prompt: Formatted prompt template with conversation and response parsing configuration.

    Returns:
        Number of tokens in the prompt.
    """
    return sum(litellm.token_counter(model=self.model_name, text=message["content"]) for message in prompt.chat)
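
Token counting here is delegated to litellm.token_counter per message, so the result reflects the tokenizer of the configured model rather than a character count. Continuing the sketches above:

# QuestionPrompt and Question are defined in the generate_raw sketch earlier on this page.
prompt = QuestionPrompt(Question(text="What is RAG?"))
litellm_llm = LiteLLM("gpt-3.5-turbo")
print(litellm_llm.count_tokens(prompt))  # per-message counts from litellm.token_counter, summed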