Skip to content

TokenUsage dataclass

The tokens used in a message API inference call.

Source code in src/aws_sdk_bedrock_runtime/models.py
10097
10098
10099
10100
10101
10102
10103
10104
10105
10106
10107
10108
10109
10110
10111
10112
10113
10114
10115
10116
10117
10118
10119
10120
10121
10122
10123
10124
10125
10126
10127
10128
10129
10130
10131
10132
10133
10134
10135
10136
10137
10138
10139
10140
10141
10142
10143
10144
10145
10146
10147
10148
10149
10150
10151
10152
10153
10154
10155
10156
10157
10158
10159
10160
10161
10162
10163
10164
10165
10166
10167
10168
10169
10170
10171
10172
10173
10174
10175
10176
10177
10178
10179
10180
@dataclass(kw_only=True)
class TokenUsage:
    """The tokens used in a message API inference call."""

    input_tokens: int
    """The number of tokens sent in the request to the model."""

    output_tokens: int
    """The number of tokens that the model generated for the request."""

    total_tokens: int
    """The total of input tokens and tokens generated by the model."""

    cache_read_input_tokens: int | None = None
    """The number of input tokens read from the cache for the request."""

    cache_write_input_tokens: int | None = None
    """The number of input tokens written to the cache for the request."""

    def serialize(self, serializer: ShapeSerializer):
        serializer.write_struct(_SCHEMA_TOKEN_USAGE, self)

    def serialize_members(self, serializer: ShapeSerializer):
        serializer.write_integer(
            _SCHEMA_TOKEN_USAGE.members["inputTokens"], self.input_tokens
        )
        serializer.write_integer(
            _SCHEMA_TOKEN_USAGE.members["outputTokens"], self.output_tokens
        )
        serializer.write_integer(
            _SCHEMA_TOKEN_USAGE.members["totalTokens"], self.total_tokens
        )
        if self.cache_read_input_tokens is not None:
            serializer.write_integer(
                _SCHEMA_TOKEN_USAGE.members["cacheReadInputTokens"],
                self.cache_read_input_tokens,
            )

        if self.cache_write_input_tokens is not None:
            serializer.write_integer(
                _SCHEMA_TOKEN_USAGE.members["cacheWriteInputTokens"],
                self.cache_write_input_tokens,
            )

    @classmethod
    def deserialize(cls, deserializer: ShapeDeserializer) -> Self:
        return cls(**cls.deserialize_kwargs(deserializer))

    @classmethod
    def deserialize_kwargs(cls, deserializer: ShapeDeserializer) -> dict[str, Any]:
        kwargs: dict[str, Any] = {}

        def _consumer(schema: Schema, de: ShapeDeserializer) -> None:
            match schema.expect_member_index():
                case 0:
                    kwargs["input_tokens"] = de.read_integer(
                        _SCHEMA_TOKEN_USAGE.members["inputTokens"]
                    )

                case 1:
                    kwargs["output_tokens"] = de.read_integer(
                        _SCHEMA_TOKEN_USAGE.members["outputTokens"]
                    )

                case 2:
                    kwargs["total_tokens"] = de.read_integer(
                        _SCHEMA_TOKEN_USAGE.members["totalTokens"]
                    )

                case 3:
                    kwargs["cache_read_input_tokens"] = de.read_integer(
                        _SCHEMA_TOKEN_USAGE.members["cacheReadInputTokens"]
                    )

                case 4:
                    kwargs["cache_write_input_tokens"] = de.read_integer(
                        _SCHEMA_TOKEN_USAGE.members["cacheWriteInputTokens"]
                    )

                case _:
                    logger.debug("Unexpected member schema: %s", schema)

        deserializer.read_struct(_SCHEMA_TOKEN_USAGE, consumer=_consumer)
        return kwargs

Attributes

cache_read_input_tokens class-attribute instance-attribute

cache_read_input_tokens: int | None = None

The number of input tokens read from the cache for the request.

cache_write_input_tokens class-attribute instance-attribute

cache_write_input_tokens: int | None = None

The number of input tokens written to the cache for the request.

input_tokens instance-attribute

input_tokens: int

The number of tokens sent in the request to the model.

output_tokens instance-attribute

output_tokens: int

The number of tokens that the model generated for the request.

total_tokens instance-attribute

total_tokens: int

The total of input tokens and tokens generated by the model.