"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT."""
# @generated-id: 36f204c64074

from __future__ import annotations
from .documenturlchunk import DocumentURLChunk, DocumentURLChunkTypedDict
from .filechunk import FileChunk, FileChunkTypedDict
from .imageurlchunk import ImageURLChunk, ImageURLChunkTypedDict
from .responseformat import ResponseFormat, ResponseFormatTypedDict
from mistralai.client.types import (
    BaseModel,
    Nullable,
    OptionalNullable,
    UNSET,
    UNSET_SENTINEL,
)
from pydantic import model_serializer
from typing import List, Literal, Optional, Union
from typing_extensions import NotRequired, TypeAliasType, TypedDict


# Dict-shaped variant of the document union: any one of the three chunk
# TypedDicts is accepted wherever a document is expected.
DocumentUnionTypedDict = TypeAliasType(
    "DocumentUnionTypedDict",
    Union[FileChunkTypedDict, ImageURLChunkTypedDict, DocumentURLChunkTypedDict],
)
r"""Document to run OCR on"""


# Model-object variant of the document union (same three chunk kinds as
# DocumentUnionTypedDict, but as model classes).
DocumentUnion = TypeAliasType(
    "DocumentUnion", Union[FileChunk, ImageURLChunk, DocumentURLChunk]
)
r"""Document to run OCR on"""


PagesTypedDict = TypeAliasType("PagesTypedDict", Union[str, List[int]])
r"""Specific pages to process. Accepts a list of integers or a string of comma-separated numbers and ranges (e.g. '0,1,2' or '0-5' or '0,2-4'). Page numbers start from 0."""


# Same union as PagesTypedDict; declared separately so the model class and the
# TypedDict each reference their own alias name.
Pages = TypeAliasType("Pages", Union[str, List[int]])
r"""Specific pages to process. Accepts a list of integers or a string of comma-separated numbers and ranges (e.g. '0,1,2' or '0-5' or '0,2-4'). Page numbers start from 0."""


# Allowed values for the `table_format` field of OCRRequest /
# OCRRequestTypedDict.
# NOTE(review): presumably selects the markup used for tables found in the
# document; confirm against the API reference.
TableFormat = Literal[
    "markdown",
    "html",
]


# Allowed values for the `confidence_scores_granularity` field. Per that
# field's description: "word" requests per-word scores, "page" requests
# aggregate scores only.
ConfidenceScoresGranularity = Literal[
    "word",
    "page",
]


class OCRRequestTypedDict(TypedDict):
    # Dict-shaped declaration carrying the same fields, in the same order, as
    # the OCRRequest model class. Only `model` and `document` are required
    # keys; every other key is NotRequired. Keys wrapped in Nullable also
    # accept None as a value.

    # Required key whose value may be None.
    # NOTE(review): presumably the identifier of the OCR model to use; confirm
    # against the API reference.
    model: Nullable[str]
    document: DocumentUnionTypedDict
    r"""Document to run OCR on"""
    # Optional key; not Nullable, so when present it must be a str.
    # NOTE(review): meaning of this identifier is not visible here; confirm.
    id: NotRequired[str]
    pages: NotRequired[Nullable[PagesTypedDict]]
    r"""Specific pages to process. Accepts a list of integers or a string of comma-separated numbers and ranges (e.g. '0,1,2' or '0-5' or '0,2-4'). Page numbers start from 0."""
    # NOTE(review): the key name says "base64" while the description below says
    # "image URLs"; confirm which one the API actually returns.
    include_image_base64: NotRequired[Nullable[bool]]
    r"""Include image URLs in response"""
    image_limit: NotRequired[Nullable[int]]
    r"""Max images to extract"""
    image_min_size: NotRequired[Nullable[int]]
    r"""Minimum height and width of image to extract"""
    bbox_annotation_format: NotRequired[Nullable[ResponseFormatTypedDict]]
    r"""Structured output class for extracting useful information from each extracted bounding box / image from document. Only json_schema is valid for this field"""
    document_annotation_format: NotRequired[Nullable[ResponseFormatTypedDict]]
    r"""Structured output class for extracting useful information from the entire document. Only json_schema is valid for this field"""
    document_annotation_prompt: NotRequired[Nullable[str]]
    r"""Optional prompt to guide the model in extracting structured output from the entire document. A document_annotation_format must be provided."""
    # Optional, nullable; restricted to the TableFormat literals
    # ("markdown" or "html").
    table_format: NotRequired[Nullable[TableFormat]]
    # Optional boolean flags; not Nullable, so None is not a declared value.
    # NOTE(review): presumably toggle extraction of page headers / footers;
    # confirm against the API reference.
    extract_header: NotRequired[bool]
    extract_footer: NotRequired[bool]
    confidence_scores_granularity: NotRequired[Nullable[ConfidenceScoresGranularity]]
    r"""Granularity for confidence scores: 'word' (per-word scores) or 'page' (aggregate only). Defaults to None (no confidence scores) to keep response payload small."""


class OCRRequest(BaseModel):
    # Request model for an OCR call. `model` and `document` are required;
    # fields typed OptionalNullable default to UNSET, and serialize_model
    # below decides which fields actually appear in the serialized output.

    model: Nullable[str]

    document: DocumentUnion
    r"""Document to run OCR on"""

    id: Optional[str] = None

    pages: OptionalNullable[Pages] = UNSET
    r"""Specific pages to process. Accepts a list of integers or a string of comma-separated numbers and ranges (e.g. '0,1,2' or '0-5' or '0,2-4'). Page numbers start from 0."""

    include_image_base64: OptionalNullable[bool] = UNSET
    r"""Include image URLs in response"""

    image_limit: OptionalNullable[int] = UNSET
    r"""Max images to extract"""

    image_min_size: OptionalNullable[int] = UNSET
    r"""Minimum height and width of image to extract"""

    bbox_annotation_format: OptionalNullable[ResponseFormat] = UNSET
    r"""Structured output class for extracting useful information from each extracted bounding box / image from document. Only json_schema is valid for this field"""

    document_annotation_format: OptionalNullable[ResponseFormat] = UNSET
    r"""Structured output class for extracting useful information from the entire document. Only json_schema is valid for this field"""

    document_annotation_prompt: OptionalNullable[str] = UNSET
    r"""Optional prompt to guide the model in extracting structured output from the entire document. A document_annotation_format must be provided."""

    table_format: OptionalNullable[TableFormat] = UNSET

    extract_header: Optional[bool] = None

    extract_footer: Optional[bool] = None

    confidence_scores_granularity: OptionalNullable[ConfidenceScoresGranularity] = UNSET
    r"""Granularity for confidence scores: 'word' (per-word scores) or 'page' (aggregate only). Defaults to None (no confidence scores) to keep response payload small."""

    @model_serializer(mode="wrap")
    def serialize_model(self, handler):
        """Serialize the request, filtering out fields that should be omitted.

        ``handler`` is the wrapped serializer; it is called once on ``self``
        and its output is then filtered field by field:

        * a value equal to ``UNSET_SENTINEL`` is never emitted;
        * a non-``None`` value is always emitted;
        * a ``None`` value is emitted only if the field is not optional, or
          if it is nullable and was explicitly set on this instance.

        Returns a dict keyed by each field's alias (or its name when it has
        no alias).
        """
        # Fields that may be left out of the output entirely.
        omittable = {
            "id",
            "pages",
            "include_image_base64",
            "image_limit",
            "image_min_size",
            "bbox_annotation_format",
            "document_annotation_format",
            "document_annotation_prompt",
            "table_format",
            "extract_header",
            "extract_footer",
            "confidence_scores_granularity",
        }
        # Fields for which an explicit None is a meaningful value to send.
        accepts_null = {
            "model",
            "pages",
            "include_image_base64",
            "image_limit",
            "image_min_size",
            "bbox_annotation_format",
            "document_annotation_format",
            "document_annotation_prompt",
            "table_format",
            "confidence_scores_granularity",
        }

        raw = handler(self)
        fields_set = self.__pydantic_fields_set__  # pylint: disable=no-member
        output = {}

        for field_name, field_info in type(self).model_fields.items():
            key = field_info.alias or field_name
            # Prefer the aliased key, falling back to the attribute name.
            value = raw.get(key, raw.get(field_name))
            explicit_null_allowed = key in accepts_null and field_name in fields_set

            if value != UNSET_SENTINEL and (
                value is not None
                or key not in omittable
                or explicit_null_allowed
            ):
                output[key] = value

        return output
