Source code for zarr.core.dtype.wrapper

"""
Wrapper for native array data types.

The ``ZDType`` class is an abstract base class for wrapping native array data types, e.g. NumPy dtypes.
``ZDType`` provides a common interface for working with data types in a way that is independent of the
underlying data type system.

The wrapper class encapsulates a native data type. Instances of the class can be created from a
native data type instance, and a native data type instance can be created from an instance of the
wrapper class.

The wrapper class is responsible for:
- Serializing and deserializing a native data type to Zarr V2 or Zarr V3 metadata.
  This ensures that the data type can be properly stored and retrieved from array metadata.
- Serializing and deserializing scalar values to Zarr V2 or Zarr V3 metadata. This is important for
  storing a fill value for an array in a manner that is valid for the data type.

You can add support for a new data type in Zarr by subclassing ``ZDType`` wrapper class and adapt its methods
to support your native data type. The wrapper class must be added to a data type registry
(defined elsewhere) before array creation routines or array reading routines can use your new data
type.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import (
    TYPE_CHECKING,
    ClassVar,
    Generic,
    Literal,
    Self,
    TypeGuard,
    TypeVar,
    overload,
)

import numpy as np

if TYPE_CHECKING:
    from zarr.core.common import JSON, ZarrFormat
    from zarr.core.dtype.common import DTypeJSON, DTypeSpec_V2, DTypeSpec_V3

# This the upper bound for the scalar types we support. It's numpy scalars + str,
# because the new variable-length string dtype in numpy does not have a corresponding scalar type
TBaseScalar = np.generic | str | bytes
# This is the bound for the dtypes that we support. If we support non-numpy dtypes,
# then this bound will need to be widened.
TBaseDType = np.dtype[np.generic]

# These two type parameters are covariant because we want
# x : ZDType[BaseDType, BaseScalar] = ZDType[SubDType, SubScalar]
# to type check
TScalar_co = TypeVar("TScalar_co", bound=TBaseScalar, covariant=True)
TDType_co = TypeVar("TDType_co", bound=TBaseDType, covariant=True)


[docs] @dataclass(frozen=True, kw_only=True, slots=True) class ZDType(ABC, Generic[TDType_co, TScalar_co]): """ Abstract base class for wrapping native array data types, e.g. numpy dtypes Attributes ---------- dtype_cls : ClassVar[type[TDType]] The wrapped dtype class. This is a class variable. _zarr_v3_name : ClassVar[str] The name given to the data type by a Zarr v3 data type specification. This is a class variable, and it should generally be unique across different data types. """ # this class will create a native data type # mypy currently disallows class variables to contain type parameters # but it seems OK for us to use it here: # https://github.com/python/typing/discussions/1424#discussioncomment-7989934 dtype_cls: ClassVar[type[TDType_co]] # type: ignore[misc] _zarr_v3_name: ClassVar[str] @classmethod def _check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_co]: """ Check that a native data type matches the dtype_cls class attribute. Used as a type guard. Parameters ---------- dtype : TDType The dtype to check. Returns ------- Bool True if the dtype matches, False otherwise. """ return type(dtype) is cls.dtype_cls
[docs] @classmethod @abstractmethod def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: """ Create a ZDType instance from a native data type. This method is used when taking a user-provided native data type, like a NumPy data type, and creating the corresponding ZDType instance from them. Parameters ---------- dtype : TDType The native data type object to wrap. Returns ------- Self The ZDType that wraps the native data type. Raises ------ TypeError If the native data type is not consistent with the wrapped data type. """ raise NotImplementedError # pragma: no cover
[docs] @abstractmethod def to_native_dtype(self: Self) -> TDType_co: """ Return an instance of the wrapped data type. This operation inverts ``from_native_dtype``. Returns ------- TDType The native data type wrapped by this ZDType. """ raise NotImplementedError # pragma: no cover
@classmethod @abstractmethod def _from_json_v2(cls: type[Self], data: DTypeJSON) -> Self: raise NotImplementedError # pragma: no cover @classmethod @abstractmethod def _from_json_v3(cls: type[Self], data: DTypeJSON) -> Self: raise NotImplementedError # pragma: no cover
[docs] @classmethod def from_json(cls: type[Self], data: DTypeJSON, *, zarr_format: ZarrFormat) -> Self: """ Create an instance of this ZDType from JSON data. Parameters ---------- data : DTypeJSON The JSON representation of the data type. zarr_format : ZarrFormat The zarr format version. Returns ------- Self An instance of this data type. """ if zarr_format == 2: return cls._from_json_v2(data) if zarr_format == 3: return cls._from_json_v3(data) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover
@overload def to_json(self, zarr_format: Literal[2]) -> DTypeSpec_V2: ... @overload def to_json(self, zarr_format: Literal[3]) -> DTypeSpec_V3: ...
[docs] @abstractmethod def to_json(self, zarr_format: ZarrFormat) -> DTypeSpec_V2 | DTypeSpec_V3: """ Serialize this ZDType to JSON. Parameters ---------- zarr_format : ZarrFormat The zarr format version. Returns ------- DTypeJSON_V2 | DTypeJSON_V3 The JSON-serializable representation of the wrapped data type """ raise NotImplementedError # pragma: no cover
@abstractmethod def _check_scalar(self, data: object) -> bool: """ Check that an python object is a valid scalar value for the wrapped data type. Parameters ---------- data : object A value to check. Returns ------- Bool True if the object is valid, False otherwise. """ raise NotImplementedError # pragma: no cover
[docs] @abstractmethod def cast_scalar(self, data: object) -> TScalar_co: """ Cast a python object to the wrapped scalar type. The type of the provided scalar is first checked for compatibility. If it's incompatible with the associated scalar type, a ``TypeError`` will be raised. Parameters ---------- data : object The python object to cast. Returns ------- TScalar The cast value. """ raise NotImplementedError # pragma: no cover
[docs] @abstractmethod def default_scalar(self) -> TScalar_co: """ Get the default scalar value for the wrapped data type. This is a method, rather than an attribute, because the default value for some data types depends on parameters that are not known until a concrete data type is wrapped. For example, data types parametrized by a length like fixed-length strings or bytes will generate scalars consistent with that length. Returns ------- TScalar The default value for this data type. """ raise NotImplementedError # pragma: no cover
[docs] @abstractmethod def from_json_scalar(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar_co: """ Read a JSON-serializable value as a scalar. Parameters ---------- data : JSON A JSON representation of a scalar value. zarr_format : ZarrFormat The zarr format version. This is specified because the JSON serialization of scalars differs between Zarr V2 and Zarr V3. Returns ------- TScalar The deserialized scalar value. """ raise NotImplementedError # pragma: no cover
[docs] @abstractmethod def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> JSON: """ Serialize a python object to the JSON representation of a scalar. The value will first be cast to the scalar type associated with this ZDType, then serialized to JSON. Parameters ---------- data : object The value to convert. zarr_format : ZarrFormat The zarr format version. This is specified because the JSON serialization of scalars differs between Zarr V2 and Zarr V3. Returns ------- JSON The JSON-serialized scalar. """ raise NotImplementedError # pragma: no cover
def scalar_failed_type_check_msg( cls_instance: ZDType[TBaseDType, TBaseScalar], bad_scalar: object ) -> str: """ Generate an error message reporting that a particular value failed a type check when attempting to cast that value to a scalar. """ return ( f"The value {bad_scalar!r} failed a type check. " f"It cannot be safely cast to a scalar compatible with {cls_instance}. " f"Consult the documentation for {cls_instance} to determine the possible values that can " "be cast to scalars of the wrapped data type." )