from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import (
TYPE_CHECKING,
ClassVar,
Literal,
Self,
TypedDict,
TypeGuard,
TypeVar,
cast,
get_args,
overload,
)
import numpy as np
from typing_extensions import ReadOnly
from zarr.core.common import NamedConfig
from zarr.core.dtype.common import (
DataTypeValidationError,
DTypeConfig_V2,
DTypeJSON,
HasEndianness,
HasItemSize,
check_dtype_spec_v2,
)
from zarr.core.dtype.npy.common import (
DATETIME_UNIT,
DateTimeUnit,
check_json_int,
endianness_to_numpy_str,
get_endianness_from_numpy_dtype,
)
from zarr.core.dtype.wrapper import TBaseDType, ZDType
if TYPE_CHECKING:
from zarr.core.common import JSON, ZarrFormat
# Objects accepted as input when building a ``numpy.timedelta64`` scalar. This alias is the
# narrowed type of ``TimeDelta64._check_scalar`` and the parameter type of
# ``TimeDelta64._cast_scalar_unchecked``. ``None`` is explicitly allowed.
TimeDeltaLike = str | int | bytes | np.timedelta64 | timedelta | None
# Objects accepted as input when building a ``numpy.datetime64`` scalar. This alias is the
# narrowed type of ``DateTime64._check_scalar`` and the parameter type of
# ``DateTime64._cast_scalar_unchecked``. ``None`` is explicitly allowed.
DateTimeLike = str | int | bytes | np.datetime64 | datetime | None
def datetime_from_int(data: int, *, unit: DateTimeUnit, scale_factor: int) -> np.datetime64:
    """
    Reinterpret an integer as a ``numpy.datetime64`` scalar.

    The integer is wrapped in a ``numpy.int64`` and viewed as the dtype
    ``datetime64[{scale_factor}{unit}]``, so the stored 64-bit value is kept as-is.

    Parameters
    ----------
    data : int
        The integer to convert.
    unit : DateTimeUnit
        The unit of the resulting datetime64.
    scale_factor : int
        The scale factor (multiplier) applied to ``unit``.

    Returns
    -------
    numpy.datetime64
        The datetime64 value.
    """
    target_dtype = f"datetime64[{scale_factor}{unit}]"
    as_int64 = np.int64(data)
    reinterpreted = as_int64.view(target_dtype)
    return cast("np.datetime64", reinterpreted)
def datetimelike_to_int(data: np.datetime64 | np.timedelta64) -> int:
    """
    Return the 64-bit integer stored inside a datetime64 or timedelta64 scalar.

    The scalar is viewed as ``numpy.int64`` and then unwrapped to a Python ``int``.

    Parameters
    ----------
    data : numpy.datetime64 | numpy.timedelta64
        The value to convert.

    Returns
    -------
    int
        An integer representation of the scalar.
    """
    as_int64 = data.view(np.int64)
    return as_int64.item()
def check_json_time(data: JSON) -> TypeGuard[Literal["NaT"] | int]:
    """
    Type guard for the JSON forms of a time scalar.

    Parameters
    ----------
    data : JSON
        The JSON value to check.

    Returns
    -------
    TypeGuard[Literal["NaT"] | int]
        True if ``data`` passes ``check_json_int`` or is equal to the string ``"NaT"``,
        otherwise False.
    """
    if check_json_int(data):
        return True
    return data == "NaT"
# Covariant type variable for the NumPy dtype class wrapped by a time data type.
# It is bound to the two NumPy time dtype classes and is the first type parameter
# that ``TimeDTypeBase`` passes to ``ZDType``.
BaseTimeDType_co = TypeVar(
    "BaseTimeDType_co",
    bound=np.dtypes.TimeDelta64DType | np.dtypes.DateTime64DType,
    covariant=True,
)
# Covariant type variable for the NumPy scalar type of a time data type.
# It is bound to ``np.timedelta64 | np.datetime64`` and is the second type parameter
# that ``TimeDTypeBase`` passes to ``ZDType``.
BaseTimeScalar_co = TypeVar(
    "BaseTimeScalar_co", bound=np.timedelta64 | np.datetime64, covariant=True
)
class TimeConfig(TypedDict):
    """
    Configuration dict shared by the ``numpy.timedelta64`` and ``numpy.datetime64``
    data types in Zarr V3.

    Attributes
    ----------
    unit : ReadOnly[DateTimeUnit]
        A string encoding a unit of time.
    scale_factor : ReadOnly[int]
        A scale factor applied to the unit.

    Examples
    --------
    .. code-block:: python

        {"unit": "ms", "scale_factor": 1}
    """

    unit: ReadOnly[DateTimeUnit]
    scale_factor: ReadOnly[int]
[docs]
class DateTime64JSON_V3(NamedConfig[Literal["numpy.datetime64"], TimeConfig]):
    """
    The Zarr V3 JSON form of the ``numpy.datetime64`` data type.

    It is a named configuration whose ``name`` is the literal ``"numpy.datetime64"`` and
    whose ``configuration`` is a ``TimeConfig``.

    References
    ----------
    This representation is defined in the ``numpy.datetime64``
    `specification document <https://zarr-specs.readthedocs.io/en/latest/spec/v3/datatypes.html#numpy-datetime64>`__.

    Examples
    --------
    .. code-block:: python

        {
            "name": "numpy.datetime64",
            "configuration": {
                "unit": "ms",
                "scale_factor": 1
            }
        }
    """
[docs]
class TimeDelta64JSON_V3(NamedConfig[Literal["numpy.timedelta64"], TimeConfig]):
    """
    The Zarr V3 JSON form of the ``TimeDelta64`` data type.

    It is a named configuration whose ``name`` is the literal ``"numpy.timedelta64"`` and
    whose ``configuration`` is a ``TimeConfig``.

    References
    ----------
    This representation is defined in the ``numpy.timedelta64``
    `specification document <https://zarr-specs.readthedocs.io/en/latest/spec/v3/datatypes.html#numpy-timedelta64>`__.

    Examples
    --------
    .. code-block:: python

        {
            "name": "numpy.timedelta64",
            "configuration": {
                "unit": "ms",
                "scale_factor": 1
            }
        }
    """
[docs]
class TimeDelta64JSON_V2(DTypeConfig_V2[str, None]):
    """
    A wrapper around the Zarr V2 JSON form of the ``TimeDelta64`` data type.

    The ``name`` field holds the value that would appear under the ``dtype`` field in
    Zarr V2 array metadata.

    References
    ----------
    The structure of the ``name`` field is defined in the Zarr V2
    `specification document <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding>`__.

    Examples
    --------
    .. code-block:: python

        {
            "name": "<m8[1s]",
            "object_codec_id": None
        }
    """
[docs]
class DateTime64JSON_V2(DTypeConfig_V2[str, None]):
    """
    A wrapper around the Zarr V2 JSON form of the ``DateTime64`` data type.

    The ``name`` field holds the value that would appear under the ``dtype`` field in
    Zarr V2 array metadata.

    References
    ----------
    The structure of the ``name`` field is defined in the Zarr V2
    `specification document <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding>`__.

    Examples
    --------
    .. code-block:: python

        {
            "name": "<M8[10s]",
            "object_codec_id": None
        }
    """
@dataclass(frozen=True, kw_only=True, slots=True)
class TimeDTypeBase(ZDType[BaseTimeDType_co, BaseTimeScalar_co], HasEndianness, HasItemSize):
    """
    Shared base class for the data types that represent time via the NumPy TimeDelta64 and
    DateTime64 data types.

    Attributes
    ----------
    scale_factor : int
        The scale factor for the time unit. Must satisfy ``1 <= scale_factor < 2**31``.
    unit : DateTimeUnit
        The unit of time. Must be one of the members of ``DateTimeUnit``.
    """

    _numpy_name: ClassVar[Literal["datetime64", "timedelta64"]]
    scale_factor: int
    unit: DateTimeUnit

    def __post_init__(self) -> None:
        """
        Validate ``scale_factor`` and ``unit`` after dataclass initialisation.

        Raises
        ------
        ValueError
            If ``scale_factor`` is smaller than 1 or not smaller than ``2**31``, or if
            ``unit`` is not one of the members of ``DateTimeUnit``.
        """
        upper_bound = 2**31
        if self.scale_factor < 1:
            raise ValueError(f"scale_factor must be > 0, got {self.scale_factor}.")
        if self.scale_factor >= upper_bound:
            raise ValueError(f"scale_factor must be < 2147483648, got {self.scale_factor}.")
        if self.unit in get_args(DateTimeUnit):
            return
        raise ValueError(f"unit must be one of {get_args(DateTimeUnit)}, got {self.unit!r}.")

    @classmethod
    def from_native_dtype(cls, dtype: TBaseDType) -> Self:
        """
        Build an instance of this class from a native NumPy data type.

        The unit and scale factor are read with ``numpy.datetime_data`` and the endianness
        is taken from the dtype itself.

        Parameters
        ----------
        dtype : TBaseDType
            The native NumPy dtype to convert.

        Returns
        -------
        Self
            An instance of this data type.

        Raises
        ------
        DataTypeValidationError
            If the dtype is not a valid representation of this class.
        """
        if not cls._check_native_dtype(dtype):
            raise DataTypeValidationError(
                f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}"
            )
        raw_unit, scale_factor = np.datetime_data(dtype.name)
        return cls(
            unit=cast("DateTimeUnit", raw_unit),
            scale_factor=scale_factor,
            endianness=get_endianness_from_numpy_dtype(dtype),
        )

    def to_native_dtype(self) -> BaseTimeDType_co:
        """
        Convert this data type to a NumPy temporal data type with the matching unit,
        scale factor, and byte order.

        Returns
        -------
        BaseTimeDType_co
            A NumPy data type object representing the time data type with
            the specified unit, scale factor, and byte order.
        """
        # NumPy does not allow creating datetime64 or timedelta64 dtypes via
        # np.dtypes.{dtype_name}(), so the dtype is built from a formatted string.
        dtype_spec = f"{self._numpy_name}[{self.scale_factor}{self.unit}]"
        byte_order = endianness_to_numpy_str(self.endianness)
        return np.dtype(dtype_spec).newbyteorder(byte_order)  # type: ignore[return-value]

    def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> int:
        """
        Convert a datetime64 or timedelta64 scalar to its JSON (integer) representation.

        The input is passed straight to ``datetimelike_to_int`` without any type check.

        Parameters
        ----------
        data : object
            The scalar to convert.
        zarr_format : ZarrFormat
            The Zarr format version (2 or 3). Not used by this implementation.

        Returns
        -------
        int
            The JSON representation of the scalar.
        """
        as_int = datetimelike_to_int(data)  # type: ignore[arg-type]
        return as_int

    @property
    def item_size(self) -> int:
        """
        The size of a single scalar in bytes.

        Returns
        -------
        int
            The size of a single scalar in bytes, which is always 8.
        """
        return 8
[docs]
@dataclass(frozen=True, kw_only=True, slots=True)
class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], HasEndianness):
    """
    A Zarr data type for arrays containing NumPy TimeDelta64 data.

    Wraps the ``np.dtypes.TimeDelta64DType`` data type. Scalars for this data type
    are instances of ``np.timedelta64``.

    Attributes
    ----------
    dtype_cls : Type[np.dtypes.TimeDelta64DType]
        The NumPy dtype class for this data type.
    scale_factor : int
        The scale factor for this data type.
    unit : DateTimeUnit
        The unit for this data type.

    References
    ----------
    The Zarr V2 representation of this data type is defined in the Zarr V2
    `specification document <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding>`__.

    The Zarr V3 representation of this data type is defined in the ``numpy.timedelta64``
    `specification document <https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/numpy.timedelta64>`__
    """

    # mypy infers the type of np.dtypes.TimeDelta64DType to be
    # "Callable[[Literal['Y', 'M', 'W', 'D'] | Literal['h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', 'as']], Never]"
    dtype_cls = np.dtypes.TimeDelta64DType  # type: ignore[assignment]
    unit: DateTimeUnit = "generic"
    scale_factor: int = 1
    _zarr_v3_name: ClassVar[Literal["numpy.timedelta64"]] = "numpy.timedelta64"
    _zarr_v2_names: ClassVar[tuple[Literal[">m8"], Literal["<m8"]]] = (">m8", "<m8")
    _numpy_name: ClassVar[Literal["timedelta64"]] = "timedelta64"

    @classmethod
    def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[TimeDelta64JSON_V2]:
        """
        Check that the input is a valid Zarr V2 JSON representation of a NumPy timedelta64
        data type, e.g. a dtype spec whose name is "<m8" or ">m8[10s]".

        The "name" value must start with one of the base identifiers in ``_zarr_v2_names``.
        It is then either exactly that identifier, or the identifier followed by a unit
        enclosed in square brackets.

        Parameters
        ----------
        data : DTypeJSON
            The JSON data to check.

        Returns
        -------
        bool
            True if the JSON input is a valid representation of this class,
            otherwise False.
        """
        if not check_dtype_spec_v2(data):
            return False
        name = data["name"]
        # match <m8[ns], >m8[M], etc
        # consider making this a standalone function
        if not isinstance(name, str):
            return False
        if not name.startswith(cls._zarr_v2_names):
            return False
        if len(name) == 3:
            # no unit, and
            # we already checked that this string is either <m8 or >m8
            return True
        # Anything after the base identifier must be a bracketed unit. The opening bracket
        # is checked explicitly so that malformed names such as "<m8x10s]" are rejected
        # here and not passed on to np.dtype.
        return name[3] == "[" and name[-1] == "]" and name[4:-1].endswith(DATETIME_UNIT)

    @classmethod
    def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[TimeDelta64JSON_V3]:
        """
        Check that the input is a valid JSON representation of this class in Zarr V3.

        Only the structure is checked: the key sets of the outer and configuration dicts
        and the value of "name". The values of "unit" and "scale_factor" are not inspected.

        Parameters
        ----------
        data : DTypeJSON
            The JSON data to check.

        Returns
        -------
        TypeGuard[TimeDelta64JSON_V3]
            True if the JSON input is a valid representation of this class,
            otherwise False.
        """
        return (
            isinstance(data, dict)
            and set(data.keys()) == {"name", "configuration"}
            and data["name"] == cls._zarr_v3_name
            and isinstance(data["configuration"], dict)
            and set(data["configuration"].keys()) == {"unit", "scale_factor"}
        )

    @classmethod
    def _from_json_v2(cls, data: DTypeJSON) -> Self:
        """
        Create a TimeDelta64 from a Zarr V2-flavored JSON.

        Parameters
        ----------
        data : DTypeJSON
            The JSON data.

        Returns
        -------
        TimeDelta64
            An instance of TimeDelta64.

        Raises
        ------
        DataTypeValidationError
            If the input JSON is not a valid representation of this class.
        """
        if cls._check_json_v2(data):
            name = data["name"]
            return cls.from_native_dtype(np.dtype(name))
        msg = (
            f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string "
            f"representation of an instance of {cls.dtype_cls}"
        )
        raise DataTypeValidationError(msg)

    @classmethod
    def _from_json_v3(cls, data: DTypeJSON) -> Self:
        """
        Create a TimeDelta64 from a Zarr V3-flavored JSON.

        The JSON representation of a TimeDelta64 in Zarr V3 is a dict with a 'name' key
        with the value 'numpy.timedelta64', and a 'configuration' key with a value of a dict
        with a 'unit' key and a 'scale_factor' key.

        For example:

        .. code-block:: json

            {
                "name": "numpy.timedelta64",
                "configuration": {
                    "unit": "generic",
                    "scale_factor": 1
                }
            }

        Parameters
        ----------
        data : DTypeJSON
            The JSON data to parse.

        Returns
        -------
        Self
            An instance of this data type. Only ``unit`` and ``scale_factor`` are taken
            from the JSON; ``endianness`` is not passed and keeps its default.

        Raises
        ------
        DataTypeValidationError
            If the input JSON is not a valid representation of this class.
        """
        if cls._check_json_v3(data):
            unit = data["configuration"]["unit"]
            scale_factor = data["configuration"]["scale_factor"]
            return cls(unit=unit, scale_factor=scale_factor)
        msg = (
            f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a dict "
            f"with a 'name' key with the value 'numpy.timedelta64', "
            "and a 'configuration' key with a value of a dict with a 'unit' key and a "
            "'scale_factor' key"
        )
        raise DataTypeValidationError(msg)

    @overload
    def to_json(self, zarr_format: Literal[2]) -> TimeDelta64JSON_V2: ...
    @overload
    def to_json(self, zarr_format: Literal[3]) -> TimeDelta64JSON_V3: ...

    def to_json(self, zarr_format: ZarrFormat) -> TimeDelta64JSON_V2 | TimeDelta64JSON_V3:
        """
        Serialize this data type to JSON.

        Parameters
        ----------
        zarr_format : ZarrFormat
            The Zarr format version (2 or 3).

        Returns
        -------
        TimeDelta64JSON_V2 | TimeDelta64JSON_V3
            The JSON representation of the data type.

        Raises
        ------
        ValueError
            If the zarr_format is not 2 or 3.
        """
        if zarr_format == 2:
            # For V2 the name is the NumPy dtype string of the native dtype.
            name = self.to_native_dtype().str
            return {"name": name, "object_codec_id": None}
        elif zarr_format == 3:
            return {
                "name": self._zarr_v3_name,
                "configuration": {"unit": self.unit, "scale_factor": self.scale_factor},
            }
        raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}")  # pragma: no cover

    def _check_scalar(self, data: object) -> TypeGuard[TimeDeltaLike]:
        """
        Check if the input has a type that can be cast to a scalar of this data type.

        Parameters
        ----------
        data : object
            The object to check.

        Returns
        -------
        TypeGuard[TimeDeltaLike]
            True if the input is None or an instance of one of the ``TimeDeltaLike`` types,
            False otherwise.
        """
        if data is None:
            return True
        return isinstance(data, str | int | bytes | np.timedelta64 | timedelta)

    def _cast_scalar_unchecked(self, data: TimeDeltaLike) -> np.timedelta64:
        """
        Cast the provided scalar input to a numpy timedelta64 without any type checking.

        This method assumes that the input data is already a valid scalar of this data type,
        and does not perform any validation or type checks. It calls the scalar type of the
        native dtype with the input and the ``{scale_factor}{unit}`` string of this instance.

        Parameters
        ----------
        data : TimeDeltaLike
            The scalar input data to cast.

        Returns
        -------
        numpy.timedelta64
            The input data cast as a numpy timedelta64 scalar.
        """
        return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}")

    def cast_scalar(self, data: object) -> np.timedelta64:
        """
        Cast the input to a numpy timedelta64 scalar after a type check.

        Parameters
        ----------
        data : object
            The scalar value to cast.

        Returns
        -------
        numpy.timedelta64
            The input cast to a NumPy timedelta scalar.

        Raises
        ------
        TypeError
            If the input does not pass ``_check_scalar``.
        """
        if self._check_scalar(data):
            return self._cast_scalar_unchecked(data)
        msg = (
            f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the "
            f"data type {self}."
        )
        raise TypeError(msg)

    def default_scalar(self) -> np.timedelta64:
        """
        Return a default scalar of this data type.

        Returns
        -------
        numpy.timedelta64
            The default scalar value, which is a 'Not-a-Time' (NaT) value.
        """
        return np.timedelta64("NaT")

    def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64:
        """
        Create a scalar of this data type from JSON input.

        Parameters
        ----------
        data : JSON
            The JSON representation of the scalar value: an integer or the string "NaT".
        zarr_format : ZarrFormat
            The zarr format to use for the JSON representation.

        Returns
        -------
        numpy.timedelta64
            The scalar value of this data type.

        Raises
        ------
        TypeError
            If the input JSON is not a valid representation of a scalar for this data type.
        """
        if check_json_time(data):
            # Reuse the same cast as cast_scalar so the two code paths cannot drift apart.
            return self._cast_scalar_unchecked(data)
        raise TypeError(f"Invalid type: {data}. Expected an integer.")  # pragma: no cover
[docs]
@dataclass(frozen=True, kw_only=True, slots=True)
class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEndianness):
    """
    A Zarr data type for arrays containing NumPy Datetime64 data.

    Wraps the ``np.dtypes.DateTime64DType`` data type. Scalars for this data type
    are instances of ``np.datetime64``.

    Attributes
    ----------
    dtype_cls : Type[np.dtypes.DateTime64DType]
        The numpy dtype class for this data type.
    unit : DateTimeUnit
        The unit of time for this data type.
    scale_factor : int
        The scale factor for the time unit.

    References
    ----------
    The Zarr V2 representation of this data type is defined in the Zarr V2
    `specification document <https://github.com/zarr-developers/zarr-specs/blob/main/docs/v2/v2.0.rst#data-type-encoding>`__.

    The Zarr V3 representation of this data type is defined in the ``numpy.datetime64``
    `specification document <https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/numpy.datetime64>`__
    """

    dtype_cls = np.dtypes.DateTime64DType  # type: ignore[assignment]
    _zarr_v3_name: ClassVar[Literal["numpy.datetime64"]] = "numpy.datetime64"
    _zarr_v2_names: ClassVar[tuple[Literal[">M8"], Literal["<M8"]]] = (">M8", "<M8")
    _numpy_name: ClassVar[Literal["datetime64"]] = "datetime64"
    unit: DateTimeUnit = "generic"
    scale_factor: int = 1

    @classmethod
    def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DateTime64JSON_V2]:
        """
        Check that the input is a valid Zarr V2 JSON representation of this data type.

        The "name" value must start with one of the base identifiers in ``_zarr_v2_names``.
        It is then either exactly that identifier, or the identifier followed by a unit
        enclosed in square brackets, e.g. "<M8[10s]".

        Parameters
        ----------
        data : DTypeJSON
            The JSON data to check.

        Returns
        -------
        TypeGuard[DateTime64JSON_V2]
            True if the input is a valid JSON representation of a NumPy datetime64 data type,
            otherwise False.
        """
        if not check_dtype_spec_v2(data):
            return False
        name = data["name"]
        if not isinstance(name, str):
            return False
        if not name.startswith(cls._zarr_v2_names):
            return False
        if len(name) == 3:
            # no unit, and
            # we already checked that this string is either <M8 or >M8
            return True
        # Anything after the base identifier must be a bracketed unit. The opening bracket
        # is checked explicitly so that malformed names such as "<M8x10s]" are rejected
        # here and not passed on to np.dtype.
        return name[3] == "[" and name[-1] == "]" and name[4:-1].endswith(DATETIME_UNIT)

    @classmethod
    def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[DateTime64JSON_V3]:
        """
        Check that the input is a valid JSON representation of this class in Zarr V3.

        Only the structure is checked: the key sets of the outer and configuration dicts
        and the value of "name". The values of "unit" and "scale_factor" are not inspected.

        Parameters
        ----------
        data : DTypeJSON
            The JSON data to check.

        Returns
        -------
        TypeGuard[DateTime64JSON_V3]
            True if the input is a valid JSON representation of a numpy datetime64 data type
            in Zarr V3, False otherwise.
        """
        return (
            isinstance(data, dict)
            and set(data.keys()) == {"name", "configuration"}
            and data["name"] == cls._zarr_v3_name
            and isinstance(data["configuration"], dict)
            and set(data["configuration"].keys()) == {"unit", "scale_factor"}
        )

    @classmethod
    def _from_json_v2(cls, data: DTypeJSON) -> Self:
        """
        Create an instance of this data type from a Zarr V2-flavored JSON representation.

        This method checks if the provided JSON data is a valid representation of this class.
        If valid, it creates an instance using the native NumPy dtype. Otherwise, it raises a
        DataTypeValidationError.

        Parameters
        ----------
        data : DTypeJSON
            The JSON data to parse.

        Returns
        -------
        Self
            An instance of this data type.

        Raises
        ------
        DataTypeValidationError
            If the input JSON is not a valid representation of this class.
        """
        if cls._check_json_v2(data):
            name = data["name"]
            return cls.from_native_dtype(np.dtype(name))
        msg = (
            f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string "
            f"representation of an instance of {cls.dtype_cls}"
        )
        raise DataTypeValidationError(msg)

    @classmethod
    def _from_json_v3(cls, data: DTypeJSON) -> Self:
        """
        Create an instance of this data type from a Zarr V3-flavored JSON representation.

        This method checks if the provided JSON data is a valid representation of this class.
        If valid, it creates an instance from the "unit" and "scale_factor" entries of the
        configuration. Otherwise, it raises a DataTypeValidationError.

        Parameters
        ----------
        data : DTypeJSON
            The JSON data to parse.

        Returns
        -------
        Self
            An instance of this data type. Only ``unit`` and ``scale_factor`` are taken
            from the JSON; ``endianness`` is not passed and keeps its default.

        Raises
        ------
        DataTypeValidationError
            If the input JSON is not a valid representation of this class.
        """
        if cls._check_json_v3(data):
            unit = data["configuration"]["unit"]
            scale_factor = data["configuration"]["scale_factor"]
            return cls(unit=unit, scale_factor=scale_factor)
        msg = (
            f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a dict "
            f"with a 'name' key with the value 'numpy.datetime64', "
            "and a 'configuration' key with a value of a dict with a 'unit' key and a "
            "'scale_factor' key"
        )
        raise DataTypeValidationError(msg)

    @overload
    def to_json(self, zarr_format: Literal[2]) -> DateTime64JSON_V2: ...
    @overload
    def to_json(self, zarr_format: Literal[3]) -> DateTime64JSON_V3: ...

    def to_json(self, zarr_format: ZarrFormat) -> DateTime64JSON_V2 | DateTime64JSON_V3:
        """
        Serialize this data type to JSON.

        Parameters
        ----------
        zarr_format : ZarrFormat
            The Zarr format version (2 or 3).

        Returns
        -------
        DateTime64JSON_V2 | DateTime64JSON_V3
            The JSON representation of the data type.

        Raises
        ------
        ValueError
            If the zarr_format is not 2 or 3.
        """
        if zarr_format == 2:
            # For V2 the name is the NumPy dtype string of the native dtype.
            name = self.to_native_dtype().str
            return {"name": name, "object_codec_id": None}
        elif zarr_format == 3:
            return {
                "name": self._zarr_v3_name,
                "configuration": {"unit": self.unit, "scale_factor": self.scale_factor},
            }
        raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}")  # pragma: no cover

    def _check_scalar(self, data: object) -> TypeGuard[DateTimeLike]:
        """
        Check if the input has a type that can be cast to a scalar of this data type.

        Parameters
        ----------
        data : object
            The object to check.

        Returns
        -------
        TypeGuard[DateTimeLike]
            True if the input is None or an instance of one of the ``DateTimeLike`` types,
            False otherwise.
        """
        if data is None:
            return True
        return isinstance(data, str | int | bytes | np.datetime64 | datetime)

    def _cast_scalar_unchecked(self, data: DateTimeLike) -> np.datetime64:
        """
        Cast the input to a scalar of this data type without any type checking.

        The scalar type of the native dtype is called with the input and the
        ``{scale_factor}{unit}`` string of this instance.

        Parameters
        ----------
        data : DateTimeLike
            The scalar data to cast.

        Returns
        -------
        numpy.datetime64
            The input cast to a NumPy datetime scalar.
        """
        return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}")

    def cast_scalar(self, data: object) -> np.datetime64:
        """
        Cast the input to a scalar of this data type after a type check.

        Parameters
        ----------
        data : object
            The scalar value to cast.

        Returns
        -------
        numpy.datetime64
            The input cast to a NumPy datetime scalar.

        Raises
        ------
        TypeError
            If the data cannot be converted to a numpy datetime scalar.
        """
        if self._check_scalar(data):
            return self._cast_scalar_unchecked(data)
        msg = (
            f"Cannot convert object {data!r} with type {type(data)} to a scalar compatible with the "
            f"data type {self}."
        )
        raise TypeError(msg)

    def default_scalar(self) -> np.datetime64:
        """
        Return the default scalar value for this data type.

        Returns
        -------
        numpy.datetime64
            The default scalar value, which is a 'Not-a-Time' (NaT) value
        """
        return np.datetime64("NaT")

    def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64:
        """
        Read a JSON-serializable value as a scalar.

        Parameters
        ----------
        data : JSON
            The JSON-serializable value: an integer or the string "NaT".
        zarr_format : ZarrFormat
            The zarr format version.

        Returns
        -------
        numpy.datetime64
            The numpy datetime scalar.

        Raises
        ------
        TypeError
            If the input is neither an integer nor the string "NaT".
        """
        if check_json_time(data):
            return self._cast_scalar_unchecked(data)
        raise TypeError(f"Invalid type: {data}. Expected an integer.")  # pragma: no cover