summaryrefslogtreecommitdiffstats
path: root/Lib/zoneinfo
diff options
context:
space:
mode:
authorPaul Ganssle <paul@ganssle.io>2020-05-16 08:20:06 (GMT)
committerGitHub <noreply@github.com>2020-05-16 08:20:06 (GMT)
commit62972d9d73e83d6eea157617cc69500ffec9e3f0 (patch)
treebd6ddda94be33d9803087398008d46b6cb370adb /Lib/zoneinfo
parent6e8cda91d92da72800d891b2fc2073ecbc134d98 (diff)
downloadcpython-62972d9d73e83d6eea157617cc69500ffec9e3f0.zip
cpython-62972d9d73e83d6eea157617cc69500ffec9e3f0.tar.gz
cpython-62972d9d73e83d6eea157617cc69500ffec9e3f0.tar.bz2
bpo-40503: PEP 615: Tests and implementation for zoneinfo (GH-19909)
This is the initial implementation of PEP 615, the zoneinfo module, ported from the standalone reference implementation (see https://www.python.org/dev/peps/pep-0615/#reference-implementation for a link, which has a more detailed commit history). This includes (hopefully) all functional elements described in the PEP, but documentation is found in a separate PR. This includes: 1. A pure python implementation of the ZoneInfo class 2. A C accelerated implementation of the ZoneInfo class 3. Tests with 100% branch coverage for the Python code (though C code coverage is less than 100%). 4. A compile-time configuration option on Linux (though not on Windows) Differences from the reference implementation: - The module is arranged slightly differently: the accelerated module is `_zoneinfo` rather than `zoneinfo._czoneinfo`, which also necessitates some changes in the test support function. (Suggested by Victor Stinner and Steve Dower.) - The tests are arranged slightly differently and do not include the property tests. The tests live at test/test_zoneinfo/test_zoneinfo.py rather than test/test_zoneinfo.py or test/test_zoneinfo/__init__.py because we may do some refactoring in the future that would likely require this separation anyway; we may: - include the property tests - automatically run all the tests against both pure Python and C, rather than manually constructing C and Python test classes (similar to the way this works with test_datetime.py, which generates C and Python test cases from datetimetester.py). - This includes a compile-time configuration option on Linux (though not on Windows); added with much help from Thomas Wouters. - Integration into the CPython build system is obviously different from building a standalone zoneinfo module wheel. - This includes configuration to install the tzdata package as part of CI, though only on the coverage jobs. 
Introducing a PyPI dependency as part of the CI build was controversial, and this is seen as less of a major change, since the coverage jobs already depend on pip and PyPI. Additional changes that were introduced as part of this PR, most / all of which were backported to the reference implementation: - Fixed reference and memory leaks With much debugging help from Pablo Galindo - Added smoke tests ensuring that the C and Python modules are built The import machinery can be somewhat fragile, and the "seamlessly falls back to pure Python" nature of this module makes it so that a problem building the C extension or a failure to import the pure Python version might easily go unnoticed. - Adjustments to zoneinfo.__dir__ Suggested by Petr Viktorin. - Slight refactorings as suggested by Steve Dower. - Removed unnecessary if check on std_abbr Discovered this because of a missing line in branch coverage.
Diffstat (limited to 'Lib/zoneinfo')
-rw-r--r--Lib/zoneinfo/__init__.py29
-rw-r--r--Lib/zoneinfo/_common.py166
-rw-r--r--Lib/zoneinfo/_tzpath.py110
-rw-r--r--Lib/zoneinfo/_zoneinfo.py755
4 files changed, 1060 insertions, 0 deletions
diff --git a/Lib/zoneinfo/__init__.py b/Lib/zoneinfo/__init__.py
new file mode 100644
index 0000000..81a2d5e
--- /dev/null
+++ b/Lib/zoneinfo/__init__.py
@@ -0,0 +1,29 @@
+__all__ = [
+ "ZoneInfo",
+ "reset_tzpath",
+ "TZPATH",
+ "ZoneInfoNotFoundError",
+ "InvalidTZPathWarning",
+]
+
+from . import _tzpath
+from ._common import ZoneInfoNotFoundError
+
+try:
+ from _zoneinfo import ZoneInfo
+except ImportError: # pragma: nocover
+ from ._zoneinfo import ZoneInfo
+
+reset_tzpath = _tzpath.reset_tzpath
+InvalidTZPathWarning = _tzpath.InvalidTZPathWarning
+
+
def __getattr__(name):
    """Resolve module attributes that are computed on access.

    Only ``TZPATH`` is handled: it is read from ``_tzpath`` at lookup time, so
    the value seen here follows whatever ``reset_tzpath`` last bound there.
    Every other name raises ``AttributeError``.
    """
    if name != "TZPATH":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    return _tzpath.TZPATH
+
+
def __dir__():
    """Return the sorted module namespace plus the lazily provided ``TZPATH``."""
    names = [*globals(), "TZPATH"]
    names.sort()
    return names
diff --git a/Lib/zoneinfo/_common.py b/Lib/zoneinfo/_common.py
new file mode 100644
index 0000000..3d35d4f
--- /dev/null
+++ b/Lib/zoneinfo/_common.py
@@ -0,0 +1,166 @@
+import struct
+
+
def load_tzdata(key):
    """Open the TZif resource for *key* from the ``tzdata`` package.

    The key is split on ``/``: every component but the last names a
    sub-package of ``tzdata.zoneinfo`` and the last one is the resource.

    Returns the binary stream produced by ``importlib.resources.open_binary``.
    Raises ``ZoneInfoNotFoundError`` when the resource cannot be located.
    """
    from importlib import resources

    *parents, resource_name = key.split("/")
    package_name = ".".join(["tzdata.zoneinfo", *parents])

    try:
        return resources.open_binary(package_name, resource_name)
    except (ImportError, FileNotFoundError, UnicodeEncodeError):
        # All three failures mean "this key cannot be found":
        #
        #   ImportError: package_name does not exist (tzdata is not
        #       installed, or the folder part of the key is wrong, e.g.
        #       Amrica/New_York)
        #   FileNotFoundError: resource_name is missing from the package
        #       (e.g. Europe/Krasnoy)
        #   UnicodeEncodeError: package_name or resource_name is not UTF-8,
        #       such as a key containing a surrogate character.
        raise ZoneInfoNotFoundError(f"No time zone found with key {key}")
+
+
def load_data(fobj):
    """Read the raw contents of a TZif stream.

    *fobj* must be a binary stream positioned at the start of the TZif data.
    It has to support ``seek`` when the file is version 2 or later, because
    the version 1 data block is skipped rather than parsed.

    Returns a 6-tuple ``(trans_idx, trans_list_utc, utcoff, isdst, abbr,
    tz_str)`` where ``tz_str`` is the footer TZ string as ``bytes`` for
    version 2+ files and ``None`` for version 1 files.

    Raises ``ValueError`` if a header is invalid or if the version 2+ footer
    is missing or not newline-terminated.
    """
    header = _TZifHeader.from_file(fobj)

    if header.version == 1:
        time_size = 4
        time_type = "l"
    else:
        # Version 2+ has 64-bit integer transition times
        time_size = 8
        time_type = "q"

        # Version 2+ also starts with a Version 1 header and data, which
        # we need to skip now
        skip_bytes = (
            header.timecnt * 5  # Transition times and types
            + header.typecnt * 6  # Local time type records
            + header.charcnt  # Time zone designations
            + header.leapcnt * 8  # Leap second records
            + header.isstdcnt  # Standard/wall indicators
            + header.isutcnt  # UT/local indicators
        )

        fobj.seek(skip_bytes, 1)

        # Now we need to read the second header, which is not the same
        # as the first
        header = _TZifHeader.from_file(fobj)

    typecnt = header.typecnt
    timecnt = header.timecnt
    charcnt = header.charcnt

    # The data portion starts with timecnt transitions and indices
    if timecnt:
        trans_list_utc = struct.unpack(
            f">{timecnt}{time_type}", fobj.read(timecnt * time_size)
        )
        trans_idx = struct.unpack(f">{timecnt}B", fobj.read(timecnt))
    else:
        trans_list_utc = ()
        trans_idx = ()

    # Read the ttinfo struct, (utoff, isdst, abbrind)
    if typecnt:
        utcoff, isdst, abbrind = zip(
            *(struct.unpack(">lbb", fobj.read(6)) for _ in range(typecnt))
        )
    else:
        utcoff = ()
        isdst = ()
        abbrind = ()

    # Now read the abbreviations. They are null-terminated strings, indexed
    # not by position in the array but by position in the unsplit
    # abbreviation string.
    abbr_vals = {}
    abbr_chars = fobj.read(charcnt)

    def get_abbr(idx):
        # Gets a string starting at idx and running until the next \x00
        #
        # We cannot pre-populate abbr_vals by splitting on \x00 because there
        # are some zones that use subsets of longer abbreviations, like so:
        #
        #  LMT\x00AHST\x00HDT\x00
        #
        # Where the idx to abbr mapping should be:
        #
        # {0: "LMT", 4: "AHST", 5: "HST", 9: "HDT"}
        if idx not in abbr_vals:
            span_end = abbr_chars.find(b"\x00", idx)
            abbr_vals[idx] = abbr_chars[idx:span_end].decode()

        return abbr_vals[idx]

    abbr = tuple(get_abbr(idx) for idx in abbrind)

    # The remainder of the file consists of leap seconds (currently unused)
    # and the standard/wall and ut/local indicators, which are metadata we
    # don't need. In version 2 files, we need to skip the unnecessary data to
    # get at the TZ string:
    if header.version >= 2:
        # Each leap second record has size (time_size + 4)
        skip_bytes = header.isutcnt + header.isstdcnt + header.leapcnt * 12
        fobj.seek(skip_bytes, 1)

        # The footer is the TZ string enclosed between two newlines. This is
        # file content, so validate it with a real exception rather than an
        # assert, which would be stripped under -O.
        c = fobj.read(1)
        if c != b"\n":
            raise ValueError(
                f"Invalid TZif file: expected newline before footer, got {c!r}"
            )

        tz_bytes = bytearray()
        while (c := fobj.read(1)) != b"\n":
            if not c:
                # read() returns b"" forever at EOF; without this check a
                # truncated file would never leave the loop.
                raise ValueError(
                    "Invalid TZif file: footer is not newline-terminated"
                )
            tz_bytes += c

        tz_str = bytes(tz_bytes)
    else:
        tz_str = None

    return trans_idx, trans_list_utc, utcoff, isdst, abbr, tz_str


class _TZifHeader:
    """The version number and six count fields of one TZif header."""

    # Slot order matches the order the counts are stored in the file, after
    # the version; from_file() relies on this when building the instance.
    __slots__ = [
        "version",
        "isutcnt",
        "isstdcnt",
        "leapcnt",
        "timecnt",
        "typecnt",
        "charcnt",
    ]

    def __init__(self, *args):
        assert len(self.__slots__) == len(args)
        for attr, val in zip(self.__slots__, args):
            setattr(self, attr, val)

    @classmethod
    def from_file(cls, stream):
        """Read one 44-byte header from *stream* and return it.

        Raises ``ValueError`` if the magic bytes are not ``b"TZif"`` or the
        version byte is neither NUL nor an integer literal.
        """
        # The header starts with a 4-byte "magic" value
        if stream.read(4) != b"TZif":
            raise ValueError("Invalid TZif file: magic not found")

        _version = stream.read(1)
        if _version == b"\x00":
            version = 1
        else:
            version = int(_version)

        # Skip the 15 bytes between the version and the counts
        stream.read(15)

        args = (version,)

        # Slots are defined in the order that the bytes are arranged
        args = args + struct.unpack(">6l", stream.read(24))

        return cls(*args)
+
+
class ZoneInfoNotFoundError(KeyError):
    """Raised when no time zone data can be located for a ZoneInfo key."""
diff --git a/Lib/zoneinfo/_tzpath.py b/Lib/zoneinfo/_tzpath.py
new file mode 100644
index 0000000..8cff0b1
--- /dev/null
+++ b/Lib/zoneinfo/_tzpath.py
@@ -0,0 +1,110 @@
+import os
+import sys
+import sysconfig
+
+
def reset_tzpath(to=None):
    """Rebind the module-level ``TZPATH`` search path.

    to: an iterable of absolute paths, or ``None``. With ``None`` the path is
        taken from the ``PYTHONTZPATH`` environment variable when it is set,
        otherwise from the ``TZPATH`` sysconfig variable.

    Raises ``TypeError`` if *to* is a ``str`` or ``bytes`` (a single path
    rather than a collection) and ``ValueError`` if any entry is relative.
    """
    global TZPATH

    if to is not None:
        if isinstance(to, (str, bytes)):
            raise TypeError(
                "tzpaths must be a list or tuple, "
                + f"not {type(to)}: {to!r}"
            )

        # Materialise once: the paths are examined by the validity check, by
        # the error message and by the final assignment, so a one-shot
        # iterable would otherwise be exhausted after the first pass.
        base_tzpath = tuple(to)
        if not all(map(os.path.isabs, base_tzpath)):
            raise ValueError(_get_invalid_paths_message(base_tzpath))
    else:
        env_var = os.environ.get("PYTHONTZPATH", None)
        if env_var is not None:
            base_tzpath = _parse_python_tzpath(env_var)
        else:
            base_tzpath = _parse_python_tzpath(
                sysconfig.get_config_var("TZPATH")
            )

    TZPATH = tuple(base_tzpath)
+
+
def _parse_python_tzpath(env_var):
    """Split a ``os.pathsep``-delimited path string into absolute paths.

    Returns a tuple of the absolute entries, in order. An empty or ``None``
    *env_var* gives ``()``. If any entry is relative it is dropped and an
    ``InvalidTZPathWarning`` listing the relative entries is issued.
    """
    if not env_var:
        return ()

    raw_tzpath = env_var.split(os.pathsep)
    new_tzpath = tuple(filter(os.path.isabs, raw_tzpath))

    # If anything has been filtered out, we will warn about it
    if len(new_tzpath) != len(raw_tzpath):
        import warnings

        msg = _get_invalid_paths_message(raw_tzpath)

        # The trailing space keeps the two sentences from running together.
        warnings.warn(
            "Invalid paths specified in PYTHONTZPATH environment variable. "
            + msg,
            InvalidTZPathWarning,
        )

    return new_tzpath


def _get_invalid_paths_message(tzpaths):
    """Build the message listing every relative path found in *tzpaths*."""
    invalid_paths = (path for path in tzpaths if not os.path.isabs(path))

    # Each offending path goes on its own indented line
    prefix = "\n "
    indented_str = prefix + prefix.join(invalid_paths)

    return (
        "Paths should be absolute but found the following relative paths:"
        + indented_str
    )
+
+
def find_tzfile(key):
    """Return the first existing file for *key* under ``TZPATH``, else None.

    The key is validated first, so an invalid key raises ``ValueError``
    before any search path is consulted.
    """
    _validate_tzfile_path(key)

    # Lazily join each search root with the key and stop at the first hit
    candidates = (os.path.join(search_path, key) for search_path in TZPATH)
    return next(filter(os.path.isfile, candidates), None)
+
+
# A throwaway base directory ("_" plus the platform separator) used only to
# check that a key cannot escape its parent after normalization.
_TEST_PATH = os.path.normpath(os.path.join("_", "_"))[:-1]


def _validate_tzfile_path(path, _base=_TEST_PATH):
    """Raise ``ValueError`` unless *path* is a normalized, relative key.

    A key is rejected when it is absolute, when normalizing it changes its
    length, or when joining it onto ``_base`` resolves outside ``_base``.
    Returns ``None`` for an acceptable key.
    """
    if os.path.isabs(path):
        raise ValueError(
            f"ZoneInfo keys may not be absolute paths, got: {path}"
        )

    # Only normalizations that alter the length matter here, e.g.
    # a/../b -> b or a/b/ -> a/b. A separator swap such as a/b -> a\b on
    # Windows keeps the length and is therefore accepted.
    normalized = os.path.normpath(path)
    if len(path) != len(normalized):
        raise ValueError(
            f"ZoneInfo keys must be normalized relative paths, got: {path}"
        )

    anchored = os.path.normpath(os.path.join(_base, normalized))
    if not anchored.startswith(_base):
        raise ValueError(
            f"ZoneInfo keys must refer to subdirectories of TZPATH, got: {path}"
        )


# The value lives on as the default of ``_base``; drop the module-level name.
del _TEST_PATH
+
+
class InvalidTZPathWarning(RuntimeWarning):
    """Issued when PYTHONTZPATH contains a path that is not usable."""
+
+
+TZPATH = ()
+reset_tzpath()
diff --git a/Lib/zoneinfo/_zoneinfo.py b/Lib/zoneinfo/_zoneinfo.py
new file mode 100644
index 0000000..69133ae
--- /dev/null
+++ b/Lib/zoneinfo/_zoneinfo.py
@@ -0,0 +1,755 @@
+import bisect
+import calendar
+import collections
+import functools
+import os
+import re
+import struct
+import sys
+import weakref
+from datetime import datetime, timedelta, timezone, tzinfo
+
+from . import _common, _tzpath
+
+EPOCH = datetime(1970, 1, 1)
+EPOCHORDINAL = datetime(1970, 1, 1).toordinal()
+
+# It is relatively expensive to construct new timedelta objects, and in most
+# cases we're looking at the same deltas, like integer numbers of hours, etc.
+# To improve speed and memory use, we'll keep a dictionary with references
+# to the ones we've already used so far.
+#
+# Loading every time zone in the 2020a version of the time zone database
+# requires 447 timedeltas, which requires approximately the amount of space
+# that ZoneInfo("America/New_York") with 236 transitions takes up, so we will
+# set the cache size to 512 so that in the common case we always get cache
+# hits, but specifically crafted ZoneInfo objects don't leak arbitrary amounts
+# of memory.
@functools.lru_cache(maxsize=512)
def _load_timedelta(seconds):
    """Return a ``timedelta`` of *seconds*, shared through an LRU cache."""
    delta = timedelta(seconds=seconds)
    return delta
+
+
class ZoneInfo(tzinfo):
    """A ``tzinfo`` built from TZif data, with per-class instance caching.

    ``ZoneInfo(key)`` returns the cached instance for *key* when one exists.
    ``no_cache`` and ``from_file`` always build a fresh object and never
    touch the caches.
    """

    # Maximum number of most-recently-requested keys held by strong reference
    _strong_cache_size = 8
    _strong_cache = collections.OrderedDict()
    _weak_cache = weakref.WeakValueDictionary()
    __module__ = "zoneinfo"

    def __init_subclass__(cls):
        # Every subclass gets caches of its own
        cls._strong_cache = collections.OrderedDict()
        cls._weak_cache = weakref.WeakValueDictionary()

    def __new__(cls, key):
        instance = cls._weak_cache.get(key, None)
        if instance is None:
            instance = cls._weak_cache.setdefault(key, cls._new_instance(key))
            instance._from_cache = True

        # Update the "strong" cache: move the key to the most recent position
        cls._strong_cache[key] = cls._strong_cache.pop(key, instance)

        if len(cls._strong_cache) > cls._strong_cache_size:
            cls._strong_cache.popitem(last=False)

        return instance

    @classmethod
    def no_cache(cls, key):
        """Build a new instance for *key* without consulting the caches."""
        obj = cls._new_instance(key)
        obj._from_cache = False

        return obj

    @classmethod
    def _new_instance(cls, key):
        """Load *key* from ``TZPATH``, falling back to ``tzdata``."""
        obj = super().__new__(cls)
        obj._key = key
        obj._file_path = obj._find_tzfile(key)

        if obj._file_path is not None:
            file_obj = open(obj._file_path, "rb")
        else:
            file_obj = _common.load_tzdata(key)

        with file_obj as f:
            obj._load_file(f)

        return obj

    @classmethod
    def from_file(cls, fobj, /, key=None):
        """Build an instance from an open binary TZif stream.

        The stream is read but not closed. The result cannot be pickled.
        """
        obj = super().__new__(cls)
        obj._key = key
        obj._file_path = None
        obj._load_file(fobj)
        obj._file_repr = repr(fobj)

        # Disable pickling for objects created from files
        obj.__reduce__ = obj._file_reduce

        return obj

    @classmethod
    def clear_cache(cls, *, only_keys=None):
        """Drop cached instances: all of them, or just those in *only_keys*."""
        if only_keys is not None:
            for key in only_keys:
                cls._weak_cache.pop(key, None)
                cls._strong_cache.pop(key, None)

        else:
            cls._weak_cache.clear()
            cls._strong_cache.clear()

    @property
    def key(self):
        return self._key

    def utcoffset(self, dt):
        return self._find_trans(dt).utcoff

    def dst(self, dt):
        return self._find_trans(dt).dstoff

    def tzname(self, dt):
        return self._find_trans(dt).tzname

    def fromutc(self, dt):
        """Convert from datetime in UTC to datetime in local time"""

        if not isinstance(dt, datetime):
            raise TypeError("fromutc() requires a datetime argument")
        if dt.tzinfo is not self:
            raise ValueError("dt.tzinfo is not self")

        timestamp = self._get_local_timestamp(dt)
        num_trans = len(self._trans_utc)

        if num_trans >= 1 and timestamp < self._trans_utc[0]:
            tti = self._tti_before
            fold = 0
        elif (
            num_trans == 0 or timestamp > self._trans_utc[-1]
        ) and not isinstance(self._tz_after, _ttinfo):
            tti, fold = self._tz_after.get_trans_info_fromutc(
                timestamp, dt.year
            )
        elif num_trans == 0:
            tti = self._tz_after
            fold = 0
        else:
            idx = bisect.bisect_right(self._trans_utc, timestamp)

            if num_trans > 1 and timestamp >= self._trans_utc[1]:
                tti_prev, tti = self._ttinfos[idx - 2 : idx]
            elif timestamp > self._trans_utc[-1]:
                tti_prev = self._ttinfos[-1]
                tti = self._tz_after
            else:
                tti_prev = self._tti_before
                tti = self._ttinfos[0]

            # Detect fold
            shift = tti_prev.utcoff - tti.utcoff
            fold = shift.total_seconds() > timestamp - self._trans_utc[idx - 1]
        dt += tti.utcoff
        if fold:
            return dt.replace(fold=1)
        else:
            return dt

    def _find_trans(self, dt):
        """Return the ttinfo in effect for the local datetime *dt*.

        With ``dt=None`` the fixed ttinfo is returned for fixed-offset zones
        and ``_NO_TTINFO`` otherwise.
        """
        if dt is None:
            if self._fixed_offset:
                return self._tz_after
            else:
                return _NO_TTINFO

        ts = self._get_local_timestamp(dt)

        lt = self._trans_local[dt.fold]

        num_trans = len(lt)

        if num_trans and ts < lt[0]:
            return self._tti_before
        elif not num_trans or ts > lt[-1]:
            if isinstance(self._tz_after, _TZStr):
                return self._tz_after.get_trans_info(ts, dt.year, dt.fold)
            else:
                return self._tz_after
        else:
            # idx is the transition that occurs after this timestamp, so we
            # subtract off 1 to get the current ttinfo
            idx = bisect.bisect_right(lt, ts) - 1
            assert idx >= 0
            return self._ttinfos[idx]

    def _get_local_timestamp(self, dt):
        """Seconds from 1970-01-01 to *dt*, ignoring its tzinfo and micros."""
        return (
            (dt.toordinal() - EPOCHORDINAL) * 86400
            + dt.hour * 3600
            + dt.minute * 60
            + dt.second
        )

    def __str__(self):
        if self._key is not None:
            return f"{self._key}"
        else:
            return repr(self)

    def __repr__(self):
        if self._key is not None:
            return f"{self.__class__.__name__}(key={self._key!r})"
        else:
            return f"{self.__class__.__name__}.from_file({self._file_repr})"

    def __reduce__(self):
        return (self.__class__._unpickle, (self._key, self._from_cache))

    def _file_reduce(self):
        import pickle

        raise pickle.PicklingError(
            "Cannot pickle a ZoneInfo file created from a file stream."
        )

    @classmethod
    def _unpickle(cls, key, from_cache, /):
        if from_cache:
            return cls(key)
        else:
            return cls.no_cache(key)

    def _find_tzfile(self, key):
        return _tzpath.find_tzfile(key)

    def _load_file(self, fobj):
        """Populate the transition tables of this instance from *fobj*.

        Raises ``ValueError`` if the data has no TZ string and no ttinfo
        records at all.
        """
        # Retrieve all the data as it exists in the zoneinfo file
        trans_idx, trans_utc, utcoff, isdst, abbr, tz_str = _common.load_data(
            fobj
        )

        # Infer the DST offsets (needed for .dst()) from the data
        dstoff = self._utcoff_to_dstoff(trans_idx, utcoff, isdst)

        # Convert all the transition times (UTC) into "seconds since
        # 1970-01-01 local time"
        trans_local = self._ts_to_local(trans_idx, trans_utc, utcoff)

        # Construct `_ttinfo` objects for each transition in the file
        _ttinfo_list = [
            _ttinfo(
                _load_timedelta(utcoffset), _load_timedelta(dstoffset), tzname
            )
            for utcoffset, dstoffset, tzname in zip(utcoff, dstoff, abbr)
        ]

        self._trans_utc = trans_utc
        self._trans_local = trans_local
        self._ttinfos = [_ttinfo_list[idx] for idx in trans_idx]

        # Find the first non-DST transition
        for i, is_dst in enumerate(isdst):
            if not is_dst:
                self._tti_before = _ttinfo_list[i]
                break
        else:
            if self._ttinfos:
                self._tti_before = self._ttinfos[0]
            else:
                self._tti_before = None

        # Set the "fallback" time zone
        if tz_str is not None and tz_str != b"":
            self._tz_after = _parse_tz_str(tz_str.decode())
        else:
            if not self._ttinfos and not _ttinfo_list:
                raise ValueError("No time zone information found.")

            if self._ttinfos:
                self._tz_after = self._ttinfos[-1]
            else:
                self._tz_after = _ttinfo_list[-1]

        # Determine if this is a "fixed offset" zone, meaning that the output
        # of the utcoffset, dst and tzname functions does not depend on the
        # specific datetime passed.
        #
        # We make three simplifying assumptions here:
        #
        # 1. If _tz_after is not a _ttinfo, it has transitions that might
        #    actually occur (it is possible to construct TZ strings that
        #    specify STD and DST but no transitions ever occur, such as
        #    AAA0BBB,0/0,J365/25).
        # 2. If _ttinfo_list contains more than one _ttinfo object, the objects
        #    represent different offsets.
        # 3. _ttinfo_list contains no unused _ttinfos (in which case an
        #    otherwise fixed-offset zone with extra _ttinfos defined may
        #    appear to *not* be a fixed offset zone).
        #
        # Violations to these assumptions would be fairly exotic, and exotic
        # zones should almost certainly not be used with datetime.time (the
        # only thing that would be affected by this).
        if len(_ttinfo_list) > 1 or not isinstance(self._tz_after, _ttinfo):
            self._fixed_offset = False
        elif not _ttinfo_list:
            self._fixed_offset = True
        else:
            self._fixed_offset = _ttinfo_list[0] == self._tz_after

    @staticmethod
    def _utcoff_to_dstoff(trans_idx, utcoffsets, isdsts):
        """Infer a DST offset in seconds for every local time type.

        Returns a list parallel to *isdsts*. Types that are not DST stay 0.
        DST types whose offset cannot be inferred from a neighbouring
        standard-time transition fall back to 3600.
        """
        # Now we must transform our ttis and abbrs into `_ttinfo` objects,
        # but there is an issue: .dst() must return a timedelta with the
        # difference between utcoffset() and the "standard" offset, but
        # the "base offset" and "DST offset" are not encoded in the file;
        # we can infer what they are from the isdst flag, but it is not
        # sufficient to just look at the last standard offset, because
        # occasionally countries will shift both DST offset and base offset.

        typecnt = len(isdsts)
        transcnt = len(trans_idx)
        dstoffs = [0] * typecnt  # Provisionally assign all to 0.
        dst_cnt = sum(isdsts)
        dst_found = 0

        for i in range(1, transcnt):
            if dst_cnt == dst_found:
                break

            idx = trans_idx[i]

            dst = isdsts[idx]

            # We're only going to look at daylight saving time
            if not dst:
                continue

            # Skip any offsets that have already been assigned
            if dstoffs[idx] != 0:
                continue

            dstoff = 0
            utcoff = utcoffsets[idx]

            comp_idx = trans_idx[i - 1]

            if not isdsts[comp_idx]:
                dstoff = utcoff - utcoffsets[comp_idx]

            # Look at the following transition only if there is one: the
            # type-index test alone does not guarantee that i + 1 is a valid
            # position in trans_idx.
            if not dstoff and idx < (typecnt - 1) and i + 1 < transcnt:
                comp_idx = trans_idx[i + 1]

                # If the following transition is also DST and we couldn't
                # find the DST offset by this point, we're going to have to
                # skip it and hope this transition gets assigned later
                if isdsts[comp_idx]:
                    continue

                dstoff = utcoff - utcoffsets[comp_idx]

            if dstoff:
                dst_found += 1
                dstoffs[idx] = dstoff
        else:
            # If we didn't find a valid value for a given index, we'll end up
            # with dstoff = 0 for something where `isdst=1`. This is obviously
            # wrong - one hour will be a much better guess than 0
            for idx in range(typecnt):
                if not dstoffs[idx] and isdsts[idx]:
                    dstoffs[idx] = 3600

        return dstoffs

    @staticmethod
    def _ts_to_local(trans_idx, trans_list_utc, utcoffsets):
        """Generate number of seconds since 1970 *in the local time*.

        This is necessary to easily find the transition times in local time.
        Returns two lists, indexed by ``fold``: for each transition the first
        list applies the larger of the two adjacent offsets and the second
        applies the smaller.
        """
        if not trans_list_utc:
            return [[], []]

        # Start with the timestamps and modify in-place
        trans_list_wall = [list(trans_list_utc), list(trans_list_utc)]

        if len(utcoffsets) > 1:
            offset_0 = utcoffsets[0]
            offset_1 = utcoffsets[trans_idx[0]]
            if offset_1 > offset_0:
                offset_1, offset_0 = offset_0, offset_1
        else:
            offset_0 = offset_1 = utcoffsets[0]

        trans_list_wall[0][0] += offset_0
        trans_list_wall[1][0] += offset_1

        for i in range(1, len(trans_idx)):
            offset_0 = utcoffsets[trans_idx[i - 1]]
            offset_1 = utcoffsets[trans_idx[i]]

            if offset_1 > offset_0:
                offset_1, offset_0 = offset_0, offset_1

            trans_list_wall[0][i] += offset_0
            trans_list_wall[1][i] += offset_1

        return trans_list_wall
+
+
class _ttinfo:
    """UTC offset, DST offset and abbreviation for one local time type."""

    __slots__ = ["utcoff", "dstoff", "tzname"]

    def __init__(self, utcoff, dstoff, tzname):
        self.utcoff = utcoff
        self.dstoff = dstoff
        self.tzname = tzname

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of failing
        # with AttributeError on the missing fields.
        if not isinstance(other, _ttinfo):
            return NotImplemented

        return (
            self.utcoff == other.utcoff
            and self.dstoff == other.dstoff
            and self.tzname == other.tzname
        )

    def __repr__(self):  # pragma: nocover
        return (
            f"{self.__class__.__name__}"
            + f"({self.utcoff}, {self.dstoff}, {self.tzname})"
        )
+
+
+_NO_TTINFO = _ttinfo(None, None, None)
+
+
class _TZStr:
    """Standard/DST pair with yearly start and end rules from a TZ string."""

    __slots__ = (
        "std",
        "dst",
        "start",
        "end",
        "get_trans_info",
        "get_trans_info_fromutc",
        "dst_diff",
    )

    def __init__(
        self, std_abbr, std_offset, dst_abbr, dst_offset, start=None, end=None
    ):
        # Offsets arrive as plain seconds; dst_diff stays a plain number
        self.dst_diff = dst_offset - std_offset

        std_delta = _load_timedelta(std_offset)
        no_shift = _load_timedelta(0)
        self.std = _ttinfo(utcoff=std_delta, dstoff=no_shift, tzname=std_abbr)

        self.start = start
        self.end = end

        dst_delta = _load_timedelta(dst_offset)
        shift = _load_timedelta(self.dst_diff)
        self.dst = _ttinfo(utcoff=dst_delta, dstoff=shift, tzname=dst_abbr)

        # Assertions rather than checks: callers are expected to fail before
        # they could pass a missing start or end.
        assert start is not None, "No transition start specified"
        assert end is not None, "No transition end specified"

        self.get_trans_info = self._get_trans_info
        self.get_trans_info_fromutc = self._get_trans_info_fromutc

    def transitions(self, year):
        """Return ``(start, end)`` of DST for *year* as given by the rules."""
        return self.start.year_to_epoch(year), self.end.year_to_epoch(year)

    def _get_trans_info(self, ts, year, fold):
        """Get the information about the current transition - tti"""
        dst_start, dst_end = self.transitions(year)

        # Which boundary moves depends on both the fold and the sign of the
        # DST shift: with fold = 0 the smaller-offset period runs from the
        # end of the gap to the end of the fold, with fold = 1 from the start
        # of the gap to the beginning of the fold. That reduces to comparing
        # fold with "is the shift non-negative".
        if fold == (self.dst_diff >= 0):
            dst_end -= self.dst_diff
        else:
            dst_start += self.dst_diff

        # start >= end means the DST period wraps around the end of the year
        in_dst = (
            dst_start <= ts < dst_end
            if dst_start < dst_end
            else not (dst_end <= ts < dst_start)
        )

        return self.dst if in_dst else self.std

    def _get_trans_info_fromutc(self, ts, year):
        """Return ``(ttinfo, fold)`` for the UTC timestamp *ts* in *year*."""
        dst_start, dst_end = self.transitions(year)
        dst_start -= self.std.utcoff.total_seconds()
        dst_end -= self.dst.utcoff.total_seconds()

        in_dst = (
            dst_start <= ts < dst_end
            if dst_start < dst_end
            else not (dst_end <= ts < dst_start)
        )

        # Positive DST: the ambiguous span is one dst_diff long and follows
        # the end of DST. Otherwise it is measured from the start of DST.
        if self.dst_diff > 0:
            fold_from, fold_until = dst_end, dst_end + self.dst_diff
        else:
            fold_from, fold_until = dst_start, dst_start - self.dst_diff

        fold = fold_from <= ts < fold_until

        return (self.dst if in_dst else self.std, fold)
+
+
def _post_epoch_days_before_year(year):
    """Return the day number of the last day of ``year - 1``.

    Day 0 is 1970-01-01, so the result is -1 for 1970: adding a 1-based day
    of the year to it yields that day's offset from the epoch.
    """
    y = year - 1
    return y * 365 + y // 4 - y // 100 + y // 400 - EPOCHORDINAL


class _DayOffset:
    """A day-of-year transition rule (``Jn`` or ``n``) with a time of day."""

    __slots__ = ["d", "julian", "hour", "minute", "second"]

    def __init__(self, d, julian, hour=2, minute=0, second=0):
        # julian is a bool: J-days start at 1, plain day numbers at 0
        if not (0 + julian) <= d <= 365:
            min_day = 0 + julian
            raise ValueError(f"d must be in [{min_day}, 365], not: {d}")

        self.d = d
        self.julian = julian
        self.hour = hour
        self.minute = minute
        self.second = second

    def year_to_epoch(self, year):
        """Return the rule's moment in *year* as seconds from 1970-01-01."""
        days_before_year = _post_epoch_days_before_year(year)

        d = self.d
        # J-days never count February 29: J59 is always February 28 and J60
        # is always March 1, so only days from 60 onwards are pushed back by
        # one in a leap year.
        if self.julian and d >= 60 and calendar.isleap(year):
            d += 1

        # NOTE(review): a non-Julian d is added exactly like a 1-based day,
        # so day 0 resolves to December 31 of the previous year. Confirm
        # whether that is intended before changing it; the "0/0,J365/25"
        # rule mentioned in this file appears to depend on it.
        epoch = (days_before_year + d) * 86400
        epoch += self.hour * 3600 + self.minute * 60 + self.second

        return epoch
+
+
class _CalendarOffset:
    """A ``Mm.w.d`` transition rule: weekday *d* of week *w* in month *m*."""

    __slots__ = ["m", "w", "d", "hour", "minute", "second"]

    # Days preceding each month in a non-leap year, indexed by month number.
    # Index 0 is a placeholder so that months can be used 1-based.
    _DAYS_BEFORE_MONTH = (
        -1,
        0,
        31,
        59,
        90,
        120,
        151,
        181,
        212,
        243,
        273,
        304,
        334,
    )

    def __init__(self, m, w, d, hour=2, minute=0, second=0):
        if m <= 0 or m > 12:
            raise ValueError("m must be in (0, 12]")

        if w <= 0 or w > 5:
            raise ValueError("w must be in (0, 5]")

        if d < 0 or d > 6:
            raise ValueError("d must be in [0, 6]")

        self.m, self.w, self.d = m, w, d
        self.hour, self.minute, self.second = hour, minute, second

    @classmethod
    def _ymd2ord(cls, year, month, day):
        """Return the day number of year-month-day, with 1970-01-01 as 0."""
        # One extra day once February of a leap year is behind us
        leap_day = month > 2 and calendar.isleap(year)
        days_before_month = cls._DAYS_BEFORE_MONTH[month]
        return (
            _post_epoch_days_before_year(year)
            + days_before_month
            + leap_day
            + day
        )

    # TODO: These are not actually epoch dates as they are expressed in local time
    def year_to_epoch(self, year):
        """Return the rule's moment in *year* as seconds from 1970-01-01."""
        # Year and month are known; w and d still have to become a day of
        # the month. Week 1 is the first week containing weekday d (with
        # 0 = Sunday) and week 5 stands for its last occurrence, so the
        # length of the month is needed as well.
        first_weekday, month_length = calendar.monthrange(year, self.m)

        # calendar counts 0 = Monday while the rule counts 0 = Sunday, hence
        # first_weekday + 1. The subtraction is taken mod 7, so a negative
        # difference needs no special handling, and the trailing + 1 turns
        # the result into a 1-based day of the month.
        first_occurrence = (self.d - (first_weekday + 1)) % 7 + 1

        # Step forward to the w-th occurrence
        day_of_month = first_occurrence + 7 * (self.w - 1)

        # Only w == 5 can overshoot; it means "last occurrence", so step
        # back one week when the month is too short.
        if day_of_month > month_length:
            day_of_month -= 7

        seconds_into_day = self.hour * 3600 + self.minute * 60 + self.second
        ordinal = self._ymd2ord(year, self.m, day_of_month)
        return ordinal * 86400 + seconds_into_day
+
+
def _parse_tz_str(tz_str):
    """Turn a TZ string into a ``_TZStr`` or, without DST, a ``_ttinfo``.

    Raises ``ValueError`` when the string does not match the expected
    layout, when an offset or rule is invalid, when DST is named without
    transition rules, or when rules are given without DST.
    """
    # Layout of the string:
    #
    #   std[offset[dst[offset],start[/time],end[/time]]]
    #
    # std and dst are abbreviations; a form that needs digits or signs is
    # written inside angle brackets. The text has no spaces between "std"
    # and "offset".
    #
    # The format of the offset is ``[+|-]hh[:mm[:ss]]``

    offset_str, *start_end_str = tz_str.split(",", 1)

    # fmt: off
    parser_re = re.compile(
        r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)"
        r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)"
        r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)"
        r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?"
        r")?"  # dst
        r")?$"  # stdoff
    )
    # fmt: on

    m = parser_re.match(offset_str)
    if m is None:
        raise ValueError(f"{tz_str} is not a valid TZ string")

    std_abbr = m.group("std").strip("<>")

    dst_abbr = m.group("dst")
    if dst_abbr:
        dst_abbr = dst_abbr.strip("<>")

    # A missing standard offset means zero
    raw_std_offset = m.group("stdoff")
    if raw_std_offset:
        try:
            std_offset = _parse_tz_delta(raw_std_offset)
        except ValueError as e:
            raise ValueError(f"Invalid STD offset in {tz_str}") from e
    else:
        std_offset = 0

    if dst_abbr is None:
        if start_end_str:
            raise ValueError(f"Transition rule present without DST: {tz_str}")

        # This is a static ttinfo, don't return _TZStr
        return _ttinfo(
            _load_timedelta(std_offset), _load_timedelta(0), std_abbr
        )

    # A missing DST offset means one hour ahead of standard time
    raw_dst_offset = m.group("dstoff")
    if raw_dst_offset:
        try:
            dst_offset = _parse_tz_delta(raw_dst_offset)
        except ValueError as e:
            raise ValueError(f"Invalid DST offset in {tz_str}") from e
    else:
        dst_offset = std_offset + 3600

    if not start_end_str:
        raise ValueError(f"Missing transition rules: {tz_str}")

    start_end_strs = start_end_str[0].split(",", 1)
    try:
        start, end = (_parse_dst_start_end(x) for x in start_end_strs)
    except ValueError as e:
        raise ValueError(f"Invalid TZ string: {tz_str}") from e

    return _TZStr(std_abbr, std_offset, dst_abbr, dst_offset, start, end)
+
+
def _parse_dst_start_end(dststr):
    """Parse one ``date[/time]`` transition rule of a TZ string.

    The date is ``Mm.w.d``, ``Jn`` or ``n``. Returns a ``_CalendarOffset``
    for the first form and a ``_DayOffset`` for the other two, with
    hour/minute/second overridden when a time part is present.

    Raises ``ValueError`` for an empty or malformed date or time.
    """
    date, *time = dststr.split("/")
    if not date:
        # Without this, indexing date[0] would raise IndexError, which the
        # caller's ``except ValueError`` does not translate.
        raise ValueError(f"Invalid dst start/end date: {dststr}")

    if date[0] == "M":
        n_is_julian = False
        # Both separators are literal dots, and the whole date must match
        m = re.fullmatch(r"M(\d{1,2})\.(\d)\.(\d)", date)
        if m is None:
            raise ValueError(f"Invalid dst start/end date: {dststr}")
        date_offset = tuple(map(int, m.groups()))
        offset = _CalendarOffset(*date_offset)
    else:
        if date[0] == "J":
            n_is_julian = True
            date = date[1:]
        else:
            n_is_julian = False

        doy = int(date)
        offset = _DayOffset(doy, n_is_julian)

    if time:
        time_components = list(map(int, time[0].split(":")))
        n_components = len(time_components)
        if n_components < 3:
            # Missing minutes/seconds default to zero
            time_components.extend([0] * (3 - n_components))
        offset.hour, offset.minute, offset.second = time_components

    return offset
+
+
def _parse_tz_delta(tz_delta):
    """Convert a TZ-string offset ``[+|-]hh[:mm[:ss]]`` to a UTC offset.

    Returns the offset in seconds with the sign flipped: an unsigned or
    ``+`` value lies west of UTC and yields a negative result.

    Raises ``ValueError`` if the magnitude is 24 hours or more.
    """
    match = re.match(
        r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
        tz_delta,
    )
    # Anything passed to this function should already have hit an equivalent
    # regular expression to find the section to parse.
    assert match is not None, tz_delta

    h, m, s = (
        int(v) if v is not None else 0
        for v in map(match.group, ("h", "m", "s"))
    )

    # The fields are unsigned, so only the upper bound can be violated here
    total = h * 3600 + m * 60 + s

    if total >= 86400:
        raise ValueError(
            "Offset must be strictly between -24h and +24h: " + tz_delta
        )

    # Yes, +5 maps to an offset of -5h
    if match.group("sign") != "-":
        total *= -1

    return total