From bdaeb7d237462a629e6c85001317faa85f94a0c6 Mon Sep 17 00:00:00 2001
From: Victor Stinner <victor.stinner@gmail.com>
Date: Mon, 16 Oct 2017 08:44:31 -0700
Subject: bpo-31773: _PyTime_GetPerfCounter() uses _PyTime_t (GH-3983)

* Rewrite win_perf_counter() to only use integers internally.
* Add _PyTime_MulDiv() which compute "ticks * mul / div"
  in two parts (int part and remaining) to prevent integer overflow.
* Clock frequency is checked at initialization for integer overflow.
* Enhance also pymonotonic() to reduce the precision loss on macOS
  (mach_absolute_time() clock).
---
 Include/pytime.h     |   6 +-
 Modules/timemodule.c |   7 ++-
 Python/import.c      |  12 ++--
 Python/pytime.c      | 158 +++++++++++++++++++++++++++++++++++++++------------
 4 files changed, 135 insertions(+), 48 deletions(-)

diff --git a/Include/pytime.h b/Include/pytime.h
index fd95045..488fdc4 100644
--- a/Include/pytime.h
+++ b/Include/pytime.h
@@ -197,7 +197,7 @@ PyAPI_FUNC(int) _PyTime_gmtime(time_t t, struct tm *tm);
 
    The function cannot fail. _PyTime_Init() ensures that the system clock
    works. */
-PyAPI_FUNC(double) _PyTime_GetPerfCounterDouble(void);
+PyAPI_FUNC(_PyTime_t) _PyTime_GetPerfCounter(void);
 
 /* Get the performance counter: clock with the highest available resolution to
    measure a short duration.
@@ -205,8 +205,8 @@ PyAPI_FUNC(double) _PyTime_GetPerfCounterDouble(void);
    Fill info (if set) with information of the function used to get the time.
 
    Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int) _PyTime_GetPerfCounterDoubleWithInfo(
-    double *t,
+PyAPI_FUNC(int) _PyTime_GetPerfCounterWithInfo(
+    _PyTime_t *t,
     _Py_clock_info_t *info);
 
 #ifdef __cplusplus
diff --git a/Modules/timemodule.c b/Modules/timemodule.c
index 3cb1b4e..6af9a90 100644
--- a/Modules/timemodule.c
+++ b/Modules/timemodule.c
@@ -91,11 +91,12 @@ floatclock(_Py_clock_info_t *info)
 static PyObject*
 perf_counter(_Py_clock_info_t *info)
 {
-    double t;
-    if (_PyTime_GetPerfCounterDoubleWithInfo(&t, info) < 0) {
+    _PyTime_t t;
+    if (_PyTime_GetPerfCounterWithInfo(&t, info) < 0) {
         return NULL;
     }
-    return PyFloat_FromDouble(t);
+    double d = _PyTime_AsSecondsDouble(t);
+    return PyFloat_FromDouble(d);
 }
 
 #if defined(MS_WINDOWS) || defined(HAVE_CLOCK)
diff --git a/Python/import.c b/Python/import.c
index 76aa912..d396b4d 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -1669,10 +1669,10 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals,
     else {
         static int ximporttime = 0;
         static int import_level;
-        static double accumulated;
+        static _PyTime_t accumulated;
         _Py_IDENTIFIER(importtime);
 
-        double t1 = 0, accumulated_copy = accumulated;
+        _PyTime_t t1 = 0, accumulated_copy = accumulated;
 
         Py_XDECREF(mod);
 
@@ -1695,7 +1695,7 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals,
 
         if (ximporttime) {
             import_level++;
-            t1 = _PyTime_GetPerfCounterDouble();
+            t1 = _PyTime_GetPerfCounter();
             accumulated = 0;
         }
 
@@ -1711,12 +1711,12 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals,
                                            mod != NULL);
 
         if (ximporttime) {
-            double cum = _PyTime_GetPerfCounterDouble() - t1;
+            _PyTime_t cum = _PyTime_GetPerfCounter() - t1;
 
             import_level--;
             fprintf(stderr, "import time: %9ld | %10ld | %*s%s\n",
-                    (long)ceil((cum - accumulated) * 1e6),
-                    (long)ceil(cum * 1e6),
+                    (long)_PyTime_AsMicroseconds(cum - accumulated, _PyTime_ROUND_CEILING),
+                    (long)_PyTime_AsMicroseconds(cum, _PyTime_ROUND_CEILING),
                     import_level*2, "", PyUnicode_AsUTF8(abs_name));
 
             accumulated = accumulated_copy + cum;
diff --git a/Python/pytime.c b/Python/pytime.c
index 7fd2a90..7b55b10 100644
--- a/Python/pytime.c
+++ b/Python/pytime.c
@@ -42,6 +42,27 @@ _PyTime_overflow(void)
                     "timestamp too large to convert to C _PyTime_t");
 }
 
+
+#if defined(MS_WINDOWS) || defined(__APPLE__)
+Py_LOCAL_INLINE(_PyTime_t)
+_PyTime_MulDiv(_PyTime_t ticks, _PyTime_t mul, _PyTime_t div)
+{
+    _PyTime_t intpart, remaining;
+    /* Compute (ticks * mul / div) in two parts to prevent integer overflow:
+       compute integer part, and then the remaining part.
+
+       (ticks * mul) / div == (ticks / div) * mul + (ticks % div) * mul / div
+
+       The caller must ensure that "(div - 1) * mul" cannot overflow. */
+    intpart = ticks / div;
+    ticks %= div;
+    remaining = ticks * mul;
+    remaining /= div;
+    return intpart * mul + remaining;
+}
+#endif   /* defined(MS_WINDOWS) || defined(__APPLE__) */
+
+
 time_t
 _PyLong_AsTime_t(PyObject *obj)
 {
@@ -700,29 +721,62 @@ pymonotonic(_PyTime_t *tp, _Py_clock_info_t *info, int raise)
 
 #elif defined(__APPLE__)
     static mach_timebase_info_data_t timebase;
-    uint64_t time;
+    static uint64_t t0 = 0;
+    uint64_t ticks;
 
     if (timebase.denom == 0) {
         /* According to the Technical Q&A QA1398, mach_timebase_info() cannot
            fail: https://developer.apple.com/library/mac/#qa/qa1398/ */
         (void)mach_timebase_info(&timebase);
-    }
 
-    time = mach_absolute_time();
+        /* Sanity check: should never occur in practice */
+        if (timebase.numer < 1 || timebase.denom < 1) {
+            PyErr_SetString(PyExc_RuntimeError,
+                            "invalid mach_timebase_info");
+            return -1;
+        }
+
+        /* Check that timebase.numer and timebase.denom can be casted to
+           _PyTime_t. In pratice, timebase uses uint32_t, so casting cannot
+           overflow. At the end, only make sure that the type is uint32_t
+           (_PyTime_t is 64-bit long). */
+        assert(sizeof(timebase.numer) < sizeof(_PyTime_t));
+        assert(sizeof(timebase.denom) < sizeof(_PyTime_t));
 
-    /* apply timebase factor */
-    time *= timebase.numer;
-    time /= timebase.denom;
+        /* Make sure that (ticks * timebase.numer) cannot overflow in
+           _PyTime_MulDiv(), with ticks < timebase.denom.
 
-    *tp = time;
+           Known time bases:
+
+           * always (1, 1) on Intel
+           * (1000000000, 33333335) or (1000000000, 25000000) on PowerPC
+
+           None of these time bases can overflow with 64-bit _PyTime_t, but
+           check for overflow, just in case. */
+        if ((_PyTime_t)timebase.numer > _PyTime_MAX / (_PyTime_t)timebase.denom) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "mach_timebase_info is too large");
+            return -1;
+        }
+
+        t0 = mach_absolute_time();
+    }
 
     if (info) {
         info->implementation = "mach_absolute_time()";
-        info->resolution = (double)timebase.numer / timebase.denom * 1e-9;
+        info->resolution = (double)timebase.numer / (double)timebase.denom * 1e-9;
         info->monotonic = 1;
         info->adjustable = 0;
     }
 
+    ticks = mach_absolute_time();
+    /* Use a "time zero" to reduce precision loss when converting time
+       to floatting point number, as in time.monotonic(). */
+    ticks -= t0;
+    *tp = _PyTime_MulDiv(ticks,
+                         (_PyTime_t)timebase.numer,
+                         (_PyTime_t)timebase.denom);
+
 #elif defined(__hpux)
     hrtime_t time;
 
@@ -802,60 +856,93 @@ _PyTime_GetMonotonicClockWithInfo(_PyTime_t *tp, _Py_clock_info_t *info)
 
 #ifdef MS_WINDOWS
 static int
-win_perf_counter(double *tp, _Py_clock_info_t *info)
+win_perf_counter(_PyTime_t *tp, _Py_clock_info_t *info)
 {
-    static LONGLONG cpu_frequency = 0;
-    static LONGLONG ctrStart;
+    static LONGLONG frequency = 0;
+    static LONGLONG t0 = 0;
     LARGE_INTEGER now;
-    double diff;
+    LONGLONG ticksll;
+    _PyTime_t ticks;
 
-    if (cpu_frequency == 0) {
+    if (frequency == 0) {
         LARGE_INTEGER freq;
-        QueryPerformanceCounter(&now);
-        ctrStart = now.QuadPart;
-        if (!QueryPerformanceFrequency(&freq) || freq.QuadPart == 0) {
+        if (!QueryPerformanceFrequency(&freq)) {
             PyErr_SetFromWindowsErr(0);
             return -1;
         }
-        cpu_frequency = freq.QuadPart;
+        frequency = freq.QuadPart;
+
+        /* Sanity check: should never occur in practice */
+        if (frequency < 1) {
+            PyErr_SetString(PyExc_RuntimeError,
+                            "invalid QueryPerformanceFrequency");
+            return -1;
+        }
+
+        /* Check that frequency can be casted to _PyTime_t.
+
+           Make also sure that (ticks * SEC_TO_NS) cannot overflow in
+           _PyTime_MulDiv(), with ticks < frequency.
+
+           Known QueryPerformanceFrequency() values:
+
+           * 10,000,000 (10 MHz): 100 ns resolution
+           * 3,579,545 Hz (3.6 MHz): 279 ns resolution
+
+           None of these frequencies can overflow with 64-bit _PyTime_t, but
+           check for overflow, just in case. */
+        if (frequency > _PyTime_MAX
+            || frequency > (LONGLONG)_PyTime_MAX / (LONGLONG)SEC_TO_NS) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "QueryPerformanceFrequency is too large");
+            return -1;
+        }
+
+        QueryPerformanceCounter(&now);
+        t0 = now.QuadPart;
     }
-    QueryPerformanceCounter(&now);
-    diff = (double)(now.QuadPart - ctrStart);
+
     if (info) {
         info->implementation = "QueryPerformanceCounter()";
-        info->resolution = 1.0 / (double)cpu_frequency;
+        info->resolution = 1.0 / (double)frequency;
         info->monotonic = 1;
         info->adjustable = 0;
     }
 
-    diff = diff / (double)cpu_frequency;
-    *tp = diff;
+    QueryPerformanceCounter(&now);
+    ticksll = now.QuadPart;
+
+    /* Use a "time zero" to reduce precision loss when converting time
+       to floatting point number, as in time.perf_counter(). */
+    ticksll -= t0;
+
+    /* Make sure that casting LONGLONG to _PyTime_t cannot overflow,
+       both types are signed */
+    Py_BUILD_ASSERT(sizeof(ticksll) <= sizeof(ticks));
+    ticks = (_PyTime_t)ticksll;
+
+    *tp = _PyTime_MulDiv(ticks, SEC_TO_NS, (_PyTime_t)frequency);
     return 0;
 }
 #endif
 
 
 int
-_PyTime_GetPerfCounterDoubleWithInfo(double *d, _Py_clock_info_t *info)
+_PyTime_GetPerfCounterWithInfo(_PyTime_t *t, _Py_clock_info_t *info)
 {
 #ifdef MS_WINDOWS
-    return win_perf_counter(d, info);
+    return win_perf_counter(t, info);
 #else
-    _PyTime_t t;
-    if (_PyTime_GetMonotonicClockWithInfo(&t, info) < 0) {
-        return -1;
-    }
-    *d = _PyTime_AsSecondsDouble(t);
-    return 0;
+    return _PyTime_GetMonotonicClockWithInfo(t, info);
 #endif
 }
 
 
-double
-_PyTime_GetPerfCounterDouble(void)
+_PyTime_t
+_PyTime_GetPerfCounter(void)
 {
-    double t;
-    if (_PyTime_GetPerfCounterDoubleWithInfo(&t, NULL)) {
+    _PyTime_t t;
+    if (_PyTime_GetPerfCounterWithInfo(&t, NULL)) {
         Py_UNREACHABLE();
     }
     return t;
@@ -869,14 +956,13 @@ _PyTime_Init(void)
        are working properly to not have to check for exceptions at runtime. If
        a clock works once, it cannot fail in next calls. */
     _PyTime_t t;
-    double d;
     if (_PyTime_GetSystemClockWithInfo(&t, NULL) < 0) {
         return -1;
     }
     if (_PyTime_GetMonotonicClockWithInfo(&t, NULL) < 0) {
         return -1;
     }
-    if (_PyTime_GetPerfCounterDoubleWithInfo(&d, NULL) < 0) {
+    if (_PyTime_GetPerfCounterWithInfo(&t, NULL) < 0) {
         return -1;
     }
     return 0;
-- 
cgit v0.12