From 8b26c4b8ea4ad0ffc469b73893ae380b133dee06 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Fri, 6 May 2011 21:56:22 -0400 Subject: #11999: sync based on comparing mtimes, not mtime to system clock --- Lib/mailbox.py | 76 +++++++++++++++++++++++++----------------------- Lib/test/test_mailbox.py | 33 +++++++++++---------- Misc/NEWS | 4 +++ 3 files changed, 60 insertions(+), 53 deletions(-) diff --git a/Lib/mailbox.py b/Lib/mailbox.py index b74eeb5..9978359 100644 --- a/Lib/mailbox.py +++ b/Lib/mailbox.py @@ -234,19 +234,24 @@ class Maildir(Mailbox): def __init__(self, dirname, factory=rfc822.Message, create=True): """Initialize a Maildir instance.""" Mailbox.__init__(self, dirname, factory, create) + self._paths = { + 'tmp': os.path.join(self._path, 'tmp'), + 'new': os.path.join(self._path, 'new'), + 'cur': os.path.join(self._path, 'cur'), + } if not os.path.exists(self._path): if create: os.mkdir(self._path, 0700) - os.mkdir(os.path.join(self._path, 'tmp'), 0700) - os.mkdir(os.path.join(self._path, 'new'), 0700) - os.mkdir(os.path.join(self._path, 'cur'), 0700) + for path in self._paths.values(): + os.mkdir(path, 0o700) else: raise NoSuchMailboxError(self._path) self._toc = {} - self._last_read = None # Records last time we read cur/new - # NOTE: we manually invalidate _last_read each time we do any - # modifications ourselves, otherwise we might get tripped up by - # bogus mtime behaviour on some systems (see issue #6896). + self._toc_mtimes = {} + for subdir in ('cur', 'new'): + self._toc_mtimes[subdir] = os.path.getmtime(self._paths[subdir]) + self._last_read = time.time() # Records last time we read cur/new + self._skewfactor = 0.1 # Adjust if os/fs clocks are skewing def add(self, message): """Add message and return assigned key.""" @@ -283,15 +288,11 @@ class Maildir(Mailbox): raise if isinstance(message, MaildirMessage): os.utime(dest, (os.path.getatime(dest), message.get_date())) - # Invalidate cached toc - self._last_read = None return uniq def remove(self, key): """Remove the keyed message; raise KeyError if it doesn't exist.""" os.remove(os.path.join(self._path, self._lookup(key))) - # Invalidate cached toc (only on success) - self._last_read = None def discard(self, key): """If the keyed message exists, remove it.""" @@ -326,8 +327,6 @@ class Maildir(Mailbox): if isinstance(message, MaildirMessage): os.utime(new_path, (os.path.getatime(new_path), message.get_date())) - # Invalidate cached toc - self._last_read = None def get_message(self, key): """Return a Message representation or raise a KeyError.""" @@ -383,8 +382,8 @@ class Maildir(Mailbox): def flush(self): """Write any pending changes to disk.""" # Maildir changes are always written immediately, so there's nothing - # to do except invalidate our cached toc. - self._last_read = None + # to do. + pass def lock(self): """Lock the mailbox.""" @@ -482,36 +481,39 @@ class Maildir(Mailbox): def _refresh(self): """Update table of contents mapping.""" - if self._last_read is not None: - for subdir in ('new', 'cur'): - mtime = os.path.getmtime(os.path.join(self._path, subdir)) - if mtime > self._last_read: - break - else: + # If it has been less than two seconds since the last _refresh() call, + # we have to unconditionally re-read the mailbox just in case it has + # been modified, because os.path.mtime() has a 2 sec resolution in the + # most common worst case (FAT) and a 1 sec resolution typically. This + # results in a few unnecessary re-reads when _refresh() is called + # multiple times in that interval, but once the clock ticks over, we + # will only re-read as needed. Because the filesystem might be being + # served by an independent system with its own clock, we record and + # compare with the mtimes from the filesystem. Because the other + # system's clock might be skewing relative to our clock, we add an + # extra delta to our wait. The default is one tenth second, but is an + # instance variable and so can be adjusted if dealing with a + # particularly skewed or irregular system. + if time.time() - self._last_read > 2 + self._skewfactor: + refresh = False + for subdir in self._toc_mtimes: + mtime = os.path.getmtime(self._paths[subdir]) + if mtime > self._toc_mtimes[subdir]: + refresh = True + self._toc_mtimes[subdir] = mtime + if not refresh: return - - # We record the current time - 1sec so that, if _refresh() is called - # again in the same second, we will always re-read the mailbox - # just in case it's been modified. (os.path.mtime() only has - # 1sec resolution.) This results in a few unnecessary re-reads - # when _refresh() is called multiple times in the same second, - # but once the clock ticks over, we will only re-read as needed. - now = time.time() - 1 - + # Refresh toc self._toc = {} - def update_dir (subdir): - path = os.path.join(self._path, subdir) + for subdir in self._toc_mtimes: + path = self._paths[subdir] for entry in os.listdir(path): p = os.path.join(path, entry) if os.path.isdir(p): continue uniq = entry.split(self.colon)[0] self._toc[uniq] = os.path.join(subdir, entry) - - update_dir('new') - update_dir('cur') - - self._last_read = now + self._last_read = time.time() def _lookup(self, key): """Use TOC to return subpath for given key, or raise a KeyError.""" diff --git a/Lib/test/test_mailbox.py b/Lib/test/test_mailbox.py index 83e3132..3c7a3e6 100644 --- a/Lib/test/test_mailbox.py +++ b/Lib/test/test_mailbox.py @@ -751,27 +751,28 @@ class TestMaildir(TestMailbox): self.assertFalse((perms & 0111)) # Execute bits should all be off. def test_reread(self): - # Wait for 2 seconds - time.sleep(2) - # Initially, the mailbox has not been read and the time is null. - assert getattr(self._box, '_last_read', None) is None - - # Refresh mailbox; the times should now be set to something. - self._box._refresh() - assert getattr(self._box, '_last_read', None) is not None - - # Try calling _refresh() again; the modification times shouldn't have - # changed, so the mailbox should not be re-reading. Re-reading causes - # the ._toc attribute to be assigned a new dictionary object, so - # we'll check that the ._toc attribute isn't a different object. + # Put the last modified times more than two seconds into the past + # (because mtime may have only a two second granularity). + for subdir in ('cur', 'new'): + os.utime(os.path.join(self._box._path, subdir), + (time.time()-5,)*2) + + # Because mtime has a two second granularity in worst case (FAT), a + # refresh is done unconditionally if called for within + # two-second-plus-a-bit of the last one, just in case the mbox has + # changed; so now we have to wait for that interval to expire. + time.sleep(2.01 + self._box._skewfactor) + + # Re-reading causes the ._toc attribute to be assigned a new dictionary + # object, so we'll check that the ._toc attribute isn't a different + # object. orig_toc = self._box._toc def refreshed(): return self._box._toc is not orig_toc - time.sleep(1) # Wait 1sec to ensure time.time()'s value changes self._box._refresh() - assert not refreshed() + self.assertFalse(refreshed()) # Now, write something into cur and remove it. This changes # the mtime and should cause a re-read. @@ -780,7 +781,7 @@ class TestMaildir(TestMailbox): f.close() os.unlink(filename) self._box._refresh() - assert refreshed() + self.assertTrue(refreshed()) class _TestMboxMMDF(TestMailbox): diff --git a/Misc/NEWS b/Misc/NEWS index bdecdf2..7207f4b 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -77,6 +77,10 @@ Core and Builtins Library ------- +- Issue 11999: fixed sporadic sync failure mailbox.Maildir due to its trying to + detect mtime changes by comparing to the system clock instead of to the + previous value of the mtime. + - Issue #10684: shutil.move used to delete a folder on case insensitive filesystems when the source and destination name where the same except for the case. -- cgit v0.12