2 files changed, 149 insertions, 55 deletions
diff --git a/Lib/bz2.py b/Lib/bz2.py
index a50adf7..fe4118f 100644
--- a/Lib/bz2.py
+++ b/Lib/bz2.py
@@ -79,7 +79,8 @@ class BZ2File(io.BufferedIOBase):
             mode = "rb"
             mode_code = _MODE_READ
             self._decompressor = BZ2Decompressor()
-            self._buffer = None
+            self._buffer = b""
+            self._buffer_offset = 0
         elif mode in ("w", "wb"):
             mode = "wb"
             mode_code = _MODE_WRITE
@@ -124,7 +125,8 @@ class BZ2File(io.BufferedIOBase):
                     self._fp = None
                     self._closefp = False
                     self._mode = _MODE_CLOSED
-                    self._buffer = None
+                    self._buffer = b""
+                    self._buffer_offset = 0
 
     @property
     def closed(self):
@@ -174,16 +176,13 @@ class BZ2File(io.BufferedIOBase):
 
     # Fill the readahead buffer if it is empty. Returns False on EOF.
     def _fill_buffer(self):
+        if self._mode == _MODE_READ_EOF:
+            return False
         # Depending on the input data, our call to the decompressor may not
         # return any data. In this case, try again after reading another block.
-        while True:
-            if self._buffer:
-                return True
-
-            if self._decompressor.unused_data:
-                rawblock = self._decompressor.unused_data
-            else:
-                rawblock = self._fp.read(_BUFFER_SIZE)
+        while self._buffer_offset == len(self._buffer):
+            rawblock = (self._decompressor.unused_data or
+                        self._fp.read(_BUFFER_SIZE))
 
             if not rawblock:
                 if self._decompressor.eof:
@@ -199,30 +198,48 @@ class BZ2File(io.BufferedIOBase):
                 self._decompressor = BZ2Decompressor()
 
             self._buffer = self._decompressor.decompress(rawblock)
+            self._buffer_offset = 0
+        return True
 
     # Read data until EOF.
     # If return_data is false, consume the data without returning it.
     def _read_all(self, return_data=True):
+        # Drop bytes already handed out, so the loop can take whole buffers.
+        self._buffer = self._buffer[self._buffer_offset:]
+        self._buffer_offset = 0
+
         blocks = []
         while self._fill_buffer():
             if return_data:
                 blocks.append(self._buffer)
             self._pos += len(self._buffer)
-            self._buffer = None
+            self._buffer = b""  # fully consumed; makes _fill_buffer() refill
         if return_data:
             return b"".join(blocks)
 
     # Read a block of up to n bytes.
     # If return_data is false, consume the data without returning it.
     def _read_block(self, n, return_data=True):
+        # If we have enough data buffered, return immediately.
+        end = self._buffer_offset + n
+        if end <= len(self._buffer):
+            data = self._buffer[self._buffer_offset : end]
+            self._buffer_offset = end
+            self._pos += len(data)
+            return data if return_data else None
+
+        # The loop assumes that _buffer_offset is 0. Ensure that this is true.
+        self._buffer = self._buffer[self._buffer_offset:]
+        self._buffer_offset = 0
+
         blocks = []
         while n > 0 and self._fill_buffer():
             if n < len(self._buffer):
                 data = self._buffer[:n]
-                self._buffer = self._buffer[n:]
+                self._buffer_offset = n
             else:
                 data = self._buffer
-                self._buffer = None
+                self._buffer = b""
             if return_data:
                 blocks.append(data)
             self._pos += len(data)
@@ -238,9 +255,9 @@ class BZ2File(io.BufferedIOBase):
         """
         with self._lock:
             self._check_can_read()
-            if self._mode == _MODE_READ_EOF or not self._fill_buffer():
+            if not self._fill_buffer():
                 return b""
-            return self._buffer
+            return self._buffer[self._buffer_offset:]
 
     def read(self, size=-1):
         """Read up to size uncompressed bytes from the file.
@@ -250,7 +267,7 @@ class BZ2File(io.BufferedIOBase):
         """
         with self._lock:
             self._check_can_read()
-            if self._mode == _MODE_READ_EOF or size == 0:
+            if size == 0:
                 return b""
             elif size < 0:
                 return self._read_all()
@@ -268,15 +285,19 @@ class BZ2File(io.BufferedIOBase):
         # In this case we make multiple reads, to avoid returning b"".
         with self._lock:
             self._check_can_read()
-            if (size == 0 or self._mode == _MODE_READ_EOF or
-                not self._fill_buffer()):
+            if (size == 0 or
+                # Only call _fill_buffer() if the buffer is actually empty.
+                # This gives a significant speedup if *size* is small.
+                (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
                 return b""
-            if 0 < size < len(self._buffer):
-                data = self._buffer[:size]
-                self._buffer = self._buffer[size:]
+            if size > 0:
+                data = self._buffer[self._buffer_offset :
+                                    self._buffer_offset + size]
+                self._buffer_offset += len(data)
             else:
-                data = self._buffer
-                self._buffer = None
+                data = self._buffer[self._buffer_offset:]
+                self._buffer = b""
+                self._buffer_offset = 0
             self._pos += len(data)
             return data
 
@@ -299,6 +320,14 @@ class BZ2File(io.BufferedIOBase):
             raise TypeError("Integer argument expected")
         size = size.__index__()
         with self._lock:
+            # Shortcut for the common case - the whole line is in the buffer.
+            if size < 0:
+                end = self._buffer.find(b"\n", self._buffer_offset) + 1
+                if end > 0:
+                    line = self._buffer[self._buffer_offset : end]
+                    self._buffer_offset = end
+                    self._pos += len(line)
+                    return line
             return io.BufferedIOBase.readline(self, size)
 
     def readlines(self, size=-1):
@@ -345,7 +374,8 @@ class BZ2File(io.BufferedIOBase):
         self._mode = _MODE_READ
         self._pos = 0
         self._decompressor = BZ2Decompressor()
-        self._buffer = None
+        self._buffer = b""
+        self._buffer_offset = 0
 
     def seek(self, offset, whence=0):
         """Change the file position.
@@ -385,8 +415,7 @@ class BZ2File(io.BufferedIOBase):
                 offset -= self._pos
 
             # Read and discard data until we reach the desired position.
-            if self._mode != _MODE_READ_EOF:
-                self._read_block(offset, return_data=False)
+            self._read_block(offset, return_data=False)
 
             return self._pos
 
diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py
index 2420772..24924f6 100644
--- a/Lib/test/test_subprocess.py
+++ b/Lib/test/test_subprocess.py
@@ -1,4 +1,5 @@
 import unittest
+from test import script_helper
 from test import support
 import subprocess
 import sys
@@ -191,15 +192,101 @@ class ProcessTestCase(BaseTestCase):
         p.wait()
         self.assertEqual(p.stderr, None)
 
+    # For use in the test_cwd* tests below.
+    def _normalize_cwd(self, cwd):
+        # Return *cwd* as os.getcwd() reports it after chdir'ing there.
+        # os.path.realpath doesn't expand Tru64 {memb} strings (bug #1063571).
+        original_cwd = os.getcwd()
+        os.chdir(cwd)
+        try:
+            return os.getcwd()
+        finally:
+            os.chdir(original_cwd)  # restore even if getcwd() raised
+
+    # For use in the test_cwd* tests below.
+    def _split_python_path(self):
+        # Resolve sys.executable with os.path.realpath() and return the
+        # result split into a (python_dir, python_base) pair.
+        return os.path.split(os.path.realpath(sys.executable))
+
+    # For use in the test_cwd* tests below.
+    def _assert_cwd(self, expected_cwd, python_arg, **kwargs):
+        # Run a child Python through Popen (extra Popen options come from
+        # **kwargs).  The child prints its working directory and exits with
+        # status 47; check that status, then that the directory printed
+        # equals *expected_cwd* after os.path.normcase() on both sides.
+        child_code = ("import os, sys; "
+                      "sys.stdout.write(os.getcwd()); "
+                      "sys.exit(47)")
+        p = subprocess.Popen([python_arg, "-c", child_code],
+                             stdout=subprocess.PIPE, **kwargs)
+        self.addCleanup(p.stdout.close)
+        p.wait()
+        self.assertEqual(47, p.returncode)
+        reported_cwd = p.stdout.read().decode("utf-8")
+        self.assertEqual(os.path.normcase(expected_cwd),
+                         os.path.normcase(reported_cwd))
+
+    def test_cwd(self):
+        # The child process must run in the directory passed as *cwd*.
+        # Normalize it first so it compares equal to the child's getcwd().
+        temp_dir = self._normalize_cwd(tempfile.gettempdir())
+        self._assert_cwd(temp_dir, sys.executable, cwd=temp_dir)
+
+    def test_cwd_with_relative_arg(self):
+        # A relative args[0] must be looked up relative to *cwd*.
+        python_dir, python_base = self._split_python_path()
+        rel_python = os.path.join(os.curdir, python_base)
+        with support.temp_cwd() as wrong_dir:
+            # First confirm that the call fails both without cwd and with
+            # the wrong cwd.
+            with self.assertRaises(FileNotFoundError):
+                subprocess.Popen([rel_python])
+            with self.assertRaises(FileNotFoundError):
+                subprocess.Popen([rel_python], cwd=wrong_dir)
+            # Only with cwd set to Python's own directory should it succeed.
+            python_dir = self._normalize_cwd(python_dir)
+            self._assert_cwd(python_dir, rel_python, cwd=python_dir)
+
+    def test_cwd_with_relative_executable(self):
+        # A relative *executable* must be looked up relative to *cwd*, and
+        # *executable* must take precedence over args[0].
+        python_dir, python_base = self._split_python_path()
+        rel_python = os.path.join(os.curdir, python_base)
+        bogus = "somethingyoudonthave"
+        with support.temp_cwd() as wrong_dir:
+            # First confirm that the call fails both without cwd and with
+            # the wrong cwd.
+            with self.assertRaises(FileNotFoundError):
+                subprocess.Popen([bogus], executable=rel_python)
+            with self.assertRaises(FileNotFoundError):
+                subprocess.Popen([bogus], executable=rel_python,
+                                 cwd=wrong_dir)
+            python_dir = self._normalize_cwd(python_dir)
+            self._assert_cwd(python_dir, bogus,
+                             executable=rel_python, cwd=python_dir)
+
+    def test_cwd_with_absolute_arg(self):
+        # An absolute args[0] must be found even when *cwd* is a directory
+        # from which the relative path cannot be resolved.
+        python_dir, python_base = self._split_python_path()
+        abs_python = os.path.join(python_dir, python_base)
+        rel_python = os.path.join(os.curdir, python_base)
+        with script_helper.temp_dir() as wrong_dir:
+            # First confirm that a relative path fails with this cwd; the
+            # absolute path is then tried with the very same cwd.
+            with self.assertRaises(FileNotFoundError):
+                subprocess.Popen([rel_python], cwd=wrong_dir)
+            wrong_dir = self._normalize_cwd(wrong_dir)
+            self._assert_cwd(wrong_dir, abs_python, cwd=wrong_dir)
+
     @unittest.skipIf(sys.base_prefix != sys.prefix,
                      'Test is not venv-compatible')
     def test_executable_with_cwd(self):
-        python_dir = os.path.dirname(os.path.realpath(sys.executable))
-        p = subprocess.Popen(["somethingyoudonthave", "-c",
-                              "import sys; sys.exit(47)"],
-                             executable=sys.executable, cwd=python_dir)
-        p.wait()
-        self.assertEqual(p.returncode, 47)
+        # args[0] is deliberately bogus: *executable* selects the program.
+        python_dir = self._normalize_cwd(self._split_python_path()[0])
+        self._assert_cwd(python_dir, "somethingyoudonthave",
+                         executable=sys.executable, cwd=python_dir)
 
     @unittest.skipIf(sys.base_prefix != sys.prefix,
                      'Test is not venv-compatible')
@@ -208,11 +295,9 @@ class ProcessTestCase(BaseTestCase):
     def test_executable_without_cwd(self):
         # For a normal installation, it should work without 'cwd'
         # argument.  For test runs in the build directory, see #7774.
-        p = subprocess.Popen(["somethingyoudonthave", "-c",
-                              "import sys; sys.exit(47)"],
-                             executable=sys.executable)
-        p.wait()
-        self.assertEqual(p.returncode, 47)
+        # Without cwd, the child inherits the cwd of this process.
+        self._assert_cwd(os.getcwd(), "somethingyoudonthave",
+                         executable=sys.executable)
 
     def test_stdin_pipe(self):
         # stdin redirection
@@ -369,24 +452,6 @@ class ProcessTestCase(BaseTestCase):
         p.wait()
         self.assertEqual(p.stdin, None)
 
-    def test_cwd(self):
-        tmpdir = tempfile.gettempdir()
-        # We cannot use os.path.realpath to canonicalize the path,
-        # since it doesn't expand Tru64 {memb} strings. See bug 1063571.
-        cwd = os.getcwd()
-        os.chdir(tmpdir)
-        tmpdir = os.getcwd()
-        os.chdir(cwd)
-        p = subprocess.Popen([sys.executable, "-c",
-                              'import sys,os;'
-                              'sys.stdout.write(os.getcwd())'],
-                             stdout=subprocess.PIPE,
-                             cwd=tmpdir)
-        self.addCleanup(p.stdout.close)
-        normcase = os.path.normcase
-        self.assertEqual(normcase(p.stdout.read().decode("utf-8")),
-                         normcase(tmpdir))
-
     def test_env(self):
         newenv = os.environ.copy()
         newenv["FRUIT"] = "orange"